scripts/python/searxng-addons/search_answers_llm/plugins_langchain_llm.py at main · bearlike/scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#!/usr/bin/env python3
"""This plugin uses LangChain to generate AI answers with rich formatting.

Set LLM_MODEL_NAME, LLM_BASE_URL, LLM_API_KEY environment variables to
configure the LLM model. Bind python/searxng-addons/search_answers_llm/llm_answer.html
to your own template to customize the answer display.
"""
from __future__ import annotations
from os import environ
import traceback
import typing
import markdown

from searx.search.models import SearchQuery, EngineRef
from searx.result_types import EngineResults, Answer
from searx.plugins import Plugin, PluginInfo
from flask_babel import gettext
from searx.search import Search
from searx import engines
from pydantic import SecretStr

from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langfuse.langchain import CallbackHandler as LangfuseLangchainCallbackHandler
from langfuse import get_client


if typing.TYPE_CHECKING:
    from searx.search import SearchWithPlugins
    from searx.extended_types import SXNG_Request
    from searx.plugins import PluginCfg


try:
    langfuse = get_client()
    langchain_callback_handler = LangfuseLangchainCallbackHandler()
    print("Langfuse client initialized successfully.")
except Exception as exc:  # pragma: no cover - fallback when Langfuse is unavailable
    print("Langfuse client initialization failed: %s. Tracing disabled.", exc)

    class _DummySpan:  # type: ignore
        def update(self, *_, **__):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

    class _DummyLangfuse:
        def start_as_current_span(self, *_, **__):
            return _DummySpan()

        def shutdown(self):
            pass

        def flush(self):
            pass

    class _DummyCallbackHandler:
        def __init__(self, *_, **__):
            pass

    langfuse = _DummyLangfuse()
    langchain_callback_handler = _DummyCallbackHandler()


class SXNGPlugin(Plugin):
    """LangChain LLM Answer Plugin that generates contextual answers with rich formatting."""

    id = "langchain_llm"

    def __init__(self, plg_cfg: "PluginCfg") -> None:
        super().__init__(plg_cfg)
        print(f"[DEBUG] LangChain plugin initialized with active={plg_cfg.active}")

        self.info = PluginInfo(
            id=self.id,
            name=gettext("LangChain LLM"),
            description=gettext("Generate AI answers using LLM with rich formatting"),
            preference_section="general",
        )

        self.model_name = environ.get("LLM_MODEL_NAME", "gemini-2.0-flash")
        # Initialize ChatOpenAI once and reuse
        self.llm = ChatOpenAI(
            model=self.model_name,
            temperature=0.5,
            base_url=environ.get(
                "LLM_BASE_URL",
                "https://generativelanguage.googleapis.com/v1beta/openai/",
            ),
            api_key=SecretStr(environ.get("LLM_API_KEY", "dummy-key")),
        )

        # Initialize markdown converter with common extensions
        self.md_converter = markdown.Markdown(
            extensions=["extra", "codehilite", "toc"],
            extension_configs={"codehilite": {"css_class": "highlight"}},
        )

    def post_search(
        self, request: "SXNG_Request", search: "SearchWithPlugins"
    ) -> EngineResults:
        results = EngineResults()

        print(f"[DEBUG] post_search called for query: {search.search_query.query}")

        # Only process on first page
        if search.search_query.pageno > 1:
            print("[DEBUG] Skipping, not on first page.")
            return results

        query = search.search_query.query
        print(f"[DEBUG] Processing query: {query}")

        try:
            # Get search context from Google and DuckDuckGo
            search_context = self._get_search_context(query)

            if search_context:
                print(
                    f"[DEBUG] Retrieved {len(search_context)} search results for context"
                )
                # Generate LLM response with search context
                llm_answer_html = self._generate_contextual_answer_html(
                    query, search_context
                )
                if llm_answer_html:
                    print("[DEBUG] Generated contextual HTML answer")

                    # Wrap the answer with data attributes for the template to use
                    wrapped_answer = f"""<div data-model-name="{self.model_name}" data-has-context="true">{llm_answer_html}</div>"""

                    # Create Answer with custom template
                    answer = Answer(
                        answer=wrapped_answer,
                        template="answer/llm_answer.html",
                    )
                    results.add(answer)
                    print("[DEBUG] Added HTML Answer to results")
                else:
                    print("[DEBUG] No contextual answer generated")
            else:
                print(
                    "[DEBUG] No search context retrieved, falling back to simple answer"
                )
                # Fallback to simple answer if no search context
                simple_answer_html = self._generate_simple_answer_html(query)
                if simple_answer_html:
                    # Wrap the answer with data attributes for the template to use
                    wrapped_answer = f"""<div data-model-name="{self.model_name}" data-has-context="false">{simple_answer_html}</div>"""

                    answer = Answer(
                        answer=wrapped_answer, template="answer/llm_answer.html"
                    )
                    results.add(answer)

        except Exception as e:
            print(f"[DEBUG] Exception in post_search: {e}")
            traceback.print_exc()

        return results

    def _get_search_context(self, query: str) -> list[dict]:
        """Fetch search results from Google and DuckDuckGo for context."""
        print(f"[DEBUG] Fetching search context for: {query}")

        try:
            # Create engine references for Google and DuckDuckGo
            engine_refs = []

            # Check if Google is available and enabled
            if "google" in engines.engines:
                engine_refs.append(EngineRef("google", "general"))
                print("[DEBUG] Added Google engine")

            # Check if DuckDuckGo is available and enabled
            if "duckduckgo" in engines.engines:
                engine_refs.append(EngineRef("duckduckgo", "general"))
                print("[DEBUG] Added DuckDuckGo engine")

            if not engine_refs:
                print("[DEBUG] No suitable engines found")
                return []

            # Create a search query for just these engines
            context_search_query = SearchQuery(
                query=query,
                engineref_list=engine_refs,
                lang="en-US",
                safesearch=0,
                pageno=1,
                timeout_limit=5.0,  # 5 second timeout for context search
            )

            print(f"[DEBUG] Created SearchQuery with {len(engine_refs)} engines")

            # Execute the search
            context_search = Search(context_search_query)
            context_results = context_search.search()

            # Extract relevant results
            ordered_results = context_results.get_ordered_results()
            print(f"[DEBUG] Retrieved {len(ordered_results)} raw results")

            # Convert to simplified format for LLM context
            search_context = []
            for i, result in enumerate(ordered_results[:5]):  # Top 5 results
                try:
                    context_item = {
                        "title": getattr(result, "title", ""),
                        "content": getattr(result, "content", ""),
                        "url": getattr(result, "url", ""),
                        "engine": getattr(result, "engine", ""),
                    }

                    # Filter out empty results
                    if context_item["title"] or context_item["content"]:
                        search_context.append(context_item)
                        print(
                            f"[DEBUG] Added result {i+1}: {context_item['title'][:50]}..."
                        )

                except Exception as e:
                    print(f"[DEBUG] Error processing result {i}: {e}")
                    continue

            print(f"[DEBUG] Final search context: {len(search_context)} items")
            return search_context

        except Exception as e:
            print(f"[DEBUG] Error in _get_search_context: {e}")

            traceback.print_exc()
            return []

    def _generate_contextual_answer_html(
        self, query: str, search_context: list[dict]
    ) -> str:
        """Generate LLM answer with markdown formatting using search results as context."""
        print(f"[DEBUG] Generating contextual markdown answer for: {query}")

        try:
            # Use the pre-initialized ChatOpenAI instance
            llm = self.llm

            # Prepare context from search results
            context_text = self._format_search_context(search_context)

            # Create messages with search context - Updated to request markdown
            messages = [
                SystemMessage(
                    content="""You are a helpful Search Engine assistant that provides accurate answers and sources based on search results.
                    Use extractive summarization to identify key information from search results and avoid fillers.
                    Identify the most important information and links from the search results.
                    Format your response using Markdown syntax for better readability.
                    Keep the response concise but well-formatted in Markdown."""
                ),
                HumanMessage(
                    content=f"""Query: {query}

Search Results Context:
{context_text}

Based on the search results above, provide a helpful and accurate answer to the query using Markdown formatting. If the search results don't contain relevant information, say so and provide what general knowledge you can."""
                ),
            ]

            # Generate response
            response = llm.invoke(messages)
            answer = str(response.content).strip()
            langfuse.flush()

            print(f"[DEBUG] Generated contextual response: {answer[:100]}...")

            # Create formatted HTML answer from markdown
            formatted_answer = self._format_html_answer(answer, has_context=True)
            return formatted_answer

        except Exception as e:
            print(f"[DEBUG] Error in _generate_contextual_answer_html: {e}")

            traceback.print_exc()
            return ""

    def _generate_simple_answer_html(self, query: str) -> str:
        """Generate a simple LLM answer with markdown formatting (fallback)."""
        print(f"[DEBUG] Generating simple markdown answer for: {query}")

        try:
            # Use the pre-initialized ChatOpenAI instance
            llm = self.llm

            # Create simple messages - Updated to request markdown
            messages = [
                SystemMessage(
                    content="""You are a helpful assistant that provides concise answers using Markdown formatting.
                    Use Markdown syntax like **bold**, *italics*, bullet lists, and code blocks for better readability.
                    Keep responses brief but well-formatted."""
                ),
                HumanMessage(
                    content=f"Question: {query}\n\nProvide a brief, helpful answer using Markdown formatting:"
                ),
            ]

            # Generate response
            response = llm.invoke(messages)
            answer = str(response.content).strip()
            langfuse.flush()

            print(f"[DEBUG] Generated simple response: {answer[:100]}...")

            # Create formatted HTML answer from markdown
            formatted_answer = self._format_html_answer(answer, has_context=False)
            return formatted_answer

        except Exception as e:
            print(f"[DEBUG] Error in _generate_simple_answer_html: {e}")

            traceback.print_exc()
            return ""

    def _format_html_answer(self, markdown_answer: str, has_context: bool) -> str:
        """
        Convert markdown answer to HTML.
        The template is now responsible for all layout, headers, and footers.
        """
        try:
            # Convert markdown to HTML
            html_content = self.md_converter.convert(markdown_answer)
            # Reset the converter for the next use
            self.md_converter.reset()
            return html_content
        except Exception as e:
            print(f"[DEBUG] Error in _format_html_answer: {e}")
            traceback.print_exc()
            # Fallback to the original text if markdown conversion fails
            return f"<div>{markdown_answer}</div>"

    def _format_search_context(self, search_context: list[dict]) -> str:
        """Format search results into text context for the LLM."""
        if not search_context:
            return "No search results available."

        context_parts = []
        for i, result in enumerate(search_context, 1):
            context_parts.append(f"Result {i}:")
            context_parts.append(f"Title: {result.get('title', 'N/A')}")

            content = result.get("content", "")
            if content:
                # Truncate content to avoid token limits
                content = content[:300] + "..." if len(content) > 300 else content
                context_parts.append(f"Content: {content}")

            source = result.get("engine", "Unknown")
            context_parts.append(f"Source: {source}")
            context_parts.append("")  # Empty line between results

        return "\n".join(context_parts)