Browse Source

Integrated SearXNG search as built-in tool (#3363)

Co-authored-by: crazywoola <427733928@qq.com>
junytang 1 year ago
parent
commit
e76693cad9

+ 1 - 0
api/core/tools/provider/_position.yaml

@@ -1,6 +1,7 @@
 - google
 - bing
 - duckduckgo
+- searxng
 - dalle
 - azuredalle
 - wikipedia

+ 56 - 0
api/core/tools/provider/builtin/searxng/_assets/icon.svg

@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   id="svg8"
+   version="1.1"
+   viewBox="0 0 92 92"
+   height="92mm"
+   width="92mm">
+  <defs
+     id="defs2" />
+  <metadata
+     id="metadata5">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     transform="translate(-40.921303,-17.416526)"
+     id="layer1">
+    <circle
+       r="0"
+       style="fill:none;stroke:#000000;stroke-width:12;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       cy="92"
+       cx="75"
+       id="path3713" />
+    <circle
+       r="30"
+       cy="53.902557"
+       cx="75.921303"
+       id="path834"
+       style="fill:none;fill-opacity:1;stroke:#3050ff;stroke-width:10;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <path
+       d="m 67.514849,37.91524 a 18,18 0 0 1 21.051475,3.312407 18,18 0 0 1 3.137312,21.078282"
+       id="path852"
+       style="fill:none;fill-opacity:1;stroke:#3050ff;stroke-width:5;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+    <rect
+       transform="rotate(-46.234709)"
+       ry="1.8669105e-13"
+       y="122.08995"
+       x="3.7063529"
+       height="39.963303"
+       width="18.846331"
+       id="rect912"
+       style="opacity:1;fill:#3050ff;fill-opacity:1;stroke:none;stroke-width:8;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
+  </g>
+</svg>

+ 25 - 0
api/core/tools/provider/builtin/searxng/searxng.py

@@ -0,0 +1,25 @@
+from typing import Any
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin.searxng.tools.searxng_search import SearXNGSearchTool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class SearXNGProvider(BuiltinToolProviderController):
+    """Built-in tool provider for the SearXNG metasearch engine."""
+
+    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
+        """
+        Validate the provider credentials by running one small probe search with them.
+
+        Args:
+            credentials (dict[str, Any]): The provider credentials handed to the search tool runtime.
+
+        Raises:
+            ToolProviderCredentialValidationError: If the probe search raises for any reason;
+                the original exception is kept as the cause.
+        """
+        try:
+            SearXNGSearchTool().fork_tool_runtime(
+                meta={
+                    "credentials": credentials,
+                }
+            ).invoke(
+                user_id='',
+                tool_parameters={
+                    "query": "SearXNG",
+                    # The search tool reads "num_results"; a "limit" key is never consumed.
+                    "num_results": 1,
+                    "search_type": "page",
+                    "result_type": "link"
+                },
+            )
+        except Exception as e:
+            # Chain the cause so the underlying failure stays visible in tracebacks.
+            raise ToolProviderCredentialValidationError(str(e)) from e

+ 24 - 0
api/core/tools/provider/builtin/searxng/searxng.yaml

@@ -0,0 +1,24 @@
+identity:
+  author: Junytang
+  name: searxng
+  label:
+    en_US: SearXNG
+    zh_Hans: SearXNG
+  description:
+    en_US: A free internet metasearch engine.
+    zh_Hans: 开源互联网元搜索引擎
+  icon: icon.svg
+credentials_for_provider:
+  searxng_base_url:
+    type: secret-input
+    required: true
+    label:
+      en_US: SearXNG base URL
+      zh_Hans: SearXNG base URL
+    help:
+      en_US: Please input your SearXNG base URL
+      zh_Hans: 请输入您的 SearXNG base URL
+    placeholder:
+      en_US: Please input your SearXNG base URL
+      zh_Hans: 请输入您的 SearXNG base URL
+    url: https://docs.dify.ai/tutorials/tool-configuration/searxng

+ 124 - 0
api/core/tools/provider/builtin/searxng/tools/searxng_search.py

@@ -0,0 +1,124 @@
+import json
+from typing import Any
+
+import requests
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class SearXNGSearchResults(dict):
+    """Wrapper for search results.
+
+    Parses a JSON document into a dict whose keys can also be read as attributes.
+    """
+
+    def __init__(self, data: str):
+        """
+        Args:
+            data (str): JSON text; it must decode to something ``dict()`` accepts (a JSON object).
+        """
+        super().__init__(json.loads(data))
+        # Alias the attribute namespace to the mapping itself so keys are readable as attributes.
+        self.__dict__ = self
+
+    @property
+    def results(self) -> Any:
+        """Return the ``results`` entry, or an empty list when it is missing or null."""
+        # `or []` also covers an explicit JSON null, which a caller could not slice.
+        return self.get("results") or []
+
+
+class SearXNGSearchTool(BuiltinTool):
+    """
+    Tool for performing a search using SearXNG engine.
+    """
+
+    # Maps the tool's (lower-cased) search type to the value sent as "categories".
+    SEARCH_TYPE = {
+        "page": "general",
+        "news": "news",
+        "image": "images",
+        # "video": "videos",
+        # "file": "files"
+    }
+    # Result key read for each search type when result_type is 'link'.
+    LINK_FILED = {
+        "page": "url",
+        "news": "url",
+        "image": "img_src",
+        # "video": "iframe_src",
+        # "file": "magnetlink"
+    }
+    # Result key read for each search type when building the text summary.
+    TEXT_FILED = {
+        "page": "content",
+        "news": "content",
+        "image": "img_src",
+        # "video": "iframe_src",
+        # "file": "magnetlink"
+    }
+
+    def _invoke_query(self, user_id: str, host: str, query: str, search_type: str, result_type: str, topK: int = 5) -> ToolInvokeMessage | list[ToolInvokeMessage]:
+        """
+        Run query and return the results.
+
+        Args:
+            user_id (str): The ID of the user invoking the tool; passed on to the summarizer.
+            host (str): URL that receives the GET request.
+            query (str): The search query, sent as ``q``.
+            search_type (str): Case-insensitive key of SEARCH_TYPE; unknown values fall back to "page".
+            result_type (str): 'link' yields one message per result; anything else yields one
+                summarized text message.
+            topK (int): Maximum number of results to use.
+
+        Returns:
+            ToolInvokeMessage | list[ToolInvokeMessage]: A list of messages for 'link',
+                otherwise a single text message.
+
+        Raises:
+            Exception: If the response status code is not 200.
+        """
+
+        search_type = search_type.lower()
+        if search_type not in self.SEARCH_TYPE:
+            search_type = "page"
+
+        response = requests.get(
+            host,
+            params={
+                "q": query,
+                "format": "json",
+                "categories": self.SEARCH_TYPE[search_type],
+            },
+            # (connect, read) seconds; without a timeout an unresponsive instance
+            # would block this call indefinitely.
+            timeout=(10, 60),
+        )
+
+        if response.status_code != 200:
+            raise Exception(f'Error {response.status_code}: {response.text}')
+
+        search_results = SearXNGSearchResults(response.text).results[:topK]
+
+        if result_type == 'link':
+            link_field = self.LINK_FILED[search_type]
+            if search_type in ("page", "news"):
+                # A result without a title must not abort the whole search.
+                return [
+                    self.create_text_message(text=f'{r.get("title", "")}: {r.get(link_field, "")}')
+                    for r in search_results
+                ]
+            if search_type == "image":
+                return [
+                    self.create_image_message(image=r.get(link_field, ""))
+                    for r in search_results
+                ]
+            # Only reachable once further search types (video, file) are enabled above.
+            return [
+                self.create_link_message(link=r.get(link_field, ""))
+                for r in search_results
+            ]
+
+        text_field = self.TEXT_FILED[search_type]
+        text = ''.join(
+            f'{i + 1}: {r.get("title", "")} - {r.get(text_field, "")}\n'
+            for i, r in enumerate(search_results)
+        )
+
+        return self.create_text_message(text=self.summary(user_id=user_id, content=text))
+
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
+        """
+        Invoke the SearXNG search tool.
+
+        Args:
+            user_id (str): The ID of the user invoking the tool.
+            tool_parameters (dict[str, Any]): The parameters for the tool invocation.
+
+        Returns:
+            ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation.
+
+        Raises:
+            Exception: If the 'searxng_base_url' credential is missing or empty.
+        """
+
+        host = self.runtime.credentials.get('searxng_base_url', None)
+        if not host:
+            raise Exception('SearXNG api is required')
+
+        query = tool_parameters.get('query', None)
+        if not query:
+            return self.create_text_message('Please input query')
+
+        # A missing, None or empty value falls back to 5. The value is coerced to int because
+        # it is used as a slice bound, and clamped to the 1..20 range declared for the parameter.
+        raw_num_results = tool_parameters.get('num_results')
+        if raw_num_results in (None, ''):
+            num_results = 5
+        else:
+            num_results = max(1, min(int(raw_num_results), 20))
+        search_type = tool_parameters.get('search_type', 'page') or 'page'
+        result_type = tool_parameters.get('result_type', 'text') or 'text'
+
+        return self._invoke_query(
+            user_id=user_id,
+            host=host,
+            query=query,
+            search_type=search_type,
+            result_type=result_type,
+            topK=num_results)

+ 89 - 0
api/core/tools/provider/builtin/searxng/tools/searxng_search.yaml

@@ -0,0 +1,89 @@
+identity:
+  name: searxng_search
+  author: Tice
+  label:
+    en_US: SearXNG Search
+    zh_Hans: SearXNG 搜索
+description:
+  human:
+    en_US: Perform searches on SearXNG and get results.
+    zh_Hans: 在 SearXNG 上进行搜索并获取结果。
+  llm: Perform searches on SearXNG and get results.
+parameters:
+  - name: query
+    type: string
+    required: true
+    label:
+      en_US: Query string
+      zh_Hans: 查询语句
+    human_description:
+      en_US: The search query.
+      zh_Hans: 搜索查询语句。
+    llm_description: Key words for searching
+    form: llm
+  - name: search_type
+    type: select
+    required: true
+    label:
+      en_US: search type
+      zh_Hans: 搜索类型
+      pt_BR: search type
+    human_description:
+      en_US: search type for page, news or image.
+      zh_Hans: 选择搜索的类型:网页,新闻,图片。
+      pt_BR: search type for page, news or image.
+    default: Page
+    options:
+      - value: Page
+        label:
+          en_US: Page
+          zh_Hans: 网页
+          pt_BR: Page
+      - value: News
+        label:
+          en_US: News
+          zh_Hans: 新闻
+          pt_BR: News
+      - value: Image
+        label:
+          en_US: Image
+          zh_Hans: 图片
+          pt_BR: Image
+    form: form
+  - name: num_results
+    type: number
+    required: true
+    label:
+      en_US: Number of query results
+      zh_Hans: 返回查询数量
+    human_description:
+      en_US: The number of query results.
+      zh_Hans: 返回查询结果的数量。
+    form: form
+    default: 5
+    min: 1
+    max: 20
+  - name: result_type
+    type: select
+    required: true
+    label:
+      en_US: result type
+      zh_Hans: 结果类型
+      pt_BR: result type
+    human_description:
+      en_US: return a list of links or texts.
+      zh_Hans: 返回一个链接列表还是纯文本内容。
+      pt_BR: return a list of links or texts.
+    default: text
+    options:
+      - value: link
+        label:
+          en_US: Link
+          zh_Hans: 链接
+          pt_BR: Link
+      - value: text
+        label:
+          en_US: Text
+          zh_Hans: 文本
+          pt_BR: Text
+    form: form