소스 검색

chore: refactor searXNG tool (#7220)

非法操作 11 달 전
부모
커밋
b3743a9ae5

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 2501 - 0
api/core/tools/provider/builtin/searxng/docker/settings.yml


+ 54 - 0
api/core/tools/provider/builtin/searxng/docker/uwsgi.ini

@@ -0,0 +1,54 @@
+[uwsgi]
+# Who will run the code
+uid = searxng
+gid = searxng
+
+# Number of workers (usually CPU count)
+# default value: %k (= number of CPU cores, see Dockerfile)
+workers = %k
+
+# Number of threads per worker
+# default value: 4 (see Dockerfile)
+threads = 4
+
+# Permissions granted on the created socket
+chmod-socket = 666
+
+# Plugin to use and interpreter config
+single-interpreter = true
+master = true
+plugin = python3
+lazy-apps = true
+enable-threads = 4
+
+# Module to import
+module = searx.webapp
+
+# Python path and working directory
+pythonpath = /usr/local/searxng/
+chdir = /usr/local/searxng/searx/
+
+# automatically set processes name to something meaningful
+auto-procname = true
+
+# Disable request logging for privacy
+disable-logging = true
+log-5xx = true
+
+# Set the max size of a request (request-body excluded)
+buffer-size = 8192
+
+# No keep alive
+# See https://github.com/searx/searx-docker/issues/24
+add-header = Connection: close
+
+# Follow SIGTERM convention
+# See https://github.com/searxng/searxng/issues/3427
+die-on-term
+
+# uwsgi serves the static files
+static-map = /static=/usr/local/searxng/searx/static
+# static files expire after one day (86400 seconds)
+static-expires = /* 86400
+static-gzip-all = True
+offload-threads = 4

+ 1 - 2
api/core/tools/provider/builtin/searxng/searxng.py

@@ -17,8 +17,7 @@ class SearXNGProvider(BuiltinToolProviderController):
                 tool_parameters={
                     "query": "SearXNG",
                     "limit": 1,
-                    "search_type": "page",
-                    "result_type": "link"
+                    "search_type": "general"
                 },
             )
         except Exception as e:

+ 1 - 4
api/core/tools/provider/builtin/searxng/searxng.yaml

@@ -6,7 +6,7 @@ identity:
     zh_Hans: SearXNG
   description:
     en_US: A free internet metasearch engine.
-    zh_Hans: 开源互联网元搜索引擎
+    zh_Hans: 开源免费的互联网元搜索引擎
   icon: icon.svg
   tags:
     - search
@@ -18,9 +18,6 @@ credentials_for_provider:
     label:
       en_US: SearXNG base URL
       zh_Hans: SearXNG base URL
-    help:
-      en_US: Please input your SearXNG base URL
-      zh_Hans: 请输入您的 SearXNG base URL
     placeholder:
       en_US: Please input your SearXNG base URL
       zh_Hans: 请输入您的 SearXNG base URL

+ 15 - 97
api/core/tools/provider/builtin/searxng/tools/searxng_search.py

@@ -1,4 +1,3 @@
-import json
 from typing import Any
 
 import requests
@@ -7,90 +6,11 @@ from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
 
 
-class SearXNGSearchResults(dict):
-    """Wrapper for search results."""
-
-    def __init__(self, data: str):
-        super().__init__(json.loads(data))
-        self.__dict__ = self
-
-    @property
-    def results(self) -> Any:
-        return self.get("results", [])
-
-
 class SearXNGSearchTool(BuiltinTool):
     """
     Tool for performing a search using SearXNG engine.
     """
 
-    SEARCH_TYPE: dict[str, str] = {
-        "page": "general",
-        "news": "news",
-        "image": "images",
-        # "video": "videos",
-        # "file": "files"
-    }
-    LINK_FILED: dict[str, str] = {
-        "page": "url",
-        "news": "url",
-        "image": "img_src",
-        # "video": "iframe_src",
-        # "file": "magnetlink"
-    }
-    TEXT_FILED: dict[str, str] = {
-        "page": "content",
-        "news": "content",
-        "image": "img_src",
-        # "video": "iframe_src",
-        # "file": "magnetlink"
-    }
-
-    def _invoke_query(self, user_id: str, host: str, query: str, search_type: str, result_type: str, topK: int = 5) -> list[dict]:
-        """Run query and return the results."""
-
-        search_type = search_type.lower()
-        if search_type not in self.SEARCH_TYPE.keys():
-            search_type= "page"
-
-        response = requests.get(host, params={
-            "q": query, 
-            "format": "json", 
-            "categories": self.SEARCH_TYPE[search_type]
-        })
-
-        if response.status_code != 200:
-            raise Exception(f'Error {response.status_code}: {response.text}')
-        
-        search_results = SearXNGSearchResults(response.text).results[:topK]
-
-        if result_type == 'link':
-            results = []
-            if search_type == "page" or search_type == "news":
-                for r in search_results:
-                    results.append(self.create_text_message(
-                        text=f'{r["title"]}: {r.get(self.LINK_FILED[search_type], "")}'
-                    ))
-            elif search_type == "image":
-                for r in search_results:
-                    results.append(self.create_image_message(
-                        image=r.get(self.LINK_FILED[search_type], "")
-                    ))
-            else:
-                for r in search_results:
-                    results.append(self.create_link_message(
-                        link=r.get(self.LINK_FILED[search_type], "")
-                    ))
-
-            return results
-        else:
-            text = ''
-            for i, r in enumerate(search_results):
-                text += f'{i+1}: {r["title"]} - {r.get(self.TEXT_FILED[search_type], "")}\n'
-
-            return self.create_text_message(text=self.summary(user_id=user_id, content=text))
-
-
     def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
         """
         Invoke the SearXNG search tool.
@@ -103,23 +23,21 @@ class SearXNGSearchTool(BuiltinTool):
             ToolInvokeMessage | list[ToolInvokeMessage]: The result of the tool invocation.
         """
 
-        host = self.runtime.credentials.get('searxng_base_url', None)
+        host = self.runtime.credentials.get('searxng_base_url')
         if not host:
             raise Exception('SearXNG api is required')
-                
-        query = tool_parameters.get('query')
-        if not query:
-            return self.create_text_message('Please input query')
-                
-        num_results = min(tool_parameters.get('num_results', 5), 20)
-        search_type = tool_parameters.get('search_type', 'page') or 'page'
-        result_type = tool_parameters.get('result_type', 'text') or 'text'
 
-        return self._invoke_query(
-            user_id=user_id, 
-            host=host, 
-            query=query, 
-            search_type=search_type, 
-            result_type=result_type, 
-            topK=num_results
-        )
+        response = requests.get(host, params={
+            "q": tool_parameters.get('query'),
+            "format": "json",
+            "categories": tool_parameters.get('search_type', 'general')
+        })
+
+        if response.status_code != 200:
+            raise Exception(f'Error {response.status_code}: {response.text}')
+
+        res = response.json().get("results", [])
+        if not res:
+            return self.create_text_message(f"No results found, get response: {response.content}")
+
+        return [self.create_json_message(item) for item in res]

+ 37 - 57
api/core/tools/provider/builtin/searxng/tools/searxng_search.yaml

@@ -1,13 +1,13 @@
 identity:
   name: searxng_search
-  author: Tice
+  author: Junytang
   label:
     en_US: SearXNG Search
     zh_Hans: SearXNG 搜索
 description:
   human:
-    en_US: Perform searches on SearXNG and get results.
-    zh_Hans: 在 SearXNG 上进行搜索并获取结果。
+    en_US: SearXNG is a free internet metasearch engine which aggregates results from more than 70 search services.
+    zh_Hans: SearXNG 是一个免费的互联网元搜索引擎,它从70多个不同的搜索服务中聚合搜索结果。
   llm: Perform searches on SearXNG and get results.
 parameters:
   - name: query
@@ -16,9 +16,6 @@ parameters:
     label:
       en_US: Query string
       zh_Hans: 查询语句
-    human_description:
-      en_US: The search query.
-      zh_Hans: 搜索查询语句。
     llm_description: Key words for searching
     form: llm
   - name: search_type
@@ -27,63 +24,46 @@ parameters:
     label:
       en_US: search type
       zh_Hans: 搜索类型
-      pt_BR: search type
-    human_description:
-      en_US: search type for page, news or image.
-      zh_Hans: 选择搜索的类型:网页,新闻,图片。
-      pt_BR: search type for page, news or image.
-    default: Page
+    default: general
     options:
-      - value: Page
+      - value: general
         label:
-          en_US: Page
-          zh_Hans: 网页
-          pt_BR: Page
-      - value: News
+          en_US: General
+          zh_Hans: 综合
+      - value: images
+        label:
+          en_US: Images
+          zh_Hans: 图片
+      - value: videos
+        label:
+          en_US: Videos
+          zh_Hans: 视频
+      - value: news
         label:
           en_US: News
           zh_Hans: 新闻
-          pt_BR: News
-      - value: Image
+      - value: map
         label:
-          en_US: Image
-          zh_Hans: 图片
-          pt_BR: Image
-    form: form
-  - name: num_results
-    type: number
-    required: true
-    label:
-      en_US: Number of query results
-      zh_Hans: 返回查询数量
-    human_description:
-      en_US: The number of query results.
-      zh_Hans: 返回查询结果的数量。
-    form: form
-    default: 5
-    min: 1
-    max: 20
-  - name: result_type
-    type: select
-    required: true
-    label:
-      en_US: result type
-      zh_Hans: 结果类型
-      pt_BR: result type
-    human_description:
-      en_US: return a list of links or texts.
-      zh_Hans: 返回一个连接列表还是纯文本内容。
-      pt_BR: return a list of links or texts.
-    default: text
-    options:
-      - value: link
+          en_US: Map
+          zh_Hans: 地图
+      - value: music
+        label:
+          en_US: Music
+          zh_Hans: 音乐
+      - value: it
+        label:
+          en_US: IT
+          zh_Hans: 信息技术
+      - value: science
+        label:
+          en_US: Science
+          zh_Hans: 科学
+      - value: files
         label:
-          en_US: Link
-          zh_Hans: 链接
-          pt_BR: Link
-      - value: text
+          en_US: Files
+          zh_Hans: 文件
+      - value: social_media
         label:
-          en_US: Text
-          zh_Hans: 文本
-          pt_BR: Text
+          en_US: Social Media
+          zh_Hans: 社交媒体
     form: form