ソースを参照

feat: jina reader (#3468)

Yeuoly 1 年前
コミット
5b3133f9fc

+ 1 - 0
api/core/tools/provider/_position.yaml

@@ -13,6 +13,7 @@
 - pubmed
 - stablediffusion
 - webscraper
+- jina
 - model.zhipuai
 - aippt
 - youtube

+ 4 - 0
api/core/tools/provider/builtin/jina/_assets/icon.svg

@@ -0,0 +1,4 @@
+<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M6.56053 21.4486C9.07925 21.4486 11.1211 19.4068 11.1211 16.8882C11.1211 14.3696 9.07925 12.3279 6.56053 12.3279C4.04182 12.3279 2 14.3696 2 16.8882C2 19.4068 4.04182 21.4486 6.56053 21.4486Z" fill="#EB6161"/>
+<path d="M22.0002 3.59467L21.9406 12.3279C21.9406 17.3055 17.9464 21.3591 12.9685 21.4485L12.8789 12.3577L12.8791 3.62447C12.8791 3.02835 13.356 2.55145 13.9522 2.55145H20.9271C21.5233 2.55145 22.0002 2.99854 22.0002 3.59467Z" fill="#009191"/>
+</svg>

+ 12 - 0
api/core/tools/provider/builtin/jina/jina.py

@@ -0,0 +1,12 @@
+from typing import Any
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class GoogleProvider(BuiltinToolProviderController):
+    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
+        try:
+            pass
+        except Exception as e:
+            raise ToolProviderCredentialValidationError(str(e))

+ 13 - 0
api/core/tools/provider/builtin/jina/jina.yaml

@@ -0,0 +1,13 @@
+identity:
+  author: Dify
+  name: jina
+  label:
+    en_US: JinaReader
+    zh_Hans: JinaReader
+    pt_BR: JinaReader
+  description:
+    en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
+    zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
+    pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+  icon: icon.svg
+credentials_for_provider:

+ 35 - 0
api/core/tools/provider/builtin/jina/tools/jina_reader.py

@@ -0,0 +1,35 @@
+from typing import Any, Union
+
+from yarl import URL
+
+from core.helper import ssrf_proxy
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class JinaReaderTool(BuiltinTool):
+    _jina_reader_endpoint = 'https://r.jina.ai/'
+
+    def _invoke(self, 
+                user_id: str,
+               tool_parameters: dict[str, Any], 
+        ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        """
+            invoke tools
+        """
+        url = tool_parameters['url']
+
+        headers = {
+            'Accept': 'text/event-stream'
+        }
+
+        response = ssrf_proxy.get(
+            str(URL(self._jina_reader_endpoint + url)), 
+            headers=headers,
+            timeout=(10, 60)
+        )
+
+        if tool_parameters.get('summary', False):
+            return self.create_text_message(self.summary(user_id, response.text))
+        
+        return self.create_text_message(response.text)

+ 41 - 0
api/core/tools/provider/builtin/jina/tools/jina_reader.yaml

@@ -0,0 +1,41 @@
+identity:
+  name: jina_reader
+  author: Dify
+  label:
+    en_US: JinaReader
+    zh_Hans: JinaReader
+    pt_BR: JinaReader
+description:
+  human:
+    en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
+    zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
+    pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+  llm: A tool for scraping webpages. Input should be a URL.
+parameters:
+  - name: url
+    type: string
+    required: true
+    label:
+      en_US: URL
+      zh_Hans: 网页链接
+      pt_BR: URL
+    human_description:
+      en_US: used for linking to webpages
+      zh_Hans: 用于链接到网页
+      pt_BR: usado para vincular a páginas da web
+    llm_description: url for scraping
+    form: llm
+  - name: summary
+    type: boolean
+    required: false
+    default: false
+    label:
+      en_US: Enable summary
+      zh_Hans: 是否启用摘要
+      pt_BR: Habilitar resumo
+    human_description:
+      en_US: Enable summary for the output
+      zh_Hans: 为输出启用摘要
+      pt_BR: Habilitar resumo para a saída
+    llm_description: enable summary
+    form: form