Quellcode durchsuchen

feat: add retrieval_top_n to config in env (#11132)

Cling_o3 vor 4 Monaten
Ursprung
Commit
f9c2aa7689

+ 3 - 1
api/.env.example

@@ -411,4 +411,6 @@ POSITION_PROVIDER_EXCLUDES=
 # Reset password token expiry minutes
 # Reset password token expiry minutes
 RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5
 RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5
 
 
-CREATE_TIDB_SERVICE_JOB_ENABLED=false
+CREATE_TIDB_SERVICE_JOB_ENABLED=false
+
+RETRIEVAL_TOP_N=0

+ 2 - 0
api/configs/feature/__init__.py

@@ -626,6 +626,8 @@ class DataSetConfig(BaseSettings):
         default=30,
         default=30,
     )
     )
 
 
+    RETRIEVAL_TOP_N: int = Field(description="number of retrieval top_n", default=0)
+
 
 
 class WorkspaceConfig(BaseSettings):
 class WorkspaceConfig(BaseSettings):
     """
     """

+ 14 - 3
api/core/rag/datasource/retrieval_service.py

@@ -3,6 +3,7 @@ from typing import Optional
 
 
 from flask import Flask, current_app
 from flask import Flask, current_app
 
 
+from configs import DifyConfig
 from core.rag.data_post_processor.data_post_processor import DataPostProcessor
 from core.rag.data_post_processor.data_post_processor import DataPostProcessor
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.datasource.keyword.keyword_factory import Keyword
 from core.rag.datasource.vdb.vector_factory import Vector
 from core.rag.datasource.vdb.vector_factory import Vector
@@ -110,8 +111,12 @@ class RetrievalService:
                 str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
                 str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
             )
             )
             all_documents = data_post_processor.invoke(
             all_documents = data_post_processor.invoke(
-                query=query, documents=all_documents, score_threshold=score_threshold, top_n=top_k
+                query=query,
+                documents=all_documents,
+                score_threshold=score_threshold,
+                top_n=DifyConfig.RETRIEVAL_TOP_N or top_k,
             )
             )
+
         return all_documents
         return all_documents
 
 
     @classmethod
     @classmethod
@@ -178,7 +183,10 @@ class RetrievalService:
                         )
                         )
                         all_documents.extend(
                         all_documents.extend(
                             data_post_processor.invoke(
                             data_post_processor.invoke(
-                                query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
+                                query=query,
+                                documents=documents,
+                                score_threshold=score_threshold,
+                                top_n=DifyConfig.RETRIEVAL_TOP_N or len(documents),
                             )
                             )
                         )
                         )
                     else:
                     else:
@@ -220,7 +228,10 @@ class RetrievalService:
                         )
                         )
                         all_documents.extend(
                         all_documents.extend(
                             data_post_processor.invoke(
                             data_post_processor.invoke(
-                                query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
+                                query=query,
+                                documents=documents,
+                                score_threshold=score_threshold,
+                                top_n=DifyConfig.RETRIEVAL_TOP_N or len(documents),
                             )
                             )
                         )
                         )
                     else:
                     else:

+ 1 - 0
docker/docker-compose.yaml

@@ -287,6 +287,7 @@ x-shared-env: &shared-api-worker-env
   OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
   OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
   OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
   OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
   CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false}
   CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false}
+  RETRIEVAL_TOP_N: ${RETRIEVAL_TOP_N:-0}
 
 
 services:
 services:
   # API service
   # API service