Selaa lähdekoodia

fix: Fix parent child retrieval issues (#12206)

Co-authored-by: NFish <douxc512@gmail.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
Wu Tianwei 6 kuukautta sitten
vanhempi
commit
09d759d196
34 muutettua tiedostoa jossa 446 lisäystä ja 387 poistoa
  1. 34 1
      web/app/(commonLayout)/datasets/template/template.en.mdx
  2. 36 3
      web/app/(commonLayout)/datasets/template/template.zh.mdx
  3. 49 70
      web/app/components/app/configuration/dataset-config/params-config/config-content.tsx
  4. 19 18
      web/app/components/app/configuration/dataset-config/params-config/index.tsx
  5. 5 15
      web/app/components/app/configuration/dataset-config/settings-modal/index.tsx
  6. 3 3
      web/app/components/app/configuration/index.tsx
  7. 8 7
      web/app/components/datasets/common/check-rerank-model.ts
  8. 4 1
      web/app/components/datasets/common/economical-retrieval-method-config/index.tsx
  9. 71 45
      web/app/components/datasets/common/retrieval-method-config/index.tsx
  10. 43 59
      web/app/components/datasets/common/retrieval-param-config/index.tsx
  11. 3 0
      web/app/components/datasets/create/embedding-process/index.tsx
  12. 46 58
      web/app/components/datasets/create/step-two/index.tsx
  13. 2 2
      web/app/components/datasets/create/step-two/option-card.tsx
  14. 16 8
      web/app/components/datasets/documents/detail/completed/index.tsx
  15. 1 1
      web/app/components/datasets/documents/detail/completed/segment-list.tsx
  16. 14 4
      web/app/components/datasets/documents/detail/index.tsx
  17. 16 2
      web/app/components/datasets/documents/index.tsx
  18. 17 21
      web/app/components/datasets/documents/list.tsx
  19. 1 1
      web/app/components/datasets/hit-testing/components/child-chunks-item.tsx
  20. 1 1
      web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx
  21. 2 7
      web/app/components/datasets/hit-testing/components/result-item.tsx
  22. 4 4
      web/app/components/datasets/hit-testing/components/score.tsx
  23. 1 1
      web/app/components/datasets/hit-testing/index.tsx
  24. 2 14
      web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx
  25. 7 19
      web/app/components/datasets/settings/form/index.tsx
  26. 1 0
      web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx
  27. 5 4
      web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx
  28. 1 1
      web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts
  29. 23 10
      web/app/components/workflow/nodes/knowledge-retrieval/utils.ts
  30. 1 1
      web/i18n/en-US/app-debug.ts
  31. 2 2
      web/i18n/en-US/workflow.ts
  32. 1 1
      web/i18n/zh-Hans/app-debug.ts
  33. 1 1
      web/i18n/zh-Hans/workflow.ts
  34. 6 2
      web/service/knowledge/use-document.ts

+ 34 - 1
web/app/(commonLayout)/datasets/template/template.en.mdx

@@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
           - <code>economy</code> Economy: Build using inverted index of keyword table index
       </Property>
+      <Property name='doc_form' type='string' key='doc_form'>
+        Format of indexed content
+          - <code>text_model</code> Text documents are directly embedded; <code>economy</code> mode defaults to this form
+          - <code>hierarchical_model</code> Parent-child mode
+          - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
+      </Property>
+      <Property name='doc_language' type='string' key='doc_language'>
+        In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
+      </Property>
       <Property name='process_rule' type='object' key='process_rule'>
         Processing rules
           - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
@@ -65,6 +74,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> Maximum length (tokens); must be smaller than the maximum length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> High quality: embedding using embedding model, built as vector database index
           - <code>economy</code> Economy: Build using inverted index of keyword table index
 
+        - <code>doc_form</code> Format of indexed content
+          - <code>text_model</code> Text documents are directly embedded; <code>economy</code> mode defaults to this form
+          - <code>hierarchical_model</code> Parent-child mode
+          - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions
+
+        - <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code>
+
         - <code>process_rule</code> Processing rules
           - <code>mode</code> (string) Cleaning, segmentation mode, automatic / custom
           - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
@@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> Maximum length (tokens); must be smaller than the maximum length of the parent chunk
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         Files that need to be uploaded.
@@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> Maximum length (tokens); must be smaller than the maximum length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) Segmentation rules
               - <code>separator</code> Custom segment identifier, currently only allows one delimiter to be set. Default is \n
               - <code>max_tokens</code> Maximum length (token) defaults to 1000
+            - <code>parent_mode</code> Retrieval mode of parent chunks: <code>full-doc</code> full text retrieval / <code>paragraph</code> paragraph retrieval
+            - <code>subchunk_segmentation</code> (object) Child chunk rules
+              - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
+              - <code>max_tokens</code> Maximum length (tokens); must be smaller than the maximum length of the parent chunk
       </Property>
     </Properties>
   </Col>
@@ -984,7 +1016,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
   method='POST'
-  title='Update a Chunk in a Document '
+  title='Update a Chunk in a Document'
   name='#update_segment'
 />
 <Row>
@@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
         - <code>answer</code> (text) Answer content, passed if the knowledge is in Q&A mode (optional)
         - <code>keywords</code> (list) Keyword (optional)
         - <code>enabled</code> (bool) False / true (optional)
+        - <code>regenerate_child_chunks</code> (bool) Whether to regenerate child chunks (optional)
       </Property>
     </Properties>
   </Col>

+ 36 - 3
web/app/(commonLayout)/datasets/template/template.zh.mdx

@@ -52,6 +52,15 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> 高质量:使用  embedding 模型进行嵌入,构建为向量数据库索引
           - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
       </Property>
+      <Property name='doc_form' type='string' key='doc_form'>
+        索引内容的形式
+          - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
+          - <code>hierarchical_model</code> parent-child 模式
+          - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
+      </Property>
+      <Property name='doc_language' type='string' key='doc_language'>
+        在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
+      </Property>
       <Property name='process_rule' type='object' key='process_rule'>
         处理规则
           - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
@@ -63,8 +72,12 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
                   - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
               - <code>enabled</code> (bool) 是否选中该规则,不传入文档 ID 时代表默认值
             - <code>segmentation</code> (object) 分段规则
-              - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
+              - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 <code>\n</code>
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -155,6 +168,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
           - <code>high_quality</code> 高质量:使用  embedding 模型进行嵌入,构建为向量数据库索引
           - <code>economy</code> 经济:使用 keyword table index 的倒排索引进行构建
 
+        - <code>doc_form</code> 索引内容的形式
+          - <code>text_model</code> text 文档直接 embedding,经济模式默认为该模式
+          - <code>hierarchical_model</code> parent-child 模式
+          - <code>qa_model</code> Q&A 模式:为分片文档生成 Q&A 对,然后对问题进行 embedding
+
+        - <code>doc_language</code> 在 Q&A 模式下,指定文档的语言,例如:<code>English</code>、<code>Chinese</code>
+
         - <code>process_rule</code> 处理规则
           - <code>mode</code> (string) 清洗、分段模式 ,automatic 自动 / custom 自定义
           - <code>rules</code> (object) 自定义规则(自动模式下,该字段为空)
@@ -167,6 +187,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         需要上传的文件。
@@ -411,7 +435,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/update-by-text'
   method='POST'
-  title='通过文本更新文档 '
+  title='通过文本更新文档'
   name='#update-by-text'
 />
 <Row>
@@ -449,6 +473,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -508,7 +536,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 <Heading
   url='/datasets/{dataset_id}/documents/{document_id}/update-by-file'
   method='POST'
-  title='通过文件更新文档  '
+  title='通过文件更新文档'
   name='#update-by-file'
 />
 <Row>
@@ -546,6 +574,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>segmentation</code> (object) 分段规则
               - <code>separator</code> 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
               - <code>max_tokens</code> 最大长度(token)默认为 1000
+            - <code>parent_mode</code> 父分段的召回模式 <code>full-doc</code> 全文召回 / <code>paragraph</code> 段落召回
+            - <code>subchunk_segmentation</code> (object) 子分段规则
+              - <code>separator</code> 分段标识符,目前仅允许设置一个分隔符。默认为 <code>***</code>
+              - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
       </Property>
     </Properties>
   </Col>
@@ -1009,6 +1041,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
         - <code>answer</code> (text) 答案内容,非必填,如果知识库的模式为 Q&A 模式则传值
         - <code>keywords</code> (list) 关键字,非必填
         - <code>enabled</code> (bool) false/true,非必填
+        - <code>regenerate_child_chunks</code> (bool) 是否重新生成子分段,非必填
       </Property>
     </Properties>
   </Col>

+ 49 - 70
web/app/components/app/configuration/dataset-config/params-config/config-content.tsx

@@ -59,36 +59,24 @@ const ConfigContent: FC<Props> = ({
 
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const {
     currentModel: currentRerankModel,
   } = useCurrentProviderAndModel(
     rerankModelList,
-    rerankDefaultModel
-      ? {
-        ...rerankDefaultModel,
-        provider: rerankDefaultModel.provider.provider,
-      }
-      : undefined,
+    {
+      provider: datasetConfigs.reranking_model?.reranking_provider_name,
+      model: datasetConfigs.reranking_model?.reranking_model_name,
+    },
   )
 
-  const rerankModel = (() => {
-    if (datasetConfigs.reranking_model?.reranking_provider_name) {
-      return {
-        provider_name: datasetConfigs.reranking_model.reranking_provider_name,
-        model_name: datasetConfigs.reranking_model.reranking_model_name,
-      }
+  const rerankModel = useMemo(() => {
+    return {
+      provider_name: datasetConfigs?.reranking_model?.reranking_provider_name ?? '',
+      model_name: datasetConfigs?.reranking_model?.reranking_model_name ?? '',
     }
-    else if (rerankDefaultModel) {
-      return {
-        provider_name: rerankDefaultModel.provider.provider,
-        model_name: rerankDefaultModel.model,
-      }
-    }
-  })()
+  }, [datasetConfigs.reranking_model])
 
   const handleParamChange = (key: string, value: number) => {
     if (key === 'top_k') {
@@ -133,6 +121,12 @@ const ConfigContent: FC<Props> = ({
   }
 
   const handleRerankModeChange = (mode: RerankingModeEnum) => {
+    if (mode === datasetConfigs.reranking_mode)
+      return
+
+    if (mode === RerankingModeEnum.RerankingModel && !currentRerankModel)
+      Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
+
     onChange({
       ...datasetConfigs,
       reranking_mode: mode,
@@ -162,31 +156,25 @@ const ConfigContent: FC<Props> = ({
 
   const canManuallyToggleRerank = useMemo(() => {
     return (selectedDatasetsMode.allInternal && selectedDatasetsMode.allEconomic)
-      || selectedDatasetsMode.allExternal
+    || selectedDatasetsMode.allExternal
   }, [selectedDatasetsMode.allEconomic, selectedDatasetsMode.allExternal, selectedDatasetsMode.allInternal])
 
   const showRerankModel = useMemo(() => {
     if (!canManuallyToggleRerank)
       return true
-    else if (canManuallyToggleRerank && !isRerankDefaultModelValid)
-      return false
 
     return datasetConfigs.reranking_enable
-  }, [canManuallyToggleRerank, datasetConfigs.reranking_enable, isRerankDefaultModelValid])
+  }, [datasetConfigs.reranking_enable, canManuallyToggleRerank])
 
-  const handleDisabledSwitchClick = useCallback(() => {
-    if (!currentRerankModel && !showRerankModel)
+  const handleDisabledSwitchClick = useCallback((enable: boolean) => {
+    if (!currentRerankModel && enable)
       Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
-  }, [currentRerankModel, showRerankModel, t])
-
-  useEffect(() => {
-    if (canManuallyToggleRerank && showRerankModel !== datasetConfigs.reranking_enable) {
-      onChange({
-        ...datasetConfigs,
-        reranking_enable: showRerankModel,
-      })
-    }
-  }, [canManuallyToggleRerank, showRerankModel, datasetConfigs, onChange])
+    onChange({
+      ...datasetConfigs,
+      reranking_enable: enable,
+    })
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [currentRerankModel, datasetConfigs, onChange])
 
   return (
     <div>
@@ -267,24 +255,12 @@ const ConfigContent: FC<Props> = ({
                 <div className='flex items-center'>
                   {
                     selectedDatasetsMode.allEconomic && !selectedDatasetsMode.mixtureInternalAndExternal && (
-                      <div
-                        className='flex items-center'
-                        onClick={handleDisabledSwitchClick}
-                      >
-                        <Switch
-                          size='md'
-                          defaultValue={showRerankModel}
-                          disabled={!currentRerankModel || !canManuallyToggleRerank}
-                          onChange={(v) => {
-                            if (canManuallyToggleRerank) {
-                              onChange({
-                                ...datasetConfigs,
-                                reranking_enable: v,
-                              })
-                            }
-                          }}
-                        />
-                      </div>
+                      <Switch
+                        size='md'
+                        defaultValue={showRerankModel}
+                        disabled={!canManuallyToggleRerank}
+                        onChange={handleDisabledSwitchClick}
+                      />
                     )
                   }
                   <div className='leading-[32px] ml-1 text-text-secondary system-sm-semibold'>{t('common.modelProvider.rerankModel.key')}</div>
@@ -298,21 +274,24 @@ const ConfigContent: FC<Props> = ({
                     triggerClassName='ml-1 w-4 h-4'
                   />
                 </div>
-                <div>
-                  <ModelSelector
-                    defaultModel={rerankModel && { provider: rerankModel?.provider_name, model: rerankModel?.model_name }}
-                    onSelect={(v) => {
-                      onChange({
-                        ...datasetConfigs,
-                        reranking_model: {
-                          reranking_provider_name: v.provider,
-                          reranking_model_name: v.model,
-                        },
-                      })
-                    }}
-                    modelList={rerankModelList}
-                  />
-                </div>
+                {
+                  showRerankModel && (
+                    <div>
+                      <ModelSelector
+                        defaultModel={rerankModel && { provider: rerankModel?.provider_name, model: rerankModel?.model_name }}
+                        onSelect={(v) => {
+                          onChange({
+                            ...datasetConfigs,
+                            reranking_model: {
+                              reranking_provider_name: v.provider,
+                              reranking_model_name: v.model,
+                            },
+                          })
+                        }}
+                        modelList={rerankModelList}
+                      />
+                    </div>
+                  )}
               </div>
             )
           }

+ 19 - 18
web/app/components/app/configuration/dataset-config/params-config/index.tsx

@@ -10,7 +10,7 @@ import Modal from '@/app/components/base/modal'
 import Button from '@/app/components/base/button'
 import { RETRIEVE_TYPE } from '@/types/app'
 import Toast from '@/app/components/base/toast'
-import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
+import { useCurrentProviderAndModel, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import { RerankingModeEnum } from '@/models/datasets'
 import type { DataSet } from '@/models/datasets'
@@ -41,17 +41,27 @@ const ParamsConfig = ({
   }, [datasetConfigs])
 
   const {
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
+    modelList: rerankModelList,
+    currentModel: rerankDefaultModel,
     currentProvider: rerankDefaultProvider,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
+  const {
+    currentModel: isCurrentRerankModelValid,
+  } = useCurrentProviderAndModel(
+    rerankModelList,
+    {
+      provider: tempDataSetConfigs.reranking_model?.reranking_provider_name ?? '',
+      model: tempDataSetConfigs.reranking_model?.reranking_model_name ?? '',
+    },
+  )
+
   const isValid = () => {
     let errMsg = ''
     if (tempDataSetConfigs.retrieval_model === RETRIEVE_TYPE.multiWay) {
       if (tempDataSetConfigs.reranking_enable
         && tempDataSetConfigs.reranking_mode === RerankingModeEnum.RerankingModel
-        && !isRerankDefaultModelValid
+        && !isCurrentRerankModelValid
       )
         errMsg = t('appDebug.datasetConfig.rerankModelRequired')
     }
@@ -66,16 +76,7 @@ const ParamsConfig = ({
   const handleSave = () => {
     if (!isValid())
       return
-    const config = { ...tempDataSetConfigs }
-    if (config.retrieval_model === RETRIEVE_TYPE.multiWay
-      && config.reranking_mode === RerankingModeEnum.RerankingModel
-      && !config.reranking_model) {
-      config.reranking_model = {
-        reranking_provider_name: rerankDefaultModel?.provider?.provider,
-        reranking_model_name: rerankDefaultModel?.model,
-      } as any
-    }
-    setDatasetConfigs(config)
+    setDatasetConfigs(tempDataSetConfigs)
     setRerankSettingModalOpen(false)
   }
 
@@ -94,14 +95,14 @@ const ParamsConfig = ({
       reranking_enable: restConfigs.reranking_enable,
     }, selectedDatasets, selectedDatasets, {
       provider: rerankDefaultProvider?.provider,
-      model: isRerankDefaultModelValid?.model,
+      model: rerankDefaultModel?.model,
     })
 
     setTempDataSetConfigs({
       ...retrievalConfig,
-      reranking_model: restConfigs.reranking_model && {
-        reranking_provider_name: restConfigs.reranking_model.reranking_provider_name,
-        reranking_model_name: restConfigs.reranking_model.reranking_model_name,
+      reranking_model: {
+        reranking_provider_name: retrievalConfig.reranking_model?.provider || '',
+        reranking_model_name: retrievalConfig.reranking_model?.model || '',
       },
       retrieval_model,
       score_threshold_enabled,

+ 5 - 15
web/app/components/app/configuration/dataset-config/settings-modal/index.tsx

@@ -12,7 +12,7 @@ import Divider from '@/app/components/base/divider'
 import Button from '@/app/components/base/button'
 import Input from '@/app/components/base/input'
 import Textarea from '@/app/components/base/textarea'
-import { type DataSet, RerankingModeEnum } from '@/models/datasets'
+import { type DataSet } from '@/models/datasets'
 import { useToastContext } from '@/app/components/base/toast'
 import { updateDatasetSetting } from '@/service/datasets'
 import { useAppContext } from '@/context/app-context'
@@ -21,7 +21,7 @@ import type { RetrievalConfig } from '@/types/app'
 import RetrievalSettings from '@/app/components/datasets/external-knowledge-base/create/RetrievalSettings'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
 import PermissionSelector from '@/app/components/datasets/settings/permission-selector'
 import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
@@ -99,8 +99,6 @@ const SettingsModal: FC<SettingsModalProps> = ({
     }
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -109,14 +107,6 @@ const SettingsModal: FC<SettingsModalProps> = ({
       notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    const postRetrievalConfig = ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    })
     try {
       setLoading(true)
       const { id, name, description, permission } = localeCurrentDataset
@@ -128,8 +118,8 @@ const SettingsModal: FC<SettingsModalProps> = ({
           permission,
           indexing_technique: indexMethod,
           retrieval_model: {
-            ...postRetrievalConfig,
-            score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
+            ...retrievalConfig,
+            score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0,
           },
           embedding_model: localeCurrentDataset.embedding_model,
           embedding_model_provider: localeCurrentDataset.embedding_model_provider,
@@ -157,7 +147,7 @@ const SettingsModal: FC<SettingsModalProps> = ({
       onSave({
         ...localeCurrentDataset,
         indexing_technique: indexMethod,
-        retrieval_model_dict: postRetrievalConfig,
+        retrieval_model_dict: retrievalConfig,
       })
     }
     catch (e) {

+ 3 - 3
web/app/components/app/configuration/index.tsx

@@ -287,9 +287,9 @@ const Configuration: FC = () => {
 
     setDatasetConfigs({
       ...retrievalConfig,
-      reranking_model: restConfigs.reranking_model && {
-        reranking_provider_name: restConfigs.reranking_model.reranking_provider_name,
-        reranking_model_name: restConfigs.reranking_model.reranking_model_name,
+      reranking_model: {
+        reranking_provider_name: retrievalConfig?.reranking_model?.provider || '',
+        reranking_model_name: retrievalConfig?.reranking_model?.model || '',
       },
       retrieval_model,
       score_threshold_enabled,

+ 8 - 7
web/app/components/datasets/common/check-rerank-model.ts

@@ -6,14 +6,10 @@ import type {
 import { RerankingModeEnum } from '@/models/datasets'
 
 export const isReRankModelSelected = ({
-  rerankDefaultModel,
-  isRerankDefaultModelValid,
   retrievalConfig,
   rerankModelList,
   indexMethod,
 }: {
-  rerankDefaultModel?: DefaultModelResponse
-  isRerankDefaultModelValid: boolean
   retrievalConfig: RetrievalConfig
   rerankModelList: Model[]
   indexMethod?: string
@@ -25,14 +21,19 @@ export const isReRankModelSelected = ({
       return provider?.models.find(({ model }) => model === retrievalConfig.reranking_model?.reranking_model_name)
     }
 
-    if (isRerankDefaultModelValid)
-      return !!rerankDefaultModel
-
     return false
   })()
 
   if (
     indexMethod === 'high_quality'
+    && ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrievalConfig.search_method))
+    && retrievalConfig.reranking_enable
+    && !rerankModelSelected
+  )
+    return false
+
+  if (
+    indexMethod === 'high_quality'
     && (retrievalConfig.search_method === RETRIEVE_METHOD.hybrid && retrievalConfig.reranking_mode !== RerankingModeEnum.WeightedScore)
     && !rerankModelSelected
   )

+ 4 - 1
web/app/components/datasets/common/economical-retrieval-method-config/index.tsx

@@ -10,11 +10,13 @@ import { RETRIEVE_METHOD } from '@/types/app'
 import type { RetrievalConfig } from '@/types/app'
 
 type Props = {
+  disabled?: boolean
   value: RetrievalConfig
   onChange: (value: RetrievalConfig) => void
 }
 
 const EconomicalRetrievalMethodConfig: FC<Props> = ({
+  disabled = false,
   value,
   onChange,
 }) => {
@@ -22,7 +24,8 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
 
   return (
     <div className='space-y-2'>
-      <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
+      <OptionCard
+        disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
         title={t('dataset.retrieval.invertedIndex.title')}
         description={t('dataset.retrieval.invertedIndex.description')} isActive
         activeHeaderClassName='bg-dataset-option-card-purple-gradient'

+ 71 - 45
web/app/components/datasets/common/retrieval-method-config/index.tsx

@@ -1,6 +1,6 @@
 'use client'
 import type { FC } from 'react'
-import React from 'react'
+import React, { useCallback } from 'react'
 import { useTranslation } from 'react-i18next'
 import Image from 'next/image'
 import RetrievalParamConfig from '../retrieval-param-config'
@@ -10,7 +10,7 @@ import { retrievalIcon } from '../../create/icons'
 import type { RetrievalConfig } from '@/types/app'
 import { RETRIEVE_METHOD } from '@/types/app'
 import { useProviderContext } from '@/context/provider-context'
-import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
+import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
 import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
 import {
   DEFAULT_WEIGHTED_SCORE,
@@ -20,54 +20,87 @@ import {
 import Badge from '@/app/components/base/badge'
 
 type Props = {
+  disabled?: boolean
   value: RetrievalConfig
   onChange: (value: RetrievalConfig) => void
 }
 
 const RetrievalMethodConfig: FC<Props> = ({
-  value: passValue,
+  disabled = false,
+  value,
   onChange,
 }) => {
   const { t } = useTranslation()
   const { supportRetrievalMethods } = useProviderContext()
-  const { data: rerankDefaultModel } = useDefaultModel(ModelTypeEnum.rerank)
-  const value = (() => {
-    if (!passValue.reranking_model.reranking_model_name) {
-      return {
-        ...passValue,
-        reranking_model: {
-          reranking_provider_name: rerankDefaultModel?.provider.provider || '',
-          reranking_model_name: rerankDefaultModel?.model || '',
-        },
-        reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore),
-        weights: passValue.weights || {
-          weight_type: WeightedScoreEnum.Customized,
-          vector_setting: {
-            vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
-            embedding_provider_name: '',
-            embedding_model_name: '',
-          },
-          keyword_setting: {
-            keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
-          },
-        },
-      }
+  const {
+    defaultModel: rerankDefaultModel,
+    currentModel: isRerankDefaultModelValid,
+  } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
+
+  const onSwitch = useCallback((retrieveMethod: RETRIEVE_METHOD) => {
+    if ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrieveMethod)) {
+      onChange({
+        ...value,
+        search_method: retrieveMethod,
+        ...(!value.reranking_model.reranking_model_name
+          ? {
+            reranking_model: {
+              reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
+              reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+            },
+            reranking_enable: !!isRerankDefaultModelValid,
+          }
+          : {
+            reranking_enable: true,
+          }),
+      })
     }
-    return passValue
-  })()
+    if (retrieveMethod === RETRIEVE_METHOD.hybrid) {
+      onChange({
+        ...value,
+        search_method: retrieveMethod,
+        ...(!value.reranking_model.reranking_model_name
+          ? {
+            reranking_model: {
+              reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
+              reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+            },
+            reranking_enable: !!isRerankDefaultModelValid,
+            reranking_mode: isRerankDefaultModelValid ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore,
+          }
+          : {
+            reranking_enable: true,
+            reranking_mode: RerankingModeEnum.RerankingModel,
+          }),
+        ...(!value.weights
+          ? {
+            weights: {
+              weight_type: WeightedScoreEnum.Customized,
+              vector_setting: {
+                vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
+                embedding_provider_name: '',
+                embedding_model_name: '',
+              },
+              keyword_setting: {
+                keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
+              },
+            },
+          }
+          : {}),
+      })
+    }
+  }, [value, rerankDefaultModel, isRerankDefaultModelValid, onChange])
+
   return (
     <div className='space-y-2'>
       {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
           title={t('dataset.retrieval.semantic_search.title')}
           description={t('dataset.retrieval.semantic_search.description')}
           isActive={
             value.search_method === RETRIEVE_METHOD.semantic
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.semantic,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.semantic)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >
@@ -78,17 +111,14 @@ const RetrievalMethodConfig: FC<Props> = ({
           />
         </OptionCard>
       )}
-      {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
+      {supportRetrievalMethods.includes(RETRIEVE_METHOD.fullText) && (
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
           title={t('dataset.retrieval.full_text_search.title')}
           description={t('dataset.retrieval.full_text_search.description')}
           isActive={
             value.search_method === RETRIEVE_METHOD.fullText
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.fullText,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.fullText)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >
@@ -99,8 +129,8 @@ const RetrievalMethodConfig: FC<Props> = ({
           />
         </OptionCard>
       )}
-      {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
+      {supportRetrievalMethods.includes(RETRIEVE_METHOD.hybrid) && (
+        <OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
           title={
             <div className='flex items-center space-x-1'>
               <div>{t('dataset.retrieval.hybrid_search.title')}</div>
@@ -110,11 +140,7 @@ const RetrievalMethodConfig: FC<Props> = ({
           description={t('dataset.retrieval.hybrid_search.description')} isActive={
             value.search_method === RETRIEVE_METHOD.hybrid
           }
-          onSwitched={() => onChange({
-            ...value,
-            search_method: RETRIEVE_METHOD.hybrid,
-            reranking_enable: true,
-          })}
+          onSwitched={() => onSwitch(RETRIEVE_METHOD.hybrid)}
           effectImg={Effect.src}
           activeHeaderClassName='bg-dataset-option-card-purple-gradient'
         >

+ 43 - 59
web/app/components/datasets/common/retrieval-param-config/index.tsx

@@ -1,6 +1,6 @@
 'use client'
 import type { FC } from 'react'
-import React, { useCallback } from 'react'
+import React, { useCallback, useMemo } from 'react'
 import { useTranslation } from 'react-i18next'
 
 import Image from 'next/image'
@@ -39,8 +39,8 @@ const RetrievalParamConfig: FC<Props> = ({
   const { t } = useTranslation()
   const canToggleRerankModalEnable = type !== RETRIEVE_METHOD.hybrid
   const isEconomical = type === RETRIEVE_METHOD.invertedIndex
+  const isHybridSearch = type === RETRIEVE_METHOD.hybrid
   const {
-    defaultModel: rerankDefaultModel,
     modelList: rerankModelList,
   } = useModelListAndDefaultModel(ModelTypeEnum.rerank)
 
@@ -48,35 +48,28 @@ const RetrievalParamConfig: FC<Props> = ({
     currentModel,
   } = useCurrentProviderAndModel(
     rerankModelList,
-    rerankDefaultModel
-      ? {
-        ...rerankDefaultModel,
-        provider: rerankDefaultModel.provider.provider,
-      }
-      : undefined,
+    {
+      provider: value.reranking_model?.reranking_provider_name ?? '',
+      model: value.reranking_model?.reranking_model_name ?? '',
+    },
   )
 
-  const handleDisabledSwitchClick = useCallback(() => {
-    if (!currentModel)
+  const handleDisabledSwitchClick = useCallback((enable: boolean) => {
+    if (enable && !currentModel)
       Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
-  }, [currentModel, rerankDefaultModel, t])
-
-  const isHybridSearch = type === RETRIEVE_METHOD.hybrid
+    onChange({
+      ...value,
+      reranking_enable: enable,
+    })
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [currentModel, onChange, value])
 
-  const rerankModel = (() => {
-    if (value.reranking_model) {
-      return {
-        provider_name: value.reranking_model.reranking_provider_name,
-        model_name: value.reranking_model.reranking_model_name,
-      }
-    }
-    else if (rerankDefaultModel) {
-      return {
-        provider_name: rerankDefaultModel.provider.provider,
-        model_name: rerankDefaultModel.model,
-      }
+  const rerankModel = useMemo(() => {
+    return {
+      provider_name: value.reranking_model.reranking_provider_name,
+      model_name: value.reranking_model.reranking_model_name,
     }
-  })()
+  }, [value.reranking_model])
 
   const handleChangeRerankMode = (v: RerankingModeEnum) => {
     if (v === value.reranking_mode)
@@ -100,6 +93,8 @@ const RetrievalParamConfig: FC<Props> = ({
         },
       }
     }
+    if (v === RerankingModeEnum.RerankingModel && !currentModel)
+      Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
     onChange(result)
   }
 
@@ -122,22 +117,11 @@ const RetrievalParamConfig: FC<Props> = ({
         <div>
           <div className='flex items-center space-x-2 mb-2'>
             {canToggleRerankModalEnable && (
-              <div
-                className='flex items-center'
-                onClick={handleDisabledSwitchClick}
-              >
-                <Switch
-                  size='md'
-                  defaultValue={currentModel ? value.reranking_enable : false}
-                  onChange={(v) => {
-                    onChange({
-                      ...value,
-                      reranking_enable: v,
-                    })
-                  }}
-                  disabled={!currentModel}
-                />
-              </div>
+              <Switch
+                size='md'
+                defaultValue={value.reranking_enable}
+                onChange={handleDisabledSwitchClick}
+              />
             )}
             <div className='flex items-center'>
               <span className='mr-0.5 system-sm-semibold text-text-secondary'>{t('common.modelProvider.rerankModel.key')}</span>
@@ -148,21 +132,23 @@ const RetrievalParamConfig: FC<Props> = ({
               />
             </div>
           </div>
-          <ModelSelector
-            triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
-            defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
-            modelList={rerankModelList}
-            readonly={!value.reranking_enable}
-            onSelect={(v) => {
-              onChange({
-                ...value,
-                reranking_model: {
-                  reranking_provider_name: v.provider,
-                  reranking_model_name: v.model,
-                },
-              })
-            }}
-          />
+          {
+            value.reranking_enable && (
+              <ModelSelector
+                defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
+                modelList={rerankModelList}
+                onSelect={(v) => {
+                  onChange({
+                    ...value,
+                    reranking_model: {
+                      reranking_provider_name: v.provider,
+                      reranking_model_name: v.model,
+                    },
+                  })
+                }}
+              />
+            )
+          }
         </div>
       )}
       {
@@ -255,10 +241,8 @@ const RetrievalParamConfig: FC<Props> = ({
             {
               value.reranking_mode !== RerankingModeEnum.WeightedScore && (
                 <ModelSelector
-                  triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
                   defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
                   modelList={rerankModelList}
-                  readonly={!value.reranking_enable}
                   onSelect={(v) => {
                     onChange({
                       ...value,

+ 3 - 0
web/app/components/datasets/create/embedding-process/index.tsx

@@ -30,6 +30,7 @@ import { useProviderContext } from '@/context/provider-context'
 import { sleep } from '@/utils'
 import { RETRIEVE_METHOD } from '@/types/app'
 import Tooltip from '@/app/components/base/tooltip'
+import { useInvalidDocumentList } from '@/service/knowledge/use-document'
 
 type Props = {
   datasetId: string
@@ -207,7 +208,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
   })
 
   const router = useRouter()
+  const invalidDocumentList = useInvalidDocumentList()
   const navToDocumentList = () => {
+    invalidDocumentList()
     router.push(`/datasets/${datasetId}/documents`)
   }
   const navToApiDocs = () => {

+ 46 - 58
web/app/components/datasets/create/step-two/index.tsx

@@ -31,17 +31,17 @@ import LanguageSelect from './language-select'
 import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
 import cn from '@/utils/classnames'
 import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
+import { ChunkingMode, DataSourceType, ProcessMode } from '@/models/datasets'
 
 import Button from '@/app/components/base/button'
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
 import { type RetrievalConfig } from '@/types/app'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import Toast from '@/app/components/base/toast'
 import type { NotionPage } from '@/models/common'
 import { DataSourceProvider } from '@/models/common'
-import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets'
 import { useDatasetDetailContext } from '@/context/dataset-detail'
 import I18n from '@/context/i18n'
 import { RETRIEVE_METHOD } from '@/types/app'
@@ -90,17 +90,13 @@ type StepTwoProps = {
   onCancel?: () => void
 }
 
-export enum SegmentType {
-  AUTO = 'automatic',
-  CUSTOM = 'custom',
-}
 export enum IndexingType {
   QUALIFIED = 'high_quality',
   ECONOMICAL = 'economy',
 }
 
 const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
-const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500
+const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500
 const DEFAULT_OVERLAP = 50
 
 type ParentChildConfig = {
@@ -131,7 +127,6 @@ const StepTwo = ({
   isSetting,
   documentDetail,
   isAPIKeySet,
-  onSetting,
   datasetId,
   indexingType,
   dataSourceType: inCreatePageDataSourceType,
@@ -162,12 +157,12 @@ const StepTwo = ({
 
   const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
   const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
-  const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.CUSTOM)
+  const [segmentationType, setSegmentationType] = useState<ProcessMode>(ProcessMode.general)
   const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
   const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
     doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER))
   }, [])
-  const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length
+  const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length
   const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
   const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
   const [rules, setRules] = useState<PreProcessingRule[]>([])
@@ -198,7 +193,6 @@ const StepTwo = ({
   )
 
   // QA Related
-  const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false)
   const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false)
   const [docForm, setDocForm] = useState<ChunkingMode>(
     (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text,
@@ -348,7 +342,7 @@ const StepTwo = ({
   }
 
   const updatePreview = () => {
-    if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) {
+    if (segmentationType === ProcessMode.general && maxChunkLength > 4000) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
       return
     }
@@ -373,13 +367,42 @@ const StepTwo = ({
         model: defaultEmbeddingModel?.model || '',
       },
   )
+  const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
+    search_method: RETRIEVE_METHOD.semantic,
+    reranking_enable: false,
+    reranking_model: {
+      reranking_provider_name: '',
+      reranking_model_name: '',
+    },
+    top_k: 3,
+    score_threshold_enabled: false,
+    score_threshold: 0.5,
+  } as RetrievalConfig)
+
+  useEffect(() => {
+    if (currentDataset?.retrieval_model_dict)
+      return
+    setRetrievalConfig({
+      search_method: RETRIEVE_METHOD.semantic,
+      reranking_enable: !!isRerankDefaultModelValid,
+      reranking_model: {
+        reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '',
+        reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
+      },
+      top_k: 3,
+      score_threshold_enabled: false,
+      score_threshold: 0.5,
+    })
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [rerankDefaultModel, isRerankDefaultModelValid])
+
   const getCreationParams = () => {
     let params
-    if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
+    if (segmentationType === ProcessMode.general && overlap > maxChunkLength) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
       return
     }
-    if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
+    if (segmentationType === ProcessMode.general && maxChunkLength > limitMaxChunkLength) {
       Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
       return
     }
@@ -389,7 +412,6 @@ const StepTwo = ({
         doc_form: currentDocForm,
         doc_language: docLanguage,
         process_rule: getProcessRule(),
-        // eslint-disable-next-line @typescript-eslint/no-use-before-define
         retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
         embedding_model: embeddingModel.model, // Readonly
         embedding_model_provider: embeddingModel.provider, // Readonly
@@ -400,10 +422,7 @@ const StepTwo = ({
       const indexMethod = getIndexing_technique()
       if (
         !isReRankModelSelected({
-          rerankDefaultModel,
-          isRerankDefaultModelValid: !!isRerankDefaultModelValid,
           rerankModelList,
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
           retrievalConfig,
           indexMethod: indexMethod as string,
         })
@@ -411,16 +430,6 @@ const StepTwo = ({
         Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
         return
       }
-      const postRetrievalConfig = ensureRerankModelSelected({
-        rerankDefaultModel: rerankDefaultModel!,
-        retrievalConfig: {
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
-          ...retrievalConfig,
-          // eslint-disable-next-line @typescript-eslint/no-use-before-define
-          reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-        },
-        indexMethod: indexMethod as string,
-      })
       params = {
         data_source: {
           type: dataSourceType,
@@ -432,8 +441,7 @@ const StepTwo = ({
         process_rule: getProcessRule(),
         doc_form: currentDocForm,
         doc_language: docLanguage,
-
-        retrieval_model: postRetrievalConfig,
+        retrieval_model: retrievalConfig,
         embedding_model: embeddingModel.model,
         embedding_model_provider: embeddingModel.provider,
       } as CreateDocumentReq
@@ -490,7 +498,6 @@ const StepTwo = ({
 
   const getDefaultMode = () => {
     if (documentDetail)
-      // @ts-expect-error fix after api refactored
       setSegmentationType(documentDetail.dataset_process_rule.mode)
   }
 
@@ -525,7 +532,6 @@ const StepTwo = ({
           onSuccess(data) {
             updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
             updateResultCache && updateResultCache(data)
-            // eslint-disable-next-line @typescript-eslint/no-use-before-define
             updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
           },
         },
@@ -545,14 +551,6 @@ const StepTwo = ({
     isSetting && onSave && onSave()
   }
 
-  const changeToEconomicalType = () => {
-    if (docForm !== ChunkingMode.text)
-      return
-
-    if (!hasSetIndexType)
-      setIndexType(IndexingType.ECONOMICAL)
-  }
-
   useEffect(() => {
     // fetch rules
     if (!isSetting) {
@@ -574,18 +572,6 @@ const StepTwo = ({
       setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
   }, [isAPIKeySet, indexingType, datasetId])
 
-  const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
-    search_method: RETRIEVE_METHOD.semantic,
-    reranking_enable: false,
-    reranking_model: {
-      reranking_provider_name: rerankDefaultModel?.provider.provider,
-      reranking_model_name: rerankDefaultModel?.model,
-    },
-    top_k: 3,
-    score_threshold_enabled: false,
-    score_threshold: 0.5,
-  } as RetrievalConfig)
-
   const economyDomRef = useRef<HTMLDivElement>(null)
   const isHoveringEconomy = useHover(economyDomRef)
 
@@ -984,12 +970,14 @@ const StepTwo = ({
               getIndexing_technique() === IndexingType.QUALIFIED
                 ? (
                   <RetrievalMethodConfig
+                    disabled={!!datasetId}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />
                 )
                 : (
                   <EconomicalRetrievalMethodConfig
+                    disabled={!!datasetId}
                     value={retrievalConfig}
                     onChange={setRetrievalConfig}
                   />
@@ -1010,7 +998,7 @@ const StepTwo = ({
           )
           : (
             <div className='flex items-center mt-8 py-2'>
-              <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
+              {!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
               <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
             </div>
           )}
@@ -1081,11 +1069,11 @@ const StepTwo = ({
               }
               {
                 currentDocForm !== ChunkingMode.qa
-                  && <Badge text={t(
-                    'datasetCreation.stepTwo.previewChunkCount', {
-                      count: estimate?.total_segments || 0,
-                    }) as string}
-                  />
+                && <Badge text={t(
+                  'datasetCreation.stepTwo.previewChunkCount', {
+                    count: estimate?.total_segments || 0,
+                  }) as string}
+                />
               }
             </div>
           </PreviewHeader>}

+ 2 - 2
web/app/components/datasets/create/step-two/option-card.tsx

@@ -4,7 +4,7 @@ import classNames from '@/utils/classnames'
 
 const TriangleArrow: FC<ComponentProps<'svg'>> = props => (
   <svg xmlns="http://www.w3.org/2000/svg" width="24" height="11" viewBox="0 0 24 11" fill="none" {...props}>
-    <path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor"/>
+    <path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor" />
   </svg>
 )
 
@@ -65,7 +65,7 @@ export const OptionCard: FC<OptionCardProps> = forwardRef((props, ref) => {
       (isActive && !noHighlight)
         ? 'border-[1.5px] border-components-option-card-option-selected-border'
         : 'border border-components-option-card-option-border',
-      disabled && 'opacity-50 cursor-not-allowed',
+      disabled && 'opacity-50 pointer-events-none',
       className,
     )}
     style={{

+ 16 - 8
web/app/components/datasets/documents/detail/completed/index.tsx

@@ -232,6 +232,16 @@ const Completed: FC<ICompletedProps> = ({
     setFullScreen(false)
   }, [])
 
+  const onCloseNewSegmentModal = useCallback(() => {
+    onNewSegmentModalChange(false)
+    setFullScreen(false)
+  }, [onNewSegmentModalChange])
+
+  const onCloseNewChildChunkModal = useCallback(() => {
+    setShowNewChildSegmentModal(false)
+    setFullScreen(false)
+  }, [])
+
   const { mutateAsync: enableSegment } = useEnableSegment()
   const { mutateAsync: disableSegment } = useDisableSegment()
 
@@ -623,6 +633,7 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={currSegment.showModal}
         fullScreen={fullScreen}
+        onClose={onCloseSegmentDetail}
       >
         <SegmentDetail
           segInfo={currSegment.segInfo ?? { id: '' }}
@@ -636,13 +647,11 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={showNewSegmentModal}
         fullScreen={fullScreen}
+        onClose={onCloseNewSegmentModal}
       >
         <NewSegment
           docForm={docForm}
-          onCancel={() => {
-            onNewSegmentModalChange(false)
-            setFullScreen(false)
-          }}
+          onCancel={onCloseNewSegmentModal}
           onSave={resetList}
           viewNewlyAddedChunk={viewNewlyAddedChunk}
         />
@@ -651,6 +660,7 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={currChildChunk.showModal}
         fullScreen={fullScreen}
+        onClose={onCloseChildSegmentDetail}
       >
         <ChildSegmentDetail
           chunkId={currChunkId}
@@ -664,13 +674,11 @@ const Completed: FC<ICompletedProps> = ({
       <FullScreenDrawer
         isOpen={showNewChildSegmentModal}
         fullScreen={fullScreen}
+        onClose={onCloseNewChildChunkModal}
       >
         <NewChildSegment
           chunkId={currChunkId}
-          onCancel={() => {
-            setShowNewChildSegmentModal(false)
-            setFullScreen(false)
-          }}
+          onCancel={onCloseNewChildChunkModal}
           onSave={onSaveNewChildChunk}
           viewNewlyAddedChildChunk={viewNewlyAddedChildChunk}
         />

+ 1 - 1
web/app/components/datasets/documents/detail/completed/segment-list.tsx

@@ -80,7 +80,7 @@ ref: ForwardedRef<HTMLDivElement>,
                 checked={selectedSegmentIds.includes(segItem.id)}
                 onCheck={() => onSelected(segItem.id)}
               />
-              <div className='grow'>
+              <div className='grow min-w-0'>
                 <SegmentCard
                   key={`${segItem.id}-card`}
                   detail={segItem}

+ 14 - 4
web/app/components/datasets/documents/detail/index.tsx

@@ -22,8 +22,9 @@ import { useDatasetDetailContext } from '@/context/dataset-detail'
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
-import { useCheckSegmentBatchImportProgress, useSegmentBatchImport } from '@/service/knowledge/use-segment'
+import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
 import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
+import { useInvalid } from '@/service/use-base'
 
 type DocumentContextValue = {
   datasetId?: string
@@ -149,11 +150,20 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
 
   const embedding = ['queuing', 'indexing', 'paused'].includes((documentDetail?.display_status || '').toLowerCase())
 
+  const invalidChunkList = useInvalid(useSegmentListKey)
+  const invalidChildChunkList = useInvalid(useChildSegmentListKey)
+
   const handleOperate = (operateName?: string) => {
-    if (operateName === 'delete')
+    if (operateName === 'delete') {
       backToPrev()
-    else
+    }
+    else {
       detailMutate()
+      setTimeout(() => {
+        invalidChunkList()
+        invalidChildChunkList()
+      }, 5000)
+    }
   }
 
   const mode = useMemo(() => {
@@ -245,7 +255,7 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
         <div className='flex flex-row flex-1' style={{ height: 'calc(100% - 4rem)' }}>
           {isDetailLoading
             ? <Loading type='app' />
-            : <div className={cn('h-full w-full flex flex-col',
+            : <div className={cn('h-full grow min-w-0 flex flex-col',
               embedding ? '' : isFullDocMode ? 'relative pt-4 pr-11 pl-11' : 'relative pt-3 pr-11 pl-5',
             )}>
               {embedding

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 16 - 2
web/app/components/datasets/documents/index.tsx


+ 17 - 21
web/app/components/datasets/documents/list.tsx

@@ -134,6 +134,16 @@ export const StatusItem: FC<{
       {DOC_INDEX_STATUS_MAP[localStatus]?.text}
     </span>
     {
+      errorMessage && (
+        <Tooltip
+          popupContent={
+            <div className='max-w-[260px] break-all'>{errorMessage}</div>
+          }
+          triggerClassName='ml-1 w-4 h-4'
+        />
+      )
+    }
+    {
       scene === 'detail' && (
         <div className='flex justify-between items-center ml-1.5'>
           <Tooltip
@@ -152,16 +162,6 @@ export const StatusItem: FC<{
         </div>
       )
     }
-    {
-      errorMessage && (
-        <Tooltip
-          popupContent={
-            <div className='max-w-[260px] break-all'>{errorMessage}</div>
-          }
-          triggerClassName='ml-1 w-4 h-4'
-        />
-      )
-    }
   </div>
 }
 
@@ -561,18 +561,14 @@ const DocumentList: FC<IDocumentListProps> = ({
                 </div>
               </td>
               <td>
-                <div className={'group flex items-center justify-between mr-6 hover:mr-0'}>
-                  <span className={cn(s.tdValue, 'flex items-center')}>
-                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />
-                    }
+                <div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
+                  <div className='shrink-0'>
+                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
                     {doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
-                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />
-                    }
-                    {
-                      doc.name
-                    }
-                  </span>
-                  <div className='group-hover:flex hidden'>
+                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
+                  </div>
+                  <span className='text-sm truncate grow-1'>{doc.name}</span>
+                  <div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
                     <Tooltip
                       popupContent={t('datasetDocuments.list.table.rename')}
                     >

+ 1 - 1
web/app/components/datasets/hit-testing/components/child-chunks-item.tsx

@@ -17,7 +17,7 @@ const ChildChunks: FC<Props> = ({
   const { id, score, content, position } = payload
   return (
     <div
-      className={!isShowAll ? 'line-clamp-2' : ''}
+      className={!isShowAll ? 'line-clamp-2 break-all' : ''}
     >
       <div className='inline-flex items-center relative top-[-2px]'>
         <div className='flex items-center h-[20.5px] bg-state-accent-solid  system-2xs-semibold-uppercase text-text-primary-on-surface px-1'>C-{position}</div>

+ 1 - 1
web/app/components/datasets/hit-testing/components/chunk-detail-modal.tsx

@@ -56,7 +56,7 @@ const ChunkDetailModal: FC<Props> = ({
             </div>
             <Score value={score} />
           </div>
-          <div className={cn('mt-2 body-md-regular text-text-secondary', heighClassName)}>
+          <div className={cn('mt-2 body-md-regular text-text-secondary break-all', heighClassName)}>
             {content}
           </div>
           {!isParentChildRetrieval && keywords && keywords.length > 0 && (

+ 2 - 7
web/app/components/datasets/hit-testing/components/result-item.tsx

@@ -43,13 +43,8 @@ const ResultItem: FC<Props> = ({
     setFalse: hideDetailModal,
   }] = useBoolean(false)
 
-  const handleClickCard = () => {
-    if (!isParentChildRetrieval)
-      showDetailModal()
-  }
-
   return (
-    <div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg', !isParentChildRetrieval && 'cursor-pointer')} onClick={handleClickCard}>
+    <div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg cursor-pointer')} onClick={showDetailModal}>
       {/* Meta info */}
       <div className='flex justify-between items-center px-3'>
         <div className='flex items-center space-x-2'>
@@ -66,7 +61,7 @@ const ResultItem: FC<Props> = ({
 
       {/* Main */}
       <div className='mt-1 px-3'>
-        <div className='line-clamp-2 body-md-regular'>{content}</div>
+        <div className='line-clamp-2 body-md-regular break-all'>{content}</div>
         {isParentChildRetrieval && (
           <div className='mt-1'>
             <div className={cn('inline-flex items-center h-6 space-x-0.5 text-text-secondary select-none rounded-lg cursor-pointer', isFold && 'pl-1 bg-[linear-gradient(90deg,_rgba(200,_206,_218,_0.20)_0%,_rgba(200,_206,_218,_0.04)_100%)]')} onClick={toggleFold}>

+ 4 - 4
web/app/components/datasets/hit-testing/components/score.tsx

@@ -12,15 +12,15 @@ const Score: FC<Props> = ({
   value,
   besideChunkName,
 }) => {
-  if (!value)
+  if (!value || isNaN(value))
     return null
-
   return (
-    <div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden', besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
+    <div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden',
+      besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
       <div className={cn('absolute top-0 left-0 h-full bg-util-colors-blue-brand-blue-brand-100 border-r-[1.5px] border-components-progress-brand-progress', value === 1 && 'border-r-0')} style={{ width: `${value * 100}%` }} />
       <div className={cn('relative flex items-center h-full space-x-0.5 text-util-colors-blue-brand-blue-brand-700')}>
         <div className='system-2xs-medium-uppercase'>score</div>
-        <div className='system-xs-semibold'>{value.toFixed(2)}</div>
+        <div className='system-xs-semibold'>{value?.toFixed(2)}</div>
       </div>
     </div>
   )

+ 1 - 1
web/app/components/datasets/hit-testing/index.tsx

@@ -192,7 +192,7 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
           }
         </div>
       </FloatRightContainer>
-      <Drawer isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
+      <Drawer unmount={true} isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
         <ModifyRetrievalModal
           indexMethod={currentDataset?.indexing_technique || ''}
           value={retrievalConfig}

+ 2 - 14
web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx

@@ -9,9 +9,8 @@ import type { RetrievalConfig } from '@/types/app'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
 import Button from '@/app/components/base/button'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
-import { RerankingModeEnum } from '@/models/datasets'
 
 type Props = {
   indexMethod: string
@@ -39,15 +38,11 @@ const ModifyRetrievalModal: FC<Props> = ({
 
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const handleSave = () => {
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -56,14 +51,7 @@ const ModifyRetrievalModal: FC<Props> = ({
       Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    onSave(ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    }))
+    onSave(retrievalConfig)
   }
 
   if (!isShow)

+ 7 - 19
web/app/components/datasets/settings/form/index.tsx

@@ -17,11 +17,11 @@ import Input from '@/app/components/base/input'
 import Textarea from '@/app/components/base/textarea'
 import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
 import { updateDatasetSetting } from '@/service/datasets'
-import { type DataSetListResponse, RerankingModeEnum } from '@/models/datasets'
+import { type DataSetListResponse } from '@/models/datasets'
 import DatasetDetailContext from '@/context/dataset-detail'
 import { type RetrievalConfig } from '@/types/app'
 import { useAppContext } from '@/context/app-context'
-import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
+import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
 import {
   useModelList,
@@ -74,8 +74,6 @@ const Form = () => {
   )
   const {
     modelList: rerankModelList,
-    defaultModel: rerankDefaultModel,
-    currentModel: isRerankDefaultModelValid,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
   const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
 
@@ -109,8 +107,6 @@ const Form = () => {
     }
     if (
       !isReRankModelSelected({
-        rerankDefaultModel,
-        isRerankDefaultModelValid: !!isRerankDefaultModelValid,
         rerankModelList,
         retrievalConfig,
         indexMethod,
@@ -119,17 +115,9 @@ const Form = () => {
       notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
       return
     }
-    const postRetrievalConfig = ensureRerankModelSelected({
-      rerankDefaultModel: rerankDefaultModel!,
-      retrievalConfig: {
-        ...retrievalConfig,
-        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
-      },
-      indexMethod,
-    })
-    if (postRetrievalConfig.weights) {
-      postRetrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
-      postRetrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
+    if (retrievalConfig.weights) {
+      retrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
+      retrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
     }
     try {
       setLoading(true)
@@ -141,8 +129,8 @@ const Form = () => {
           permission,
           indexing_technique: indexMethod,
           retrieval_model: {
-            ...postRetrievalConfig,
-            score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
+            ...retrievalConfig,
+            score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0,
           },
           embedding_model: embeddingModel.model,
           embedding_model_provider: embeddingModel.provider,

+ 1 - 0
web/app/components/header/account-setting/model-provider-page/model-selector/model-trigger.tsx

@@ -36,6 +36,7 @@ const ModelTrigger: FC<ModelTriggerProps> = ({
       className={classNames(
         'group flex items-center px-2 h-8 rounded-lg bg-components-input-bg-normal',
         !readonly && 'hover:bg-components-input-bg-hover cursor-pointer',
+        !!readonly && 'opacity-50',
         className,
         open && '!bg-components-input-bg-hover',
         model.status !== ModelStatusEnum.active && '!bg-[#FFFAEB]',

+ 5 - 4
web/app/components/workflow/nodes/knowledge-retrieval/components/retrieval-config.tsx

@@ -59,7 +59,8 @@ const RetrievalConfig: FC<Props> = ({
   }, [onOpenFromPropsChange])
 
   const {
-    defaultModel: rerankDefaultModel,
+    currentProvider: validRerankDefaultProvider,
+    currentModel: validRerankDefaultModel,
   } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
 
   const { multiple_retrieval_config } = payload
@@ -75,8 +76,8 @@ const RetrievalConfig: FC<Props> = ({
         ? undefined
         : (!configs.reranking_model?.reranking_provider_name
           ? {
-            provider: rerankDefaultModel?.provider?.provider || '',
-            model: rerankDefaultModel?.model || '',
+            provider: validRerankDefaultProvider?.provider || '',
+            model: validRerankDefaultModel?.model || '',
           }
           : {
             provider: configs.reranking_model?.reranking_provider_name,
@@ -86,7 +87,7 @@ const RetrievalConfig: FC<Props> = ({
       weights: configs.weights as any,
       reranking_enable: configs.reranking_enable,
     })
-  }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, rerankDefaultModel?.provider?.provider, rerankDefaultModel?.model, onRetrievalModeChange])
+  }, [onMultipleRetrievalConfigChange, payload.retrieval_mode, validRerankDefaultProvider, validRerankDefaultModel, onRetrievalModeChange])
 
   return (
     <PortalToFollowElem

+ 1 - 1
web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts

@@ -156,7 +156,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
     })
     setInputs(newInput)
   // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [currentProvider?.provider, currentModel, rerankDefaultModel])
+  }, [currentProvider?.provider, currentModel, currentRerankModel, rerankDefaultModel])
   const [selectedDatasets, setSelectedDatasets] = useState<DataSet[]>([])
   const [rerankModelOpen, setRerankModelOpen] = useState(false)
   const handleRetrievalModeChange = useCallback((newMode: RETRIEVE_TYPE) => {

+ 23 - 10
web/app/components/workflow/nodes/knowledge-retrieval/utils.ts

@@ -126,7 +126,7 @@ export const getMultipleRetrievalConfig = (
     reranking_mode,
     reranking_model,
     weights,
-    reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : true,
+    reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : shouldSetWeightDefaultValue,
   }
 
   const setDefaultWeights = () => {
@@ -152,16 +152,20 @@ export const getMultipleRetrievalConfig = (
 
   if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal) {
     result.reranking_mode = RerankingModeEnum.RerankingModel
-
-    if (rerankModelIsValid) {
-      result.reranking_mode = RerankingModeEnum.RerankingModel
-      result.reranking_model = {
-        provider: validRerankModel?.provider || '',
-        model: validRerankModel?.model || '',
+    if (!result.reranking_model?.provider || !result.reranking_model?.model) {
+      if (rerankModelIsValid) {
+        result.reranking_enable = true
+        result.reranking_model = {
+          provider: validRerankModel?.provider || '',
+          model: validRerankModel?.model || '',
+        }
+      }
+      else {
+        result.reranking_model = {
+          provider: '',
+          model: '',
+        }
       }
-    }
-    else {
-      result.reranking_model = undefined
     }
   }
 
@@ -169,6 +173,7 @@ export const getMultipleRetrievalConfig = (
     if (!reranking_mode) {
       if (validRerankModel?.provider && validRerankModel?.model) {
         result.reranking_mode = RerankingModeEnum.RerankingModel
+        result.reranking_enable = true
         result.reranking_model = {
           provider: validRerankModel.provider,
           model: validRerankModel.model,
@@ -186,6 +191,7 @@ export const getMultipleRetrievalConfig = (
     if (reranking_mode === RerankingModeEnum.WeightedScore && weights && shouldSetWeightDefaultValue) {
       if (rerankModelIsValid) {
         result.reranking_mode = RerankingModeEnum.RerankingModel
+        result.reranking_enable = true
         result.reranking_model = {
           provider: validRerankModel.provider || '',
           model: validRerankModel.model || '',
@@ -199,6 +205,13 @@ export const getMultipleRetrievalConfig = (
       result.reranking_mode = RerankingModeEnum.WeightedScore
       setDefaultWeights()
     }
+    if (reranking_mode === RerankingModeEnum.RerankingModel && rerankModelIsValid) {
+      result.reranking_enable = true
+      result.reranking_model = {
+        provider: validRerankModel.provider || '',
+        model: validRerankModel.model || '',
+      }
+    }
   }
 
   return result

+ 1 - 1
web/i18n/en-US/app-debug.ts

@@ -483,7 +483,7 @@ const translation = {
       title: 'Multi-path retrieval',
       description: 'Based on user intent, queries across all Knowledge, retrieves relevant text from multi-sources, and selects the best results matching the user query after reranking. ',
     },
-    rerankModelRequired: 'Rerank model is required',
+    rerankModelRequired: 'A configured Rerank Model is required',
     params: 'Params',
     top_k: 'Top K',
     top_kTip: 'Used to filter chunks that are most similar to user questions. The system will also dynamically adjust the value of Top K, according to max_tokens of the selected model.',

+ 2 - 2
web/i18n/en-US/workflow.ts

@@ -183,7 +183,7 @@ const translation = {
   },
   errorMsg: {
     fieldRequired: '{{field}} is required',
-    rerankModelRequired: 'Before turning on the Rerank Model, please confirm that the model has been successfully configured in the settings.',
+    rerankModelRequired: 'A configured Rerank Model is required',
     authRequired: 'Authorization is required',
     invalidJson: '{{field}} is invalid JSON',
     fields: {
@@ -191,7 +191,7 @@ const translation = {
       variableValue: 'Variable Value',
       code: 'Code',
       model: 'Model',
-      rerankModel: 'Rerank Model',
+      rerankModel: 'A configured Rerank Model',
       visionVariable: 'Vision Variable',
     },
     invalidVariable: 'Invalid variable',

+ 1 - 1
web/i18n/zh-Hans/app-debug.ts

@@ -475,7 +475,7 @@ const translation = {
       title: '多路召回',
       description: '根据用户意图同时匹配所有知识库,从多路知识库查询相关文本片段,经过重排序步骤,从多路查询结果中选择匹配用户问题的最佳结果。',
     },
-    rerankModelRequired: '请选择 Rerank 模型',
+    rerankModelRequired: '未配置 Rerank 模型',
     params: '参数设置',
     top_k: 'Top K',
     top_kTip: '用于筛选与用户问题相似度最高的文本片段。系统同时会根据选用模型上下文窗口大小动态调整分段数量。',

+ 1 - 1
web/i18n/zh-Hans/workflow.ts

@@ -183,7 +183,7 @@ const translation = {
   },
   errorMsg: {
     fieldRequired: '{{field}} 不能为空',
-    rerankModelRequired: '开启 Rerank 模型前,请务必确认模型已在设置中成功配置。',
+    rerankModelRequired: '未配置 Rerank 模型',
     authRequired: '请先授权',
     invalidJson: '{{field}} 是非法的 JSON',
     fields: {

+ 6 - 2
web/service/knowledge/use-document.ts

@@ -29,6 +29,10 @@ export const useDocumentList = (payload: {
   })
 }
 
+export const useInvalidDocumentList = () => {
+  return useInvalid(useDocumentListKey)
+}
+
 const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']
 export const useAutoDisabledDocuments = (datasetId: string) => {
   return useQuery({
@@ -94,7 +98,7 @@ export const useSyncWebsite = () => {
   })
 }
 
-const useDocumentDetailKey = [NAME_SPACE, 'documentDetail']
+const useDocumentDetailKey = [NAME_SPACE, 'documentDetail', 'withoutMetaData']
 export const useDocumentDetail = (payload: {
   datasetId: string
   documentId: string
@@ -114,7 +118,7 @@ export const useDocumentMetadata = (payload: {
 }) => {
   const { datasetId, documentId, params } = payload
   return useQuery<DocumentDetailResponse>({
-    queryKey: [...useDocumentDetailKey, 'withMetaData', datasetId, documentId],
+    queryKey: [...useDocumentDetailKey, 'onlyMetaData', datasetId, documentId],
     queryFn: () => get<DocumentDetailResponse>(`/datasets/${datasetId}/documents/${documentId}`, { params }),
   })
 }