Browse Source

doc: Added explanation of chunk_overlap to knowledge API (#12247)

Co-authored-by: crazywoola <427733928@qq.com>
yagiyuki 3 months ago
parent
commit
7c71bd7be7

+ 4 - 0
web/app/(commonLayout)/datasets/template/template.en.mdx

@@ -78,6 +78,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) Child chunk rules
               - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
               - <code>max_tokens</code> Maximum chunk length, in tokens. Must be smaller than the length of the parent chunk
+              - <code>chunk_overlap</code> Defines the overlap between adjacent chunks (optional)
       </Property>
     </Properties>
   </Col>
@@ -191,6 +192,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) Child chunk rules
               - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
               - <code>max_tokens</code> Maximum chunk length, in tokens. Must be smaller than the length of the parent chunk
+              - <code>chunk_overlap</code> Defines the overlap between adjacent chunks (optional)
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         Files that need to be uploaded.
@@ -477,6 +479,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) Child chunk rules
               - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
               - <code>max_tokens</code> Maximum chunk length, in tokens. Must be smaller than the length of the parent chunk
+              - <code>chunk_overlap</code> Defines the overlap between adjacent chunks (optional)
       </Property>
     </Properties>
   </Col>
@@ -578,6 +581,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) Child chunk rules
               - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code>
               - <code>max_tokens</code> Maximum chunk length, in tokens. Must be smaller than the length of the parent chunk
+              - <code>chunk_overlap</code> Defines the overlap between adjacent chunks (optional)
       </Property>
     </Properties>
   </Col>

+ 4 - 0
web/app/(commonLayout)/datasets/template/template.zh.mdx

@@ -78,6 +78,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) 子分段规则
               - <code>separator</code> 分段标识符，目前仅允许设置一个分隔符。默认为 <code>***</code>
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
+              - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时，段与段之间存在一定的重叠部分（选填）
       </Property>
     </Properties>
   </Col>
@@ -191,6 +192,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) 子分段规则
               - <code>separator</code> 分段标识符，目前仅允许设置一个分隔符。默认为 <code>***</code>
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
+              - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时，段与段之间存在一定的重叠部分（选填）
       </Property>
       <Property name='file' type='multipart/form-data' key='file'>
         需要上传的文件。
@@ -477,6 +479,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) 子分段规则
               - <code>separator</code> 分段标识符，目前仅允许设置一个分隔符。默认为 <code>***</code>
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
+              - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时，段与段之间存在一定的重叠部分（选填）
       </Property>
     </Properties>
   </Col>
@@ -578,6 +581,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
             - <code>subchunk_segmentation</code> (object) 子分段规则
               - <code>separator</code> 分段标识符，目前仅允许设置一个分隔符。默认为 <code>***</code>
               - <code>max_tokens</code> 最大长度 (token) 需要校验小于父级的长度
+              - <code>chunk_overlap</code> 分段重叠指的是在对数据进行分段时，段与段之间存在一定的重叠部分（选填）
       </Property>
     </Properties>
   </Col>