/* eslint-disable no-mixed-operators */
'use client'
import React, { useEffect, useLayoutEffect, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import { XMarkIcon } from '@heroicons/react/20/solid'
import cn from 'classnames'
import Link from 'next/link'
import PreviewItem from './preview-item'
import s from './index.module.css'
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
  createDocument,
  createFirstDocument,
  fetchFileIndexingEstimate as didFetchFileIndexingEstimate,
  fetchDefaultProcessRule,
} from '@/service/datasets'
import Button from '@/app/components/base/button'
import Loading from '@/app/components/base/loading'
import Toast from '@/app/components/base/toast'
import { formatNumber } from '@/utils/format'

/** Props accepted by {@link StepTwo}. */
type StepTwoProps = {
  isSetting?: boolean
  documentDetail?: FullDocumentDetail
  hasSetAPIKEY: boolean
  onSetting: () => void
  datasetId?: string
  indexingType?: string
  file?: File
  onStepChange?: (delta: number) => void
  updateIndexingTypeCache?: (type: string) => void
  updateResultCache?: (res: createDocumentResponse) => void
  onSave?: () => void
  onCancel?: () => void
}

/** Values sent as `process_rule.mode`. */
enum SegmentType {
  AUTO = 'automatic',
  CUSTOM = 'custom',
}

/** Values sent as `indexing_technique`. */
enum IndexingType {
  QUALIFIED = 'high_quality',
  ECONOMICAL = 'economy',
}

/** Two-character text (`\` followed by `n`) shown in the separator input in place of a real newline. */
const ESCAPED_NEWLINE = '\\n'

/**
 * Converts a separator as stored in a rule set into the text kept in component state.
 * A real newline becomes the visible two-character escape; an empty or missing
 * separator also falls back to that escape.
 */
const toDisplaySeparator = (separator?: string) =>
  separator === '\n' ? ESCAPED_NEWLINE : separator || ESCAPED_NEWLINE

/** Inverse of {@link toDisplaySeparator}: turns the visible escape back into a real newline. */
const toApiSeparator = (separator: string) =>
  separator === ESCAPED_NEWLINE ? '\n' : separator

/**
 * Second step of the document creation / settings flow: lets the user pick the
 * segmentation mode, edit custom segmentation rules, pick the indexing mode and
 * preview the indexing estimate before creating (or re-processing) a document.
 *
 * When `isSetting` is true the rules are seeded from `documentDetail`; otherwise
 * the default process rule is fetched from the server.
 */
const StepTwo = ({
  isSetting,
  documentDetail,
  hasSetAPIKEY,
  onSetting,
  datasetId,
  indexingType,
  file,
  onStepChange,
  updateIndexingTypeCache,
  updateResultCache,
  onSave,
  onCancel,
}: StepTwoProps) => {
  const { t } = useTranslation()

  // NOTE(review): the element type is an assumption (the markup these refs attach to
  // is not visible here) - confirm both refs are placed on <div> elements.
  const scrollRef = useRef<HTMLDivElement>(null)
  const [scrolled, setScrolled] = useState(false)
  const previewScrollRef = useRef<HTMLDivElement>(null)
  const [previewScrolled, setPreviewScrolled] = useState(false)

  const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO)
  const [segmentIdentifier, setSegmentIdentifier] = useState(ESCAPED_NEWLINE)
  const [max, setMax] = useState(1000)
  const [rules, setRules] = useState<PreProcessingRule[]>([])
  const [defaultConfig, setDefaultConfig] = useState<Rules>()

  const hasSetIndexType = !!indexingType
  // An explicitly provided indexing type always wins; only without one does the
  // API-key state pick the default. The previous expression
  // `indexingType || hasSetAPIKEY ? A : B` parsed as `(indexingType || hasSetAPIKEY) ? A : B`
  // and therefore started from QUALIFIED even when `indexingType` was 'economy'.
  const [indexType, setIndexType] = useState<IndexingType>(
    indexingType
      ? (indexingType as IndexingType)
      : hasSetAPIKEY
        ? IndexingType.QUALIFIED
        : IndexingType.ECONOMICAL,
  )

  const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
  const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
  const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
  // Estimate belonging to the currently selected segmentation mode.
  const fileIndexingEstimate = segmentationType === SegmentType.AUTO
    ? automaticFileIndexingEstimate
    : customFileIndexingEstimate

  /** Tracks whether the main pane is scrolled away from the top. */
  const scrollHandle = (e: Event) => {
    setScrolled((e.target as HTMLElement).scrollTop > 0)
  }

  /** Tracks whether the preview pane is scrolled away from the top. */
  const previewScrollHandle = (e: Event) => {
    setPreviewScrolled((e.target as HTMLElement).scrollTop > 0)
  }

  /**
   * Returns `name` without its last extension ("a.b.txt" -> "a.b").
   * Names with no extension, and dotfiles such as ".env", are returned unchanged
   * instead of collapsing to an empty string.
   */
  const getFileName = (name: string) => {
    const lastDot = name.lastIndexOf('.')
    return lastDot > 0 ? name.slice(0, lastDot) : name
  }

  /** Maps a pre-processing rule id to its translated label; unknown ids yield `undefined`. */
  const getRuleName = (key: string) => {
    switch (key) {
      case 'remove_extra_spaces':
        return t('datasetCreation.stepTwo.removeExtraSpaces')
      case 'remove_urls_emails':
        return t('datasetCreation.stepTwo.removeUrlEmails')
      case 'remove_stopwords':
        return t('datasetCreation.stepTwo.removeStopwords')
      default:
        return undefined
    }
  }

  /** Toggles the `enabled` flag of the rule with the given id. */
  const ruleChangeHandle = (id: string) => {
    const newRules = rules.map((rule) => {
      if (rule.id === id) {
        return {
          id: rule.id,
          enabled: !rule.enabled,
        }
      }
      return rule
    })
    setRules(newRules)
  }

  /** Copies separator, max tokens and pre-processing rules from a rule set into state. */
  const applyRules = (config: Rules) => {
    setSegmentIdentifier(toDisplaySeparator(config.segmentation.separator))
    setMax(config.segmentation.max_tokens)
    setRules(config.pre_processing_rules)
  }

  /** Restores the custom settings to the last loaded default configuration, if any. */
  const resetRules = () => {
    if (defaultConfig)
      applyRules(defaultConfig)
  }

  /** The `indexingType` prop takes precedence over the locally selected index type. */
  const getIndexing_technique = () => indexingType || indexType

  /** Builds the `process_rule` payload for the current segmentation settings. */
  const getProcessRule = () => {
    const processRule: any = {
      rules: {}, // api will check this. It will be removed after api refactored.
      mode: segmentationType,
    }
    if (segmentationType === SegmentType.CUSTOM) {
      processRule.rules = {
        pre_processing_rules: rules,
        segmentation: {
          separator: toApiSeparator(segmentIdentifier),
          max_tokens: max,
        },
      }
    }
    return processRule
  }

  /** Builds the request parameters for the indexing-estimate call. */
  const getFileIndexingEstimateParams = () => {
    return {
      file_id: file?.id,
      dataset_id: datasetId,
      indexing_technique: getIndexing_technique(),
      process_rule: getProcessRule(),
    }
  }

  /**
   * Fetches the indexing estimate and stores it in the slot of the segmentation
   * mode that was selected when the request was issued.
   */
  const fetchFileIndexingEstimate = async () => {
    const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
    if (segmentationType === SegmentType.CUSTOM)
      setCustomFileIndexingEstimate(res)
    else
      setAutomaticFileIndexingEstimate(res)
  }

  /** Clears the custom estimate, opens the preview and requests a fresh estimate. */
  const confirmChangeCustomConfig = async () => {
    setCustomFileIndexingEstimate(null)
    setShowPreview()
    await fetchFileIndexingEstimate()
  }

  /**
   * Builds the create/update request body: in setting mode it targets the existing
   * document, otherwise it describes the uploaded file as the data source.
   */
  const getCreationParams = () => {
    if (isSetting) {
      return {
        original_document_id: documentDetail?.id,
        process_rule: getProcessRule(),
      } as CreateDocumentReq
    }
    return {
      data_source: {
        type: 'upload_file',
        info: file?.id,
        name: file?.name,
      },
      indexing_technique: getIndexing_technique(),
      process_rule: getProcessRule(),
    } as CreateDocumentReq
  }

  /** Loads the default process rule and uses it as both current and default settings. */
  const getRules = async () => {
    try {
      const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
      applyRules(res.rules)
      setDefaultConfig(res.rules)
    }
    catch (err) {
      console.log(err)
    }
  }

  /** Seeds current and default settings from the document's stored process rule. */
  const getRulesFromDetail = () => {
    if (documentDetail) {
      const detailRules = documentDetail.dataset_process_rule.rules
      applyRules(detailRules)
      setDefaultConfig(detailRules)
    }
  }

  /** Selects the segmentation mode stored on the document, if a document is given. */
  const getDefaultMode = () => {
    if (documentDetail)
      setSegmentationType(documentDetail.dataset_process_rule.mode)
  }

  /**
   * Creates the document (a first document when there is no `datasetId`), reports
   * the result through the cache callbacks, then advances the flow.
   * Any failure is shown as an error toast.
   */
  const createHandle = async () => {
    try {
      const params = getCreationParams()
      if (!datasetId) {
        const res = await createFirstDocument({
          body: params,
        })
        updateIndexingTypeCache?.(indexType)
        updateResultCache?.(res)
      }
      else {
        const res = await createDocument({
          datasetId,
          body: params,
        })
        updateIndexingTypeCache?.(indexType)
        updateResultCache?.({
          document: res,
        })
      }
      onStepChange?.(+1)
      if (isSetting)
        onSave?.()
    }
    catch (err) {
      Toast.notify({
        type: 'error',
        message: `${err}`,
      })
    }
  }

  useEffect(() => {
    // fetch rules
    if (!isSetting) {
      getRules()
    }
    else {
      getRulesFromDetail()
      getDefaultMode()
    }
  }, [])

  useEffect(() => {
    // Capture the node now: by the time the cleanup runs `scrollRef.current` may
    // already be null, which would leave the listener attached.
    const node = scrollRef.current
    node?.addEventListener('scroll', scrollHandle)
    return () => {
      node?.removeEventListener('scroll', scrollHandle)
    }
  }, [])

  useLayoutEffect(() => {
    if (showPreview) {
      // Same capture as above, for the preview pane.
      const node = previewScrollRef.current
      node?.addEventListener('scroll', previewScrollHandle)
      return () => {
        node?.removeEventListener('scroll', previewScrollHandle)
      }
    }
  }, [showPreview])

  useEffect(() => {
    // get indexing type by props
    if (indexingType)
      setIndexType(indexingType as IndexingType)
    else
      setIndexType(hasSetAPIKEY ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
  }, [hasSetAPIKEY, indexingType, datasetId])

  useEffect(() => {
    if (segmentationType === SegmentType.AUTO) {
      // Automatic mode: drop the stale estimate, show the preview and recompute.
      setAutomaticFileIndexingEstimate(null)
      setShowPreview()
      fetchFileIndexingEstimate()
    }
    else {
      // Custom mode: hide the preview and drop the stale custom estimate.
      hidePreview()
      setCustomFileIndexingEstimate(null)
    }
  }, [segmentationType, indexType])

  return (
{t('datasetCreation.steps.two')}
{t('datasetCreation.stepTwo.segmentation')}
setSegmentationType(SegmentType.AUTO)} >
{t('datasetCreation.stepTwo.auto')}
{t('datasetCreation.stepTwo.autoDescription')}
setSegmentationType(SegmentType.CUSTOM)} >
{t('datasetCreation.stepTwo.custom')}
{t('datasetCreation.stepTwo.customDescription')}
{segmentationType === SegmentType.CUSTOM && (
{t('datasetCreation.stepTwo.separator')}
setSegmentIdentifier(e.target.value)} />
{t('datasetCreation.stepTwo.maxLength')}
setMax(Number(e.target.value))} />
{t('datasetCreation.stepTwo.rules')}
{rules.map(rule => (
ruleChangeHandle(rule.id)} className="w-4 h-4 rounded border-gray-300 text-blue-700 focus:ring-blue-700" />
))}
)}
{t('datasetCreation.stepTwo.indexMode')}
{(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && (
{ if (hasSetAPIKEY) setIndexType(IndexingType.QUALIFIED) }} > {!hasSetIndexType && }
{t('datasetCreation.stepTwo.qualified')} {!hasSetIndexType && {t('datasetCreation.stepTwo.recommend')}}
{t('datasetCreation.stepTwo.qualifiedTip')}
{t('datasetCreation.stepTwo.emstimateCost')}
{ fileIndexingEstimate ? (
{formatNumber(fileIndexingEstimate.tokens)} tokens(${formatNumber(fileIndexingEstimate.total_price)})
) : (
{t('datasetCreation.stepTwo.calculating')}
) }
{!hasSetAPIKEY && (
{t('datasetCreation.stepTwo.warning')}  {t('datasetCreation.stepTwo.click')}
)}
)} {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && (
!hasSetIndexType && setIndexType(IndexingType.ECONOMICAL)} > {!hasSetIndexType && }
{t('datasetCreation.stepTwo.economical')}
{t('datasetCreation.stepTwo.economicalTip')}
{t('datasetCreation.stepTwo.emstimateCost')}
0 tokens
)}
{hasSetIndexType && (
{t('datasetCreation.stepTwo.indexSettedTip')} {t('datasetCreation.stepTwo.datasetSettingLink')}
)}
{t('datasetCreation.stepTwo.fileName')}
{getFileName(file?.name || '')}
{t('datasetCreation.stepTwo.emstimateSegment')}
{ fileIndexingEstimate ? (
{formatNumber(fileIndexingEstimate.total_segments)}
) : (
{t('datasetCreation.stepTwo.calculating')}
) }
{!isSetting ? (
) : (
)}
{(showPreview) ? (
{t('datasetCreation.stepTwo.previewTitle')}
{fileIndexingEstimate?.preview ? ( <> {fileIndexingEstimate?.preview.map((item, index) => ( ))} ) :
}
) : (
{t('datasetCreation.stepTwo.sideTipTitle')}

{t('datasetCreation.stepTwo.sideTipP1')}

{t('datasetCreation.stepTwo.sideTipP2')}

{t('datasetCreation.stepTwo.sideTipP3')}

{t('datasetCreation.stepTwo.sideTipP4')}

)}
) } export default StepTwo