index.tsx 14 KB


  1. import type { FC } from 'react'
  2. import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
  3. import useSWR from 'swr'
  4. import { useRouter } from 'next/navigation'
  5. import { useTranslation } from 'react-i18next'
  6. import { omit } from 'lodash-es'
  7. import { ArrowRightIcon } from '@heroicons/react/24/solid'
  8. import {
  9. RiCheckboxCircleFill,
  10. RiErrorWarningFill,
  11. RiLoader2Fill,
  12. RiTerminalBoxLine,
  13. } from '@remixicon/react'
  14. import Image from 'next/image'
  15. import { indexMethodIcon, retrievalIcon } from '../icons'
  16. import { IndexingType } from '../step-two'
  17. import DocumentFileIcon from '../../common/document-file-icon'
  18. import cn from '@/utils/classnames'
  19. import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
  20. import Button from '@/app/components/base/button'
  21. import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets'
  22. import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets'
  23. import { DataSourceType, ProcessMode } from '@/models/datasets'
  24. import NotionIcon from '@/app/components/base/notion-icon'
  25. import PriorityLabel from '@/app/components/billing/priority-label'
  26. import { Plan } from '@/app/components/billing/type'
  27. import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
  28. import UpgradeBtn from '@/app/components/billing/upgrade-btn'
  29. import { useProviderContext } from '@/context/provider-context'
  30. import { sleep } from '@/utils'
  31. import { RETRIEVE_METHOD } from '@/types/app'
  32. import Tooltip from '@/app/components/base/tooltip'
  33. import { useInvalidDocumentList } from '@/service/knowledge/use-document'
  /** Props accepted by the `EmbeddingProcess` component. */
  type Props = {
    /** Dataset whose batch is being indexed; also used to build the document-list route. */
    datasetId: string
    /** Batch identifier sent with every indexing-status request. */
    batchId: string
    /** Documents in the batch; used to resolve names, source types and icons for each status row. */
    documents?: FullDocumentDetail[]
    /** Indexing technique; compared against `IndexingType.ECONOMICAL` when rendering the rule summary. */
    indexingType?: string
    /** Retrieval method; compared against `RETRIEVE_METHOD` members to pick the label and icon. */
    retrievalMethod?: string
  }
  /**
   * Read-only summary of how a batch was processed: segmentation mode, segment
   * length, text-cleaning rules, index mode and retrieval setting.
   *
   * Every segmentation field renders `-` until `sourceData.mode` is available.
   */
  const RuleDetail: FC<{
    sourceData?: ProcessRuleResponse
    indexingType?: string
    retrievalMethod?: string
  }> = ({ sourceData, indexingType, retrievalMethod }) => {
    const { t } = useTranslation()

    // Keys double as the `field` argument of `getValue`; values are the row labels.
    const segmentationRuleMap = {
      mode: t('datasetDocuments.embedding.mode'),
      segmentLength: t('datasetDocuments.embedding.segmentLength'),
      textCleaning: t('datasetDocuments.embedding.textCleaning'),
    }

    /**
     * Resolves the text shown for one segmentation row.
     *
     * @param field - One of the keys of `segmentationRuleMap`; any key other than
     *   `mode` and `segmentLength` is treated as the text-cleaning row.
     * @returns The display value, or `-` when there is nothing to show.
     */
    const getValue = useCallback((field: string): string | number => {
      const placeholder = '-'

      // Nothing can be derived before the process rule (and its mode) is known.
      if (!sourceData?.mode)
        return placeholder

      const rules = sourceData.rules
      const isGeneralMode = sourceData.mode === ProcessMode.general

      // Translated name of a pre-processing rule; `undefined` for ids this view does not know.
      const getRuleName = (key: string): string | undefined => {
        if (key === 'remove_extra_spaces')
          return t('datasetCreation.stepTwo.removeExtraSpaces')
        if (key === 'remove_urls_emails')
          return t('datasetCreation.stepTwo.removeUrlEmails')
        if (key === 'remove_stopwords')
          return t('datasetCreation.stepTwo.removeStopwords')
        return undefined
      }

      // Only genuine numbers are shown; anything else falls back to the placeholder.
      const toTokenCount = (raw: unknown): string | number =>
        typeof raw === 'number' ? raw : placeholder

      switch (field) {
        case 'mode': {
          if (isGeneralMode)
            return t('datasetDocuments.embedding.custom') as string
          const parentMode = rules?.parent_mode === 'paragraph'
            ? t('dataset.parentMode.paragraph')
            : t('dataset.parentMode.fullDoc')
          return `${t('datasetDocuments.embedding.hierarchical')} · ${parentMode}`
        }
        case 'segmentLength': {
          const maxTokens = toTokenCount(rules?.segmentation?.max_tokens)
          if (isGeneralMode)
            return maxTokens
          const childMaxTokens = toTokenCount(rules?.subchunk_segmentation?.max_tokens)
          return `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}`
        }
        default: {
          // A missing rule list, unknown rule ids and "nothing enabled" all
          // degrade to the placeholder instead of "undefined", stray commas
          // or an empty cell.
          const enabledRuleNames = (rules?.pre_processing_rules ?? [])
            .filter(rule => rule.enabled)
            .map(rule => getRuleName(rule.id))
            .filter(Boolean)
          return enabledRuleNames.length > 0 ? enabledRuleNames.join(',') : placeholder
        }
      }
    }, [sourceData, t])

    const isEconomical = indexingType === IndexingType.ECONOMICAL

    // Any retrieval method other than full-text or hybrid uses the vector icon.
    const retrievalMethodIcon = retrievalMethod === RETRIEVE_METHOD.fullText
      ? retrievalIcon.fullText
      : retrievalMethod === RETRIEVE_METHOD.hybrid
        ? retrievalIcon.hybrid
        : retrievalIcon.vector

    return (
      <div className='flex flex-col gap-1'>
        {Object.keys(segmentationRuleMap).map(field => (
          <FieldInfo
            key={field}
            label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
            displayedValue={String(getValue(field))}
          />
        ))}
        <FieldInfo
          label={t('datasetCreation.stepTwo.indexMode')}
          displayedValue={t(`datasetCreation.stepTwo.${isEconomical ? 'economical' : 'qualified'}`) as string}
          valueIcon={
            <Image
              className='size-4'
              src={isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality}
              alt=''
            />
          }
        />
        <FieldInfo
          label={t('datasetSettings.form.retrievalSetting.title')}
          // Economical indexing is always labelled as inverted index, whatever retrievalMethod says.
          displayedValue={t(`dataset.retrieval.${isEconomical ? 'invertedIndex' : retrievalMethod}.title`) as string}
          valueIcon={
            <Image
              className='size-4'
              src={retrievalMethodIcon}
              alt=''
            />
          }
        />
      </div>
    )
  }
  /** Statuses treated as finished: polling stops once every document is in one of them. */
  const FINISHED_STATUSES: string[] = ['completed', 'error', 'paused']
  /** Statuses that make the header show the "processing" spinner. */
  const HEADER_ACTIVE_STATUSES: string[] = ['indexing', 'splitting', 'parsing', 'cleaning']
  /** Statuses for which a row shows its progress bar and percentage (also covers `waiting`). */
  const ROW_ACTIVE_STATUSES: string[] = [...HEADER_ACTIVE_STATUSES, 'waiting']
  /** Delay between two indexing-status requests, in milliseconds. */
  const POLL_INTERVAL_MS = 2500

  /**
   * Shows the indexing progress of every document in a batch, followed by the
   * processing-rule summary and navigation buttons.
   *
   * The indexing status is polled every `POLL_INTERVAL_MS` until all documents
   * reach a finished status, the component unmounts, or `datasetId` / `batchId`
   * change (in which case polling restarts for the new batch).
   */
  const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
    const { t } = useTranslation()
    const { enableBilling, plan } = useProviderContext()
    const router = useRouter()
    const invalidDocumentList = useInvalidDocumentList()

    const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])

    // Bumped whenever a polling loop must stop (unmount or new batch). Each loop
    // remembers the value it started with and exits as soon as it differs, so a
    // request still in flight can neither update state nor keep polling.
    const pollGenerationRef = useRef(0)

    useEffect(() => {
      const generation = ++pollGenerationRef.current
      const isCurrent = () => pollGenerationRef.current === generation

      const poll = async () => {
        while (isCurrent()) {
          try {
            const status = await doFetchIndexingStatus({ datasetId, batchId })
            if (!isCurrent())
              return
            setIndexingStatusDetail(status.data)
            if (status.data.every(detail => FINISHED_STATUSES.includes(detail.indexing_status)))
              return
          }
          catch {
            // Deliberate best-effort: a failed request is retried after the usual delay.
          }
          await sleep(POLL_INTERVAL_MS)
        }
      }
      void poll()

      return () => {
        pollGenerationRef.current += 1
      }
    }, [datasetId, batchId])

    // The process rule is looked up through the first document of the batch.
    // A null key makes SWR skip the request while there is no document yet.
    const firstDocument = documents[0]
    const { data: ruleDetail } = useSWR(
      firstDocument
        ? { action: 'fetchProcessRule', params: { documentId: firstDocument.id } }
        : null,
      apiParams => fetchProcessRule(omit(apiParams, 'action')),
      { revalidateOnFocus: false },
    )

    const navToDocumentList = () => {
      invalidDocumentList()
      router.push(`/datasets/${datasetId}/documents`)
    }
    const navToApiDocs = () => {
      router.push('/datasets?category=api')
    }

    const isEmbedding = useMemo(() => {
      return indexingStatusBatchDetail.some(detail => HEADER_ACTIVE_STATUSES.includes(detail?.indexing_status || ''))
    }, [indexingStatusBatchDetail])

    // `every` is true for an empty list, so require at least one status entry;
    // otherwise "completed" would be shown before the first response arrives.
    const isEmbeddingCompleted = useMemo(() => {
      return indexingStatusBatchDetail.length > 0
        && indexingStatusBatchDetail.every(detail => FINISHED_STATUSES.includes(detail?.indexing_status || ''))
    }, [indexingStatusBatchDetail])

    // One lookup table instead of a linear search per rendered field.
    // The first document wins on duplicate ids, matching `Array.prototype.find`.
    const documentById = useMemo(() => {
      const lookup = new Map<string, FullDocumentDetail>()
      for (const doc of documents) {
        if (!lookup.has(doc.id))
          lookup.set(doc.id, doc)
      }
      return lookup
    }, [documents])

    /** Extension after the last dot of `name`, or `txt` when there is no name or the extension is empty. */
    const getFileType = (name?: string) => name?.split('.').pop() || 'txt'

    /** Completed share of segments as a whole percentage, capped at 100; 0 when the total is unknown. */
    const getSourcePercent = (detail: IndexingStatusResponse) => {
      const completedCount = detail.completed_segments || 0
      const totalCount = detail.total_segments || 0
      if (totalCount === 0)
        return 0
      return Math.min(100, Math.round(completedCount * 100 / totalCount))
    }

    const isSourceEmbedding = (detail: IndexingStatusResponse) =>
      ROW_ACTIVE_STATUSES.includes(detail.indexing_status || '')

    return (
      <>
        <div className="h-5 flex items-center mb-3">
          <div className="flex items-center justify-between text-gray-900 font-medium text-sm mr-2">
            {isEmbedding && <div className='flex items-center'>
              <RiLoader2Fill className='size-4 mr-1 animate-spin' />
              {t('datasetDocuments.embedding.processing')}
            </div>}
            {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
          </div>
        </div>
        {
          enableBilling && plan.type !== Plan.team && (
            <div className='flex items-center mb-3 p-3 h-14 bg-white border-[0.5px] border-black/5 shadow-md rounded-xl'>
              <div className='shrink-0 flex items-center justify-center w-8 h-8 bg-[#FFF6ED] rounded-lg'>
                <ZapFast className='w-4 h-4 text-[#FB6514]' />
              </div>
              <div className='grow mx-3 text-[13px] font-medium text-gray-700'>
                {t('billing.plansCommon.documentProcessingPriorityUpgrade')}
              </div>
              <UpgradeBtn loc='knowledge-speed-up' />
            </div>
          )
        }
        <div className="flex flex-col gap-0.5 pb-2">
          {indexingStatusBatchDetail.map((indexingStatusDetail) => {
            const sourceDocument = documentById.get(indexingStatusDetail.id)
            const sourceName = sourceDocument?.name
            const sourceType = sourceDocument?.data_source_type as DataSourceType
            const showProgress = isSourceEmbedding(indexingStatusDetail)
            const percent = getSourcePercent(indexingStatusDetail)

            return (
              <div key={indexingStatusDetail.id} className={cn(
                'relative h-[26px] bg-components-progress-bar-bg rounded-md overflow-hidden',
                indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
              )}>
                {showProgress && (
                  <div className="absolute top-0 left-0 h-full min-w-0.5 bg-components-progress-bar-progress border-r-[2px] border-r-components-progress-bar-progress-highlight" style={{ width: `${percent}%` }} />
                )}
                <div className="flex gap-1 pl-[6px] pr-2 h-full items-center z-[1]">
                  {sourceType === DataSourceType.FILE && (
                    <DocumentFileIcon
                      className="shrink-0 size-4"
                      name={sourceName}
                      extension={getFileType(sourceName)}
                    />
                  )}
                  {sourceType === DataSourceType.NOTION && (
                    <NotionIcon
                      className='shrink-0'
                      type='page'
                      src={sourceDocument?.data_source_info?.notion_page_icon}
                    />
                  )}
                  <div className="grow flex items-center gap-1 w-0" title={sourceName}>
                    <div className="text-xs truncate">
                      {sourceName}
                    </div>
                    {
                      enableBilling && (
                        <PriorityLabel className='ml-0' />
                      )
                    }
                  </div>
                  {showProgress && (
                    <div className="shrink-0 text-xs">{`${percent}%`}</div>
                  )}
                  {indexingStatusDetail.indexing_status === 'error' && (
                    <Tooltip
                      popupClassName='px-4 py-[14px] max-w-60 text-sm leading-4 text-text-secondary border-[0.5px] border-components-panel-border rounded-xl'
                      offset={4}
                      popupContent={indexingStatusDetail.error}
                    >
                      <span>
                        <RiErrorWarningFill className='shrink-0 size-4 text-text-destructive' />
                      </span>
                    </Tooltip>
                  )}
                  {indexingStatusDetail.indexing_status === 'completed' && (
                    <RiCheckboxCircleFill className='shrink-0 size-4 text-text-success' />
                  )}
                </div>
              </div>
            )
          })}
        </div>
        <hr className="my-3 h-[1px] bg-divider-subtle border-0" />
        <RuleDetail
          sourceData={ruleDetail}
          indexingType={indexingType}
          retrievalMethod={retrievalMethod}
        />
        <div className='flex items-center gap-2 my-10'>
          <Button className='w-fit' onClick={navToApiDocs}>
            <RiTerminalBoxLine className='size-4 mr-2' />
            {/* NOTE(review): this label is hard-coded rather than passed through t(). */}
            <span>Access the API</span>
          </Button>
          <Button className='w-fit' variant='primary' onClick={navToDocumentList}>
            <span>{t('datasetCreation.stepThree.navTo')}</span>
            <ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
          </Button>
        </div>
      </>
    )
  }
  export default EmbeddingProcess