// index.tsx (6.4 KB)
  'use client'
  import React, { useCallback, useEffect, useState } from 'react'
  import { useTranslation } from 'react-i18next'
  import AppUnavailable from '../../base/app-unavailable'
  import { ModelTypeEnum } from '../../header/account-setting/model-provider-page/declarations'
  import StepOne from './step-one'
  import StepTwo from './step-two'
  import StepThree from './step-three'
  import { Topbar } from './top-bar'
  import { DataSourceType } from '@/models/datasets'
  import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
  import { fetchDataSource } from '@/service/common'
  import { fetchDatasetDetail } from '@/service/datasets'
  import { DataSourceProvider, type NotionPage } from '@/models/common'
  import { useModalContext } from '@/context/modal-context'
  import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
  /** Props accepted by `DatasetUpdateForm`. */
  type DatasetUpdateFormProps = {
    /**
     * Id of an existing dataset. Optional: when it is provided the form fetches
     * that dataset's detail; when it is omitted no detail is loaded.
     */
    datasetId?: string
  }
  /**
   * Initial value of the website crawl options state held by the form.
   * NOTE(review): the empty strings for `includes`, `excludes` and `max_depth`
   * presumably mean "not set" — confirm against the `CrawlOptions` consumers.
   */
  const DEFAULT_CRAWL_OPTIONS: CrawlOptions = {
    crawl_sub_pages: true,
    only_main_content: true,
    includes: '',
    excludes: '',
    limit: 10,
    max_depth: '',
    use_sitemap: true,
  }
  /**
   * Multi-step form that owns the state shared by `StepOne`, `StepTwo` and
   * `StepThree` and renders exactly one of them depending on `step`.
   *
   * - Step 1 (`StepOne`) receives the data source selection state: files,
   *   Notion pages, website pages and crawl options.
   * - Step 2 (`StepTwo`) receives those selections and reports the indexing
   *   type, retrieval method and creation result back through the cache setters.
   * - Step 3 (`StepThree`) receives the dataset detail and the cached values.
   *
   * When loading the dataset detail fails, `AppUnavailable` is rendered instead
   * of the steps.
   *
   * @param props.datasetId - Optional dataset id. When set, the detail is fetched
   *   with `fetchDatasetDetail` and step 2 is not rendered until it has loaded.
   */
  const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
    const { t } = useTranslation()
    const { setShowAccountSettingModal } = useModalContext()
    const [hasConnection, setHasConnection] = useState(true)
    const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
    const [step, setStep] = useState(1)
    const [indexingTypeCache, setIndexTypeCache] = useState('')
    const [retrievalMethodCache, setRetrievalMethodCache] = useState('')
    const [fileList, setFiles] = useState<FileItem[]>([])
    const [result, setResult] = useState<createDocumentResponse | undefined>()
    const [hasError, setHasError] = useState(false)
    const { data: embeddingsDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
    const [notionPages, setNotionPages] = useState<NotionPage[]>([])
    const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
    const [crawlOptions, setCrawlOptions] = useState<CrawlOptions>(DEFAULT_CRAWL_OPTIONS)
    const [websiteCrawlProvider, setWebsiteCrawlProvider] = useState<DataSourceProvider>(DataSourceProvider.fireCrawl)
    const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
    const [detail, setDetail] = useState<DataSet | null>(null)

    const updateNotionPages = (value: NotionPage[]) => {
      setNotionPages(value)
    }

    const updateFileList = (preparedFiles: FileItem[]) => {
      setFiles(preparedFiles)
    }

    /**
     * Records upload progress for one file and publishes a fresh copy of the list.
     *
     * @param fileItem - File whose progress changed; matched by `fileID`.
     * @param progress - New progress value stored on the matching entry.
     * @param list - Caller-owned list; the matching entry is replaced in place.
     */
    const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
      const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
      // Skip unknown files: assigning to list[-1] would only attach a stray
      // "-1" property to the caller's array.
      if (targetIndex !== -1) {
        // The in-place write is deliberate. The original author noted that
        // rebuilding the entry from `fileItem` with `map` caused a "dirty list
        // update problem"; presumably callers reuse the same `list` reference
        // between progress events — confirm before making this immutable.
        list[targetIndex] = {
          ...list[targetIndex],
          progress,
        }
      }
      setFiles([...list])
    }

    const updateIndexingTypeCache = (type: string) => {
      setIndexTypeCache(type)
    }

    const updateResultCache = (res?: createDocumentResponse) => {
      setResult(res)
    }

    const updateRetrievalMethodCache = (method: string) => {
      setRetrievalMethodCache(method)
    }

    // Functional updaters read the latest step instead of a value captured at
    // render time, so the callbacks are stable and never act on a stale step.
    const nextStep = useCallback(() => {
      setStep(current => current + 1)
    }, [])

    const changeStep = useCallback((delta: number) => {
      setStep(current => current + delta)
    }, [])

    // Runs once on mount: checks whether a Notion integration is present.
    // A rejected request is not handled here, in which case `hasConnection`
    // keeps its initial value of `true`.
    useEffect(() => {
      const checkNotionConnection = async () => {
        const { data } = await fetchDataSource({ url: '/data-source/integrates' })
        setHasConnection(data.some(item => item.provider === 'notion'))
      }
      checkNotionConnection()
    }, [])

    // Loads the dataset detail whenever `datasetId` changes.
    useEffect(() => {
      // Set by the cleanup so a response that arrives after `datasetId` changed
      // or the component unmounted cannot overwrite newer state.
      let ignore = false

      const loadDetail = async (id: string) => {
        try {
          const fetched = await fetchDatasetDetail(id)
          if (!ignore)
            setDetail(fetched)
        }
        catch {
          if (!ignore)
            setHasError(true)
        }
      }

      if (datasetId)
        loadDetail(datasetId)

      return () => {
        ignore = true
      }
    }, [datasetId])

    if (hasError)
      return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />

    return (
      <div className='flex flex-col bg-components-panel-bg' style={{ height: 'calc(100vh - 56px)' }}>
        <Topbar activeIndex={step - 1} />
        <div style={{ height: 'calc(100% - 52px)' }}>
          {step === 1 && <StepOne
            hasConnection={hasConnection}
            onSetting={() => setShowAccountSettingModal({ payload: 'data-source' })}
            datasetId={datasetId}
            dataSourceType={dataSourceType}
            dataSourceTypeDisable={!!detail?.data_source_type}
            changeType={setDataSourceType}
            files={fileList}
            updateFile={updateFile}
            updateFileList={updateFileList}
            notionPages={notionPages}
            updateNotionPages={updateNotionPages}
            onStepChange={nextStep}
            websitePages={websitePages}
            updateWebsitePages={setWebsitePages}
            onWebsiteCrawlProviderChange={setWebsiteCrawlProvider}
            onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId}
            crawlOptions={crawlOptions}
            onCrawlOptionsChange={setCrawlOptions}
          />}
          {/* With a datasetId, step 2 waits until the dataset detail has loaded. */}
          {(step === 2 && (!datasetId || !!detail)) && <StepTwo
            isAPIKeySet={!!embeddingsDefaultModel}
            onSetting={() => setShowAccountSettingModal({ payload: 'provider' })}
            indexingType={detail?.indexing_technique}
            datasetId={datasetId}
            dataSourceType={dataSourceType}
            files={fileList.map(file => file.file)}
            notionPages={notionPages}
            websitePages={websitePages}
            websiteCrawlProvider={websiteCrawlProvider}
            websiteCrawlJobId={websiteCrawlJobId}
            onStepChange={changeStep}
            updateIndexingTypeCache={updateIndexingTypeCache}
            updateRetrievalMethodCache={updateRetrievalMethodCache}
            updateResultCache={updateResultCache}
            crawlOptions={crawlOptions}
          />}
          {step === 3 && <StepThree
            datasetId={datasetId}
            datasetName={detail?.name}
            indexingType={detail?.indexing_technique || indexingTypeCache}
            retrievalMethod={detail?.retrieval_model_dict?.search_method || retrievalMethodCache}
            creationCache={result}
          />}
        </div>
      </div>
    )
  }
  export default DatasetUpdateForm