// datasets.ts — type definitions for datasets, documents, segments and hit testing.

  1. import type { DataSourceNotionPage } from './common'
  2. import type { AppMode, RetrievalConfig } from '@/types/app'
  3. import type { Tag } from '@/app/components/base/tag-management/constant'
  /**
   * Origin of imported content.
   *
   * The string values are what `data_source_type` fields in this module carry
   * (see `DataSet`, `InitialDocumentDetail` and `DataSource`).
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'web_import',
  }
  /**
   * A dataset record: identity, presentation, access, counters,
   * embedding settings, retrieval settings and tags.
   */
  export type DataSet = {
    // Identity and presentation
    id: string
    name: string
    description: string
    icon: string
    icon_background: string

    // Access and provenance
    permission: 'only_me' | 'all_team_members'
    created_by: string
    updated_by: string
    updated_at: number

    // Source and indexing
    data_source_type: DataSourceType
    indexing_technique: 'high_quality' | 'economy'

    // Usage counters
    app_count: number
    document_count: number
    word_count: number

    // Embedding model
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean

    // Retrieval configuration; both fields share the RetrievalConfig shape.
    retrieval_model_dict: RetrievalConfig
    retrieval_model: RetrievalConfig

    tags: Tag[]
  }
  /**
   * A DOM `File` extended with optional metadata fields.
   * Every extra field is optional, so a plain `File` also satisfies this type.
   */
  export type CustomFile = File & {
    id?: string
    mime_type?: string
    extension?: string
    created_at?: number
    created_by?: string
  }
  /** Pairs a `CustomFile` with an identifier and a numeric progress value. */
  export type FileItem = {
    fileID: string
    progress: number
    file: CustomFile
  }
  /** One page of datasets together with its paging fields. */
  export type DataSetListResponse = {
    data: Array<DataSet>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** A question paired with its answer. */
  export type QA = {
    answer: string
    question: string
  }
  /**
   * Result of an indexing estimate: token and segment counts, price and
   * currency, a text preview, and an optional question/answer preview.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_segments: number
    total_price: number
    currency: string
    preview: string[]
    qa_preview?: QA[]
  }
  /** `IndexingEstimateResponse` with an additional `total_nodes` count. */
  export type FileIndexingEstimateResponse = IndexingEstimateResponse & {
    total_nodes: number
  }
  /**
   * Indexing progress for a single item: current status, per-stage numeric
   * markers and segment counters.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus

    // Per-stage markers
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number

    // Segment progress
    completed_segments: number
    total_segments: number

    // Typed as `any` in this module; the concrete shapes are not declared here.
    completed_at: any
    paused_at: any
    stopped_at: any
    error: any
  }
  /** Wrapper holding a list of `IndexingStatusResponse` entries under `data`. */
  export type IndexingStatusBatchResponse = {
    data: Array<IndexingStatusResponse>
  }
  /** The two processing modes accepted by `ProcessRuleResponse.mode`. */
  export type ProcessMode =
    | 'automatic'
    | 'custom'
  /** A processing mode together with its rule set. */
  export type ProcessRuleResponse = {
    rules: Rules
    mode: ProcessMode
  }
  /** Rule set: a list of pre-processing rules plus segmentation settings. */
  export type Rules = {
    segmentation: Segmentation
    pre_processing_rules: Array<PreProcessingRule>
  }
  /** A single pre-processing rule, identified by `id`, with an on/off flag. */
  export type PreProcessingRule = {
    enabled: boolean
    id: string
  }
  /** Segmentation settings: separator string, token limit and chunk overlap. */
  export type Segmentation = {
    separator: string
    chunk_overlap: number
    max_tokens: number
  }
  /**
   * Every value a document's `indexing_status` may take.
   * Declared `as const` so the element literals are preserved for the
   * derived `DocumentIndexingStatus` union.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the literals in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
  /**
   * Every value a document's `display_status` may take.
   * Declared `as const` so the element literals are preserved for the
   * derived `DocumentDisplayStatus` union.
   */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const

  /** Union of the literals in `DisplayStatusList`. */
  export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  /**
   * Source details attached to a document: the uploaded file's metadata and
   * an optional Notion page icon.
   */
  export type DataSourceInfo = {
    notion_page_icon?: string
    upload_file: {
      id: string
      name: string
      extension: string
      mime_type: string
      size: number
      created_by: string
      created_at: number
    }
  }
  /**
   * Base document shape; `SimpleDocumentDetail` extends it and
   * `createDocumentResponse.documents` uses it directly.
   */
  export type InitialDocumentDetail = {
    // Identity and placement
    id: string
    name: string
    batch: string
    position: number
    dataset_id: string
    dataset_process_rule_id: string

    // Source
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo

    // Creation
    created_from: 'api' | 'web'
    created_by: string
    created_at: number

    // Status and progress
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number

    doc_form: 'text_model' | 'qa_model'
  }
  /**
   * `InitialDocumentDetail` plus enable/archive flags, counters, an optional
   * error and optional source file details.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    // Flags
    enabled: boolean
    archived: boolean

    // Counters and timestamps
    word_count: number
    hit_count: number
    updated_at: number

    error?: string | null

    // Optional here, but `InitialDocumentDetail` already declares this field
    // as required, so the intersection keeps it required.
    dataset_process_rule_id?: string

    data_source_detail_dict?: {
      upload_file: {
        name: string
        extension: string
      }
    }
  }
  /** One page of documents together with its paging fields. */
  export type DocumentListResponse = {
    data: Array<SimpleDocumentDetail>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /**
   * Fields shared by document requests; `CreateDocumentReq` and
   * `IndexingEstimateParams` both build on this type.
   */
  export type DocumentReq = {
    process_rule: ProcessRule
    doc_form: 'text_model' | 'qa_model'
    doc_language: string
    indexing_technique?: string
    original_document_id?: string
  }
  /** `DocumentReq` plus the data source and retrieval configuration. */
  export type CreateDocumentReq = DocumentReq & {
    retrieval_model: RetrievalConfig
    data_source: DataSource
  }
  /**
   * Parameters for an indexing estimate: a `DocumentReq`, any subset of the
   * `DataSource` fields, and the target dataset id.
   */
  export type IndexingEstimateParams =
    & DocumentReq
    & Partial<DataSource>
    & { dataset_id: string }
  /**
   * Describes a data source. `info_list` repeats the source type and carries
   * optional Notion and file sub-lists.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      file_info_list?: {
        file_ids: string[]
      }
      notion_info_list?: Array<NotionInfo>
    }
  }
  /** A Notion workspace id together with a list of pages. */
  export type NotionInfo = {
    pages: Array<DataSourceNotionPage>
    workspace_id: string
  }
  /** Minimal Notion page reference: a page id and a type string. */
  export type NotionPage = {
    type: string
    page_id: string
  }
  /**
   * Process rule as carried by `DocumentReq.process_rule`.
   * `mode` is a plain `string` here, whereas `ProcessRuleResponse.mode`
   * uses the narrower `ProcessMode` union.
   */
  export type ProcessRule = {
    rules: Rules
    mode: string
  }
  /**
   * Result of creating documents: the batch id, the created documents and,
   * optionally, a dataset.
   */
  export type createDocumentResponse = {
    batch: string
    documents: Array<InitialDocumentDetail>
    dataset?: DataSet
  }
  /**
   * `SimpleDocumentDetail` plus processing markers, pause/stop/disable/archive
   * bookkeeping, document type and metadata, and a segment count.
   *
   * The trailing index signature allows any additional string key with an
   * `any` value.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    // Also declared with the same type on the inherited InitialDocumentDetail.
    batch: string
    created_api_request_id: string

    // Processing markers
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    completed_at: number
    indexing_latency: number
    tokens: number
    segment_count: number

    // Declared as `string` here; the inherited member is
    // `DocumentIndexingStatus`, so the intersection stays that union.
    indexing_status: string

    // Pause / stop
    paused_by: string
    paused_at: number
    stopped_at: number

    // Disable
    disabled_by: string
    disabled_at: number

    // Archive
    archived_by: string
    archived_at: number
    archived_reason: 'rule_modified' | 're_upload'

    // Document type and metadata
    doc_type?: DocType | null | 'others'
    doc_metadata?: DocMetadata | null

    [key: string]: any
  }
  /**
   * Document metadata. Seven keys are always present, and the index signature
   * admits any further string-valued key.
   */
  export type DocMetadata = {
    title: string
    author: string
    publisher: string
    publicationDate: string
    language: string
    category: string
    ISBN: string
    [key: string]: string
  }
  /**
   * The first of two document type lists; its literals form
   * `CustomizableDocType`. Declared `as const` so the literals are preserved.
   */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const

  /** The second document type list; its literals form `FixedDocType`. */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const

  /** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

  /** Union of the literals in `FIXED_DOC_TYPES`. */
  export type FixedDocType = typeof FIXED_DOC_TYPES[number]

  /** Any document type from either list. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of `FullDocumentDetail`. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Every value a segment's `status` may take.
   *
   * Declared `as const` so the derived `SegmentStatus` is the union of these
   * four literals. Without the const assertion the array is inferred as
   * `string[]` and `SegmentStatus` collapses to plain `string`.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the literals in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  /** Query parameters for listing segments; only `limit` is required. */
  export type SegmentsQuery = {
    limit: number
    last_id?: string
    // status?: SegmentStatus
    keyword?: string
    enabled?: boolean
    hit_count_gte?: number
  }
  /**
   * Full record for one segment: content, counters, index node references,
   * enable/disable state, status and an optional answer.
   */
  export type SegmentDetailModel = {
    // Identity and placement
    id: string
    document_id: string
    position: number

    // Content
    content: string
    answer?: string
    keywords: string[]
    word_count: number
    tokens: number

    // Index node
    index_node_id: string
    index_node_hash: string

    // Usage and enablement
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string

    // Status
    status: SegmentStatus
    error: string | null

    // Creation and processing markers
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    stopped_at: number
  }
  /** A list of segments with `limit`, `total` and `has_more`; no `page` field. */
  export type SegmentsResponse = {
    data: Array<SegmentDetailModel>
    limit: number
    total: number
    has_more: boolean
  }
  /** One hit-testing record: its content, its source, and who created it and when. */
  export type HitTestingRecord = {
    id: string
    content: string

    // Source
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string

    // Creator
    created_by: string
    created_by_role: 'account' | 'end_user'
    created_at: number
  }
  /** One hit-testing result: a segment, its score and its t-SNE position. */
  export type HitTesting = {
    score: number
    segment: Segment
    tsne_position: TsnePosition
  }
  /** The segment shape used in hit-testing results; it embeds its `Document`. */
  export type Segment = {
    id: string
    document: Document
    position: number

    // Content
    content: string
    keywords: string[]
    word_count: number
    tokens: number

    hit_count: number
    index_node_hash: string
  }
  /**
   * Compact document reference embedded in `Segment`.
   * As a module export, this name shadows the global DOM `Document` type
   * within this file.
   */
  export type Document = {
    id: string
    name: string
    doc_type: DocType
    data_source_type: string
  }
  /** One page of hit-testing records together with its paging fields. */
  export type HitTestingRecordsResponse = {
    data: Array<HitTestingRecord>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** A pair of numeric coordinates, `x` and `y`. */
  export type TsnePosition = {
    y: number
    x: number
  }
  /** Hit-testing result: the query with its position, plus the matched records. */
  export type HitTestingResponse = {
    records: HitTesting[]
    query: {
      content: string
      tsne_position: TsnePosition
    }
  }
  /** Summary of an app: id, name, mode and icon fields. */
  export type RelatedApp = {
    id: string
    name: string
    icon: string
    icon_background: string
    mode: AppMode
  }
  /** A list of `RelatedApp` entries with a total count. */
  export type RelatedAppResponse = {
    total: number
    data: RelatedApp[]
  }
  /** Fields for updating a segment: `content` is required, the rest optional. */
  export type SegmentUpdator = {
    content: string
    keywords?: string[]
    answer?: string
  }
  /**
   * Document form. The member values are the same two strings used by the
   * `doc_form` literal unions in `InitialDocumentDetail` and `DocumentReq`.
   */
  export enum DocForm {
    TEXT = 'text_model',
    QA = 'qa_model',
  }
  /** A list of `IndexingStatusResponse` entries with a total count. */
  export type ErrorDocsResponse = {
    total: number
    data: Array<IndexingStatusResponse>
  }