// datasets.ts
  1. import type { DataSourceNotionPage } from './common'
  2. import type { AppMode, RetrievalConfig } from '@/types/app'
  3. import type { Tag } from '@/app/components/base/tag-management/constant'
  /**
   * Kinds of data source a dataset or document can come from.
   * Each member maps to the string value used on the wire.
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'web_import',
  }
  /**
   * A dataset record: identity and display fields, permission scope, data
   * source and indexing settings, counters, embedding model details,
   * retrieval configuration, and attached tags.
   */
  export type DataSet = {
    id: string
    name: string
    icon: string
    icon_background: string
    description: string
    permission: 'only_me' | 'all_team_members'
    data_source_type: DataSourceType
    indexing_technique: 'high_quality' | 'economy'
    created_by: string
    updated_by: string
    updated_at: number
    app_count: number
    document_count: number
    word_count: number
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean
    // NOTE(review): both fields below share the `RetrievalConfig` type; how
    // they differ is not visible from this file.
    retrieval_model_dict: RetrievalConfig
    retrieval_model: RetrievalConfig
    tags: Tag[]
  }
  /**
   * A browser `File` extended with optional metadata fields
   * (`id`, `extension`, `mime_type`, `created_by`, `created_at`).
   */
  export type CustomFile = File & {
    id?: string
    extension?: string
    mime_type?: string
    created_by?: string
    created_at?: number
  }
  /**
   * Pairs a `CustomFile` with a string identifier and a numeric progress value.
   */
  export type FileItem = {
    fileID: string
    file: CustomFile
    // NOTE(review): the scale of `progress` (0-1, 0-100, ...) is not defined here.
    progress: number
  }
  /**
   * One page of `DataSet` records together with paging fields
   * (`has_more`, `limit`, `page`, `total`).
   */
  export type DataSetListResponse = {
    data: DataSet[]
    has_more: boolean
    limit: number
    page: number
    total: number
  }
  /** A question string paired with its answer string. */
  export type QA = {
    question: string
    answer: string
  }
  /**
   * Indexing estimate: token count, price and currency, segment count, and
   * preview content. `qa_preview` is optional and holds `QA` pairs.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    preview: string[]
    qa_preview?: QA[]
  }
  /**
   * `IndexingEstimateResponse` with an additional `total_nodes` count.
   */
  export type FileIndexingEstimateResponse = {
    total_nodes: number
  } & IndexingEstimateResponse
  /**
   * Indexing progress of a single item: its current status, per-stage
   * numeric markers, and completed/total segment counters.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    // NOTE(review): the next four fields are typed `any`, so the compiler does
    // not check how they are used. Confirm the real shapes before narrowing.
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    completed_segments: number
    total_segments: number
  }
  /** Wrapper holding a list of `IndexingStatusResponse` entries under `data`. */
  export type IndexingStatusBatchResponse = {
    data: Array<IndexingStatusResponse>
  }
  /** The two processing modes a process rule can declare. */
  export type ProcessMode =
    | 'automatic'
    | 'custom'
  /** A processing mode together with its `Rules`. */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
  }
  /** Processing rules: a list of pre-processing rules plus segmentation settings. */
  export type Rules = {
    pre_processing_rules: Array<PreProcessingRule>
    segmentation: Segmentation
  }
  /** A pre-processing rule identified by `id`, with an on/off flag. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /** Segmentation settings: separator string, token limit, and chunk overlap. */
  export type Segmentation = {
    separator: string
    max_tokens: number
    chunk_overlap: number
  }
  /**
   * Every indexing status value, as a readonly tuple of string literals.
   * `as const` keeps the literals so the union type below can be derived.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const
  /** Union of the literal values in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = (typeof DocumentIndexingStatusList)[number]
  /**
   * Every display status value, as a readonly tuple of string literals.
   * `as const` keeps the literals so the union type below can be derived.
   */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const
  /** Union of the literal values in `DisplayStatusList`. */
  export type DocumentDisplayStatus = (typeof DisplayStatusList)[number]
  /**
   * Data source details: metadata of an uploaded file and, optionally, a
   * Notion page icon string.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
    notion_page_icon?: string
  }
  /**
   * Base document fields: identity, batch and position, owning dataset, data
   * source details, creation info, indexing/display status, optional segment
   * counters, and document form.
   */
  export type InitialDocumentDetail = {
    id: string
    batch: string
    position: number
    dataset_id: string
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from: 'api' | 'web'
    created_by: string
    created_at: number
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number
    doc_form: 'text_model' | 'qa_model'
  }
  /**
   * `InitialDocumentDetail` extended with enabled/archived flags, word and hit
   * counts, an optional error string, and an update marker.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    // Declared optional here, but `InitialDocumentDetail` declares the same key
    // as required, so it stays required on the intersection.
    dataset_process_rule_id?: string
  }
  /**
   * One page of `SimpleDocumentDetail` entries together with paging fields
   * (`has_more`, `total`, `page`, `limit`).
   */
  export type DocumentListResponse = {
    data: Array<SimpleDocumentDetail>
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /**
   * Fields shared by document requests: optional original document id and
   * indexing technique, plus document form, language, and process rule.
   */
  export type DocumentReq = {
    original_document_id?: string
    indexing_technique?: string
    doc_form: 'text_model' | 'qa_model'
    doc_language: string
    process_rule: ProcessRule
  }
  /** `DocumentReq` plus the data source and retrieval configuration. */
  export type CreateDocumentReq = DocumentReq & {
    data_source: DataSource
    retrieval_model: RetrievalConfig
  }
  /**
   * `DocumentReq` combined with an all-optional `DataSource` and a required
   * `dataset_id`.
   */
  export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
    dataset_id: string
  }
  /**
   * A data source: its type plus an `info_list` that repeats the type and
   * optionally carries Notion info entries and/or a list of file ids.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      notion_info_list?: NotionInfo[]
      file_info_list?: {
        file_ids: string[]
      }
    }
  }
  /** A workspace id together with its list of `DataSourceNotionPage` entries. */
  export type NotionInfo = {
    workspace_id: string
    pages: Array<DataSourceNotionPage>
  }
  /** A page reference made of a page id and a type string. */
  export type NotionPage = {
    page_id: string
    type: string
  }
  /**
   * A process rule: a mode string and its `Rules`.
   * NOTE(review): `mode` is a plain `string` here, whereas
   * `ProcessRuleResponse.mode` uses `ProcessMode`. Confirm whether that is intended.
   */
  export type ProcessRule = {
    mode: string
    rules: Rules
  }
  /**
   * Result of creating documents: an optional dataset, the batch string, and
   * the created documents.
   * NOTE(review): the name is lower camelCase, unlike the other exported types;
   * it is kept as-is because it is part of the exported interface.
   */
  export type createDocumentResponse = {
    dataset?: DataSet
    batch: string
    documents: Array<InitialDocumentDetail>
  }
  /**
   * `SimpleDocumentDetail` extended with processing-stage markers, token and
   * latency figures, pause/stop/disable/archive info, optional document
   * type/metadata, and a segment count.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    // `batch` is also declared on `InitialDocumentDetail`, with the same type.
    batch: string
    created_api_request_id: string
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    paused_by: string
    paused_at: number
    stopped_at: number
    // Declared as `string` here; `InitialDocumentDetail` declares the same key
    // as `DocumentIndexingStatus`.
    indexing_status: string
    disabled_at: number
    disabled_by: string
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number
    doc_type?: DocType | null | 'others'
    doc_metadata?: DocMetadata | null
    segment_count: number
    // Index signature: any other string key is accepted, with type `any`.
    [key: string]: any
  }
  /**
   * Document metadata: a fixed set of named string fields, plus an index
   * signature that allows any additional string-valued key.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Document type values in the "customizable" group, as a readonly literal tuple. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const
  /** Document type values in the "fixed" group, as a readonly literal tuple. */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const
  /** Union of the literal values in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = (typeof CUSTOMIZABLE_DOC_TYPES)[number]
  /** Union of the literal values in `FIXED_DOC_TYPES`. */
  export type FixedDocType = (typeof FIXED_DOC_TYPES)[number]
  /** Any document type from either group. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of `FullDocumentDetail`. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Every segment status value.
   *
   * Declared `as const` so the array keeps its string literals. Without it the
   * array is inferred as `string[]` and `SegmentStatus` widens to `string`,
   * which lets any string pass as a status. This matches how
   * `DocumentIndexingStatusList` and `DisplayStatusList` are declared.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const
  /** Union of the literal values in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
  /**
   * Query parameters for listing segments. Only `limit` is required; the
   * remaining fields are optional filters.
   */
  export type SegmentsQuery = {
    last_id?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean
  }
  /**
   * Full detail of a segment: identity and position, content with word/token
   * counts, keywords, index node info, hit count, enable/disable info,
   * status, creation and progress markers, error, and an optional answer.
   */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    word_count: number
    tokens: number
    keywords: string[]
    index_node_id: string
    index_node_hash: string
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    error: string | null
    stopped_at: number
    answer?: string
  }
  /**
   * A list of `SegmentDetailModel` entries together with `has_more`, `limit`,
   * and `total`.
   */
  export type SegmentsResponse = {
    data: Array<SegmentDetailModel>
    has_more: boolean
    limit: number
    total: number
  }
  /**
   * A hit-testing record: its content, the source it came from, and who
   * created it (by role and id) and when.
   */
  export type HitTestingRecord = {
    id: string
    content: string
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /** A single hit: the matched segment, its score, and a t-SNE position. */
  export type HitTesting = {
    segment: Segment
    score: number
    tsne_position: TsnePosition
  }
  /**
   * A segment as it appears in a hit: the owning document, content and
   * position, word/token counts, keywords, hit count, and index node hash.
   */
  export type Segment = {
    id: string
    // Refers to the `Document` type exported by this module, which shadows the
    // global DOM `Document` within this file.
    document: Document
    content: string
    position: number
    word_count: number
    tokens: number
    keywords: string[]
    hit_count: number
    index_node_hash: string
  }
  /**
   * Minimal document reference: id, data source type string, name, and
   * document type. Within this module the name shadows the global DOM `Document`.
   */
  export type Document = {
    id: string
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /**
   * One page of `HitTestingRecord` entries together with paging fields
   * (`has_more`, `limit`, `total`, `page`).
   */
  export type HitTestingRecordsResponse = {
    data: Array<HitTestingRecord>
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A 2D position given by numeric `x` and `y` coordinates. */
  export type TsnePosition = {
    x: number
    y: number
  }
  /**
   * Hit-testing result: the query (content plus t-SNE position) and the list
   * of matching `HitTesting` records.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /** An app reference: id, name, mode, and icon fields. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon: string
    icon_background: string
  }
  /** A list of `RelatedApp` entries together with a total count. */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }
  /**
   * Fields for updating a segment: required content, with optional answer
   * and keywords.
   */
  export type SegmentUpdator = {
    content: string
    answer?: string
    keywords?: string[]
  }
  /**
   * Document form values. The member values are the same string literals used
   * by the `doc_form` fields elsewhere in this file.
   */
  export enum DocForm {
    TEXT = 'text_model',
    QA = 'qa_model',
  }
  /** A list of `IndexingStatusResponse` entries together with a total count. */
  export type ErrorDocsResponse = {
    data: Array<IndexingStatusResponse>
    total: number
  }