// datasets.ts
  1. import type { AppMode } from './app'
  2. import type { DataSourceNotionPage } from './common'
  3. import type { RetrievalConfig } from '@/types/app'
  /**
   * String identifiers for the kinds of data source referenced by the
   * `data_source_type` / `type` fields in this module.
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'web_import',
  }
  /**
   * A dataset record. `DataSetListResponse.data` is an array of these, and
   * `createDocumentResponse.dataset` optionally carries one.
   */
  export type DataSet = {
    id: string
    name: string
    icon: string
    icon_background: string
    description: string
    permission: 'only_me' | 'all_team_members'
    data_source_type: DataSourceType
    indexing_technique: 'high_quality' | 'economy'
    created_by: string
    updated_by: string
    updated_at: number
    app_count: number
    document_count: number
    word_count: number
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean
    // NOTE(review): both fields below share the RetrievalConfig type; how they
    // differ is not visible in this file — confirm against the API.
    retrieval_model_dict: RetrievalConfig
    retrieval_model: RetrievalConfig
  }
  /**
   * A `File` extended with optional metadata fields. All added fields are
   * optional, so a plain `File` is assignable to this type.
   */
  export type CustomFile = File & {
    id?: string
    extension?: string
    mime_type?: string
    created_by?: string
    created_at?: number
  }
  /**
   * Pairs a {@link CustomFile} with an identifier and a numeric progress value.
   */
  export type FileItem = {
    fileID: string
    file: CustomFile
    // NOTE(review): the scale of `progress` (e.g. 0–100) is not defined in this file.
    progress: number
  }
  /**
   * Paginated list of {@link DataSet} records.
   */
  export type DataSetListResponse = {
    data: DataSet[]
    has_more: boolean
    limit: number
    page: number
    total: number
  }
  /**
   * A question/answer pair (used by `IndexingEstimateResponse.qa_preview`).
   */
  export type QA = {
    question: string
    answer: string
  }
  /**
   * Result of an indexing estimate: token/price/segment totals plus preview
   * content.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_price: number
    currency: string
    total_segments: number
    preview: string[]
    // Optional list of question/answer previews.
    qa_preview?: QA[]
  }
  /**
   * {@link IndexingEstimateResponse} with an additional `total_nodes` count.
   */
  export type FileIndexingEstimateResponse = {
    total_nodes: number
  } & IndexingEstimateResponse
  /**
   * Indexing progress for a single item: current status, per-stage
   * timestamps and segment counters.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    // NOTE(review): the next four fields are typed `any`. FullDocumentDetail
    // declares completed_at / paused_at / stopped_at as `number`; consider
    // tightening these once the actual payload (nullable?) is confirmed.
    completed_at: any
    paused_at: any
    error: any
    stopped_at: any
    completed_segments: number
    total_segments: number
  }
  /**
   * Wrapper holding a list of {@link IndexingStatusResponse} entries.
   */
  export type IndexingStatusBatchResponse = {
    data: IndexingStatusResponse[]
  }
  /** The two processing modes accepted by `ProcessRuleResponse.mode`. */
  export type ProcessMode = 'automatic' | 'custom'
  /**
   * A processing rule as returned in a response: a {@link ProcessMode} plus
   * its {@link Rules}.
   */
  export type ProcessRuleResponse = {
    mode: ProcessMode
    rules: Rules
  }
  /**
   * Rule set made of a list of pre-processing rules and one segmentation
   * configuration.
   */
  export type Rules = {
    pre_processing_rules: PreProcessingRule[]
    segmentation: Segmentation
  }
  /**
   * A single pre-processing rule, identified by `id` and toggled by `enabled`.
   */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /**
   * Segmentation settings: separator string, maximum tokens and chunk overlap.
   */
  export type Segmentation = {
    separator: string
    max_tokens: number
    // NOTE(review): the unit of `chunk_overlap` is not stated in this file — confirm.
    chunk_overlap: number
  }
  /**
   * All indexing status values. Declared `as const` so that
   * {@link DocumentIndexingStatus} is the union of these literals.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing',
    'cleaning',
    'splitting',
    'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the literals in {@link DocumentIndexingStatusList}. */
  export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
  /**
   * All display status values. Declared `as const` so that
   * {@link DocumentDisplayStatus} is the union of these literals.
   */
  export const DisplayStatusList = [
    'queuing',
    'indexing',
    'paused',
    'error',
    'available',
    'enabled',
    'disabled',
    'archived',
  ] as const

  /** Union of the literals in {@link DisplayStatusList}. */
  export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  /**
   * Source information attached to a document: metadata about an uploaded
   * file and, optionally, a Notion page icon.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      size: number
      mime_type: string
      created_at: number
      created_by: string
      extension: string
    }
    notion_page_icon?: string
  }
  /**
   * Base document shape. Extended by {@link SimpleDocumentDetail} and returned
   * in `createDocumentResponse.documents`.
   */
  export type InitialDocumentDetail = {
    id: string
    batch: string
    position: number
    dataset_id: string
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo
    dataset_process_rule_id: string
    name: string
    created_from: 'api' | 'web'
    created_by: string
    created_at: number
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number
    // Same literal values as the DocForm enum declared in this module.
    doc_form: 'text_model' | 'qa_model'
  }
  /**
   * {@link InitialDocumentDetail} plus enable/archive state and usage
   * counters; the element type of `DocumentListResponse.data`.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    word_count: number
    error?: string | null
    archived: boolean
    updated_at: number
    hit_count: number
    // InitialDocumentDetail already declares this field as required; because
    // the two types are intersected, the field remains required here.
    dataset_process_rule_id?: string
  }
  /**
   * Paginated list of {@link SimpleDocumentDetail} records.
   */
  export type DocumentListResponse = {
    data: SimpleDocumentDetail[]
    has_more: boolean
    total: number
    page: number
    limit: number
  }
  /**
   * Fields shared by document-related requests; extended by
   * {@link CreateDocumentReq} and {@link IndexingEstimateParams}.
   */
  export type DocumentReq = {
    original_document_id?: string
    // NOTE(review): typed as plain string here, while DataSet.indexing_technique
    // is 'high_quality' | 'economy' — confirm whether other values are valid.
    indexing_technique?: string
    // Same literal values as the DocForm enum declared in this module.
    doc_form: 'text_model' | 'qa_model'
    doc_language: string
    process_rule: ProcessRule
  }
  /**
   * {@link DocumentReq} plus the data source and retrieval configuration
   * needed to create a document.
   */
  export type CreateDocumentReq = DocumentReq & {
    data_source: DataSource
    retrieval_model: RetrievalConfig
  }
  /**
   * Parameters for an indexing estimate: {@link DocumentReq}, every
   * {@link DataSource} field made optional via `Partial`, and a required
   * `dataset_id`.
   */
  export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
    dataset_id: string
  }
  /**
   * Describes where document content comes from: a source type plus an
   * `info_list` that repeats the type and optionally lists Notion info
   * entries and/or uploaded file ids.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      notion_info_list?: NotionInfo[]
      file_info_list?: {
        file_ids: string[]
      }
    }
  }
  /**
   * Notion pages grouped under a workspace id (element type of
   * `DataSource.info_list.notion_info_list`).
   */
  export type NotionInfo = {
    workspace_id: string
    pages: DataSourceNotionPage[]
  }
  /**
   * Minimal Notion page reference: a page id and a type string.
   */
  export type NotionPage = {
    page_id: string
    // NOTE(review): the set of valid `type` values is not defined in this file.
    type: string
  }
  /**
   * Processing rule as sent in a request (`DocumentReq.process_rule`).
   */
  export type ProcessRule = {
    // NOTE(review): `mode` is a plain string here, whereas ProcessRuleResponse
    // uses the narrower ProcessMode union — confirm whether they should match.
    mode: string
    rules: Rules
  }
  /**
   * Response of a document-creation call: an optional dataset, a batch
   * identifier and the created documents.
   *
   * The lower-case type name is kept as-is because it is part of the
   * module's exported interface.
   */
  export type createDocumentResponse = {
    dataset?: DataSet
    batch: string
    documents: InitialDocumentDetail[]
  }
  /**
   * {@link SimpleDocumentDetail} plus processing timestamps, pause/disable/
   * archive bookkeeping, optional document type and metadata, and a segment
   * count.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    // Also declared (as string) on InitialDocumentDetail.
    batch: string
    created_api_request_id: string
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    tokens: number
    indexing_latency: number
    completed_at: number
    paused_by: string
    paused_at: number
    stopped_at: number
    // Intersected with InitialDocumentDetail.indexing_status, so the effective
    // type is still the DocumentIndexingStatus union.
    indexing_status: string
    disabled_at: number
    disabled_by: string
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number
    doc_type?: DocType | null | 'others'
    doc_metadata?: DocMetadata | null
    segment_count: number
    // Index signature: any additional string key is accepted with type `any`.
    [key: string]: any
  }
  /**
   * Document metadata: a fixed set of named string fields plus an index
   * signature that allows any further string-valued key.
   */
  export type DocMetadata = {
    title: string
    language: string
    author: string
    publisher: string
    publicationDate: string
    ISBN: string
    category: string
    [key: string]: string
  }
  /** Document type literals in the "customizable" group. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book',
    'web_page',
    'paper',
    'social_media_post',
    'personal_document',
    'business_document',
    'im_chat_log',
  ] as const

  /** Document type literals in the "fixed" group. */
  export const FIXED_DOC_TYPES = ['synced_from_github', 'synced_from_notion', 'wikipedia_entry'] as const

  /** Union of the literals in {@link CUSTOMIZABLE_DOC_TYPES}. */
  export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

  /** Union of the literals in {@link FIXED_DOC_TYPES}. */
  export type FixedDocType = typeof FIXED_DOC_TYPES[number]

  /** Any document type from either group. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of {@link FullDocumentDetail}. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * All segment status values.
   *
   * Declared `as const` (like the other status lists in this module) so that
   * {@link SegmentStatus} is the union of these literals rather than plain
   * `string`.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the literals in {@link SEGMENT_STATUS_LIST}. */
  export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  /**
   * Query parameters for listing segments. Only `limit` is required.
   */
  export type SegmentsQuery = {
    last_id?: string
    limit: number
    // status?: SegmentStatus
    hit_count_gte?: number
    keyword?: string
    enabled?: boolean
  }
  /**
   * Full detail of a single segment; the element type of
   * `SegmentsResponse.data`.
   */
  export type SegmentDetailModel = {
    id: string
    position: number
    document_id: string
    content: string
    word_count: number
    tokens: number
    keywords: string[]
    index_node_id: string
    index_node_hash: string
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    error: string | null
    stopped_at: number
    // Optional answer text.
    answer?: string
  }
  /**
   * A page of {@link SegmentDetailModel} records with paging information.
   */
  export type SegmentsResponse = {
    data: SegmentDetailModel[]
    has_more: boolean
    limit: number
    total: number
  }
  /**
   * One hit-testing history record; the element type of
   * `HitTestingRecordsResponse.data`.
   */
  export type HitTestingRecord = {
    id: string
    content: string
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /**
   * A single hit-testing result: the matched segment, its score and a 2-D
   * position.
   */
  export type HitTesting = {
    segment: Segment
    score: number
    tsne_position: TsnePosition
  }
  /**
   * Segment shape embedded in a {@link HitTesting} result, including a
   * summary of its parent document.
   */
  export type Segment = {
    id: string
    document: Document
    content: string
    position: number
    word_count: number
    tokens: number
    keywords: string[]
    hit_count: number
    index_node_hash: string
  }
  /**
   * Document summary embedded in {@link Segment}.
   *
   * NOTE(review): this name is the same as the DOM library's global
   * `Document` type; inside this module (and any module importing it under
   * this name) the identifier refers to this type instead.
   */
  export type Document = {
    id: string
    // NOTE(review): plain string here, while other types in this module use
    // the DataSourceType enum for the same field name.
    data_source_type: string
    name: string
    doc_type: DocType
  }
  /**
   * Paginated list of {@link HitTestingRecord} entries.
   */
  export type HitTestingRecordsResponse = {
    data: HitTestingRecord[]
    has_more: boolean
    limit: number
    total: number
    page: number
  }
  /** A 2-D coordinate pair. */
  export type TsnePosition = {
    x: number
    y: number
  }
  /**
   * Result of a hit-testing call: the query (content and position) together
   * with the matching records.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /**
   * Summary of an app; the element type of `RelatedAppResponse.data`.
   */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    icon: string
    icon_background: string
  }
  /**
   * List of {@link RelatedApp} entries with a total count.
   */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }
  /**
   * Payload for updating a segment: required content, optional answer and
   * keywords.
   */
  export type SegmentUpdator = {
    content: string
    answer?: string
    keywords?: string[]
  }
  /**
   * Document form identifiers. The values match the string literals used by
   * the `doc_form` fields in this module.
   */
  export enum DocForm {
    TEXT = 'text_model',
    QA = 'qa_model',
  }