// datasets.ts — type definitions and constants for datasets, documents, segments and hit testing.
  1. import type { AppMode } from './app'
  2. import type { DataSourceNotionPage } from './common'
  /**
   * String identifiers for the kinds of data source a dataset or document can
   * reference. Each member maps to the literal written next to it.
   */
  export enum DataSourceType {
    FILE = 'upload_file',
    NOTION = 'notion_import',
    WEB = 'web_import',
  }
  /** Shape of a single dataset record. */
  export type DataSet = {
    // Identity and presentation
    id: string
    name: string
    description: string
    icon: string
    icon_background: string

    // Access and configuration
    permission: 'all_team_members' | 'only_me'
    data_source_type: DataSourceType
    indexing_technique: 'economy' | 'high_quality'

    // Embedding model settings
    embedding_model: string
    embedding_model_provider: string
    embedding_available: boolean

    // Authorship and update marker
    created_by: string
    updated_by: string
    updated_at: number

    // Counters
    app_count: number
    document_count: number
    word_count: number
  }
  /**
   * A platform `File` augmented with metadata fields.
   * Every added field is optional.
   */
  export type CustomFile = File & Partial<{
    id: string
    extension: string
    mime_type: string
    created_by: string
    created_at: number
  }>
  /** Groups a `CustomFile` with its string identifier and a numeric progress value. */
  export type FileItem = {
    file: CustomFile
    fileID: string
    progress: number
  }
  /** Paginated list of `DataSet` records. */
  export type DataSetListResponse = {
    data: Array<DataSet>
    // Pagination fields
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** A question string paired with its answer string. */
  export type QA = {
    question: string
    answer: string
  }
  /**
   * Result of an indexing estimate: token and segment counts, a price with its
   * currency, and preview content. `qa_preview` is optional.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_segments: number
    total_price: number
    currency: string
    preview: Array<string>
    qa_preview?: Array<QA>
  }
  /** `IndexingEstimateResponse` plus a `total_nodes` count. */
  export type FileIndexingEstimateResponse = IndexingEstimateResponse & {
    total_nodes: number
  }
  /** Indexing progress for one item, identified by `id`. */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus

    // Segment progress counters
    completed_segments: number
    total_segments: number

    // Per-stage markers typed as numbers
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number

    // NOTE(review): the following four are intentionally left as `any` to keep
    // the existing contract; a narrower type may be possible — confirm against the API.
    completed_at: any
    paused_at: any
    stopped_at: any
    error: any
  }
  /** Wrapper holding a list of `IndexingStatusResponse` entries under `data`. */
  export type IndexingStatusBatchResponse = {
    data: Array<IndexingStatusResponse>
  }
  /** The two accepted processing mode literals. */
  export type ProcessMode =
    | 'automatic'
    | 'custom'
  /** A processing mode together with its `Rules`. */
  export type ProcessRuleResponse = {
    rules: Rules
    mode: ProcessMode
  }
  /** Processing rules: a list of pre-processing rules and one segmentation setting. */
  export type Rules = {
    pre_processing_rules: Array<PreProcessingRule>
    segmentation: Segmentation
  }
  /** One pre-processing rule: its identifier and whether it is enabled. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /** Segmentation settings: a separator string and a `max_tokens` number. */
  export type Segmentation = {
    max_tokens: number
    separator: string
  }
  /**
   * Indexing status literals, kept as a readonly tuple so the union type below
   * can be derived from it.
   */
  export const DocumentIndexingStatusList = [
    'waiting', 'parsing', 'cleaning', 'splitting',
    'indexing', 'paused', 'error', 'completed',
  ] as const

  /** Union of the literals in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = (typeof DocumentIndexingStatusList)[number]
  /**
   * Display status literals, kept as a readonly tuple so the union type below
   * can be derived from it.
   */
  export const DisplayStatusList = [
    'queuing', 'indexing', 'paused', 'error',
    'available', 'enabled', 'disabled', 'archived',
  ] as const

  /** Union of the literals in `DisplayStatusList`. */
  export type DocumentDisplayStatus = (typeof DisplayStatusList)[number]
  /**
   * Data source details attached to a document: a required `upload_file`
   * description and an optional `notion_page_icon` string.
   */
  export type DataSourceInfo = {
    notion_page_icon?: string
    upload_file: {
      id: string
      name: string
      extension: string
      mime_type: string
      size: number
      created_by: string
      created_at: number
    }
  }
  /** Base set of document fields; extended by `SimpleDocumentDetail`. */
  export type InitialDocumentDetail = {
    // Identity and placement
    id: string
    name: string
    batch: string
    position: number
    dataset_id: string
    dataset_process_rule_id: string

    // Data source
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo

    // Creation info
    created_from: 'web' | 'api'
    created_by: string
    created_at: number

    // Status and progress
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus
    completed_segments?: number
    total_segments?: number

    doc_form: 'qa_model' | 'text_model'
  }
  /**
   * `InitialDocumentDetail` extended with enable/archive flags, counters, an
   * optional error and an update marker.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    enabled: boolean
    archived: boolean
    word_count: number
    hit_count: number
    updated_at: number
    error?: string | null
    // NOTE(review): InitialDocumentDetail declares this property as required,
    // so the intersection still requires it despite the `?` here.
    dataset_process_rule_id?: string
  }
  /** Paginated list of `SimpleDocumentDetail` records. */
  export type DocumentListResponse = {
    data: Array<SimpleDocumentDetail>
    // Pagination fields
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** Fields shared by document requests; extended by `CreateDocumentReq` and `IndexingEstimateParams`. */
  export type DocumentReq = {
    // Optional fields
    original_document_id?: string
    indexing_technique?: string
    // Required fields
    doc_form: 'qa_model' | 'text_model'
    doc_language: string
    process_rule: ProcessRule
  }
  /** `DocumentReq` plus a required `data_source`. */
  export type CreateDocumentReq = DocumentReq & { data_source: DataSource }
  /**
   * `DocumentReq` combined with an all-optional `DataSource` and a required
   * `dataset_id`.
   */
  export type IndexingEstimateParams =
    & DocumentReq
    & Partial<DataSource>
    & { dataset_id: string }
  /**
   * A data source: its type plus an `info_list` that repeats the type and may
   * carry Notion info and/or a list of file ids.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      file_info_list?: { file_ids: Array<string> }
      notion_info_list?: Array<NotionInfo>
    }
  }
  /** A workspace id together with a list of `DataSourceNotionPage` entries. */
  export type NotionInfo = {
    workspace_id: string
    pages: Array<DataSourceNotionPage>
  }
  /** A page id paired with a `type` string. */
  export type NotionPage = {
    page_id: string
    type: string
  }
  /**
   * A processing mode and its `Rules`, as used in `DocumentReq`.
   */
  export type ProcessRule = {
    rules: Rules
    // NOTE(review): typed as plain `string` here, whereas ProcessRuleResponse
    // uses the narrower `ProcessMode` — confirm whether that is deliberate.
    mode: string
  }
  /**
   * Result of creating documents: a batch string, the initial details of the
   * documents, and optionally a `DataSet`.
   */
  export type createDocumentResponse = {
    batch: string
    documents: Array<InitialDocumentDetail>
    dataset?: DataSet
  }
  /**
   * `SimpleDocumentDetail` extended with processing markers, pause/disable/archive
   * info, token and segment counts, and optional document type and metadata.
   * The trailing index signature additionally allows any other string key.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    // Already declared as `string` on InitialDocumentDetail; repeated here.
    batch: string
    created_api_request_id: string

    // Processing markers and measurements
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    completed_at: number
    indexing_latency: number
    tokens: number
    segment_count: number

    // Pause / stop
    paused_by: string
    paused_at: number
    stopped_at: number

    // InitialDocumentDetail declares this as DocumentIndexingStatus, so the
    // intersection keeps the narrower union.
    indexing_status: string

    // Disable / archive
    disabled_at: number
    disabled_by: string
    archived_reason: 're_upload' | 'rule_modified'
    archived_by: string
    archived_at: number

    // Optional document classification and metadata
    doc_type?: DocType | 'others' | null
    doc_metadata?: DocMetadata | null

    [key: string]: any
  }
  /**
   * Document metadata. Seven string fields are required, and the index
   * signature allows any further string-valued keys.
   */
  export type DocMetadata = {
    title: string
    author: string
    publisher: string
    publicationDate: string
    language: string
    category: string
    ISBN: string
    [key: string]: string
  }
  /** Document type literals in the customizable group. */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book', 'web_page', 'paper', 'social_media_post',
    'personal_document', 'business_document', 'im_chat_log',
  ] as const

  /** Document type literals in the fixed group. */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const

  /** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = (typeof CUSTOMIZABLE_DOC_TYPES)[number]
  /** Union of the literals in `FIXED_DOC_TYPES`. */
  export type FixedDocType = (typeof FIXED_DOC_TYPES)[number]
  /** Any document type from either group. */
  export type DocType = CustomizableDocType | FixedDocType
  /** Alias of `FullDocumentDetail`. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Status literals for a segment.
   *
   * Declared `as const` so that `SegmentStatus` below is the union of these four
   * literals. Without the const assertion the array is inferred as `string[]`
   * and `SegmentStatus` collapses to plain `string`. The other status lists in
   * this file (`DocumentIndexingStatusList`, `DisplayStatusList`) are declared
   * the same way.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the literals in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
  /** Query parameters for listing segments; only `limit` is required. */
  export type SegmentsQuery = {
    limit: number
    last_id?: string
    // status?: SegmentStatus
    keyword?: string
    enabled?: boolean
    hit_count_gte?: number
  }
  /** Full record for one segment of a document. `answer` is optional. */
  export type SegmentDetailModel = {
    // Identity and placement
    id: string
    document_id: string
    position: number

    // Content
    content: string
    answer?: string
    keywords: Array<string>
    word_count: number
    tokens: number

    // Index node references
    index_node_id: string
    index_node_hash: string

    // Usage and enablement
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string

    // Status and lifecycle markers
    status: SegmentStatus
    error: string | null
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    stopped_at: number
  }
  /** List of `SegmentDetailModel` records with `limit`, `total` and `has_more`. */
  export type SegmentsResponse = {
    data: Array<SegmentDetailModel>
    limit: number
    total: number
    has_more: boolean
  }
  /** One hit-testing record: its content, where it came from and who created it. */
  export type HitTestingRecord = {
    id: string
    content: string
    // Origin
    source: 'plugin' | 'hit_testing' | 'app'
    source_app_id: string
    // Creator
    created_by_role: 'end_user' | 'account'
    created_by: string
    created_at: number
  }
  /** One hit-testing result: a `Segment`, a numeric score and a `TsnePosition`. */
  export type HitTesting = {
    score: number
    segment: Segment
    tsne_position: TsnePosition
  }
  /** A segment as embedded in a `HitTesting` entry, including its `Document`. */
  export type Segment = {
    id: string
    document: Document
    position: number
    content: string
    keywords: Array<string>
    word_count: number
    tokens: number
    hit_count: number
    index_node_hash: string
  }
  /**
   * Short document descriptor referenced by `Segment`.
   *
   * NOTE(review): inside this module the name shadows the global DOM
   * `Document` type.
   */
  export type Document = {
    id: string
    name: string
    data_source_type: string
    doc_type: DocType
  }
  /** Paginated list of `HitTestingRecord` entries. */
  export type HitTestingRecordsResponse = {
    data: Array<HitTestingRecord>
    // Pagination fields
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** A pair of numeric coordinates, `x` and `y`. */
  export type TsnePosition = Record<'x' | 'y', number>
  /**
   * Result of a hit test: the query (its content and position) and the list of
   * `HitTesting` records.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /** Summary of an app: id, name, mode and icon settings. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode
    // Icon presentation
    icon: string
    icon_background: string
  }
  /** List of `RelatedApp` entries with a total count. */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }
  /** Payload for updating a segment: required content, optional answer and keywords. */
  export type SegmentUpdator = {
    content: string
    answer?: string
    keywords?: Array<string>
  }
  /**
   * Document form identifiers. The values are the same two literals used by
   * the `doc_form` fields elsewhere in this file.
   */
  export enum DocForm {
    TEXT = 'text_model',
    QA = 'qa_model',
  }