// datasets.ts
import type { AppMode } from './app'
import type { DataSourceNotionPage } from './common'
  3. export enum DataSourceType {
  4. FILE = 'upload_file',
  5. NOTION = 'notion_import',
  6. WEB = 'web_import',
  7. }
  8. export type DataSet = {
  9. id: string
  10. name: string
  11. icon: string
  12. icon_background: string
  13. description: string
  14. permission: 'only_me' | 'all_team_members'
  15. data_source_type: DataSourceType
  16. indexing_technique: 'high_quality' | 'economy'
  17. created_by: string
  18. updated_by: string
  19. updated_at: number
  20. app_count: number
  21. document_count: number
  22. word_count: number
  23. }
  24. export type File = {
  25. id: string
  26. name: string
  27. size: number
  28. extension: string
  29. mime_type: string
  30. created_by: string
  31. created_at: number
  32. }
  33. export type DataSetListResponse = {
  34. data: DataSet[]
  35. has_more: boolean
  36. limit: number
  37. page: number
  38. total: number
  39. }
  40. export type QA = {
  41. question: string
  42. answer: string
  43. }
  44. export type IndexingEstimateResponse = {
  45. tokens: number
  46. total_price: number
  47. currency: string
  48. total_segments: number
  49. preview: string[]
  50. qa_preview?: QA[]
  51. }
  52. export type FileIndexingEstimateResponse = {
  53. total_nodes: number
  54. } & IndexingEstimateResponse
  55. export type IndexingStatusResponse = {
  56. id: string
  57. indexing_status: DocumentIndexingStatus
  58. processing_started_at: number
  59. parsing_completed_at: number
  60. cleaning_completed_at: number
  61. splitting_completed_at: number
  62. completed_at: any
  63. paused_at: any
  64. error: any
  65. stopped_at: any
  66. completed_segments: number
  67. total_segments: number
  68. }
  69. export type IndexingStatusBatchResponse = {
  70. data: IndexingStatusResponse[]
  71. }
  72. export type ProcessMode = 'automatic' | 'custom'
  73. export type ProcessRuleResponse = {
  74. mode: ProcessMode
  75. rules: Rules
  76. }
  77. export type Rules = {
  78. pre_processing_rules: PreProcessingRule[]
  79. segmentation: Segmentation
  80. }
  81. export type PreProcessingRule = {
  82. id: string
  83. enabled: boolean
  84. }
  85. export type Segmentation = {
  86. separator: string
  87. max_tokens: number
  88. }
  89. export const DocumentIndexingStatusList = [
  90. 'waiting',
  91. 'parsing',
  92. 'cleaning',
  93. 'splitting',
  94. 'indexing',
  95. 'paused',
  96. 'error',
  97. 'completed',
  98. ] as const
  99. export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
  100. export const DisplayStatusList = [
  101. 'queuing',
  102. 'indexing',
  103. 'paused',
  104. 'error',
  105. 'available',
  106. 'enabled',
  107. 'disabled',
  108. 'archived',
  109. ] as const
  110. export type DocumentDisplayStatus = typeof DisplayStatusList[number]
  111. export type DataSourceInfo = {
  112. upload_file: {
  113. id: string
  114. name: string
  115. size: number
  116. mime_type: string
  117. created_at: number
  118. created_by: string
  119. extension: string
  120. }
  121. }
  122. export type InitialDocumentDetail = {
  123. id: string
  124. batch: string
  125. position: number
  126. dataset_id: string
  127. data_source_type: DataSourceType
  128. data_source_info: DataSourceInfo
  129. dataset_process_rule_id: string
  130. name: string
  131. created_from: 'api' | 'web'
  132. created_by: string
  133. created_at: number
  134. indexing_status: DocumentIndexingStatus
  135. display_status: DocumentDisplayStatus
  136. completed_segments?: number
  137. total_segments?: number
  138. doc_form: 'text_model' | 'qa_model'
  139. }
  140. export type SimpleDocumentDetail = InitialDocumentDetail & {
  141. enabled: boolean
  142. word_count: number
  143. error?: string | null
  144. archived: boolean
  145. updated_at: number
  146. hit_count: number
  147. dataset_process_rule_id?: string
  148. }
  149. export type DocumentListResponse = {
  150. data: SimpleDocumentDetail[]
  151. has_more: boolean
  152. total: number
  153. page: number
  154. limit: number
  155. }
  156. export type CreateDocumentReq = {
  157. original_document_id?: string
  158. indexing_technique?: string
  159. doc_form: 'text_model' | 'qa_model'
  160. data_source: DataSource
  161. process_rule: ProcessRule
  162. }
  163. export type DataSource = {
  164. type: DataSourceType
  165. info_list: {
  166. data_source_type: DataSourceType
  167. notion_info_list?: NotionInfo[]
  168. file_info_list?: {
  169. file_ids: string[]
  170. }
  171. }
  172. }
  173. export type NotionInfo = {
  174. workspace_id: string
  175. pages: DataSourceNotionPage[]
  176. }
  177. export type NotionPage = {
  178. page_id: string
  179. type: string
  180. }
  181. export type ProcessRule = {
  182. mode: string
  183. rules: Rules
  184. }
  185. export type createDocumentResponse = {
  186. dataset?: DataSet
  187. batch: string
  188. documents: InitialDocumentDetail[]
  189. }
  190. export type FullDocumentDetail = SimpleDocumentDetail & {
  191. batch: string
  192. created_api_request_id: string
  193. processing_started_at: number
  194. parsing_completed_at: number
  195. cleaning_completed_at: number
  196. splitting_completed_at: number
  197. tokens: number
  198. indexing_latency: number
  199. completed_at: number
  200. paused_by: string
  201. paused_at: number
  202. stopped_at: number
  203. indexing_status: string
  204. disabled_at: number
  205. disabled_by: string
  206. archived_reason: 'rule_modified' | 're_upload'
  207. archived_by: string
  208. archived_at: number
  209. doc_type?: DocType | null
  210. doc_metadata?: DocMetadata | null
  211. segment_count: number
  212. [key: string]: any
  213. }
  214. export type DocMetadata = {
  215. title: string
  216. language: string
  217. author: string
  218. publisher: string
  219. publicationDate: string
  220. ISBN: string
  221. category: string
  222. [key: string]: string
  223. }
  224. export const CUSTOMIZABLE_DOC_TYPES = [
  225. 'book',
  226. 'web_page',
  227. 'paper',
  228. 'social_media_post',
  229. 'personal_document',
  230. 'business_document',
  231. 'im_chat_log',
  232. ] as const
  233. export const FIXED_DOC_TYPES = ['synced_from_github', 'synced_from_notion', 'wikipedia_entry'] as const
  234. export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]
  235. export type FixedDocType = typeof FIXED_DOC_TYPES[number]
  236. export type DocType = CustomizableDocType | FixedDocType
  237. export type DocumentDetailResponse = FullDocumentDetail
  238. export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing']
  239. export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
  240. export type SegmentsQuery = {
  241. last_id?: string
  242. limit: number
  243. // status?: SegmentStatus
  244. hit_count_gte?: number
  245. keyword?: string
  246. enabled?: boolean
  247. }
  248. export type SegmentDetailModel = {
  249. id: string
  250. position: number
  251. document_id: string
  252. content: string
  253. word_count: number
  254. tokens: number
  255. keywords: string[]
  256. index_node_id: string
  257. index_node_hash: string
  258. hit_count: number
  259. enabled: boolean
  260. disabled_at: number
  261. disabled_by: string
  262. status: SegmentStatus
  263. created_by: string
  264. created_at: number
  265. indexing_at: number
  266. completed_at: number
  267. error: string | null
  268. stopped_at: number
  269. answer?: string
  270. }
  271. export type SegmentsResponse = {
  272. data: SegmentDetailModel[]
  273. has_more: boolean
  274. limit: number
  275. total: number
  276. }
  277. export type HitTestingRecord = {
  278. id: string
  279. content: string
  280. source: 'app' | 'hit_testing' | 'plugin'
  281. source_app_id: string
  282. created_by_role: 'account' | 'end_user'
  283. created_by: string
  284. created_at: number
  285. }
  286. export type HitTesting = {
  287. segment: Segment
  288. score: number
  289. tsne_position: TsnePosition
  290. }
  291. export type Segment = {
  292. id: string
  293. document: Document
  294. content: string
  295. position: number
  296. word_count: number
  297. tokens: number
  298. keywords: string[]
  299. hit_count: number
  300. index_node_hash: string
  301. }
  302. export type Document = {
  303. id: string
  304. data_source_type: string
  305. name: string
  306. doc_type: DocType
  307. }
  308. export type HitTestingRecordsResponse = {
  309. data: HitTestingRecord[]
  310. has_more: boolean
  311. limit: number
  312. total: number
  313. page: number
  314. }
  315. export type TsnePosition = {
  316. x: number
  317. y: number
  318. }
  319. export type HitTestingResponse = {
  320. query: {
  321. content: string
  322. tsne_position: TsnePosition
  323. }
  324. records: Array<HitTesting>
  325. }
  326. export type RelatedApp = {
  327. id: string
  328. name: string
  329. mode: AppMode
  330. icon: string
  331. icon_background: string
  332. }
  333. export type RelatedAppResponse = {
  334. data: Array<RelatedApp>
  335. total: number
  336. }
  337. export type SegmentUpdator = {
  338. content: string
  339. answer?: string
  340. }