// datasets.ts — type definitions for datasets, documents, segments and hit testing
  1. import type { AppMode } from './app'
  2. import type { DataSourceNotionPage } from './common'
  /**
   * String enum used by the `data_source_type` fields declared in this file.
   * Each member evaluates to the string literal on its right-hand side.
   */
  export enum DataSourceType {
    /** Evaluates to `'upload_file'`. */
    FILE = 'upload_file',
    /** Evaluates to `'notion_import'`. */
    NOTION = 'notion_import',
    /** Evaluates to `'web_import'`. */
    WEB = 'web_import',
  }
  /**
   * Shape of a single dataset record.
   *
   * NOTE(review): `updated_at` is typed as a bare number; presumably a
   * timestamp — confirm the unit against the API.
   */
  export type DataSet = {
    // Identity and presentation
    id: string
    name: string
    description: string
    icon: string
    icon_background: string

    // Configuration
    permission: 'only_me' | 'all_team_members'
    data_source_type: DataSourceType
    indexing_technique: 'high_quality' | 'economy'

    // Authorship
    created_by: string
    updated_by: string
    updated_at: number

    // Counters
    app_count: number
    document_count: number
    word_count: number
  }
  /**
   * Metadata describing a file.
   *
   * Within this module the name `File` refers to this alias rather than to
   * the DOM global of the same name.
   */
  export type File = {
    id: string
    name: string
    extension: string
    mime_type: string
    size: number
    created_by: string
    created_at: number
  }
  /** One page of `DataSet` records together with its paging fields. */
  export type DataSetListResponse = {
    data: Array<DataSet>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /**
   * Result of an indexing estimate: token and segment counts, a price with
   * its currency, and a list of preview strings.
   */
  export type IndexingEstimateResponse = {
    tokens: number
    total_segments: number
    total_price: number
    currency: string
    preview: Array<string>
  }
  /** `IndexingEstimateResponse` extended with a `total_nodes` count. */
  export type FileIndexingEstimateResponse = IndexingEstimateResponse & {
    total_nodes: number
  }
  /**
   * Indexing progress for one item: its current `indexing_status`, the
   * per-stage completion fields, and segment counters.
   *
   * NOTE(review): `completed_at`, `paused_at`, `stopped_at` and `error` are
   * typed `any`. `FullDocumentDetail` in this file types the three `*_at`
   * fields as `number`, so these are presumably nullable numbers / a nullable
   * string — confirm against the API before tightening, since narrowing `any`
   * can break existing callers.
   */
  export type IndexingStatusResponse = {
    id: string
    indexing_status: DocumentIndexingStatus

    // Stage fields typed as numbers
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number

    // Loosely typed fields (see note above)
    completed_at: any
    paused_at: any
    stopped_at: any
    error: any

    // Progress counters
    completed_segments: number
    total_segments: number
  }
  /** Wrapper holding a list of `IndexingStatusResponse` entries under `data`. */
  export type IndexingStatusBatchResponse = {
    data: Array<IndexingStatusResponse>
  }
  /** The two processing modes accepted by `ProcessRuleResponse.mode`. */
  export type ProcessMode =
    | 'automatic'
    | 'custom'
  /** A processing mode paired with its `Rules`. */
  export type ProcessRuleResponse = {
    rules: Rules
    mode: ProcessMode
  }
  /** Processing rules: a list of pre-processing rules plus one segmentation setting. */
  export type Rules = {
    pre_processing_rules: Array<PreProcessingRule>
    segmentation: Segmentation
  }
  /** A single pre-processing rule, identified by `id`, with an on/off flag. */
  export type PreProcessingRule = {
    id: string
    enabled: boolean
  }
  /** Segmentation setting: a separator string and a `max_tokens` number. */
  export type Segmentation = {
    max_tokens: number
    separator: string
  }
  /**
   * Indexing status literals. Declared `as const` so the derived
   * `DocumentIndexingStatus` type is the union of exactly these strings.
   */
  export const DocumentIndexingStatusList = [
    'waiting',
    'parsing', 'cleaning', 'splitting', 'indexing',
    'paused',
    'error',
    'completed',
  ] as const

  /** Union of the literals in `DocumentIndexingStatusList`. */
  export type DocumentIndexingStatus = (typeof DocumentIndexingStatusList)[number]
  /**
   * Display status literals. Declared `as const` so the derived
   * `DocumentDisplayStatus` type is the union of exactly these strings.
   */
  export const DisplayStatusList = [
    'queuing', 'indexing', 'paused', 'error',
    'available', 'enabled', 'disabled', 'archived',
  ] as const

  /** Union of the literals in `DisplayStatusList`. */
  export type DocumentDisplayStatus = (typeof DisplayStatusList)[number]
  /**
   * Data-source details attached to a document. Only an `upload_file` entry
   * is declared here; its fields are the same seven declared by the `File`
   * type in this module.
   */
  export type DataSourceInfo = {
    upload_file: {
      id: string
      name: string
      extension: string
      mime_type: string
      size: number
      created_by: string
      created_at: number
    }
  }
  /**
   * Base document shape; `SimpleDocumentDetail` (and through it
   * `FullDocumentDetail`) builds on it via intersection.
   *
   * The two segment counters are optional; every other field is required.
   */
  export type InitialDocumentDetail = {
    // Identity and placement
    id: string
    name: string
    batch: string
    position: number
    dataset_id: string
    dataset_process_rule_id: string

    // Source
    data_source_type: DataSourceType
    data_source_info: DataSourceInfo

    // Creation
    created_from: 'api' | 'web'
    created_by: string
    created_at: number

    // Status
    indexing_status: DocumentIndexingStatus
    display_status: DocumentDisplayStatus

    // Optional progress counters
    completed_segments?: number
    total_segments?: number
  }
  /**
   * `InitialDocumentDetail` plus the extra fields below.
   *
   * NOTE(review): `dataset_process_rule_id` is re-declared here as optional,
   * but `InitialDocumentDetail` already declares it as required, so the
   * intersection still requires it. If the intent was to make it optional,
   * this does not achieve that — confirm with the API owner.
   */
  export type SimpleDocumentDetail = InitialDocumentDetail & {
    // Flags
    enabled: boolean
    archived: boolean

    // Counters
    word_count: number
    hit_count: number

    updated_at: number
    error?: string | null
    dataset_process_rule_id?: string
  }
  /** One page of `SimpleDocumentDetail` records together with its paging fields. */
  export type DocumentListResponse = {
    data: Array<SimpleDocumentDetail>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /**
   * Request body for creating a document: a required data source and process
   * rule, plus two optional fields.
   *
   * NOTE(review): `indexing_technique` is a free `string` here while
   * `DataSet.indexing_technique` is `'high_quality' | 'economy'` — presumably
   * the same value set; confirm before narrowing.
   */
  export type CreateDocumentReq = {
    // Required
    data_source: DataSource
    process_rule: ProcessRule

    // Optional
    original_document_id?: string
    indexing_technique?: string
  }
  /**
   * Data source descriptor: a `type` plus an `info_list` object that repeats
   * the source type and may carry a Notion list and/or a list of file ids.
   */
  export type DataSource = {
    type: DataSourceType
    info_list: {
      data_source_type: DataSourceType
      file_info_list?: { file_ids: Array<string> }
      notion_info_list?: Array<NotionInfo>
    }
  }
  /** A workspace id together with its list of `DataSourceNotionPage` entries. */
  export type NotionInfo = {
    workspace_id: string
    pages: Array<DataSourceNotionPage>
  }
  /** A page reference made of a `page_id` and a free-form `type` string. */
  export type NotionPage = {
    page_id: string
    type: string
  }
  /**
   * A mode string paired with its `Rules`.
   *
   * NOTE(review): `mode` is a free `string` here whereas
   * `ProcessRuleResponse.mode` uses `ProcessMode` — presumably the same value
   * set; confirm before narrowing.
   */
  export type ProcessRule = {
    rules: Rules
    mode: string
  }
  /**
   * Response shape for document creation: a batch id, the created documents,
   * and an optional dataset.
   *
   * The lower-camel-case name differs from the PascalCase used by the other
   * types in this file; it is kept as-is because it is exported.
   */
  export type createDocumentResponse = {
    batch: string
    documents: Array<InitialDocumentDetail>
    dataset?: DataSet
  }
  /**
   * `SimpleDocumentDetail` plus the extra fields below.
   *
   * Notes grounded in this file's declarations:
   * - `batch` and `indexing_status` are also declared on
   *   `InitialDocumentDetail`. Because this is an intersection,
   *   `indexing_status` still resolves to `DocumentIndexingStatus` despite
   *   being written as `string` here.
   * - The trailing index signature accepts any additional string key with an
   *   `any` value, so misspelled property names are not caught by the compiler.
   */
  export type FullDocumentDetail = SimpleDocumentDetail & {
    batch: string
    created_api_request_id: string
    indexing_status: string
    segment_count: number

    // Indexing pipeline fields
    processing_started_at: number
    parsing_completed_at: number
    cleaning_completed_at: number
    splitting_completed_at: number
    completed_at: number
    tokens: number
    indexing_latency: number

    // Pause / stop
    paused_by: string
    paused_at: number
    stopped_at: number

    // Disable
    disabled_by: string
    disabled_at: number

    // Archive
    archived_reason: 'rule_modified' | 're_upload'
    archived_by: string
    archived_at: number

    // Optional document classification
    doc_type?: DocType | null
    doc_metadata?: DocMetadata | null

    [key: string]: any
  }
  /**
   * String-valued document metadata. Seven keys are named explicitly; the
   * index signature allows any further string key with a string value.
   */
  export type DocMetadata = {
    title: string
    author: string
    publisher: string
    publicationDate: string
    language: string
    category: string
    ISBN: string
    [key: string]: string
  }
  /** First group of document type literals (`as const`, so literals are preserved). */
  export const CUSTOMIZABLE_DOC_TYPES = [
    'book', 'web_page', 'paper',
    'social_media_post',
    'personal_document', 'business_document',
    'im_chat_log',
  ] as const

  /** Second group of document type literals (`as const`, so literals are preserved). */
  export const FIXED_DOC_TYPES = [
    'synced_from_github',
    'synced_from_notion',
    'wikipedia_entry',
  ] as const

  /** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
  export type CustomizableDocType = (typeof CUSTOMIZABLE_DOC_TYPES)[number]

  /** Union of the literals in `FIXED_DOC_TYPES`. */
  export type FixedDocType = (typeof FIXED_DOC_TYPES)[number]

  /** Any document type from either list. */
  export type DocType =
    | CustomizableDocType
    | FixedDocType
  /** Alias of `FullDocumentDetail`; the two names are interchangeable. */
  export type DocumentDetailResponse = FullDocumentDetail
  /**
   * Segment status literals.
   *
   * Declared `as const`, matching the other literal lists in this file.
   * Without it the array is inferred as `string[]`, and `SegmentStatus` below
   * widens to plain `string`, so any string would type-check as a status.
   */
  export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

  /** Union of the literals in `SEGMENT_STATUS_LIST`. */
  export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
  /**
   * Query parameters for listing segments. Only `limit` is required; the
   * remaining fields are optional.
   */
  export type SegmentsQuery = {
    limit: number
    last_id?: string
    // status?: SegmentStatus
    keyword?: string
    hit_count_gte?: number
    enabled?: boolean
  }
  /**
   * Full record for one segment of a document.
   *
   * NOTE(review): the `*_at` fields are bare numbers; presumably timestamps —
   * confirm the unit against the API.
   */
  export type SegmentDetailModel = {
    // Identity and placement
    id: string
    document_id: string
    position: number

    // Content
    content: string
    keywords: Array<string>
    word_count: number
    tokens: number

    // Index node
    index_node_id: string
    index_node_hash: string

    // Usage and availability
    hit_count: number
    enabled: boolean
    disabled_at: number
    disabled_by: string

    // Lifecycle
    status: SegmentStatus
    created_by: string
    created_at: number
    indexing_at: number
    completed_at: number
    stopped_at: number
    error: string | null
  }
  /** A list of `SegmentDetailModel` records with `limit`, `total` and `has_more`. */
  export type SegmentsResponse = {
    data: Array<SegmentDetailModel>
    limit: number
    total: number
    has_more: boolean
  }
  /**
   * One hit-testing record: its content, where it came from (`source`,
   * `source_app_id`), and who created it and when.
   */
  export type HitTestingRecord = {
    id: string
    content: string

    // Origin
    source: 'app' | 'hit_testing' | 'plugin'
    source_app_id: string

    // Creation
    created_by_role: 'account' | 'end_user'
    created_by: string
    created_at: number
  }
  /** One hit-testing result: a `Segment`, its numeric score, and a `TsnePosition`. */
  export type HitTesting = {
    segment: Segment
    tsne_position: TsnePosition
    score: number
  }
  /**
   * Segment shape referenced by `HitTesting.segment`.
   *
   * `document` uses this module's `Document` type, not the DOM global.
   */
  export type Segment = {
    id: string
    document: Document
    position: number

    // Content
    content: string
    keywords: Array<string>
    word_count: number
    tokens: number

    hit_count: number
    index_node_hash: string
  }
  /**
   * Document summary referenced by `Segment.document`.
   *
   * Within this module the name `Document` refers to this alias rather than
   * to the DOM global of the same name.
   *
   * NOTE(review): `data_source_type` is a free `string` here while other
   * types in this file use the `DataSourceType` enum — presumably the same
   * value set; confirm before narrowing.
   */
  export type Document = {
    id: string
    name: string
    data_source_type: string
    doc_type: DocType
  }
  /** One page of `HitTestingRecord` entries together with its paging fields. */
  export type HitTestingRecordsResponse = {
    data: Array<HitTestingRecord>
    page: number
    limit: number
    total: number
    has_more: boolean
  }
  /** A pair of numeric coordinates, `x` and `y`. */
  export type TsnePosition = { x: number; y: number }
  /**
   * Response of a hit-testing request: the query (its content and
   * `TsnePosition`) and the list of `HitTesting` records.
   */
  export type HitTestingResponse = {
    query: {
      content: string
      tsne_position: TsnePosition
    }
    records: HitTesting[]
  }
  /** An app entry: id, name, `AppMode`, and icon fields. */
  export type RelatedApp = {
    id: string
    name: string
    mode: AppMode

    // Presentation
    icon: string
    icon_background: string
  }
  /** A list of `RelatedApp` entries with a `total` count. */
  export type RelatedAppResponse = {
    data: RelatedApp[]
    total: number
  }