| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707 | 
							- import type { DataSourceNotionPage, DataSourceProvider } from './common'
 
- import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
 
- import type { Tag } from '@/app/components/base/tag-management/constant'
 
- import type { IndexingType } from '@/app/components/datasets/create/step-two'
 
- import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
 
- import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
 
/** String identifiers for the kinds of data source referenced by dataset and document types. */
export enum DataSourceType {
  /** Uploaded file. */
  FILE = 'upload_file',
  /** Notion import. */
  NOTION = 'notion_import',
  /** Website crawl. */
  WEB = 'website_crawl',
}
 
/** String identifiers for dataset permission scopes. */
export enum DatasetPermission {
  /** Serialized as `only_me`. */
  onlyMe = 'only_me',
  /** Serialized as `all_team_members`. */
  allTeamMembers = 'all_team_members',
  /** Serialized as `partial_members`. */
  partialMembers = 'partial_members',
}
 
/** String identifiers for document chunking modes (used as the `doc_form` field type in this file). */
export enum ChunkingMode {
  /** General text. */
  text = 'text_model',
  /** General QA. */
  qa = 'qa_model',
  /** Parent-Child. */
  parentChild = 'hierarchical_model',
}
 
/** A single metadata entry, as listed in `DataSet.doc_metadata`. */
export type MetadataInDoc = {
  /** Metadata value, carried as a string. */
  value: string
  id: string
  /** Variable type of the metadata field. */
  type: MetadataFilteringVariableType
  name: string
}
 
/**
 * Dataset record.
 *
 * NOTE(review): `categories` is typed `any`; its real shape is not visible in
 * this file and should be tightened once confirmed.
 */
export type DataSet = {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape unknown here
  categories: any
  id: string
  name: string
  icon: string
  icon_background: string
  description: string
  permission: DatasetPermission
  data_source_type: DataSourceType
  indexing_technique: IndexingType
  created_by: string
  updated_by: string
  updated_at: number
  app_count: number
  /** Chunking mode of the dataset's documents. */
  doc_form: ChunkingMode
  document_count: number
  word_count: number
  provider: string
  embedding_model: string
  embedding_model_provider: string
  embedding_available: boolean
  retrieval_model_dict: RetrievalConfig
  retrieval_model: RetrievalConfig
  tags: Tag[]
  /** Optional list of member identifiers; presumably used with partial-member permission — confirm. */
  partial_member_list?: string[]
  /** Identification and endpoint details of the linked external knowledge API. */
  external_knowledge_info: {
    external_knowledge_id: string
    external_knowledge_api_id: string
    external_knowledge_api_name: string
    external_knowledge_api_endpoint: string
  }
  /** Retrieval settings for external knowledge. */
  external_retrieval_model: {
    top_k: number
    score_threshold: number
    score_threshold_enabled: boolean
  }
  built_in_field_enabled: boolean
  doc_metadata?: MetadataInDoc[]
  has_edit_permission: boolean
  dept_id: string
  edit_auth: number
}
 
/** External API record, including its connection settings and bound datasets. */
export type ExternalAPIItem = {
  id: string
  tenant_id: string
  name: string
  description: string
  /** Connection settings for the external API. */
  settings: {
    endpoint: string
    api_key: string
  }
  /** Datasets bound to this API, each given by id and name. */
  dataset_bindings: Array<{ id: string; name: string }>
  created_by: string
  created_at: string
}
 
/**
 * External knowledge record. `provider` is fixed to the literal `'external'`,
 * and `data_source_type` / `indexing_technique` are always `null`.
 */
export type ExternalKnowledgeItem = {
  id: string
  name: string
  description: string | null
  provider: 'external'
  permission: DatasetPermission
  data_source_type: null
  indexing_technique: null
  app_count: number
  document_count: number
  word_count: number
  created_by: string
  created_at: string
  updated_by: string
  updated_at: string
  tags: Tag[]
}
 
/** Response for deleting an external API: `result` is either `'success'` or `'error'`. */
export type ExternalAPIDeleteResponse = {
  result:
    | 'success'
    | 'error'
}
 
/** Usage information for an external API: an in-use flag plus a count. */
export type ExternalAPIUsage = {
  is_using: boolean
  count: number
}
 
/** A `File` extended with optional extra fields (id, extension, MIME type, creator, creation time). */
export type CustomFile = File & {
  id?: string
  extension?: string
  mime_type?: string
  created_by?: string
  created_at?: number
}
 
/** Minimal document descriptor: id, name and file extension. */
export type DocumentItem = {
  id: string
  name: string
  extension: string
}
 
/**
 * Options for a website crawl.
 * `limit` and `max_depth` accept either a number or a string.
 */
export type CrawlOptions = {
  crawl_sub_pages: boolean
  only_main_content: boolean
  includes: string
  excludes: string
  limit: string | number
  max_depth: string | number
  use_sitemap: boolean
}
 
/** One crawl result: title, markdown content, description and source URL. */
export type CrawlResultItem = {
  title: string
  markdown: string
  description: string
  source_url: string
}
 
/** Pairs a file with an identifier and a numeric progress value. */
export type FileItem = {
  fileID: string
  file: CustomFile
  progress: number
}
 
/** Arguments for fetching a dataset list: a request URL plus query parameters. */
export type FetchDatasetsParams = {
  url: string
  /** Query parameters; only `page` is required. */
  params: {
    page: number
    ids?: string[]
    tag_ids?: string[]
    limit?: number
    include_all?: boolean
    keyword?: string
    category_ids?: string[]
    type?: string
    creatorDept?: string
    authType?: string
  }
}
 
/** Paginated list of datasets. */
export type DataSetListResponse = {
  data: DataSet[]
  has_more: boolean
  limit: number
  page: number
  total: number
}
 
/** Paginated list of external API records. */
export type ExternalAPIListResponse = {
  data: ExternalAPIItem[]
  has_more: boolean
  limit: number
  page: number
  total: number
}
 
/** A question/answer pair. */
export type QA = {
  question: string
  answer: string
}
 
/** Indexing estimate: token count, price, segment count and preview chunks. */
export type IndexingEstimateResponse = {
  tokens: number
  total_price: number
  currency: string
  total_segments: number
  /** Preview entries, each with its content and child chunk strings. */
  preview: { content: string; child_chunks: string[] }[]
  /** Optional question/answer preview. */
  qa_preview?: QA[]
}
 
/** `IndexingEstimateResponse` extended with a `total_nodes` count. */
export type FileIndexingEstimateResponse = {
  total_nodes: number
} & IndexingEstimateResponse
 
/**
 * Indexing status of a single document, with per-stage completion values and
 * segment progress counters.
 *
 * NOTE(review): `completed_at`, `paused_at`, `error` and `stopped_at` are typed
 * `any`; their actual shapes are not established in this file.
 */
export type IndexingStatusResponse = {
  id: string
  indexing_status: DocumentIndexingStatus
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  /* eslint-disable @typescript-eslint/no-explicit-any -- shapes unknown here */
  completed_at: any
  paused_at: any
  error: any
  stopped_at: any
  /* eslint-enable @typescript-eslint/no-explicit-any */
  completed_segments: number
  total_segments: number
}
 
/** Wrapper holding a list of indexing statuses under `data`. */
export type IndexingStatusBatchResponse = {
  data: IndexingStatusResponse[]
}
 
/** String identifiers for process-rule modes. */
export enum ProcessMode {
  /** Serialized as `custom`. */
  general = 'custom',
  /** Serialized as `hierarchical`. */
  parentChild = 'hierarchical',
}
 
/** Parent mode literal: `'full-doc'` or `'paragraph'`. */
export type ParentMode =
  | 'full-doc'
  | 'paragraph'
 
/** Process rule payload: mode, rules and limits. */
export type ProcessRuleResponse = {
  mode: ProcessMode
  rules: Rules
  limits: Limits
}
 
/** Rule set: pre-processing rules, segmentation, parent mode and sub-chunk segmentation. */
export type Rules = {
  pre_processing_rules: PreProcessingRule[]
  segmentation: Segmentation
  parent_mode: ParentMode
  subchunk_segmentation: Segmentation
}
 
/** Limits attached to a process rule response. */
export type Limits = {
  indexing_max_segmentation_tokens_length: number
}
 
/** A pre-processing rule identified by `id`, with an enabled flag. */
export type PreProcessingRule = {
  id: string
  enabled: boolean
}
 
/** Segmentation settings: separator, max tokens and an optional chunk overlap. */
export type Segmentation = {
  separator: string
  max_tokens: number
  chunk_overlap?: number
}
 
/** Readonly tuple of the document indexing status values. */
export const DocumentIndexingStatusList = [
  'waiting',
  'parsing',
  'cleaning',
  'splitting',
  'indexing',
  'paused',
  'error',
  'completed',
] as const

/** Union of the literals in `DocumentIndexingStatusList`. */
export type DocumentIndexingStatus = (typeof DocumentIndexingStatusList)[number]
 
/** Readonly tuple of the document display status values. */
export const DisplayStatusList = [
  'queuing',
  'indexing',
  'paused',
  'error',
  'available',
  'enabled',
  'disabled',
  'archived',
] as const

/** Union of the literals in `DisplayStatusList`. */
export type DocumentDisplayStatus = (typeof DisplayStatusList)[number]
 
/**
 * Data source details of a document: uploaded-file info, optional Notion
 * fields, an optional provider, plus `job_id` and `url`.
 */
export type DataSourceInfo = {
  /** Details of the uploaded file. */
  upload_file: {
    id: string
    name: string
    size: number
    mime_type: string
    created_at: number
    created_by: string
    extension: string
  }
  notion_page_icon?: string
  notion_workspace_id?: string
  notion_page_id?: string
  provider?: DataSourceProvider
  job_id: string
  url: string
}
 
/** Base document detail; extended by `SimpleDocumentDetail`. */
export type InitialDocumentDetail = {
  id: string
  batch: string
  position: number
  dataset_id: string
  data_source_type: DataSourceType
  data_source_info: DataSourceInfo
  dataset_process_rule_id: string
  name: string
  /** Origin of the document: `'api'` or `'web'`. */
  created_from:
    | 'api'
    | 'web'
  created_by: string
  created_at: number
  indexing_status: DocumentIndexingStatus
  display_status: DocumentDisplayStatus
  completed_segments?: number
  total_segments?: number
  doc_form: ChunkingMode
  doc_language: string
}
 
/**
 * `InitialDocumentDetail` extended with list-level fields (enabled/archived
 * flags, counters, metadata and check fields).
 *
 * NOTE(review): `dataset_process_rule_id` is redeclared as optional here while
 * the base type declares it as required.
 */
export type SimpleDocumentDetail = InitialDocumentDetail & {
  enabled: boolean
  word_count: number
  /** TODO waiting for backend to add this field */
  is_qa: boolean
  error?: string | null
  archived: boolean
  updated_at: number
  hit_count: number
  dataset_process_rule_id?: string
  /** Optional name and extension of the uploaded file. */
  data_source_detail_dict?: {
    upload_file: {
      name: string
      extension: string
    }
  }
  doc_metadata?: MetadataItemWithValue[]
  check_status: number
  check_by: string
  enable_application: string
}
 
/** Paginated list of documents. */
export type DocumentListResponse = {
  data: SimpleDocumentDetail[]
  has_more: boolean
  total: number
  page: number
  limit: number
}
 
/** Fields shared by document requests (see `CreateDocumentReq`, `IndexingEstimateParams`). */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: string
  doc_form: ChunkingMode
  doc_language: string
  process_rule: ProcessRule
}
 
/** `DocumentReq` plus the data source, retrieval model and embedding model fields. */
export type CreateDocumentReq = DocumentReq & {
  data_source: DataSource
  retrieval_model: RetrievalConfig
  embedding_model: string
  embedding_model_provider: string
}
 
/** `DocumentReq` combined with an all-optional `DataSource` and a required `dataset_id`. */
export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
  dataset_id: string
}
 
/**
 * Data source descriptor: a type plus an `info_list` with optional Notion,
 * file and website sub-lists.
 */
export type DataSource = {
  type: DataSourceType
  info_list: {
    data_source_type: DataSourceType
    notion_info_list?: NotionInfo[]
    /** Ids of the files. */
    file_info_list?: {
      file_ids: string[]
    }
    /** Provider, job id and URLs for a website source. */
    website_info_list?: {
      provider: string
      job_id: string
      urls: string[]
    }
  }
}
 
/** Notion pages grouped under a workspace id. */
export type NotionInfo = {
  workspace_id: string
  pages: DataSourceNotionPage[]
}
 
/** Notion page reference: page id and a type string. */
export type NotionPage = {
  page_id: string
  type: string
}
 
/** A process rule: its mode together with the rule set. */
export type ProcessRule = {
  mode: ProcessMode
  rules: Rules
}
 
/**
 * Document creation result: optional dataset, batch id and the created documents.
 *
 * NOTE(review): the name is camelCase, unlike the other exported types here;
 * it is kept as-is because renaming would break importers.
 */
export type createDocumentResponse = {
  dataset?: DataSet
  batch: string
  documents: InitialDocumentDetail[]
}
 
/**
 * Misspelled legacy name kept for backward compatibility. It is structurally
 * identical to `ProcessRule` (`mode` + `rules`), so it is now an alias of it
 * and the two cannot drift apart.
 *
 * @deprecated Use `ProcessRule` instead.
 */
export type PrecessRule = ProcessRule
 
/**
 * `SimpleDocumentDetail` extended with processing values, pause/disable/archive
 * info, document type and metadata, and the applied process rules.
 *
 * NOTE(review): `indexing_status` and `doc_metadata` are redeclared here with
 * types that differ from the base type, so the intersection combines both
 * declarations. The trailing index signature also accepts arbitrary extra keys
 * typed `any`. Confirm both are intended.
 */
export type FullDocumentDetail = SimpleDocumentDetail & {
  batch: string
  created_api_request_id: string
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  tokens: number
  indexing_latency: number
  completed_at: number
  paused_by: string
  paused_at: number
  stopped_at: number
  indexing_status: string
  disabled_at: number
  disabled_by: string
  archived_reason:
    | 'rule_modified'
    | 're_upload'
  archived_by: string
  archived_at: number
  doc_type?: DocType | null | 'others'
  doc_metadata?: DocMetadata | null
  segment_count: number
  dataset_process_rule: PrecessRule
  document_process_rule: ProcessRule
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- open-ended extra fields
  [key: string]: any
}
 
/** Document metadata: a fixed set of named string fields plus any additional string-valued keys. */
export type DocMetadata = {
  title: string
  language: string
  author: string
  publisher: string
  publicationDate: string
  ISBN: string
  category: string
  /** Any further key maps to a string value. */
  [key: string]: string
}
 
/** Readonly tuple of the customizable document type identifiers. */
export const CUSTOMIZABLE_DOC_TYPES = [
  'book',
  'web_page',
  'paper',
  'social_media_post',
  'personal_document',
  'business_document',
  'im_chat_log',
] as const

/** Readonly tuple of the fixed document type identifiers. */
export const FIXED_DOC_TYPES = [
  'synced_from_github',
  'synced_from_notion',
  'wikipedia_entry',
] as const

/** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
export type CustomizableDocType = (typeof CUSTOMIZABLE_DOC_TYPES)[number]

/** Union of the literals in `FIXED_DOC_TYPES`. */
export type FixedDocType = (typeof FIXED_DOC_TYPES)[number]

/** Any document type, customizable or fixed. */
export type DocType = CustomizableDocType | FixedDocType
 
/** Alias of `FullDocumentDetail`. */
export type DocumentDetailResponse = FullDocumentDetail
 
/**
 * Readonly tuple of the segment status values.
 *
 * Declared `as const`, like the other status lists in this file, so that
 * `SegmentStatus` is the union of these literals rather than plain `string`.
 */
export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing'] as const

/** Union of the literals in `SEGMENT_STATUS_LIST`. */
export type SegmentStatus = (typeof SEGMENT_STATUS_LIST)[number]
 
/** Query parameters for listing segments; only `limit` is required. */
export type SegmentsQuery = {
  page?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  /** A boolean, or the literal `'all'`. */
  enabled?: 'all' | boolean
}
 
/** Full segment record: content, counters, index node info, status, numeric time fields and optional child chunks. */
export type SegmentDetailModel = {
  id: string
  position: number
  document_id: string
  content: string
  sign_content: string
  word_count: number
  tokens: number
  keywords: string[]
  index_node_id: string
  index_node_hash: string
  hit_count: number
  enabled: boolean
  disabled_at: number
  disabled_by: string
  status: SegmentStatus
  created_by: string
  created_at: number
  indexing_at: number
  completed_at: number
  error: string | null
  stopped_at: number
  /** Optional answer text. */
  answer?: string
  child_chunks?: ChildChunkDetail[]
  updated_at: number
}
 
/** Paginated list of segments, including the total page count. */
export type SegmentsResponse = {
  data: SegmentDetailModel[]
  has_more: boolean
  limit: number
  total: number
  total_pages: number
  page: number
}
 
/** A hit-testing record: content, source and creator information. */
export type HitTestingRecord = {
  id: string
  content: string
  source:
    | 'app'
    | 'hit_testing'
    | 'plugin'
  source_app_id: string
  created_by_role:
    | 'account'
    | 'end_user'
  created_by: string
  created_at: number
}
 
/** Child chunk in a hit-testing result: id, content, position and score. */
export type HitTestingChildChunk = {
  id: string
  content: string
  position: number
  score: number
}
 
/**
 * One hit-testing result entry: segment, score, t-SNE position and optional child chunks.
 *
 * NOTE(review): `segment` and `content` are both typed `Segment`; confirm that
 * both fields are intentionally the same type.
 */
export type HitTesting = {
  segment: Segment
  content: Segment
  score: number
  tsne_position: TsnePosition
  child_chunks?: HitTestingChildChunk[] | null
}
 
/** Hit-testing entry from an external knowledge base: content, title, score and metadata. */
export type ExternalKnowledgeBaseHitTesting = {
  content: string
  title: string
  score: number
  /** Metadata keyed by the two `x-amz-bedrock-kb-*` names. */
  metadata: {
    'x-amz-bedrock-kb-source-uri': string
    'x-amz-bedrock-kb-data-source-id': string
  }
}
 
/** Segment as referenced by hit-testing results, with its owning document. */
export type Segment = {
  id: string
  /** Owning document (the `Document` type exported from this module). */
  document: Document
  content: string
  sign_content: string
  position: number
  word_count: number
  tokens: number
  keywords: string[]
  hit_count: number
  index_node_hash: string
}
 
/**
 * Document summary referenced by `Segment`.
 *
 * NOTE(review): this shares its name with the global DOM `Document`; inside
 * this module the name refers to this type.
 */
export type Document = {
  id: string
  data_source_type: string
  name: string
  doc_type: DocType
}
 
/** Paginated list of hit-testing records. */
export type HitTestingRecordsResponse = {
  data: HitTestingRecord[]
  has_more: boolean
  limit: number
  total: number
  page: number
}
 
/** A two-dimensional position given as `x` and `y` numbers. */
export type TsnePosition = {
  x: number
  y: number
}
 
/** Hit-testing response: the query (content and position) and the matching records. */
export type HitTestingResponse = {
  query: {
    content: string
    tsne_position: TsnePosition
  }
  records: HitTesting[]
}
 
/** Hit-testing response for an external knowledge base: the query content and the matching records. */
export type ExternalKnowledgeBaseHitTestingResponse = {
  query: {
    content: string
  }
  records: ExternalKnowledgeBaseHitTesting[]
}
 
/** App entry as listed in `RelatedAppResponse`: identity, mode and icon fields. */
export type RelatedApp = {
  id: string
  name: string
  mode: AppMode
  icon_type: AppIconType | null
  icon: string
  icon_background: string
  icon_url: string
}
 
/** List of related apps with a total count. */
export type RelatedAppResponse = {
  data: RelatedApp[]
  total: number
}
 
/** Payload for updating a segment; only `content` is required. */
export type SegmentUpdater = {
  content: string
  answer?: string
  keywords?: string[]
  regenerate_child_chunks?: boolean
}
 
/** List of indexing statuses with a total count. */
export type ErrorDocsResponse = {
  data: IndexingStatusResponse[]
  total: number
}
 
/** Boolean flags describing the composition of a dataset selection. */
export type SelectedDatasetsMode = {
  allHighQuality: boolean
  allHighQualityVectorSearch: boolean
  allHighQualityFullTextSearch: boolean
  allEconomic: boolean
  mixtureHighQualityAndEconomic: boolean
  allInternal: boolean
  allExternal: boolean
  mixtureInternalAndExternal: boolean
  inconsistentEmbeddingModel: boolean
}
 
/** String identifiers for weighted-score presets. */
export enum WeightedScoreEnum {
  /** Serialized as `semantic_first`. */
  SemanticFirst = 'semantic_first',
  /** Serialized as `keyword_first`. */
  KeywordFirst = 'keyword_first',
  /** Serialized as `customized`. */
  Customized = 'customized',
}
 
/** String identifiers for reranking modes. */
export enum RerankingModeEnum {
  /** Serialized as `reranking_model`. */
  RerankingModel = 'reranking_model',
  /** Serialized as `weighted_score`. */
  WeightedScore = 'weighted_score',
}
 
/**
 * Default semantic/keyword weights, keyed by scenario:
 * vector search (1 / 0), full-text search (0 / 1), and everything else (0.7 / 0.3).
 */
export const DEFAULT_WEIGHTED_SCORE = {
  allHighQualityVectorSearch: { semantic: 1, keyword: 0 },
  allHighQualityFullTextSearch: { semantic: 0, keyword: 1 },
  other: { semantic: 0.7, keyword: 0.3 },
}
 
/** Child chunk type literal: `'automatic'` or `'customized'`. */
export type ChildChunkType =
  | 'automatic'
  | 'customized'
 
/** Child chunk record belonging to a segment (`segment_id`). */
export type ChildChunkDetail = {
  id: string
  position: number
  segment_id: string
  content: string
  word_count: number
  created_at: number
  updated_at: number
  type: ChildChunkType
}
 
/** Paginated list of child chunks, including the total page count. */
export type ChildSegmentsResponse = {
  data: ChildChunkDetail[]
  total: number
  total_pages: number
  page: number
  limit: number
}
 
/** Identifies a single document within a dataset. */
export type UpdateDocumentParams = {
  datasetId: string
  documentId: string
}
 
/** Document action identifiers. Used in api url. */
export enum DocumentActionType {
  enable = 'enable',
  disable = 'disable',
  archive = 'archive',
  /** Serialized as `un_archive`. */
  unArchive = 'un_archive',
  delete = 'delete',
  check_fail = 'check_fail',
}
 
/**
 * Identifies one or several documents within a dataset.
 * `documentIds` may be a single string or an array of strings.
 */
export type UpdateDocumentBatchParams = {
  datasetId: string
  documentId?: string
  documentIds?: string | string[]
}
 
/** Batch import job reference: job id and a status string. */
export type BatchImportResponse = {
  job_id: string
  job_status: string
}
 
 
  |