| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 | 
							- import groupBy from 'lodash-es/groupBy'
 
- import type { MutationOptions } from '@tanstack/react-query'
 
- import { useMutation } from '@tanstack/react-query'
 
- import { createDocument, createFirstDocument, fetchDefaultProcessRule, fetchFileIndexingEstimate } from '../datasets'
 
- import type { IndexingType } from '@/app/components/datasets/create/step-two'
 
- import type { ChunkingMode, CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets'
 
- import type { DataSourceProvider, NotionPage } from '@/models/common'
 
/**
 * Groups Notion pages by their `workspace_id` and reduces every page to the
 * four fields kept in the payload: `page_id`, `page_name`, `page_icon`, `type`.
 *
 * @param notionPages - Pages to group.
 * @returns One entry per distinct `workspace_id`, each carrying its trimmed pages.
 */
export const getNotionInfo = (
  notionPages: NotionPage[],
) => {
  const pagesByWorkspace = groupBy(notionPages, 'workspace_id')

  return Object.entries(pagesByWorkspace).map(([workspace_id, pages]) => ({
    workspace_id,
    // Copy only the fields that belong in the payload; everything else on the page is dropped.
    pages: pages.map(({ page_id, page_name, page_icon, type }) => ({
      page_id,
      page_name,
      page_icon,
      type,
    })),
  })) as NotionInfo[]
}
 
/**
 * Builds the website-crawl section of a request payload.
 *
 * @param opts - Crawl provider, crawl job id, crawled pages and optional crawl options.
 * @returns An object with `provider`, `job_id`, `urls` (the `source_url` of each page)
 *   and `only_main_content` (`undefined` when `crawlOptions` is not supplied).
 */
export const getWebsiteInfo = (
  opts: {
    websiteCrawlProvider: DataSourceProvider
    websiteCrawlJobId: string
    websitePages: CrawlResultItem[]
    crawlOptions?: CrawlOptions
  },
) => ({
  provider: opts.websiteCrawlProvider,
  job_id: opts.websiteCrawlJobId,
  urls: opts.websitePages.map(({ source_url }) => source_url),
  only_main_content: opts.crawlOptions?.only_main_content,
})
 
/**
 * Options shared by every indexing-estimate param builder in this file,
 * regardless of the data source type.
 */
type GetFileIndexingEstimateParamsOptionBase = {
  /** Forwarded unchanged as `dataset_id`. */
  dataset_id: string
  /** Forwarded as `doc_form`. */
  docForm: ChunkingMode
  /** Forwarded as `doc_language`. */
  docLanguage: string
  /** Forwarded as `indexing_technique`. */
  indexingTechnique: IndexingType
  /** Forwarded as `process_rule`. */
  processRule: ProcessRule
}
 
/** Indexing-estimate options for the `DataSourceType.FILE` data source. */
type GetFileIndexingEstimateParamsOptionFile = GetFileIndexingEstimateParamsOptionBase & {
  /** Discriminator: always `DataSourceType.FILE` for this variant. */
  dataSourceType: DataSourceType.FILE
  /** Files whose `id` values are sent as `file_ids`. */
  files: CustomFile[]
}
 
/**
 * Converts file-based estimate options into the `IndexingEstimateParams`
 * request shape.
 *
 * @param options - Shared estimate options plus the files to estimate.
 * @returns Params whose `info_list.file_info_list.file_ids` holds the `id` of every file.
 */
const getFileIndexingEstimateParamsForFile = (
  options: GetFileIndexingEstimateParamsOptionFile,
): IndexingEstimateParams => {
  // The cast mirrors the request type; ids are taken as-is from each file.
  const fileIds = options.files.map(({ id }) => id) as string[]

  return {
    info_list: {
      data_source_type: options.dataSourceType,
      file_info_list: {
        file_ids: fileIds,
      },
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
 
/**
 * Mutation hook that requests an indexing estimate for uploaded files.
 *
 * The request params are built from `options` each time the mutation runs.
 * `mutationOptions` is spread after the default `mutationFn`, so any key it
 * defines (including `mutationFn`) takes precedence.
 *
 * @param options - File-based estimate options.
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useFetchFileIndexingEstimateForFile = (
  options: GetFileIndexingEstimateParamsOptionFile,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => useMutation({
  mutationFn: async () => {
    const estimateParams = getFileIndexingEstimateParamsForFile(options)
    return fetchFileIndexingEstimate(estimateParams)
  },
  ...mutationOptions,
})
 
/** Indexing-estimate options for the `DataSourceType.NOTION` data source. */
type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & {
  /** Discriminator: always `DataSourceType.NOTION` for this variant. */
  dataSourceType: DataSourceType.NOTION
  /** Pages that are converted with `getNotionInfo` into `notion_info_list`. */
  notionPages: NotionPage[]
}
 
/**
 * Converts Notion-based estimate options into the `IndexingEstimateParams`
 * request shape.
 *
 * @param options - Shared estimate options plus the Notion pages to estimate.
 * @returns Params whose `info_list.notion_info_list` is the output of `getNotionInfo`.
 */
const getFileIndexingEstimateParamsForNotion = (
  options: GetFileIndexingEstimateParamsOptionNotion,
): IndexingEstimateParams => {
  const notionInfoList = getNotionInfo(options.notionPages)

  return {
    info_list: {
      data_source_type: options.dataSourceType,
      notion_info_list: notionInfoList,
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
 
/**
 * Mutation hook that requests an indexing estimate for Notion pages.
 *
 * The request params are built from `options` each time the mutation runs.
 * `mutationOptions` is spread after the default `mutationFn`, so any key it
 * defines (including `mutationFn`) takes precedence.
 *
 * @param options - Notion-based estimate options.
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useFetchFileIndexingEstimateForNotion = (
  options: GetFileIndexingEstimateParamsOptionNotion,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => useMutation({
  mutationFn: async () => {
    const estimateParams = getFileIndexingEstimateParamsForNotion(options)
    return fetchFileIndexingEstimate(estimateParams)
  },
  ...mutationOptions,
})
 
/** Indexing-estimate options for the `DataSourceType.WEB` data source. */
type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & {
  /** Discriminator: always `DataSourceType.WEB` for this variant. */
  dataSourceType: DataSourceType.WEB
  /** Crawl provider, handed to `getWebsiteInfo`. */
  websiteCrawlProvider: DataSourceProvider
  /** Crawl job id, handed to `getWebsiteInfo`. */
  websiteCrawlJobId: string
  /** Crawled pages, handed to `getWebsiteInfo`. */
  websitePages: CrawlResultItem[]
  /** Optional crawl options, handed to `getWebsiteInfo`. */
  crawlOptions?: CrawlOptions
}
 
/**
 * Converts website-crawl estimate options into the `IndexingEstimateParams`
 * request shape.
 *
 * @param options - Shared estimate options plus the crawl provider, job id, pages and crawl options.
 * @returns Params whose `info_list.website_info_list` is the output of `getWebsiteInfo`.
 */
const getFileIndexingEstimateParamsForWeb = (
  options: GetFileIndexingEstimateParamsOptionWeb,
): IndexingEstimateParams => {
  const websiteInfoList = getWebsiteInfo({
    websiteCrawlProvider: options.websiteCrawlProvider,
    websiteCrawlJobId: options.websiteCrawlJobId,
    websitePages: options.websitePages,
    crawlOptions: options.crawlOptions,
  })

  return {
    info_list: {
      data_source_type: options.dataSourceType,
      website_info_list: websiteInfoList,
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
 
/**
 * Mutation hook that requests an indexing estimate for crawled website pages.
 *
 * The request params are built from `options` each time the mutation runs.
 * `mutationOptions` is spread after the default `mutationFn`, so any key it
 * defines (including `mutationFn`) takes precedence.
 *
 * @param options - Website-crawl estimate options.
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useFetchFileIndexingEstimateForWeb = (
  options: GetFileIndexingEstimateParamsOptionWeb,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => useMutation({
  mutationFn: async () => {
    const estimateParams = getFileIndexingEstimateParamsForWeb(options)
    return fetchFileIndexingEstimate(estimateParams)
  },
  ...mutationOptions,
})
 
/**
 * Mutation hook wrapping `createFirstDocument`.
 *
 * The mutation variable is a `CreateDocumentReq`, sent as the request `body`.
 * `mutationOptions` is spread after the default `mutationFn`, so any key it
 * defines (including `mutationFn`) takes precedence.
 *
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useCreateFirstDocument = (
  mutationOptions: MutationOptions<createDocumentResponse, Error, CreateDocumentReq> = {},
) => useMutation({
  mutationFn: async (body: CreateDocumentReq) => {
    return createFirstDocument({ body })
  },
  ...mutationOptions,
})
 
/**
 * Mutation hook wrapping `createDocument` for a given dataset.
 *
 * The mutation variable is a `CreateDocumentReq`, sent as the request `body`
 * together with `datasetId`. `mutationOptions` is spread after the default
 * `mutationFn`, so any key it defines (including `mutationFn`) takes precedence.
 *
 * @param datasetId - Id passed to `createDocument` on every call.
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useCreateDocument = (
  datasetId: string,
  mutationOptions: MutationOptions<createDocumentResponse, Error, CreateDocumentReq> = {},
) => useMutation({
  mutationFn: async (body: CreateDocumentReq) => {
    return createDocument({ datasetId, body })
  },
  ...mutationOptions,
})
 
/**
 * Mutation hook wrapping `fetchDefaultProcessRule`.
 *
 * The mutation variable is the URL string, passed on as `{ url }`.
 * `mutationOptions` is spread after the default `mutationFn`, so any key it
 * defines (including `mutationFn`) takes precedence.
 *
 * @param mutationOptions - Extra options passed through to `useMutation`.
 * @returns The result of `useMutation`.
 */
export const useFetchDefaultProcessRule = (
  mutationOptions: MutationOptions<ProcessRuleResponse, Error, string> = {},
) => useMutation({
  mutationFn: async (ruleUrl: string) => {
    return fetchDefaultProcessRule({ url: ruleUrl })
  },
  ...mutationOptions,
})
 
 
  |