123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- import groupBy from 'lodash-es/groupBy'
- import type { MutationOptions } from '@tanstack/react-query'
- import { useMutation } from '@tanstack/react-query'
- import { createDocument, createFirstDocument, fetchDefaultProcessRule, fetchFileIndexingEstimate } from '../datasets'
- import { type IndexingType } from '@/app/components/datasets/create/step-two'
- import type { ChunkingMode, CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets'
- import type { DataSourceProvider, NotionPage } from '@/models/common'
/**
 * Buckets Notion pages by their `workspace_id` and reduces every page to the
 * four fields sent to the backend.
 *
 * @param notionPages Pages to group.
 * @returns One entry per distinct `workspace_id`, each holding that
 * workspace's pages as `{ page_id, page_name, page_icon, type }`.
 */
export const getNotionInfo = (
  notionPages: NotionPage[],
) => {
  const pagesByWorkspace = groupBy(notionPages, 'workspace_id')
  return Object.entries(pagesByWorkspace).map(([workspace_id, pages]) => ({
    workspace_id,
    // Keep only the identifying fields; any other page properties are dropped.
    pages: pages.map(({ page_id, page_name, page_icon, type }) => ({
      page_id,
      page_name,
      page_icon,
      type,
    })),
  })) as NotionInfo[]
}
/**
 * Builds the website payload from the result of a crawl job.
 *
 * @param opts Crawl provider, crawl job id, crawled pages and optional crawl options.
 * @returns An object with `provider`, `job_id`, the `source_url` of every page
 * as `urls`, and `only_main_content` (which is `undefined` when no crawl
 * options are supplied).
 */
export const getWebsiteInfo = (
  opts: {
    websiteCrawlProvider: DataSourceProvider
    websiteCrawlJobId: string
    websitePages: CrawlResultItem[]
    crawlOptions?: CrawlOptions
  },
) => {
  const urls = opts.websitePages.map(({ source_url }) => source_url)
  return {
    provider: opts.websiteCrawlProvider,
    job_id: opts.websiteCrawlJobId,
    urls,
    only_main_content: opts.crawlOptions?.only_main_content,
  }
}
/**
 * Options shared by every indexing-estimate request, whatever the data source.
 * The source-specific option types intersect this with their own fields.
 */
type GetFileIndexingEstimateParamsOptionBase = {
  /** Sent as `dataset_id`. */
  dataset_id: string
  /** Sent as `indexing_technique`. */
  indexingTechnique: IndexingType
  /** Sent as `process_rule`. */
  processRule: ProcessRule
  /** Sent as `doc_form`. */
  docForm: ChunkingMode
  /** Sent as `doc_language`. */
  docLanguage: string
}
/** Indexing-estimate options for documents that come from uploaded files. */
type GetFileIndexingEstimateParamsOptionFile = GetFileIndexingEstimateParamsOptionBase & {
  dataSourceType: DataSourceType.FILE
  files: CustomFile[]
}

/**
 * Converts file-source options into the request body expected by the
 * indexing-estimate endpoint.
 *
 * @param options Shared estimate options plus the files to estimate.
 * @returns Request params whose `info_list.file_info_list.file_ids` holds the
 * `id` of every file, in input order.
 */
const getFileIndexingEstimateParamsForFile = (
  options: GetFileIndexingEstimateParamsOptionFile,
): IndexingEstimateParams => {
  // NOTE(review): the cast presumably exists because `id` is optional on
  // CustomFile — confirm that callers only pass files that already have an id.
  const fileIds = options.files.map(({ id }) => id) as string[]
  return {
    info_list: {
      data_source_type: options.dataSourceType,
      file_info_list: {
        file_ids: fileIds,
      },
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
/**
 * Mutation hook that requests an indexing estimate for uploaded files.
 *
 * @param options File-source estimate options; the request body is built from
 * them each time the mutation runs.
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useFetchFileIndexingEstimateForFile = (
  options: GetFileIndexingEstimateParamsOptionFile,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => {
  // Params are built inside the function so they are computed at mutate time.
  const requestEstimate = async () => {
    const params = getFileIndexingEstimateParamsForFile(options)
    return fetchFileIndexingEstimate(params)
  }
  return useMutation({
    mutationFn: requestEstimate,
    ...mutationOptions,
  })
}
/** Indexing-estimate options for documents imported from Notion. */
type GetFileIndexingEstimateParamsOptionNotion = GetFileIndexingEstimateParamsOptionBase & {
  dataSourceType: DataSourceType.NOTION
  notionPages: NotionPage[]
}

/**
 * Converts Notion-source options into the request body expected by the
 * indexing-estimate endpoint.
 *
 * @param options Shared estimate options plus the Notion pages to estimate.
 * @returns Request params whose `info_list.notion_info_list` is the result of
 * `getNotionInfo` for the given pages.
 */
const getFileIndexingEstimateParamsForNotion = (
  options: GetFileIndexingEstimateParamsOptionNotion,
): IndexingEstimateParams => {
  const notionInfoList = getNotionInfo(options.notionPages)
  return {
    info_list: {
      data_source_type: options.dataSourceType,
      notion_info_list: notionInfoList,
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
/**
 * Mutation hook that requests an indexing estimate for Notion pages.
 *
 * @param options Notion-source estimate options; the request body is built
 * from them each time the mutation runs.
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useFetchFileIndexingEstimateForNotion = (
  options: GetFileIndexingEstimateParamsOptionNotion,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => {
  // Params are built inside the function so they are computed at mutate time.
  const requestEstimate = async () => {
    const params = getFileIndexingEstimateParamsForNotion(options)
    return fetchFileIndexingEstimate(params)
  }
  return useMutation({
    mutationFn: requestEstimate,
    ...mutationOptions,
  })
}
/** Indexing-estimate options for documents obtained from a website crawl. */
type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & {
  dataSourceType: DataSourceType.WEB
  websitePages: CrawlResultItem[]
  crawlOptions?: CrawlOptions
  websiteCrawlProvider: DataSourceProvider
  websiteCrawlJobId: string
}

/**
 * Converts website-source options into the request body expected by the
 * indexing-estimate endpoint.
 *
 * @param options Shared estimate options plus the crawl job details.
 * @returns Request params whose `info_list.website_info_list` is the result of
 * `getWebsiteInfo` for the crawl-related fields.
 */
const getFileIndexingEstimateParamsForWeb = (
  options: GetFileIndexingEstimateParamsOptionWeb,
): IndexingEstimateParams => {
  const { websiteCrawlProvider, websiteCrawlJobId, websitePages, crawlOptions } = options
  const websiteInfoList = getWebsiteInfo({
    websiteCrawlProvider,
    websiteCrawlJobId,
    websitePages,
    crawlOptions,
  })
  return {
    info_list: {
      data_source_type: options.dataSourceType,
      website_info_list: websiteInfoList,
    },
    indexing_technique: options.indexingTechnique,
    process_rule: options.processRule,
    doc_form: options.docForm,
    doc_language: options.docLanguage,
    dataset_id: options.dataset_id,
  }
}
/**
 * Mutation hook that requests an indexing estimate for crawled website pages.
 *
 * @param options Website-source estimate options; the request body is built
 * from them each time the mutation runs.
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useFetchFileIndexingEstimateForWeb = (
  options: GetFileIndexingEstimateParamsOptionWeb,
  mutationOptions: MutationOptions<FileIndexingEstimateResponse> = {},
) => {
  // Params are built inside the function so they are computed at mutate time.
  const requestEstimate = async () => {
    const params = getFileIndexingEstimateParamsForWeb(options)
    return fetchFileIndexingEstimate(params)
  }
  return useMutation({
    mutationFn: requestEstimate,
    ...mutationOptions,
  })
}
/**
 * Mutation hook wrapping `createFirstDocument`.
 *
 * The mutation variable is the `CreateDocumentReq`, which is sent as the
 * request `body`.
 *
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useCreateFirstDocument = (
  mutationOptions: MutationOptions<createDocumentResponse, Error, CreateDocumentReq> = {},
) => {
  const submit = async (createDocumentReq: CreateDocumentReq) => {
    return createFirstDocument({ body: createDocumentReq })
  }
  return useMutation({
    mutationFn: submit,
    ...mutationOptions,
  })
}
/**
 * Mutation hook wrapping `createDocument` for a given dataset.
 *
 * The mutation variable is the `CreateDocumentReq`, which is sent as the
 * request `body` together with `datasetId`.
 *
 * @param datasetId Passed to `createDocument` as `datasetId`.
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useCreateDocument = (
  datasetId: string,
  mutationOptions: MutationOptions<createDocumentResponse, Error, CreateDocumentReq> = {},
) => {
  const submit = async (req: CreateDocumentReq) => {
    return createDocument({ datasetId, body: req })
  }
  return useMutation({
    mutationFn: submit,
    ...mutationOptions,
  })
}
/**
 * Mutation hook wrapping `fetchDefaultProcessRule`.
 *
 * The mutation variable is the URL string, which is passed to the fetcher as
 * `{ url }`.
 *
 * @param mutationOptions Extra mutation options. They are spread after
 * `mutationFn`, so a caller-supplied `mutationFn` takes precedence.
 * @returns The value returned by `useMutation`.
 */
export const useFetchDefaultProcessRule = (
  mutationOptions: MutationOptions<ProcessRuleResponse, Error, string> = {},
) => {
  const load = async (url: string) => {
    return fetchDefaultProcessRule({ url })
  }
  return useMutation({
    mutationFn: load,
    ...mutationOptions,
  })
}
|