"""Pydantic models for segment updates, data sources, processing rules,
retrieval settings and the knowledge configuration that ties them together.
"""

from enum import Enum
from typing import Literal, Optional

from pydantic import BaseModel


class SegmentUpdateEntity(BaseModel):
    """Segment update payload in which ``content`` is mandatory."""

    content: str
    # The remaining fields may be omitted; each then defaults to ``None``.
    answer: Optional[str] = None
    keywords: Optional[list[str]] = None
    enabled: Optional[bool] = None


class ParentMode(str, Enum):
    """String-valued enumeration with the members ``full-doc`` and ``paragraph``."""

    FULL_DOC = "full-doc"
    PARAGRAPH = "paragraph"


class NotionIcon(BaseModel):
    """Icon descriptor: a required ``type`` plus an optional URL and emoji."""

    type: str
    # NOTE(review): presumably only one of ``url`` / ``emoji`` is set,
    # depending on ``type`` -- nothing here enforces that.
    url: Optional[str] = None
    emoji: Optional[str] = None


class NotionPage(BaseModel):
    """A single Notion page reference."""

    page_id: str
    page_name: str
    page_icon: Optional[NotionIcon] = None
    type: str


class NotionInfo(BaseModel):
    """A Notion workspace id together with a list of its pages."""

    workspace_id: str
    pages: list[NotionPage]


class WebsiteInfo(BaseModel):
    """Website crawl description: provider, job id and the list of URLs."""

    provider: str
    job_id: str
    urls: list[str]
    # Defaults to ``True`` when the caller does not supply a value.
    only_main_content: bool = True


class FileInfo(BaseModel):
    """A list of file ids."""

    file_ids: list[str]


class InfoList(BaseModel):
    """Data-source details, tagged by ``data_source_type``.

    ``data_source_type`` must be one of ``"upload_file"``,
    ``"notion_import"`` or ``"website_crawl"``.
    """

    data_source_type: Literal["upload_file", "notion_import", "website_crawl"]
    # All three detail fields are optional at the model level.
    # NOTE(review): presumably the one matching ``data_source_type`` is
    # expected to be populated -- this model does not validate that.
    notion_info_list: Optional[list[NotionInfo]] = None
    file_info_list: Optional[FileInfo] = None
    website_info_list: Optional[WebsiteInfo] = None


class DataSource(BaseModel):
    """Wrapper holding a single required :class:`InfoList`."""

    info_list: InfoList


class PreProcessingRule(BaseModel):
    """A pre-processing rule identified by ``id`` with an ``enabled`` flag."""

    id: str
    enabled: bool


class Segmentation(BaseModel):
    """Segmentation settings.

    ``max_tokens`` is required; ``separator`` defaults to a newline and
    ``chunk_overlap`` defaults to ``0``.
    """

    separator: str = "\n"
    max_tokens: int
    chunk_overlap: int = 0


class Rule(BaseModel):
    """Optional rule set; every field defaults to ``None``."""

    pre_processing_rules: Optional[list[PreProcessingRule]] = None
    segmentation: Optional[Segmentation] = None
    # Accepts the same two string values that ``ParentMode`` defines, but is
    # declared as a plain ``Literal`` rather than the enum.
    parent_mode: Optional[Literal["full-doc", "paragraph"]] = None
    subchunk_segmentation: Optional[Segmentation] = None


class ProcessRule(BaseModel):
    """Processing mode plus an optional :class:`Rule`.

    ``mode`` must be ``"automatic"``, ``"custom"`` or ``"hierarchical"``.
    """

    mode: Literal["automatic", "custom", "hierarchical"]
    rules: Optional[Rule] = None


class RerankingModel(BaseModel):
    """Provider name and model name of a reranking model; both optional."""

    reranking_provider_name: Optional[str] = None
    reranking_model_name: Optional[str] = None


class RetrievalModel(BaseModel):
    """Retrieval settings.

    ``search_method`` must be ``"hybrid_search"``, ``"semantic_search"`` or
    ``"full_text_search"``.
    """

    search_method: Literal["hybrid_search", "semantic_search", "full_text_search"]
    reranking_enable: bool
    reranking_model: Optional[RerankingModel] = None
    top_k: int
    score_threshold_enabled: bool
    # NOTE(review): presumably only meaningful when
    # ``score_threshold_enabled`` is true -- not enforced by this model.
    score_threshold: Optional[float] = None


class KnowledgeConfig(BaseModel):
    """Top-level knowledge configuration.

    Only ``indexing_technique`` (``"high_quality"`` or ``"economy"``) is
    required; every other field carries a default.
    """

    original_document_id: Optional[str] = None
    duplicate: bool = True
    indexing_technique: Literal["high_quality", "economy"]
    data_source: Optional[DataSource] = None
    process_rule: Optional[ProcessRule] = None
    retrieval_model: Optional[RetrievalModel] = None
    doc_form: str = "text_model"
    doc_language: str = "English"
    embedding_model: Optional[str] = None
    embedding_model_provider: Optional[str] = None
    name: Optional[str] = None


class SegmentUpdateArgs(BaseModel):
    """Segment update arguments in which every field is optional.

    Unlike :class:`SegmentUpdateEntity`, ``content`` may be omitted here.
    """

    content: Optional[str] = None
    answer: Optional[str] = None
    keywords: Optional[list[str]] = None
    # Defaults to ``False`` when not supplied.
    regenerate_child_chunks: bool = False
    enabled: Optional[bool] = None


class ChildChunkUpdateArgs(BaseModel):
    """Child chunk update arguments: an optional ``id`` and required ``content``."""

    # NOTE(review): presumably a missing ``id`` denotes a new chunk --
    # confirm against the caller.
    id: Optional[str] = None
    content: str