# knowledge_entities.py
  1. from enum import Enum
  2. from typing import Literal, Optional
  3. from pydantic import BaseModel
  4. class SegmentUpdateEntity(BaseModel):
  5. content: str
  6. answer: Optional[str] = None
  7. keywords: Optional[list[str]] = None
  8. enabled: Optional[bool] = None
  9. class ParentMode(str, Enum):
  10. FULL_DOC = "full-doc"
  11. PARAGRAPH = "paragraph"
  12. class NotionIcon(BaseModel):
  13. type: str
  14. url: Optional[str] = None
  15. emoji: Optional[str] = None
  16. class NotionPage(BaseModel):
  17. page_id: str
  18. page_name: str
  19. page_icon: Optional[NotionIcon] = None
  20. type: str
  21. class NotionInfo(BaseModel):
  22. workspace_id: str
  23. pages: list[NotionPage]
  24. class WebsiteInfo(BaseModel):
  25. provider: str
  26. job_id: str
  27. urls: list[str]
  28. only_main_content: bool = True
  29. class FileInfo(BaseModel):
  30. file_ids: list[str]
  31. class InfoList(BaseModel):
  32. data_source_type: Literal["upload_file", "notion_import", "website_crawl"]
  33. notion_info_list: Optional[list[NotionInfo]] = None
  34. file_info_list: Optional[FileInfo] = None
  35. website_info_list: Optional[WebsiteInfo] = None
  36. class DataSource(BaseModel):
  37. info_list: InfoList
  38. class PreProcessingRule(BaseModel):
  39. id: str
  40. enabled: bool
  41. class Segmentation(BaseModel):
  42. separator: str = "\n"
  43. max_tokens: int
  44. chunk_overlap: int = 0
  45. class Rule(BaseModel):
  46. pre_processing_rules: Optional[list[PreProcessingRule]] = None
  47. segmentation: Optional[Segmentation] = None
  48. parent_mode: Optional[Literal["full-doc", "paragraph"]] = None
  49. subchunk_segmentation: Optional[Segmentation] = None
  50. class ProcessRule(BaseModel):
  51. mode: Literal["automatic", "custom", "hierarchical"]
  52. rules: Optional[Rule] = None
  53. class RerankingModel(BaseModel):
  54. reranking_provider_name: Optional[str] = None
  55. reranking_model_name: Optional[str] = None
  56. class WeightVectorSetting(BaseModel):
  57. vector_weight: float
  58. embedding_provider_name: str
  59. embedding_model_name: str
  60. class WeightKeywordSetting(BaseModel):
  61. keyword_weight: float
  62. class WeightModel(BaseModel):
  63. weight_type: str
  64. vector_setting: Optional[WeightVectorSetting] = None
  65. keyword_setting: Optional[WeightKeywordSetting] = None
  66. class RetrievalModel(BaseModel):
  67. search_method: Literal["hybrid_search", "semantic_search", "full_text_search"]
  68. reranking_enable: bool
  69. reranking_model: Optional[RerankingModel] = None
  70. reranking_mode: Optional[str] = None
  71. top_k: int
  72. score_threshold_enabled: bool
  73. score_threshold: Optional[float] = None
  74. weights: Optional[WeightModel] = None
  75. class MetaDataConfig(BaseModel):
  76. doc_type: str
  77. doc_metadata: dict
  78. class KnowledgeConfig(BaseModel):
  79. original_document_id: Optional[str] = None
  80. duplicate: bool = True
  81. indexing_technique: Literal["high_quality", "economy"]
  82. data_source: Optional[DataSource] = None
  83. process_rule: Optional[ProcessRule] = None
  84. retrieval_model: Optional[RetrievalModel] = None
  85. doc_form: str = "text_model"
  86. doc_language: str = "English"
  87. embedding_model: Optional[str] = None
  88. embedding_model_provider: Optional[str] = None
  89. name: Optional[str] = None
  90. class SegmentUpdateArgs(BaseModel):
  91. content: Optional[str] = None
  92. answer: Optional[str] = None
  93. keywords: Optional[list[str]] = None
  94. regenerate_child_chunks: bool = False
  95. enabled: Optional[bool] = None
  96. class ChildChunkUpdateArgs(BaseModel):
  97. id: Optional[str] = None
  98. content: str
  99. class MetadataArgs(BaseModel):
  100. type: Literal["string", "number", "time"]
  101. name: str
  102. class MetadataUpdateArgs(BaseModel):
  103. name: str
  104. value: Optional[str | int | float] = None
  105. class MetadataValueUpdateArgs(BaseModel):
  106. fields: list[MetadataUpdateArgs]
  107. class MetadataDetail(BaseModel):
  108. id: str
  109. name: str
  110. value: Optional[str | int | float] = None
  111. class DocumentMetadataOperation(BaseModel):
  112. document_id: str
  113. metadata_list: list[MetadataDetail]
  114. class MetadataOperationData(BaseModel):
  115. """
  116. Metadata operation data
  117. """
  118. operation_data: list[DocumentMetadataOperation]