| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 | 
							- from abc import ABC, abstractmethod
 
- from collections.abc import Sequence
 
- from typing import Any, Optional
 
- from pydantic import BaseModel
 
class ChildDocument(BaseModel):
    """Class for storing a piece of text and associated metadata."""

    # NOTE: the class docstring above is kept verbatim because pydantic uses a
    # model's docstring as its schema description.

    # The text held by this document.
    page_content: str

    # Optional list of floats associated with the text; None when not provided.
    # NOTE(review): presumably the embedding of page_content - confirm with callers.
    vector: Optional[list[float]] = None

    # Arbitrary metadata about the page content (e.g., source, relationships
    # to other documents, etc.).
    # pydantic copies a field's default value for each new instance, so this
    # dict literal is not shared between instances the way a mutable default
    # argument would be.
    metadata: dict = {}
 
class Document(BaseModel):
    """Class for storing a piece of text and associated metadata."""

    # NOTE: the class docstring above is kept verbatim because pydantic uses a
    # model's docstring as its schema description.

    # The text held by this document.
    page_content: str

    # Optional list of floats associated with the text; None when not provided.
    # NOTE(review): presumably the embedding of page_content - confirm with callers.
    vector: Optional[list[float]] = None

    # Arbitrary metadata about the page content (e.g., source, relationships
    # to other documents, etc.).
    # pydantic copies a field's default value for each new instance, so this
    # dict literal is not shared between instances the way a mutable default
    # argument would be.
    metadata: dict = {}

    # Provider label for the document; defaults to "dify" and may be None.
    provider: Optional[str] = "dify"

    # Optional list of ChildDocument items attached to this document; None
    # when there are none.
    children: Optional[list[ChildDocument]] = None
 
class BaseDocumentTransformer(ABC):
    """Interface for components that turn one sequence of Documents into another.

    An implementation receives a sequence of Documents and hands back a
    sequence of transformed Documents. Both the synchronous and the
    asynchronous entry point are abstract, so a concrete subclass has to
    define each of them before it can be instantiated.

    Example:
        .. code-block:: python

            class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
                embeddings: Embeddings
                similarity_fn: Callable = cosine_similarity
                similarity_threshold: float = 0.95

                class Config:
                    arbitrary_types_allowed = True

                def transform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    stateful_documents = get_stateful_documents(documents)
                    embedded_documents = _get_embeddings_from_stateful_docs(
                        self.embeddings, stateful_documents
                    )
                    included_idxs = _filter_similar_embeddings(
                        embedded_documents, self.similarity_fn, self.similarity_threshold
                    )
                    return [stateful_documents[i] for i in sorted(included_idxs)]

                async def atransform_documents(
                    self, documents: Sequence[Document], **kwargs: Any
                ) -> Sequence[Document]:
                    raise NotImplementedError
    """

    @abstractmethod
    def transform_documents(
        self,
        documents: Sequence[Document],
        **kwargs: Any,
    ) -> Sequence[Document]:
        """Produce transformed Documents from ``documents``.

        Args:
            documents: The Documents to transform.
            **kwargs: Extra keyword arguments; their meaning is left to the
                implementing subclass.

        Returns:
            The transformed Documents, as a sequence.
        """

    @abstractmethod
    async def atransform_documents(
        self,
        documents: Sequence[Document],
        **kwargs: Any,
    ) -> Sequence[Document]:
        """Produce transformed Documents from ``documents`` asynchronously.

        Coroutine counterpart of ``transform_documents``.

        Args:
            documents: The Documents to transform.
            **kwargs: Extra keyword arguments; their meaning is left to the
                implementing subclass.

        Returns:
            The transformed Documents, as a sequence.
        """
 
 
  |