rerank_model.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. from typing import Optional
  2. from core.model_manager import ModelInstance
  3. from core.rag.models.document import Document
  4. class RerankModelRunner:
  5. def __init__(self, rerank_model_instance: ModelInstance) -> None:
  6. self.rerank_model_instance = rerank_model_instance
  7. def run(
  8. self,
  9. query: str,
  10. documents: list[Document],
  11. score_threshold: Optional[float] = None,
  12. top_n: Optional[int] = None,
  13. user: Optional[str] = None,
  14. ) -> list[Document]:
  15. """
  16. Run rerank model
  17. :param query: search query
  18. :param documents: documents for reranking
  19. :param score_threshold: score threshold
  20. :param top_n: top n
  21. :param user: unique user id if needed
  22. :return:
  23. """
  24. docs = []
  25. doc_id = []
  26. unique_documents = []
  27. for document in documents:
  28. if document.metadata["doc_id"] not in doc_id:
  29. doc_id.append(document.metadata["doc_id"])
  30. docs.append(document.page_content)
  31. unique_documents.append(document)
  32. documents = unique_documents
  33. rerank_result = self.rerank_model_instance.invoke_rerank(
  34. query=query, docs=docs, score_threshold=score_threshold, top_n=top_n, user=user
  35. )
  36. rerank_documents = []
  37. for result in rerank_result.docs:
  38. # format document
  39. rerank_document = Document(
  40. page_content=result.text,
  41. metadata={
  42. "doc_id": documents[result.index].metadata["doc_id"],
  43. "doc_hash": documents[result.index].metadata["doc_hash"],
  44. "document_id": documents[result.index].metadata["document_id"],
  45. "dataset_id": documents[result.index].metadata["dataset_id"],
  46. "score": result.score,
  47. },
  48. )
  49. rerank_documents.append(rerank_document)
  50. return rerank_documents