rerank_model.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. from typing import Optional
  2. from core.model_manager import ModelInstance
  3. from core.rag.models.document import Document
  4. from core.rag.rerank.rerank_base import BaseRerankRunner
  5. class RerankModelRunner(BaseRerankRunner):
  6. def __init__(self, rerank_model_instance: ModelInstance) -> None:
  7. self.rerank_model_instance = rerank_model_instance
  8. def run(
  9. self,
  10. query: str,
  11. documents: list[Document],
  12. score_threshold: Optional[float] = None,
  13. top_n: Optional[int] = None,
  14. user: Optional[str] = None,
  15. ) -> list[Document]:
  16. """
  17. Run rerank model
  18. :param query: search query
  19. :param documents: documents for reranking
  20. :param score_threshold: score threshold
  21. :param top_n: top n
  22. :param user: unique user id if needed
  23. :return:
  24. """
  25. docs = []
  26. doc_id = []
  27. unique_documents = []
  28. dify_documents = [item for item in documents if item.provider == "dify"]
  29. external_documents = [item for item in documents if item.provider == "external"]
  30. for document in dify_documents:
  31. if document.metadata["doc_id"] not in doc_id:
  32. doc_id.append(document.metadata["doc_id"])
  33. docs.append(document.page_content)
  34. unique_documents.append(document)
  35. for document in external_documents:
  36. docs.append(document.page_content)
  37. unique_documents.append(document)
  38. documents = unique_documents
  39. rerank_result = self.rerank_model_instance.invoke_rerank(
  40. query=query, docs=docs, score_threshold=score_threshold, top_n=top_n, user=user
  41. )
  42. rerank_documents = []
  43. for result in rerank_result.docs:
  44. # format document
  45. rerank_document = Document(
  46. page_content=result.text,
  47. metadata=documents[result.index].metadata,
  48. provider=documents[result.index].provider,
  49. )
  50. rerank_document.metadata["score"] = result.score
  51. rerank_documents.append(rerank_document)
  52. return rerank_documents