clean_embedding_cache_task.py 1022 B

123456789101112131415161718192021222324252627282930
  1. import datetime
  2. import time
  3. import click
  4. from werkzeug.exceptions import NotFound
  5. import app
  6. from configs import dify_config
  7. from extensions.ext_database import db
  8. from models.dataset import Embedding
  9. @app.celery.task(queue='dataset')
  10. def clean_embedding_cache_task():
  11. click.echo(click.style('Start clean embedding cache.', fg='green'))
  12. clean_days = int(dify_config.CLEAN_DAY_SETTING)
  13. start_at = time.perf_counter()
  14. thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
  15. while True:
  16. try:
  17. embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \
  18. .order_by(Embedding.created_at.desc()).limit(100).all()
  19. except NotFound:
  20. break
  21. for embedding in embeddings:
  22. db.session.delete(embedding)
  23. db.session.commit()
  24. end_at = time.perf_counter()
  25. click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))