clean_embedding_cache_task.py 1.1 KB

123456789101112131415161718192021222324252627282930
  1. import app
  2. import datetime
  3. import time
  4. import click
  5. from flask import current_app
  6. from werkzeug.exceptions import NotFound
  7. from extensions.ext_database import db
  8. from models.dataset import Embedding
  9. @app.celery.task(queue='dataset')
  10. def clean_embedding_cache_task():
  11. click.echo(click.style('Start clean embedding cache.', fg='green'))
  12. clean_days = int(current_app.config.get('CLEAN_DAY_SETTING'))
  13. start_at = time.perf_counter()
  14. thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
  15. page = 1
  16. while True:
  17. try:
  18. embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \
  19. .order_by(Embedding.created_at.desc()).paginate(page=page, per_page=100)
  20. except NotFound:
  21. break
  22. for embedding in embeddings:
  23. db.session.delete(embedding)
  24. db.session.commit()
  25. page += 1
  26. end_at = time.perf_counter()
  27. click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))