123456789101112131415161718192021222324252627282930 |
- import datetime
- import time
- import click
- from werkzeug.exceptions import NotFound
- import app
- from configs import dify_config
- from extensions.ext_database import db
- from models.dataset import Embedding
@app.celery.task(queue='dataset')
def clean_embedding_cache_task():
    """Delete cached ``Embedding`` rows older than the configured retention.

    Registered as a Celery task on the ``dataset`` queue. The retention
    window in days is read from ``dify_config.CLEAN_DAY_SETTING``. Expired
    rows are fetched and deleted in batches of 100, with one commit per
    batch, until no expired row is left. Progress and total latency are
    echoed via ``click``.
    """
    click.echo(click.style('Start clean embedding cache.', fg='green'))
    clean_days = int(dify_config.CLEAN_DAY_SETTING)
    start_at = time.perf_counter()
    # NOTE(review): naive local time is compared against ``created_at``;
    # confirm the column is stored in the same (naive) convention.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=clean_days)
    while True:
        try:
            embeddings = (
                db.session.query(Embedding)
                .filter(Embedding.created_at < cutoff)
                .order_by(Embedding.created_at.desc())
                .limit(100)
                .all()
            )
        except NotFound:
            break
        # ``Query.all()`` returns an empty list when nothing matches; it does
        # not raise ``NotFound``. Without this check the loop would spin
        # forever once every expired row has been removed.
        if not embeddings:
            break
        for embedding in embeddings:
            db.session.delete(embedding)
        # Commit per batch so each transaction stays small.
        db.session.commit()
    end_at = time.perf_counter()
    click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))
|