rag_web_reader.py 640 B

123456789101112131415161718
import re
  2. def get_image_upload_file_ids(content):
  3. pattern = r"!\[image\]\((http?://.*?(file-preview|image-preview))\)"
  4. matches = re.findall(pattern, content)
  5. image_upload_file_ids = []
  6. for match in matches:
  7. if match[1] == "file-preview":
  8. content_pattern = r"files/([^/]+)/file-preview"
  9. else:
  10. content_pattern = r"files/([^/]+)/image-preview"
  11. content_match = re.search(content_pattern, match[0])
  12. if content_match:
  13. image_upload_file_id = content_match.group(1)
  14. image_upload_file_ids.append(image_upload_file_id)
  15. return image_upload_file_ids