import re
from urllib.parse import urlparse

import requests
from sqlalchemy import func

from .engine import db
from .types import StringUUID

# Compiled once at import time instead of on every ``status`` access.
# The pattern accepts http/https/ftp/ftps URLs whose host is a domain name,
# ``localhost``, an IPv4 literal or an IPv6 literal, with an optional port and
# an optional path/query. Matching is case-insensitive.
_URL_FORMAT_PATTERN = re.compile(
    r'^(?:http|ftp)s?://'  # scheme: http, https, ftp or ftps
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
    r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$',
    re.IGNORECASE,
)


class ExternalApplication(db.Model):
    """ORM model for a row of the ``external_applications`` table.

    Besides the stored columns, the model exposes a computed ``status``
    property that probes ``url`` over HTTP.
    """

    __tablename__ = "external_applications"
    __table_args__ = (db.PrimaryKeyConstraint("id", name="external_application_pkey"),)

    # presumably the accepted values for the ``type`` column -- nothing in this
    # module enforces it; TODO confirm against callers.
    EXTERNAL_APPLICATION_TYPE_LIST = ["QUESTION_ANSWER", "SEARCH", "RECOMMEND"]

    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
    name = db.Column(db.String(255), nullable=False)
    type = db.Column(db.String(255), nullable=False)
    url = db.Column(db.String(255), nullable=False)
    method = db.Column(db.String(255), nullable=False)
    # ``status`` is not a stored column (the definition below is commented
    # out); it is computed by the ``status`` property further down.
    # status = db.Column(db.Boolean, nullable=False, server_default=db.text("true"))
    created_by = db.Column(StringUUID, nullable=False)
    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
    updated_by = db.Column(StringUUID, nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())

    @property
    def status(self) -> bool:
        """Report whether ``url`` is well-formed and currently answers with HTTP 200.

        The checks run in order and the first failure short-circuits:

        0. ``url`` must be a string matching ``_URL_FORMAT_PATTERN``;
        1. it is parsed with ``urlparse``;
        2. its scheme must be ``http`` or ``https``;
        3. its network location must be non-empty;
        4. a GET request (5 second timeout) must return status code 200.

        Returns:
            ``True`` only if every check passes; ``False`` otherwise,
            including when ``url`` is unset or the request raises a
            ``requests`` exception.

        NOTE(review): every access performs a blocking network request, and
        the probe always uses GET -- the ``method`` column is not consulted.
        NOTE(review): the URL comes from a stored column; if that value can be
        user-supplied, this request may reach internal hosts (SSRF) -- confirm
        whether an allow-list is needed.
        """
        target_url = self.url

        # An unset or non-string URL can never be reachable; without this
        # guard the regex match below would raise TypeError.
        if not isinstance(target_url, str):
            return False

        # 0) Validate the URL format with the regular expression.
        if _URL_FORMAT_PATTERN.match(target_url) is None:
            return False

        # 1) Parse the URL with urllib.
        parsed_url = urlparse(target_url)

        # 2) Check the scheme: the pattern also admits ftp/ftps, but only
        #    http and https can be probed with an HTTP request.
        if parsed_url.scheme not in ("http", "https"):
            return False

        # 3) Check that the network location (host[:port]) is present.
        if not parsed_url.netloc:
            return False

        # 4) Send the HTTP request; only an exact 200 counts as healthy.
        try:
            response = requests.get(target_url, timeout=5)
        except requests.exceptions.RequestException:
            return False
        return response.status_code == 200