import logging
from typing import List

from bs4 import BeautifulSoup
from langchain.document_loaders.base import BaseLoader
from langchain.schema import Document

logger = logging.getLogger(__name__)


class HTMLLoader(BaseLoader):
    """Loader that turns an HTML file into a plain-text document.

    The file is parsed with BeautifulSoup's ``html.parser`` backend and the
    text extracted from the parsed markup becomes the document's content.

    Args:
        file_path: Path to the file to load.
    """

    def __init__(self, file_path: str):
        """Remember the path of the HTML file; nothing is read yet."""
        self._file_path = file_path

    def load(self) -> List[Document]:
        """Read the file and wrap its text in a single ``Document``.

        Returns:
            A one-element list whose ``Document`` has the extracted text as
            its ``page_content``.
        """
        content = self._load_as_text()
        document = Document(page_content=content)
        return [document]

    def _load_as_text(self) -> str:
        """Parse the file and return its text with outer whitespace removed.

        Returns:
            The stripped text, or an empty string when the markup yields no
            text at all.
        """
        # The file is opened in binary mode and the raw handle is passed
        # straight to BeautifulSoup.
        with open(self._file_path, "rb") as handle:
            extracted = BeautifulSoup(handle, "html.parser").get_text()

        if not extracted:
            return ""
        return extracted.strip()