import posixpath as path
import logging
import asyncio
from urllib.parse import quote_plus

# pyright: reportWildcardImportFromLibrary=false
from typing import *

# pyright: reportPrivateImportUsage=false
from aiohttp_client_cache import CachedSession, SQLiteBackend


class EngineError(Exception):
    """Base class of the engine exception hierarchy defined in this module."""


class EngineResponseError(EngineError):
    """Engine error concerning a response.

    NOTE(review): not raised by the code in this module; presumably raised by
    engine subclasses when a response cannot be used -- confirm with callers.
    """


class EngineRateLimitError(EngineError):
    """Engine error concerning rate limiting.

    NOTE(review): not raised by the code in this module; presumably raised by
    engine subclasses when the remote site throttles requests -- confirm.
    """


class ReverseImageSearchEngine(object):
    """Base class for reverse image search engines

    Different reverse image search engines can inherit from this class

    Args:
        url_base (str): Base url of the engine
        url_path (str): Path and query to the reverse image search function.
            Should contain `{image_url}` in which the input url will be
            placed to perform the search needed.
        name (str, optional): Name of the search engine
        config (dict, optional): Engine specific configuration, stored as
            ``engine_config``. A fresh empty dict is used when omitted.
        loop (optional): Event loop handed to the HTTP session. Defaults to
            ``asyncio.get_event_loop()``.
        **request_args: Default keyword arguments forwarded to every request
            made through :meth:`search`.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, url_base, url_path, name=None, config=None, loop=None,
                 **request_args) -> None:
        self.url_base = url_base
        self.url_path = url_path
        self.name = name
        self.request_args = request_args
        # A `{}` default would be one dict object shared by every instance
        # created without an explicit config; build a new one per instance.
        self.engine_config = {} if config is None else config

        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop

        # session used to make async cached requests
        self._session = CachedSession(
            cache=SQLiteBackend(
                __name__.lower(),
                expire_after=604800,  # 1 week
                allowable_methods=('GET', 'HEAD', 'POST')
            ),
            loop=loop
        )

    def _build_search_url(self, url) -> str:
        """Return the full search url for the image located at ``url``.

        The image url is escaped with ``quote_plus`` and substituted for
        ``{image_url}`` in ``url_path``; the result is joined to ``url_base``.
        """
        search_path = self.url_path.format(image_url=quote_plus(url))
        # posixpath.join() throws away everything before a component that
        # starts with "/", which would silently drop url_base. Strip leading
        # slashes so the base is always kept.
        return path.join(self.url_base, search_path.lstrip('/'))

    async def _request(self, method, url, **kwargs) -> str:
        """Perform an HTTP request through the cached session.

        Args:
            method (str): HTTP method, e.g. ``'GET'`` or ``'POST'``.
            url (str): Url to request.
            **kwargs: Passed through to the session's ``request`` call.

        Returns:
            str: The response body decoded as text.
        """
        # Do NOT enter `self._session` itself as a context manager here:
        # leaving that context closes the session, which would make every
        # request after the first one fail. Only the response is scoped.
        async with self._session.request(method, url, **kwargs) as resp:
            return await resp.text()

    async def _request_get(self, url, **kwargs) -> str:
        """Perform a GET request and return the response body as text."""
        return await self._request('GET', url, **kwargs)

    async def _request_post(self, url, **kwargs) -> str:
        """Perform a POST request and return the response body as text."""
        return await self._request('POST', url, **kwargs)

    async def search(self, url, post=False, **request_args):
        """Run a reverse image search for ``url`` and return the raw result.

        Args:
            url (str): Url of the image to search for.
            post (bool, optional): Use POST instead of GET when true.
            **request_args: Extra keyword arguments for this request only.
                They take precedence over the defaults given to the
                constructor when the same key appears in both.

        Returns:
            str: The response body of the search request.
        """
        search_url = self._build_search_url(url)
        req_func = self._request_post if post else self._request_get
        # Merge instead of double-unpacking: `f(**a, **b)` raises TypeError
        # when a key is present in both mappings.
        merged_args = {**self.request_args, **request_args}
        result = await req_func(search_url, **merged_args)
        return result

    async def close(self) -> None:
        """Close the underlying HTTP session once the engine is done with."""
        await self._session.close()

    def parse_html(self, html):
        """Parses the search engine's html to a useable dictionary

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError

    async def top_matches(self, url, limit=3):
        """Get a list of the top matched results from the engine

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError

    async def best_match(self, url):
        """Get the best matched result from the reverse image search engine

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError