99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
import posixpath as path
|
|
import logging
|
|
import asyncio
|
|
from urllib.parse import quote_plus
|
|
|
|
# pyright: reportWildcardImportFromLibrary=false
|
|
from typing import *
|
|
|
|
# pyright: reportPrivateImportUsage=false
|
|
from aiohttp_client_cache import CachedSession, SQLiteBackend
|
|
|
|
class EngineError(Exception):
    """Root of the exception hierarchy for reverse image search engines.

    ``EngineResponseError`` and ``EngineRateLimitError`` derive from this
    class, so catching ``EngineError`` catches both of them.
    """
|
|
|
|
class EngineResponseError(EngineError):
    """Engine error related to a search engine's response.

    NOTE(review): presumably raised when an engine returns an unusable
    response; nothing in this file raises it, so confirm against the callers.
    """
|
|
|
|
class EngineRateLimitError(EngineError):
    """Engine error related to rate limiting.

    NOTE(review): presumably raised when an engine rejects a search because
    a request quota was exceeded; nothing in this file raises it, so confirm
    against the callers.
    """
|
|
|
|
class ReverseImageSearchEngine:
    """Base class for reverse image search engines.

    Different reverse image search engines can inherit from this class and
    override :meth:`parse_html`, :meth:`top_matches` and :meth:`best_match`
    with their site-specific implementation.

    The engine owns one cached HTTP session that is reused for every request.
    Call :meth:`close` (or use the engine as an async context manager) once
    the engine is no longer needed.

    Args:
        url_base (str): Base url of the engine.
        url_path (str): Path and query to the reverse image search function.
            Should contain `{image_url}` in which the input url will be placed
            to perform the search needed.
        name (str, optional): Name of the search engine.
        config (dict, optional): Engine configuration, stored as
            ``engine_config``. A new empty dict is used when omitted.
        loop (optional): Event loop handed to the HTTP session. Defaults to
            ``asyncio.get_event_loop()``.
        **request_args: Default keyword arguments added to every request
            issued through :meth:`search`.
    """

    logger = logging.getLogger(__name__)

    def __init__(
        self,
        url_base: str,
        url_path: str,
        name: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None,
        loop=None,
        **request_args,
    ) -> None:
        self.url_base = url_base
        self.url_path = url_path
        self.name = name
        self.request_args = request_args
        # A ``{}`` default would be one dict shared by every instance, so a
        # fresh dict is created per engine when no config is supplied.
        self.engine_config = {} if config is None else config

        if loop is None:
            loop = asyncio.get_event_loop()
        self.loop = loop

        # session used to make async cached requests
        self._session = CachedSession(
            cache=SQLiteBackend(
                __name__.lower(),
                expire_after=604800,  # 1 week
                allowable_methods=('GET', 'HEAD', 'POST'),
            ),
            loop=loop,
        )

    async def __aenter__(self):
        """Return the engine itself so it can be used with ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP session when leaving the ``async with`` block."""
        await self.close()

    async def close(self) -> None:
        """Close the underlying cached HTTP session."""
        await self._session.close()

    def _build_search_url(self, url: str) -> str:
        """Build the full search url for the image located at ``url``.

        ``url`` is escaped with ``quote_plus`` and substituted for
        ``{image_url}`` in ``url_path``; the result is appended to
        ``url_base``.
        """
        search_path = self.url_path.format(image_url=quote_plus(url))
        # posixpath.join discards every previous component when it meets an
        # absolute one, so a leading "/" on the path would drop the base url.
        return path.join(self.url_base, search_path.lstrip('/'))

    async def _request(self, method: str, url: str, **kwargs) -> str:
        """Send a ``method`` request to ``url`` and return the body as text."""
        # Only the response is scoped by the context manager. Entering the
        # session itself with ``async with`` would close it on exit and make
        # every later request on this engine fail.
        async with self._session.request(method, url, **kwargs) as resp:
            return await resp.text()

    async def _request_get(self, url: str, **kwargs) -> str:
        """Send a GET request to ``url`` and return the body as text."""
        return await self._request('GET', url, **kwargs)

    async def _request_post(self, url: str, **kwargs) -> str:
        """Send a POST request to ``url`` and return the body as text."""
        return await self._request('POST', url, **kwargs)

    async def search(self, url: str, post: bool = False, **request_args) -> str:
        """Run a reverse image search for the image located at ``url``.

        Args:
            url (str): Url of the image to search for.
            post (bool, optional): Send a POST request instead of a GET.
            **request_args: Keyword arguments for this request only. They
                take precedence over the defaults given to the constructor.

        Returns:
            str: The response body returned by the engine.
        """
        search_url = self._build_search_url(url)
        req_func = self._request_post if post else self._request_get
        # Merge instead of expanding both mappings into the call: a key that
        # is present in both would otherwise raise a TypeError.
        merged_args = {**self.request_args, **request_args}
        return await req_func(search_url, **merged_args)

    def parse_html(self, html):
        """Parses the search engine's html to a useable dictionary

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError

    async def top_matches(self, url, limit=3):
        """Get a list of the top matched results from the engine

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError

    async def best_match(self, url):
        """Get the best matched result from the reverse image search engine

        Override this method when inheriting from this class with the
        site-specific implementation.
        """
        raise NotImplementedError