SaucePlz/engines/__init__.py
2022-04-25 00:23:37 -04:00

99 lines
3.2 KiB
Python

import posixpath as path
import logging
import asyncio
from urllib.parse import quote_plus
# pyright: reportWildcardImportFromLibrary=false
from typing import *
# pyright: reportPrivateImportUsage=false
from aiohttp_client_cache import CachedSession, SQLiteBackend
class EngineError(Exception):
    """Root of the engine exception hierarchy.

    The other engine exceptions defined in this module derive from it, so
    catching ``EngineError`` catches all of them.
    """
class EngineResponseError(EngineError):
    """Engine error related to an engine's response.

    NOTE(review): nothing in this module raises it; the exact conditions are
    presumably defined by the concrete engine implementations - confirm there.
    """
class EngineRateLimitError(EngineError):
    """Engine error related to rate limiting.

    NOTE(review): nothing in this module raises it; presumably raised by
    concrete engines when the remote service throttles them - confirm there.
    """
class ReverseImageSearchEngine(object):
    """Base class for reverse image search engines

    Different reverse image search engines can inherit from this class and
    override `parse_html`, `top_matches` and `best_match`.

    Args:
        url_base (str): Base url of the engine
        url_path (str): Path and query to the reverse image search function.
            Should contain `{image_url}` in which the input url will be placed
            to perform the search needed.
        name (str, optional): Name of the search engine
        config (dict, optional): Engine configuration, stored as
            `engine_config`. A new empty dict is created when omitted.
        loop (optional): Event loop handed to the HTTP session. Defaults to
            `asyncio.get_event_loop()`.
        **request_args: Default keyword arguments added to every request
            issued through `search`.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, url_base, url_path, name=None, config=None, loop=None, **request_args) -> None:
        self.url_base = url_base
        self.url_path = url_path
        self.name = name
        self.request_args = request_args
        # A `{}` default would be a single dict shared by every engine built
        # without a config, so create a fresh one per instance instead.
        self.engine_config = {} if config is None else config

        if not loop:
            loop = asyncio.get_event_loop()
        self.loop = loop

        # session used to make async cached requests
        self._session = CachedSession(
            cache=SQLiteBackend(
                __name__.lower(),
                expire_after=604800,  # 1 week
                allowable_methods=('GET', 'HEAD', 'POST')
            ),
            loop=loop
        )

    def _build_search_url(self, url) -> str:
        """Return the engine's search url for the image located at `url`.

        The image url is escaped with `quote_plus` before being substituted
        for `{image_url}` in `url_path`.
        """
        search_path = self.url_path.format(image_url=quote_plus(url))
        # posixpath.join() discards the base when the second component starts
        # with "/", which would yield a url without scheme and host.
        return path.join(self.url_base, search_path.lstrip('/'))

    async def _request(self, method, url, **kwargs) -> str:
        """Send a `method` request to `url` and return the response text."""
        # The session is shared by all requests of this engine. Entering it
        # with `async with` would close it when the block exits and break
        # every later request, so it is used directly; see `close`.
        resp = await self._session.request(method, url, **kwargs)
        return await resp.text()

    async def _request_get(self, url, **kwargs) -> str:
        """Send a GET request to `url` and return the response text."""
        return await self._request('GET', url, **kwargs)

    async def _request_post(self, url, **kwargs) -> str:
        """Send a POST request to `url` and return the response text."""
        return await self._request('POST', url, **kwargs)

    async def close(self) -> None:
        """Close the underlying HTTP session.

        Call this once the engine is no longer needed; no further requests
        should be made through this instance afterwards.
        """
        await self._session.close()

    async def search(self, url, post=False, **request_args):
        """Run a reverse image search for the image located at `url`.

        Args:
            url (str): Url of the image to search for.
            post (bool, optional): Send a POST request instead of a GET.
            **request_args: Extra keyword arguments for this request only.
                They take precedence over the defaults given to `__init__`.

        Returns:
            str: The text of the engine's response.
        """
        search_url = self._build_search_url(url)
        req_func = self._request_post if post else self._request_get
        # Merge instead of expanding both mappings into the call: a key
        # present in both would otherwise raise TypeError.
        merged_args = {**self.request_args, **request_args}
        return await req_func(search_url, **merged_args)

    def parse_html(self, html):
        """Parses the search engine's html to a useable dictionary

        Override this method when inheriting from this class with the
        site-specific implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def top_matches(self, url, limit=3):
        """Get a list of the top matched results from the engine

        Override this method when inheriting from this class with the
        site-specific implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    async def best_match(self, url):
        """Get the best matched result from the reverse image search engine

        Override this method when inheriting from this class with the
        site-specific implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError