SaucePlz/utils.py
2022-04-25 00:23:37 -04:00

85 lines
3.0 KiB
Python

import re
import json
import traceback
# URL patterns for the supported image hosts, keyed by site name.  They are
# applied with ``Pattern.match`` and are therefore anchored at the start of
# the URL.  Literal dots in host names are escaped so that look-alike hosts
# (e.g. "imgurxcom") are not mistaken for the real site.
REGEXES = {
    # group 1 = single image id, or "gallery", or "a"
    # group 2 = empty if single image, or album/gallery id
    "imgur": re.compile(r"https?://(?:www\.)?imgur\.com/((?:(?!\/).)+)/?((?:(?![\./]).)+)?"),
    # group 1 = gfycat/redgifs id
    "gfycat": re.compile(r"https?://(?:www\.|giant\.)?gfycat\.com/(?:gifs/detail/)?((?:(?![\./-]).)+)"),
    "redgifs": re.compile(r"https?://(?:www\.)?redgifs\.com/watch/((?:(?![\./-]).)+)"),
    # group 1 = giphy id (the text after the last "-" of the slug)
    "giphy": re.compile(r'https?://(?:www\.)?giphy\.com/gifs/(?:.*\-)?((?:(?![\./-]).)+)'),
    # group 1 = tenor id (the text after the last "-" of the slug);
    # "com?" deliberately accepts both tenor.com and tenor.co
    "tenor": re.compile(r'https?://(?:www\.)?tenor\.com?/view/(?:.*\-)?((?:(?![\./-]).)+)'),
}

# Template for a direct giphy GIF URL; {0} is the giphy id.
GIPHY_DIRECT_URL = "https://media.giphy.com/media/{0}/giphy.gif"
def _match_url(url):
    """Find the first known site whose pattern matches the start of *url*.

    Returns a ``(site, match)`` tuple, where ``site`` is a key of ``REGEXES``
    and ``match`` is the corresponding match object, or ``(None, None)``
    when no pattern matches.
    """
    # Lazy generator: patterns are tried in dict order and evaluation stops
    # at the first hit.
    attempts = ((name, pattern.match(url)) for name, pattern in REGEXES.items())
    return next(((name, hit) for name, hit in attempts if hit), (None, None))
async def _resolve_imgur(session, match):
    """Return the preview image URL scraped from an imgur page, or None.

    ``match`` is a match of the imgur pattern: group 1 is an image id, or
    "gallery"/"a" for an album, in which case group 2 holds the album id.
    """
    image_id = match.group(1)
    if image_id in ("gallery", "a"):
        album_id = match.group(2)
        if not album_id:
            # e.g. "https://imgur.com/a/": there is nothing to look up, and
            # formatting the missing id would request ".../a/None".
            return None
        request_url = f"https://imgur.com/{image_id}/{album_id}"
    else:
        request_url = f"https://imgur.com/{image_id}"

    r = await session.get(request_url)
    r.raise_for_status()
    page_html = await r.text()
    # The image URL is read from the page's twitter:image meta tag.
    meta = re.search(r"<meta name=\"twitter:image\" (?:.*?)content=\"(.*?)\"(?:.*?)>", page_html)
    return meta.group(1).strip() if meta else None


async def _resolve_gfycat(session, site, image_id):
    """Return the poster URL reported by the gfycat/redgifs API, or None."""
    r = await session.get(f"https://api.{site}.com/v1/gfycats/{image_id}")
    r.raise_for_status()
    gif_info = await r.json()
    return gif_info.get('gfyItem', {}).get('posterUrl')


async def _resolve_tenor(session, image_id):
    """Return the content URL from a tenor page's JSON-LD block, or None."""
    r = await session.get(f"https://tenor.com/view/{image_id}")
    r.raise_for_status()
    page_html = await r.text()
    script = re.search(r"<script class=\"dynamic\" type=\"application/ld\+json\">(.*?)</script>", page_html)
    raw_info = script.group(1).strip() if script else ""
    if not raw_info:
        return None
    return json.loads(raw_info)['image']['contentUrl']


async def parse_image_url(session, url):
    """Resolve a hosting-site page URL to a direct image URL.

    Resolution is best-effort: when the URL matches no known site, the
    lookup fails, or no image URL can be extracted, the original *url* is
    returned unchanged.  Lookup errors are printed via ``traceback`` rather
    than raised.

    Args:
        session: HTTP client whose awaited ``get(url)`` yields a response
            offering ``raise_for_status()``, ``text()`` and ``json()``
            (presumably an ``aiohttp.ClientSession`` -- confirm with callers).
        url: The URL to resolve.

    Returns:
        The direct image URL if one could be determined, otherwise *url*.
    """
    site, match = _match_url(url)
    if not match:
        return url

    image_id = match.group(1)
    if site == "giphy":
        # giphy needs no request: the direct URL is derived from the id.
        return GIPHY_DIRECT_URL.format(image_id)

    try:
        if site == "imgur":
            resolved = await _resolve_imgur(session, match)
        elif site in ("gfycat", "redgifs"):
            resolved = await _resolve_gfycat(session, site, image_id)
        elif site == "tenor":
            resolved = await _resolve_tenor(session, image_id)
        else:
            return url
    except Exception:
        # Deliberate best-effort boundary: report the failure and fall back
        # to the URL we were given.
        traceback.print_exc()
        return url

    # A missing or empty result also falls back to the original URL.
    return resolved or url