85 lines
3.0 KiB
Python
85 lines
3.0 KiB
Python
import re
|
|
import json
|
|
import traceback
|
|
|
|
# Per-site URL patterns, keyed by site name. Each pattern is applied with
# ``match`` (anchored at the start of the URL) and exposes the media id as a
# capture group.
#
# Literal dots in host names are escaped so that "." cannot stand in for an
# arbitrary character (e.g. "imgurXcom" must not be treated as "imgur.com").
REGEXES = {
    # group 1 = single image id, or "gallery", or "a"
    # group 2 = empty if single image, or album/gallery id
    "imgur": re.compile(
        r"https?://(?:www\.)?imgur\.com/((?:(?!\/).)+)/?((?:(?![\./]).)+)?"
    ),

    # group 1 = gfycat/redgifs id
    "gfycat": re.compile(
        r"https?://(?:www\.|giant\.)?gfycat\.com/(?:gifs/detail/)?((?:(?![\./-]).)+)"
    ),
    "redgifs": re.compile(
        r"https?://(?:www\.)?redgifs\.com/watch/((?:(?![\./-]).)+)"
    ),

    # group 1 = giphy id (the part after the last "-" of the slug, if any)
    "giphy": re.compile(
        r"https?://(?:www\.)?giphy\.com/gifs/(?:.*\-)?((?:(?![\./-]).)+)"
    ),

    # group 1 = tenor id (the part after the last "-" of the slug, if any);
    # "com?" accepts both tenor.com and tenor.co
    "tenor": re.compile(
        r"https?://(?:www\.)?tenor\.com?/view/(?:.*\-)?((?:(?![\./-]).)+)"
    ),
}

# Template for a direct giphy GIF link; {0} is the giphy id.
GIPHY_DIRECT_URL = "https://media.giphy.com/media/{0}/giphy.gif"
|
|
|
|
def _match_url(url):
    """Find the first known site whose pattern matches the start of *url*.

    Patterns are tried in the iteration order of ``REGEXES``.

    Returns:
        A ``(site, match)`` tuple, where *site* is the ``REGEXES`` key and
        *match* is the match object, or ``(None, None)`` if nothing matches.
    """
    # Lazy generator: patterns after the first hit are never evaluated.
    attempts = (
        (name, pattern.match(url)) for name, pattern in REGEXES.items()
    )
    return next((hit for hit in attempts if hit[1]), (None, None))
|
|
|
|
async def _resolve_imgur(session, match):
    """Return the twitter:image URL from an imgur page, or None if unavailable."""
    image_id = match.group(1)
    if image_id in ("gallery", "a"):
        album_id = match.group(2)
        if not album_id:
            # No album/gallery id was captured (e.g. ".../a/"); formatting it
            # would request the bogus page ".../a/None".
            return None
        request_url = f"https://imgur.com/{image_id}/{album_id}"
    else:
        request_url = f"https://imgur.com/{image_id}"

    r = await session.get(request_url)
    r.raise_for_status()
    image_page_html = await r.text()
    image_url_match = re.search(
        r"<meta name=\"twitter:image\" (?:.*?)content=\"(.*?)\"(?:.*?)>",
        image_page_html,
    )
    return image_url_match.group(1).strip() if image_url_match else None


async def _resolve_gfycat(session, site, image_id):
    """Return the ``posterUrl`` reported by the gfycat/redgifs API, or None."""
    r = await session.get(f"https://api.{site}.com/v1/gfycats/{image_id}")
    r.raise_for_status()
    gif_info = await r.json()
    return gif_info.get('gfyItem', {}).get('posterUrl')


async def _resolve_tenor(session, image_id):
    """Return ``image.contentUrl`` from a tenor page's ld+json block, or None."""
    r = await session.get(f"https://tenor.com/view/{image_id}")
    r.raise_for_status()
    gif_page_html = await r.text()
    gif_info_match = re.search(
        r"<script class=\"dynamic\" type=\"application/ld\+json\">(.*?)</script>",
        gif_page_html,
    )
    gif_info_raw = gif_info_match.group(1).strip() if gif_info_match else ""
    if not gif_info_raw:
        return None
    gif_info = json.loads(gif_info_raw)
    return gif_info['image']['contentUrl']


async def parse_image_url(session, url):
    """Resolve a link to a supported media site into a direct media URL.

    Supported sites are the keys of ``REGEXES`` (imgur, gfycat, redgifs,
    giphy, tenor). This is best-effort: whenever the URL is not recognised,
    the lookup yields nothing, or the lookup raises, the original *url* is
    returned unchanged.

    Args:
        session: HTTP client whose ``get(url)`` is awaitable and yields a
            response with ``raise_for_status()`` and awaitable ``text()`` /
            ``json()``. NOTE(review): looks like an aiohttp ``ClientSession``
            -- confirm against the caller.
        url: The URL to resolve.

    Returns:
        The resolved media URL, or *url* itself as a fallback.
    """
    site, match = _match_url(url)
    if not match:
        return url

    image_id = match.group(1)

    # giphy needs no network round-trip: the direct link is derived from the id.
    if site == "giphy":
        return GIPHY_DIRECT_URL.format(image_id)

    try:
        if site == "imgur":
            resolved = await _resolve_imgur(session, match)
        elif site in ("gfycat", "redgifs"):
            resolved = await _resolve_gfycat(session, site, image_id)
        elif site == "tenor":
            resolved = await _resolve_tenor(session, image_id)
        else:
            resolved = None
    except Exception:
        # Deliberate best-effort: report the failure and hand back the
        # caller's URL instead of propagating network/parsing errors.
        traceback.print_exc()
        return url

    return resolved or url
|