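"""Fetch imgur images and albums and cache them on disk.

Files are written into the caller-supplied cache directory and scheduled
for deletion after SINGLE_IMAGE_DELETE_AFTER_SECS seconds.
"""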
import sys
from os import remove
from threading import Thread
from time import sleep

import requests
import bs4

from .useragents import get_random_user_agent
from .config import SINGLE_IMAGE_DELETE_AFTER_SECS


def delete_file(path):
    """Wait out the cache TTL, then delete the cached file."""
    sleep(SINGLE_IMAGE_DELETE_AFTER_SECS)
    print('Erasing', path)
    try:
        remove(path)
    except FileNotFoundError:
        # The file was already removed; nothing to do.
        pass


def error(msg):
    """Write a message to stderr and flush immediately."""
    sys.stderr.write(msg + "\n")
    sys.stderr.flush()


def get(url: str, write_dir: str, delete=True):
    """Download an imgur image or album into write_dir.

    Returns None for a single image, or (title, metas) for an album.
    """
    ua = get_random_user_agent()
    orig_url = url
    if not url.startswith('https://imgur.com/'):
        url = 'https://imgur.com/' + url

    album = False
    if url.startswith("https://imgur.com/a/"):
        album = True
        # The /layout/blog view lists all album images on a single page.
        if not url.endswith("blog"):
            url += "/layout/blog"

    if not album:
        print('Getting img', url)
        # Fetch straight from the image CDN. The last 11 characters of the
        # url are the filename: a 7-character id plus a 4-character extension.
        url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
        with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
            img.write(requests.get(url, headers={'User-Agent': ua}).content)
        if delete:
            Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
        return None
    else:
        found_url = ''
        found_urls = []
        title = ''
        metas = []
        print('Detecting album/gallery images (contentUrl)', url)
        soup = bs4.BeautifulSoup(requests.get(url, headers={'User-Agent': ua}).text, 'html.parser')
        # Prefer the og:title meta tag; imgur falls back to the bare site
        # name, which is not a real album title.
        try:
            title = soup.select('meta[property="og:title"]')[0]['content']
            if title == "Imgur":
                title = ''
        except (KeyError, IndexError):
            title = ''
        for count, el in enumerate(soup.select('.post-image-container'), start=1):
            if el is None:
                continue
            minisoup = bs4.BeautifulSoup(str(el), 'html.parser')
            try:
                # Primary method: read the contentUrl meta tag of each post image.
                found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content']
                if '?1' in found_url:
                    # Skip '?1' variant urls.
                    continue
            except (KeyError, IndexError):
                error("Could not obtain url for detected image (contentUrl), trying id method")
                try:
                    # Fallback: build a CDN url from the container's id attribute.
                    # The CDN serves the same image whether .jpg or .png is requested.
                    found_url = "https://i.imgur.com/" + el['id'] + ".jpg"
                except KeyError:
                    error("Could not obtain url for detected image (id)")
                    continue
            if found_url.endswith('ico.jpg'):
                continue
            found_urls.append(found_url[-11:])
            print(f"Downloading image {count}: {found_url}")

            print("Writing image", f"{write_dir}{found_url[-11:]}")
            with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
                f.write(requests.get(found_url, headers={'User-Agent': ua}).content)

            if delete:
                Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
            # Collect per-image caption metadata for the viewer.
            subtitle = ''
            try:
                subtitle = minisoup.select('.post-image-title')[0].string
            except IndexError:
                subtitle = ''
            desc = ''
            try:
                desc = minisoup.select('.post-image-description')[0].string
            except IndexError:
                desc = ''
            metas.append((subtitle, desc))

        # Write the found urls to a file named after the album so the viewer
        # endpoint can retrieve them.
        found_list_file = write_dir + orig_url.replace('/', '_')
        with open(found_list_file, 'w') as f:
            f.write(','.join(found_urls))
        Thread(target=delete_file, args=[found_list_file]).start()
        return title, metas
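

if __name__ == '__main__':
    # Minimal usage sketch. 'a/AbCdE' is a hypothetical album id and 'cache/'
    # a hypothetical, already-existing cache directory; since this module uses
    # relative imports, run it with `python -m <package>.<module>`.
    result = get('a/AbCdE', 'cache/', delete=False)
    if result is not None:
        album_title, album_metas = result
        print('Album title:', album_title)
        for subtitle, desc in album_metas:
            print('-', subtitle, desc)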