commit d07e7caab2
parent f197c47a06
Author: Austin Huang
Date:   2022-01-09 14:32:26 -05:00


@@ -25,9 +25,6 @@ def error(msg):
 
 def get(url: str, write_dir: str, delete=True):
     orig_url = url
     if not url.startswith('https://imgur.com/'):
-        found_url = ''
-        found_urls = []
-        found_list_file = ''
         url = 'https://imgur.com/' + url
     album = False
@@ -45,42 +42,39 @@ def get(url: str, write_dir: str, delete=True):
         if delete:
             Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
     else:
+        found_url = ''
+        found_urls = []
+        found_list_file = ''
         print('Detecting album/gallery images (contentUrl)', url)
         soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
-        for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1):
+        for count, el in enumerate(soup.select('.post-image-container'), start=1):
+            if el is None:
+                continue
+            minisoup = bs4.BeautifulSoup(str(el), 'html.parser')
             try:
-                found_url = "https:" + el['content']
+                found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content']
                 if '?1' in found_url:
                     continue
-            except KeyError:
-                error("Could not obtain url for detected image (contentUrl)")
-                continue
-            if found_url.endswith('ico.jpg'):
-                continue
-            found_urls.append(found_url[-11:])
-            write(count, found_url, write_dir, delete)
-        if len(found_urls) == 0:
-            print('Detecting album/gallery images (id)', url)
-            for count, el in enumerate(soup.select('.post-image-container'), start=1):
+            except (KeyError, IndexError):
+                error("Could not obtain url for detected image (contentUrl), trying id method")
                 try:
                     found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png
                 except KeyError:
                     error("Could not obtain url for detected image (id)")
                     continue
-                found_urls.append(found_url[-11:])
-                write(count, found_url, write_dir, delete)
+            if found_url.endswith('ico.jpg'):
+                continue
+            found_urls.append(found_url[-11:])
+            print(f"Downloading image {count}: {found_url}")
+            print("Writing image", f"{write_dir}{found_url[-11:]}")
+            with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
+                f.write(requests.get(found_url).content)
+            if delete:
+                Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
         # Write the found urls to a file with the name of the album so the viewer endpoint can get them
         found_list_file = write_dir + orig_url.replace('/', '_')
         with open(found_list_file, 'w') as f:
             f.write(','.join(found_urls))
         Thread(target=delete_file, args=[found_list_file]).start()
-
-
-def write(count: int, found_url: str, write_dir: str, delete=True):
-    print(f"Downloading image {count}: {found_url}")
-    print("Writing image", f"{write_dir}{found_url[-11:]}")
-    with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
-        f.write(requests.get(found_url).content)
-    if delete:
-        Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
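
The loop above folds the old two-pass detection into a single pass: each .post-image-container is parsed on its own, the contentUrl meta inside it is preferred, and the container's id attribute is used as a fallback when that lookup fails. A minimal sketch of that fallback, runnable offline; the HTML snippet and the image ids (abc1234, hij5678) are invented for illustration, whereas get() fetches the real markup with requests.get(url).text:

import bs4

html = """
<div class="post-image-container" id="abc1234">
  <div class="post-image">
    <meta itemprop="contentUrl" content="//i.imgur.com/abc1234.jpg"/>
  </div>
</div>
<div class="post-image-container" id="hij5678"></div>
"""

soup = bs4.BeautifulSoup(html, 'html.parser')
for count, el in enumerate(soup.select('.post-image-container'), start=1):
    minisoup = bs4.BeautifulSoup(str(el), 'html.parser')
    try:
        # Preferred path: the contentUrl meta inside this container.
        found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content']
    except (KeyError, IndexError):
        # Fallback path: build a direct link from the container's id.
        found_url = "https://i.imgur.com/" + el['id'] + ".jpg"
    print(count, found_url)
# prints:
# 1 https://i.imgur.com/abc1234.jpg
# 2 https://i.imgur.com/hij5678.jpg

With seven-character ids like these, found_url[-11:] in get() is just the bare filename (e.g. abc1234.jpg), which is what gets written to disk and joined into the album list file for the viewer endpoint.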