From 41dc8def0e003583d32fdc197301fba1f396ec69 Mon Sep 17 00:00:00 2001
From: Austin Huang
Date: Thu, 6 Jan 2022 18:11:05 -0500
Subject: [PATCH] add alternative album detection method

---
 imgin/get.py | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/imgin/get.py b/imgin/get.py
index 50abaf9..7eb1f69 100644
--- a/imgin/get.py
+++ b/imgin/get.py
@@ -25,7 +25,7 @@ def error(msg):
 def get(url: str, write_dir: str, delete=True):
     orig_url = url
     if not url.startswith('https://imgur.com/'):
-        url = 'https://imgur.com/' + url
+        url = 'https://imgur.com' + url
     found_url = ''
     found_urls = []
     found_list_file = ''
@@ -47,7 +47,7 @@ def get(url: str, write_dir: str, delete=True):
         if delete:
             Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
     else:
-        print('Detecting album/gallery images', url)
+        print('Detecting album/gallery images (contentUrl)', url)
         soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
         for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1):
             try:
@@ -55,23 +55,34 @@ def get(url: str, write_dir: str, delete=True):
                 if '?1' in found_url:
                     continue
             except KeyError:
-                error("Could not obtain url for detected image")
+                error("Could not obtain url for detected image (contentUrl)")
                 continue
             if found_url.endswith('ico.jpg'):
                 continue
             found_urls.append(found_url[-11:])
-            print(f"Downloading image {count}: {found_url}")
-
-            print("Writing image", f"{write_dir}{found_url[-11:]}")
-            with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
-                f.write(requests.get(found_url).content)
-
-            if delete:
-                Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
+            write(count, found_url, write_dir, delete)
+        if len(found_urls) == 0:
+            print('Detecting album/gallery images (id)', url)
+            for count, el in enumerate(soup.select('.post-image-container'), start=1):
+                try:
+                    found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png
+                except KeyError:
+                    error("Could not obtain url for detected image (id)")
+                    continue
+                found_urls.append(found_url[-11:])
+                write(count, found_url, write_dir, delete)
 
     # Write the found urls to a file with the name of the album so the viewer endpoint can get them
     found_list_file = write_dir + orig_url.replace('/', '_')
     with open(found_list_file, 'w') as f:
         f.write(','.join(found_urls))
 
     Thread(target=delete_file, args=[found_list_file]).start()
+
+def write(count: int, found_url: str, write_dir: str, delete=True):
+    print(f"Downloading image {count}: {found_url}")
+    print("Writing image", f"{write_dir}{found_url[-11:]}")
+    with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
+        f.write(requests.get(found_url).content)
+
+    if delete:
+        Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
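
Below is a minimal, self-contained sketch of the detection order this patch sets up: try the `.post-image meta[itemprop="contentUrl"]` tags first, and only if none match, fall back to building https://i.imgur.com/<id>.jpg links from the `.post-image-container` ids. The function name detect_album_images and the example album URL are illustrative assumptions, not part of imgin; the download, write and delete handling done by get() is omitted here.

# Sketch only: mirrors the two detection passes from imgin/get.py in this
# patch, without the download/delete handling. detect_album_images and the
# example URL below are hypothetical, for illustration.
import bs4
import requests


def detect_album_images(url: str) -> list:
    soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
    found = []

    # First pass (contentUrl): <meta itemprop="contentUrl"> inside .post-image,
    # skipping the '?1' placeholders and 'ico.jpg' thumbnails as get() does.
    for el in soup.select('.post-image meta[itemprop="contentUrl"]'):
        content = el.get('content')
        if content and '?1' not in content and not content.endswith('ico.jpg'):
            found.append(content)

    # Second pass (id): if nothing was found, build direct links from the
    # .post-image-container ids; per the patch comment, .jpg also resolves
    # for images that are actually .png.
    if len(found) == 0:
        for el in soup.select('.post-image-container'):
            if el.get('id'):
                found.append("https://i.imgur.com/" + el['id'] + ".jpg")

    return found


if __name__ == "__main__":
    # Hypothetical album URL, for illustration only.
    for image_url in detect_album_images("https://imgur.com/a/example"):
        print(image_url)

Running the id-based pass only when found_urls is empty keeps the original contentUrl behaviour intact whenever that metadata is present.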