From f197c47a06263a0c9ca748dfed1176b547c4afe4 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 9 Jan 2022 14:30:21 -0500 Subject: [PATCH 1/4] better path handling --- imgin/__init__.py | 4 ++-- imgin/get.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/imgin/__init__.py b/imgin/__init__.py index 85f6cbd..b38d032 100644 --- a/imgin/__init__.py +++ b/imgin/__init__.py @@ -21,8 +21,8 @@ def get_timestamp_of_file(file): def album(id): req = IMAGE_CACHE - get("/a/" + id, req) - found_list_file = IMAGE_CACHE + ("/a/" + id).replace('/', '_') + get("a/" + id, req) + found_list_file = IMAGE_CACHE + ("a/" + id).replace('/', '_') with open(found_list_file, 'r') as f: imgs = f.read().split(',') diff --git a/imgin/get.py b/imgin/get.py index 7eb1f69..4b05bea 100644 --- a/imgin/get.py +++ b/imgin/get.py @@ -25,15 +25,13 @@ def error(msg): def get(url: str, write_dir: str, delete=True): orig_url = url if not url.startswith('https://imgur.com/'): - url = 'https://imgur.com' + url found_url = '' found_urls = [] found_list_file = '' + url = 'https://imgur.com/' + url album = False - if "gallery" in url: - url = url.replace("gallery", "a") - if "/a/" in url: + if url.startswith("https://imgur.com/a/"): album = True if not url.endswith("blog"): url += "/layout/blog" From d07e7caab245944371db6eb463f71ec698093afb Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 9 Jan 2022 14:32:26 -0500 Subject: [PATCH 2/4] refactor #4 --- imgin/get.py | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/imgin/get.py b/imgin/get.py index 4b05bea..1c7ad7f 100644 --- a/imgin/get.py +++ b/imgin/get.py @@ -25,9 +25,6 @@ def error(msg): def get(url: str, write_dir: str, delete=True): orig_url = url if not url.startswith('https://imgur.com/'): - found_url = '' - found_urls = [] - found_list_file = '' url = 'https://imgur.com/' + url album = False @@ -45,42 +42,39 @@ def get(url: str, write_dir: str, delete=True): if delete: Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start() else: + found_url = '' + found_urls = [] + found_list_file = '' print('Detecting album/gallery images (contentUrl)', url) soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') - for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1): + for count, el in enumerate(soup.select('.post-image-container'), start=1): + if el is None: + continue + minisoup = bs4.BeautifulSoup(str(el), 'html.parser') try: - found_url = "https:" + el['content'] + found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content'] if '?1' in found_url: continue - except KeyError: - error("Could not obtain url for detected image (contentUrl)") - continue - if found_url.endswith('ico.jpg'): - continue - found_urls.append(found_url[-11:]) - write(count, found_url, write_dir, delete) - if len(found_urls) == 0: - print('Detecting album/gallery images (id)', url) - for count, el in enumerate(soup.select('.post-image-container'), start=1): + except (KeyError, IndexError): + error("Could not obtain url for detected image (contentUrl), trying id method") try: found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png except KeyError: error("Could not obtain url for detected image (id)") continue - found_urls.append(found_url[-11:]) - write(count, found_url, write_dir, delete) + if found_url.endswith('ico.jpg'): + continue + found_urls.append(found_url[-11:]) + print(f"Downloading image {count}: {found_url}") + + print("Writing image", f"{write_dir}{found_url[-11:]}") + with open(f"{write_dir}{found_url[-11:]}", "wb") as f: + f.write(requests.get(found_url).content) + + if delete: + Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start() # Write the found urls to a file with the name of the album so the viewer endpoint can get them found_list_file = write_dir + orig_url.replace('/', '_') with open(found_list_file, 'w') as f: f.write(','.join(found_urls)) Thread(target=delete_file, args=[found_list_file]).start() - -def write(count: int, found_url: str, write_dir: str, delete=True): - print(f"Downloading image {count}: {found_url}") - - print("Writing image", f"{write_dir}{found_url[-11:]}") - with open(f"{write_dir}{found_url[-11:]}", "wb") as f: - f.write(requests.get(found_url).content) - - if delete: - Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start() From 7eef48e341b955064f9f4426d7c50cb59602d00e Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 9 Jan 2022 14:52:28 -0500 Subject: [PATCH 3/4] render album title --- imgin/__init__.py | 4 ++-- imgin/get.py | 9 +++++++++ imgin/web/gallery.html | 5 ++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/imgin/__init__.py b/imgin/__init__.py index b38d032..eb714d5 100644 --- a/imgin/__init__.py +++ b/imgin/__init__.py @@ -21,7 +21,7 @@ def get_timestamp_of_file(file): def album(id): req = IMAGE_CACHE - get("a/" + id, req) + title = get("a/" + id, req) found_list_file = IMAGE_CACHE + ("a/" + id).replace('/', '_') with open(found_list_file, 'r') as f: @@ -40,7 +40,7 @@ def album(id): with open(f'{template_dir}gallery.html', 'r') as img_view: tpl = SimpleTemplate(img_view) - return tpl.render(imgs=imgs) + return tpl.render(imgs=imgs, title=title) @route('/') @route('') diff --git a/imgin/get.py b/imgin/get.py index 1c7ad7f..b7ad1c3 100644 --- a/imgin/get.py +++ b/imgin/get.py @@ -41,12 +41,20 @@ def get(url: str, write_dir: str, delete=True): img.write(requests.get(url).content) if delete: Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start() + return None else: found_url = '' found_urls = [] found_list_file = '' print('Detecting album/gallery images (contentUrl)', url) soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') + title = '' + try: + title = soup.select('meta[property="og:title"]')[0]['content'] + if title == "Imgur": + title = '' + except (KeyError, IndexError): + title = '' for count, el in enumerate(soup.select('.post-image-container'), start=1): if el is None: continue @@ -78,3 +86,4 @@ def get(url: str, write_dir: str, delete=True): with open(found_list_file, 'w') as f: f.write(','.join(found_urls)) Thread(target=delete_file, args=[found_list_file]).start() + return title diff --git a/imgin/web/gallery.html b/imgin/web/gallery.html index 8d7b4ce..a233a75 100644 --- a/imgin/web/gallery.html +++ b/imgin/web/gallery.html @@ -4,11 +4,14 @@ - imgin - minimal & private imgur proxy + {{title + " - imgin" if title else "imgin - minimal & private imgur proxy"}} + % if title != '': +

{{title}}

+ % end % for img in imgs:
From 500cbab146b7c034fbc9ce8afff9baca91774071 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 9 Jan 2022 15:26:35 -0500 Subject: [PATCH 4/4] render descriptions and small titles --- imgin/__init__.py | 4 ++-- imgin/get.py | 18 ++++++++++++++++-- imgin/web/gallery.html | 7 +++++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/imgin/__init__.py b/imgin/__init__.py index eb714d5..3abc465 100644 --- a/imgin/__init__.py +++ b/imgin/__init__.py @@ -21,7 +21,7 @@ def get_timestamp_of_file(file): def album(id): req = IMAGE_CACHE - title = get("a/" + id, req) + title, metas = get("a/" + id, req) found_list_file = IMAGE_CACHE + ("a/" + id).replace('/', '_') with open(found_list_file, 'r') as f: @@ -34,7 +34,7 @@ def album(id): imgs = sorted(imgs, key=get_timestamp_of_file) for c, img in enumerate(imgs): - imgs[c] = img.replace(IMAGE_CACHE, '/') + imgs[c] = (img.replace(IMAGE_CACHE, '/'), metas[c][0], metas[c][1]) diff --git a/imgin/get.py b/imgin/get.py index b7ad1c3..bff6209 100644 --- a/imgin/get.py +++ b/imgin/get.py @@ -46,9 +46,10 @@ def get(url: str, write_dir: str, delete=True): found_url = '' found_urls = [] found_list_file = '' + title = '' + metas = [] print('Detecting album/gallery images (contentUrl)', url) soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') - title = '' try: title = soup.select('meta[property="og:title"]')[0]['content'] if title == "Imgur": @@ -81,9 +82,22 @@ def get(url: str, write_dir: str, delete=True): if delete: Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start() + + subtitle = '' + try: + subtitle = minisoup.select('.post-image-title')[0].string + except IndexError: + subtitle = '' + desc = '' + try: + desc = minisoup.select('.post-image-description')[0].string + except IndexError: + desc = '' + date = '' + metas.append((subtitle, desc)) # Write the found urls to a file with the name of the album so the viewer endpoint can get them found_list_file = write_dir + orig_url.replace('/', '_') with open(found_list_file, 'w') as f: f.write(','.join(found_urls)) Thread(target=delete_file, args=[found_list_file]).start() - return title + return title, metas diff --git a/imgin/web/gallery.html b/imgin/web/gallery.html index a233a75..694b136 100644 --- a/imgin/web/gallery.html +++ b/imgin/web/gallery.html @@ -13,8 +13,11 @@

{{title}}

% end % for img in imgs: - -
+ % if img[1] != '': +

{{img[1]}}

+ % end +

{{img[2]}}

+ % end