diff --git a/CHANGELOG.md b/CHANGELOG.md index d48872b..f98666a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,5 @@ # Changelog -## 0.0.4 - -* Improved image scraping (Thanks https://austinhuang.me/) -* Added bind address/port to config.py (Thanks @a9) - - ## 0.0.3 * Added test coverage for image getter diff --git a/imgin/__init__.py b/imgin/__init__.py index 3abc465..85f6cbd 100644 --- a/imgin/__init__.py +++ b/imgin/__init__.py @@ -21,8 +21,8 @@ def get_timestamp_of_file(file): def album(id): req = IMAGE_CACHE - title, metas = get("a/" + id, req) - found_list_file = IMAGE_CACHE + ("a/" + id).replace('/', '_') + get("/a/" + id, req) + found_list_file = IMAGE_CACHE + ("/a/" + id).replace('/', '_') with open(found_list_file, 'r') as f: imgs = f.read().split(',') @@ -34,13 +34,13 @@ def album(id): imgs = sorted(imgs, key=get_timestamp_of_file) for c, img in enumerate(imgs): - imgs[c] = (img.replace(IMAGE_CACHE, '/'), metas[c][0], metas[c][1]) + imgs[c] = img.replace(IMAGE_CACHE, '/') with open(f'{template_dir}gallery.html', 'r') as img_view: tpl = SimpleTemplate(img_view) - return tpl.render(imgs=imgs, title=title) + return tpl.render(imgs=imgs) @route('/') @route('') diff --git a/imgin/get.py b/imgin/get.py index bff6209..50abaf9 100644 --- a/imgin/get.py +++ b/imgin/get.py @@ -26,9 +26,14 @@ def get(url: str, write_dir: str, delete=True): orig_url = url if not url.startswith('https://imgur.com/'): url = 'https://imgur.com/' + url + found_url = '' + found_urls = [] + found_list_file = '' album = False - if url.startswith("https://imgur.com/a/"): + if "gallery" in url: + url = url.replace("gallery", "a") + if "/a/" in url: album = True if not url.endswith("blog"): url += "/layout/blog" @@ -41,36 +46,17 @@ def get(url: str, write_dir: str, delete=True): img.write(requests.get(url).content) if delete: Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start() - return None else: - found_url = '' - found_urls = [] - found_list_file = '' - title = '' - metas = [] - print('Detecting album/gallery images (contentUrl)', url) + print('Detecting album/gallery images', url) soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') - try: - title = soup.select('meta[property="og:title"]')[0]['content'] - if title == "Imgur": - title = '' - except (KeyError, IndexError): - title = '' - for count, el in enumerate(soup.select('.post-image-container'), start=1): - if el is None: - continue - minisoup = bs4.BeautifulSoup(str(el), 'html.parser') + for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1): try: - found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content'] + found_url = "https:" + el['content'] if '?1' in found_url: continue - except (KeyError, IndexError): - error("Could not obtain url for detected image (contentUrl), trying id method") - try: - found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png - except KeyError: - error("Could not obtain url for detected image (id)") - continue + except KeyError: + error("Could not obtain url for detected image") + continue if found_url.endswith('ico.jpg'): continue found_urls.append(found_url[-11:]) @@ -82,22 +68,10 @@ def get(url: str, write_dir: str, delete=True): if delete: Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start() - - subtitle = '' - try: - subtitle = minisoup.select('.post-image-title')[0].string - except IndexError: - subtitle = '' - desc = '' - try: - desc = minisoup.select('.post-image-description')[0].string - except IndexError: - desc = '' - date = '' - metas.append((subtitle, desc)) # Write the found urls to a file with the name of the album so the viewer endpoint can get them found_list_file = write_dir + orig_url.replace('/', '_') with open(found_list_file, 'w') as f: f.write(','.join(found_urls)) Thread(target=delete_file, args=[found_list_file]).start() - return title, metas + + diff --git a/imgin/web/gallery.html b/imgin/web/gallery.html index 694b136..8d7b4ce 100644 --- a/imgin/web/gallery.html +++ b/imgin/web/gallery.html @@ -4,20 +4,14 @@ - {{title + " - imgin" if title else "imgin - minimal & private imgur proxy"}} + imgin - minimal & private imgur proxy - % if title != '': -

{{title}}

- % end % for img in imgs: - % if img[1] != '': -

{{img[1]}}

- % end -

{{img[2]}}

- + +
% end