diff --git a/.gitignore b/.gitignore
index f2e1f52..9556ed4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,3 @@ venv/*
 imgin/__pycache__/*
 testdata/*
 .vscode/*
-*.egg-info
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d48872b..7a79fdb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,25 +1,6 @@
 # Changelog
 
-## 0.0.4
-
-* Improved image scraping (Thanks https://austinhuang.me/)
-* Added bind address/port to config.py (Thanks @a9)
-
-
-## 0.0.3
-
-* Added test coverage for image getter
-* Fixed non album images being included in albums
-
-
-## 0.0.2
-
-Improved gallery view responsiveness and mac font
-
-
-## 0.0.1
-
-Switched from glob to a list file which fixes albums overwriting each other (Reported by SeerLite)
+This project uses Semantic Versioning
 
 
 ## 0.0.0
diff --git a/imgin/__init__.py b/imgin/__init__.py
index 3abc465..a067648 100644
--- a/imgin/__init__.py
+++ b/imgin/__init__.py
@@ -4,6 +4,7 @@ monkey.patch_all()
 from threading import Thread
 from os import remove, mkdir, path, stat
 from shutil import rmtree
+from glob import glob
 from uuid import uuid4
 
 
@@ -12,35 +13,31 @@
 from bottle import static_file
 from bottle import SimpleTemplate
 
 from .get import get
-from .config import IMAGE_CACHE, template_dir, bind_ip, bind_port
+from .config import IMAGE_CACHE, template_dir
 
 
 def get_timestamp_of_file(file):
     return stat(file).st_ctime
 
 
 def album(id):
+    req_id = str(uuid4())
     req = IMAGE_CACHE
-    title, metas = get("a/" + id, req)
-    found_list_file = IMAGE_CACHE + ("a/" + id).replace('/', '_')
+    get("/a/" + id, req)
 
-    with open(found_list_file, 'r') as f:
-        imgs = f.read().split(',')
-
-    for c, img in enumerate(imgs):
-        imgs[c] = IMAGE_CACHE + imgs[c]
+    imgs = glob(req + "*")
 
     # sort image order (file creation time)
     imgs = sorted(imgs, key=get_timestamp_of_file)
 
     for c, img in enumerate(imgs):
-        imgs[c] = (img.replace(IMAGE_CACHE, '/'), metas[c][0], metas[c][1])
+        imgs[c] = img.replace(IMAGE_CACHE, '/')
 
     with open(f'{template_dir}gallery.html', 'r') as img_view:
         tpl = SimpleTemplate(img_view)
-        return tpl.render(imgs=imgs, title=title)
+        return tpl.render(imgs=imgs)
 
 
 @route('/')
 @route('')
@@ -65,7 +62,6 @@ def gallery(id=''):
 def img(img=''):
     if not img.endswith("jpeg") and not img.endswith("jpg") and not img.endswith("png"):
         img = img + ".jpg"
-    img = img.replace('jpeg', 'jpg')
     if not path.exists(IMAGE_CACHE + img):
         get(img, IMAGE_CACHE)
     return static_file(img, root=IMAGE_CACHE)
@@ -78,4 +74,4 @@ def start_server():
         pass
 
     mkdir(IMAGE_CACHE)
-    run(server='gevent', host=bind_ip, port=bind_port)
\ No newline at end of file
+    run(server='gevent', host='0.0.0.0')
\ No newline at end of file
diff --git a/imgin/config.py b/imgin/config.py
index 44c86dc..728fc41 100644
--- a/imgin/config.py
+++ b/imgin/config.py
@@ -1,5 +1,3 @@
 IMAGE_CACHE = '/tmp/imgin-imgur-images/'
 SINGLE_IMAGE_DELETE_AFTER_SECS = 600
-template_dir = 'imgin/web/'
-bind_ip = '0.0.0.0'
-bind_port = '8080'
\ No newline at end of file
+template_dir = 'imgin/web/'
\ No newline at end of file
diff --git a/imgin/get.py b/imgin/get.py
index bff6209..d7de595 100644
--- a/imgin/get.py
+++ b/imgin/get.py
@@ -1,13 +1,12 @@
 import sys
-from os import remove, write
+from os import remove
 from threading import Thread
-#from gevent import sleep
-from time import sleep
 
 import requests
 import bs4
+from gevent import sleep
 
-from .config import IMAGE_CACHE, SINGLE_IMAGE_DELETE_AFTER_SECS
+from .config import SINGLE_IMAGE_DELETE_AFTER_SECS
 
 def delete_file(path):
     sleep(SINGLE_IMAGE_DELETE_AFTER_SECS)
@@ -23,12 +22,14 @@ def error(msg):
     sys.stderr.flush()
 
 def get(url: str, write_dir: str, delete=True):
-    orig_url = url
     if not url.startswith('https://imgur.com/'):
         url = 'https://imgur.com/' + url
 
+    found_url = ''
     album = False
-    if url.startswith("https://imgur.com/a/"):
+    if "gallery" in url:
+        url = url.replace("gallery", "a")
+    if "/a/" in url:
         album = True
         if not url.endswith("blog"):
             url += "/layout/blog"
@@ -36,68 +37,27 @@ def get(url: str, write_dir: str, delete=True):
 
     if not album:
         print('Getting img', url)
-        url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
+        url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1].replace('jpeg', 'jpg')
         with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
             img.write(requests.get(url).content)
         if delete:
             Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
 
-        return None
     else:
-        found_url = ''
-        found_urls = []
-        found_list_file = ''
-        title = ''
-        metas = []
-        print('Detecting album/gallery images (contentUrl)', url)
+        print('Detecting album/gallery images', url)
         soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
 
-        try:
-            title = soup.select('meta[property="og:title"]')[0]['content']
-            if title == "Imgur":
-                title = ''
-        except (KeyError, IndexError):
-            title = ''
-        for count, el in enumerate(soup.select('.post-image-container'), start=1):
-            if el is None:
-                continue
-            minisoup = bs4.BeautifulSoup(str(el), 'html.parser')
+        for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1):
             try:
-                found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content']
-                if '?1' in found_url:
-                    continue
-            except (KeyError, IndexError):
-                error("Could not obtain url for detected image (contentUrl), trying id method")
-                try:
-                    found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png
-                except KeyError:
-                    error("Could not obtain url for detected image (id)")
-                    continue
+                found_url = "https:" + el['content']
+            except KeyError:
+                error("Could not obtain url for detected image")
+                continue
             if found_url.endswith('ico.jpg'):
                 continue
-            found_urls.append(found_url[-11:])
             print(f"Downloading image {count}: {found_url}")
             print("Writing image", f"{write_dir}{found_url[-11:]}")
             with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
                 f.write(requests.get(found_url).content)
-
             if delete:
                 Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
-            subtitle = ''
-            try:
-                subtitle = minisoup.select('.post-image-title')[0].string
-            except IndexError:
-                subtitle = ''
-            desc = ''
-            try:
-                desc = minisoup.select('.post-image-description')[0].string
-            except IndexError:
-                desc = ''
-            date = ''
-            metas.append((subtitle, desc))
-        # Write the found urls to a file with the name of the album so the viewer endpoint can get them
-        found_list_file = write_dir + orig_url.replace('/', '_')
-        with open(found_list_file, 'w') as f:
-            f.write(','.join(found_urls))
-        Thread(target=delete_file, args=[found_list_file]).start()
-        return title, metas
diff --git a/imgin/web/gallery.html b/imgin/web/gallery.html
index 694b136..8d7b4ce 100644
--- a/imgin/web/gallery.html
+++ b/imgin/web/gallery.html
@@ -4,20 +4,14 @@
-    <title>{{title + " - imgin" if title else "imgin - minimal & private imgur proxy"}}</title>
+    <title>imgin - minimal & private imgur proxy</title>
-    % if title != '':
-    {{title}}
-    % end
     % for img in imgs:
-    % if img[1] != '':
-    {{img[1]}}
-    % end
-    {{img[2]}}
     % end