Bumped version

Merge branch 'austin'
add alternative album detection method
2022-01-09 00:08:40 -06:00 · 2022-01-09 00:03:53 -06:00 · 2022-01-06 18:11:05 -05:00
3 changed files with 29 additions and 12 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,11 @@
 # Changelog

+## 0.0.4
+
+* Improved image scraping (Thanks https://austinhuang.me/)
+* Added bind address/port to config.py (Thanks @a9)
+
+
 ## 0.0.3

 * Added test coverage for image getter
--- a/imgin/get.py
+++ b/imgin/get.py
@ -25,7 +25,7 @@ def error(msg):
 def get(url: str, write_dir: str, delete=True):
    orig_url = url
    if not url.startswith('https://imgur.com/'):
-        url = 'https://imgur.com/' + url
+        url = 'https://imgur.com' + url
    found_url = ''
    found_urls = []
    found_list_file = ''
@ -47,7 +47,7 @@ def get(url: str, write_dir: str, delete=True):
        if delete:
            Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
    else:
-        print('Detecting album/gallery images', url)
+        print('Detecting album/gallery images (contentUrl)', url)
        soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
        for count, el in enumerate(soup.select('.post-image meta[itemprop="contentUrl"]'), start=1):
            try:
@ -55,23 +55,34 @@ def get(url: str, write_dir: str, delete=True):
                if '?1' in found_url:
                    continue
            except KeyError:
-                error("Could not obtain url for detected image")
+                error("Could not obtain url for detected image (contentUrl)")
                continue
            if found_url.endswith('ico.jpg'):
                continue
            found_urls.append(found_url[-11:])
-            print(f"Downloading image {count}: {found_url}")
-
-            print("Writing image", f"{write_dir}{found_url[-11:]}")
-            with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
-                f.write(requests.get(found_url).content)
-
-            if delete:
-                Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
+            write(count, found_url, write_dir, delete)
+        if len(found_urls) == 0:
+            print('Detecting album/gallery images (id)', url)
+            for count, el in enumerate(soup.select('.post-image-container'), start=1):
+                try:
+                    found_url = "https://i.imgur.com/" + el['id'] + ".jpg" # equivalent to .png
+                except KeyError:
+                    error("Could not obtain url for detected image (id)")
+                    continue
+                found_urls.append(found_url[-11:])
+                write(count, found_url, write_dir, delete)
        # Write the found urls to a file with the name of the album so the viewer endpoint can get them
        found_list_file = write_dir + orig_url.replace('/', '_')
        with open(found_list_file, 'w') as f:
            f.write(','.join(found_urls))
        Thread(target=delete_file, args=[found_list_file]).start()

+def write(count: int, found_url: str, write_dir: str, delete=True):
+    print(f"Downloading image {count}: {found_url}")

+    print("Writing image", f"{write_dir}{found_url[-11:]}")
+    with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
+        f.write(requests.get(found_url).content)
+
+    if delete:
+        Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
--- a/imgin/web/index.html
+++ b/imgin/web/index.html
@ -39,7 +39,7 @@
      <p>Or, run it with python by installing the requirements.txt and ./run.py. Or use the Dockerfile. Contact me if you want help or find a bug.</p>
      <footer>
        <small>
-            <p>Imgin Version 0.0.2</p>
+            <p>Imgin Version 0.0.4</p>
            Powered by <a href="https://voidnet.tech/">VoidNetwork LLC</a><br>
            This website does not claim ownership of any media.
            <br>This service simply acts as a proxy to Imgur.com and does not store images aside from a brief cache.
Author	SHA1	Message	Date
Kevin F	fb8baf35f4	Bumped version	2022-01-09 00:08:40 -06:00
Kevin F	59b2a5e893	Merge branch 'austin'	2022-01-09 00:03:53 -06:00
Austin Huang	41dc8def0e	add alternative album detection method	2022-01-06 18:11:05 -05:00