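"""Fetch imgur images and albums and cache them on disk.

Files are written into the caller-supplied cache directory and scheduled
for deletion after SINGLE_IMAGE_DELETE_AFTER_SECS seconds.
"""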
import sys
from os import remove
from threading import Thread
from time import sleep

import requests
import bs4

from .useragents import get_random_user_agent
from .config import SINGLE_IMAGE_DELETE_AFTER_SECS


def delete_file(path):
    """Wait out the cache TTL, then delete the cached file."""
    sleep(SINGLE_IMAGE_DELETE_AFTER_SECS)
    print('Erasing', path)
    try:
        remove(path)
    except FileNotFoundError:
        # The file was already removed; nothing to do.
        pass


def error(msg):
    """Write a message to stderr and flush immediately."""
    sys.stderr.write(msg + "\n")
    sys.stderr.flush()


def get(url: str, write_dir: str, delete=True):
    """Download an imgur image or album into write_dir.

    Returns None for a single image, or (title, metas) for an album.
    """
    ua = get_random_user_agent()
    orig_url = url
    if not url.startswith('https://imgur.com/'):
        url = 'https://imgur.com/' + url

    album = False
    if url.startswith("https://imgur.com/a/"):
        album = True
        # The /layout/blog view lists all album images on a single page.
        if not url.endswith("blog"):
            url += "/layout/blog"

    if not album:
        print('Getting img', url)
        # Fetch straight from the image CDN. The last 11 characters of the
        # url are the filename: a 7-character id plus a 4-character extension.
        url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
        with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
            img.write(requests.get(url, headers={'User-Agent': ua}).content)
        if delete:
            Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
        return None
    else:
        found_url = ''
        found_urls = []
        title = ''
        metas = []
        print('Detecting album/gallery images (contentUrl)', url)
        soup = bs4.BeautifulSoup(requests.get(url, headers={'User-Agent': ua}).text, 'html.parser')
        # Prefer the og:title meta tag; imgur falls back to the bare site
        # name, which is not a real album title.
        try:
            title = soup.select('meta[property="og:title"]')[0]['content']
            if title == "Imgur":
                title = ''
        except (KeyError, IndexError):
            title = ''
        for count, el in enumerate(soup.select('.post-image-container'), start=1):
            if el is None:
                continue
            minisoup = bs4.BeautifulSoup(str(el), 'html.parser')
            try:
                # Primary method: read the contentUrl meta tag of each post image.
                found_url = "https:" + minisoup.select('.post-image meta[itemprop="contentUrl"]')[0]['content']
                if '?1' in found_url:
                    # Skip '?1' variant urls.
                    continue
            except (KeyError, IndexError):
                error("Could not obtain url for detected image (contentUrl), trying id method")
                try:
                    # Fallback: build a CDN url from the container's id attribute.
                    # The CDN serves the same image whether .jpg or .png is requested.
                    found_url = "https://i.imgur.com/" + el['id'] + ".jpg"
                except KeyError:
                    error("Could not obtain url for detected image (id)")
                    continue
            if found_url.endswith('ico.jpg'):
                continue
            found_urls.append(found_url[-11:])
            print(f"Downloading image {count}: {found_url}")

            print("Writing image", f"{write_dir}{found_url[-11:]}")
            with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
                f.write(requests.get(found_url, headers={'User-Agent': ua}).content)

            if delete:
                Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
            # Collect per-image caption metadata for the viewer.
            subtitle = ''
            try:
                subtitle = minisoup.select('.post-image-title')[0].string
            except IndexError:
                subtitle = ''
            desc = ''
            try:
                desc = minisoup.select('.post-image-description')[0].string
            except IndexError:
                desc = ''
            metas.append((subtitle, desc))

        # Write the found urls to a file named after the album so the viewer
        # endpoint can retrieve them.
        found_list_file = write_dir + orig_url.replace('/', '_')
        with open(found_list_file, 'w') as f:
            f.write(','.join(found_urls))
        Thread(target=delete_file, args=[found_list_file]).start()
        return title, metas
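

if __name__ == '__main__':
    # Minimal usage sketch. 'a/AbCdE' is a hypothetical album id and 'cache/'
    # a hypothetical, already-existing cache directory; since this module uses
    # relative imports, run it with `python -m <package>.<module>`.
    result = get('a/AbCdE', 'cache/', delete=False)
    if result is not None:
        album_title, album_metas = result
        print('Album title:', album_title)
        for subtitle, desc in album_metas:
            print('-', subtitle, desc)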