Randomize user agent

This commit is contained in:
Kevin F 2023-08-23 13:44:22 -05:00
parent 0a4bd99771
commit b8eb526782
4 changed files with 16 additions and 5 deletions

View File

@ -1,5 +1,9 @@
# Changelog # Changelog
## 0.0.5
* Randomize user agent to circumvent rate limit
## 0.0.4 ## 0.0.4
* Improved image scraping (Thanks https://austinhuang.me/) * Improved image scraping (Thanks https://austinhuang.me/)

View File

@ -2,11 +2,11 @@ import sys
from os import remove, write from os import remove, write
from threading import Thread from threading import Thread
#from gevent import sleep
from time import sleep from time import sleep
import requests import requests
import bs4 import bs4
from .useragents import get_random_user_agent
from .config import IMAGE_CACHE, SINGLE_IMAGE_DELETE_AFTER_SECS from .config import IMAGE_CACHE, SINGLE_IMAGE_DELETE_AFTER_SECS
def delete_file(path): def delete_file(path):
@ -23,6 +23,7 @@ def error(msg):
sys.stderr.flush() sys.stderr.flush()
def get(url: str, write_dir: str, delete=True): def get(url: str, write_dir: str, delete=True):
ua = get_random_user_agent()
orig_url = url orig_url = url
if not url.startswith('https://imgur.com/'): if not url.startswith('https://imgur.com/'):
url = 'https://imgur.com/' + url url = 'https://imgur.com/' + url
@ -38,7 +39,7 @@ def get(url: str, write_dir: str, delete=True):
print('Getting img', url) print('Getting img', url)
url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1] url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
with open(f'{write_dir}/{url[-11:]}', 'wb') as img: with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
img.write(requests.get(url).content) img.write(requests.get(url, headers={'User-Agent': ua}).content)
if delete: if delete:
Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start() Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
return None return None
@ -49,7 +50,7 @@ def get(url: str, write_dir: str, delete=True):
title = '' title = ''
metas = [] metas = []
print('Detecting album/gallery images (contentUrl)', url) print('Detecting album/gallery images (contentUrl)', url)
soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser') soup = bs4.BeautifulSoup(requests.get(url, headers={'User-Agent': ua}).text, 'html.parser')
try: try:
title = soup.select('meta[property="og:title"]')[0]['content'] title = soup.select('meta[property="og:title"]')[0]['content']
if title == "Imgur": if title == "Imgur":
@ -78,7 +79,7 @@ def get(url: str, write_dir: str, delete=True):
print("Writing image", f"{write_dir}{found_url[-11:]}") print("Writing image", f"{write_dir}{found_url[-11:]}")
with open(f"{write_dir}{found_url[-11:]}", "wb") as f: with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
f.write(requests.get(found_url).content) f.write(requests.get(found_url, headers={'User-Agent': ua}).content)
if delete: if delete:
Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start() Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()

6
imgin/useragents.py Normal file

File diff suppressed because one or more lines are too long

View File

@ -39,7 +39,7 @@
<p>Or, run it with python by installing the requirements.txt and ./run.py. Or use the Dockerfile. Contact me if you want help or find a bug.</p> <p>Or, run it with python by installing the requirements.txt and ./run.py. Or use the Dockerfile. Contact me if you want help or find a bug.</p>
<footer> <footer>
<small> <small>
<p>Imgin Version 0.0.4</p> <p>Imgin Version 0.0.5</p>
Powered by <a href="https://voidnet.tech/">VoidNetwork LLC</a><br> Powered by <a href="https://voidnet.tech/">VoidNetwork LLC</a><br>
This website does not claim ownership of any media. This website does not claim ownership of any media.
<br>This service simply acts as a proxy to Imgur.com and does not store images aside from a brief cache. <br>This service simply acts as a proxy to Imgur.com and does not store images aside from a brief cache.