Randomize user agent
This commit is contained in:
parent
0a4bd99771
commit
b8eb526782
@ -1,5 +1,9 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 0.0.5
|
||||||
|
|
||||||
|
* Randomize user agent to circumvent rate limiting
|
||||||
|
|
||||||
## 0.0.4
|
## 0.0.4
|
||||||
|
|
||||||
* Improved image scraping (Thanks https://austinhuang.me/)
|
* Improved image scraping (Thanks https://austinhuang.me/)
|
||||||
|
@ -2,11 +2,11 @@ import sys
|
|||||||
from os import remove, write
|
from os import remove, write
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
#from gevent import sleep
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
import requests
|
import requests
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
|
from .useragents import get_random_user_agent
|
||||||
from .config import IMAGE_CACHE, SINGLE_IMAGE_DELETE_AFTER_SECS
|
from .config import IMAGE_CACHE, SINGLE_IMAGE_DELETE_AFTER_SECS
|
||||||
|
|
||||||
def delete_file(path):
|
def delete_file(path):
|
||||||
@ -23,6 +23,7 @@ def error(msg):
|
|||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
|
|
||||||
def get(url: str, write_dir: str, delete=True):
|
def get(url: str, write_dir: str, delete=True):
|
||||||
|
ua = get_random_user_agent()
|
||||||
orig_url = url
|
orig_url = url
|
||||||
if not url.startswith('https://imgur.com/'):
|
if not url.startswith('https://imgur.com/'):
|
||||||
url = 'https://imgur.com/' + url
|
url = 'https://imgur.com/' + url
|
||||||
@ -38,7 +39,7 @@ def get(url: str, write_dir: str, delete=True):
|
|||||||
print('Getting img', url)
|
print('Getting img', url)
|
||||||
url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
|
url = 'https://i.imgur.com/' + url.rsplit('/', 1)[-1]
|
||||||
with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
|
with open(f'{write_dir}/{url[-11:]}', 'wb') as img:
|
||||||
img.write(requests.get(url).content)
|
img.write(requests.get(url, headers={'User-Agent': ua}).content)
|
||||||
if delete:
|
if delete:
|
||||||
Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
|
Thread(target=delete_file, args=[f"{write_dir}/{url[-11:]}"]).start()
|
||||||
return None
|
return None
|
||||||
@ -49,7 +50,7 @@ def get(url: str, write_dir: str, delete=True):
|
|||||||
title = ''
|
title = ''
|
||||||
metas = []
|
metas = []
|
||||||
print('Detecting album/gallery images (contentUrl)', url)
|
print('Detecting album/gallery images (contentUrl)', url)
|
||||||
soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
|
soup = bs4.BeautifulSoup(requests.get(url, headers={'User-Agent': ua}).text, 'html.parser')
|
||||||
try:
|
try:
|
||||||
title = soup.select('meta[property="og:title"]')[0]['content']
|
title = soup.select('meta[property="og:title"]')[0]['content']
|
||||||
if title == "Imgur":
|
if title == "Imgur":
|
||||||
@ -78,7 +79,7 @@ def get(url: str, write_dir: str, delete=True):
|
|||||||
|
|
||||||
print("Writing image", f"{write_dir}{found_url[-11:]}")
|
print("Writing image", f"{write_dir}{found_url[-11:]}")
|
||||||
with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
|
with open(f"{write_dir}{found_url[-11:]}", "wb") as f:
|
||||||
f.write(requests.get(found_url).content)
|
f.write(requests.get(found_url, headers={'User-Agent': ua}).content)
|
||||||
|
|
||||||
if delete:
|
if delete:
|
||||||
Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
|
Thread(target=delete_file, args=[f"{write_dir}{found_url[-11:]}"]).start()
|
||||||
|
6
imgin/useragents.py
Normal file
6
imgin/useragents.py
Normal file
File diff suppressed because one or more lines are too long
@ -39,7 +39,7 @@
|
|||||||
<p>Or, run it with python by installing the requirements.txt and ./run.py. Or use the Dockerfile. Contact me if you want help or find a bug.</p>
|
<p>Or, run it with python by installing the requirements.txt and ./run.py. Or use the Dockerfile. Contact me if you want help or find a bug.</p>
|
||||||
<footer>
|
<footer>
|
||||||
<small>
|
<small>
|
||||||
<p>Imgin Version 0.0.4</p>
|
<p>Imgin Version 0.0.5</p>
|
||||||
Powered by <a href="https://voidnet.tech/">VoidNetwork LLC</a><br>
|
Powered by <a href="https://voidnet.tech/">VoidNetwork LLC</a><br>
|
||||||
This website does not claim ownership of any media.
|
This website does not claim ownership of any media.
|
||||||
<br>This service simply acts as a proxy to Imgur.com and does not store images aside from a brief cache.
|
<br>This service simply acts as a proxy to Imgur.com and does not store images aside from a brief cache.
|
||||||
|
Loading…
Reference in New Issue
Block a user