#coding: utf8
import downloader
import ree as re
from utils import Soup, urljoin, Downloader, join, LazyUrl, Session, get_print
import os
from timee import sleep
from translator import tr_


def get_id(url):
    """Extract the numeric gallery id from an AsmHentai URL or a bare id.

    Accepts the id itself (int or numeric string), a reader URL of the
    form ``.../gallery/<dir>/<id>/...`` or a gallery URL ``.../g/<id>/``.

    Returns:
        int: the gallery id.
    """
    try:
        # A bare id (int or numeric string) needs no parsing.
        return int(url)
    except (TypeError, ValueError):
        # Not a plain number - parse the id out of the URL path.
        if '/gallery/' in url:
            # Reader URL: /gallery/<dir>/<id>/...
            return int(re.find('/gallery/[0-9]+/([0-9]+)', url))
        else:
            # Gallery URL: /g/<id>/
            return int(re.find('/g/([0-9]+)', url))
class Downloader_asmhentai(Downloader):
    """Downloader plugin for asmhentai.com galleries."""
    type = 'asmhentai'
    URLS = ['asmhentai.com']
    MAX_CORE = 8
    display_name = 'AsmHentai'

    def init(self):
        # Dedicated session so cookies/CSRF state persist across the
        # paged thumbnail requests made by get_info().
        self.session = Session()

    @classmethod
    def fix_url(cls, url):
        # Canonicalize any accepted input form to the gallery URL.
        return 'https://asmhentai.com/g/{}/'.format(get_id(url))

    def read(self):
        info = get_info(self.url, self.session, self.cw)

        # 1225
        artist = join(info['artists'])
        self.artist = artist

        # Fall back to 'N/A' for any missing metadata category.
        group = join(info['groups']) if info['groups'] else 'N/A'
        lang = info['language'][0] if info['language'] else 'N/A'
        series = info['parodies'][0] if info['parodies'] else 'N/A'
        title = self.format_title(info['category'][0], info['id'], info['title'], artist, group, series, lang)

        self.urls.extend(img.url for img in info['imgs'])

        self.title = title
class Image:
    """Single gallery page whose image URL resolves lazily."""

    def __init__(self, url, referer):
        # Bind the URL as a default argument so the lazy callback
        # returns it regardless of what LazyUrl passes in.
        self.url = LazyUrl(referer, lambda _, u=url: u, self)
        self.filename = os.path.basename(url)
def get_info(url, session, cw):
    """Scrape gallery metadata and the complete image list.

    Args:
        url: canonical gallery URL (see Downloader_asmhentai.fix_url).
        session: Session used for all requests (keeps cookies/CSRF state).
        cw: optional GUI/control widget; may be None (get_print handles that).

    Returns:
        dict with 'id', 'title', tag categories ('artists', 'groups',
        'parodies', 'tags', 'characters', 'language', 'category', ...)
        and 'imgs' (list of Image, one per page).

    Raises:
        Exception: if a thumbnail page yields no images ('no imgs').
    """
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session)
    soup = Soup(html)

    info = {}

    info['id'] = get_id(url)

    title = soup.find('h1').text.strip()
    info['title'] = title

    # Tag links encode both category and value in the URL path:
    # /<category>/<value>/ .
    for tag in soup.findAll('span', class_='tag'):
        href = tag.parent.attrs['href']
        href = urljoin(url, href).strip('/')

        key = href.split('/')[3]
        value = href.split('/')[-1]

        # 'translated' is a flag, not a language name - skip it.
        if key == 'language' and value == 'translated':
            continue

        if key in info:
            info[key].append(value)
        else:
            info[key] = [value]

    # Guarantee the categories read() relies on always exist.
    for key in ['artists', 'groups', 'parodies', 'tags', 'characters']:
        if key not in info:
            info[key] = []

    info['imgs'] = []
    def read_imgs(soup):
        # Parse preview thumbnails; thumbnail names ending in 't.jpg'
        # map to the full-size image ending in '.jpg'.
        c = 0
        for img in soup.findAll('div', class_='preview_thumb'):
            img = img.find('img').attrs.get('data-src') or img.find('img').attrs.get('src')
            img = urljoin(url, img).replace('t.jpg', '.jpg')
            img = Image(img, url)
            info['imgs'].append(img)
            c += 1
        if not c:
            # An empty batch would otherwise make the pagination loop
            # below spin forever - fail loudly instead.
            raise Exception('no imgs')

    read_imgs(soup)

    csrf = soup.find('meta', {'name':'csrf-token'})['content']
    print_(f'csrf: {csrf}')
    t_pages = int(soup.find('input', type='hidden', id='t_pages')['value'])
    print_(f't_pages: {t_pages}')

    # The landing page only embeds the first batch of thumbnails; fetch
    # the rest through the site's AJAX thumbs loader. #4971
    while len(info['imgs']) < t_pages:
        print_('imgs: {}'.format(len(info['imgs'])))
        sleep(1, cw)
        if cw is not None:  # cw may be None when running without a GUI
            cw.setTitle('{} {} - {} / {}'.format(tr_('읽는 중...'), info['title'], len(info['imgs']), t_pages))
        data = {
            '_token': csrf,
            'id': str(info['id']),
            'dir': soup.find('input', type='hidden', id='dir')['value'],
            'v_pages': len(info['imgs']),
            't_pages': str(t_pages),
            'type': '1',
            }
        r = session.post('https://asmhentai.com/inc/thumbs_loader.php', data=data)
        soup_more = Soup(r.text)
        read_imgs(soup_more)

    return info