^q^
This commit is contained in:
parent
a69b757610
commit
bcd33f9118
Binary file not shown.
Before Width: | Height: | Size: 475 KiB After Width: | Height: | Size: 974 KiB |
|
@ -14,7 +14,18 @@ import math
|
|||
import ree as re
|
||||
import utils
|
||||
from collections import OrderedDict
|
||||
_VALID_URL = 'https?://(?:www\\.|bangumi\\.|)bilibili\\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\\d+)/play#)(?P<id>\\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|bangumi)\.)?
|
||||
bilibili\.(?:tv|com)/
|
||||
(?:
|
||||
(?:
|
||||
video/[aA][vV]|
|
||||
anime/(?P<anime_id>\d+)/play\#
|
||||
)(?P<id_bv>\d+)|
|
||||
video/[bB][vV](?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||
RESOLS = OrderedDict()
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
# uncompyle6 version 3.5.0
|
||||
# Python bytecode 2.7 (62211)
|
||||
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
|
||||
# Embedded file name: daumtoon_downloader.pyo
|
||||
# Compiled at: 2019-10-03 10:11:29
|
||||
import downloader
|
||||
from utils import Soup, Session, LazyUrl, Downloader, try_n, get_imgs_already, clean_title, get_print
|
||||
import json, os
|
||||
from timee import time, sleep
|
||||
import ree as re
|
||||
from translator import tr_
|
||||
import page_selector
|
||||
|
||||
|
||||
class Page(object):
|
||||
|
||||
def __init__(self, id, url, title, serviceType):
|
||||
self.id = id
|
||||
self.url = url
|
||||
self.title = title
|
||||
self.serviceType = serviceType
|
||||
|
||||
|
||||
class Image(object):
|
||||
|
||||
def __init__(self, url, page, p):
|
||||
self._url = url
|
||||
self.url = LazyUrl(page.url, self.get, self)
|
||||
ext = os.path.splitext(url.split('?')[0])[1]
|
||||
if ext.lower()[1:] not in ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp'):
|
||||
ext = '.jpg'
|
||||
self.filename = (u'{}/{:04}{}').format(clean_title(page.title), p, ext)
|
||||
|
||||
def get(self, _):
|
||||
return self._url
|
||||
|
||||
|
||||
def get_id(url):
|
||||
if '/league/' in url:
|
||||
header = 'league_'
|
||||
else:
|
||||
header = ''
|
||||
body = re.find('/viewer/([0-9a-zA-Z_-]+)', url) or re.find('/view/([0-9a-zA-Z_-]+)', url)
|
||||
return header, body
|
||||
|
||||
|
||||
def get_info(url, session):
|
||||
referer = url
|
||||
header, id = get_id(referer)
|
||||
if 'league_' in id:
|
||||
type_ = 'leaguetoon'
|
||||
else:
|
||||
type_ = 'webtoon'
|
||||
|
||||
info = {}
|
||||
ids = set()
|
||||
pages = []
|
||||
for p in range(1, 1+10):
|
||||
if p == 1:
|
||||
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(type_, id, int(time()))
|
||||
else:
|
||||
if type_ == 'webtoon':
|
||||
break
|
||||
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(type_, id, p, int(time()))
|
||||
print(url)
|
||||
info_raw = downloader.read_html(url, referer=referer, session=session)
|
||||
_info = json.loads(info_raw)
|
||||
webtoon = _info['data'].get('webtoon') or _info['data'].get('leaguetoon')
|
||||
if webtoon is None:
|
||||
raise Exception('No webtoon')
|
||||
|
||||
if p == 1:
|
||||
info['title'] = webtoon['title']
|
||||
artists = []
|
||||
for artist in webtoon['cartoon']['artists']:
|
||||
artist = artist['penName']
|
||||
if artist in artists:
|
||||
continue
|
||||
artists.append(artist)
|
||||
|
||||
if len(artists) > 1:
|
||||
artists = [
|
||||
artists[1], artists[0]] + artists[2:]
|
||||
info['artists'] = artists
|
||||
|
||||
eps = webtoon.get('webtoonEpisodes') or webtoon.get('leaguetoonEpisodes')
|
||||
if not eps:
|
||||
if p > 1:
|
||||
eps = []
|
||||
else:
|
||||
raise Exception('No eps')
|
||||
c = 0
|
||||
for ep in eps:
|
||||
id_ = ep.get('articleId') or ep.get('id')
|
||||
title = ep['title']
|
||||
serviceType = 'free' if type_ =='leaguetoon' else ep['serviceType']
|
||||
if type_ == 'leaguetoon':
|
||||
url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
|
||||
else:
|
||||
url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
|
||||
if id_ in ids:
|
||||
continue
|
||||
c += 1
|
||||
ids.add(id_)
|
||||
page = Page(id_, url, title, serviceType)
|
||||
pages.append(page)
|
||||
if c == 0:
|
||||
print('c == 0; break')
|
||||
break
|
||||
|
||||
info['pages'] = sorted(pages, key=lambda x: x.id)
|
||||
return info
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_daumtoon(Downloader):
|
||||
type = 'daumtoon'
|
||||
URLS = ['webtoon.daum.net']
|
||||
MAX_CORE = 16
|
||||
MAX_SPEED = 4.0
|
||||
display_name = 'Daum Webtoon'
|
||||
|
||||
def init(self):
|
||||
if '/viewer/' in self.url:
|
||||
return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
|
||||
if '/view/' not in self.url and not self.url.lower().startswith('http'):
|
||||
self.url = ('http://webtoon.daum.net/webtoon/view/{}').format(self.url)
|
||||
self.session = None
|
||||
self._info = get_info(self.url, self.session)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
title = self._info['title']
|
||||
artists = self._info['artists']
|
||||
artist = artists[0] if artists else 'N/A'
|
||||
title = self.format_title('N/A', ''.join(get_id(self.url)), title, artist, 'N/A', 'N/A', 'Korean', prefix='daumtoon_')
|
||||
return clean_title(title)
|
||||
|
||||
def read(self):
|
||||
self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(self.name)
|
||||
imgs = get_imgs_all(self._info, self.name, self.session, cw=self.cw)
|
||||
for img in imgs:
|
||||
if isinstance(img, Image):
|
||||
self.urls.append(img.url)
|
||||
else:
|
||||
self.urls.append(img)
|
||||
|
||||
self.title = self.name
|
||||
self.session = None
|
||||
return
|
||||
|
||||
|
||||
def get_imgs(page, session, cw):
|
||||
print_ = get_print(cw)
|
||||
html = downloader.read_html(page.url, session=session)
|
||||
header, id = get_id(page.url)
|
||||
t = int(time())
|
||||
soup = Soup(html)
|
||||
if 'league_' in id:
|
||||
type_ = 'leaguetoon'
|
||||
else:
|
||||
type_ = 'webtoon'
|
||||
|
||||
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
|
||||
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
|
||||
data = json.loads(data_raw)
|
||||
m_type = data['data']['webtoonEpisode']['multiType']
|
||||
print_('m_type: {}'.format(m_type))
|
||||
|
||||
if m_type == 'chatting':
|
||||
page.url = page.url.replace('daum.net/', 'daum.net/m/')
|
||||
url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
|
||||
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
|
||||
data = json.loads(data_raw)
|
||||
imgs = []
|
||||
for chat in data['data']['webtoonEpisodeChattings']:
|
||||
img = chat.get('image')
|
||||
if not img:
|
||||
continue
|
||||
img = Image(img['url'], page, len(imgs))
|
||||
imgs.append(img)
|
||||
else:
|
||||
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
|
||||
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
|
||||
data = json.loads(data_raw)
|
||||
imgs = []
|
||||
for img in data['data']:
|
||||
img = Image(img['url'], page, len(imgs))
|
||||
imgs.append(img)
|
||||
|
||||
return imgs
|
||||
|
||||
|
||||
def get_imgs_all(info, title, session, cw=None):
|
||||
pages = info['pages']
|
||||
pages = page_selector.filter(pages, cw)
|
||||
imgs = []
|
||||
for p, page in enumerate(pages):
|
||||
if page.serviceType != 'free':
|
||||
continue
|
||||
imgs_already = get_imgs_already('daumtoon', title, page, cw)
|
||||
if imgs_already:
|
||||
imgs += imgs_already
|
||||
continue
|
||||
imgs += get_imgs(page, session, cw)
|
||||
if cw is not None:
|
||||
cw.setTitle(tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
|
||||
if not cw.alive:
|
||||
break
|
||||
|
||||
return imgs
|
||||
|
||||
|
||||
@page_selector.register('daumtoon')
|
||||
@try_n(4)
|
||||
def f(url):
|
||||
info = get_info(url, None)
|
||||
return info['pages']
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
import downloader
|
||||
from utils import Soup, try_n, LazyUrl, Downloader, lock, get_print, clean_title
|
||||
from timee import sleep
|
||||
import base64
|
||||
import json
|
||||
import constants
|
||||
import ree as re
|
||||
KEY = b'gefdzfdef'
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_epio(Downloader):
|
||||
type = 'epio'
|
||||
URLS = ['epio.app']
|
||||
|
||||
def read(self):
|
||||
info = get_info(self.url, cw=self.cw)
|
||||
|
||||
imgs = info['imgs']
|
||||
|
||||
for img in imgs:
|
||||
self.urls.append(img.url)
|
||||
|
||||
self.title = clean_title(info['title'])
|
||||
|
||||
|
||||
class Image(object):
|
||||
|
||||
def __init__(self, url, referer, p):
|
||||
self._url = url
|
||||
self.url = LazyUrl(referer, self.get, self)
|
||||
ext = '.jpg'#
|
||||
self.filename = u'{:04}{}'.format(p, ext)
|
||||
|
||||
def get(self, referer):
|
||||
return self._url
|
||||
|
||||
|
||||
def get_info(url, cw=None):
|
||||
info = _get_info(url, cw)
|
||||
|
||||
imgs = []
|
||||
html = info['content']
|
||||
soup = Soup(html)
|
||||
for img in soup.findAll('img'):
|
||||
src = img.attrs.get('src')
|
||||
if not src:
|
||||
continue
|
||||
|
||||
# 1696
|
||||
if not isinstance(src, bytes):
|
||||
src = src.encode('utf8')
|
||||
t = base64.b64encode(src)
|
||||
if isinstance(t, bytes):
|
||||
t = t.decode('utf8')
|
||||
src = 'https://cdn1-images.epio.app/image/download/{}'.format(t)
|
||||
|
||||
img = Image(src, url, len(imgs))
|
||||
imgs.append(img)
|
||||
info['imgs'] = imgs
|
||||
|
||||
return info
|
||||
|
||||
|
||||
def get_id(url):
|
||||
return re.find('article/detail/([0-9a-z]+)', url)
|
||||
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
import aes
|
||||
backend = default_backend()
|
||||
def decrypt(s, cw=None):
|
||||
print_ = get_print(cw)
|
||||
key, iv = aes.key_and_iv(s[:16], KEY)
|
||||
print_('key: {}\niv: {}'.format(key, iv))
|
||||
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
|
||||
r = -len(s) % 16
|
||||
if r:
|
||||
s += b'\x00' * r
|
||||
dec = cipher.decryptor()
|
||||
s_dec = dec.update(s[16:]) + dec.finalize()
|
||||
s_dec = s_dec[:-s_dec[-1]]
|
||||
if r:
|
||||
s_dec = s_dec[:-r]
|
||||
return s_dec
|
||||
|
||||
|
||||
|
||||
def _get_info(url, cw=None):
|
||||
id = get_id(url)
|
||||
|
||||
url_api = 'https://girlimg.epio.app/api/articles/{}?lang=en-us'.format(id)
|
||||
html = downloader.read_html(url_api, referer=url)
|
||||
s = json.loads(html)['string']
|
||||
|
||||
s = base64.b64decode(s)
|
||||
s = decrypt(s, cw)
|
||||
info = json.loads(s)
|
||||
|
||||
return info
|
|
@ -0,0 +1,186 @@
|
|||
import downloader
|
||||
import ytdl
|
||||
from utils import Downloader, Session, try_n, LazyUrl, get_ext, format_filename, clean_title, get_print
|
||||
from io import BytesIO
|
||||
import ree as re
|
||||
from m3u8_tools import playlist2stream, M3u8_stream
|
||||
import utils
|
||||
import ffmpeg
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_etc(Downloader):
|
||||
type = 'etc'
|
||||
URLS = []
|
||||
single = True
|
||||
MAX_PARALLEL = 8
|
||||
display_name = 'Etc'
|
||||
|
||||
def init(self):
|
||||
self.session = Session()
|
||||
name = ytdl.get_extractor_name(self.url)
|
||||
self.print_('extractor: {}'.format(name))
|
||||
if name == 'generic':
|
||||
raise NotImplementedError()
|
||||
|
||||
def read(self):
|
||||
video = get_video(self.url, self.session, self.cw)
|
||||
|
||||
if video.artist:
|
||||
self.artist = video.artist
|
||||
|
||||
self.urls.append(video.url)
|
||||
|
||||
self.print_('url_thumb: {}'.format(video.url_thumb))
|
||||
self.setIcon(video.thumb)
|
||||
if video.header.lower() not in ['yourporn', 'spankbang']:
|
||||
self.enableSegment()#
|
||||
if isinstance(video.url(), M3u8_stream):
|
||||
self.disableSegment()
|
||||
|
||||
self.title = '[{}] {}'.format(video.header, video.title)
|
||||
|
||||
|
||||
def int_or_none(s):
|
||||
try:
|
||||
return int(s)
|
||||
except:
|
||||
return None
|
||||
|
||||
|
||||
def format_(f):
|
||||
if f is None:
|
||||
return 'None'
|
||||
return '{} - {} - {} - {}'.format(f['format'], f['_resolution'], f['_audio'], f['url'])
|
||||
|
||||
|
||||
@try_n(4)
|
||||
def get_video(url, session, cw, ie_key=None):
|
||||
print_ = get_print(cw)
|
||||
options = {
|
||||
'noplaylist': True,
|
||||
#'extract_flat': True,
|
||||
'playlistend': 1,
|
||||
}
|
||||
|
||||
ydl = ytdl.YoutubeDL(options)
|
||||
info = ydl.extract_info(url)
|
||||
if not ie_key:
|
||||
ie_key = ytdl.get_extractor_name(url)
|
||||
info['ie_key'] = ie_key
|
||||
url_new = info.get('url')
|
||||
print('url: {} -> {}'.format(url, url_new))
|
||||
formats = info.get('formats', [])
|
||||
print(info.keys())
|
||||
|
||||
if not formats and (info.get('entries') or 'title' not in info):
|
||||
if 'entries' in info:
|
||||
entry = info['entries'][0]
|
||||
url_new = entry.get('url') or entry['webpage_url']
|
||||
if url_new != url:
|
||||
return get_video(url_new, session, cw, ie_key=get_ie_key(info))
|
||||
|
||||
session.headers.update(info.get('http_headers', {}))
|
||||
#session.cookies.update(ydl.cookiejar)
|
||||
|
||||
if not formats:
|
||||
print('no formats')
|
||||
if url_new:
|
||||
f = {'url': url_new, 'format': ''}
|
||||
formats.append(f)
|
||||
|
||||
fs = []
|
||||
for i, f in enumerate(formats):
|
||||
f['_index'] = i
|
||||
f['_resolution'] = f.get('vbr') or int_or_none(re.find('([0-9]+)p', f['format'], re.IGNORECASE)) or f.get('height') or f.get('width') or int(f.get('vcodec', 'none') != 'none')
|
||||
f['_audio'] = f.get('abr') or f.get('asr') or int(f.get('acodec', 'none') != 'none')
|
||||
print_(format_(f))
|
||||
fs.append(f)
|
||||
|
||||
if not fs:
|
||||
raise Exception('No videos')
|
||||
|
||||
f = sorted(fs, key=lambda f:(f['_resolution'], f['_index']))[-1]
|
||||
if f['_audio']:
|
||||
f_audio = None
|
||||
else:
|
||||
fs_audio = sorted([f_audio for f_audio in fs if (not f_audio['_resolution'] and f_audio['_audio'])], key=lambda f:(f['_audio'], f['_index']))
|
||||
if fs_audio:
|
||||
f_audio = fs_audio[-1]
|
||||
else:
|
||||
try:
|
||||
f = sorted([f for f in fs if f['_audio']], key=lambda f:(f['_resolution'], f['_index']))[-1]
|
||||
except IndexError:
|
||||
pass
|
||||
f_audio = None
|
||||
print_('video: {}'.format(format_(f)))
|
||||
print_('audio: {}'.format(format_(f_audio)))
|
||||
video = Video(f, f_audio, info, session, url, cw=cw)
|
||||
|
||||
return video
|
||||
|
||||
|
||||
def get_ie_key(info):
|
||||
ie_key = info.get('ie_key') or info['extractor']
|
||||
ie_key = ie_key.split(':')[0]
|
||||
if ie_key.lower().endswith('playlist'):
|
||||
ie_key = ie_key[:-len('playlist')]
|
||||
return ie_key
|
||||
|
||||
|
||||
class Video(object):
|
||||
def __init__(self, f, f_audio, info, session, referer, cw=None):
|
||||
self.f_audio = f_audio
|
||||
self.cw = cw
|
||||
self.title = title = info['title']
|
||||
self.id = info['id']
|
||||
self.url = f['url']
|
||||
self.artist = info.get('uploader')
|
||||
self.header = utils.capitalize(get_ie_key(info))
|
||||
self.session = session
|
||||
self.referer = referer
|
||||
|
||||
self.url_thumb = info.get('thumbnail')
|
||||
self.thumb = BytesIO()
|
||||
if self.url_thumb:
|
||||
downloader.download(self.url_thumb, referer=referer, buffer=self.thumb, session=session)
|
||||
|
||||
try:
|
||||
ext = downloader.get_ext(self.url, session, referer)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
ext = get_ext(self.url)
|
||||
|
||||
if not ext:
|
||||
print('empty ext')
|
||||
if f['_resolution']:
|
||||
ext = '.mp4'
|
||||
else:
|
||||
ext = '.mp3'
|
||||
|
||||
if ext.lower() == '.m3u8':
|
||||
try:
|
||||
url = playlist2stream(self.url, referer, session=session, n_thread=4)
|
||||
except:
|
||||
url = M3u8_stream(self.url, referer=referer, session=session, n_thread=4)
|
||||
ext = '.mp4'
|
||||
else:
|
||||
url = self.url
|
||||
self.url = LazyUrl(referer, lambda x: url, self, pp=self.pp)
|
||||
self.filename = format_filename(title, self.id, ext, header=self.header)
|
||||
|
||||
def pp(self, filename):
|
||||
if self.cw:
|
||||
with self.cw.convert(self):
|
||||
return self._pp(filename)
|
||||
else:
|
||||
return self._pp(filename)
|
||||
|
||||
def _pp(self, filename):
|
||||
if self.f_audio:
|
||||
f = BytesIO()
|
||||
downloader.download(self.f_audio['url'], buffer=f, referer=self.referer, session=self.session)
|
||||
ffmpeg.merge(filename, f, cw=self.cw)
|
||||
return filename
|
||||
|
||||
|
|
@ -0,0 +1,260 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from utils import Session, urljoin, Soup, LazyUrl, try_n, Downloader, get_outdir, clean_title
|
||||
import ree as re
|
||||
import json
|
||||
import os
|
||||
from translator import tr_
|
||||
from timee import sleep
|
||||
from downloader import getsize
|
||||
import errors
|
||||
PATTERN_CURSOR = '".+?&cursor=([0-9]+)'
|
||||
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
|
||||
|
||||
|
||||
class Image(object):
|
||||
def __init__(self, url):
|
||||
if 'fbid=' in url:
|
||||
id = int(re.findall('fbid=([0-9]+)', url)[0])
|
||||
elif 'photos/' in url:
|
||||
id = int(url.split('photos/')[1].split('/')[1])
|
||||
else:
|
||||
id = int(url)
|
||||
self.id = id
|
||||
def f(_):
|
||||
img = get_img(url)
|
||||
ext = os.path.splitext(img.split('?')[0])[1]
|
||||
self.filename = u'{}{}'.format(id, ext)
|
||||
return img
|
||||
self.url = LazyUrl(url, f, self)
|
||||
|
||||
|
||||
@try_n(4)
|
||||
def get_img(url):
|
||||
#print('get_img', url)
|
||||
html = read_html(url)
|
||||
soup = Soup(html)
|
||||
|
||||
for div in soup.findAll('div'):
|
||||
href = div.attrs.get('data-full-size-href')
|
||||
if href:
|
||||
img = href
|
||||
break
|
||||
else:
|
||||
img = None
|
||||
|
||||
if img is None:
|
||||
|
||||
# 1869
|
||||
for code in soup.findAll('code'):
|
||||
code = code.string
|
||||
hidden = Soup(code)
|
||||
soup.append(hidden)
|
||||
|
||||
for a in soup.findAll('a'):
|
||||
target = a.attrs.get('target')
|
||||
if target == '_blank':
|
||||
img = a.attrs['href']
|
||||
break
|
||||
else:
|
||||
raise Exception('No img')
|
||||
|
||||
return img
|
||||
|
||||
|
||||
def suitable(url):
|
||||
if 'facebook.com' not in url.lower():
|
||||
return False
|
||||
if '/videos/' in url or 'video.php?' in url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_facebook(Downloader):
|
||||
type = 'facebook'
|
||||
URLS = [suitable]
|
||||
_soup = None
|
||||
MAX_CORE = 8
|
||||
|
||||
@classmethod
|
||||
def fix_url(cls, url):
|
||||
if 'facebook.com/' not in url:
|
||||
url = 'https://facebook.com/{}'.format(url)
|
||||
url = url.replace('m.facebook.', 'facebook.')
|
||||
if 'www.facebook.com/' not in url:
|
||||
url = url.replace('facebook.com/', 'www.facebook.com/', 1)
|
||||
if '/profile.php?' not in url:
|
||||
url = url.split('?')[0]
|
||||
return url.split('#')[0].strip('/')
|
||||
|
||||
@property
|
||||
def username(self):
|
||||
username = get_username(self.url)
|
||||
return username
|
||||
|
||||
@property
|
||||
def soup(self):
|
||||
if self._soup is None:
|
||||
html = read_html(self.url)
|
||||
self._soup = Soup(html)
|
||||
return self._soup
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
title = get_title(self.soup)
|
||||
id_ = 'facebook_{}'.format(self.username)
|
||||
title = u'{} ({})'.format(title, id_)
|
||||
return clean_title(title)
|
||||
|
||||
@property
|
||||
def album(self):
|
||||
if 'album_id=' in self.url:
|
||||
album = re.findall('album_id=([0-9]+)', self.url)[0]
|
||||
else:
|
||||
album = None
|
||||
return album
|
||||
|
||||
def read(self):
|
||||
self.print_(self.name)
|
||||
self.title = tr_(u'읽는 중... {}').format(self.name)
|
||||
|
||||
imgs = get_imgs(self.username, self.name, cw=self.cw)
|
||||
|
||||
for img in imgs:
|
||||
if isinstance(img, Image):
|
||||
self.urls.append(img.url)
|
||||
else:
|
||||
self.urls.append(img)
|
||||
|
||||
self.title = self.name
|
||||
|
||||
|
||||
def read_html(url):
|
||||
return downloader.read_html(url, user_agent=UA)
|
||||
|
||||
|
||||
def get_title(soup):
|
||||
html = str(soup)
|
||||
name = re.find(r'"__isProfile":"Page","name":(".*?")', html) or re.find(r'"name":(".*?")', html)
|
||||
if not name:
|
||||
gc = soup.find('div', id='globalContainer')
|
||||
if gc and gc.find('form', id='login_form'):
|
||||
raise errors.LoginRequired()
|
||||
raise Exception('no name')
|
||||
title = json.loads(name)
|
||||
return title
|
||||
|
||||
|
||||
def get_imgs(username, title, cw=None):
|
||||
urls = [
|
||||
'https://m.facebook.com/{}/photos'.format(username),
|
||||
'https://m.facebook.com/profile.php?id={}&sk=photos'.format(username), # no custom URL
|
||||
]
|
||||
|
||||
for url in urls:
|
||||
print('get_imgs url:', url)
|
||||
try:
|
||||
html = read_html(url)
|
||||
except:
|
||||
continue
|
||||
soup = Soup(html)
|
||||
if soup.find('a', id='signup-button'):
|
||||
raise errors.LoginRequired()
|
||||
|
||||
photo = soup.find('div', class_='_5v64')
|
||||
if photo is not None:
|
||||
break
|
||||
else:
|
||||
raise Exception('No photo div')
|
||||
|
||||
cursor = photo.a.attrs['href'].split('/photos/')[1].split('/')[1]
|
||||
print('first cursor:', cursor)
|
||||
|
||||
href = re.find(r'(/photos/pandora/\?album_token=.+?)"', html)
|
||||
href = urljoin(url, href)
|
||||
href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)
|
||||
|
||||
cursors = set([cursor])
|
||||
|
||||
imgs = []
|
||||
|
||||
dups = {}
|
||||
dir = os.path.join(get_outdir('facebook'), title)
|
||||
try:
|
||||
filenames = os.listdir(dir)
|
||||
except:
|
||||
filenames = []
|
||||
for filename in filenames:
|
||||
name, ext = os.path.splitext(filename)
|
||||
if name.isdigit():
|
||||
dups[int(name)] = os.path.join(dir, filename)
|
||||
|
||||
pages = set()
|
||||
|
||||
while True:
|
||||
print(href)
|
||||
html = read_html(href)
|
||||
data_raw = html.replace('for (;;);', '')
|
||||
data = json.loads(data_raw)
|
||||
actions = data['payload']['actions']
|
||||
for action in actions:
|
||||
if action['target'] == 'm_more_photos':
|
||||
break
|
||||
else:
|
||||
print('No more photos')
|
||||
break
|
||||
html = action['html']
|
||||
soup = Soup(html)
|
||||
photos = soup.findAll('div' ,class_='_5v64')
|
||||
for photo in photos:
|
||||
for a in photo.findAll('a'):
|
||||
page = a.attrs['href']
|
||||
page = urljoin(href, page)
|
||||
|
||||
# remove duplicate pages
|
||||
if page in pages:
|
||||
continue
|
||||
pages.add(page)
|
||||
|
||||
img = Image(page)
|
||||
id = img.id
|
||||
if id in dups and getsize(dups[id]) > 0:
|
||||
print('skip', id)
|
||||
imgs.append(dups[id])
|
||||
else:
|
||||
imgs.append(img)
|
||||
|
||||
s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
|
||||
if cw is not None:
|
||||
cw.setTitle(s)
|
||||
if not cw.alive:
|
||||
return []
|
||||
else:
|
||||
print(s)
|
||||
|
||||
cursor = re.find(PATTERN_CURSOR, data_raw)
|
||||
#print(cursor)
|
||||
if cursor is None:
|
||||
print('no cursor')
|
||||
break
|
||||
if cursor in cursors:
|
||||
print('same cursor')
|
||||
break
|
||||
cursors.add(cursor)
|
||||
|
||||
href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)
|
||||
|
||||
return imgs
|
||||
|
||||
|
||||
|
||||
def get_username(url):
|
||||
if '/profile.php?' in url:
|
||||
id = re.find(r'/profile\.php[\?&]id=([0-9]+)', url)
|
||||
return id
|
||||
else:
|
||||
url = url.replace('facebook.com/pg/', 'facebook.com/')
|
||||
return url.split('?')[0].split('facebook.com/')[1].split('/')[0]
|
||||
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
#coding: utf-8
|
||||
import downloader
|
||||
import flickr_api
|
||||
from timee import sleep
|
||||
from utils import Downloader, LazyUrl, query_url, clean_title
|
||||
import os
|
||||
from translator import tr_
|
||||
import ree as re
|
||||
from datetime import datetime
|
||||
import flickr_auth
|
||||
|
||||
|
||||
alphabet = '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
|
||||
base = len(alphabet)
|
||||
def b58encode(div, s=''):
|
||||
if div >= base:
|
||||
div, mod = divmod(div, base)
|
||||
return b58encode(div, alphabet[mod] + s)
|
||||
return alphabet[div] + s
|
||||
def b58decode(s):
|
||||
return sum(alphabet.index(c) * pow(base, i) for i, c in enumerate(reversed(s)))
|
||||
|
||||
|
||||
|
||||
class Image(object):
|
||||
def __init__(self, photo):
|
||||
self.photo = photo
|
||||
self.id = photo.id
|
||||
self.filename = None
|
||||
|
||||
def f(_=None):
|
||||
url = photo.getPhotoFile()
|
||||
#url = 'https://flic.kr/p/{}'.format(b58encode(int(photo.id)))
|
||||
ext = os.path.splitext(url)[1]
|
||||
date = datetime.fromtimestamp(int(photo.dateuploaded))
|
||||
date = u'{:02}-{:02}-{:02}'.format(date.year%100, date.month, date.day)
|
||||
self.filename = u'[{}] {}{}'.format(date, self.id, ext)
|
||||
return url
|
||||
self.url = LazyUrl(u'flickr_{}'.format(self.id), f, self)
|
||||
|
||||
|
||||
def find_ps(url):
|
||||
user = flickr_api.Person.findByUrl(url)
|
||||
id = re.search('/albums/([0-9]+)', url).groups()[0]
|
||||
pss = user.getPhotosets()
|
||||
for ps in pss:
|
||||
if ps.id == id:
|
||||
break
|
||||
else:
|
||||
raise Exception('Not found photoset id')
|
||||
return user, ps
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_flickr(Downloader):
|
||||
type = 'flickr'
|
||||
URLS = ['flickr.com']
|
||||
_name = None
|
||||
|
||||
def init(self):
|
||||
if 'flickr.com' in self.url.lower():
|
||||
self.url = self.url.replace('http://', 'https://')
|
||||
else:
|
||||
self.url = 'https://www.flickr.com/people/{}'.format(self.url)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
global pss
|
||||
if self._name is None:
|
||||
url = self.url
|
||||
flickr_auth.get_api(url, self.cw)
|
||||
if '/albums/' in url:
|
||||
user, ps = find_ps(url)
|
||||
self._name = u'{} (flickr_album_{}_{})'.format(ps.title, user.id, ps.id)
|
||||
else:
|
||||
user = flickr_api.Person.findByUrl(url)
|
||||
self._name = u'{} (flickr_{})'.format(user.username, user.id)
|
||||
return clean_title(self._name)
|
||||
|
||||
|
||||
def read(self):
|
||||
self.title = self.name
|
||||
|
||||
imgs = get_imgs(self.url, self.title, cw=self.cw)
|
||||
|
||||
for img in imgs:
|
||||
self.urls.append(img.url)
|
||||
|
||||
self.title = self.name
|
||||
|
||||
|
||||
def get_imgs(url, title=None, cw=None):
|
||||
flickr_auth.get_api(title, cw)
|
||||
if not flickr_auth.isAuth:
|
||||
raise Exception('No Auth')
|
||||
|
||||
|
||||
if '/albums/' in url:
|
||||
user, ps = find_ps(url)
|
||||
handle = ps
|
||||
else:
|
||||
user = flickr_api.Person.findByUrl(url)
|
||||
handle = user
|
||||
|
||||
photos = []
|
||||
|
||||
per_page = 500
|
||||
for page in range(1, 200):
|
||||
photos_new = handle.getPhotos(per_page=per_page, page=page)
|
||||
photos += photos_new
|
||||
if len(photos_new) < per_page:
|
||||
break
|
||||
|
||||
msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(photos))
|
||||
if cw:
|
||||
if not cw.alive:
|
||||
break
|
||||
cw.setTitle(msg)
|
||||
else:
|
||||
print(msg)
|
||||
|
||||
imgs = []
|
||||
for photo in photos:
|
||||
img = Image(photo)
|
||||
imgs.append(img)
|
||||
|
||||
return imgs
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
# uncompyle6 version 3.5.0
|
||||
# Python bytecode 2.7 (62211)
|
||||
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
|
||||
# Embedded file name: imgur_downloader.pyo
|
||||
# Compiled at: 2019-10-07 05:58:14
|
||||
import downloader
|
||||
from utils import Downloader, Soup, try_n, urljoin, get_max_range, clean_title, cut_pair
|
||||
import ree as re, json, os
|
||||
from timee import sleep
|
||||
from translator import tr_
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_imgur(Downloader):
|
||||
type = 'imgur'
|
||||
URLS = ['imgur.com']
|
||||
MAX_CORE = 16
|
||||
|
||||
def init(self):
|
||||
self.info = get_info(self.url)
|
||||
|
||||
@property
|
||||
def id_(self):
|
||||
return re.find('imgur.com/.+?/([0-9a-zA-Z]+)', self.url)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
title = self.info['title'] or 'N/A'
|
||||
return clean_title(title, n=100)
|
||||
|
||||
def read(self):
|
||||
imgs = get_imgs(self.url, self.info, self.cw)
|
||||
for img in imgs:
|
||||
ext = os.path.splitext(img.split('?')[0])[1]
|
||||
if len(imgs) > 1:
|
||||
self.filenames[img] = (u'{:04}{}').format(len(self.urls), ext)
|
||||
else:
|
||||
self.filenames[img] = clean_title(self.name, n=-len(ext)) + ext
|
||||
self.urls.append(img)
|
||||
|
||||
self.single = len(imgs) == 1
|
||||
self.referer = self.url
|
||||
self.title = u'{} (imgur_{})'.format(self.name, self.id_)
|
||||
|
||||
|
||||
@try_n(4)
|
||||
def get_info(url):
|
||||
url = url.replace('/gallery/', '/a/')
|
||||
if '/r/' in url and url.split('/r/')[1].strip('/').count('/') == 0:
|
||||
title = re.find(r'/r/([^/]+)', url)
|
||||
info = {}
|
||||
info['title'] = title
|
||||
info['type'] = 'r'
|
||||
else:
|
||||
try: # legacy
|
||||
html = downloader.read_html(url, cookies={'over18':'1'})
|
||||
s = re.find('image *: *({.+)', html)
|
||||
info_raw = cut_pair(s)
|
||||
except Exception as e: # new
|
||||
print(e)
|
||||
id_ = re.find(r'/a/([0-9a-zA-Z_]+)', url) or re.find(r'/r/[0-9a-zA-Z_]+/([0-9a-zA-Z_]+)', url, err='no id')
|
||||
url_api = 'https://api.imgur.com/post/v1/albums/{}?client_id=546c25a59c58ad7&include=media%2Cadconfig%2Caccount'.format(id_)
|
||||
info_raw = downloader.read_html(url_api, cookies={'over18':'1'})
|
||||
info = json.loads(info_raw)
|
||||
info['type'] = 'a'
|
||||
return info
|
||||
|
||||
|
||||
def get_imgs(url, info=None, cw=None):
|
||||
print('get_imgs', url)
|
||||
if info is None:
|
||||
info = get_info(url)
|
||||
imgs = []
|
||||
|
||||
# Range
|
||||
max_pid = get_max_range(cw)
|
||||
|
||||
if info['type'] == 'a':
|
||||
if 'album_images' in info: # legacy
|
||||
imgs_ = info['album_images']['images']
|
||||
elif 'media' in info: # new
|
||||
imgs_ = info['media']
|
||||
else: # legacy
|
||||
imgs_ = [info]
|
||||
|
||||
for img in imgs_:
|
||||
img_url = img.get('url') # new
|
||||
if not img_url: # legacy
|
||||
hash = img['hash']
|
||||
ext = img['ext']
|
||||
img_url = 'https://i.imgur.com/{}{}'.format(hash, ext)
|
||||
if img_url in imgs:
|
||||
continue
|
||||
imgs.append(img_url)
|
||||
|
||||
elif info['type'] == 'r':
|
||||
urls = set()
|
||||
for p in range(100):
|
||||
url_api = 'https://imgur.com/r/{}/new/page/{}/hit?scrolled'.format(info['title'], p)
|
||||
print(url_api)
|
||||
html = downloader.read_html(url_api, referer=url)
|
||||
soup = Soup(html)
|
||||
|
||||
c = 0
|
||||
for post in soup.findAll('div', class_='post'):
|
||||
a = post.find('a', class_='image-list-link')
|
||||
url_post = urljoin(url, a.attrs['href'])
|
||||
if url_post in urls:
|
||||
continue
|
||||
urls.add(url_post)
|
||||
c += 1
|
||||
|
||||
try: # for r18 images
|
||||
imgs += get_imgs(url_post)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
s = (u'{} {} ({})').format(tr_(u'\uc77d\ub294 \uc911...'), info['title'], len(imgs))
|
||||
if cw is not None:
|
||||
if cw.alive:
|
||||
cw.setTitle(s)
|
||||
else:
|
||||
return []
|
||||
else:
|
||||
print(s)
|
||||
|
||||
if c == 0:
|
||||
print('same; break')
|
||||
break
|
||||
|
||||
return imgs
|
||||
|
|
@ -0,0 +1,579 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from timee import sleep, clock
|
||||
from constants import clean_url
|
||||
from utils import Downloader, LazyUrl, urljoin, get_max_range, Soup, Session, update_url_query, get_print, cut_pair, get_ext, clean_title, lazy, try_n, generate_csrf_token, check_alive
|
||||
import urllib
|
||||
from error_printer import print_error
|
||||
import os, requests
|
||||
from translator import tr_
|
||||
import json
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
import ree as re
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
import clf2
|
||||
import errors
|
||||
FORMAT_PIN = r'/p/([0-9a-zA-Z-_]+)'
|
||||
|
||||
|
||||
def get_session(url, cw=None):
    """Build a logged-in requests Session for instagram.com.

    Raises errors.LoginRequired when no valid ``sessionid`` cookie is
    present; otherwise copies the downloader User-Agent onto the session
    and ensures a ``csrftoken`` cookie exists.
    """
    sess = Session()
    # Look up the sessionid cookie stored for the .instagram.com domain.
    cookie = sess.cookies._cookies.get('.instagram.com', {}).get('/', {}).get('sessionid')
    if cookie is None or cookie.is_expired():
        raise errors.LoginRequired()
    sess.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not sess.cookies.get('csrftoken', domain='.instagram.com'):
        token = generate_csrf_token()
        print('csrf:', token)
        sess.cookies.set("csrftoken", token, domain='.instagram.com')
    return sess
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_insta(Downloader):
    """Downloader for Instagram profiles, single posts and story highlights."""
    type = 'insta'
    URLS = ['instagram.com']
    MAX_CORE = 8
    display_name = 'Instagram'

    def init(self):
        # Requires a logged-in session; raises LoginRequired otherwise.
        self.session = get_session(self.url, self.cw)
        if '/p/' in self.url:
            self.print_('single post')
        elif '/stories/' in self.url:
            self.print_('stories')
        elif 'instagram.com' in self.url:
            # Normalize any other instagram.com URL to the profile page.
            self.url = u'https://www.instagram.com/{}'.format(self.username)

    @lazy
    def username(self):
        # Resolved once via the shared-data JSON of the page.
        return get_username(self.url)

    @classmethod
    def fix_url(cls, url):
        # Bare usernames are accepted and turned into profile URLs;
        # query string, fragment and trailing slash are stripped.
        if 'instagram.com' not in url:
            url = u'https://www.instagram.com/{}'.format(url)
        return url.split('?')[0].split('#')[0].strip('/')

    @classmethod
    def key_id(cls, url):
        # De-duplication key: treat www and non-www URLs as the same task.
        return url.replace('://www.', '://')

    @lazy
    def name(self):
        # Display (full) name of the account owner.
        return get_name(self.url)

    @property
    def id_(self):
        # Folder/task title, e.g. "Full Name (insta_username)".
        return u'{} (insta_{})'.format(clean_title(self.name), self.username)

    def read(self):
        cw = self.cw
        title = self.id_
        self.title = title
        self.artist = self.name
        ui_setting = self.ui_setting
        # Dispatch on URL shape: single post / story highlight / profile.
        if '/p/' in self.url:
            self.print_('single')
            iter = get_imgs_single(self.url, self.session, cw=cw)  # NOTE: 'iter' shadows the builtin
        elif '/stories/highlights/' in self.url:
            iter = get_stories_single(self.url, session=self.session, cw=cw)
        else:
            s = ui_setting.instaStories.isChecked()
            self.print_('stories: {}'.format(s))
            iter = get_imgs_all(self.url, title, session=self.session, cw=cw, d=self, stories=s)
        imgs = []  # NOTE(review): never appended to — appears vestigial
        for img in iter:
            if cw and not cw.alive:
                return
            self.urls.append(img.url)
        self.title = title
|
||||
|
||||
|
||||
def get_j(script):
    """Extract the ``window._sharedData`` JSON object from a <script> tag.

    Returns the parsed dict, or None when the tag is empty or does not
    contain valid ``window._sharedData = {...};`` JSON.
    """
    s = script.string
    if not s:
        return None
    try:
        # Drop the 'window._sharedData' name, then the leading '=' and the
        # trailing ';' ([1:-1]), leaving only the JSON object literal.
        s = s.replace('window._sharedData', '').strip()[1:-1].strip()
        return json.loads(s)
    except ValueError:
        # Not the shared-data script; caller treats None as "keep looking".
        return None
|
||||
|
||||
|
||||
def read_html(url, session, cw):
    """Fetch *url* with *session* and return the raw HTML text.

    Thin wrapper around downloader.read_html; *cw* is accepted for
    interface compatibility but currently unused.
    """
    # Earlier implementation solved challenges via clf2:
    #res = clf2.solve(url, session=session, cw=cw)#
    #return res['html']
    return downloader.read_html(url, session=session)
|
||||
|
||||
|
||||
def check_error(soup, cw, wait):
    """Inspect *soup* for Instagram's error container.

    When an error message is present: if *wait* is true, log it and back
    off for 30 minutes; otherwise raise it as an Exception.
    """
    log = get_print(cw)
    container = soup.find('div', class_='error-container')
    if not container:
        return
    message = container.text.strip()
    if not wait:
        raise Exception(message)
    log('err: {}'.format(message))
    sleep(60*30, cw)
|
||||
|
||||
|
||||
def get_sd(url, session=None, html=None, cw=None, wait=True):
|
||||
print_ = get_print(cw)
|
||||
|
||||
if html:
|
||||
soup = Soup(html)
|
||||
check_error(soup, cw, wait)
|
||||
for script in soup.findAll('script'):
|
||||
j = get_j(script)
|
||||
if j:
|
||||
break
|
||||
else:
|
||||
raise Exception('no _sharedData!!')
|
||||
else:
|
||||
for try_ in range(4):
|
||||
_wait(cw)
|
||||
html = read_html(url, session, cw)
|
||||
soup = Soup(html)
|
||||
check_error(soup, cw, wait)
|
||||
for script in soup.findAll('script'):
|
||||
j = get_j(script)
|
||||
if j:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
else:
|
||||
raise Exception('no _sharedData')
|
||||
for script in soup.findAll('script'):
|
||||
s = script.string
|
||||
if s and 'window.__additionalDataLoaded(' in s:
|
||||
s = cut_pair(s)
|
||||
j_add = json.loads(s)
|
||||
try:
|
||||
j['entry_data']['PostPage'][0].update(j_add)
|
||||
except:
|
||||
j['entry_data']['ProfilePage'][0].update(j_add) #2900
|
||||
|
||||
# Challenge
|
||||
challenge = j['entry_data'].get('Challenge')
|
||||
if challenge:
|
||||
for cont in challenge[0]['extraData']['content']:
|
||||
title = cont.get('title')
|
||||
if title:
|
||||
break
|
||||
else:
|
||||
title = 'Err'
|
||||
raise errors.LoginRequired(title)
|
||||
|
||||
# LoginAndSignupPage
|
||||
login = j['entry_data'].get('LoginAndSignupPage')
|
||||
if login:
|
||||
raise errors.LoginRequired()
|
||||
|
||||
return j
|
||||
|
||||
|
||||
def get_id(url):
|
||||
j = get_sd(url)
|
||||
if '/p/' in url:
|
||||
id = j['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['id']
|
||||
elif '/stories/' in url:
|
||||
id = j['entry_data']['StoriesPage'][0]['user']['username'] # ???
|
||||
else:
|
||||
id = j['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
return id
|
||||
|
||||
|
||||
|
||||
def get_username(url):
    """Return the account username for a post, story, or profile URL."""
    entry = get_sd(url, wait=False)['entry_data']
    if '/p/' in url:
        return entry['PostPage'][0]['graphql']['shortcode_media']['owner']['username']
    if '/stories/' in url:
        return entry['StoriesPage'][0]['user']['username']
    return entry['ProfilePage'][0]['graphql']['user']['username']
|
||||
|
||||
|
||||
def get_name(url):
    """Return the account's display (full) name for *url*.

    Story URLs are resolved to the owner's profile page and retried.
    """
    j = get_sd(url)
    if '/p/' in url:
        return j['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['full_name']
    if '/stories/' in url:
        # Stories pages do not carry full_name; recurse via the profile URL.
        owner = get_id(url)
        return get_name('https://www.instagram.com/{}/'.format(owner))
    return j['entry_data']['ProfilePage'][0]['graphql']['user']['full_name']
|
||||
|
||||
|
||||
class Image(object):
    """A downloadable media URL wrapped in a LazyUrl.

    The real URL is already known at construction time; LazyUrl is used
    only so the download pipeline resolves it with the right referer.
    """

    def __init__(self, url, referer, filename, id=None):
        self._url = url  # final media URL
        self.url = LazyUrl(referer, self.get, self)
        self.filename = filename
        self.id = id  # item id; used for de-duplication and sorting

    def get(self, referer):
        # wait_download is currently a no-op throttle hook.
        wait_download()
        return self._url
|
||||
|
||||
|
||||
class Image_lazy(object):
|
||||
|
||||
def __init__(self, url, session=None, cw=None):
|
||||
self.url = url
|
||||
self.session = session
|
||||
self.cw = cw
|
||||
|
||||
self.url = LazyUrl(url, self.get, self)
|
||||
|
||||
@try_n(4)
|
||||
def get(self, url):
|
||||
cw = self.cw
|
||||
if cw and not cw.alive:
|
||||
raise Exception('cw is dead')
|
||||
node = Node(url, session=self.session, cw=cw)
|
||||
img = node.imgs[0]
|
||||
ext = os.path.splitext(url)[1]
|
||||
wait_download()
|
||||
url_img = img.url()
|
||||
self.filename = img.filename
|
||||
return url_img
|
||||
|
||||
|
||||
@sleep_and_retry
@limits(1, 10)
def _wait(cw=None):
    """Rate-limit gate (ratelimit: at most 1 call per 10 s).

    Raises to abort when the client window *cw* has been closed.
    """
    if cw and not cw.alive:
        raise Exception('cw is dead while waiting')
|
||||
|
||||
|
||||
# Per-download throttling was once rate-limited; currently disabled:
##@sleep_and_retry
##@limits(1, 1)
def wait_download():
    """Per-download throttle hook; currently a no-op."""
    pass
|
||||
|
||||
|
||||
@try_n(2)
|
||||
def get_query(query_hash, variables, session, cw=None):
|
||||
_wait(cw)
|
||||
print_ = get_print(cw)
|
||||
csrf_token = session.cookies.get('csrftoken', domain='.instagram.com')
|
||||
if not csrf_token:
|
||||
raise Exception('no csrftoken')
|
||||
hdr = {
|
||||
"X-CSRFToken" : csrf_token, #2849
|
||||
"X-IG-App-ID" : "936619743392459",
|
||||
"X-IG-WWW-Claim" : "0",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
url_ = update_url_query('https://www.instagram.com/graphql/query/', {'query_hash': query_hash, 'variables': json.dumps(variables)})
|
||||
#print(len(edges), url_)
|
||||
r = session.get(url_, headers=hdr)
|
||||
try:
|
||||
j = json.loads(r.text)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
j = {}
|
||||
if not j or j.get('status') == 'fail':
|
||||
msg = 'Fail: {} {}'.format(j.get('message') or 'Please wait a few minutes before you try again.', variables)
|
||||
print_(msg)
|
||||
sleep(60*30, cw)
|
||||
raise Exception(msg)
|
||||
return j
|
||||
|
||||
|
||||
def get_imgs(url, n_max=2000, title=None, cw=None, session=None):
|
||||
print_ = get_print(cw)
|
||||
|
||||
for try_ in range(4):
|
||||
try:
|
||||
html = read_html(url, session, cw)
|
||||
m = re.search('"edge_owner_to_timeline_media":{"count":([0-9]+)', html)
|
||||
if m is None:
|
||||
raise Exception('Invalid page')
|
||||
break
|
||||
except Exception as e:
|
||||
e_ = e
|
||||
print_(print_error(e)[0])
|
||||
else:
|
||||
raise e_
|
||||
n = int(m.groups()[0])
|
||||
n = min(n, n_max)
|
||||
|
||||
data = get_sd(url, html=html, cw=cw)
|
||||
|
||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
csrf_token = data['config']['csrf_token']#
|
||||
session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')
|
||||
|
||||
cursor = ''
|
||||
edges = []
|
||||
bad = 0
|
||||
while True:
|
||||
check_alive(cw)
|
||||
|
||||
variables = {
|
||||
'id': uploader_id,
|
||||
'first': 12,
|
||||
}
|
||||
if cursor:
|
||||
variables['after'] = cursor
|
||||
#print_(variables)#
|
||||
|
||||
media = None
|
||||
try:
|
||||
j = get_query('003056d32c2554def87228bc3fd9668a', variables, session, cw)
|
||||
media = j['data']['user']['edge_owner_to_timeline_media']
|
||||
sleep(2)#
|
||||
except Exception as e:
|
||||
if bad > 10:
|
||||
raise Exception('no media')
|
||||
else:
|
||||
print_(u'no media.. retry... ({}) {}'.format(bad+1, print_error(e)[0]))
|
||||
sleep(12*bad, cw)
|
||||
bad += 1
|
||||
continue
|
||||
bad = 0
|
||||
|
||||
edges_new = media.get('edges')
|
||||
if not edges_new or not isinstance(edges_new, list):
|
||||
print('no edges_new')
|
||||
break
|
||||
|
||||
edges += edges_new
|
||||
|
||||
s = u'{} {} ({}/{})'.format(tr_(u'읽는 중...'), title, len(edges), n)
|
||||
if cw is not None:
|
||||
cw.setTitle(s)
|
||||
if not cw.alive:
|
||||
return []
|
||||
else:
|
||||
print(s)
|
||||
|
||||
if len(edges) >= n:
|
||||
break
|
||||
|
||||
page_info = media.get('page_info')
|
||||
if not page_info:
|
||||
break
|
||||
if not page_info.get('has_next_page'):
|
||||
break
|
||||
cursor = page_info.get('end_cursor')
|
||||
if not cursor:
|
||||
break
|
||||
|
||||
if len(edges) <= n/2:
|
||||
raise Exception(u'Too short: {} / {}'.format(len(edges), n))
|
||||
|
||||
imgs = []
|
||||
for edge in edges:
|
||||
node = edge['node']
|
||||
type = node['__typename']
|
||||
id = node['shortcode']
|
||||
url = u'https://www.instagram.com/p/{}/'.format(id)
|
||||
## if type in ['GraphVideo', 'GraphImage']:
|
||||
## single = True
|
||||
## else:
|
||||
## single = False
|
||||
for img in Node(url, session=session, cw=cw, media=node).imgs:
|
||||
imgs.append(img)
|
||||
if len(imgs) >= n_max:
|
||||
break
|
||||
|
||||
return imgs
|
||||
|
||||
|
||||
class Node(object):
|
||||
|
||||
def __init__(self, url, format=u'[%y-%m-%d] id_ppage', session=None, cw=None, media=None):
|
||||
print('Node', url)
|
||||
print_ = get_print(cw)
|
||||
self.id = re.search(FORMAT_PIN, url).groups()[0]
|
||||
self.imgs = []
|
||||
self.session = session
|
||||
|
||||
if not media:
|
||||
if False: # Original
|
||||
j = get_sd(url, self.session, cw=cw)
|
||||
data = j['entry_data']['PostPage'][0]['graphql']
|
||||
else:
|
||||
variables = {
|
||||
"shortcode" : self.id,
|
||||
"child_comment_count" : 3,
|
||||
"fetch_comment_count" : 40,
|
||||
"parent_comment_count" : 24,
|
||||
"has_threaded_comments": True,
|
||||
}
|
||||
j = get_query('a9441f24ac73000fa17fe6e6da11d59d', variables, session, cw)
|
||||
data = j['data']
|
||||
media = data['shortcode_media']
|
||||
|
||||
if 'video_url' in media:
|
||||
urls = [
|
||||
media['video_url']]
|
||||
elif 'edge_sidecar_to_children' in media:
|
||||
edges = media['edge_sidecar_to_children']['edges']
|
||||
urls = []
|
||||
for edge in edges:
|
||||
node = edge['node']
|
||||
if 'video_url' in node:
|
||||
url_ = node['video_url']
|
||||
else:
|
||||
url_ = node['display_resources'][(-1)]['src']
|
||||
urls.append(url_)
|
||||
else:
|
||||
urls = [media['display_resources'][(-1)]['src']]
|
||||
time = media['taken_at_timestamp']
|
||||
|
||||
self.date = datetime.fromtimestamp(time)
|
||||
self.timeStamp = self.date.strftime(format).replace(':', u'\uff1a')
|
||||
for p, img in enumerate(urls):
|
||||
ext = os.path.splitext(img.split('?')[0].split('#')[0])[1]
|
||||
filename = ('{}{}').format(self.timeStamp, ext).replace('id', str(self.id)).replace('page', str(p))
|
||||
img = Image(img, url, filename)
|
||||
self.imgs.append(img)
|
||||
|
||||
|
||||
def get_imgs_all(url, title=None, cw=None, d=None, session=None, stories=True):
    """Collect story images (optionally) plus timeline images for a profile.

    The configured maximum range is shared between the two sources:
    stories count against it first, timeline images fill the remainder.
    """
    limit = get_max_range(cw)
    url = clean_url(url)
    story_imgs = get_stories(url, title, cw=cw, session=session) if stories else []
    remaining = max(0, limit - len(story_imgs))
    timeline = get_imgs(url, remaining, title=title, cw=cw, session=session)
    return story_imgs + timeline[:remaining]
|
||||
|
||||
|
||||
def get_imgs_single(url, session=None, cw=None):
    """Return the images of a single post (/p/...) URL."""
    node = Node(url, session=session, cw=cw)
    return node.imgs
|
||||
|
||||
|
||||
def get_stories(url, title=None, cw=None, session=None):
|
||||
print_ = get_print(cw)
|
||||
|
||||
html = downloader.read_html(url, session=session)
|
||||
|
||||
data = get_sd(url, html=html, cw=cw)
|
||||
|
||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
csrf_token = data['config']['csrf_token']#
|
||||
session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')
|
||||
|
||||
print('uploader_id:', uploader_id)
|
||||
variables = {
|
||||
'user_id': uploader_id,
|
||||
'include_chaining': True,
|
||||
'include_reel': True,
|
||||
'include_suggested_users': False,
|
||||
'include_logged_out_extras': False,
|
||||
'include_highlight_reels': True,
|
||||
'include_live_status': True,
|
||||
}
|
||||
j = get_query('d4d88dc1500312af6f937f7b804c68c3', variables, session, cw)
|
||||
|
||||
imgs = []
|
||||
ids = set()
|
||||
|
||||
data = j['data']
|
||||
hs = data['user']['edge_highlight_reels']
|
||||
edges = hs['edges']
|
||||
edges.insert(0, str(uploader_id))
|
||||
for i, edge in enumerate(edges):
|
||||
if isinstance(edge, str):
|
||||
id = edge
|
||||
hid = None
|
||||
url_str = url
|
||||
else:
|
||||
id = None
|
||||
hid = edge['node']['id']
|
||||
url_str = 'https://www.instagram.com/stories/highlights/{}/'.format(hid)
|
||||
try:
|
||||
imgs_new = get_stories_single(url_str, id=id, cw=cw, session=session)
|
||||
for img in imgs_new:
|
||||
if img.id in ids:
|
||||
print('duplicate: {}'.format(img.id))
|
||||
continue
|
||||
ids.add(img.id)
|
||||
imgs.append(img)
|
||||
print_('stories: {}'.format(hid))
|
||||
except Exception as e:
|
||||
print_(u'Failed to get stories: {}'.format(hid))
|
||||
print(e)
|
||||
msg = u'{} {} ({}/{})'.format(tr_(u'스토리 읽는 중...'), title, i+1, len(edges))
|
||||
if cw:
|
||||
if not cw.alive:
|
||||
return
|
||||
cw.setTitle(msg)
|
||||
else:
|
||||
print(msg)
|
||||
imgs = sort_str(imgs)
|
||||
return imgs
|
||||
|
||||
|
||||
def sort_str(imgs):
    """Return *imgs* ordered by numeric id, newest (largest id) first."""
    return sorted(imgs, key=lambda img: int(img.id), reverse=True)
|
||||
|
||||
|
||||
def get_stories_single(url, id=None, cw=None, session=None):
|
||||
j = get_sd(url, session=session, cw=cw)
|
||||
hid = re.find('/stories/highlights/([0-9]+)', url)
|
||||
reel_ids = []
|
||||
highlight_reel_ids = []
|
||||
if hid is None:
|
||||
if id is None:
|
||||
id = get_id(url) # ???
|
||||
reel_ids.append(str(id))
|
||||
else:
|
||||
highlight_reel_ids.append(str(hid))
|
||||
print(id, hid)
|
||||
variables = {
|
||||
"reel_ids":reel_ids,
|
||||
"tag_names":[],
|
||||
"location_ids":[],
|
||||
"highlight_reel_ids":highlight_reel_ids,
|
||||
"precomposed_overlay":False,
|
||||
"show_story_viewer_list":True,
|
||||
"story_viewer_fetch_count":50,
|
||||
"story_viewer_cursor":"",
|
||||
"stories_video_dash_manifest":False
|
||||
}
|
||||
print(variables)
|
||||
j = get_query('f5dc1457da7a4d3f88762dae127e0238', variables, session, cw)
|
||||
data = j['data']
|
||||
m = data['reels_media'][0]
|
||||
items = m['items']
|
||||
if not items:
|
||||
raise Exception('no items')
|
||||
imgs = []
|
||||
for item in items:
|
||||
id = item['id']
|
||||
rs = item.get('video_resources') or item['display_resources']
|
||||
r = rs[-1]
|
||||
src = r['src']
|
||||
ext = get_ext(src)
|
||||
filename = u'stories_{}{}'.format(id, ext)
|
||||
img = Image(src, url, filename, id=id)
|
||||
imgs.append(img)
|
||||
imgs = sort_str(imgs)
|
||||
return imgs
|
|
@ -93,7 +93,13 @@ class Downloader_iwara(Downloader):
|
|||
|
||||
def read_channel(url, type_, cw=None):
|
||||
print_ = get_print(cw)
|
||||
username = re.find(r'/users/([^/]+)', url, err='no username')
|
||||
html = downloader.read_html(url)
|
||||
soup = Soup(html)
|
||||
if soup.find('div', id='block-mainblocks-user-connect'):
|
||||
username = re.find(r'''/messages/new\?user=(.+)['"]''', html, err='no username')
|
||||
else:
|
||||
username = re.find(r'/users/([^/]+)', url, err='no username')
|
||||
print_('username: {}'.format(username))
|
||||
info = {}
|
||||
urls = []
|
||||
urls_set = set()
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
import downloader
|
||||
from utils import Downloader, Soup, get_print, json_loads, compatstr, LazyUrl, format_filename, clean_title
|
||||
import devtools
|
||||
import js2py
|
||||
import ree as re
|
||||
from m3u8_tools import playlist2stream
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_javfinder(Downloader):
    """Downloader for JavFinder (javfinder.la) video pages."""
    type = 'javfinder'
    URLS = ['javfinder.la']
    single = True  # one video per page
    display_name = 'JavFinder'

    def read(self):
        # Video resolves everything eagerly in its constructor (see Video).
        video = Video(self.url, cw=self.cw)
        self.urls.append(video.url)
        self.setIcon(video.thumb)
        self.title = video.title
|
||||
|
||||
|
||||
class Video(object):
    """A JavFinder video, resolved eagerly at construction time.

    solve() captures the embedded player, extracts the playlist URL,
    title and thumbnail; the playlist is wrapped in a 4-thread stream
    for the download pipeline.
    """

    def __init__(self, url, cw=None):
        info = solve(url, cw=cw)
        url_video = info['file']  # playlist URL from the player's sources
        stream = playlist2stream(url_video, n_thread=4)
        self.url = LazyUrl(url, lambda x: stream, self)
        self.title = info['title']
        id = info['id']  # always '' here (see solve); kept for format_filename
        self.filename = format_filename(self.title, id, '.mp4')
        self.thumb = BytesIO()
        downloader.download(info['url_thumb'], buffer=self.thumb)
|
||||
|
||||
|
||||
def solve(url, cw=None):
|
||||
print_ = get_print(cw)
|
||||
info = {}
|
||||
res = devtools.watch_network(url, cw=cw)
|
||||
|
||||
#html = res['html']
|
||||
html = downloader.read_html(url) # ???
|
||||
|
||||
soup = Soup(html)
|
||||
info['title'] = soup.find('h1').text.strip()
|
||||
|
||||
info['url_thumb'] = soup.find('meta', {'property': 'og:image'})['content'].strip()
|
||||
|
||||
for r in res['rs']:
|
||||
url_player = r.url()
|
||||
if 'streamsb.net/embed-' in url_player:
|
||||
break
|
||||
else:
|
||||
raise Exception('no player')
|
||||
print_('player: {}'.format(url_player))
|
||||
|
||||
info['id'] = ''#
|
||||
|
||||
html = downloader.read_html(url_player, url)
|
||||
soup = Soup(html)
|
||||
for script in soup.findAll('script'):
|
||||
script = script.string or ''
|
||||
if 'function(p,a,c,k,e,d)' in script:
|
||||
break
|
||||
else:
|
||||
raise Exception('no function(p,a,c,k,e,d)')
|
||||
js = script.strip()[5:-1].replace('function(p,a,c,k,e,d)', 'function hack(p,a,c,k,e,d)').replace('return p}', 'return p};hack')
|
||||
context = js2py.EvalJs()
|
||||
t = context.eval(js)
|
||||
sources = re.find(r'sources *: *(\[\{.+?\}\])', t, err='no sources')
|
||||
sources = json_loads(sources)
|
||||
info['file'] = sources[0]['file']
|
||||
return info
|
||||
|
|
@ -0,0 +1,207 @@
|
|||
import downloader
|
||||
from utils import Soup, urljoin, Downloader, fix_title, Session, get_print, LazyUrl, clean_title, get_imgs_already
|
||||
import ree as re
|
||||
from timee import sleep
|
||||
from translator import tr_
|
||||
import os
|
||||
from constants import try_n, clean_url
|
||||
import urllib, page_selector
|
||||
import bs4
|
||||
PATTERN = r'jmana[0-9]*.*/(comic_list_title|book)\?book'
|
||||
PATTERN_ALL = r'jmana[0-9]*.*/(comic_list_title|book|bookdetail)\?book'
|
||||
PATTERN_ID = '[?&]bookdetailid=([0-9]+)'
|
||||
|
||||
|
||||
class Image(object):
    """One page-image of a jmana episode."""

    def __init__(self, url, page, p):
        # The image URL is already final; wrap it so the downloader
        # resolves it with the episode page as referer.
        self.url = LazyUrl(page.url, lambda _: url, self)
        basename = (u'{:04}{}').format(p, '.jpg')
        self.filename = (u'{}/{}').format(page.title, basename)
|
||||
|
||||
|
||||
class Page(object):
    """One episode entry: sanitized title, URL, and numeric bookdetail id."""

    def __init__(self, title, url):
        self.url = url
        self.title = clean_title(title)
        # Numeric id parsed from the bookdetailid query parameter.
        self.id = int(re.find(PATTERN_ID, url))
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_jmana(Downloader):
|
||||
type = 'jmana'
|
||||
URLS = ['regex:'+PATTERN_ALL]
|
||||
MAX_CORE = 8
|
||||
_soup = None
|
||||
|
||||
def init(self):
|
||||
self.url = clean_url(self.url)
|
||||
self.session = Session()
|
||||
if re.search(PATTERN_ID, self.url): #1799
|
||||
select = self.soup.find('select', class_='bookselect')
|
||||
for i, op in enumerate(select.findAll('option')[::-1]):
|
||||
if 'selected' in op.attrs:
|
||||
break
|
||||
else:
|
||||
raise Exception('no selected option')
|
||||
for a in self.soup.findAll('a'):
|
||||
url = urljoin(self.url, a.get('href') or '')
|
||||
if re.search(PATTERN, url):
|
||||
break
|
||||
else:
|
||||
raise Exception('list not found')
|
||||
self.url = self.fix_url(url)
|
||||
self._soup = None
|
||||
|
||||
for i, page in enumerate(get_pages(self.url, self.session, self.soup)):
|
||||
if page.id == int(op['value']):
|
||||
break
|
||||
else:
|
||||
raise Exception('can not find page')
|
||||
self.cw.range_p = [i]
|
||||
|
||||
@classmethod
|
||||
def fix_url(cls, url):
|
||||
return url
|
||||
|
||||
@property
|
||||
def soup(self):
|
||||
if self._soup is None:
|
||||
html = downloader.read_html(self.url, session=self.session)
|
||||
soup = Soup(html)
|
||||
self._soup = soup
|
||||
return self._soup
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
title = get_title(self.soup)
|
||||
artist = get_artist(self.soup)
|
||||
title = fix_title(self, title, artist)
|
||||
return title
|
||||
|
||||
def read(self):
|
||||
title = self.name
|
||||
artist = get_artist(self.soup)
|
||||
self.artist = artist
|
||||
for img in get_imgs(self.url, title, self.session, soup=self.soup, cw=self.cw):
|
||||
if isinstance(img, Image):
|
||||
self.urls.append(img.url)
|
||||
else:
|
||||
self.urls.append(img)
|
||||
|
||||
self.title = self.name
|
||||
|
||||
|
||||
|
||||
def get_title(soup):
    """Return the comic title from a jmana list page.

    Prefers the dedicated <a class="tit"> element; otherwise falls back
    to the labeled '제목 :' pattern anywhere in the page text.
    """
    a = soup.find('a', class_='tit')
    if a:
        return a.text.strip()
    # BUGFIX: the fallback previously re-ran soup.find('a', class_='tit')
    # and dereferenced .text on its None result, always raising
    # AttributeError. Search the whole page text instead (cf. get_artist).
    return re.find(r'제목 *: *(.+)', soup.text, err='no title')
|
||||
|
||||
|
||||
def get_artist(soup):
    """Return the artist from the '작가 :' label, or 'N/A' when absent."""
    artist = re.find(r'작가 *: *(.+)', soup.text, default='').strip()
    return artist or 'N/A'
|
||||
|
||||
|
||||
@try_n(4, sleep=60)
|
||||
def get_imgs_page(page, referer, session, cw=None):
|
||||
print_ = get_print(cw)
|
||||
sleep(5, cw) #2017
|
||||
html = downloader.read_html(page.url, referer, session=session)
|
||||
|
||||
inserted = re.find(r'''var *inserted *= *['"](.*?)['"]''', html)
|
||||
print_('inserted: {}'.format(inserted))
|
||||
|
||||
inserted = set(int(i) for i in inserted.split(',')) if inserted else set()
|
||||
|
||||
soup = Soup(html)
|
||||
|
||||
view = soup.find(class_='pdf-wrap')
|
||||
|
||||
imgs = []
|
||||
for i, img in enumerate(child for child in view.children if isinstance(child, bs4.element.Tag)):
|
||||
src = img.get('data-src') or img.get('src') or ''
|
||||
|
||||
if i in inserted:
|
||||
print_('remove: {}'.format(src))
|
||||
continue
|
||||
|
||||
if not src:
|
||||
continue
|
||||
src = urljoin(page.url, src.strip())
|
||||
if '/adimg/' in src:
|
||||
print('adimg:', src)
|
||||
continue
|
||||
if '/notice' in src:
|
||||
print('notice:', src)
|
||||
continue
|
||||
|
||||
img = Image(src, page, len(imgs))
|
||||
imgs.append(img)
|
||||
|
||||
return imgs
|
||||
|
||||
|
||||
def get_pages(url, session=None, soup=None):
|
||||
if soup is None:
|
||||
html = downloader.read_html(url, session=session)
|
||||
soup = Soup(html)
|
||||
pages = []
|
||||
for inner in soup.findAll('div', class_='inner'):
|
||||
a = inner.find('a')
|
||||
if not a:
|
||||
continue
|
||||
href = a.attrs.get('href', '')
|
||||
if not re.search(PATTERN_ID, href):
|
||||
continue
|
||||
if a.find('img'):
|
||||
print('skip img', a.attrs.get('href'))
|
||||
continue
|
||||
href = urljoin(url, href)
|
||||
title_page = a.text
|
||||
page = Page(title_page, href)
|
||||
pages.append(page)
|
||||
|
||||
pages = list(reversed(pages))
|
||||
return pages
|
||||
|
||||
|
||||
@page_selector.register('jmana')
@try_n(4)
def f(url):
    """page_selector hook: list all episodes for a comic-list URL."""
    if re.search(PATTERN_ID, url):
        # A bookdetailid means this is an episode URL, not the list.
        raise Exception(tr_(u'목록 주소를 입력해주세요'))
    return get_pages(url, session=Session())
|
||||
|
||||
|
||||
def get_imgs(url, title, session, soup=None, cw=None):
|
||||
print_ = get_print(cw)
|
||||
if soup is None:
|
||||
html = downloader.read_html(url, session=session)
|
||||
soup = Soup(html)
|
||||
pages = get_pages(url, soup=soup)
|
||||
print_('pages: {}'.format(len(pages)))
|
||||
pages = page_selector.filter(pages, cw)
|
||||
imgs = []
|
||||
for i, page in enumerate(pages):
|
||||
imgs_already = get_imgs_already('jmana', title, page, cw)
|
||||
if imgs_already:
|
||||
imgs += imgs_already
|
||||
continue
|
||||
|
||||
imgs += get_imgs_page(page, url, session, cw)
|
||||
if cw is not None:
|
||||
if not cw.alive:
|
||||
return
|
||||
cw.setTitle((u'{} {} / {} ({} / {})').format(tr_(u'\uc77d\ub294 \uc911...'), title, page.title, i + 1, len(pages)))
|
||||
|
||||
if not imgs:
|
||||
raise Exception('no imgs')
|
||||
|
||||
return imgs
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
import downloader
|
||||
import ree as re
|
||||
from utils import Session, LazyUrl, Soup, Downloader, try_n, get_print, clean_title, print_error, urljoin
|
||||
from time import sleep
|
||||
from translator import tr_
|
||||
import page_selector
|
||||
import json
|
||||
UA = downloader.hdr['User-Agent']
|
||||
|
||||
|
||||
class Page(object):
    """One purchasable single (episode) of a KakaoPage series."""

    def __init__(self, id_, title):
        self.title = title
        self.id_ = id_
        # Viewer URL for this product.
        self.url = 'https://page.kakao.com/viewer?productId={}'.format(id_)
|
||||
|
||||
|
||||
class Image(object):
    """One page-image of a KakaoPage episode."""

    def __init__(self, url, page, p):
        # URL is already final; referer is fixed to the site root.
        self.url = LazyUrl('https://page.kakao.com/', lambda _: url, self)
        self.filename = '{}/{:04}{}'.format(clean_title(page.title), p, '.jpg')
|
||||
|
||||
|
||||
@Downloader.register
|
||||
class Downloader_kakaopage(Downloader):
|
||||
type = 'kakaopage'
|
||||
URLS = ['page.kakao.com/home']
|
||||
MAX_CORE = 8
|
||||
MAX_SPEED = 4.0
|
||||
display_name = 'KakaoPage'
|
||||
|
||||
def init(self):
|
||||
self.session = Session()
|
||||
self.session.headers['User-Agent'] = UA
|
||||
|
||||
@classmethod
|
||||
def fix_url(cls, url):
|
||||
id = re.find('/home/.+?/([0-9]+)', url)
|
||||
if id is not None:
|
||||
url = id
|
||||
if url.isdecimal():
|
||||
url = 'https://page.kakao.com/home?seriesId={}'.format(url)
|
||||
return url
|
||||
|
||||
def read(self):
|
||||
info = get_info(self.url, self.session, cw=self.cw)
|
||||
|
||||
for img in info['imgs']:
|
||||
self.urls.append(img.url)
|
||||
|
||||
self.artist = info['artist']
|
||||
|
||||
self.title = clean_title('[{}] {}'.format(info['artist'], info['title']))
|
||||
|
||||
|
||||
|
||||
def get_id(url):
    """Extract the numeric seriesId from a KakaoPage URL.

    Raises (via ree's err= handling) when the parameter is absent.
    """
    id_ = re.find('seriesId=([0-9]+)', url, err='No seriesId')
    return id_
|
||||
|
||||
|
||||
|
||||
def get_pages(url, session):
|
||||
id_ = get_id(url)
|
||||
|
||||
pages = []
|
||||
ids = set()
|
||||
for p in range(100):
|
||||
url_api = 'https://api2-page.kakao.com/api/v5/store/singles'
|
||||
data = {
|
||||
'seriesid': id_,
|
||||
'page': str(p),
|
||||
'direction': 'asc',
|
||||
'page_size': '20',
|
||||
'without_hidden': 'true',
|
||||
}
|
||||
r = session.post(url_api, data=data, headers={'Referer': url})
|
||||
print(p, r)
|
||||
data = r.json()
|
||||
|
||||
singles = data['singles']
|
||||
if not singles:
|
||||
print('no singles')
|
||||
break
|
||||
|
||||
for single in singles:
|
||||
title_page = single['title']
|
||||
id_page = single['id']
|
||||
if id_page in ids:
|
||||
print('dup id')
|
||||
continue
|
||||
ids.add(id_page)
|
||||
page = Page(id_page, title_page)
|
||||
pages.append(page)
|
||||
sleep(.5)
|
||||
return pages
|
||||
|
||||
|
||||
@try_n(2)
|
||||
def get_imgs_page(page, session):
|
||||
html = downloader.read_html(page.url, session=session)
|
||||
did = re.find('"did" *: *"(.+?)"', html, err='no did')
|
||||
url_api = 'https://api2-page.kakao.com/api/v1/inven/get_download_data/web'
|
||||
data = {
|
||||
'productId': page.id_,
|
||||
'device_mgr_uid': 'Windows - Chrome',
|
||||
'device_model': 'Windows - Chrome',
|
||||
'deviceId': did,
|
||||
}
|
||||
print(data)
|
||||
r = session.post(url_api, data=data, headers={'Referer': page.url})
|
||||
data = r.json()
|
||||
if data['result_code']:
|
||||
raise Exception(data['message'])
|
||||
imgs = []
|
||||
for file in data['downloadData']['members']['files']:
|
||||
url = file['secureUrl']
|
||||
url = urljoin('https://page-edge-jz.kakao.com/sdownload/resource/', url)
|
||||
img = Image(url, page, len(imgs))
|
||||
imgs.append(img)
|
||||
return imgs
|
||||
|
||||
|
||||
def get_info(url, session, cw=None):
|
||||
print_ = get_print(cw)
|
||||
pages = get_pages(url, session)
|
||||
pages = page_selector.filter(pages, cw)
|
||||
if not pages:
|
||||
raise Exception('no pages')
|
||||
|
||||
info = {}
|
||||
|
||||
html = downloader.read_html(url, session=session)
|
||||
soup = Soup(html)
|
||||
|
||||
__NEXT_DATA__ = soup.find('script', id='__NEXT_DATA__')
|
||||
if __NEXT_DATA__:
|
||||
data = json.loads(__NEXT_DATA__.string)
|
||||
tid = data['props']['initialState']['common']['constant']['tid']
|
||||
print_('tid: {}'.format(tid))
|
||||
session.cookies['_kptid'] = tid
|
||||
html = downloader.read_html(url, session=session)
|
||||
soup = Soup(html)
|
||||
|
||||
title = soup.find('h2').text.strip()
|
||||
info['title'] = title
|
||||
artist = soup.find('meta', {'name': 'author'})['content']
|
||||
for x in [' ,', ', ']:
|
||||
while x in artist:
|
||||
artist = artist.replace(x, ',')
|
||||
artist = artist.replace(',', ', ')
|
||||
info['artist'] = artist
|
||||
|
||||
imgs = []
|
||||
|
||||
for i, page in enumerate(pages):
|
||||
if cw is not None:
|
||||
if not cw.alive:
|
||||
return
|
||||
cw.setTitle('{} {} / {} ({} / {})'.format(tr_('읽는 중...'), title, page.title, i + 1, len(pages)))
|
||||
|
||||
try:
|
||||
_imgs = get_imgs_page(page, session)
|
||||
e_msg = None
|
||||
except Exception as e:
|
||||
_imgs = []
|
||||
e_msg = print_error(e)[0]
|
||||
print_('{} {}'.format(page.title, len(_imgs)))
|
||||
if e_msg:
|
||||
print_(e_msg)
|
||||
|
||||
imgs += _imgs
|
||||
sleep(.2)
|
||||
|
||||
if not imgs:
|
||||
raise Exception('no imgs')
|
||||
|
||||
info['imgs'] = imgs
|
||||
|
||||
return info
|
||||
|
||||
|
||||
@page_selector.register('kakaopage')
@try_n(4)
def f(url):
    """page_selector hook: list all episode pages of a series URL."""
    if 'seriesId=' not in url:
        # Must be the series list URL, not a viewer URL.
        raise Exception(tr_('목록 주소를 입력해주세요'))
    return get_pages(url, Session())
|
|
@ -0,0 +1,55 @@
|
|||
import downloader
|
||||
import ytdl
|
||||
from utils import Downloader, try_n, LazyUrl, get_ext, format_filename
|
||||
from io import BytesIO as IO
|
||||
from m3u8_tools import M3u8_stream
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_vlive(Downloader):
    """Downloader for KakaoTV videos.

    NOTE(review): the class is named ``Downloader_vlive`` but its type and
    URLS target KakaoTV — likely copy/paste residue; renaming would change
    the registered external name, so it is left as-is.
    """
    type = 'kakaotv'
    URLS = ['tv.kakao']
    single = True
    display_name = 'KakaoTV'

    @classmethod
    def fix_url(cls, url):
        # Normalize: drop the query string and trailing slashes.
        return url.split('?')[0].strip('/')

    def read(self):
        video = Video(self.url)
        video.url()  # force lazy resolution so thumb/title are populated

        self.urls.append(video.url)
        self.setIcon(video.thumb)

        self.enableSegment()

        self.title = video.title
|
||||
|
||||
|
||||
|
||||
class Video(object):
    """Lazy KakaoTV video: metadata is fetched on first ``url()`` call."""
    _url = None  # cached direct media URL

    def __init__(self, url):
        self.url = LazyUrl(url, self.get, self)

    @try_n(2)
    def get(self, url):
        """Resolve the best mp4 format via ytdl; cache and return its URL."""
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)
        # Pick the highest-resolution mp4 format.
        fs = [f for f in info['formats'] if f['ext'] == 'mp4']
        f = sorted(fs, key=lambda f: f['height'])[-1]
        self._url = f['url']

        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = IO()
        downloader.download(self.thumb_url, buffer=self.thumb)
        self.title = info['title']
        ext = get_ext(self._url)
        self.filename = format_filename(self.title, info['id'], ext)
        return self._url
|
|
@ -0,0 +1,72 @@
|
|||
import downloader
|
||||
from utils import Soup, urljoin, Downloader, LazyUrl, Session, try_n, format_filename, clean_title
|
||||
from timee import sleep
|
||||
import ree as re
|
||||
from io import BytesIO
|
||||
import clf2
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_kissjav(Downloader):
    """Downloader for single KissJAV videos."""
    type = 'kissjav'
    URLS = ['kissjav.com']
    single = True
    display_name = 'KissJAV'

    def read(self):
        video = get_video(self.url)
        self.urls.append(video.url)
        self.setIcon(video.thumb)

        # Solve any challenge page first so segmented download can reuse cookies.
        self.session = get_session(self.url, cw=self.cw)
        self.enableSegment(1024*1024//2)

        self.title = video.title
|
||||
|
||||
|
||||
def get_video(url):
    """Scrape the page and return a Video for the highest-resolution source."""
    html = downloader.read_html(url)
    soup = Soup(html)

    view = soup.find('div', id='player-container-fluid')
    src_best = None
    res_best = -1
    for source in view.findAll('source'):
        src = urljoin(url, source.attrs['src'])
        # Resolution is encoded in the title attribute, e.g. "720p".
        res = re.find('([0-9]+)p', source.attrs['title'])
        res = int(res) if res else 0
        if res > res_best:
            src_best = src
            res_best = res

    if src_best is None:
        raise Exception('No source')

    title = soup.find('h1').text.strip()
    id = soup.find('div', id='video').attrs['data-id']

    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']

    #src_best = downloader.real_url(src_best)

    video = Video(src_best, url_thumb, url, title, id)
    return video
|
||||
|
||||
|
||||
class Video(object):
    """Resolved KissJAV video; downloads its thumbnail eagerly on creation."""

    def __init__(self, url, url_thumb, referer, title, id):
        self.title = title
        self.filename = format_filename(title, id, '.mp4')
        # The media URL is already known; LazyUrl is used only to carry the referer.
        self.url = LazyUrl(referer, lambda x: url, self)

        self.thumb = BytesIO()
        self.url_thumb = url_thumb
        downloader.download(url_thumb, buffer=self.thumb)
|
||||
|
||||
|
||||
@try_n(2)
def get_session(url, cw=None):
    """Return a Session whose cookies have passed the site's challenge (clf2)."""
    session = Session()
    clf2.solve(url, session=session, cw=cw)
    return session
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from utils import Soup, urljoin, LazyUrl, Downloader, try_n, Session, clean_title, get_print
|
||||
import os
|
||||
from translator import tr_
|
||||
import page_selector
|
||||
import clf2
|
||||
import utils
|
||||
import base64
|
||||
from image_reader import QPixmap
|
||||
|
||||
|
||||
class Image(object):
    """One page image of a chapter; filename is '<chapter>/<index>.<ext>'."""

    def __init__(self, url, page, p):
        self._url = url
        self.url = LazyUrl(page.url, self.get, self)#, pp=self.pp)
        ext = os.path.splitext(url)[1]
        # Fall back to .jpg for unknown/missing extensions.
        if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
            ext = '.jpg'
        self.filename = u'{}/{:04}{}'.format(page.title, p, ext)

    def get(self, _):
        # URL is fixed at construction time; nothing to resolve lazily.
        return self._url

##    def pp(self, filename):
##        pixmap = QPixmap(filename)
##        pixmap.save(filename)
##        return filename
|
||||
|
||||
|
||||
class Page(object):
    """One chapter: sanitized title plus its URL."""

    def __init__(self, title, url):
        self.title = clean_title(title)
        self.url = url
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_lhscan(Downloader):
    """Downloader for LHScan / LoveHeaven / LoveHug manga galleries."""
    type = 'lhscan'
    URLS = ['lhscan.net', 'loveheaven.net', 'lovehug.net']
    MAX_CORE = 16
    display_name = 'LHScan'
    _soup = None  # lazily-fetched landing-page soup

    def init(self):
        # lhscan.net redirects to loveheaven.net; normalize up front.
        self.url = self.url.replace('lhscan.net', 'loveheaven.net')
        self.session = Session()
        #clf2.solve(self.url, session=self.session, cw=self.cw)
        soup = self.soup
        if not soup.find('ul', class_='manga-info'):
            self.Invalid(u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url))

    @property
    def soup(self):
        """Fetch and cache the gallery page, retrying up to 8 times.

        Bug fix: the retry loop's ``else`` clause used a bare ``raise`` with
        no active exception (which raises RuntimeError instead of the real
        error); keep the last fetch error and re-raise it, matching the
        pattern used by Downloader_mrm.soup.
        """
        if self._soup is None:
            e_ = None
            for try_ in range(8):
                try:
                    html = downloader.read_html(self.url, session=self.session)
                    break
                except Exception as e:
                    e_ = e
                    print(e)
            else:
                raise e_
            self._soup = Soup(html)
        return self._soup

    @property
    def name(self):
        # The last itemprop=name breadcrumb holds the manga title.
        title = self.soup.findAll('span', {'itemprop': 'name'})[-1].text.strip()
        return clean_title(title)

    def read(self):
        self.title = tr_(u'읽는 중... {}').format(self.name)

        imgs = get_imgs(self.url, self.name, self.session, self.soup, self.cw)

        for img in imgs:
            self.urls.append(img.url)

        self.title = self.name
|
||||
|
||||
|
||||
@try_n(8)
def get_imgs_page(page, session, cw=None):
    """Return the Image list of one chapter, skipping known watermark/credit images."""
    print_ = get_print(cw)
    print_(page.title)
    html = downloader.read_html(page.url, session=session)
    soup = Soup(html)

    view = soup.find('div', class_='chapter-content')

    if not view:
        raise Exception('no chapter-content')

    imgs = []
    for img in soup.findAll('img', class_='chapter-img'):
        # Several lazy-loading attribute variants are used across the mirrors.
        src = img.get('data-pagespeed-lazy-src') or img.get('data-src') or img.get('data-srcset') or img.get('data-aload') or img['src']
        try:
            # Some mirrors base64-encode the image URL.
            src = base64.b64decode(src).strip().decode('utf8')
        except:
            pass
        src = urljoin(page.url, src)
        # Skip site credit / watermark filler images.
        if 'Credit_LHScan_' in src or '5e1ad960d67b2_5e1ad962338c7' in src:
            continue
        if 'fe132b3d32acc39f5adcea9075bedad4LoveHeaven' in src:
            continue
        if 'LoveHug_600cfd96e98ff.jpg' in src:
            continue
        img = Image(src.strip(), page, len(imgs))
        imgs.append(img)

    return imgs
|
||||
|
||||
|
||||
def get_pages(url, session, soup=None, cw=None):
    """Parse the chapter list; returns pages oldest-first (site lists newest-first)."""
    if soup is None:
        html = downloader.read_html(url, session=session)
        soup = Soup(html)

    tab = soup.find('ul', class_='list-chapters')

    pages = []
    for li in tab.findAll('li'):
        text = li.find('div', class_='chapter-name').text.strip()
        href = li.parent['href']
        href = urljoin(url, href)
        page = Page(text, href)
        pages.append(page)

    if not pages:
        raise Exception('no pages')

    return pages[::-1]
|
||||
|
||||
|
||||
@page_selector.register('lhscan')
@try_n(4)
def f(url):
    """Page-selector hook: list chapters for an LHScan gallery URL."""
    session = Session()
    #clf2.solve(url, session=session)
    pages = get_pages(url, session)
    return pages
|
||||
|
||||
|
||||
@try_n(2)
def get_imgs(url, title, session, soup=None, cw=None):
    """Collect images of all (selected) chapters, updating UI progress."""
    if soup is None:
        html = downloader.read_html(url, session=session)
        soup = Soup(html)

    pages = get_pages(url, session, soup, cw)
    pages = page_selector.filter(pages, cw)

    imgs = []
    for i, page in enumerate(pages):
        imgs += get_imgs_page(page, session, cw)
        s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))
        if cw is not None:
            if not cw.alive:
                # Widget closed; abort and return None.
                return
            cw.setTitle(s)
        else:
            print(s)

    return imgs
|
|
@ -0,0 +1,119 @@
|
|||
import downloader
|
||||
from utils import Session, Downloader, get_ext, LazyUrl, get_print
|
||||
import ree as re
|
||||
import json
|
||||
from io import BytesIO
|
||||
from translator import tr_
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_likee(Downloader):
    """Downloader for Likee videos — single posts or whole channels."""
    type = 'likee'
    URLS = ['likee.video']
    single = True
    display_name = 'Likee'

    def init(self):
        self.session = Session()

    def read(self):
        info = get_info(self.url, self.session, self.cw)
        self.print_('type: {}'.format(info['type']))
        self.artist = info['artist']

        if info['type'] != 'single':
            # Channel: let the framework iterate the playlist and pick one video.
            video = self.process_playlist(info['title'], info['videos'])
        else:
            video = info['videos'][0]
            video.url()  # resolve metadata (thumb, filename)
            self.urls.append(video.url)
            self.title = info['title']

        thumb = BytesIO()
        downloader.download(video.url_thumb, referer=self.url, buffer=thumb)
        self.setIcon(thumb)
|
||||
|
||||
|
||||
def get_info(url, session, cw=None):
    """Build an info dict for a Likee URL.

    A ``/video/`` URL yields a single video; otherwise the URL is treated as
    a channel and all its posts are paged through the site's JSON API.
    """
    print_ = get_print(cw)

    info = {}
    info['videos'] = []

    if '/video/' in url:
        info['type'] = 'single'
        video = Video(url, session)
        video.url()  # resolve so id_/artist are available
        info['videos'].append(video)
        info['title'] = video.id_
        info['artist'] = video.artist
        return info

    info['type'] = 'channel'
    html = downloader.read_html(url, session=session)
    # Channel state is embedded as 'window.data = {...};' in the page.
    data_raw = html.split('window.data = ')[1].split('};')[0]+'}'
    data = json.loads(data_raw)
    info['uid'] = data['userinfo']['uid']
    info['username'] = data['userinfo']['yyuid']
    info['artist'] = data['userinfo']['nick_name']
    info['title'] = '{} (likee_{})'.format(info['artist'], info['username'])

    lastPostId = ''
    urls = set()
    while True:
        url_api = 'https://likee.video/official_website/VideoApi/getUserVideo'
        r = session.post(url_api, data={'uid': info['uid'], 'count': '30', 'lastPostId': lastPostId})
        data = json.loads(r.text)

        videos = data['data']['videoList']
        if not videos:
            break

        # NOTE(review): the loop variable shadows the outer `data`; after the
        # loop, data is the last post, whose postId becomes the page cursor.
        for data in videos:
            url_post = 'https://likee.video/@{}/video/{}'.format(data['likeeId'], data['postId'])
            if url_post in urls:
                print_('duplicate: {}'.format(url_post))
                continue
            urls.add(url_post)
            video = Video(url_post, session, data)
            video.url()
            info['videos'].append(video)
        lastPostId = data['postId']

        msg = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(info['videos']))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(msg)
        else:
            print(msg)

    return info
|
||||
|
||||
|
||||
class Video(object):
    """Lazy Likee video; metadata comes from the list API or a per-post lookup."""

    def __init__(self, url, session, data=None):
        self.id_ = re.find('/video/([0-9]+)', url, err='no id')
        self._session = session
        self._data = data  # list-API record, if already fetched by the caller
        self.url = LazyUrl(url, self.get, self)

    def get(self, url):
        """Return the direct video URL, fetching post info if not cached."""
        if self._data:
            video = self._data
        else:
            url_api = 'https://likee.video/official_website/VideoApi/getVideoInfo'
            r = self._session.post(url_api, data={'postIds': str(self.id_)})

            data = json.loads(r.text)
            video = data['data']['videoList'][0]

        url_video = video['videoUrl']
        self.url_thumb = video['coverUrl']
        self.artist = video['nickname']
        ext = get_ext(url_video)
        self.title = self.id_
        self.filename = '{}{}'.format(self.id_, ext)

        return url_video
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from utils import Soup, Downloader, LazyUrl, urljoin, try_n, get_outdir, clean_title
|
||||
import ree as re
|
||||
import os
|
||||
from timee import sleep
|
||||
from translator import tr_
|
||||
from io import BytesIO
|
||||
import json
|
||||
|
||||
|
||||
class Image(object):
    """One luscious album picture, lazily resolved from its API record."""

    def __init__(self, item, referer):
        self.item = item  # raw API dict for this picture
        self.id = str(item['id'])
        self.referer = referer
        self.url = LazyUrl(referer, self.get, self)

    def get(self, url):
        """Return the original-size image URL and set the filename '<id>.<ext>'."""
        img = urljoin(url, self.item['url_to_original'])
        ext = os.path.splitext(img.split('?')[0])[1]
        self.filename = u'{}{}'.format(self.id, ext)
        return img
|
||||
|
||||
|
||||
class Video(object):
    """A luscious video; downloads its thumbnail eagerly on creation."""

    def __init__(self, url, title, url_thumb):
        self.url = url
        self.title = title
        ext = os.path.splitext(url.split('?')[0])[1]
        self.filename = u'{}{}'.format(clean_title(title), ext)
        self.url_thumb = url_thumb
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_luscious(Downloader):
    """Downloader for luscious.net albums and videos."""
    type = 'luscious'
    URLS = ['luscious.net']
    MAX_CORE = 4

    @classmethod
    def fix_url(cls, url):
        # Members subdomain shows the same content as www.
        url = url.replace('members.luscious.', 'www.luscious.')
        return url

    def read(self):
        # Note: this calls the module-level fix_url (legacy host rewrite),
        # not the classmethod above.
        url = fix_url(self.url)
        # Retry the initial fetch up to 8 times.
        # Bug fix: the original `else: raise` had no active exception
        # (RuntimeError); keep and re-raise the last error instead.
        e_ = None
        for try_ in range(8):
            try:
                html = downloader.read_html(url)
                break
            except Exception as e:
                e_ = e
                print(e)
                self.print_('retry...')
        else:
            raise e_
        soup = Soup(html)
        title = clean_title(get_title(soup))

        self.title = tr_(u'읽는 중... {}').format(title)

        if '/videos/' in url:
            video = get_video(url, soup)
            imgs = [video]
            self.setIcon(video.thumb)
        else:
            imgs = get_imgs(url, soup, self.cw)

        # Reuse files already present in the output directory, keyed by id.
        dir = os.path.join(get_outdir(self.type), title)
        names = {}
        try:
            for name in os.listdir(dir):
                id = os.path.splitext(name)[0]
                names[id] = name
        except:
            pass

        for img in imgs:
            # Bug fix: Video objects have no `id`; getattr keeps the video
            # path from raising AttributeError (videos are never cached here).
            if getattr(img, 'id', None) in names:
                url = os.path.join(dir, names[img.id])
            else:
                url = img.url
            self.urls.append(url)

        self.title = title#
|
||||
|
||||
|
||||
def update(cw, title, imgs):
    """Report reading progress on the widget, or to stdout when headless."""
    msg = u'{} {} ({})'.format(tr_(u'읽는 중...'), title, len(imgs))
    if cw is None:
        print(msg)
    else:
        cw.setTitle(msg)
|
||||
|
||||
def fix_url(url):
    """Rewrite any luscious subdomain (www, members, ...) to the legacy host."""
    return re.sub(r'[^./]+\.luscious', 'legacy.luscious', url)
|
||||
|
||||
def get_imgs(url, soup=None, cw=None):
    """Page through an album's pictures (up to 80 API pages)."""
    url = fix_url(url)
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)
    title = get_title(soup)

    imgs = []
    for p in range(1, 81):
        imgs_new = get_imgs_p(url, p)
        if not imgs_new:
            # Empty page marks the end of the album.
            break
        imgs += imgs_new
        update(cw, title, imgs)
    return imgs
|
||||
|
||||
|
||||
@try_n(4, sleep=30)
def get_imgs_p(url, p=1):
    """Fetch one page of album pictures via the GraphQL API (AlbumListOwnPictures)."""
    id = re.find('/albums/[^/]+?([0-9]+)/', url+'/')
    print(url, id)
    url_api = 'https://api.luscious.net/graphql/nobatch/?operationName=AlbumListOwnPictures&query=+query+AlbumListOwnPictures%28%24input%3A+PictureListInput%21%29+%7B+picture+%7B+list%28input%3A+%24input%29+%7B+info+%7B+...FacetCollectionInfo+%7D+items+%7B+...PictureStandardWithoutAlbum+%7D+%7D+%7D+%7D+fragment+FacetCollectionInfo+on+FacetCollectionInfo+%7B+page+has_next_page+has_previous_page+total_items+total_pages+items_per_page+url_complete+%7D+fragment+PictureStandardWithoutAlbum+on+Picture+%7B+__typename+id+title+created+like_status+number_of_comments+number_of_favorites+status+width+height+resolution+aspect_ratio+url_to_original+url_to_video+is_animated+position+tags+%7B+category+text+url+%7D+permissions+url+thumbnails+%7B+width+height+size+url+%7D+%7D+&variables=%7B%22input%22%3A%7B%22filters%22%3A%5B%7B%22name%22%3A%22album_id%22%2C%22value%22%3A%22{}%22%7D%5D%2C%22display%22%3A%22position%22%2C%22page%22%3A{}%7D%7D'.format(id, p)
    data_raw = downloader.read_html(url_api, referer=url)
    data = json.loads(data_raw)
    # NOTE(review): has_next_page is computed but unused — the caller stops
    # on an empty page instead.
    has_next_page = data['data']['picture']['list']['info']['has_next_page']
    imgs = []
    for item in data['data']['picture']['list']['items']:
        img = Image(item, url)
        imgs.append(img)

    return imgs
|
||||
|
||||
|
||||
def get_video(url, soup):
    """Build a Video from a luscious video page (og:image is the thumbnail)."""
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']

    title = re.find('videos/([^/]+)', url)
    video = soup.find('video')
    url = video.source.attrs['src']
    video = Video(url, title, url_thumb)
    return video
|
||||
|
||||
|
||||
def get_title(soup):
    """Album/video title is the page's first h2."""
    return soup.find('h2').text.strip()
|
|
@ -0,0 +1,33 @@
|
|||
from utils import Downloader, LazyUrl, clean_title
|
||||
from m3u8_tools import playlist2stream, M3u8_stream
|
||||
import os
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_m3u8(Downloader):
    """Downloader for raw .m3u8 playlist URLs."""
    type = 'm3u8'
    URLS = ['.m3u8']
    single = True
    display_name = 'M3U8'

    def init(self):
        # Accept scheme-less input.
        if '://' not in self.url:
            self.url = 'http://' + self.url

    def read(self):
        video = Video(self.url)

        self.urls.append(video.url)

        self.title = video.title
|
||||
|
||||
|
||||
class Video(object):
    """Wraps an m3u8 URL as a downloadable stream."""

    def __init__(self, url):
        # Try to interpret the URL as a master playlist; fall back to a
        # plain media-playlist stream.
        try:
            m = playlist2stream(url)
        except:
            m = M3u8_stream(url)
        self.url = LazyUrl(url, lambda _: m, self)
        self.title = os.path.splitext(os.path.basename(url))[0]
        # n=-4 presumably reserves room for the '.mp4' suffix when
        # clean_title truncates — TODO confirm against utils.clean_title.
        self.filename = clean_title(self.title, n=-4) + '.mp4'
|
|
@ -0,0 +1,211 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title
|
||||
import os
|
||||
from translator import tr_
|
||||
from timee import sleep
|
||||
import requests
|
||||
import ree as re
|
||||
import clf2#
|
||||
|
||||
|
||||
class Image(object):
    """One post image; filename optionally namespaced by page title."""

    def __init__(self, url, p, page):
        ext = os.path.splitext(url)[1]
        # Fall back to .jpg for unknown/missing extensions.
        if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
            ext = '.jpg'
        self.filename = u'{:04}{}'.format(p, ext)
        if page.title is not None:
            self.filename = u'{}/{}'.format(page.title, self.filename)
        def f(_):
            return url
        self.url = LazyUrl(page.url, f, self)
|
||||
|
||||
class Page(object):
    """One paginated part of a post: cleaned title, URL, and optional cached soup."""

    def __init__(self, title, url, soup=None):
        self.title = clean_title(title)
        self.url = url
        self.soup = soup
|
||||
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_mrm(Downloader):
    """Downloader for MyReadingManga posts."""
    type = 'mrm'
    URLS = ['myreadingmanga.info']
    _soup = None  # lazily-fetched post soup
    MAX_CORE = 16
    display_name = 'MyReadingManga'

    def init(self):
        # The site is behind a challenge page; pre-solve it for a session.
        self.session = get_session(self.url, self.cw)

    @classmethod
    def fix_url(cls, url):
        # Normalize to the post root (strip page suffixes etc.).
        return re.find('https?://myreadingmanga.info/[^/]+', url, err='err')

    @property
    def soup(self):
        """Fetch and cache the post page, retrying up to 8 times."""
        if self._soup is None:
            for try_ in range(8):
                try:
                    html = read_html(self.url, session=self.session, cw=self.cw)
                    break
                except Exception as e:
                    e_ = e
                    self.print_(e)
            else:
                # All retries failed; re-raise the last error.
                raise e_
            self._soup = Soup(html)
        return self._soup

    @property
    def name(self):
        title = get_title(self.soup)
        return title

    def read(self):
        self.title = u'읽는 중... {}'.format(self.name)

        imgs = get_imgs(self.url, self.soup, self.session, self.cw)

        for img in imgs:
            self.urls.append(img.url)

        self.title = self.name
|
||||
|
||||
|
||||
def get_title(soup):
    """Post title from the entry header, with chapter markers stripped and sanitized."""
    title = soup.find('h1', class_='entry-title').text.strip()
    title = fix_title(title)
    title = clean_title(title)
    return title
|
||||
|
||||
|
||||
def get_imgs(url, soup=None, session=None, cw=None):
    """Collect all images of a post, across its pagination if present."""
    if soup is None:
        html = read_html(url, session=session, cw=cw)
        soup = Soup(html)

    title = get_title(soup)

    pagination = soup.find('div', class_='pagination')

    if pagination is None:
        # Single-page post.
        page = Page(None, url, soup)
        imgs = get_imgs_page(page, session=session)
    else:
        pages = get_pages(url, soup, session=session)
        imgs = []
        for i, page in enumerate(pages):
            s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))

            if cw:
                if not cw.alive:
                    # Widget closed; abort and return None.
                    return
                cw.setTitle(s)
            else:
                print(s)

            imgs += get_imgs_page(page, session=session)

    if not imgs:
        raise Exception('no imgs')

    return imgs
|
||||
|
||||
|
||||
def get_pages(url, soup=None, session=None):
    """List the post's pagination pages, deduplicated, with page 1 first."""
    if soup is None:
        html = read_html(url, session=session, cw=None)
        soup = Soup(html)
    pagination = soup.find('div', class_='pagination')

    pages = []
    hrefs = set()
    for a in pagination.findAll('a'):
        href = a.attrs.get('href', '')
        href = urljoin(url, href)
        # Ignore links that leave this post.
        if not href.startswith(url):
            print('not match', href)
            continue
        while href.endswith('/'):
            href = href[:-1]
        if href in hrefs:
            print('duplicate', href)
            continue
        hrefs.add(href)
        text = a.text.strip()
        page = Page(text, href)
        pages.append(page)

    # The first page usually isn't linked in its own pagination; prepend it.
    if url not in hrefs:
        page = Page('1', url, soup)
        pages.insert(0, page)

    return pages
|
||||
|
||||
|
||||
@try_n(4)
def get_imgs_page(page, session=None):
    """Return the lazy-loaded images inside one page's entry-content div."""
    url = page.url
    soup = page.soup
    if soup is None:
        html = read_html(url, session=session, cw=None)
        soup = Soup(html)
        page.soup = soup  # cache for potential retries

    view = soup.find('div', class_='entry-content')

    imgs = []
    for img in view.findAll('img'):
        # Only lazy-load attributes carry real image URLs here.
        img = img.attrs.get('data-lazy-src') or img.attrs.get('data-src')
        if img is None:
            continue
        img = urljoin(url, img)
        img = Image(img, len(imgs), page)
        imgs.append(img)
    print(page.title, len(imgs), page.url)

    return imgs
|
||||
|
||||
|
||||
def fix_title(title):
    """Strip chapter markers like ``(c.12)`` and collapse repeated spaces.

    Bug fix: the original loop ``while ' ' in title: title = title.replace(' ', ' ')``
    replaced a space with itself, so it never terminated while any space
    remained (the intended double-space collapse was lost); collapse runs of
    spaces in a single regex pass instead.
    """
    title = re.sub(r'\(?[^()]*?c\.[^() ]+\)?', '', title)
    title = re.sub(' {2,}', ' ', title)
    return title
|
||||
|
||||
|
||||
def read_html(url, session, cw):
    """Fetch a page through clf2 (challenge solver) and return its HTML."""
##    html = downloader.read_html(url, session=session)
##    soup = Soup(html)
##
##    cf = soup.find('div', class_='cf-browser-verification')
##    if cf is None:
##        return html

    r = clf2.solve(url, cw=cw, session=session)

    return r['html']
|
||||
|
||||
|
||||
@try_n(4)
def get_session(url, cw=None):
    """Return a session whose cookies have passed the site's CF protection."""
    print_ = get_print(cw)
##    html = downloader.read_html(url)
##    soup = Soup(html)
##
##    cf = soup.find('div', class_='cf-browser-verification')
##    if cf is None:
##        print_('no cf protection')
##        return None

    print_('cf protection')
    r = clf2.solve(url, cw=cw)
    session = r['session']

    return session
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
#coding:utf-8
|
||||
import downloader
|
||||
import re
|
||||
from utils import urljoin, Downloader, Soup, LazyUrl, clean_title
|
||||
import json
|
||||
from timee import sleep
|
||||
import collections
|
||||
PATTERNS = ['.*blog.naver.com/(?P<username>.+)/(?P<pid>[0-9]+)',
|
||||
'.*blog.naver.com/.+?blogId=(?P<username>[^&]+).+?logNo=(?P<pid>[0-9]+)',
|
||||
'.*?(?P<username>[0-9a-zA-Z_-]+)\.blog\.me/(?P<pid>[0-9]+)']
|
||||
HDR = {
|
||||
'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Accept-Language': 'ko, en-US; q=0.7, en; q=0.3',
|
||||
'Connection': 'Keep-Alive',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
|
||||
}
|
||||
|
||||
def get_id(url):
    """Match *url* against the known blog-URL patterns.

    Returns ``(username, post_id)``, or ``(None, None)`` when no pattern matches.
    """
    for pattern in PATTERNS:
        m = re.match(pattern, url)
        if m is not None:
            return m.group('username'), m.group('pid')
    return None, None
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_naver(Downloader):
    """Downloader for Naver Blog posts (images and embedded videos)."""
    type = 'naver'
    URLS = ['blog.naver.', '.blog.me']
    display_name = 'Naver Blog'

    def init(self):
        username, pid = get_id(self.url)
        if username is None:
            return self.Invalid('Invalid format')
        # Canonicalize to the desktop post URL.
        self.url = 'https://blog.naver.com/{}/{}'.format(username, pid)
        self.headers = {'User-Agent': downloader.hdr['User-Agent']}

    @property
    def name(self):
        username, pid = get_id(self.url)
        return clean_title(u'{}/{}'.format(username, pid))

    def read(self):
        self.title = u'읽는 중... {}'.format(self.name)

        imgs = get_imgs(self.url)

        for img in imgs:
            self.urls.append(img.url)

        self.title = self.name
|
||||
|
||||
|
||||
class Image(object):
    """Plain wrapper carrying a direct image URL."""

    def __init__(self, url):
        self.url = url
|
||||
|
||||
|
||||
class Video(object):
    """An embedded blog video; *p* indexes videos within the post."""

    def __init__(self, url, referer, p):
        self.url = LazyUrl(referer, lambda _: url, self)
        self.filename = 'video_{}.mp4'.format(p)
|
||||
|
||||
|
||||
def read_page(url, depth=0):
    """Follow frames/redirects until the mobile post view is reached.

    Returns ``(final_url, soup)``; raises after 10 hops to avoid loops.
    """
    print('read_page', url, depth)
    if depth > 10:
        raise Exception('Too deep')
    html = downloader.read_html(url, header=HDR)

    # A near-empty response is a bounce page; rebuild the mobile view URL
    # from the blogId/logNo embedded in it.
    if len(html) < 5000:
        id = re.findall('logNo=([0-9]+)', html)[0]
        usernames = re.findall('blog.naver.com/([0-9a-zA-Z]+)', url)
        if not usernames:
            usernames = re.findall('blogId=([0-9a-zA-Z]+)', url)
        username = usernames[0]
        url = 'https://m.blog.naver.com/PostView.nhn?blogId={}&logNo={}&proxyReferer='.format(username, id)
        print('###', username, id, url)

    soup = Soup(html)
    # viewTypeSelector marks the real post body.
    if soup.find('div', {'id': 'viewTypeSelector'}):
        return url, soup
    frame = soup.find('frame')
    if frame is None:
        print('frame is None')
        return read_page(url, depth+1)
    return read_page(urljoin('https://blog.naver.com', frame.attrs['src']), depth+1)
|
||||
|
||||
|
||||
|
||||
def get_imgs(url):
    """Scrape all images and embedded videos from a Naver Blog post.

    Returns images first, then videos.
    """
    url = url.replace('blog.naver', 'm.blog.naver')
    url_frame, soup = read_page(url)

    imgs = []
    urls = set()  # dedup by normalized URL
    view = soup.find('div', {'id': 'viewTypeSelector'})
    print('view', view is not None)

    imgs_ = view.findAll('span', class_='_img') + view.findAll('img')

    for img in imgs_:
        url = img.attrs.get('src', None)
        if url is None:
            url = img.attrs.get('thumburl', None)
        if url is None:
            print(u'invalid img: {}'.format(url))
            continue

        # Skip site chrome / profile / link-preview / emoticon assets.
        if 'ssl.pstatic.net' in url: #
            continue

        if 'blogpfthumb-phinf.pstatic.net' in url: # profile
            continue

        if 'dthumb-phinf.pstatic.net' in url: # link
            continue

        if 'storep-phinf.pstatic.net' in url: # emoticon
            continue

        # Swap thumbnail host for the original-file host and drop size params.
        url = url.replace('mblogthumb-phinf', 'blogfiles')
        #url = re.sub('\?type=[a-zA-Z0-9]*', '?type=w1@2x', url)
        #url = re.sub('\?type=[a-zA-Z0-9]*', '', url)
        url = url.split('?')[0]

        if url in urls:
            print('### Duplicate:', url)
            continue

        urls.add(url)
        #url = url.split('?type=')[0]
        img = Image(url)
        imgs.append(img)

    # Collect (vid, key) pairs for embedded videos from both embed styles.
    pairs = []

    for video in soup.findAll('span', class_='_naverVideo'):
        vid = video.attrs['vid']
        key = video.attrs['key']
        pairs.append((vid, key))

    for script in soup.findAll('script', class_='__se_module_data'):
        data_raw = script['data-module']
        data = json.loads(data_raw)['data']
        vid = data.get('vid')
        if not vid:
            continue
        key = data['inkey']
        pairs.append((vid, key))

    videos = []
    for vid, key in pairs:
        url_api = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'.format(vid, key)
        data_raw = downloader.read_html(url_api)
        data = json.loads(data_raw)
        fs = data['videos']['list']
        # Largest file first — presumably the best quality; TODO confirm.
        fs = sorted(fs, key=lambda f: f['size'], reverse=True)
        video = Video(fs[0]['source'], url_frame, len(videos))
        videos.append(video)

    return imgs + videos
|
||||
|
|
@ -0,0 +1,244 @@
|
|||
# uncompyle6 version 3.5.0
|
||||
# Python bytecode 2.7 (62211)
|
||||
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
|
||||
# Embedded file name: navertoon_downloader.pyo
|
||||
# Compiled at: 2019-10-03 10:19:35
|
||||
import downloader
|
||||
from utils import Soup, urljoin, Downloader, LazyUrl, get_imgs_already, clean_title, get_ext, get_print
|
||||
from constants import try_n
|
||||
import ree as re, os
|
||||
from timee import sleep
|
||||
import page_selector
|
||||
from translator import tr_
|
||||
import json
|
||||
|
||||
|
||||
class Page(object):
    """A single webtoon episode: its URL, display title, and list-page index."""

    def __init__(self, url, title, p):
        self.url, self.title, self.p = url, title, p
|
||||
|
||||
|
||||
class Image(object):
    """One episode image; filename is '<episode title>/<index>.<ext>'."""

    def __init__(self, url, page, p):
        ext = get_ext(url)
        self.filename = (u'{}/{:04}{}').format(clean_title(page.title), p, ext)

        self.url = LazyUrl(page.url, lambda _: url, self)
|
||||
|
||||
|
||||
class Info(object):
    """Webtoon metadata bundle: numeric id, title, and artist name."""

    def __init__(self, id, title, artist):
        self.id, self.title, self.artist = id, title, artist
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_navertoon(Downloader):
    """Downloader for Naver Webtoon series."""
    type = 'navertoon'
    URLS = ['comic.naver.com']
    MAX_CORE = 8
    MAX_SPEED = 4.0
    display_name = 'Naver Webtoon'

    def init(self):
        self.url = get_main(self.url)
        # Only the metadata is needed here; the episode list is re-read later.
        self.__info, _ = get_pages(self.url, self.cw)

    @property
    def name(self):
        id = self.__info.id
        title = self.__info.title
        artist = self.__info.artist
        title = self.format_title('N/A', id, title, artist, 'N/A', 'N/A', 'Korean', prefix='navertoon_')
        return clean_title(title)

    def read(self):
        self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(self.name)
        # NOTE(review): get_imgs_all is not visible in this chunk — presumably
        # defined later in the module; confirm.
        imgs = get_imgs_all(self.url, self.name, cw=self.cw)
        for img in imgs:
            if isinstance(img, Image):
                self.urls.append(img.url)
            else:
                self.urls.append(img)

        self.title = self.name
|
||||
|
||||
|
||||
def get_main(url):
    """Normalize an episode/mobile URL to the desktop episode-list URL."""
    stripped = re.sub('[?&]no=[0-9]+', '', url)
    stripped = re.sub('[?&]page=[0-9]+', '', stripped)
    url_main = stripped.replace('detail.nhn', 'list.nhn').replace('m.comic.naver.', 'comic.naver.')
    # Drop any trailing fragment markers left behind.
    while url_main.endswith('#'):
        url_main = url_main[:-1]

    return url_main
|
||||
|
||||
|
||||
def set_no(url, p):
    """Return *url* with its ``no`` query parameter set to *p* (appended if absent)."""
    param = ('&no={}').format(p)
    if '&no=' in url:
        return re.sub('&no=[0-9]+', param, url)
    return url + param
|
||||
|
||||
|
||||
def get_id(url):
    """Extract the numeric titleId from a comic.naver URL (case-insensitive)."""
    tail = url.lower().split('titleid=')[1]
    return int(tail.split('&')[0])
|
||||
|
||||
|
||||
def set_page(url, p):
    """Return `url` with its `page` query parameter set to `p`.

    Appends `&page=p` when the parameter is absent; otherwise replaces
    the existing numeric value.
    """
    if '&page=' not in url:
        return url + '&page={}'.format(p)
    return re.sub('&page=[0-9]+', '&page={}'.format(p), url)
|
||||
|
||||
|
||||
@try_n(4)
def get_pages(url, cw=None):
    """Scrape a series' metadata and full episode list from the mobile site.

    Returns (Info, [Page]) where Page entries are in page-scan order.
    Retried up to 4 times by @try_n.
    """
    print_ = get_print(cw)
    # The mobile site is used for listing; get_main() normalized to desktop first.
    url = get_main(url).replace('comic.naver.', 'm.comic.naver.')
    id = get_id(url)
    print('id:', id)
    print(url)
    html = downloader.read_html(url)
    soup = Soup(html)
    try:
        info = soup.find('div', class_='area_info')
        artist = info.find('span', class_='author').text.strip()
    except Exception as e:
        # No author block: likely an adult-only / login-gated title.
        # Surface the page title (or a fallback) as the error message.
        print(e)
        try:
            title = ('\n').join(soup.find('div', class_='title').text.strip().split('\n')[:-1]).strip()
        except:
            title = 'artist not found'

        raise Exception(title)

    print('artist:', artist)
    title = soup.find('meta', {'property': 'og:title'}).attrs['content']
    pages = []
    nos = set()  # episode numbers seen, to skip duplicates across pages
    for p in range(1, 100):  # hard cap of 99 listing pages
        if p == 1:
            url_page = url
        else:
            url_page = set_page(url, p)
        html = downloader.read_html(url_page)
        print('read page:', url_page)
        soup = Soup(html)
        # The last section_episode_list holds the episode entries.
        view = soup.findAll('ul', class_='section_episode_list')[(-1)]
        for lst in view.findAll('li'):
            url_page = urljoin(url, lst.find('a').attrs['href'])
            if 'detail.nhn' not in url_page.lower():
                continue  # not an episode link (banner, recommendation, etc.)
            print_('url_page: {}'.format(url_page))
            text = lst.find('strong', class_='title').find('span', class_='name').text.strip()
            no = int(re.findall('[?&]no=([0-9]+)', url_page)[0])
            if no in nos:
                print('duplicate no: {}'.format(no))
                continue
            nos.add(no)
            # Prefix the title with the zero-padded episode number for sorting.
            text = '{:04} - {}'.format(no, text)
            page = Page(url_page, text, p)
            pages.append(page)

        # Stop when there is no "next" button or it points nowhere.
        btn_next = soup.find('a', class_='btn_next')
        if btn_next is None or btn_next.attrs['href'] == '#':
            print('end of page')
            break

    info = Info(id, title, artist)
    return (
        info, pages)
|
||||
|
||||
|
||||
@page_selector.register('navertoon')
@try_n(4)
def f(url):
    """Page-selector hook: return every episode Page for a series URL."""
    info, pages = get_pages(get_main(url))
    return pages
|
||||
|
||||
|
||||
@try_n(6)
def get_imgs(page, cw=None):
    """Extract all image URLs from one episode page.

    Dispatches on the page's `webtoonType` JS variable: DEFAULT (plain
    vertical strip), CUTTOON (swiper slides), EFFECTTOON (JSON asset
    document), else a generic `sImageUrl` scrape. Returns a list of Image.
    """
    print_ = get_print(cw)
    html = downloader.read_html(page.url)
    soup = Soup(html)

    type_ = re.find('''webtoonType *: *['"](.+?)['"]''', html)
    print_('type: {}'.format(type_))

    imgs = []
    if type_ == 'DEFAULT': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=715772
        view = soup.find('div', class_='toon_view_lst')
        for img in view.findAll('img'):
            img = img.attrs.get('data-src')
            if not img:
                continue  # placeholder <img> without a lazy-load source
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'CUTTOON': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=752803
        view = soup.find('div', class_='swiper-wrapper')
        for div in view.findAll('div', class_='swiper-slide'):
            if div.parent != view:
                continue  # skip nested slides; only direct children are cuts
            if div.find('div', class_='cut_viewer_last'):
                print('cut_viewer_last')
                continue  # trailing "end" slide, not an image
            if div.find('div', class_='cut_viewer_recomm'):
                print('cut_viewer_recomm')
                continue  # recommendation slide, not an image
            img = div.find('img')
            img = img.attrs['data-src']
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'EFFECTTOON': #2313; https://m.comic.naver.com/webtoon/detail.nhn?titleId=670144
        # Effect toons list their stills in a separate JSON document.
        img_base = re.find('''imageUrl *: *['"](.+?)['"]''', html) + '/'
        print('img_base:', img_base)
        url_api = re.find('''documentUrl *: *['"](.+?)['"]''', html)
        data_raw = downloader.read_html(url_api, page.url)
        data = json.loads(data_raw)
        for img in data['assets']['stillcut'].values(): # ordered in python3.7+
            img = urljoin(img_base, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    else:
        # Unknown type: fall back to scraping sImageUrl assignments.
        _imgs = re.findall('sImageUrl *: *[\'"](.+?)[\'"]', html)
        if not _imgs:
            raise Exception('no imgs')
        for img in _imgs:
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)

    return imgs
|
||||
|
||||
|
||||
def get_imgs_all(url, title, cw=None):
    """Collect images for every (selected) episode of a series.

    Episodes already downloaded are reused via get_imgs_already; fresh
    episodes are scraped with get_imgs. Stops early if the UI is closed.
    """
    print_ = get_print(cw)
    info, pages = get_pages(url, cw)
    # Honor the user's page selection from the page-selector dialog.
    pages = page_selector.filter(pages, cw)
    imgs = []
    for p, page in enumerate(pages):
        # Reuse cached results for episodes downloaded in a previous run.
        imgs_already = get_imgs_already('navertoon', title, page, cw)
        if imgs_already:
            imgs += imgs_already
            continue
        imgs_new = get_imgs(page, cw)
        print_('{}: {}'.format(page.title, len(imgs_new)))
        imgs += imgs_new
        if cw is not None:
            # "읽는 중... {title} / {page} (i/total)" progress in the UI.
            cw.setTitle(tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
            if not cw.alive:
                break  # window closed: stop scraping

    return imgs
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
import downloader
|
||||
import ree as re
|
||||
from io import BytesIO as IO
|
||||
import os
|
||||
from constants import try_n
|
||||
from error_printer import print_error
|
||||
from utils import Downloader, compatstr, LazyUrl, get_ext, format_filename, clean_title
|
||||
import ytdl
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_navertv(Downloader):
    """Downloader for Naver TV (tv.naver.com) single videos."""
    type = 'navertv'
    single = True
    URLS = ['tv.naver.com']
    display_name = 'Naver TV'

    def init(self):
        # Accept a bare video id and expand it to a full watch URL.
        if not re.match('https?://.+', self.url, re.IGNORECASE):
            self.url = 'https://tv.naver.com/v/{}'.format(self.url)

    def read(self):
        video = Video(self.url)
        # Resolve eagerly so metadata (title/thumb/filename) is populated.
        video.url()#

        self.urls.append(video.url)
        self.setIcon(video.thumb)

        self.enableSegment()

        self.title = video.title
|
||||
|
||||
|
||||
|
||||
class Video(object):
    """Naver TV video resolved lazily through ytdl."""
    _url = None  # cached direct media URL after first resolution

    def __init__(self, url):
        self.url = LazyUrl(url, self.get, self)

    @try_n(4)
    def get(self, url):
        """Resolve the direct media URL (cached) and fill title/thumb/filename."""
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)
        # Keep only plain HTTP(S) formats (drops HLS/DASH), widest first.
        fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
        fs = sorted(fs, key=lambda f: int(f.get('width', 0)), reverse=True)
        if not fs:
            raise Exception('No MP4 videos')
        f = fs[0]
        self._url = f['url']

        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = IO()
        downloader.download(self.thumb_url, buffer=self.thumb)
        self.title = info['title']
        id = info['id']
        ext = get_ext(self._url)
        self.filename = format_filename(self.title, id, ext)
        return self._url
|
|
@ -0,0 +1,97 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
import nndownload
|
||||
from io import BytesIO
|
||||
import ree as re
|
||||
from utils import Downloader, get_print, compatstr, format_filename, clean_title, try_n
|
||||
from nico_login import login, logout
|
||||
|
||||
|
||||
def get_id(url):
    """Return the video id from a nicovideo watch URL, or the input as-is
    when it is already a bare id."""
    if '/watch/' not in url:
        return url
    return re.findall('/watch/([a-zA-Z0-9]+)', url)[0]
|
||||
|
||||
|
||||
class Video(object):
    """Niconico video wrapper built from an nndownload info dict.

    NOTE: __init__ downloads the thumbnail immediately (network I/O).
    """

    def __init__(self, session, info):
        self.session = session
        self.info = info
        self.url = info['url']
        self.title = info['title']
        self.ext = info['ext']
        self.id = info['id']

        self.fileName = format_filename(self.title, self.id, self.ext)

        self.url_thumb = info['thumbnail_url']
        print('thumb:', self.url_thumb)
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

    def __repr__(self):
        return u'Video({})'.format(self.id)
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_nico(Downloader):
    """Downloader for Niconico (nicovideo.jp) single videos."""
    type = 'nico'
    single = True
    URLS = ['nicovideo.jp']
    display_name = 'Niconico'

    def init(self):
        # Accept a bare video id (e.g. "sm12345") and expand it.
        if not re.match('https?://.+', self.url, re.IGNORECASE):
            self.url = 'https://www.nicovideo.jp/watch/{}'.format(self.url)

    @property
    def id_(self):
        # Video id parsed from the current URL.
        return get_id(self.url)

    def read(self):
        ui_setting = self.ui_setting

        # Credentials come from the settings UI when the nico box is checked;
        # otherwise attempt an anonymous session.
        if ui_setting.nicoBox.isChecked():
            username = compatstr(ui_setting.nico_id.text())
            password = compatstr(ui_setting.nico_pw.text())
        else:
            username = ''
            password = ''

        try:
            session = login(username, password)
        except Exception as e:
            # Reset any partial login state before reporting failure.
            # NOTE(review): the message interpolates the URL, not the error.
            logout()
            return self.Invalid(u'Failed to login: {}'.format(self.url), fail=True)

        self.session = session
        try:
            video = get_video(session, self.id_, cw=self.cw)
        except Exception as e:
            logout()  # drop the session so the next attempt re-authenticates
            raise

        self.urls.append(video.url)
        self.filenames[video.url] = video.fileName
        self.setIcon(video.thumb)

        self.enableSegment()

        self.title = video.title
|
||||
|
||||
|
||||
@try_n(2)
def get_video(session, id, cw=None):
    """Request video info for `id` via nndownload and wrap it in a Video.

    Retried up to 2 times by @try_n. Raises Exception with the underlying
    cause's message when the request fails.
    """
    print_ = get_print(cw)

    try:
        info = nndownload.request_video(session, id)
    except Exception as e:
        # BUGFIX: was a bare `except: raise Exception('Err')`, which
        # discarded the real failure reason (and swallowed SystemExit /
        # KeyboardInterrupt too). Preserve the cause in the message.
        raise Exception('Failed to request video {}: {}'.format(id, e))
    video = Video(session, info)

    return video
|
||||
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
#coding: utf-8
|
||||
import downloader
|
||||
from utils import Downloader, urljoin, get_max_range, query_url, Soup, Session, LazyUrl, get_print, clean_title, try_n, get_ext
|
||||
from translator import tr_
|
||||
from constants import clean_url
|
||||
import ree as re
|
||||
from errors import LoginRequired
|
||||
|
||||
|
||||
def get_id(url):
    # Return the numeric member id from the URL's `id=` query parameter.
    # NOTE(review): `re` is the project's `ree` wrapper; `find` presumably
    # returns the first capture group or None when absent — confirm.
    return re.find('id=([0-9]+)', url)
|
||||
|
||||
|
||||
def get_name(soup):
    """Read the member's display name from a nijie member-page soup."""
    icon = soup.find('p', class_='user_icon')
    return icon.find('a', class_='name').text.strip()
|
||||
|
||||
|
||||
def isLogin(soup):
    """True if the page shows the logged-in navigation menu (#sub-menu)."""
    return soup.find('ul', id='sub-menu') is not None
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_nijie(Downloader):
    """Downloader for nijie.info member galleries (login required)."""
    type = 'nijie'
    URLS = ['nijie.info']
    MAX_CORE = 4
    display_name = 'ニジエ'

    def init(self):
        # Only member / member-illust listing pages are supported.
        if 'members.php' not in self.url and 'members_illust.php' not in self.url:
            raise NotImplementedError()
        id = get_id(self.url)
        html = downloader.read_html('https://nijie.info/members.php?id={}'.format(id))
        self.soup = Soup(html)

        # nijie serves nothing useful without a logged-in session.
        if not isLogin(self.soup):
            raise LoginRequired()

    @classmethod
    def fix_url(cls, url):
        # Accept a bare member id; always force https.
        if 'nijie.info' not in url.lower():
            url = 'https://nijie.info/members.php?id={}'.format(url)
        return url.replace('http://', 'https://')

    @property
    def name(self):
        # "<member name> (nijie_<id>)"
        name = u'{} (nijie_{})'.format(get_name(self.soup), get_id(self.url))
        return clean_title(name)

    def read(self):
        self.title = self.name

        imgs = get_imgs(self.url, self.name, cw=self.cw)

        for img in imgs:
            self.urls.append(img.url)

        self.title = self.name
|
||||
|
||||
|
||||
|
||||
class Image(object):
    """One nijie post image.

    In lazy mode the post page is fetched only when the URL is resolved
    (get_single), which also sets the filename at that point. In eager
    mode the direct image URL is already known and wrapped immediately.
    """
    def __init__(self, id, url, p, lazy=True, img=None):
        self.id = id
        self.p = p
        if lazy:
            # Filename is assigned later, inside get_single.
            self.url = LazyUrl(url, self.get_single, self)
        else:
            # `img` is the direct image URL; `url` stays the referer page.
            self.url = LazyUrl(url, lambda _:img, self)
            ext = get_ext(img)
            self.filename = '{}_p{}{}'.format(id, p, ext)

    def get_single(self, url): # single
        # Resolve a single-image post: fetch the post page and take the
        # first (only) image's direct URL.
        img = get_imgs_post(self.id, url)[0].url()
        ext = get_ext(img)
        self.filename = '{}_p{}{}'.format(self.id, self.p, ext)
        return img
|
||||
|
||||
|
||||
@try_n(8, sleep=10)
def get_imgs_post(id, url):
    """Fetch a nijie post page and return its images as eager Image objects.

    Retried up to 8 times with a 10s sleep by @try_n.
    """
    #print('get_imgs_post', id, url)
    html = downloader.read_html(url)
    soup = Soup(html)
    view = soup.find('div', id='gallery')
    imgs = []
    for img in view.findAll(class_='mozamoza'):
        url_img = urljoin(url, img['src'])
        # Strip the resize-proxy path segment to get the original image.
        url_img = re.sub('__rs_l[0-9]+x[0-9]+/', '', url_img)
        img = Image(id, url, len(imgs), False, url_img)
        imgs.append(img)
    return imgs
|
||||
|
||||
|
||||
def setPage(url, page):
    """Force https and set the `p` page parameter on a nijie listing URL."""
    url = url.replace('http://', 'https://')
    if 'p=' not in url:
        # No page parameter yet: append one.
        return url + '&p={}'.format(page)
    return re.sub('p=[0-9]*', 'p={}'.format(page), url)
|
||||
|
||||
|
||||
def get_imgs(url, title=None, cw=None):
    """Walk a nijie member's illustration listing and collect all images.

    Multi-image posts are expanded immediately via get_imgs_post; single
    posts become lazy Image objects. Stops at max_pid images, an empty
    page, or a page contributing nothing new. Returns None if the UI is
    closed mid-scrape.
    """
    print_ = get_print(cw)
    url = clean_url(url)

    id = get_id(url)
    url = u'https://nijie.info/members_illust.php?id={}'.format(id)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    url_imgs = set()  # post URLs seen, to skip duplicates across pages
    for p in range(1, 1+100):  # hard cap of 100 listing pages
        url = setPage(url, p)
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll('div', class_='nijie')
        if not posts:
            print('no posts')
            break
        c = 0  # new posts found on this page
        for post in posts:
            url_img = urljoin(url, post.a.attrs['href'])
            if url_img in url_imgs:
                print('duplicate:', url_img)
                continue
            url_imgs.add(url_img)
            # NOTE: shadows the member id with the post id from here on.
            id = int(re.find('[?&]id=([0-9]+)', url_img))
            # The thumbnail-icon badge marks multi-image posts.
            multi = post.find('div', class_='thumbnail-icon')
            if multi:
                imgs_ = get_imgs_post(id, url_img)#
            else:
                imgs_ = [Image(id, url_img, 0)]

            imgs += imgs_
            c += 1

            if len(imgs) >= max_pid:
                break

        # "읽는 중..." = "Reading..." progress message.
        msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
        if cw:
            if not cw.alive:
                return  # window closed: abort
            cw.setTitle(msg)
        else:
            print(msg)

        # c == 0 means this page contributed nothing new: end of gallery.
        if len(imgs) >= max_pid or c == 0:
            break
    return imgs
|
||||
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
import downloader
|
||||
from utils import Session, Soup, LazyUrl, get_print, Downloader, get_ext, try_n, format_filename, clean_title
|
||||
import ree as re
|
||||
import json
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
||||
class EmbedUrlError(Exception):
    """Raised when a pandora.tv page only embeds an external player URL."""
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_pandoratv(Downloader):
    """Downloader for pandora.tv single videos."""
    type = 'pandoratv'
    URLS = ['pandora.tv']
    single = True
    display_name = 'Pandora TV'

    @classmethod
    def fix_url(cls, url):
        # Drop the fragment; it never selects a different video.
        return url.split('#')[0]

    def read(self):
        # BUGFIX: the original passed the *builtin* `format` function as
        # Video's `format` argument; rely on Video's default instead.
        video = Video(self.url, cw=self.cw)
        try:
            # Resolve eagerly so an embed-only page is detected here.
            video.url()#
        except EmbedUrlError as e:
            return self.Invalid(e.args[0])

        self.urls.append(video.url)
        self.setIcon(video.thumb)

        self.enableSegment()

        self.title = video.title
|
||||
|
||||
|
||||
|
||||
def extract(name, html, cw=None):
    """Extract a JS variable `name` from `html`.

    Tries a quoted-string assignment first, then a JSON list assignment.
    Raises Exception('No <name>') when the variable cannot be found.
    """
    print_ = get_print(cw)
    value = re.find(r'''{} *= *['"](.*?)['"]'''.format(name), html)
    if value is None:
        # Fall back to a list literal (e.g. strResolArr = [480, 720]).
        raw = re.find(r'''{} *= *(\[.*?\])'''.format(name), html)
        if raw is None:
            # BUGFIX: previously json.loads(None) raised a confusing
            # TypeError here instead of the intended error below.
            raise Exception('No {}'.format(name))
        value = json.loads(raw)
    print_('{}: {}'.format(name, value))
    if value is None:
        raise Exception('No {}'.format(name))
    return value
|
||||
|
||||
|
||||
class Video(object):
    """Pandora TV video resolved lazily through the site's VOD-URL API."""
    _url_video = None  # cached direct media URL after first resolution

    def __init__(self, url, format='title', cw=None):
        self.url = LazyUrl(url, self.get, self)
        self.format = format  # NOTE(review): stored but unused in get()
        self.cw = cw

    @try_n(2)
    def get(self, url):
        """Resolve the direct media URL (cached); fills title/filename/thumb."""
        if self._url_video:
            return self._url_video
        cw = self.cw
        print_ = get_print(cw)
        html = downloader.read_html(url)
        soup = Soup(html)

        # Embed-only pages cannot be downloaded directly.
        embedUrl = extract('embedUrl', html, cw)
        if embedUrl:
            raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))

        # Scrape the JS variables that parameterize the VOD-URL API call.
        uid = extract('strLocalChUserId', html, cw)
        pid = extract('nLocalPrgId', html, cw)
        fid = extract('strFid', html, cw)
        resolType = extract('strResolType', html, cw)
        resolArr = extract('strResolArr', html, cw)
        vodSvr = extract('nVodSvr', html, cw)
        resols = extract('nInfo', html, cw)
        runtime = extract('runtime', html, cw)

        url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
        data = {
            'userId': uid,
            'prgId': pid,
            'fid': fid,
            'resolType': resolType,
            'resolArr': ','.join(map(str, resolArr)),
            'vodSvr': vodSvr,
            'resol': max(resols),  # request the highest available resolution
            'runtime': runtime,
            'tvbox': 'false',
            'defResol': 'true',
            'embed': 'false',
        }
        session = Session()
        r = session.post(url_api, headers={'Referer': url}, data=data)
        data = json.loads(r.text)
        self._url_video = data['src']

        self.title = soup.find('meta', {'property': 'og:description'})['content']

        ext = get_ext(self._url_video)
        self.filename = format_filename(self.title, pid, ext)

        self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

        return self._url_video
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
# uncompyle6 version 3.5.0
|
||||
# Python bytecode 2.7 (62211)
|
||||
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
|
||||
# Embedded file name: pinter_downloader.pyo
|
||||
# Compiled at: 2019-10-21 07:44:55
|
||||
import downloader
|
||||
from utils import Session, Downloader, LazyUrl, clean_url, try_n, Soup, clean_title
|
||||
import json, os, ree as re
|
||||
from timee import sleep
|
||||
from translator import tr_
|
||||
import urllib
|
||||
import constants
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
BASE_URL = 'https://www.pinterest.com'
|
||||
|
||||
def get_info(username, board, api):
    """Return board (or board-section) metadata for a Pinterest user.

    `board` may be "board" or "board/section"; in the latter case the
    matching section's data is merged into the board info and the name
    becomes "board/section".
    """
    if '/' in board:
        # Split off the section path (may itself contain slashes).
        section = (u'/').join(board.split('/')[1:])
        board = board.split('/')[0]
        info = api.board(username, board)
        # Find the section whose slug matches (case-insensitive).
        for s in api.board_sections(info['id']):
            print(s['slug'].lower(), section)
            if s['slug'].lower() == section.lower():
                break
        else:
            raise Exception('Invalid section')

        title = s['title']
        # Merge section fields over board fields; 'id' now refers to the section.
        info.update(s)
        info['name'] = (u'{}/{}').format(info['name'], title)
        print('section_id:', info['id'])
    else:
        info = api.board(username, board)
        #info = board_info(username, board)
    return info
|
||||
|
||||
|
||||
def board_info(username, board):
    """Scrape board metadata from the board page's initial-state JSON.

    Alternative to PinterestAPI.board(); currently unused (see get_info).
    """
    page_url = u'https://www.pinterest.com/{}/{}/'.format(username, board)
    soup = Soup(downloader.read_html(page_url))
    state = json.loads(soup.find('script', id='initial-state').text)
    return state['resourceResponses'][0]['response']['data']
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_pinter(Downloader):
    """Downloader for Pinterest boards and board sections."""
    type = 'pinter'
    URLS = ['pinterest.']
    type_pinter = 'board'  # 'board' or 'section', decided in init()
    display_name = 'Pinterest'

    @try_n(4)
    def init(self):
        # Accept a bare "user/board" path and expand it to a full URL.
        if 'pinterest.' not in self.url:
            self.url = u'https://www.pinterest.com/{}'.format(self.url)
        self.api = PinterestAPI()
        username, board = get_username_board(self.url)
        # A slash inside the board path means a board *section*.
        if '/' in board:
            self.type_pinter = 'section'
        self.print_(('type: {}').format(self.type_pinter))
        self.info = get_info(username, board, self.api)

    @property
    def name(self):
        # "<owner>/<board[/section]>"
        username = self.info['owner']['username']
        name = self.info['name']
        return clean_title((u'{}/{}').format(username, name))

    def read(self):
        self.title = self.name
        # For sections, info['id'] is the section id (see get_info).
        id = self.info['id']
        imgs = get_imgs(id, self.api, cw=self.cw, title=self.name, type=self.type_pinter)
        for img in imgs:
            self.urls.append(img.url)

        self.title = self.name
|
||||
|
||||
|
||||
class PinterestAPI:
    """Thin client for Pinterest's internal JSON resource API.

    All calls go through _call (rate-limited); listing endpoints use
    _pagination, a generator that follows bookmark cursors.
    """
    # Headers mimic the web app's XHR requests.
    HEADERS = {'Accept': 'application/json, text/javascript, */*, q=0.01',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-Pinterest-AppState': 'active',
        'X-APP-VERSION': 'cb1c7f9',
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': BASE_URL + '/'}

    def __init__(self):
        self.session = Session()
        self.session.headers.update(self.HEADERS)

    def pin(self, pin_id):
        # Detailed info for a single pin.
        options = {'id': pin_id, 'field_set_key': 'detailed'}
        return self._call('Pin', options)['resource_response']['data']

    def pin_related(self, pin_id):
        # Generator of pins related to `pin_id`.
        options = {'pin': pin_id, 'add_vase': True, 'pins_only': True}
        return self._pagination('RelatedPinFeed', options)

    def board(self, user, board):
        # Detailed info for a board identified by user + slug.
        options = {'slug': board, 'username': user, 'field_set_key': 'detailed'}
        return self._call('Board', options)['resource_response']['data']

    def board_pins(self, board_id):
        # Generator of all pins on a board.
        options = {'board_id': board_id}
        return self._pagination('BoardFeed', options)

    def board_related(self, board_id):
        # Generator of boards related to `board_id`.
        options = {'board_id': board_id, 'add_vase': True}
        return self._pagination('BoardRelatedPixieFeed', options)

    def board_sections(self, board_id):
        # Generator of the board's sections.
        options = {'board_id': board_id}
        return self._pagination('BoardSections', options)

    def board_section_pins(self, section_id):
        # Generator of all pins in a board section.
        options = {'section_id': section_id}
        return self._pagination('BoardSectionPins', options)

    @try_n(4)
    @sleep_and_retry
    @limits(1, 4) # 1000 calls per hour
    def _call(self, resource, options):
        """Perform one GET against a resource endpoint and return its JSON.

        Raises 'Not Found' on 404/redirect, otherwise a generic API error.
        """
        url = ('{}/resource/{}Resource/get/').format(BASE_URL, resource)
        params = {'data': json.dumps({'options': options}), 'source_url': ''}
        print('_call: {}, {}'.format(url, params))
        r = self.session.get(url, params=params)
        print(r)
        s = r.text
        status_code = r.status_code
        try:
            data = json.loads(s)
        except ValueError:
            data = {}
        else:
            # A redirect (r.history) usually means the resource is gone.
            if status_code < 400 and not r.history:
                return data

        if status_code == 404 or r.history:
            raise Exception('Not Found')
        raise Exception('API request failed: {}'.format(status_code))

    def _pagination(self, resource, options):
        """Yield items from a paginated resource, following bookmark cursors."""
        while True:
            data = self._call(resource, options)
            for x in data['resource_response']['data']:
                yield x

            try:
                bookmarks = data['resource']['options']['bookmarks']
                # '-end-' and the 'Y2JOb25lO' sentinel both mark the last page.
                if not bookmarks or bookmarks[0] == '-end-' or bookmarks[0].startswith('Y2JOb25lO'):
                    return
                options['bookmarks'] = bookmarks
            except KeyError:
                return
|
||||
|
||||
|
||||
class Image(object):
    """A single pin image resolving to its original-resolution URL."""

    def __init__(self, img):
        self.id = img['id']
        print(self.id)
        self.url0 = img['images']['orig']['url']

        # The pin page acts as referer; the payload URL is already known.
        self.url = LazyUrl('{}/pin/{}/'.format(BASE_URL, self.id), lambda _: self.url0, self)
        base = self.url0.split('?')[0].split('#')[0]
        ext = os.path.splitext(base)[1]
        self.filename = '{}{}'.format(self.id, ext)
|
||||
|
||||
|
||||
|
||||
def get_imgs(id, api, cw=None, title=None, type='board'):
    """Collect all pin images from a board or board section.

    `type` selects the feed ('board' or 'section'); duplicates are
    skipped by pin id. Returns [] early if the UI window is closed.
    """
    imgs = []
    ids = set()  # pin ids seen, to skip duplicates
    print('get_imgs: type={}'.format(type))
    if type == 'board':
        gen = api.board_pins(id)
    elif type == 'section':
        gen = api.board_section_pins(id)
    else:
        raise Exception((u'Type "{}" is not supported').format(type))
    for img in gen:
        if 'images' not in img:
            # Not a downloadable pin (e.g. story/ad entry).
            print('skip img:', img['id'])
            continue
        img = Image(img)
        if img.id in ids:
            print('duplicate:', img.id)
            continue
        ids.add(img.id)
        print(img.url)
        print(img.filename)
        # BUGFIX: was a bare `print` expression (Python 2 statement
        # leftover) that silently did nothing under Python 3; restore the
        # intended blank separator line.
        print()
        imgs.append(img)
        if cw is not None:
            if not cw.alive:
                return []  # window closed: abort
            # "읽는 중..." = "Reading..." progress in the UI.
            cw.setTitle((u'{} {} ({})').format(tr_(u'\uc77d\ub294 \uc911...'), title, len(imgs)))

    return imgs
|
||||
|
||||
|
||||
def get_username_board(url):
    """Split a Pinterest URL into (username, board-path).

    The board path is URL-decoded and stripped of trailing slashes and
    whitespace; it may still contain a '/section' suffix.
    """
    url = clean_url(url)
    m = re.search('pinterest.[a-zA-Z.]+?/([^/]+)/([^#\\?]+)', url)
    username = m.group(1)
    board = urllib.parse.unquote(m.group(2)).strip()
    while board.endswith('/'):
        board = board[:-1].strip()

    return (username, board)
|
||||
|
|
@ -14,6 +14,10 @@ except ImportError:
|
|||
import constants
|
||||
from datetime import datetime
|
||||
import requests
|
||||
from timee import sleep
|
||||
from collections import deque
|
||||
from locker import lock
|
||||
import threading
|
||||
FORCE_LOGIN = True
|
||||
LIMIT = 48
|
||||
for header in ['pixiv_illust', 'pixiv_bmk', 'pixiv_search', 'pixiv_following', 'pixiv_following_r18']:
|
||||
|
@ -27,6 +31,7 @@ class Downloader_pixiv(Downloader):
|
|||
type = 'pixiv'
|
||||
MAX_CORE = 16
|
||||
keep_date = True
|
||||
STEP = 8, 32
|
||||
|
||||
@classmethod
|
||||
def fix_url(cls, url):
|
||||
|
@ -107,10 +112,10 @@ class PixivAPI():
|
|||
def profile(self, id_):
|
||||
return self.call('user/{}/profile/all?lang=en'.format(id_))
|
||||
|
||||
def bookmarks(self, id_, offset=0, limit=None):
|
||||
def bookmarks(self, id_, offset=0, limit=None, rest='show'):
|
||||
if limit is None:
|
||||
limit = LIMIT
|
||||
return self.call('user/{}/illusts/bookmarks?tag=&offset={}&limit={}&rest=show&lang=en'.format(id_, offset, limit))
|
||||
return self.call('user/{}/illusts/bookmarks?tag=&offset={}&limit={}&rest={}&lang=en'.format(id_, offset, limit, rest))
|
||||
|
||||
def search(self, q, order='date_d', mode='all', p=1, s_mode='s_tag', type_='all'):
|
||||
return self.call('search/artworks/{0}?word={0}&order={1}&mode={2}&p={3}&s_mode={4}&type={5}&lang=en'.format(quote(q), order, mode, p, s_mode, type_))
|
||||
|
@ -254,13 +259,17 @@ def get_info(url, cw=None, depth=0):
|
|||
id_ = api.user_id(url)
|
||||
if id_ is None: #
|
||||
id_ = my_id()
|
||||
if id_ == my_id():
|
||||
rest = 'all'
|
||||
else:
|
||||
rest = 'show'
|
||||
process_user(id_, info, api)
|
||||
info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
|
||||
ids = []
|
||||
ids_set = set()
|
||||
offset = 0
|
||||
while len(ids) < max_pid:
|
||||
data = api.bookmarks(id_, offset)
|
||||
data = api.bookmarks(id_, offset, rest=rest)
|
||||
c = 0
|
||||
for id in [work['id'] for work in data['works']]:
|
||||
if id in ids_set:
|
||||
|
@ -359,15 +368,54 @@ def process_user(id_, info, api):
|
|||
def process_ids(ids, info, imgs, cw, depth=0):
|
||||
print_ = get_print(cw)
|
||||
max_pid = get_max_range(cw)
|
||||
for i, id_illust in enumerate(ids):
|
||||
try:
|
||||
info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_illust), cw, depth=depth+1)
|
||||
except Exception as e:
|
||||
if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired): # logout during extraction
|
||||
raise e
|
||||
print_('process_ids error ({}):\n{}'.format(depth, print_error(e)[0]))
|
||||
continue
|
||||
imgs += info_illust['imgs']
|
||||
class Thread(threading.Thread):
|
||||
alive = True
|
||||
rem = 0
|
||||
|
||||
def __init__(self, queue):
|
||||
super().__init__(daemon=True)
|
||||
self.queue = queue
|
||||
|
||||
@classmethod
|
||||
@lock
|
||||
def add_rem(cls, x):
|
||||
cls.rem += x
|
||||
|
||||
def run(self):
|
||||
while self.alive:
|
||||
try:
|
||||
id_, res, i = self.queue.popleft()
|
||||
except Exception as e:
|
||||
sleep(.1)
|
||||
continue
|
||||
try:
|
||||
info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_), cw, depth=depth+1)
|
||||
res[i] = info_illust['imgs']
|
||||
except Exception as e:
|
||||
if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired): # logout during extraction
|
||||
res[i] = e
|
||||
print_('process_ids error ({}):\n{}'.format(depth, print_error(e)[0]))
|
||||
finally:
|
||||
Thread.add_rem(-1)
|
||||
queue = deque()
|
||||
n, step = Downloader_pixiv.STEP
|
||||
print_('{} / {}'.format(n, step))
|
||||
ts = []
|
||||
for i in range(n):
|
||||
t = Thread(queue)
|
||||
t.start()
|
||||
ts.append(t)
|
||||
for i in range(0, len(ids), step):
|
||||
res = [[]]*step
|
||||
for j, id_illust in enumerate(ids[i:i+step]):
|
||||
queue.append((id_illust, res, j))
|
||||
Thread.add_rem(1)
|
||||
while Thread.rem:
|
||||
sleep(.001, cw)
|
||||
for imgs_ in res:
|
||||
if isinstance(imgs_, Exception):
|
||||
raise imgs_
|
||||
imgs += imgs_
|
||||
s = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(imgs))
|
||||
if cw:
|
||||
cw.setTitle(s)
|
||||
|
@ -377,3 +425,5 @@ def process_ids(ids, info, imgs, cw, depth=0):
|
|||
break
|
||||
if depth == 0:
|
||||
check_alive(cw)
|
||||
for t in ts:
|
||||
t.alive = False
|
||||
|
|
|
@ -0,0 +1,530 @@
|
|||
#coding:utf8
|
||||
'''
|
||||
Pornhub Downloader
|
||||
'''
|
||||
from __future__ import division, print_function, unicode_literals
|
||||
from io import BytesIO
|
||||
import os
|
||||
import js2py
|
||||
import downloader
|
||||
import ree as re
|
||||
from utils import (Downloader, Soup, try_n, LazyUrl, urljoin, get_print,
|
||||
Session, get_max_range, filter_range, get_ext,
|
||||
lock, format_filename, clean_title, get_resolution)
|
||||
import clf2
|
||||
import utils
|
||||
from m3u8_tools import playlist2stream, M3u8_stream
|
||||
|
||||
|
||||
|
||||
class File(object):
    '''
    Resolved pornhub media item: final URL, thumbnail and filename.

    NOTE: __init__ downloads the thumbnail immediately (network I/O),
    and wraps .m3u8 URLs in an HLS stream reader.
    '''

    def __init__(self, id_, title, url, url_thumb):
        self.id_ = id_
        self.title = clean_title('{}'.format(title))
        self.url = url

        ext = get_ext(self.url)
        if ext.lower() == '.m3u8':
            # Prefer a master-playlist stream; fall back to a plain m3u8 stream.
            try:
                self.url = playlist2stream(self.url, n_thread=4)
            except:
                self.url = M3u8_stream(self.url, n_thread=4)

        self.url_thumb = url_thumb
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

        # HLS streams are remuxed/saved as .mp4.
        if ext.lower() == '.m3u8':
            ext = '.mp4'
        self.filename = format_filename(self.title, self.id_, ext)
        print('filename:', self.filename)
|
||||
|
||||
|
||||
class Video(object):
    '''
    Lazily-resolved pornhub video or GIF.

    get() scrapes the page once, builds a File (which downloads the
    thumbnail and picks the media URL), caches the result, and copies the
    File's metadata onto this object.
    '''
    _url = None      # cached media URL after first resolution
    filename = None  # set in get()
    thumb = None     # set in get()

    def __init__(self, url, cw, session):
        self.url = LazyUrl(url, self.get, self)
        self.cw = cw
        self.session = session

    def get(self, url):
        '''
        Resolve and cache the media URL; fills title/filename/thumb.
        '''
        cw = self.cw
        session = self.session
        print_ = get_print(cw)
        if self._url:
            return self._url

        # Accept both watch-page and embed URLs.
        id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
              re.find(r'/embed/(\w+)', url, re.IGNORECASE)
        print('id: {}'.format(id_))
        # Canonicalize embed URLs to the watch page (GIF URLs stay as-is).
        if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
            url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))
        html = downloader.read_html(url, session=session)

        soup = Soup(html)
        soup = fix_soup(soup, url, session, cw)
        html = str(soup)

        # removed
        if soup.find('div', class_='removed'):
            raise Exception('removed')

        gif = soup.find('div', {'id': 'gifImageSection'})
        if gif:
            # GIF page: the mp4 URL sits on a child with a data-mp4 attr.
            print_('GIF')
            id_ = url.split('/gif/')[1]
            id_ = re.findall('[0-9a-zA-Z]+', id_)[0]

            jss = list(gif.children)
            for js in jss:
                if 'data-mp4' in getattr(js, 'attrs', {}):
                    break
            else:
                raise Exception('gif mp4 url not found')

            title = js['data-gif-title']
            url = js['data-mp4']
            url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
            file = File('gif_{}'.format(id_), title, url, url_thumb)
        else:
            if id_ is None:
                raise Exception('no id')

            print_('Video')
            # decode() deobfuscates the page's player JSON (defined elsewhere).
            j = decode(html, cw)

            # 1968
            #title = j['video_title']
            title = soup.find('h1', class_='title').text.strip()

            url_thumb = j['image_url']
            videos = []
            for video in j['mediaDefinitions']:
                url_ = video.get('videoUrl').strip()
                ext = get_ext(url_)
                if ext.lower() not in ['.mp4', '.m3u8']:
                    print('not mp4: {}'.format(ext))
                    continue
                quality = video.get('quality', 0)
                # quality is sometimes a list; take the first entry.
                if isinstance(quality, list):
                    quality = quality[0]
                video['quality'] = int(quality)
                print_('[{}p] {}'.format(quality, url_))
                videos.append(video)

            if not videos:
                raise Exception('No videos')

            videos = sorted(videos, key=lambda video: video['quality'])

            res = get_resolution()

            # Highest quality at or below the user's preferred resolution,
            # otherwise the lowest available.
            videos_good = [video for video in videos if video['quality'] <= res]
            if videos_good:
                video = videos_good[-1]
            else:
                video = videos[0]
            print_('\n[{}p] {}'.format(video['quality'], video['videoUrl']))

            file = File(id_, title, video['videoUrl'].strip(), url_thumb)

        self._url = file.url
        self.title = file.title
        self.filename = file.filename
        self.thumb = file.thumb
        return self._url
|
||||
|
||||
|
||||
def is_login(session, cw=None, n=2):
    '''
    Check whether `session` is authenticated on Pornhub Premium.

    Loads the premium front page and looks for the profile dropdown menu
    that is only rendered for logged-in users. Retries up to `n` times
    (the first response may be a protection page) before returning False.
    '''
    print_ = get_print(cw)
    print_('is_login {}'.format(n))
    if n <= 0:
        return False
    url = 'https://www.pornhubpremium.com'
    soup = downloader.read_soup(url, session=session)
    soup = fix_soup(soup, url, session, cw)
    if soup.find('ul', id='profileMenuDropdown'):
        return True
    # Recurse with one fewer attempt remaining.
    return is_login(session, cw, n-1)
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_pornhub(Downloader):
    '''
    Downloader for pornhub.com / pornhubpremium.com: videos, GIFs,
    photo albums, single photos, and user/channel/playlist video lists.
    '''
    type = 'pornhub'
    single = True
    strip_header = False
    URLS = ['pornhub.com', 'pornhubpremium.com']

    def init(self):
        self.session = Session() # 1791
        # Expand shorthand ids ("pornhub_gif_X", "pornhub_album_X",
        # "pornhub_X") into full URLs.
        if 'pornhub_gif_' in self.url:
            self.url = 'https://www.pornhub.com/gif/{}'.format(
                self.url.replace('pornhub_gif_', ''))
        elif 'pornhub_album_' in self.url:
            self.url = 'https://www.pornhub.com/album/{}'.format(
                self.url.replace('pornhub_album_', ''))
        elif 'pornhub_' in self.url:
            self.url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
                       .format(self.url.replace('pornhub_', ''))
        # Premium content requires login cookies in the session.
        if 'pornhubpremium.com' in self.url.lower() and\
           not is_login(self.session, self.cw):
            return self.Invalid('[Pornhub] Login cookies required')

    @classmethod
    def key_id(cls, url):
        # Canonical key: domain plus path, fragment stripped.
        for domain in cls.URLS:
            if domain in url:
                id_ = domain + url.split(domain)[1]
                break
        else:
            raise Exception('no id')
        return id_.split('#')[0]

    def read(self):
        cw = self.cw
        session = self.session

        videos = []
        # Second path segment after the domain, e.g. "videos" for
        # /users/<name>/videos; empty for plain profile or watch URLs.
        tab = ''.join(self.url.replace('pornhubpremium.com', 'pornhub.com', 1).split('?')[0].split('#')[0].split('pornhub.com/')[-1].split('/')[2:3])

        if '/album/' in self.url:
            self.print_('Album')
            info = read_album(self.url, session=session)
            self.single = False
            for photo in info['photos']:
                self.urls.append(photo.url)

            self.title = clean_title(info['title'])
        elif '/photo/' in self.url:
            self.print_('Photo')
            info = read_photo(self.url, session=session)
            for photo in info['photos']:
                self.urls.append(photo.url)

            self.title = info['title']
        elif tab not in ['', 'videos']:
            raise NotImplementedError(tab)
        elif 'viewkey=' not in self.url.lower() and\
             '/embed/' not in self.url.lower() and\
             '/gif/' not in self.url.lower():
            # Profile/channel/playlist URL: enumerate its videos.
            self.print_('videos')
            info = get_videos(self.url, cw)
            hrefs = info['hrefs']
            self.print_('videos: {}'.format(len(hrefs)))

            if not hrefs:
                raise Exception('no hrefs')

            videos = [Video(href, cw, session) for href in hrefs]
            video = self.process_playlist(info['title'], videos)
            self.setIcon(video.thumb)
            self.enableSegment()
        else:
            # Single video / GIF page.
            video = Video(self.url, cw, session)
            video.url()
            self.urls.append(video.url)
            self.setIcon(video.thumb)
            self.title = video.title
            self.enableSegment()
|
||||
|
||||
|
||||
|
||||
def fix_soup(soup, url, session=None, cw=None):
    '''
    Return a usable page soup, re-fetching through clf2 when blocked.

    A genuine page carries a `div.logo` element; when it is missing the
    response is treated as a protection page, so the URL is resolved again
    via clf2 and the returned HTML re-parsed.
    '''
    log = get_print(cw)
    if soup.find('div', class_='logo') is not None:
        return soup
    log('invalid soup: {}'.format(url))
    solved = clf2.solve(url, session=session, cw=cw)
    return Soup(solved['html'])
|
||||
|
||||
|
||||
|
||||
class Photo(object):
    '''
    A single Pornhub album photo: lazy image URL plus a local filename
    derived from the photo id and the image's extension.
    '''

    def __init__(self, id_, url, referer):
        self.id_ = id_
        # Lazy wrapper so the image is fetched with its photo page as referer.
        self.url = LazyUrl(referer, lambda x: url, self)
        path = url.split('?')[0]
        ext = os.path.splitext(path)[1]
        self.filename = '{}{}'.format(id_, ext)
|
||||
|
||||
|
||||
@try_n(8)
def read_album(url, session=None):
    '''
    Read a Pornhub photo album.

    Fetches the album's JSON (keyed by photo id, each entry linking to the
    `next` photo), walks that linked list starting from the first photo on
    the page, and returns {'title': ..., 'photos': [Photo, ...]}.
    '''
    soup = downloader.read_soup(url, session=session)
    id_album = re.find('/album/([0-9]+)', url, err='no album id')
    url_json = 'https://www.pornhub.com/album/show_album_json?album={}'.format(id_album)
    data = downloader.read_json(url_json, url, session=session)
    # The first photo on the album page seeds the linked-list walk.
    block = soup.find('div', class_='photoAlbumListBlock')
    href = block.a.attrs['href']
    id_ = re.find('/photo/([0-9]+)', href, err='no photo id')
    ids = [id_]
    while True:
        item = data[id_]
        id_ = item['next']
        # The list is circular: stop once an id repeats.
        if id_ in ids:
            break
        ids.append(id_)

    photos = []
    for id_ in ids:
        item = data[id_]
        img = item['img_large']
        referer = 'https://www.pornhub.com/photo/{}'.format(id_)
        photo = Photo(id_, img, referer)
        photos.append(photo)

    info = {}
    title = clean_title(soup.find('h1', class_='photoAlbumTitleV2').text)
    info['title'] = format_filename(title, 'album_{}'.format(id_album))
    info['photos'] = photos
    return info
|
||||
|
||||
|
||||
@try_n(8)
def read_photo(url, session=None):
    '''
    Read a single Pornhub photo page.

    Resolves the photo's parent album, reads the whole album, then narrows
    the result down to the one photo identified by `url`. Returns the album
    info dict with `photos` reduced to that photo and a combined title.
    '''
    id_ = re.find('/photo/([0-9]+)', url, err='no photo id')
    soup = downloader.read_soup(url, session=session)
    div = soup.find('div', id='thumbSlider')
    href = urljoin(url, div.find('a').attrs['href'])
    # Pass the session along so login/premium cookies are reused for the album.
    info = read_album(href, session=session)
    photos = []
    for photo in info['photos']:
        if str(photo.id_) == id_:
            photos.append(photo)

    info['photos'] = photos
    info['title'] = '{} - {}'.format(info['title'], photos[0].filename)
    return info
|
||||
|
||||
|
||||
@try_n(4)
def get_videos(url, cw=None):
    '''
    Enumerate the video links of a users/pornstar/model/channels/playlist
    page. Returns {'title': '[Header] name', 'hrefs': [watch-page URL, ...]},
    paginating through the matching AJAX/listing endpoint until exhausted,
    the range cap is hit, or two consecutive pages fail.
    '''
    print_ = get_print(cw)

    # Determine the listing kind and the account/playlist identifier.
    if '/users/' in url:
        mode = 'users'
        username = url.split('/users/')[1].split('/')[0]
    elif '/pornstar/' in url:
        mode = 'pornstar'
        username = url.split('/pornstar/')[1].split('/')[0]
    elif '/model/' in url:
        mode = 'model'
        username = url.split('/model/')[1].split('/')[0]
    elif '/channels/' in url:
        mode = 'channels'
        username = url.split('/channels/')[1].split('/')[0]
    elif '/playlist/' in url:
        mode = 'playlist'
        username = url.split('/playlist/')[1].split('/')[0]
    else:
        raise Exception('Not supported url')
    username = username.split('?')[0].split('#')[0]

    session = Session()

    if mode in ['pornstar']:
        # Pornstar pages differ depending on whether the account exposes a
        # free "videos/upload" tab; detect that up front.
        url_main = 'https://www.pornhub.com/{}/{}'.format(mode, username)
        html = downloader.read_html(url_main, session=session)
        soup = Soup(html)
        soup = fix_soup(soup, url_main, session, cw)
        for a in soup.findAll('a'):
            if '/{}/{}/videos/upload'.format(mode, username) in a.attrs.get('href', ''):
                free = True
                break
        else:
            free = False
        print_('free: {}'.format(free))

    # Range
    max_pid = get_max_range(cw, 500)
    max_pid = min(max_pid, 2000)#

    html = downloader.read_html(url, session=session)
    soup = fix_soup(Soup(html), url, session, cw)

    info = {}

    # get title: playlists have an h1 with id=watchPlaylist; otherwise try
    # the several places a channel/profile name can live.
    h1 = soup.find('h1')
    if h1:
        header = 'Playlist'
        title = h1.find(id='watchPlaylist')
    else:
        title = None
    if not title:
        header = 'Channel'
        profile = soup.find('div', class_='profileUserName')
        wrapper = soup.find('div', class_='titleWrapper')
        bio = soup.find('div', class_='withBio')
        title = soup.find('h1', {'itemprop':'name'})
        if not title and profile:
            title = profile.a
        if not title and wrapper:
            title = wrapper.h1
        if not title and bio:
            title = bio.h1
    if not title:
        raise Exception('No title')
    #print(title)
    info['title'] = '[{}] {}'.format(header, title.text.strip())
    # Playlist pagination needs the page's token.
    token = re.find('''token *= *['"](.*?)['"]''', html)
    print_('token: {}'.format(token))

    # get links
    hrefs = []
    fail = 0   # consecutive page failures; two in a row aborts the loop
    for p in range(1, 1+100):
        try:
            if mode in ['users', 'model']:
                if mode == 'users':
                    url_api = 'https://www.pornhub.com/users/{}/videos/public/'\
                              'ajax?o=mr&page={}'.format(username, p)
                elif mode == 'model':
                    url_api = 'https://www.pornhub.com/model/{}/videos/upload/'\
                              'ajax?o=mr&page={}'.format(username, p)
                r = session.post(url_api)
                soup = Soup(r.text)
                # The AJAX endpoint returns an h1 (error page) past the end.
                if soup.find('h1'):
                    print('break: h1')
                    break
            elif mode in ['pornstar']:
                if free:
                    url_api = 'https://www.pornhub.com/{}/{}/videos/upload'\
                              '?page={}'.format(mode, username, p)
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('div', class_='videoUList')
                else:
                    url_api = 'https://www.pornhub.com/{}/{}?page={}'.format(mode, username, p)
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('ul', class_='pornstarsVideos')
            elif mode in ['channels']:
                url_api = 'https://www.pornhub.com/{}/{}/videos?page={}'.format(mode, username, p)
                soup = downloader.read_soup(url_api, session=session)
                soup = fix_soup(soup, url_api, session, cw)
                try:
                    soup = soup.find('div', {'id': 'channelsBody'}).find('div', class_='rightSide')
                except:
                    break
            elif mode in ['playlist']:
                #url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(username, len(hrefs))
                if token is None:
                    raise Exception('no token')
                url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&token={}&page={}'.format(username, token, p)
                soup = downloader.read_soup(url_api, session=session)
            else:
                raise NotImplementedError(mode)
            fail = 0
        except Exception as e:
            print_(e)
            fail += 1
            if fail < 2:
                continue
            else:
                break
        finally:
            print_('{} ({})'.format(url_api, len(hrefs)))

        if cw and not cw.alive:
            return

        lis = soup.findAll('li', class_='videoblock')
        if not lis:
            print_('break: no lis')
            break

        if getattr(soup.find('title'), 'text', '').strip() == 'Page Not Found':
            print_('Page Not Found')
            break

        # c counts entries not seen before; zero means the listing repeats.
        c = 0
        for li in lis:
            a = li.find('a')
            href = a.attrs['href']
            href = urljoin(url, href)
            if href in hrefs:
                continue
            c += 1
            if href.startswith('javascript:'): # Remove Pornhub Premium
                print(href)
                continue
            hrefs.append(href)
        if c == 0:
            print('c==0')
            break
        print(c) # 1320

        if len(hrefs) >= max_pid:
            break

    if cw:
        hrefs = filter_range(hrefs, cw.range)

    info['hrefs'] = hrefs

    return info
|
||||
|
||||
|
||||
@lock
def decode(html, cw=None):
    '''
    Extract the player's flashvars object from a watch-page HTML.

    Finds the inline <script> mentioning `videoUrl`, executes it with js2py
    (prefixed with a `playerObjList` stub it references), and returns the
    flashvars variable evaluated to a plain dict. Serialized via @lock.
    '''
    print_ = get_print(cw)
    print_('decode')
    soup = Soup(html)

    for script in soup.findAll('script'):
        script = script.text or script.string or ''
        script = script.strip()
        if 'videoUrl' in script:
            break
    else:
        raise Exception('No script')

    # The script begins "var <flashvars_name> = ..."; token 1 is the name.
    flashvars = script.split()[1]
    script = 'playerObjList={};' + script

    context = js2py.EvalJs()
    context.execute(script)

    return context.eval(flashvars).to_dict()
|
|
@ -0,0 +1,133 @@
|
|||
import downloader
|
||||
import ree as re
|
||||
import os
|
||||
from utils import Downloader, urljoin, query_url, Soup, get_max_range, get_print, clean_title
|
||||
from translator import tr_
|
||||
try:
|
||||
from urllib import quote # python2
|
||||
except:
|
||||
from urllib.parse import quote # python3
|
||||
import sys
|
||||
from timee import sleep
|
||||
from constants import clean_url
|
||||
LIMIT = 100
|
||||
|
||||
|
||||
def get_tags(url):
    """Build a stable identifier for a rule34.xxx query URL.

    Favorites pages map to ``fav_<user id>``; search pages map to their
    sorted, space-joined tag list. Falls back to ``N/A`` when neither
    yields anything.
    """
    url = clean_url(url)
    qs = query_url(url)
    if 'page=favorites' in url:
        ident = qs.get('id', ['N/A'])[0]
        ident = u'fav_{}'.format(ident)
    else:
        # Sort so the same tag set always produces the same identifier.
        tags = qs.get('tags', [])
        tags.sort()
        ident = u' '.join(tags)
    if not ident:
        ident = u'N/A'
    return ident
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_rule34_xxx(Downloader):
    '''
    Downloader for rule34.xxx tag searches and favorites pages.
    '''
    type = 'rule34_xxx'
    URLS = ['rule34.xxx']
    MAX_CORE = 8
    display_name = 'Rule34.xxx'
    _name = None  # cached identifier derived from the query's tags

    def init(self):
        # Accept either a full rule34.xxx URL or a raw tag query string.
        if 'rule34.xxx' in self.url.lower():
            self.url = self.url.replace('http://', 'https://')
        else:
            url = self.url
            # Normalize whitespace-separated tags to a single '+'-joined list.
            url = url.replace(' ', '+')
            while '++' in url:
                url = url.replace('++', '+')
            url = quote(url)
            url = url.replace('%2B', '+')
            self.url = u'https://rule34.xxx/index.php?page=post&s=list&tags={}'.format(url)

    @property
    def name(self):
        if self._name is None:
            tags = get_tags(self.url)
            self._name = tags
        return clean_title(self._name)

    def read(self):
        self.title = self.name

        imgs = get_imgs(self.url, self.name, cw=self.cw)

        for img in imgs:
            self.urls.append(img.url)
            self.filenames[img.url] = img.filename

        self.title = self.name
|
||||
|
||||
|
||||
class Image(object):
    """A single rule34.xxx post image: direct file URL plus local filename."""

    def __init__(self, id_, url):
        # Keep the direct file URL as-is; name the file after the post id,
        # preserving the original extension.
        self.url = url
        _, extension = os.path.splitext(url)
        self.filename = u'{}{}'.format(id_, extension)
|
||||
|
||||
|
||||
def setPage(url, page):
    """Return *url* with its ``pid`` query parameter set to *page*.

    The scheme is normalized to HTTPS first. If the URL already carries a
    ``pid`` parameter it is rewritten in place; otherwise one is appended.
    """
    secure = url.replace('http://', 'https://')
    if 'pid=' in secure:
        return re.sub('pid=[0-9]*', 'pid={}'.format(page), secure)
    return secure + '&pid={}'.format(page)
|
||||
|
||||
|
||||
def get_imgs(url, title=None, cw=None):
    '''
    Collect Image objects for a rule34.xxx query via the dapi XML API.

    Non-API URLs are first converted into a page=dapi post listing built
    from their tags. Pages are walked until empty, the range cap is hit,
    or the UI requests a stop. `title` is only used for progress text.
    '''
    url = clean_url(url)
    if 's=view' in url and 'page=favorites' not in url:
        raise NotImplementedError('Not Implemented')

    if 'page=dapi' not in url.lower():
        # Rebuild the query as a dapi API call using the URL's tags.
        tags = get_tags(url)
        tags = quote(tags, safe='/')
        tags = tags.replace('%20', '+')
        url = "https://rule34.xxx/index.php?page=dapi&s=post&q=index&tags={}&pid={}&limit={}".format(tags, 0, LIMIT)

    print_ = get_print(cw)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    ids = set()  # guards against posts repeated across pages
    for p in range(500): #1017
        url = setPage(url, p)
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll('post')
        if not posts:
            break
        for post in posts:
            id_ = post.attrs['id']
            if id_ in ids:
                print('duplicate:', id_)
                continue
            ids.add(id_)
            url_img = post.attrs['file_url']
            img = Image(id_, url_img)
            imgs.append(img)
        if len(imgs) >= max_pid:
            break

        if cw is not None:
            if not cw.alive:
                break
            cw.setTitle(u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs)))
    return imgs
|
|
@ -0,0 +1,180 @@
|
|||
#coding: utf8
|
||||
import downloader
|
||||
import json
|
||||
from io import BytesIO
|
||||
from utils import Downloader, LazyUrl, get_print, try_n, lock, clean_title
|
||||
from error_printer import print_error
|
||||
import os
|
||||
from timee import sleep
|
||||
import ffmpeg
|
||||
import ytdl
|
||||
from m3u8_tools import M3u8_stream
|
||||
CLIENT_ID = None
|
||||
|
||||
|
||||
@lock
def get_cid(force=False):
    '''
    Return SoundCloud's API client_id.

    Fetched via youtube-dl's SoundCloud extractor on first use (or when
    `force` is True) and cached in the module-level CLIENT_ID. Serialized
    via @lock so concurrent callers do not refresh it twice.
    '''
    global CLIENT_ID
    if CLIENT_ID is None or force:
        print('update cid...')
        d = ytdl.YoutubeDL()
        e = ytdl.extractor.soundcloud.SoundcloudIE(d)
        # NOTE(review): relies on youtube-dl internals (_update_client_id /
        # _CLIENT_ID); may break when the extractor changes.
        e._update_client_id()
        CLIENT_ID = e._CLIENT_ID
    return CLIENT_ID
|
||||
|
||||
|
||||
class Audio(object):
    '''
    A single SoundCloud track. The stream URL is resolved lazily from the
    ytdl info dict on first access; `pp` optionally embeds the thumbnail
    as album art after download.
    '''
    _url = None  # cached stream URL (str or M3u8_stream)

    def __init__(self, info, album_art, cw=None):
        self.info = info          # ytdl info dict for this track
        self.album_art = album_art  # whether to embed cover art in pp()
        self.cw = cw
        self.url = LazyUrl(info['webpage_url'], self.get, self, pp=self.pp)

    def get(self, url):
        '''
        Pick a downloadable audio URL from the track's formats.

        Prefers the highest-abr direct http(s) format; falls back to an
        HLS stream wrapped in M3u8_stream (album art is then disabled).
        Also resolves title, filename, and a mid-sized thumbnail.
        '''
        print_ = get_print(self.cw)
        if self._url:
            return self._url

        info = self.info

##        ydl = ytdl.YoutubeDL()
##        info = ydl.extract_info(url)

        formats = info['formats']
        print(formats)
        # Highest average bitrate first.
        formats = sorted(formats, key=lambda x: int(x.get('abr', 0)), reverse=True)
        url_audio = None

        for format in formats:
            protocol = format['protocol']
            print_(u'【{}】 format【{}】 abr【{}】'.format(protocol, format['format'], format.get('abr', 0)))
            if not url_audio and protocol in ['http', 'https']:
                url_audio = format['url']

        if not url_audio:
            # No direct download; stream the best format via HLS.
            url_audio = M3u8_stream(formats[0]['url'])
            self.album_art = False#

        self.username = info['uploader']
        self.title = u'{} - {}'.format(self.username, info['title'])
        self.filename = u'{}{}'.format(clean_title(self.title, allow_dot=True, n=-4), '.mp3')

        # Pick a thumbnail between 100 and 500 px wide (iterating from the
        # largest down); tolerate download failures.
        thumb = None
        for t in info['thumbnails'][::-1]:
            width = t.get('width', 1080)
            if not 100 <= width <= 500:
                continue
            url_thumb = t['url']
            thumb = BytesIO()
            try:
                downloader.download(url_thumb, buffer=thumb)
                break
            except Exception as e:
                print(e)
                thumb = None
        self.thumb = thumb

        self._url = url_audio
        return self._url

    def pp(self, filename):
        # Post-process inside the UI's "converting" context.
        cw = self.cw
        with cw.convert(self):
            return self._pp(filename)

    def _pp(self, filename):
        # Embed the thumbnail as album art, if one was fetched and enabled.
        if self.thumb and self.album_art:
            self.thumb.seek(0)#
            ffmpeg.add_cover(filename, self.thumb, {'artist':self.username, 'title':self.info['title']}, cw=self.cw)
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_soundcloud(Downloader):
    '''
    Downloader for SoundCloud tracks and playlists (via youtube-dl).
    '''
    type = 'soundcloud'
    single = True
    URLS = ['soundcloud.com']
    #lock = True
    audio = None
    display_name = 'SoundCloud'

    def init(self):
        # Accept either a full URL or a bare username/path.
        if 'soundcloud.com' in self.url.lower():
            self.url = self.url.replace('http://', 'https://')
        else:
            self.url = 'https://soundcloud.com/{}'.format(self.url)

    def read(self):
        album_art = self.ui_setting.albumArt.isChecked()
        info = get_audios(self.url, self.cw, album_art)
        audios = info['audios']

        if not audios:
            raise Exception('no audios')

        # first audio must be valid: drop leading tracks that fail to
        # resolve; re-raise the last error if none succeed.
        while audios:
            audio = audios[0]
            try:
                audio.url()
                break
            except Exception as e:
                e_ = e
                print(e)
                audios.remove(audio)
        else:
            raise e_

        if len(audios) > 1:
            audio = self.process_playlist(info['title'], audios)
        else:
            self.urls.append(audio.url)
            self.title = audio.title

        self.artist = audio.username
        self.setIcon(audio.thumb)
|
||||
|
||||
|
||||
@try_n(2)
def get_audios(url, cw, album_art):
    '''
    Extract a SoundCloud track or playlist via youtube-dl.

    Returns the ytdl info dict with an added 'audios' key (list of Audio)
    and, for playlists, a normalized 'title' prefixed with the list kind
    (e.g. "[Tracks] name"). Nested sets inside a playlist are skipped.
    '''
    print_ = get_print(cw)
    url = url.rstrip('/')
    if url.count('/') == 3:
        # Bare profile URL -> default to the user's tracks page.
        url += '/tracks'

    ydl = ytdl.YoutubeDL()
    info = ydl.extract_info(url)
    if 'entries' in info:
        entries = info['entries']
        title = info['title']
        # ytdl suffixes playlist titles with "(Kind)"; strip it and keep it.
        for _type in ['All', 'Tracks', 'Albums', 'Sets', 'Reposts', 'Likes', 'Spotlight']:
            x = '({})'.format(_type)
            if x in title:
                title = title.replace(x, '')
                kind = _type
                break
        else:
            kind = 'Playlist'
        print_(u'kind: {}'.format(kind))
        info['title'] = u'[{}] {}'.format(kind.capitalize(), title)
    else:
        entries = [info]

    audios = []
    for e in entries:
        if '/sets/' in e['webpage_url']:
            # Skip nested sets; only direct tracks are downloaded.
            continue
        audio = Audio(e, album_art, cw=cw)
        audios.append(audio)

    info['audios'] = audios

    return info
|
||||
|
||||
|
|
@ -0,0 +1,250 @@
|
|||
from __future__ import division, print_function, unicode_literals
|
||||
import downloader
|
||||
import ree as re
|
||||
from utils import urljoin, Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range
|
||||
import os
|
||||
import json
|
||||
import ast
|
||||
from io import BytesIO
|
||||
import random
|
||||
import clf2
|
||||
from translator import tr_
|
||||
from timee import sleep
|
||||
from error_printer import print_error
|
||||
import devtools
|
||||
HDR = {'User-Agent': downloader.hdr['User-Agent']}
|
||||
PATTERN_VID = '/(v|video)/(?P<id>[0-9]+)'
|
||||
|
||||
|
||||
def is_captcha(soup):
    """Return True when the page soup contains TikTok's captcha wrapper."""
    wrapper = soup.find('div', class_="verify-wrap")
    return wrapper is not None
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_tiktok(Downloader):
    '''
    Downloader for TikTok single videos and user channels.
    '''
    type = 'tiktok'
    single = True
    URLS = ['tiktok.com']
    display_name = 'TikTok'

    def init(self):
        cw = self.cw
        self.session = Session()
        res = clf2.solve(self.url, self.session, cw)
        soup = Soup(res['html'])
        if is_captcha(soup):
            # Show the browser and wait until the captcha is solved.
            def f(html):
                return not is_captcha(Soup(html))
            clf2.solve(self.url, self.session, cw, show=True, f=f)

    @classmethod
    def fix_url(cls, url):
        # Strip query/fragment; expand a bare username to a profile URL.
        url = url.split('?')[0].split('#')[0].strip('/')
        if 'tiktok.com' not in url.lower():
            url = 'https://www.tiktok.com/@{}'.format(url)
        return url

    def read(self):
        format = compatstr(self.ui_setting.youtubeFormat.currentText()).lower().strip()

        if re.search(PATTERN_VID, self.url) is None:
            # Channel URL: enumerate the user's videos.
            info = read_channel(self.url, self.session, self.cw)
            items = info['items']
            videos = [Video('https://www.tiktok.com/@{}/video/{}'.format(info['uid'], item['id']), self.session, format) for item in items]
            title = '{} (tiktok_{})'.format(info['nickname'], info['uid'])
            video = self.process_playlist(title, videos)
        else:
            # Single video URL.
            video = Video(self.url, self.session, format)
            video.url()
            self.urls.append(video.url)
            self.title = clean_title(video.title)

        self.setIcon(video.thumb)
|
||||
|
||||
|
||||
|
||||
class Video(object):
    '''
    A single TikTok video. The media URL is resolved lazily on first
    access from the page's embedded __NEXT_DATA__ JSON.
    '''
    _url = None  # cached download URL

    def __init__(self, url, session, format='title (id)'):
        self.url = LazyUrl(url, self.get, self)
        self.session = session
        # NOTE(review): `format` is stored but not used in this block.
        self.format = format

    @try_n(2)
    def get(self, url):
        '''
        Resolve the page to its downloadAddr and fetch the cover thumbnail.
        '''
        if self._url:
            return self._url
        m = re.search(PATTERN_VID, url)
        id = m.group('id')
        ext = '.mp4'
        self.title = id#
        self.filename = '{}{}'.format(clean_title(self.title, n=-len(ext)), ext)

        html = downloader.read_html(url, session=self.session)
        soup = Soup(html)
        data = soup.find(id='__NEXT_DATA__')
        props = data.contents[0]
        # Round-trip through json/ast to turn the script node's contents
        # into a plain string before parsing it as JSON.
        data_encode = json.dumps(props)
        ast_le = ast.literal_eval(data_encode)
        data = json.loads(ast_le)

        #info = data['props']['pageProps']['videoData']['itemInfos']
        info = data['props']['pageProps']['itemInfo']['itemStruct']
        self._url = info['video']['downloadAddr']

        self.url_thumb = info['video']['cover']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, referer=url, buffer=self.thumb)

        return self._url
|
||||
|
||||
|
||||
def read_channel(url, session, cw=None):
    '''
    Enumerate a TikTok user's videos by driving a clf2 browser session.

    The callback `f` is invoked with each page snapshot: it collects video
    ids from the feed, scrolls to load more, shows the browser when a
    captcha appears, and returns True to stop (range cap reached, or five
    consecutive scrolls produced nothing new). Returns
    {'uid': ..., 'nickname': ..., 'items': [{'id': ...}, ...]}.
    '''
    print_ = get_print(cw)

    info = {}
    info['items'] = []

    ids = set()  # all video ids seen so far
    info['items'] = []
    # Mutable state shared with the callback closure.
    sd = {
        'count_empty': 0,  # consecutive scrolls that found nothing new
        'shown': False,    # whether the browser window is currently shown
    }

    max_pid = get_max_range(cw)

    def f(html, browser=None):
        soup = Soup(html)
        if is_captcha(soup):
            print('captcha')
            browser.show()
            sd['shown'] = True
        elif sd['shown']:
            browser.hide()
            sd['shown'] = False
        try:
            info['uid'] = soup.find('h2', class_='share-title').text.strip()
            info['nickname'] = soup.find('h1', class_='share-sub-title').text.strip()
        except Exception as e:
            print_(print_error(e)[0])
        c = 0  # new items found in this snapshot
        ids_now = set()
        for div in soup.findAll('div', class_='video-feed-item'):
            a = div.find('a')
            if a is None:
                continue
            href = a['href']
            if not href:
                continue
            m = re.search(PATTERN_VID, href)
            if m is None:
                continue
            id_video = int(m.group('id'))
            ids_now.add(id_video)
            if id_video in ids:
                continue
            ids.add(id_video)
            info['items'].append({'id': id_video})
            c += 1

        print_('items: {}'.format(len(info['items'])))
        if len(info['items']) >= max_pid:
            info['items'] = info['items'][:max_pid]
            return True

        # Trigger lazy loading, then give the page time to fetch.
        browser.runJavaScript('window.scrollTo(0, document.body.scrollHeight);')
        sleep(15, cw)

        # Progress if anything new appeared, or the visible window moved on.
        if c or (ids_now and min(ids_now) > min(ids)):
            sd['count_empty'] = 0
        else:
            print_('empty')
            sd['count_empty'] += 1
        msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info.get('nickname'), info.get('uid'), len(info['items']))
        if cw:
            if not cw.alive:
                raise Exception('cw dead')
            cw.setTitle(msg)
        else:
            print(msg)
        return sd['count_empty'] > 4
    res = clf2.solve(url, session, cw, f=f, timeout=1800, show=True)

    if not info['items']:
        raise Exception('no items')

    return info
|
||||
|
||||
|
||||
|
||||
@try_n(2)
def read_channel_legacy(url, session, cw=None):
    '''
    Legacy channel reader using TikTok's item_list API directly.

    Extracts uid/secUid from the profile HTML, signs each API request by
    evaluating window.byted_acrawler.sign in the page via devtools, and
    pages with maxCursor until hasMore is false. Returns
    {'uid': ..., 'nickname': ..., 'items': [item dict, ...]}.
    '''
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session, headers=HDR)
    uid = re.find('//user/profile/([0-9]+)', html, err='no uid')
    secUid = re.find('"secUid" *: *"([^"]+?)"', html, err='no secUid')
    # Random 16-char fingerprint expected by the API.
    verifyFp = ''.join(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for i in range(16))
    maxCursor = 0

    info = {}
    info['items'] = []
    ids = set()

    for i in range(100):
        url_api = 'https://t.tiktok.com/api/item_list/?count=30&id={uid}&type=1&secUid={secUid}&maxCursor={maxCursor}&minCursor=0&sourceType=8&appId=1180&region=US&language=en&verifyFp={verifyFp}'.format(uid=uid, secUid=secUid, verifyFp=verifyFp, maxCursor=maxCursor)

        # Sign the request in the real page context; retry a few times.
        js = 'window.byted_acrawler.sign({url:"{}"});'.replace('{}', url_api)
        print(js)
        for try_ in range(4):
            try:
                sign = devtools.eval_js(url, js, session)['output']
                break
            except Exception as e:
                print(e)
                e_ = e
        else:
            raise e_
        url_api += '&_signature=' + sign
        print_(url_api)

        data_raw = downloader.read_html(url_api, url, session=session, headers=HDR)
        data = json.loads(data_raw)

        items = []
        for item in data.get('items', []):
            id_video = item['id']
            if id_video in ids:
                print('duplicate:', id_video)
                continue
            ids.add(id_video)
            items.append(item)

        if not items:
            print('no items')
            break

        info['items'] += items

        if i == 0:
            # Channel metadata comes from the first item's author block.
            info['uid'] = items[0]['author']['uniqueId']
            info['nickname'] = items[0]['author']['nickname']

        msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info['nickname'], info['uid'], len(info['items']))
        if cw:
            if not cw.alive:
                break

            cw.setTitle(msg)
        else:
            print(msg)

        if not data['hasMore']:
            break
        maxCursor = data['maxCursor']

    if not info['items']:
        raise Exception('no items')

    return info
|
|
@ -0,0 +1,100 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from utils import Soup, urljoin, Downloader, cut_pair, LazyUrl, clean_title
|
||||
from timee import sleep
|
||||
from translator import tr_
|
||||
from io import BytesIO
|
||||
import ree as re
|
||||
import os
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_tokyomotion(Downloader):
    '''
    Downloader for tokyomotion.net videos and photo albums.
    '''
    type = 'tokyomotion'
    URLS = ['tokyomotion.net']
    single = True
    _type = None  # 'video' or 'album', decided in init()
    display_name = 'TOKYO Motion'

    def init(self):
        html = downloader.read_html(self.url)
        self.soup = Soup(html)
        # Classify the target by its URL path.
        if '/album/' in self.url:
            self._type = 'album'
        else:
            self._type = 'video'

    @property
    def name(self):
        title = get_title(self.soup)
        return clean_title(title)

    def read(self):
        if self._type == 'video':
            video = get_video(self.url, self.soup)
            self.urls.append(video.url)
            self.setIcon(video.thumb)
        elif self._type == 'album':
            imgs = get_imgs(self.url)
            for img in imgs:
                self.urls.append(img.url)
            self.single = False
        else:
            raise NotImplementedError('Unknown type: {}'.format(self._type))

        self.title = self.name
|
||||
|
||||
|
||||
class Video(object):
    '''
    A TOKYO Motion video: lazy media URL plus an eagerly-downloaded
    thumbnail (fetched in the constructor with the page as referer).
    '''
    def __init__(self, url, url_thumb, referer, filename):
        self.url = LazyUrl(referer, lambda x: url, self)
        self.url_thumb = url_thumb
        self.thumb = BytesIO()
        # NOTE: performs network I/O at construction time.
        downloader.download(url_thumb, referer=referer, buffer=self.thumb)
        self.filename = filename
|
||||
|
||||
|
||||
def get_title(soup):
    """Extract the page title from either a video page or an album page."""
    if soup.find('video', id='vjsplayer'):
        # Video page: the h3 heading holds the title.
        return soup.find('h3').text.strip()
    # Album page: strip the site suffix from the <title> tag.
    return soup.find('title').text.split(' Album - ')[0].strip()
|
||||
|
||||
|
||||
def get_video(url, soup=None):
    '''
    Build a Video object for a tokyomotion video page.

    Reads the <video id="vjsplayer"> element's source URL and poster
    thumbnail, derives a filename from the page title, and returns the
    constructed Video. `soup` may be supplied to avoid a second fetch.
    '''
    if soup is None:
        html = downloader.read_html(url)
        soup = Soup(html)

    video = soup.find('video', id='vjsplayer').find('source').attrs['src']
    url_thumb = soup.find('video', id='vjsplayer').attrs['poster']
    title = get_title(soup)
    filename = u'{}.mp4'.format(clean_title(title))
    video = Video(video, url_thumb, url, filename)
    return video
|
||||
|
||||
|
||||
class Image(object):
    """A single album image, fetched lazily with the album page as referer."""

    def __init__(self, url, referer):
        self.url = LazyUrl(referer, lambda x: url, self)
        # Use the URL's basename (query string stripped) as the filename.
        path = url.split('?')[0]
        self.filename = os.path.basename(path)
|
||||
|
||||
|
||||
def get_imgs(url):
    """Collect the full-size images of an album via its slideshow page."""
    album_id = re.find('album/.*?([0-9]+)', url)
    print('id:', album_id)
    url = 'https://www.tokyomotion.net/album/slideshow/{}'.format(album_id)

    soup = Soup(downloader.read_html(url))

    imgs = []
    anchors = soup.findAll('a', {'data-lightbox': 'slideshow-{}'.format(album_id)})
    for anchor in anchors:
        # '/tmb/' points at the thumbnail; dropping it yields the original.
        src = anchor.find('img').attrs['src'].replace('/tmb/', '/')
        imgs.append(Image(src, url))

    return imgs
|
|
@ -1,4 +1,4 @@
|
|||
from utils import Downloader, speed_text, clean_title
|
||||
from utils import Downloader, clean_title
|
||||
import constants, os, downloader
|
||||
from size import Size
|
||||
try:
|
||||
|
@ -54,9 +54,10 @@ class Downloader_torrent(Downloader):
|
|||
if not files:
|
||||
raise Exception('No files')
|
||||
cw.single = self.single = len(files) == 1
|
||||
for file in files:
|
||||
filename = os.path.join(self.dir, file)
|
||||
cw.imgs.append(filename)
|
||||
if not cw.imgs:
|
||||
for file in files:
|
||||
filename = os.path.join(self.dir, file)
|
||||
cw.imgs.append(filename)
|
||||
|
||||
def start_(self):
|
||||
cw = self.cw
|
||||
|
@ -81,8 +82,11 @@ class Downloader_torrent(Downloader):
|
|||
if cw.alive:
|
||||
cw.setSpeed('')
|
||||
if cw.pause_lock and cw.pbar.value() < cw.pbar.maximum():
|
||||
cw.pause_data = {'type': self.type, 'url': self.url,
|
||||
'filesize': self._filesize_prev}
|
||||
cw.pause_data = {
|
||||
'type': self.type,
|
||||
'url': self.url,
|
||||
'filesize': self._filesize_prev,
|
||||
}
|
||||
cw.paused = True
|
||||
cw.pause_lock = False
|
||||
self.update_tools_buttons()
|
||||
|
@ -110,8 +114,8 @@ class Downloader_torrent(Downloader):
|
|||
cw.dones.add(file)
|
||||
file = constants.compact(file).replace('\\', '/')
|
||||
files = file.split('/')
|
||||
file = (u' / ').join(files[1:])
|
||||
msg = (u'Completed: {}').format(file)
|
||||
file = ' / '.join(files[1:])
|
||||
msg = 'Completed: {}'.format(file)
|
||||
self.print_(msg)
|
||||
if i == 0:
|
||||
for try_ in range(4):
|
||||
|
@ -126,20 +130,20 @@ class Downloader_torrent(Downloader):
|
|||
downloader.total_download_size += d_size
|
||||
cw.pbar.setValue(s.progress * MAX_PBAR)
|
||||
if s.state_str == 'queued':
|
||||
title_ = (u'Waiting... {}').format(title)
|
||||
title_ = 'Waiting... {}'.format(title)
|
||||
elif s.state_str == 'checking files':
|
||||
title_ = (u'Checking files... {}').format(title)
|
||||
title_ = 'Checking files... {}'.format(title)
|
||||
self._filesize_prev = filesize
|
||||
elif s.state_str == 'downloading':
|
||||
title_ = (u'{} (p: {}, s: {})').format(title, s.num_peers, s.num_seeds)
|
||||
title_ = '{} (p: {}, s: {})'.format(title, s.num_peers, s.num_seeds)
|
||||
cw.setFileSize(filesize)
|
||||
text = self.size.speed_text()
|
||||
cw.setSpeed(text)
|
||||
elif s.state_str == 'seeding':
|
||||
title_ = (u'{}').format(title)
|
||||
title_ = '{}'.format(title)
|
||||
cw.setFileSize(filesize)
|
||||
else:
|
||||
title_ = (u'{}... {}').format(s.state_str.capitalize(), title)
|
||||
title_ = '{}... {}'.format(s.state_str.capitalize(), title)
|
||||
cw.setTitle(title_, update_filter=False)
|
||||
else:
|
||||
return 'abort'
|
||||
|
|
|
@ -0,0 +1,204 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
from translator import tr_
|
||||
from utils import Soup, Session, query_url, get_max_range, Downloader, clean_title, update_url_query, get_print, get_ext, LazyUrl
|
||||
import ree as re
|
||||
import errors
|
||||
from ratelimit import limits, sleep_and_retry
|
||||
from error_printer import print_error
|
||||
|
||||
|
||||
class Image(object):
    """One media item of a tumblr post; resolves its real extension lazily."""

    def __init__(self, url, id, p=0, cw=None):
        self._url = url
        self.id_ = id
        self.p = p
        self.cw = cw
        self.url = LazyUrl(url, self.get, self)

    def get(self, _):
        log = get_print(self.cw)
        url = self._url
        ext = get_ext(url)
        if ext.lower() == '.gif':
            # Some ".gif" urls are mislabeled; sniff the actual type.
            log('get_ext: {}, {}'.format(self.id_, url))
            try:
                ext = downloader.get_ext(url)
            except Exception as e: #3235
                log('Err: {}, {}\n'.format(self.id_, url) + print_error(e)[0])
        self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
        return url
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_tumblr(Downloader):
    """Whole-blog downloader for tumblr.com; individual posts are rejected."""
    type = 'tumblr'
    URLS = ['tumblr.com']

    def init(self):
        if u'tumblr.com/post/' in self.url:
            return self.Invalid(tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url))
        self.session = Session()

    @classmethod
    def fix_url(cls, url):
        return 'https://{}.tumblr.com'.format(get_id(url))

    def read(self):
        username = get_id(self.url)
        blog_name = get_name(username, self.session)

        for img in get_imgs(username, self.session, cw=self.cw):
            self.urls.append(img.url)

        self.title = clean_title('{} (tumblr_{})'.format(blog_name, username))
|
||||
|
||||
|
||||
|
||||
class TumblrAPI(object):
    """Thin, rate-limited client for the www.tumblr.com/api v2 endpoints."""
    _url_base = 'https://www.tumblr.com/api'
    _hdr = {
        'referer': 'https://www.tumblr.com',
        'authorization': 'Bearer aIcXSOoTtqrzR8L8YEIOmBeW94c3FmbSNSWAUbxsny9KKx5VFh',
    }
    # Default query string merged into every call (see call()).
    _qs = {
        'fields[blogs]': 'name,avatar,title,url,is_adult,?is_member,description_npf,uuid,can_be_followed,?followed,?advertiser_name,is_paywall_on,theme,subscription_plan,?primary,share_likes,share_following,can_subscribe,subscribed,ask,?can_submit,?is_blocked_from_primary,?tweet,?admin,can_message,?analytics_url,?top_tags,paywall_access',
        'npf': 'true',
        'reblog_info': 'false',
        'include_pinned_posts': 'false',
        #'page_number': None,
    }

    def __init__(self, session, cw=None):
        self.session = session
        self.cw = cw

    def print_(self, s):
        get_print(self.cw)(s)

    @sleep_and_retry
    @limits(1, 1)  # at most one api call per second
    def call(self, path, qs, default_qs=True):
        """GET an api path and return its 'response' payload.

        Raises Exception('Not found') for error code 0 and LoginRequired
        for code 4012; any other HTTP error propagates via raise_for_status.
        """
        if default_qs:
            qs_new = qs
            qs = self._qs.copy()
            qs.update(qs_new)
        url = self._url_base + path
        url = update_url_query(url, qs)
        r = self.session.get(url, headers=self._hdr)
        data = r.json()
        errs = data.get('errors', [])
        if errs:
            code = int(errs[0]['code'])
            if code == 0:
                raise Exception('Not found')
            elif code == 4012:
                raise errors.LoginRequired(errs[0]['detail'])
        r.raise_for_status()
        return data['response']

    def name(self, username):
        """Blog display title, falling back to its short name."""
        path = '/v2/blog/{}/posts'.format(username)
        data = self.call(path, {})
        return data['blog']['title'] or data['blog']['name']

    def posts(self, username):
        """Yield Post objects, following pagination until exhausted."""
        path = '/v2/blog/{}/posts'.format(username)
        qs = {}
        ids = set()
        default_qs = True
        while True:
            if self.cw and not self.cw.alive:
                break
            data = self.call(path, qs, default_qs=default_qs)
            for post in data['posts']:
                id_ = post['id']
                if id_ in ids:
                    self.print_('duplicate: {}'.format(id_))
                    continue
                ids.add(id_)
                yield Post(post, self.cw)
            try:
                links = data.get('links') or data['_links']
                path_next = links['next']['href']
            # Was a bare `except:`; only missing/odd-shaped link dicts are expected.
            except (KeyError, TypeError):
                path_next = None
            if path_next:
                path = path_next
                default_qs = False
            else:
                break
|
||||
|
||||
|
||||
class Post(object):
    """Parses one api post (plus its reblog trail) into Image objects."""

    def __init__(self, data, cw=None):
        id_ = data['id']
        self.imgs = []

        # Copy before extending: `+=` on the original list would mutate the
        # caller's data['content'] in place.
        cs = list(data['content'])
        for trail in data['trail']:
            cs += trail['content']

        for c in cs:
            if c['type'] in ['image', 'video']:
                media = c.get('media')
                if not media: #2859
                    continue
                if isinstance(media, list):
                    media = media[0]
                img = media['url']
                self.imgs.append(Image(img, id_, len(self.imgs), cw))
            elif c['type'] in ['text', 'link', 'audio']:
                continue
            else:
                raise NotImplementedError(id_, c)
|
||||
|
||||
|
||||
|
||||
def get_name(username, session):
    """Display name of *username*'s blog."""
    api = TumblrAPI(session)
    return api.name(username)
|
||||
|
||||
|
||||
def get_imgs(username, session, cw=None):
    """Collect images from *username*'s blog, capped at get_max_range(cw).

    Reports progress through *cw* when present; returns None if the
    widget dies mid-read.
    """
    print_ = get_print(cw)  # NOTE(review): currently unused; kept for parity with siblings
    artist = get_name(username, session)
    imgs = []
    # (removed unused `error_count` local)
    max_pid = get_max_range(cw)
    api = TumblrAPI(session, cw)
    for post in api.posts(username):
        imgs += post.imgs

        s = '{} {} (tumblr_{}) - {}'.format(tr_(u'\uc77d\ub294 \uc911...'), artist, username, len(imgs))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(s)
        else:
            print(s)

        if len(imgs) > max_pid:
            break

    return imgs[:max_pid]
|
||||
|
||||
|
||||
def get_id(url):
    """Extract the blog identifier from any supported tumblr url form."""
    ident = url
    if '/dashboard/blog/' in ident:
        ident = re.find('/dashboard/blog/([0-9a-zA-Z_-]+)', ident)
    if '/login_required/' in ident:
        ident = ident.split('/login_required/')[1].split('?')[0].split('/')[0]
    if 'tumblr.com/blog/view/' in ident:
        ident = ident.split('tumblr.com/blog/view/')[1]
    if 'tumblr.com' in ident:
        if 'www.tumblr.com' in ident:
            params = query_url(ident)
            ident = params.get('url', [ident])[0]
        # '<name>.tumblr.com' -> '<name>'
        ident = ident.split('.tumblr.com')[0].split('/')[-1]
    if ident == 'www':
        raise Exception('no id')
    return ident
|
||||
|
|
@ -275,6 +275,7 @@ class TwitterAPI(object):
|
|||
return
|
||||
params["cursor"] = cursor
|
||||
if params.get("cursor") is None: # nothing
|
||||
print_('no cursor')
|
||||
break
|
||||
|
||||
|
||||
|
@ -328,7 +329,8 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
|
|||
names[id_].append(name)
|
||||
else:
|
||||
names[id_] = [name]
|
||||
max_id = max(ids) if ids else 0
|
||||
ids_sure = sorted(ids)[:-100]
|
||||
max_id = max(ids_sure) if ids_sure else 0 #3201
|
||||
|
||||
# 2303
|
||||
imgs_old = []
|
||||
|
@ -341,23 +343,23 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
|
|||
|
||||
imgs_new = []
|
||||
enough = False
|
||||
c_old = 0
|
||||
for tweet in TwitterAPI(session, cw).timeline_media(username):
|
||||
id_ = int(tweet['id_str'])
|
||||
if id_ < max_id:
|
||||
print_('enough')
|
||||
enough = True
|
||||
break
|
||||
|
||||
imgs_ = get_imgs_from_tweet(tweet, session, types, format, cw)
|
||||
|
||||
if id_ in ids:
|
||||
print_('duplicate: {}'.format(id_))
|
||||
c_old += 1
|
||||
continue
|
||||
ids.add(id_)
|
||||
|
||||
imgs_new += imgs_
|
||||
|
||||
if len(imgs_old) + len(imgs_new) >= n:
|
||||
imgs_new += get_imgs_from_tweet(tweet, session, types, format, cw)
|
||||
|
||||
if len(imgs_new) + c_old >= n: #3201
|
||||
break
|
||||
|
||||
msg = '{} {} - {}'.format(tr_('읽는 중...'), title, len(imgs_new))
|
||||
|
@ -368,7 +370,7 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
|
|||
else:
|
||||
print(msg)
|
||||
|
||||
if not enough and not imgs_new:
|
||||
if not enough and not imgs_new and c_old == 0:
|
||||
raise Exception('no imgs')
|
||||
|
||||
imgs = sorted(imgs_old + imgs_new, key=lambda img: img.id, reverse=True)
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
#coding:utf8
|
||||
from __future__ import division, print_function, unicode_literals
|
||||
import downloader
|
||||
from utils import Soup, get_ext, LazyUrl, Downloader, try_n, clean_title, get_print
|
||||
import ree as re
|
||||
from translator import tr_
|
||||
from timee import sleep
|
||||
import errors
|
||||
|
||||
|
||||
def setPage(url, p):
    """Return *url* pointing at page *p*; page 1 carries no query."""
    base = url.split('?')[0]
    if p > 1:
        return base + '?page={}'.format(p)
    return base
|
||||
|
||||
|
||||
def getPage(url):
    """Page number from a ?page=N query, defaulting to 1."""
    matched = re.find('page=([0-9]+)', url)
    if matched:
        return int(matched)
    return 1
|
||||
|
||||
|
||||
class Image(object):
    """A single album photo named by its zero-padded index."""

    def __init__(self, url, referer, p):
        self.url = LazyUrl(referer, lambda _: url, self)
        self.filename = '{:04}{}'.format(p, get_ext(url))
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_v2ph(Downloader):
    """Album downloader for v2ph.com."""
    type = 'v2ph'
    URLS = ['v2ph.com/album/']
    MAX_CORE = 4
    display_name = 'V2PH'

    @classmethod
    def fix_url(cls, url):
        # Drop the query string; paging is re-derived while reading.
        return url.split('?')[0]

    def read(self):
        info = get_info(self.url)
        title = info['title']

        for img in get_imgs(self.url, title, self.cw):
            self.urls.append(img.url)

        self.title = clean_title(title)
|
||||
|
||||
|
||||
|
||||
@try_n(2)
def get_info(url):
    """Read the album page and return {'title': ...}."""
    soup = Soup(downloader.read_html(url))
    return {'title': soup.find('h1').text.strip()}
|
||||
|
||||
|
||||
def get_imgs(url, title, cw=None):
    """Walk an album's pages and collect every photo.

    Raises LoginRequired when the first page shows no photo list; stops
    once the pagination's highest page is reached (or absent).
    """
    print_ = get_print(cw)
    imgs = []

    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        e_ = None
        for try_ in range(4):
            try:
                html = downloader.read_html(url, user_agent=downloader.hdr['User-Agent'])
                #sleep(1)
                break
            except Exception as e:
                e_ = e
                print(e)
        else:
            # A bare `raise` here had no active exception (py3 RuntimeError);
            # re-raise the last captured error instead.
            raise e_
        soup = Soup(html)

        view = soup.find('div', class_='photos-list')
        if view is None:
            if p == 1:
                raise errors.LoginRequired()
            else:
                break # Guest user
        for img in view.findAll('img'):
            img = img.attrs['data-src']
            img = Image(img, url, len(imgs))
            imgs.append(img)

        pgn = soup.find('ul', class_='pagination')
        # No pagination block -> treat as a single page instead of crashing.
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break

        msg = '{} {} ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
import downloader
|
||||
import ree as re
|
||||
from io import BytesIO as IO
|
||||
from error_printer import print_error
|
||||
from utils import Downloader, LazyUrl, get_ext, format_filename, try_n
|
||||
import ytdl
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_vimeo(Downloader):
    """Single-video downloader for vimeo.com."""
    type = 'vimeo'
    URLS = ['vimeo.com']
    single = True

    def init(self):
        # Promote a bare id to a canonical video url.
        if 'vimeo.com' not in self.url.lower():
            self.url = u'https://vimeo.com/{}'.format(self.url)

    def read(self):
        video = Video(self.url)
        video.url()#

        self.urls.append(video.url)
        self.setIcon(video.thumb)
        self.enableSegment()
        self.title = video.title
|
||||
|
||||
|
||||
class Video(object):
    """Lazily resolved vimeo video; metadata is fetched on first access."""
    _url = None

    def __init__(self, url):
        self.url = LazyUrl(url, self.get, self)

    @try_n(4)
    def get(self, url):
        """Resolve the widest progressive (http/https) format; cached."""
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)
        fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
        # `width` may be present but None in ytdl format dicts; `int(None)`
        # would raise, so coerce through `or 0`.
        fs = sorted(fs, key=lambda f: int(f.get('width') or 0), reverse=True)
        if not fs:
            raise Exception('No MP4 videos')
        f = fs[0]
        self._url = f['url']

        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = IO()
        downloader.download(self.thumb_url, buffer=self.thumb)
        self.title = info['title']
        ext = get_ext(self._url)
        self.filename = format_filename(self.title, info['id'], ext)
        return self._url
|
|
@ -0,0 +1,76 @@
|
|||
import downloader
|
||||
import ytdl
|
||||
from utils import Downloader, try_n, LazyUrl, get_ext, format_filename, clean_title
|
||||
from io import BytesIO
|
||||
import ree as re
|
||||
from m3u8_tools import M3u8_stream
|
||||
import os
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_vlive(Downloader):
    """Single-video downloader for vlive.tv; channel pages are unsupported."""
    type = 'vlive'
    URLS = ['vlive.tv']
    single = True
    display_name = 'V LIVE'

    def init(self):
        if 'channels.vlive.tv' in self.url:
            raise NotImplementedError('channel')

    def read(self):
        video = get_video(self.url)
        self.urls.append(video.url)
        self.setIcon(video.thumb)
        self.enableSegment()
        self.title = clean_title(video.title)
|
||||
|
||||
|
||||
@try_n(4)
def get_video(url):
    """Pick the best-quality mp4 format reported by youtube-dl."""
    options = {
        'noplaylist': True,
    }

    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)

    fs = []
    for f in info['formats']:
        if f['ext'] != 'mp4':
            continue
        # `vbr` may be missing; fall back to the "1080p"-style label.
        # Coerce to float so the sort key below never mixes str / None /
        # float (a TypeError in py3 `sorted`).
        quality = f.get('vbr') or re.find('([0-9]+)p', f['format'], re.IGNORECASE)
        f['quality'] = float(quality or 0)
        print(f['format'], f['quality'])
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    f = sorted(fs, key=lambda f: f['quality'])[-1]
    video = Video(f, info)

    return video
|
||||
|
||||
|
||||
class Video(object):
    """Resolved vlive video: direct url, thumbnail buffer and filename."""

    def __init__(self, f, info):
        title = info['title']
        self.title = title
        self.id = info['id']
        direct = f['url']
        self.url = direct

        buf = BytesIO()
        downloader.download(info['thumbnail'], buffer=buf)
        self.thumb = buf

        ext = get_ext(direct)
        if ext.lower() != '.m3u8':
            url = direct
        else:
            # HLS is not supported yet; the M3u8_stream line below is
            # intentionally unreachable.
            raise NotImplementedError('stream')#
            url = M3u8_stream(direct, n_thread=4)
        self.url = LazyUrl(direct, lambda x: url, self)
        self.filename = format_filename(title, self.id, ext)
|
||||
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
import downloader
|
||||
from utils import Soup, LazyUrl, clean_title, get_ext, get_imgs_already, urljoin, try_n, Downloader
|
||||
import os
|
||||
import page_selector
|
||||
from translator import tr_
|
||||
import ree as re
|
||||
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_webtoon(Downloader):
    """Series downloader for webtoons.com."""
    type = 'webtoon'
    URLS = ['webtoon.com', 'webtoons.com']
    MAX_CORE = 8
    MAX_SPEED = 4.0
    display_name = 'WEBTOON'

    def init(self):
        self.url = get_main(self.url)
        self.soup = downloader.read_soup(self.url)

    @classmethod
    def fix_url(cls, url):
        return url.replace('webtoon.com', 'webtoons.com')

    def read(self):
        title = clean_title(self.soup.find('h1').text.strip())
        self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(title)
        for item in get_imgs_all(self.url, title, cw=self.cw):
            # Cached entries may be raw paths instead of Image objects.
            self.urls.append(item.url if isinstance(item, Image) else item)
        self.title = title
|
||||
|
||||
|
||||
class Page(object):
    """A single episode: its url and display title."""

    def __init__(self, url, title):
        self.url = url
        self.title = title
|
||||
|
||||
|
||||
class Image(object):
    """One panel image, saved as '<episode title>/<index><ext>'."""

    def __init__(self, url, page, p):
        name = clean_title(page.title)
        ext = get_ext(url)
        if not ext:
            # Fall back to sniffing when the url carries no extension.
            ext = downloader.get_ext(url, referer=page.url)
        self.filename = '{}/{:04}{}'.format(name, p, ext)
        self.url = LazyUrl(page.url, lambda _: url, self)
|
||||
|
||||
|
||||
@try_n(2)
def get_imgs(page):
    """Image list of one episode; motiontoon episodes are unsupported."""
    html = downloader.read_html(page.url)
    if 'window.__motiontoonViewerState__' in html:
        raise NotImplementedError('motiontoon')
    view = Soup(html).find('div', class_='viewer_img')
    imgs = []
    for tag in view.findAll('img'):
        src = tag.get('data-url') or tag['src']
        imgs.append(Image(urljoin(page.url, src), page, len(imgs)))
    return imgs
|
||||
|
||||
|
||||
def get_main(url):
    """Map an episode url to its series page; series urls pass through."""
    if 'episode_no=' in url:
        soup = downloader.read_soup(url)
        link = soup.find('div', class_='subj_info').find('a')
        url = urljoin(url, link['href'])
    return url
|
||||
|
||||
|
||||
def set_page(url, p):
    """Rewrite *url* to address page *p*; page 1 carries no parameter."""
    marker = '&page='
    if marker not in url:
        url += '&page={}'.format(p)
    else:
        url = re.sub('&page=[0-9]+', '&page={}'.format(p), url)
    if p == 1:
        url = url.replace('&page=1', '')
    return url
|
||||
|
||||
|
||||
def get_pages(url):
    """Enumerate every episode of a series, oldest first."""
    pages = []
    seen = set()
    for p in range(1, 101):
        url_page = set_page(url, p)
        print(url_page)
        last_err = None
        for _ in range(4):
            try:
                soup = downloader.read_soup(url_page)
                view = soup.find('ul', id='_listUl')
                if view is None:
                    raise Exception('no view')
            except Exception as e:
                last_err = e
                print(e)
            else:
                break
        else:
            raise last_err
        found = []
        for li in view.findAll('li', recursive=False):
            href = urljoin(url, li.find('a')['href'])
            subj = li.find('span', class_='subj').text.strip()
            if href in seen:
                continue
            seen.add(href)
            no = int(li['data-episode-no'])
            found.append(Page(href, '{:04} - {}'.format(no, subj)))
        if not found:
            # Pagination wrapped around to already-seen episodes: done.
            break
        pages += found
    return pages[::-1]
|
||||
|
||||
|
||||
@page_selector.register('webtoon')
@try_n(4)
def f(url):
    """Episode list for the page-selector dialog."""
    return get_pages(get_main(url))
|
||||
|
||||
|
||||
def get_imgs_all(url, title, cw=None):
    """Gather images for the selected episodes, reusing cached results."""
    pages = page_selector.filter(get_pages(url), cw)
    imgs = []
    for p, page in enumerate(pages):
        cached = get_imgs_already('webtoon', title, page, cw)
        if cached:
            imgs += cached
            continue
        imgs += get_imgs(page)
        msg = tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages))
        if cw is not None:
            cw.setTitle(msg)
            if not cw.alive:
                break
        else:
            print(msg)

    return imgs
|
|
@ -0,0 +1,180 @@
|
|||
#coding:utf8
|
||||
import downloader
|
||||
import ree as re
|
||||
from timee import sleep, clock, time
|
||||
from constants import clean_url
|
||||
from utils import Downloader, urljoin, try_n, Session, get_print, clean_title, Soup, fix_protocol
|
||||
import os
|
||||
from translator import tr_
|
||||
import json
|
||||
from datetime import datetime
|
||||
import constants
|
||||
import clf2
|
||||
import errors
|
||||
|
||||
|
||||
@Downloader.register
class Downloader_weibo(Downloader):
    """Photo-album downloader for weibo.com (login required)."""
    type = 'weibo'
    URLS = ['weibo.com', 'weibo.cn']

    def init(self):
        self.session = Session()

    @classmethod
    def fix_url(cls, url):
        url = url.replace('weibo.cn', 'weibo.com').split('?')[0]
        if 'weibo.com/p/' in url:
            ident = re.findall('weibo.com/p/([^/]+)', url)[0]
            url = 'https://weibo.com/p/{}'.format(ident)
        elif 'weibo.com/u/' in url:
            ident = re.findall('weibo.com/u/([^/]+)', url)[0]
            url = 'https://weibo.com/u/{}'.format(ident)
        elif 'weibo.com/' in url:
            ident = re.findall('weibo.com/([^/]+)', url)[0]
            url = 'https://weibo.com/{}'.format(ident)
        else:
            # A bare id was pasted in.
            url = 'https://weibo.com/u/{}'.format(url)
        return fix_protocol(url)

    def read(self):
        checkLogin(self.session)

        uid, oid, name = get_id(self.url, self.cw)
        title = clean_title('{} (weibo_{})'.format(name, uid))

        for img in get_imgs(uid, oid, title, self.session, cw=self.cw, d=self, parent=self.mainWindow):
            self.urls.append(img.url)
            self.filenames[img.url] = img.filename

        self.title = title
|
||||
|
||||
|
||||
def checkLogin(session):
    """Raise LoginRequired unless a live weibo SUBP cookie is present."""
    jar = session.cookies._cookies
    cookie = jar.get('.weibo.com', {}).get('/', {}).get('SUBP')
    if not cookie or cookie.is_expired():
        raise errors.LoginRequired()
|
||||
|
||||
|
||||
class Album(object):
    """Lightweight record of a weibo album (id + api type)."""

    def __init__(self, id, type):
        self.id = id
        self.type = type
|
||||
|
||||
|
||||
class Image(object):
    """A photo url plus its target filename and upload timestamp."""

    def __init__(self, url, filename=None, timestamp=0):
        self.url = url
        self.filename = os.path.basename(url) if filename is None else filename
        self.timestamp = timestamp
|
||||
|
||||
|
||||
def _get_page_id(html):
|
||||
m = re.search("CONFIG\\['page_id'\\]='([0-9]+?)'", html)
|
||||
return m
|
||||
|
||||
|
||||
def get_id(url, cw=None):
    """Resolve (uid, page_id, nickname) for a weibo profile url.

    Retries once on transient failures; login problems propagate at once.
    """
    err_last = None
    for attempt in range(2):
        try:
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                raise errors.LoginRequired()
            m = _get_page_id(html)
            if not m:
                raise Exception('no page_id')
            oid = m.groups()[0]
            uids = re.findall('uid=([0-9]+)', html)
            # Pick the uid that occurs most often in the page.
            uid = max(set(uids), key=uids.count)
            name = re.findall("CONFIG\\['onick'\\]='(.+?)'", html)[0]
            return uid, oid, name
        except errors.LoginRequired:
            raise
        except Exception as e:
            err_last = e
            print(e)
    raise err_last
|
||||
|
||||
|
||||
|
||||
def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):
    """Collect every photo of user *uid* across all of their albums.

    Pages through the album list, then through each album's photos via the
    photo.weibo.com JSON api. Progress is shown on *cw*; returns [] if the
    widget dies mid-read. Result is sorted newest-first by upload time.
    """
    print_ = get_print(cw)
    print_('uid: {}, oid:{}'.format(uid, oid))

    @try_n(4)
    def get_album_imgs(album, page):
        # One 30-item page of a single album; __rnd is a cache-buster.
        url = 'https://photo.weibo.com/photos/get_all?uid={}&album_id={}&count=30&page={}&type={}&__rnd={}'.format(uid, album.id, page, album.type, int(time()*1000))
        referer = 'https://photo.weibo.com/{}/talbum/index'.format(uid)
        html = downloader.read_html(url, referer, session=session, timeout=30)
        j = json.loads(html)
        data = j['data']
        imgs = []
        for photo in data['photo_list']:
            host = photo['pic_host']
            name = photo['pic_name']
            id = photo['photo_id']
            timestamp = photo['timestamp']
            date = datetime.fromtimestamp(timestamp)
            # Filename prefix: two-digit year-month-day of the upload.
            t = '{:02}-{:02}-{:02}'.format(date.year % 100, date.month, date.day)
            url = '{}/large/{}'.format(host, name)
            ext = os.path.splitext(name)[1]
            filename = '[{}] {}{}'.format(t, id, ext)
            img = Image(url, filename, timestamp)
            imgs.append(img)

        return imgs

    def get_albums(page):
        # One 20-item page of the user's album list.
        url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(uid, page, int(time()*1000))
        referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
        html = downloader.read_html(url, referer, session=session)
        j = json.loads(html)
        data = j['data']
        albums = []
        for album in data['album_list']:
            id = album['album_id']
            type = album['type']
            album = Album(id, type)
            albums.append(album)

        return albums

    # Fetch the full album list first (an empty page marks the end).
    albums = []
    for p in range(1, 101):
        albums_new = get_albums(p)
        albums += albums_new
        print_('p:{}, albums:{}'.format(p, len(albums)))
        if not albums_new:
            break

    # Then drain each album page by page.
    imgs = []
    for album in albums:
        print('Album:', album.id, album.type)
        for p in range(1, 101):
            imgs_new = get_album_imgs(album, p)
            imgs += imgs_new
            s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
            if cw:
                if not cw.alive:
                    return []
                cw.setTitle(s)
            else:
                print(s)
            if not imgs_new:
                break
            sleep(1)  # throttle between album pages

    imgs = sorted(imgs, key=lambda img: img.timestamp, reverse=True)
    return imgs
|
||||
|
||||
|
Loading…
Reference in New Issue