^q^
This commit is contained in:
parent 9da624da56
commit dce00852e6
@@ -1,7 +1,7 @@
 #coding:utf8
 from __future__ import print_function
 import downloader
-from utils import Soup, cut_pair, LazyUrl, Downloader, get_print, get_max_range, try_n, clean_title
+from utils import Soup, cut_pair, LazyUrl, Downloader, get_print, get_max_range, try_n, clean_title, check_alive
 import json
 import ree as re
 import os
@@ -38,6 +38,13 @@ class Downloader_bcy(Downloader):
         self.artist = self.info['artist']


+def get_ssr_data(html):
+    s = html.split('window.__ssr_data = JSON.parse("')[1].replace('\\"', '"')
+    s = cut_pair(s).replace('"', '\\"')
+    data = json.loads(json.loads('"{}"'.format(s)))
+    return data
+
+
 @try_n(2)
 def get_imgs(url, html=None, cw=None):
     if '/detail/' not in url:
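The new get_ssr_data helper centralizes bcy's embedded-payload parsing: the page
inlines window.__ssr_data = JSON.parse("...") into its HTML, so the scraped text
is a JSON string literal whose contents are themselves JSON, and it takes two
json.loads passes to unwrap. A self-contained sketch of the same double decode
(this cut_pair is a simplified stand-in for the project's utils.cut_pair):

    import json

    def cut_pair(s):
        # Return the prefix of s up to the brace that closes the first '{'.
        depth = 0
        for i, ch in enumerate(s):
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    return s[:i + 1]
        raise ValueError('unbalanced braces')

    html = 'window.__ssr_data = JSON.parse("{\\"detail\\": {\\"post_data\\": {\\"multi\\": []}}}");'
    s = html.split('window.__ssr_data = JSON.parse("')[1].replace('\\"', '"')
    s = cut_pair(s).replace('"', '\\"')              # re-escape for the outer decode
    data = json.loads(json.loads('"{}"'.format(s)))  # outer: string literal; inner: payload
    print(data['detail']['post_data']['multi'])      # -> []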
@@ -46,10 +53,7 @@ def get_imgs(url, html=None, cw=None):
     if html is None:
         html = downloader.read_html(url)

-    s = cut_pair(html.split('window.__ssr_data = JSON.parse("')[1])
-    s = json.loads(u'"{}"'.format(s))
-
-    data = json.loads(s)
+    data = get_ssr_data(html)

     multi = data['detail']['post_data']['multi']
@@ -103,9 +107,8 @@ def get_info(url, html):
     uname = soup.find('div', class_='user-name') or soup.find('p', class_='uname') or soup.find('div', class_='user-info-name')
     info['artist'] = uname.text.strip()

-    s = cut_pair(html.split('window.__ssr_data = JSON.parse("')[1])
-    j = json.loads(json.loads(u'"{}"'.format(s)))
-
+    j = get_ssr_data(html)

     if '/detail/' in url:
         info['uid'] = j['detail']['detail_user']['uid']
@@ -137,11 +140,14 @@ def get_imgs_channel(url, html=None, cw=None):
         if not items:
             print('no items')
             break
         c = 0
         for item in items:
+            check_alive(cw)
             id = item['item_detail']['item_id']
             if id in ids:
                 print('duplicate')
                 continue
             c += 1
             ids.add(id)
             url_single = u'https://bcy.net/item/detail/{}'.format(id)
             imgs_single = get_imgs(url_single, cw=cw)
@@ -151,14 +157,15 @@ def get_imgs_channel(url, html=None, cw=None):
                 imgs.append(img)
             s = u'{} {} - {}'.format(tr_(u'읽는 중...'), info['artist'], min(len(imgs), max_pid))
             if cw:
                 if not cw.alive:
                     return
                 cw.setTitle(s)
             else:
                 print(s)

             if len(imgs) >= max_pid:
                 break
         if not c:
             print('not c')
             break
         if len(imgs) >= max_pid:
             print('over max_pid:', max_pid)
             break
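get_imgs_channel's paging loop now carries a cancellation check plus three stop
conditions: an empty page ('no items'), a page of nothing but already-seen ids
('not c'), and the max_pid cap. The skeleton of that safeguard pattern, as a
generic hypothetical helper rather than project code:

    def paginate(fetch_page, max_items):
        seen, out, page = set(), [], 0
        while len(out) < max_items:
            items = fetch_page(page)
            if not items:
                break                        # 'no items': feed exhausted
            new = [i for i in items if i['id'] not in seen]
            if not new:
                break                        # 'not c': only duplicates, stop paging
            for item in new:
                seen.add(item['id'])
                out.append(item)
                if len(out) >= max_items:
                    break                    # 'over max_pid'
            page += 1
        return out

    print(paginate(lambda p: [{'id': p * 2}, {'id': p * 2 + 1}], 5))  # ids 0..4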
@@ -1,227 +0,0 @@
-import downloader
-from utils import Soup, Session, LazyUrl, Downloader, try_n, get_imgs_already, clean_title, get_print, check_alive
-import json, os
-from timee import time, sleep
-import ree as re
-from translator import tr_
-import page_selector
-
-
-class NotPaidError(Exception): pass
-
-
-class Page(object):
-
-    def __init__(self, id, url, title, serviceType):
-        self.id = id
-        self.url = url
-        self.title = title
-        self.serviceType = serviceType
-
-
-class Image(object):
-
-    def __init__(self, url, page, p):
-        self._url = url
-        self.url = LazyUrl(page.url, self.get, self)
-        ext = os.path.splitext(url.split('?')[0])[1]
-        if ext.lower()[1:] not in ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp'):
-            ext = '.jpg'
-        self.filename = '{}/{:04}{}'.format(clean_title(page.title), p, ext)
-
-    def get(self, _):
-        return self._url
-
-
-def get_id(url):
-    if '/league/' in url:
-        header = 'league_'
-    else:
-        header = ''
-    body = re.find('/viewer/([0-9a-zA-Z_-]+)', url) or re.find('/view/([0-9a-zA-Z_-]+)', url)
-    return header, body
-
-
-def header_to_type(header):
-    if header == 'league_':
-        return 'leaguetoon'
-    return 'webtoon'
-
-
-def get_info(url, session):
-    referer = url
-    header, id = get_id(referer)
-    type_ = header_to_type(header)
-
-    info = {}
-    ids = set()
-    pages = []
-    for p in range(1, 1+10):
-        if p == 1:
-            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(type_, id, int(time()))
-        else:
-            if type_ == 'webtoon':
-                break
-            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(type_, id, p, int(time()))
-        print(url)
-        info_raw = downloader.read_html(url, referer=referer, session=session)
-        _info = json.loads(info_raw)
-        webtoon = _info['data'].get('webtoon') or _info['data'].get('leaguetoon')
-        if webtoon is None:
-            raise Exception('No webtoon')
-
-        if p == 1:
-            info['title'] = webtoon['title']
-            artists = []
-            for artist in webtoon['cartoon']['artists']:
-                artist = artist['penName']
-                if artist in artists:
-                    continue
-                artists.append(artist)
-
-            if len(artists) > 1:
-                artists = [
-                    artists[1], artists[0]] + artists[2:]
-            info['artists'] = artists
-
-        eps = webtoon.get('webtoonEpisodes') or webtoon.get('leaguetoonEpisodes')
-        if not eps:
-            if p > 1:
-                eps = []
-            else:
-                raise Exception('No eps')
-        c = 0
-        for ep in eps:
-            id_ = ep.get('articleId') or ep.get('id')
-            title = ep['title']
-            serviceType = 'free' if type_ == 'leaguetoon' else ep['serviceType']
-            if type_ == 'leaguetoon':
-                url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
-            else:
-                url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
-            if id_ in ids:
-                continue
-            c += 1
-            ids.add(id_)
-            page = Page(id_, url, title, serviceType)
-            pages.append(page)
-        if c == 0:
-            print('c == 0; break')
-            break
-
-    info['pages'] = sorted(pages, key=lambda x: x.id)
-    return info
-
-
-@Downloader.register
-class Downloader_daumtoon(Downloader):
-    type = 'daumtoon'
-    URLS = ['webtoon.daum.net']
-    MAX_CORE = 16
-    MAX_SPEED = 4.0
-    display_name = 'Daum Webtoon'
-
-    def init(self):
-        if '/viewer/' in self.url:
-            return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
-        if '/view/' not in self.url and not self.url.lower().startswith('http'):
-            self.url = 'http://webtoon.daum.net/webtoon/view/{}'.format(self.url)
-        self.session = None
-        self._info = get_info(self.url, self.session)
-
-    @property
-    def name(self):
-        title = self._info['title']
-        artists = self._info['artists']
-        artist = artists[0] if artists else 'N/A'
-        title = self.format_title('N/A', ''.join(get_id(self.url)), title, artist, 'N/A', 'N/A', 'Korean', prefix='daumtoon_')
-        return clean_title(title)
-
-    def read(self):
-        self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(self.name)
-        imgs = get_imgs_all(self._info, self.name, self.session, cw=self.cw)
-        for img in imgs:
-            if isinstance(img, Image):
-                self.urls.append(img.url)
-            else:
-                self.urls.append(img)
-
-        self.title = self.name
-        self.session = None
-
-
-def get_imgs(page, session, cw):
-    print_ = get_print(cw)
-
-    if not downloader.cookiejar.get('PROF', domain='.daum.net') and page.serviceType != 'free': #3314
-        raise NotPaidError()
-
-    html = downloader.read_html(page.url, session=session)
-    header, id = get_id(page.url)
-    t = int(time())
-    soup = Soup(html)
-    type_ = header_to_type(header)
-
-    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
-    data_raw = downloader.read_html(url_data, session=session, referer=page.url)
-    data = json.loads(data_raw)
-    if header == 'league_':
-        m_type = None
-    else:
-        m_type = data['data']['webtoonEpisode']['multiType']
-    print_('m_type: {}'.format(m_type))
-
-    if m_type == 'chatting':
-        page.url = page.url.replace('daum.net/', 'daum.net/m/')
-        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
-        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
-        data = json.loads(data_raw)
-        imgs = []
-        for chat in data['data']['webtoonEpisodeChattings']:
-            img = chat.get('image')
-            if not img:
-                continue
-            img = Image(img['url'], page, len(imgs))
-            imgs.append(img)
-    else:
-        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
-        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
-        data = json.loads(data_raw)
-        if not data.get('data'):
-            raise NotPaidError()
-        imgs = []
-        for img in data['data']:
-            img = Image(img['url'], page, len(imgs))
-            imgs.append(img)
-
-    return imgs
-
-
-def get_imgs_all(info, title, session, cw=None):
-    print_ = get_print(cw)
-    pages = info['pages']
-    pages = page_selector.filter(pages, cw)
-    imgs = []
-    for p, page in enumerate(pages):
-        imgs_already = get_imgs_already('daumtoon', title, page, cw)
-        if imgs_already:
-            imgs += imgs_already
-            continue
-        try:
-            imgs += get_imgs(page, session, cw)
-        except NotPaidError:
-            print_('Not paid: {}'.format(page.title)) #3314
-            continue
-        if cw is not None:
-            cw.setTitle(tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
-        check_alive(cw)
-
-    return imgs
-
-
-@page_selector.register('daumtoon')
-@try_n(4)
-def f(url):
-    info = get_info(url, None)
-    return info['pages']
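The deleted Daum extractor polled JSON endpoints with an epoch timeStamp query
parameter as a cache-buster on every request. That request shape in isolation
(stdlib only; the endpoint is long defunct, so the call is left commented, and
fetch_json is a hypothetical helper):

    import json
    import time
    import urllib.request

    def fetch_json(base, **params):
        # Cache-busting pattern from the removed code: append the current
        # epoch second so intermediaries don't serve stale JSON.
        params['timeStamp'] = int(time.time())
        qs = '&'.join('{}={}'.format(k, v) for k, v in params.items())
        with urllib.request.urlopen('{}?{}'.format(base, qs)) as r:
            return json.loads(r.read().decode('utf8'))

    # data = fetch_json('http://webtoon.daum.net/data/pc/webtoon/view/SOME_ID', page_no=2)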
@@ -1,101 +0,0 @@
-import downloader
-from utils import Soup, try_n, LazyUrl, Downloader, lock, get_print, clean_title
-from timee import sleep
-import base64
-import json
-import constants
-import ree as re
-KEY = b'gefdzfdef'
-
-
-@Downloader.register
-class Downloader_epio(Downloader):
-    type = 'epio'
-    URLS = ['epio.app']
-
-    def read(self):
-        info = get_info(self.url, cw=self.cw)
-
-        imgs = info['imgs']
-
-        for img in imgs:
-            self.urls.append(img.url)
-
-        self.title = clean_title(info['title'])
-
-
-class Image(object):
-
-    def __init__(self, url, referer, p):
-        self._url = url
-        self.url = LazyUrl(referer, self.get, self)
-        ext = '.jpg'#
-        self.filename = u'{:04}{}'.format(p, ext)
-
-    def get(self, referer):
-        return self._url
-
-
-def get_info(url, cw=None):
-    info = _get_info(url, cw)
-
-    imgs = []
-    html = info['content']
-    soup = Soup(html)
-    for img in soup.findAll('img'):
-        src = img.attrs.get('src')
-        if not src:
-            continue
-
-        # 1696
-        if not isinstance(src, bytes):
-            src = src.encode('utf8')
-        t = base64.b64encode(src)
-        if isinstance(t, bytes):
-            t = t.decode('utf8')
-        src = 'https://cdn1-images.epio.app/image/download/{}'.format(t)
-
-        img = Image(src, url, len(imgs))
-        imgs.append(img)
-    info['imgs'] = imgs
-
-    return info
-
-
-def get_id(url):
-    return re.find('article/detail/([0-9a-z]+)', url)
-
-
-from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
-from cryptography.hazmat.backends import default_backend
-import aes
-backend = default_backend()
-def decrypt(s, cw=None):
-    print_ = get_print(cw)
-    key, iv = aes.key_and_iv(s[:16], KEY)
-    print_('key: {}\niv: {}'.format(key, iv))
-    cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
-    r = -len(s) % 16
-    if r:
-        s += b'\x00' * r
-    dec = cipher.decryptor()
-    s_dec = dec.update(s[16:]) + dec.finalize()
-    s_dec = s_dec[:-s_dec[-1]]
-    if r:
-        s_dec = s_dec[:-r]
-    return s_dec
-
-
-def _get_info(url, cw=None):
-    id = get_id(url)
-
-    url_api = 'https://girlimg.epio.app/api/articles/{}?lang=en-us'.format(id)
-    html = downloader.read_html(url_api, referer=url)
-    s = json.loads(html)['string']
-
-    s = base64.b64decode(s)
-    s = decrypt(s, cw)
-    info = json.loads(s)
-
-    return info
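The removed decrypt() derived an AES key/IV from the payload's 16-byte header
with the project's aes.key_and_iv helper (apparently an OpenSSL-style salted
key derivation -- an assumption, since that module isn't shown), then ran
AES-CBC and stripped PKCS#7 padding. A runnable round-trip of the same payload
layout, sidestepping the derivation with a fixed key and IV:

    import os
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    key, iv = os.urandom(32), os.urandom(16)

    def pkcs7_pad(b):
        n = 16 - len(b) % 16
        return b + bytes([n]) * n

    # Frame a sample the way the API apparently did: a 16-byte salt header
    # followed by the CBC ciphertext (layout inferred from the code above).
    enc = Cipher(algorithms.AES(key), modes.CBC(iv)).encryptor()
    payload = os.urandom(16) + enc.update(pkcs7_pad(b'{"title": "x"}')) + enc.finalize()

    dec = Cipher(algorithms.AES(key), modes.CBC(iv)).decryptor()
    body = dec.update(payload[16:]) + dec.finalize()
    print(body[:-body[-1]])  # strip PKCS#7 padding -> b'{"title": "x"}'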
@@ -1,10 +1,10 @@
 from utils import Downloader, LazyUrl, clean_title
 import utils
+from m3u8_tools import playlist2stream, M3u8_stream
 import os
 from hashlib import md5
 from translator import tr_
-import ffmpeg
-##DEFAULT_N_THREAD = 1
+DEFAULT_N_THREAD = 2


 @Downloader.register
@@ -21,16 +21,19 @@ class Downloader_m3u8(Downloader):
         return url

     def read(self):
-##        n_thread = self.cw.format or DEFAULT_N_THREAD
-##        self.print_('n_thread: {}'.format(n_thread))
-        video = Video(self.url, self.cw)
+        n_thread = self.cw.format or DEFAULT_N_THREAD
+        self.print_('n_thread: {}'.format(n_thread))
+        video = Video(self.url, n_thread)
         self.urls.append(video.url)
         self.title = '{} ({})'.format(video.title, video.id_)


 class Video(object):
-    def __init__(self, url, cw):
-        m = ffmpeg.Stream(url, cw=cw)
+    def __init__(self, url, n_thread):
+        try:
+            m = playlist2stream(url, n_thread=n_thread)
+        except:
+            m = M3u8_stream(url, n_thread=n_thread)
         self.url = LazyUrl(url, lambda _: m, self)
         self.title = os.path.splitext(os.path.basename(url))[0]
         self.id_ = md5(url.encode('utf8')).hexdigest()[:8]
@@ -38,14 +41,14 @@ class Video(object):
         self.filename = clean_title(self.title, n=-len(tail)) + tail


-##import selector
-##@selector.options('m3u8')
-##def options():
-##    def f(urls):
-##        n_thread, ok = utils.QInputDialog.getInt(Downloader.mainWindow, tr_('Set number of threads'), tr_('Number of threads?'), value=DEFAULT_N_THREAD, min=1, max=4, step=1)
-##        if not ok:
-##            return
-##        return n_thread
-##    return [
-##        {'text': 'Set number of threads...', 'format': f},
-##    ]
+import selector
+@selector.options('m3u8')
+def options():
+    def f(urls):
+        n_thread, ok = utils.QInputDialog.getInt(Downloader.mainWindow, tr_('Set number of threads'), tr_('Number of threads?'), value=DEFAULT_N_THREAD, min=1, max=4, step=1)
+        if not ok:
+            return
+        return n_thread
+    return [
+        {'text': 'Set number of threads...', 'format': f},
+    ]
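The move from a single ffmpeg.Stream back to playlist2stream / M3u8_stream with
an n_thread option lines up with the changelog's "improved M3U8 download speed":
a media playlist is just a list of short segment URLs, so segments parallelize
cleanly. The concurrency idea in isolation (illustrative sketch; the project's
M3u8_stream handles this internally):

    from concurrent.futures import ThreadPoolExecutor

    def fetch_segments(urls, fetch, n_thread=2):
        # Download playlist segments concurrently; map() preserves order,
        # so the results can still be concatenated in playlist order.
        with ThreadPoolExecutor(max_workers=n_thread) as pool:
            return list(pool.map(fetch, urls))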
@@ -1,6 +1,6 @@
 #coding:utf8
 import downloader
-from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title
+from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title, get_ext
 import os
 from translator import tr_
 from timee import sleep
@@ -10,16 +10,17 @@ import clf2#


 class Image(object):
-    def __init__(self, url, p, page):
-        ext = os.path.splitext(url)[1]
-        if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
-            ext = '.jpg'
-        self.filename = u'{:04}{}'.format(p, ext)
+    def __init__(self, url, p, page, cw):
+        self.cw = cw
+        ext = get_ext(url)
+        self.filename = '{:04}{}'.format(p, ext)
         if page.title is not None:
-            self.filename = u'{}/{}'.format(page.title, self.filename)
-        def f(_):
-            return url
-        self.url = LazyUrl(page.url, f, self)
+            self.filename = '{}/{}'.format(page.title, self.filename)
+        self._url = url
+        self.url = LazyUrl(page.url, self.get, self)
+
+    def get(self, _):
+        return self._url#'tmp://' + clf2.download(self._url, cw=self.cw)


 class Page(object):
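The Image refactor swaps a throwaway closure (def f(_): return url) for the
stored-URL-plus-bound-method shape used elsewhere in the codebase, leaving a
hook (the commented-out clf2.download) to route the fetch through a browser
later. A minimal stand-in for the deferred-resolution pattern (this LazyUrl is
a simplified assumption about the utils class):

    class LazyUrl:
        # Simplified stand-in for utils.LazyUrl: defer URL resolution
        # until the downloader actually asks for it.
        def __init__(self, referer, get, obj):
            self.referer, self._get, self.obj = referer, get, obj

        def __call__(self):
            return self._get(self.referer)

    class Image:
        def __init__(self, url, page_url):
            self._url = url
            self.url = LazyUrl(page_url, self.get, self)  # bound method, not a closure

        def get(self, _):
            return self._url  # could become 'tmp://' + clf2.download(...) per the comment

    img = Image('https://example.com/a.jpg', 'https://example.com/gallery')
    print(img.url())  # -> 'https://example.com/a.jpg'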
@@ -36,7 +37,7 @@ class Downloader_mrm(Downloader):
     type = 'mrm'
     URLS = ['myreadingmanga.info']
     _soup = None
-    MAX_CORE = 16
+    MAX_CORE = 4
     display_name = 'MyReadingManga'

     def init(self):
@@ -67,7 +68,7 @@ class Downloader_mrm(Downloader):
         return title

     def read(self):
-        self.title = u'읽는 중... {}'.format(self.name)
+        self.title = '읽는 중... {}'.format(self.name)

         imgs = get_imgs(self.url, self.soup, self.session, self.cw)

@@ -95,12 +96,12 @@ def get_imgs(url, soup=None, session=None, cw=None):

     if pagination is None:
         page = Page(None, url, soup)
-        imgs = get_imgs_page(page, session=session)
+        imgs = get_imgs_page(page, session=session, cw=cw)
     else:
         pages = get_pages(url, soup, session=session)
         imgs = []
         for i, page in enumerate(pages):
-            s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))
+            s = '{} {} / {} ({} / {})'.format(tr_('읽는 중...'), title, page.title, i+1, len(pages))

             if cw:
                 if not cw.alive:
@@ -109,7 +110,7 @@ def get_imgs(url, soup=None, session=None, cw=None):
             else:
                 print(s)

-            imgs += get_imgs_page(page, session=session)
+            imgs += get_imgs_page(page, session=session, cw=cw)

     if not imgs:
         raise Exception('no imgs')
@@ -149,7 +150,7 @@ def get_pages(url, soup=None, session=None):


 @try_n(4)
-def get_imgs_page(page, session=None):
+def get_imgs_page(page, session=None, cw=None):
     url = page.url
     soup = page.soup
     if soup is None:
@@ -165,7 +166,7 @@ def get_imgs_page(page, session=None):
         if img is None:
             continue
         img = urljoin(url, img)
-        img = Image(img, len(imgs), page)
+        img = Image(img, len(imgs), page, cw)
         imgs.append(img)
     print(page.title, len(imgs), page.url)
@@ -68,7 +68,8 @@ class Downloader_syosetu(Downloader):

         title, self.artist = get_title_artist(soup)
         self.__title = title
-        title_dir = clean_title((u'[{}] {}').format(self.artist, title))
+        ncode = re.find(r'syosetu.com/([^/]+)', self.url, err='no ncode') #3938
+        title_dir = clean_title('[{}] {} ({})'.format(self.artist, title, ncode))
         ex = soup.find('div', id='novel_ex')
         self.novel_ex = ex.text.strip() if ex else None
         texts = []
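The new line pulls the novel's ncode out of the URL so it lands in the folder
name. With the stdlib instead of the project's ree wrapper (whose find()
appears to return the first capture group and raise err on no match -- an
assumption from usage), the same extraction looks like:

    import re

    m = re.search(r'syosetu\.com/([^/]+)', 'https://ncode.syosetu.com/n9669bk/')
    if not m:
        raise Exception('no ncode')
    print(m.group(1))  # -> 'n9669bk'

Note the original pattern leaves the dot unescaped; escaping it, as above, is
slightly stricter about what it matches.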
@@ -1,19 +1,15 @@
 from __future__ import division, print_function, unicode_literals
 import downloader
 import ree as re
-from utils import urljoin, Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range
-import os
-import json
-import ast
+from utils import Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range, format_filename
 from io import BytesIO
-import random
 import clf2
 from translator import tr_
 from timee import sleep
 from error_printer import print_error
-import devtools
-HDR = {'User-Agent': downloader.hdr['User-Agent']}
+import ytdl
 PATTERN_VID = '/(v|video)/(?P<id>[0-9]+)'
+SHOW = True


 def is_captcha(soup):
@@ -79,24 +75,17 @@ class Video(object):
         id = m.group('id')
         ext = '.mp4'
         self.title = id#
-        self.filename = '{}{}'.format(clean_title(self.title, n=-len(ext)), ext)
+        self.filename = format_filename(self.title, id, ext)

-        html = downloader.read_html(url, session=self.session)
-        soup = Soup(html)
-        data = soup.find(id='__NEXT_DATA__')
-        props = data.contents[0]
-        data_encode = json.dumps(props)
-        ast_le = ast.literal_eval(data_encode)
-        data = json.loads(ast_le)
+        ydl = ytdl.YoutubeDL()
+        info = ydl.extract_info(url)

-        #info = data['props']['pageProps']['videoData']['itemInfos']
-        info = data['props']['pageProps']['itemInfo']['itemStruct']
-        self._url = info['video']['downloadAddr']
-
-        self.url_thumb = info['video']['cover']
+        self.url_thumb = info['thumbnail']
         self.thumb = BytesIO()
         downloader.download(self.url_thumb, referer=url, buffer=self.thumb)
+
+        self._url = info['url']

         return self._url
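The rewrite drops the hand-rolled __NEXT_DATA__ scraping (Soup + json + ast)
and delegates extraction to the bundled youtube-dl fork, which returns the
direct video URL and thumbnail in one call. With stock yt-dlp the equivalent
looks roughly like this (yt-dlp assumed here; the project wraps its own ytdl
module, and the URL is a placeholder):

    from yt_dlp import YoutubeDL

    with YoutubeDL({'quiet': True}) as ydl:
        info = ydl.extract_info('https://www.tiktok.com/@user/video/1234567890123456789',
                                download=False)
    print(info.get('url') or info['formats'][-1]['url'])
    print(info['thumbnail'])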
@@ -117,21 +106,30 @@ def read_channel(url, session, cw=None):

     def f(html, browser=None):
         soup = Soup(html)
-        if is_captcha(soup):
-            print('captcha')
-            browser.show()
-            sd['shown'] = True
-        elif sd['shown']:
-            browser.hide()
-            sd['shown'] = False
+        if not SHOW:
+            if is_captcha(soup):
+                print('captcha')
+                browser.show()
+                sd['shown'] = True
+            elif sd['shown']:
+                browser.hide()
+                sd['shown'] = False
         try:
-            info['uid'] = soup.find('h2', class_='share-title').text.strip()
-            info['nickname'] = soup.find('h1', class_='share-sub-title').text.strip()
+            st = soup.find('h2', class_='share-title')
+            if st is None:
+                st = soup.find('h2', class_=lambda c: c and 'ShareTitle' in c)
+            info['uid'] = st.text.strip()
+            st = soup.find('h1', class_='share-sub-title')
+            if st is None:
+                st = soup.find('h1', class_=lambda c: c and 'ShareSubTitle' in c)
+            info['nickname'] = st.text.strip()
         except Exception as e:
             print_(print_error(e)[0])
         print_(info)
         c = 0
         ids_now = set()
-        for div in soup.findAll('div', class_='video-feed-item'):
+        items = soup.findAll('div', class_='video-feed-item') + soup.findAll('div', class_=lambda c: c and 'DivItemContainer' in c)
+        for div in items:
             a = div.find('a')
             if a is None:
                 continue
@@ -170,81 +168,10 @@ def read_channel(url, session, cw=None):
         else:
             print(msg)
         return sd['count_empty'] > 4
-    res = clf2.solve(url, session, cw, f=f, timeout=1800, show=True, delay=0)
+    res = clf2.solve(url, session, cw, f=f, timeout=1800, show=SHOW, delay=0)

     if not info['items']:
         raise Exception('no items')

     return info
-
-
-@try_n(2)
-def read_channel_legacy(url, session, cw=None):
-    print_ = get_print(cw)
-    html = downloader.read_html(url, session=session, headers=HDR)
-    uid = re.find('//user/profile/([0-9]+)', html, err='no uid')
-    secUid = re.find('"secUid" *: *"([^"]+?)"', html, err='no secUid')
-    verifyFp = ''.join(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for i in range(16))
-    maxCursor = 0
-
-    info = {}
-    info['items'] = []
-    ids = set()
-
-    for i in range(100):
-        url_api = 'https://t.tiktok.com/api/item_list/?count=30&id={uid}&type=1&secUid={secUid}&maxCursor={maxCursor}&minCursor=0&sourceType=8&appId=1180&region=US&language=en&verifyFp={verifyFp}'.format(uid=uid, secUid=secUid, verifyFp=verifyFp, maxCursor=maxCursor)
-
-        js = 'window.byted_acrawler.sign({url:"{}"});'.replace('{}', url_api)
-        print(js)
-        for try_ in range(4):
-            try:
-                sign = devtools.eval_js(url, js, session)['output']
-                break
-            except Exception as e:
-                print(e)
-                e_ = e
-        else:
-            raise e_
-        url_api += '&_signature=' + sign
-        print_(url_api)
-
-        data_raw = downloader.read_html(url_api, url, session=session, headers=HDR)
-        data = json.loads(data_raw)
-
-        items = []
-        for item in data.get('items', []):
-            id_video = item['id']
-            if id_video in ids:
-                print('duplicate:', id_video)
-                continue
-            ids.add(id_video)
-            items.append(item)
-
-        if not items:
-            print('no items')
-            break
-
-        info['items'] += items
-
-        if i == 0:
-            info['uid'] = items[0]['author']['uniqueId']
-            info['nickname'] = items[0]['author']['nickname']
-
-        msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info['nickname'], info['uid'], len(info['items']))
-        if cw:
-            if not cw.alive:
-                break
-
-            cw.setTitle(msg)
-        else:
-            print(msg)
-
-        if not data['hasMore']:
-            break
-        maxCursor = data['maxCursor']
-
-    if not info['items']:
-        raise Exception('no items')
-
-    return info
@@ -3,7 +3,6 @@ from __future__ import division, print_function, unicode_literals
 import downloader
 from utils import Downloader, Session, LazyUrl, get_ext, try_n, Soup, get_print, update_url_query, urljoin, try_n, get_max_range, get_outdir, clean_title, lock, check_alive, check_alive_iter, SkipCounter
 from timee import time, sleep
 import hashlib
 import json
 import ree as re
 from datetime import datetime, timedelta
@@ -12,14 +11,7 @@ from error_printer import print_error
 import os
 import ytdl
 import ffmpeg
 import random
-from m3u8_tools import M3u8_stream
-import urllib
-from ratelimit import limits, sleep_and_retry
-try:
-    from urllib import quote # python2
-except:
-    from urllib.parse import quote # python3
 import options
 AUTH = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
 UA = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
@@ -593,12 +585,21 @@ class Image(object):
             d = ytdl.YoutubeDL(cw=self.cw)
             info = d.extract_info(self._url)

-            url = info['url']
+            fs = info['formats']
+            for f in fs:
+                print_('{} {} - {}'.format(f['height'], f['protocol'], f['url']))
+            def key(f):
+                h = f['height']
+                if not f['protocol'].startswith('http'):
+                    h -= .1
+                return h
+            f = sorted(fs, key=key)[-1]
+            url = f['url']
             ext = get_ext(url)
             self.ext = ext
             print_('get_video: {} {}'.format(url, ext))
             if ext.lower() == '.m3u8':
-                url = M3u8_stream(url, n_thread=self.n_thread, post_processing=True)
+                url = ffmpeg.Stream(url, cw=self.cw)
             self._url_cache = url
             return url
         except Exception as e:
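The new selection logic prefers the tallest format but nudges non-HTTP
protocols just below an HTTP format of the same height, so a direct download
wins ties against an .m3u8 stream. The key function in isolation, with sample
data:

    formats = [
        {'height': 720, 'protocol': 'm3u8_native', 'url': 'hls-720'},
        {'height': 720, 'protocol': 'https', 'url': 'http-720'},
        {'height': 480, 'protocol': 'https', 'url': 'http-480'},
    ]

    def key(f):
        h = f['height']
        if not f['protocol'].startswith('http'):
            h -= .1  # same height over HLS sorts just below plain HTTP
        return h

    print(sorted(formats, key=key)[-1]['url'])  # -> 'http-720'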
@@ -33,19 +33,18 @@ class Downloader_weibo(Downloader):
     def fix_url(cls, url):
         url = url.replace('weibo.cn', 'weibo.com').split('?')[0]
         if 'weibo.com/p/' in url:
-            id = re.findall('weibo.com/p/([^/]+)', url)[0]
+            id = re.find(r'weibo.com/p/([^/]+)', url, err='no id')
             url = 'https://weibo.com/p/{}'.format(id)
         elif 'weibo.com/u/' in url:
-            id = re.findall('weibo.com/u/([^/]+)', url)[0]
+            id = re.find(r'weibo.com/u/([^/]+)', url, err='no id')
             url = 'https://weibo.com/u/{}'.format(id)
         elif 'weibo.com/' in url:
-            id = re.findall('weibo.com/([^/]+)', url)[0]
+            id = re.find(r'weibo.com/([^/]+)', url, err='no id')
             url = 'https://weibo.com/{}'.format(id)
         else:
             id = url
             url = 'https://weibo.com/u/{}'.format(id)
-        url = fix_protocol(url)
-        return url
+        return fix_protocol(url)

     def read(self):
         checkLogin(self.session)
@@ -84,8 +83,7 @@ class Image(object):


 def _get_page_id(html):
-    m = re.search("CONFIG\\['page_id'\\]='([0-9]+?)'", html)
-    return m
+    return re.find(r"CONFIG\['page_id'\]='([0-9]+)'", html) or re.find(r'/u/page/follow/([0-9]+)', html)


 def get_id(url, cw=None):
@@ -96,13 +94,14 @@ def get_id(url, cw=None):
             soup = Soup(html)
             if soup.find('div', class_='gn_login'):
                 raise errors.LoginRequired()
-            m = _get_page_id(html)
-            if not m:
+            oid = _get_page_id(html)
+            if not oid:
                 raise Exception('no page_id')
-            oid = m.groups()[0]
-            uids = re.findall('uid=([0-9]+)', html)
+            uids = re.findall(r'uid=([0-9]+)', html)
             uid = max(set(uids), key=uids.count)
-            name = re.findall("CONFIG\\['onick'\\]='(.+?)'", html)[0]
+            name = re.find(r"CONFIG\['onick'\]='(.+?)'", html) or soup.find('div', class_=lambda c: c and c.startswith('ProfileHeader_name')).text.strip()
+            if not name:
+                raise Exception('no name')
             break
         except errors.LoginRequired as e:
             raise
@@ -144,6 +143,7 @@ def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):

         return imgs

+    @try_n(2)
     def get_albums(page):
         url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(uid, page, int(time()*1000))
         referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
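Weibo's redesign replaced the old inline CONFIG['onick'] script variable with
markup that uses hashed class names (ProfileHeader_name_*), hence the
regex-then-soup fallback chain above. The fallback in isolation (BeautifulSoup
assumed; the markup string is illustrative):

    import re
    from bs4 import BeautifulSoup

    html = '<div class="ProfileHeader_name_abc"> SomeUser </div>'
    soup = BeautifulSoup(html, 'html.parser')

    m = re.search(r"CONFIG\['onick'\]='(.+?)'", html)  # old UI: inline JS config
    name = m.group(1) if m else soup.find(
        'div', class_=lambda c: c and c.startswith('ProfileHeader_name')).text.strip()
    print(name)  # -> 'SomeUser'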
@@ -1,3 +1,39 @@
+3.7c 【】
+
+[Bug fixes / fixes for site changes]
+
+- Fixed certain sites failing to download in some environments (#3944)
+
+- #3951
+
+- Fixed MyReadingManga downloads not working (#3965)
+
+- Fixed Twitter video downloads not working (#3970)
+
+- Fixed the arrow keys, Home, and End not working properly while a filter is active (#3974)
+
+- Support for the new Weibo UI (#3508)
+
+- Updated for the TikTok site change (#3978)
+
+- Fixed 半次元 downloads not working
+
+- Other minor fixes
+
+
+[Changed/added features]
+
+- Ended support for epio.app
+
+- Improved M3U8 download speed
+
+- Show the 小説家になろう ncode (#3938)
+
+- Other minor changes
+
+
+--------------------------------------------------------------------------------------------------------------------------------------------
 3.7b 【Oct 29, 2021】

 [Bug fixes / fixes for site changes]