KurtBestor 2021-11-04 18:11:09 +09:00
parent 9da624da56
commit dce00852e6
11 changed files with 182 additions and 498 deletions

View File

@@ -1,7 +1,7 @@
#coding:utf8
from __future__ import print_function
import downloader
from utils import Soup, cut_pair, LazyUrl, Downloader, get_print, get_max_range, try_n, clean_title
from utils import Soup, cut_pair, LazyUrl, Downloader, get_print, get_max_range, try_n, clean_title, check_alive
import json
import ree as re
import os
@@ -38,6 +38,13 @@ class Downloader_bcy(Downloader):
self.artist = self.info['artist']
def get_ssr_data(html):
s = html.split('window.__ssr_data = JSON.parse("')[1].replace('\\"', '"')
s = cut_pair(s).replace('"', '\\"')
data = json.loads(json.loads('"{}"'.format(s)))
return data
@try_n(2)
def get_imgs(url, html=None, cw=None):
if '/detail/' not in url:
@@ -46,10 +53,7 @@ def get_imgs(url, html=None, cw=None):
if html is None:
html = downloader.read_html(url)
s = cut_pair(html.split('window.__ssr_data = JSON.parse("')[1])
s = json.loads(u'"{}"'.format(s))
data = json.loads(s)
data = get_ssr_data(html)
multi = data['detail']['post_data']['multi']
@@ -103,9 +107,8 @@ def get_info(url, html):
uname = soup.find('div', class_='user-name') or soup.find('p', class_='uname') or soup.find('div', class_='user-info-name')
info['artist'] = uname.text.strip()
s = cut_pair(html.split('window.__ssr_data = JSON.parse("')[1])
j = json.loads(json.loads(u'"{}"'.format(s)))
j = get_ssr_data(html)
if '/detail/' in url:
info['uid'] = j['detail']['detail_user']['uid']
@@ -137,11 +140,14 @@ def get_imgs_channel(url, html=None, cw=None):
if not items:
print('no items')
break
c = 0
for item in items:
check_alive(cw)
id = item['item_detail']['item_id']
if id in ids:
print('duplicate')
continue
c += 1
ids.add(id)
url_single = u'https://bcy.net/item/detail/{}'.format(id)
imgs_single = get_imgs(url_single, cw=cw)
@@ -151,14 +157,15 @@ def get_imgs_channel(url, html=None, cw=None):
imgs.append(img)
s = u'{} {} - {}'.format(tr_(u'읽는 중...'), info['artist'], min(len(imgs), max_pid))
if cw:
if not cw.alive:
return
cw.setTitle(s)
else:
print(s)
if len(imgs) >= max_pid:
break
if not c:
print('not c')
break
if len(imgs) >= max_pid:
print('over max_pid:', max_pid)
break
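For context, the new get_ssr_data helper above centralizes the double JSON decode of the window.__ssr_data payload. A minimal standalone sketch of the same idea, using a hypothetical page fragment and a simplified cut in place of cut_pair:

import json

# Hypothetical page fragment, for illustration only.
html = 'window.__ssr_data = JSON.parse("{\\"detail\\": {\\"post_data\\": {\\"multi\\": []}}}");'

# The payload is a JSON document escaped inside a JS string literal, so it is
# decoded twice: once to undo the string escaping, once to parse the JSON itself.
s = html.split('window.__ssr_data = JSON.parse("')[1]
s = s.rsplit('");', 1)[0]  # simplified stand-in for cut_pair()
data = json.loads(json.loads('"{}"'.format(s)))
print(data['detail']['post_data']['multi'])  # []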

View File

@@ -1,227 +0,0 @@
import downloader
from utils import Soup, Session, LazyUrl, Downloader, try_n, get_imgs_already, clean_title, get_print, check_alive
import json, os
from timee import time, sleep
import ree as re
from translator import tr_
import page_selector
class NotPaidError(Exception): pass
class Page(object):
def __init__(self, id, url, title, serviceType):
self.id = id
self.url = url
self.title = title
self.serviceType = serviceType
class Image(object):
def __init__(self, url, page, p):
self._url = url
self.url = LazyUrl(page.url, self.get, self)
ext = os.path.splitext(url.split('?')[0])[1]
if ext.lower()[1:] not in ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp'):
ext = '.jpg'
self.filename = '{}/{:04}{}'.format(clean_title(page.title), p, ext)
def get(self, _):
return self._url
def get_id(url):
if '/league/' in url:
header = 'league_'
else:
header = ''
body = re.find('/viewer/([0-9a-zA-Z_-]+)', url) or re.find('/view/([0-9a-zA-Z_-]+)', url)
return header, body
def header_to_type(header):
if header == 'league_':
return 'leaguetoon'
return 'webtoon'
def get_info(url, session):
referer = url
header, id = get_id(referer)
type_ = header_to_type(header)
info = {}
ids = set()
pages = []
for p in range(1, 1+10):
if p == 1:
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(type_, id, int(time()))
else:
if type_ == 'webtoon':
break
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(type_, id, p, int(time()))
print(url)
info_raw = downloader.read_html(url, referer=referer, session=session)
_info = json.loads(info_raw)
webtoon = _info['data'].get('webtoon') or _info['data'].get('leaguetoon')
if webtoon is None:
raise Exception('No webtoon')
if p == 1:
info['title'] = webtoon['title']
artists = []
for artist in webtoon['cartoon']['artists']:
artist = artist['penName']
if artist in artists:
continue
artists.append(artist)
if len(artists) > 1:
artists = [
artists[1], artists[0]] + artists[2:]
info['artists'] = artists
eps = webtoon.get('webtoonEpisodes') or webtoon.get('leaguetoonEpisodes')
if not eps:
if p > 1:
eps = []
else:
raise Exception('No eps')
c = 0
for ep in eps:
id_ = ep.get('articleId') or ep.get('id')
title = ep['title']
serviceType = 'free' if type_ =='leaguetoon' else ep['serviceType']
if type_ == 'leaguetoon':
url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
else:
url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
if id_ in ids:
continue
c += 1
ids.add(id_)
page = Page(id_, url, title, serviceType)
pages.append(page)
if c == 0:
print('c == 0; break')
break
info['pages'] = sorted(pages, key=lambda x: x.id)
return info
@Downloader.register
class Downloader_daumtoon(Downloader):
type = 'daumtoon'
URLS = ['webtoon.daum.net']
MAX_CORE = 16
MAX_SPEED = 4.0
display_name = 'Daum Webtoon'
def init(self):
if '/viewer/' in self.url:
return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
if '/view/' not in self.url and not self.url.lower().startswith('http'):
self.url = 'http://webtoon.daum.net/webtoon/view/{}'.format(self.url)
self.session = None
self._info = get_info(self.url, self.session)
@property
def name(self):
title = self._info['title']
artists = self._info['artists']
artist = artists[0] if artists else 'N/A'
title = self.format_title('N/A', ''.join(get_id(self.url)), title, artist, 'N/A', 'N/A', 'Korean', prefix='daumtoon_')
return clean_title(title)
def read(self):
self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(self.name)
imgs = get_imgs_all(self._info, self.name, self.session, cw=self.cw)
for img in imgs:
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = self.name
self.session = None
def get_imgs(page, session, cw):
print_ = get_print(cw)
if not downloader.cookiejar.get('PROF', domain='.daum.net') and page.serviceType != 'free': #3314
raise NotPaidError()
html = downloader.read_html(page.url, session=session)
header, id = get_id(page.url)
t = int(time())
soup = Soup(html)
type_ = header_to_type(header)
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
if header == 'league_':
m_type = None
else:
m_type = data['data']['webtoonEpisode']['multiType']
print_('m_type: {}'.format(m_type))
if m_type == 'chatting':
page.url = page.url.replace('daum.net/', 'daum.net/m/')
url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
imgs = []
for chat in data['data']['webtoonEpisodeChattings']:
img = chat.get('image')
if not img:
continue
img = Image(img['url'], page, len(imgs))
imgs.append(img)
else:
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
if not data.get('data'):
raise NotPaidError()
imgs = []
for img in data['data']:
img = Image(img['url'], page, len(imgs))
imgs.append(img)
return imgs
def get_imgs_all(info, title, session, cw=None):
print_ = get_print(cw)
pages = info['pages']
pages = page_selector.filter(pages, cw)
imgs = []
for p, page in enumerate(pages):
imgs_already = get_imgs_already('daumtoon', title, page, cw)
if imgs_already:
imgs += imgs_already
continue
try:
imgs += get_imgs(page, session, cw)
except NotPaidError:
print_('Not paid: {}'.format(page.title)) #3314
continue
if cw is not None:
cw.setTitle(tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
check_alive(cw)
return imgs
@page_selector.register('daumtoon')
@try_n(4)
def f(url):
info = get_info(url, None)
return info['pages']

View File

@@ -1,101 +0,0 @@
import downloader
from utils import Soup, try_n, LazyUrl, Downloader, lock, get_print, clean_title
from timee import sleep
import base64
import json
import constants
import ree as re
KEY = b'gefdzfdef'
@Downloader.register
class Downloader_epio(Downloader):
type = 'epio'
URLS = ['epio.app']
def read(self):
info = get_info(self.url, cw=self.cw)
imgs = info['imgs']
for img in imgs:
self.urls.append(img.url)
self.title = clean_title(info['title'])
class Image(object):
def __init__(self, url, referer, p):
self._url = url
self.url = LazyUrl(referer, self.get, self)
ext = '.jpg'#
self.filename = u'{:04}{}'.format(p, ext)
def get(self, referer):
return self._url
def get_info(url, cw=None):
info = _get_info(url, cw)
imgs = []
html = info['content']
soup = Soup(html)
for img in soup.findAll('img'):
src = img.attrs.get('src')
if not src:
continue
# 1696
if not isinstance(src, bytes):
src = src.encode('utf8')
t = base64.b64encode(src)
if isinstance(t, bytes):
t = t.decode('utf8')
src = 'https://cdn1-images.epio.app/image/download/{}'.format(t)
img = Image(src, url, len(imgs))
imgs.append(img)
info['imgs'] = imgs
return info
def get_id(url):
return re.find('article/detail/([0-9a-z]+)', url)
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import aes
backend = default_backend()
def decrypt(s, cw=None):
print_ = get_print(cw)
key, iv = aes.key_and_iv(s[:16], KEY)
print_('key: {}\niv: {}'.format(key, iv))
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
r = -len(s) % 16
if r:
s += b'\x00' * r
dec = cipher.decryptor()
s_dec = dec.update(s[16:]) + dec.finalize()
s_dec = s_dec[:-s_dec[-1]]
if r:
s_dec = s_dec[:-r]
return s_dec
def _get_info(url, cw=None):
id = get_id(url)
url_api = 'https://girlimg.epio.app/api/articles/{}?lang=en-us'.format(id)
html = downloader.read_html(url_api, referer=url)
s = json.loads(html)['string']
s = base64.b64decode(s)
s = decrypt(s, cw)
info = json.loads(s)
return info
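The removed module decrypted the API payload with AES-CBC, deriving the key and IV from the 16-byte salt prefix via the project's aes.key_and_iv helper. The core decryption step with the cryptography package looks roughly like the sketch below, assuming key and iv are already derived and the ciphertext length is a multiple of the block size:

from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

def decrypt_cbc(data, key, iv):
    # data is the salt (16 bytes) followed by the ciphertext.
    dec = Cipher(algorithms.AES(key), modes.CBC(iv)).decryptor()
    plain = dec.update(data[16:]) + dec.finalize()
    return plain[:-plain[-1]]  # strip trailing padding bytes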

View File

@@ -1,10 +1,10 @@
from utils import Downloader, LazyUrl, clean_title
import utils
from m3u8_tools import playlist2stream, M3u8_stream
import os
from hashlib import md5
from translator import tr_
import ffmpeg
##DEFAULT_N_THREAD = 1
DEFAULT_N_THREAD = 2
@Downloader.register
@@ -21,16 +21,19 @@ class Downloader_m3u8(Downloader):
return url
def read(self):
## n_thread = self.cw.format or DEFAULT_N_THREAD
## self.print_('n_thread: {}'.format(n_thread))
video = Video(self.url, self.cw)
n_thread = self.cw.format or DEFAULT_N_THREAD
self.print_('n_thread: {}'.format(n_thread))
video = Video(self.url, n_thread)
self.urls.append(video.url)
self.title = '{} ({})'.format(video.title, video.id_)
class Video(object):
def __init__(self, url, cw):
m = ffmpeg.Stream(url, cw=cw)
def __init__(self, url, n_thread):
try:
m = playlist2stream(url, n_thread=n_thread)
except:
m = M3u8_stream(url, n_thread=n_thread)
self.url = LazyUrl(url, lambda _: m, self)
self.title = os.path.splitext(os.path.basename(url))[0]
self.id_ = md5(url.encode('utf8')).hexdigest()[:8]
@@ -38,14 +41,14 @@ class Video(object):
self.filename = clean_title(self.title, n=-len(tail)) + tail
##import selector
##@selector.options('m3u8')
##def options():
## def f(urls):
## n_thread, ok = utils.QInputDialog.getInt(Downloader.mainWindow, tr_('Set number of threads'), tr_('Number of threads?'), value=DEFAULT_N_THREAD, min=1, max=4, step=1)
## if not ok:
## return
## return n_thread
## return [
## {'text': 'Set number of threads...', 'format': f},
## ]
import selector
@selector.options('m3u8')
def options():
def f(urls):
n_thread, ok = utils.QInputDialog.getInt(Downloader.mainWindow, tr_('Set number of threads'), tr_('Number of threads?'), value=DEFAULT_N_THREAD, min=1, max=4, step=1)
if not ok:
return
return n_thread
return [
{'text': 'Set number of threads...', 'format': f},
]

View File

@@ -1,6 +1,6 @@
#coding:utf8
import downloader
from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title
from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title, get_ext
import os
from translator import tr_
from timee import sleep
@@ -10,16 +10,17 @@ import clf2#
class Image(object):
def __init__(self, url, p, page):
ext = os.path.splitext(url)[1]
if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
ext = '.jpg'
self.filename = u'{:04}{}'.format(p, ext)
def __init__(self, url, p, page, cw):
self.cw = cw
ext = get_ext(url)
self.filename = '{:04}{}'.format(p, ext)
if page.title is not None:
self.filename = u'{}/{}'.format(page.title, self.filename)
def f(_):
return url
self.url = LazyUrl(page.url, f, self)
self.filename = '{}/{}'.format(page.title, self.filename)
self._url = url
self.url = LazyUrl(page.url, self.get, self)
def get(self, _):
return self._url#'tmp://' + clf2.download(self._url, cw=self.cw)
class Page(object):
@@ -36,7 +37,7 @@ class Downloader_mrm(Downloader):
type = 'mrm'
URLS = ['myreadingmanga.info']
_soup = None
MAX_CORE = 16
MAX_CORE = 4
display_name = 'MyReadingManga'
def init(self):
@@ -67,7 +68,7 @@ class Downloader_mrm(Downloader):
return title
def read(self):
self.title = u'읽는 중... {}'.format(self.name)
self.title = '읽는 중... {}'.format(self.name)
imgs = get_imgs(self.url, self.soup, self.session, self.cw)
@@ -95,12 +96,12 @@ def get_imgs(url, soup=None, session=None, cw=None):
if pagination is None:
page = Page(None, url, soup)
imgs = get_imgs_page(page, session=session)
imgs = get_imgs_page(page, session=session, cw=cw)
else:
pages = get_pages(url, soup, session=session)
imgs = []
for i, page in enumerate(pages):
s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))
s = '{} {} / {} ({} / {})'.format(tr_('읽는 중...'), title, page.title, i+1, len(pages))
if cw:
if not cw.alive:
@@ -109,7 +110,7 @@ def get_imgs(url, soup=None, session=None, cw=None):
else:
print(s)
imgs += get_imgs_page(page, session=session)
imgs += get_imgs_page(page, session=session, cw=cw)
if not imgs:
raise Exception('no imgs')
@@ -149,7 +150,7 @@ def get_pages(url, soup=None, session=None):
@try_n(4)
def get_imgs_page(page, session=None):
def get_imgs_page(page, session=None, cw=None):
url = page.url
soup = page.soup
if soup is None:
@@ -165,7 +166,7 @@ def get_imgs_page(page, session=None):
if img is None:
continue
img = urljoin(url, img)
img = Image(img, len(imgs), page)
img = Image(img, len(imgs), page, cw)
imgs.append(img)
print(page.title, len(imgs), page.url)

View File

@@ -68,7 +68,8 @@ class Downloader_syosetu(Downloader):
title, self.artist = get_title_artist(soup)
self.__title = title
title_dir = clean_title((u'[{}] {}').format(self.artist, title))
ncode = re.find(r'syosetu.com/([^/]+)', self.url, err='no ncode') #3938
title_dir = clean_title('[{}] {} ({})'.format(self.artist, title, ncode))
ex = soup.find('div', id='novel_ex')
self.novel_ex = ex.text.strip() if ex else None
texts = []
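The added ncode line pulls the novel code out of the URL so it can be appended to the download directory name. A small illustration with the standard re module (the URL is a made-up example; the project uses its own ree wrapper with an err argument):

import re

url = 'https://ncode.syosetu.com/n9669bk/'  # hypothetical example URL
ncode = re.search(r'syosetu\.com/([^/]+)', url).group(1)
print(ncode)                                            # n9669bk
print('[{}] {} ({})'.format('artist', 'title', ncode))  # the new title_dir format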

View File

@@ -1,19 +1,15 @@
from __future__ import division, print_function, unicode_literals
import downloader
import ree as re
from utils import urljoin, Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range
import os
import json
import ast
from utils import Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range, format_filename
from io import BytesIO
import random
import clf2
from translator import tr_
from timee import sleep
from error_printer import print_error
import devtools
HDR = {'User-Agent': downloader.hdr['User-Agent']}
import ytdl
PATTERN_VID = '/(v|video)/(?P<id>[0-9]+)'
SHOW = True
def is_captcha(soup):
@@ -79,24 +75,17 @@ class Video(object):
id = m.group('id')
ext = '.mp4'
self.title = id#
self.filename = '{}{}'.format(clean_title(self.title, n=-len(ext)), ext)
self.filename = format_filename(self.title, id, ext)
html = downloader.read_html(url, session=self.session)
soup = Soup(html)
data = soup.find(id='__NEXT_DATA__')
props = data.contents[0]
data_encode = json.dumps(props)
ast_le = ast.literal_eval(data_encode)
data = json.loads(ast_le)
ydl = ytdl.YoutubeDL()
info = ydl.extract_info(url)
#info = data['props']['pageProps']['videoData']['itemInfos']
info = data['props']['pageProps']['itemInfo']['itemStruct']
self._url = info['video']['downloadAddr']
self.url_thumb = info['video']['cover']
self.url_thumb = info['thumbnail']
self.thumb = BytesIO()
downloader.download(self.url_thumb, referer=url, buffer=self.thumb)
self._url = info['url']
return self._url
@@ -117,21 +106,30 @@ def read_channel(url, session, cw=None):
def f(html, browser=None):
soup = Soup(html)
if is_captcha(soup):
print('captcha')
browser.show()
sd['shown'] = True
elif sd['shown']:
browser.hide()
sd['shown'] = False
if not SHOW:
if is_captcha(soup):
print('captcha')
browser.show()
sd['shown'] = True
elif sd['shown']:
browser.hide()
sd['shown'] = False
try:
info['uid'] = soup.find('h2', class_='share-title').text.strip()
info['nickname'] = soup.find('h1', class_='share-sub-title').text.strip()
st = soup.find('h2', class_='share-title')
if st is None:
st = soup.find('h2', class_=lambda c: c and 'ShareTitle' in c)
info['uid'] = st.text.strip()
st = soup.find('h1', class_='share-sub-title')
if st is None:
st = soup.find('h1', class_=lambda c: c and 'ShareSubTitle' in c)
info['nickname'] = st.text.strip()
except Exception as e:
print_(print_error(e)[0])
print_(info)
c = 0
ids_now = set()
for div in soup.findAll('div', class_='video-feed-item'):
items = soup.findAll('div', class_='video-feed-item') + soup.findAll('div', class_=lambda c: c and 'DivItemContainer' in c)
for div in items:
a = div.find('a')
if a is None:
continue
@@ -170,81 +168,10 @@ def read_channel(url, session, cw=None):
else:
print(msg)
return sd['count_empty'] > 4
res = clf2.solve(url, session, cw, f=f, timeout=1800, show=True, delay=0)
res = clf2.solve(url, session, cw, f=f, timeout=1800, show=SHOW, delay=0)
if not info['items']:
raise Exception('no items')
return info
@try_n(2)
def read_channel_legacy(url, session, cw=None):
print_ = get_print(cw)
html = downloader.read_html(url, session=session, headers=HDR)
uid = re.find('//user/profile/([0-9]+)', html, err='no uid')
secUid = re.find('"secUid" *: *"([^"]+?)"', html, err='no secUid')
verifyFp = ''.join(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for i in range(16))
maxCursor = 0
info = {}
info['items'] = []
ids = set()
for i in range(100):
url_api = 'https://t.tiktok.com/api/item_list/?count=30&id={uid}&type=1&secUid={secUid}&maxCursor={maxCursor}&minCursor=0&sourceType=8&appId=1180&region=US&language=en&verifyFp={verifyFp}'.format(uid=uid, secUid=secUid, verifyFp=verifyFp, maxCursor=maxCursor)
js = 'window.byted_acrawler.sign({url:"{}"});'.replace('{}', url_api)
print(js)
for try_ in range(4):
try:
sign = devtools.eval_js(url, js, session)['output']
break
except Exception as e:
print(e)
e_ = e
else:
raise e_
url_api += '&_signature=' + sign
print_(url_api)
data_raw = downloader.read_html(url_api, url, session=session, headers=HDR)
data = json.loads(data_raw)
items = []
for item in data.get('items', []):
id_video = item['id']
if id_video in ids:
print('duplicate:', id_video)
continue
ids.add(id_video)
items.append(item)
if not items:
print('no items')
break
info['items'] += items
if i == 0:
info['uid'] = items[0]['author']['uniqueId']
info['nickname'] = items[0]['author']['nickname']
msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info['nickname'], info['uid'], len(info['items']))
if cw:
if not cw.alive:
break
cw.setTitle(msg)
else:
print(msg)
if not data['hasMore']:
break
maxCursor = data['maxCursor']
if not info['items']:
raise Exception('no items')
return info
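In the Video class above, the scraped __NEXT_DATA__ blob is replaced by a ytdl extraction that supplies info['url'] and info['thumbnail']. A rough standalone equivalent with the public yt-dlp API (the project's bundled ytdl module is assumed to wrap it):

from yt_dlp import YoutubeDL  # assumption: stands in for the bundled ytdl module

def extract_video(url):
    # Resolve the direct media URL and thumbnail without downloading the file.
    with YoutubeDL({'quiet': True}) as ydl:
        info = ydl.extract_info(url, download=False)
    return info['url'], info.get('thumbnail')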

View File

@@ -3,7 +3,6 @@ from __future__ import division, print_function, unicode_literals
import downloader
from utils import Downloader, Session, LazyUrl, get_ext, try_n, Soup, get_print, update_url_query, urljoin, try_n, get_max_range, get_outdir, clean_title, lock, check_alive, check_alive_iter, SkipCounter
from timee import time, sleep
import hashlib
import json
import ree as re
from datetime import datetime, timedelta
@@ -12,14 +11,7 @@ from error_printer import print_error
import os
import ytdl
import ffmpeg
import random
from m3u8_tools import M3u8_stream
import urllib
from ratelimit import limits, sleep_and_retry
try:
from urllib import quote # python2
except:
from urllib.parse import quote # python3
import options
AUTH = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
UA = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
@@ -593,12 +585,21 @@ class Image(object):
d = ytdl.YoutubeDL(cw=self.cw)
info = d.extract_info(self._url)
url = info['url']
fs = info['formats']
for f in fs:
print_('{} {} - {}'.format(f['height'], f['protocol'], f['url']))
def key(f):
h = f['height']
if not f['protocol'].startswith('http'):
h -= .1
return h
f = sorted(fs, key=key)[-1]
url = f['url']
ext = get_ext(url)
self.ext = ext
print_('get_video: {} {}'.format(url, ext))
if ext.lower() == '.m3u8':
url = M3u8_stream(url, n_thread=self.n_thread, post_processing=True)
url = ffmpeg.Stream(url, cw=self.cw)
self._url_cache = url
return url
except Exception as e:
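The key function in the Image hunk prefers the tallest rendition and nudges non-HTTP protocols (e.g. HLS) below a direct HTTP format of the same height. Isolated as a standalone helper, assuming every format dict carries height and protocol as in the code above:

def best_format(formats):
    # Highest resolution wins; an m3u8/HLS entry loses a tie against a direct
    # HTTP download of the same height.
    def key(f):
        h = f['height']
        if not f['protocol'].startswith('http'):
            h -= 0.1
        return h
    return sorted(formats, key=key)[-1]

# usage: url = best_format(info['formats'])['url']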

View File

@@ -33,19 +33,18 @@ class Downloader_weibo(Downloader):
def fix_url(cls, url):
url = url.replace('weibo.cn', 'weibo.com').split('?')[0]
if 'weibo.com/p/' in url:
id = re.findall('weibo.com/p/([^/]+)', url)[0]
id = re.find(r'weibo.com/p/([^/]+)', url, err='no id')
url = 'https://weibo.com/p/{}'.format(id)
elif 'weibo.com/u/' in url:
id = re.findall('weibo.com/u/([^/]+)', url)[0]
id = re.find(r'weibo.com/u/([^/]+)', url, err='no id')
url = 'https://weibo.com/u/{}'.format(id)
elif 'weibo.com/' in url:
id = re.findall('weibo.com/([^/]+)', url)[0]
id = re.find(r'weibo.com/([^/]+)', url, err='no id')
url = 'https://weibo.com/{}'.format(id)
else:
id = url
url = 'https://weibo.com/u/{}'.format(id)
url = fix_protocol(url)
return url
return fix_protocol(url)
def read(self):
checkLogin(self.session)
@@ -84,8 +83,7 @@ class Image(object):
def _get_page_id(html):
m = re.search("CONFIG\\['page_id'\\]='([0-9]+?)'", html)
return m
return re.find(r"CONFIG\['page_id'\]='([0-9]+)'", html) or re.find(r'/u/page/follow/([0-9]+)', html)
def get_id(url, cw=None):
@@ -96,13 +94,14 @@ def get_id(url, cw=None):
soup = Soup(html)
if soup.find('div', class_='gn_login'):
raise errors.LoginRequired()
m = _get_page_id(html)
if not m:
oid = _get_page_id(html)
if not oid:
raise Exception('no page_id')
oid = m.groups()[0]
uids = re.findall('uid=([0-9]+)', html)
uids = re.findall(r'uid=([0-9]+)', html)
uid = max(set(uids), key=uids.count)
name = re.findall("CONFIG\\['onick'\\]='(.+?)'", html)[0]
name = re.find(r"CONFIG\['onick'\]='(.+?)'", html) or soup.find('div', class_=lambda c:c and c.startswith('ProfileHeader_name')).text.strip()
if not name:
raise Exception('no name')
break
except errors.LoginRequired as e:
raise
@@ -144,6 +143,7 @@ def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):
return imgs
@try_n(2)
def get_albums(page):
url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(uid, page, int(time()*1000))
referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
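The name lookup above now falls back to the redesigned profile markup, where class names carry hashed suffixes, by matching on a stable prefix. A small BeautifulSoup sketch of that predicate (the HTML snippet and suffix are invented; the project's Soup is a BeautifulSoup wrapper):

from bs4 import BeautifulSoup

html = '<div class="ProfileHeader_name_1Kbxyz">some user</div>'  # hypothetical markup
soup = BeautifulSoup(html, 'html.parser')
name = soup.find('div', class_=lambda c: c and c.startswith('ProfileHeader_name')).text.strip()
print(name)  # some user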

View File

@@ -1,3 +1,39 @@
3.7c 【】
[Bug fixes / fixes for site changes]
- Fixed downloads from certain sites not working in some environments (#3944)
- #3951
- Fixed MyReadingManga downloads not working (#3965)
- Fixed Twitter video downloads not working (#3970)
- Fixed the arrow keys, Home, and End not working properly while a filter is active (#3974)
- Support for the new Weibo UI (#3508)
- Updated for the TikTok site changes (#3978)
- Fixed 半次元 downloads not working
- Other minor fixes
[Changed/added features]
- Dropped support for epio.app
- Improved M3U8 download speed
- Display the 小説家になろう ncode (#3938)
- Other minor changes
--------------------------------------------------------------------------------------------------------------------------------------------
3.7b 【Oct 29, 2021】
[Bug fixes / fixes for site changes]

View File

@@ -1,3 +1,39 @@
3.7c 【】
[Bug fixes / fixes for site changes]
- Fixed downloads from certain sites not working in some environments (#3944)
- #3951
- Fixed MyReadingManga downloads not working (#3965)
- Fixed Twitter video downloads not working (#3970)
- Fixed the arrow keys, Home, and End not working properly while a filter is active (#3974)
- Support for the new Weibo UI (#3508)
- Updated for the TikTok site changes (#3978)
- Fixed 半次元 downloads not working
- Other minor fixes
[Changed/added features]
- Dropped support for epio.app
- Improved M3U8 download speed
- Display the 小説家になろう ncode (#3938)
- Other minor changes
--------------------------------------------------------------------------------------------------------------------------------------------
3.7b 【Oct 29, 2021】
[Bug fixes / fixes for site changes]