Commit bcd33f9118 by KurtBestor, 2021-04-05 23:38:47 +09:00 (parent a69b757610)
41 changed files with 6013 additions and 34 deletions

Binary image file changed: 475 KiB → 974 KiB (preview not shown).

@@ -14,7 +14,18 @@ import math
 import ree as re
 import utils
 from collections import OrderedDict
-_VALID_URL = 'https?://(?:www\\.|bangumi\\.|)bilibili\\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\\d+)/play#)(?P<id>\\d+)'
+_VALID_URL = r'''(?x)
+    https?://
+    (?:(?:www|bangumi)\.)?
+    bilibili\.(?:tv|com)/
+    (?:
+        (?:
+            video/[aA][vV]|
+            anime/(?P<anime_id>\d+)/play\#
+        )(?P<id_bv>\d+)|
+        video/[bB][vV](?P<id>[^/?#&]+)
+    )
+    '''
 _APP_KEY = 'iVGUTjsxvpLeuDCf'
 _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
 RESOLS = OrderedDict()

@@ -0,0 +1,219 @@
# uncompyle6 version 3.5.0
# Python bytecode 2.7 (62211)
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
# Embedded file name: daumtoon_downloader.pyo
# Compiled at: 2019-10-03 10:11:29
import downloader
from utils import Soup, Session, LazyUrl, Downloader, try_n, get_imgs_already, clean_title, get_print
import json, os
from timee import time, sleep
import ree as re
from translator import tr_
import page_selector
class Page(object):
def __init__(self, id, url, title, serviceType):
self.id = id
self.url = url
self.title = title
self.serviceType = serviceType
class Image(object):
def __init__(self, url, page, p):
self._url = url
self.url = LazyUrl(page.url, self.get, self)
ext = os.path.splitext(url.split('?')[0])[1]
if ext.lower()[1:] not in ('jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp'):
ext = '.jpg'
self.filename = (u'{}/{:04}{}').format(clean_title(page.title), p, ext)
def get(self, _):
return self._url
def get_id(url):
if '/league/' in url:
header = 'league_'
else:
header = ''
body = re.find('/viewer/([0-9a-zA-Z_-]+)', url) or re.find('/view/([0-9a-zA-Z_-]+)', url)
return header, body
def get_info(url, session):
referer = url
header, id = get_id(referer)
if 'league_' in id:
type_ = 'leaguetoon'
else:
type_ = 'webtoon'
info = {}
ids = set()
pages = []
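# Leaguetoon episode lists are paged (page_no=1..10) while regular webtoons return
# everything in the first response, hence the early break for type_ == 'webtoon'.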
for p in range(1, 1+10):
if p == 1:
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(type_, id, int(time()))
else:
if type_ == 'webtoon':
break
url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(type_, id, p, int(time()))
print(url)
info_raw = downloader.read_html(url, referer=referer, session=session)
_info = json.loads(info_raw)
webtoon = _info['data'].get('webtoon') or _info['data'].get('leaguetoon')
if webtoon is None:
raise Exception('No webtoon')
if p == 1:
info['title'] = webtoon['title']
artists = []
for artist in webtoon['cartoon']['artists']:
artist = artist['penName']
if artist in artists:
continue
artists.append(artist)
if len(artists) > 1:
artists = [
artists[1], artists[0]] + artists[2:]
info['artists'] = artists
eps = webtoon.get('webtoonEpisodes') or webtoon.get('leaguetoonEpisodes')
if not eps:
if p > 1:
eps = []
else:
raise Exception('No eps')
c = 0
for ep in eps:
id_ = ep.get('articleId') or ep.get('id')
title = ep['title']
serviceType = 'free' if type_ =='leaguetoon' else ep['serviceType']
if type_ == 'leaguetoon':
url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
else:
url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
if id_ in ids:
continue
c += 1
ids.add(id_)
page = Page(id_, url, title, serviceType)
pages.append(page)
if c == 0:
print('c == 0; break')
break
info['pages'] = sorted(pages, key=lambda x: x.id)
return info
@Downloader.register
class Downloader_daumtoon(Downloader):
type = 'daumtoon'
URLS = ['webtoon.daum.net']
MAX_CORE = 16
MAX_SPEED = 4.0
display_name = 'Daum Webtoon'
def init(self):
if '/viewer/' in self.url:
return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
if '/view/' not in self.url and not self.url.lower().startswith('http'):
self.url = ('http://webtoon.daum.net/webtoon/view/{}').format(self.url)
self.session = None
self._info = get_info(self.url, self.session)
@property
def name(self):
title = self._info['title']
artists = self._info['artists']
artist = artists[0] if artists else 'N/A'
title = self.format_title('N/A', ''.join(get_id(self.url)), title, artist, 'N/A', 'N/A', 'Korean', prefix='daumtoon_')
return clean_title(title)
def read(self):
self.title = tr_(u'\uc77d\ub294 \uc911... {}').format(self.name)
imgs = get_imgs_all(self._info, self.name, self.session, cw=self.cw)
for img in imgs:
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = self.name
self.session = None
return
def get_imgs(page, session, cw):
print_ = get_print(cw)
html = downloader.read_html(page.url, session=session)
header, id = get_id(page.url)
t = int(time())
soup = Soup(html)
if 'league_' in id:
type_ = 'leaguetoon'
else:
type_ = 'webtoon'
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
m_type = data['data']['webtoonEpisode']['multiType']
print_('m_type: {}'.format(m_type))
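# "chatting"-type episodes have no plain image list; switch to the mobile viewer
# endpoint and collect the images embedded in each chat bubble instead.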
if m_type == 'chatting':
page.url = page.url.replace('daum.net/', 'daum.net/m/')
url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
imgs = []
for chat in data['data']['webtoonEpisodeChattings']:
img = chat.get('image')
if not img:
continue
img = Image(img['url'], page, len(imgs))
imgs.append(img)
else:
url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
imgs = []
for img in data['data']:
img = Image(img['url'], page, len(imgs))
imgs.append(img)
return imgs
def get_imgs_all(info, title, session, cw=None):
pages = info['pages']
pages = page_selector.filter(pages, cw)
imgs = []
for p, page in enumerate(pages):
if page.serviceType != 'free':
continue
imgs_already = get_imgs_already('daumtoon', title, page, cw)
if imgs_already:
imgs += imgs_already
continue
imgs += get_imgs(page, session, cw)
if cw is not None:
cw.setTitle(tr_(u'\uc77d\ub294 \uc911... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
if not cw.alive:
break
return imgs
@page_selector.register('daumtoon')
@try_n(4)
def f(url):
info = get_info(url, None)
return info['pages']

@@ -0,0 +1,101 @@
import downloader
from utils import Soup, try_n, LazyUrl, Downloader, lock, get_print, clean_title
from timee import sleep
import base64
import json
import constants
import ree as re
KEY = b'gefdzfdef'
@Downloader.register
class Downloader_epio(Downloader):
type = 'epio'
URLS = ['epio.app']
def read(self):
info = get_info(self.url, cw=self.cw)
imgs = info['imgs']
for img in imgs:
self.urls.append(img.url)
self.title = clean_title(info['title'])
class Image(object):
def __init__(self, url, referer, p):
self._url = url
self.url = LazyUrl(referer, self.get, self)
ext = '.jpg'#
self.filename = u'{:04}{}'.format(p, ext)
def get(self, referer):
return self._url
def get_info(url, cw=None):
info = _get_info(url, cw)
imgs = []
html = info['content']
soup = Soup(html)
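# The article body references images by their original URLs; downloadable copies
# live behind cdn1-images.epio.app/image/download/<base64 of the original URL>,
# so each src is base64-encoded and rewritten to that endpoint.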
for img in soup.findAll('img'):
src = img.attrs.get('src')
if not src:
continue
# 1696
if not isinstance(src, bytes):
src = src.encode('utf8')
t = base64.b64encode(src)
if isinstance(t, bytes):
t = t.decode('utf8')
src = 'https://cdn1-images.epio.app/image/download/{}'.format(t)
img = Image(src, url, len(imgs))
imgs.append(img)
info['imgs'] = imgs
return info
def get_id(url):
return re.find('article/detail/([0-9a-z]+)', url)
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import aes
backend = default_backend()
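# The API response is an encrypted envelope: the first 16 bytes are treated as a
# salt from which aes.key_and_iv() (assumed to be an EVP_BytesToKey-style KDF with
# the hard-coded KEY) derives the AES key and IV; the remainder is the AES-CBC
# ciphertext. decrypt() zero-pads the input to a 16-byte boundary, decrypts, then
# strips the trailing pad-length byte and the zero padding it added.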
def decrypt(s, cw=None):
print_ = get_print(cw)
key, iv = aes.key_and_iv(s[:16], KEY)
print_('key: {}\niv: {}'.format(key, iv))
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=backend)
r = -len(s) % 16
if r:
s += b'\x00' * r
dec = cipher.decryptor()
s_dec = dec.update(s[16:]) + dec.finalize()
s_dec = s_dec[:-s_dec[-1]]
if r:
s_dec = s_dec[:-r]
return s_dec
def _get_info(url, cw=None):
id = get_id(url)
url_api = 'https://girlimg.epio.app/api/articles/{}?lang=en-us'.format(id)
html = downloader.read_html(url_api, referer=url)
s = json.loads(html)['string']
s = base64.b64decode(s)
s = decrypt(s, cw)
info = json.loads(s)
return info

@@ -0,0 +1,186 @@
import downloader
import ytdl
from utils import Downloader, Session, try_n, LazyUrl, get_ext, format_filename, clean_title, get_print
from io import BytesIO
import ree as re
from m3u8_tools import playlist2stream, M3u8_stream
import utils
import ffmpeg
@Downloader.register
class Downloader_etc(Downloader):
type = 'etc'
URLS = []
single = True
MAX_PARALLEL = 8
display_name = 'Etc'
def init(self):
self.session = Session()
name = ytdl.get_extractor_name(self.url)
self.print_('extractor: {}'.format(name))
if name == 'generic':
raise NotImplementedError()
def read(self):
video = get_video(self.url, self.session, self.cw)
if video.artist:
self.artist = video.artist
self.urls.append(video.url)
self.print_('url_thumb: {}'.format(video.url_thumb))
self.setIcon(video.thumb)
if video.header.lower() not in ['yourporn', 'spankbang']:
self.enableSegment()#
if isinstance(video.url(), M3u8_stream):
self.disableSegment()
self.title = '[{}] {}'.format(video.header, video.title)
def int_or_none(s):
try:
return int(s)
except:
return None
def format_(f):
if f is None:
return 'None'
return '{} - {} - {} - {}'.format(f['format'], f['_resolution'], f['_audio'], f['url'])
@try_n(4)
def get_video(url, session, cw, ie_key=None):
print_ = get_print(cw)
options = {
'noplaylist': True,
#'extract_flat': True,
'playlistend': 1,
}
ydl = ytdl.YoutubeDL(options)
info = ydl.extract_info(url)
if not ie_key:
ie_key = ytdl.get_extractor_name(url)
info['ie_key'] = ie_key
url_new = info.get('url')
print('url: {} -> {}'.format(url, url_new))
formats = info.get('formats', [])
print(info.keys())
if not formats and (info.get('entries') or 'title' not in info):
if 'entries' in info:
entry = info['entries'][0]
url_new = entry.get('url') or entry['webpage_url']
if url_new != url:
return get_video(url_new, session, cw, ie_key=get_ie_key(info))
session.headers.update(info.get('http_headers', {}))
#session.cookies.update(ydl.cookiejar)
if not formats:
print('no formats')
if url_new:
f = {'url': url_new, 'format': ''}
formats.append(f)
fs = []
for i, f in enumerate(formats):
f['_index'] = i
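# Ranking heuristics: _resolution falls back through vbr -> "<N>p" in the format
# string -> height/width -> 1/0 for "has a video codec", and _audio through
# abr -> asr -> 1/0 for "has an audio codec", so formats stay comparable even
# when extractor metadata is sparse.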
f['_resolution'] = f.get('vbr') or int_or_none(re.find('([0-9]+)p', f['format'], re.IGNORECASE)) or f.get('height') or f.get('width') or int(f.get('vcodec', 'none') != 'none')
f['_audio'] = f.get('abr') or f.get('asr') or int(f.get('acodec', 'none') != 'none')
print_(format_(f))
fs.append(f)
if not fs:
raise Exception('No videos')
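# Pick the format with the highest _resolution (ties broken by original order).
# If it is video-only, also pick the best audio-only format so pp() can merge it
# with ffmpeg after download; failing that, fall back to the best format that
# carries audio at all.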
f = sorted(fs, key=lambda f:(f['_resolution'], f['_index']))[-1]
if f['_audio']:
f_audio = None
else:
fs_audio = sorted([f_audio for f_audio in fs if (not f_audio['_resolution'] and f_audio['_audio'])], key=lambda f:(f['_audio'], f['_index']))
if fs_audio:
f_audio = fs_audio[-1]
else:
try:
f = sorted([f for f in fs if f['_audio']], key=lambda f:(f['_resolution'], f['_index']))[-1]
except IndexError:
pass
f_audio = None
print_('video: {}'.format(format_(f)))
print_('audio: {}'.format(format_(f_audio)))
video = Video(f, f_audio, info, session, url, cw=cw)
return video
def get_ie_key(info):
ie_key = info.get('ie_key') or info['extractor']
ie_key = ie_key.split(':')[0]
if ie_key.lower().endswith('playlist'):
ie_key = ie_key[:-len('playlist')]
return ie_key
class Video(object):
def __init__(self, f, f_audio, info, session, referer, cw=None):
self.f_audio = f_audio
self.cw = cw
self.title = title = info['title']
self.id = info['id']
self.url = f['url']
self.artist = info.get('uploader')
self.header = utils.capitalize(get_ie_key(info))
self.session = session
self.referer = referer
self.url_thumb = info.get('thumbnail')
self.thumb = BytesIO()
if self.url_thumb:
downloader.download(self.url_thumb, referer=referer, buffer=self.thumb, session=session)
try:
ext = downloader.get_ext(self.url, session, referer)
except Exception as e:
print(e)
ext = get_ext(self.url)
if not ext:
print('empty ext')
if f['_resolution']:
ext = '.mp4'
else:
ext = '.mp3'
if ext.lower() == '.m3u8':
try:
url = playlist2stream(self.url, referer, session=session, n_thread=4)
except:
url = M3u8_stream(self.url, referer=referer, session=session, n_thread=4)
ext = '.mp4'
else:
url = self.url
self.url = LazyUrl(referer, lambda x: url, self, pp=self.pp)
self.filename = format_filename(title, self.id, ext, header=self.header)
def pp(self, filename):
if self.cw:
with self.cw.convert(self):
return self._pp(filename)
else:
return self._pp(filename)
def _pp(self, filename):
if self.f_audio:
f = BytesIO()
downloader.download(self.f_audio['url'], buffer=f, referer=self.referer, session=self.session)
ffmpeg.merge(filename, f, cw=self.cw)
return filename

@@ -0,0 +1,260 @@
#coding:utf8
import downloader
from utils import Session, urljoin, Soup, LazyUrl, try_n, Downloader, get_outdir, clean_title
import ree as re
import json
import os
from translator import tr_
from timee import sleep
from downloader import getsize
import errors
PATTERN_CURSOR = '".+?&cursor=([0-9]+)'
UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
class Image(object):
def __init__(self, url):
if 'fbid=' in url:
id = int(re.findall('fbid=([0-9]+)', url)[0])
elif 'photos/' in url:
id = int(url.split('photos/')[1].split('/')[1])
else:
id = int(url)
self.id = id
def f(_):
img = get_img(url)
ext = os.path.splitext(img.split('?')[0])[1]
self.filename = u'{}{}'.format(id, ext)
return img
self.url = LazyUrl(url, f, self)
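# get_img(): the full-size photo URL normally sits in a div's data-full-size-href;
# on pages that hide the viewer markup inside <code> blocks (the "# 1869" case),
# the hidden markup is re-parsed and the first target="_blank" link is used instead.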
@try_n(4)
def get_img(url):
#print('get_img', url)
html = read_html(url)
soup = Soup(html)
for div in soup.findAll('div'):
href = div.attrs.get('data-full-size-href')
if href:
img = href
break
else:
img = None
if img is None:
# 1869
for code in soup.findAll('code'):
code = code.string
hidden = Soup(code)
soup.append(hidden)
for a in soup.findAll('a'):
target = a.attrs.get('target')
if target == '_blank':
img = a.attrs['href']
break
else:
raise Exception('No img')
return img
def suitable(url):
if 'facebook.com' not in url.lower():
return False
if '/videos/' in url or 'video.php?' in url:
return False
return True
@Downloader.register
class Downloader_facebook(Downloader):
type = 'facebook'
URLS = [suitable]
_soup = None
MAX_CORE = 8
@classmethod
def fix_url(cls, url):
if 'facebook.com/' not in url:
url = 'https://facebook.com/{}'.format(url)
url = url.replace('m.facebook.', 'facebook.')
if 'www.facebook.com/' not in url:
url = url.replace('facebook.com/', 'www.facebook.com/', 1)
if '/profile.php?' not in url:
url = url.split('?')[0]
return url.split('#')[0].strip('/')
@property
def username(self):
username = get_username(self.url)
return username
@property
def soup(self):
if self._soup is None:
html = read_html(self.url)
self._soup = Soup(html)
return self._soup
@property
def name(self):
title = get_title(self.soup)
id_ = 'facebook_{}'.format(self.username)
title = u'{} ({})'.format(title, id_)
return clean_title(title)
@property
def album(self):
if 'album_id=' in self.url:
album = re.findall('album_id=([0-9]+)', self.url)[0]
else:
album = None
return album
def read(self):
self.print_(self.name)
self.title = tr_(u'읽는 중... {}').format(self.name)
imgs = get_imgs(self.username, self.name, cw=self.cw)
for img in imgs:
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = self.name
def read_html(url):
return downloader.read_html(url, user_agent=UA)
def get_title(soup):
html = str(soup)
name = re.find(r'"__isProfile":"Page","name":(".*?")', html) or re.find(r'"name":(".*?")', html)
if not name:
gc = soup.find('div', id='globalContainer')
if gc and gc.find('form', id='login_form'):
raise errors.LoginRequired()
raise Exception('no name')
title = json.loads(name)
return title
def get_imgs(username, title, cw=None):
urls = [
'https://m.facebook.com/{}/photos'.format(username),
'https://m.facebook.com/profile.php?id={}&sk=photos'.format(username), # no custom URL
]
for url in urls:
print('get_imgs url:', url)
try:
html = read_html(url)
except:
continue
soup = Soup(html)
if soup.find('a', id='signup-button'):
raise errors.LoginRequired()
photo = soup.find('div', class_='_5v64')
if photo is not None:
break
else:
raise Exception('No photo div')
cursor = photo.a.attrs['href'].split('/photos/')[1].split('/')[1]
print('first cursor:', cursor)
href = re.find(r'(/photos/pandora/\?album_token=.+?)"', html)
href = urljoin(url, href)
href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)
cursors = set([cursor])
imgs = []
dups = {}
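# Index files already present in the output directory by numeric photo id so
# previously downloaded photos can be reused instead of re-fetched.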
dir = os.path.join(get_outdir('facebook'), title)
try:
filenames = os.listdir(dir)
except:
filenames = []
for filename in filenames:
name, ext = os.path.splitext(filename)
if name.isdigit():
dups[int(name)] = os.path.join(dir, filename)
pages = set()
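# Cursor-based pagination over the mobile photos endpoint: each response is
# Facebook's "for (;;);"-prefixed JSON; follow the m_more_photos action, collect
# photo links, and advance the cursor until it disappears or repeats.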
while True:
print(href)
html = read_html(href)
data_raw = html.replace('for (;;);', '')
data = json.loads(data_raw)
actions = data['payload']['actions']
for action in actions:
if action['target'] == 'm_more_photos':
break
else:
print('No more photos')
break
html = action['html']
soup = Soup(html)
photos = soup.findAll('div' ,class_='_5v64')
for photo in photos:
for a in photo.findAll('a'):
page = a.attrs['href']
page = urljoin(href, page)
# remove duplicate pages
if page in pages:
continue
pages.add(page)
img = Image(page)
id = img.id
if id in dups and getsize(dups[id]) > 0:
print('skip', id)
imgs.append(dups[id])
else:
imgs.append(img)
s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
if cw is not None:
cw.setTitle(s)
if not cw.alive:
return []
else:
print(s)
cursor = re.find(PATTERN_CURSOR, data_raw)
#print(cursor)
if cursor is None:
print('no cursor')
break
if cursor in cursors:
print('same cursor')
break
cursors.add(cursor)
href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)
return imgs
def get_username(url):
if '/profile.php?' in url:
id = re.find(r'/profile\.php[\?&]id=([0-9]+)', url)
return id
else:
url = url.replace('facebook.com/pg/', 'facebook.com/')
return url.split('?')[0].split('facebook.com/')[1].split('/')[0]

@@ -0,0 +1,128 @@
#coding: utf-8
import downloader
import flickr_api
from timee import sleep
from utils import Downloader, LazyUrl, query_url, clean_title
import os
from translator import tr_
import ree as re
from datetime import datetime
import flickr_auth
alphabet = '123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
base = len(alphabet)
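# Flickr's short-URL base58 alphabet (0, l, I and O omitted); b58encode/b58decode
# convert between a numeric photo id and its flic.kr/p/ short code.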
def b58encode(div, s=''):
if div >= base:
div, mod = divmod(div, base)
return b58encode(div, alphabet[mod] + s)
return alphabet[div] + s
def b58decode(s):
return sum(alphabet.index(c) * pow(base, i) for i, c in enumerate(reversed(s)))
class Image(object):
def __init__(self, photo):
self.photo = photo
self.id = photo.id
self.filename = None
def f(_=None):
url = photo.getPhotoFile()
#url = 'https://flic.kr/p/{}'.format(b58encode(int(photo.id)))
ext = os.path.splitext(url)[1]
date = datetime.fromtimestamp(int(photo.dateuploaded))
date = u'{:02}-{:02}-{:02}'.format(date.year%100, date.month, date.day)
self.filename = u'[{}] {}{}'.format(date, self.id, ext)
return url
self.url = LazyUrl(u'flickr_{}'.format(self.id), f, self)
def find_ps(url):
user = flickr_api.Person.findByUrl(url)
id = re.search('/albums/([0-9]+)', url).groups()[0]
pss = user.getPhotosets()
for ps in pss:
if ps.id == id:
break
else:
raise Exception('Not found photoset id')
return user, ps
@Downloader.register
class Downloader_flickr(Downloader):
type = 'flickr'
URLS = ['flickr.com']
_name = None
def init(self):
if 'flickr.com' in self.url.lower():
self.url = self.url.replace('http://', 'https://')
else:
self.url = 'https://www.flickr.com/people/{}'.format(self.url)
@property
def name(self):
global pss
if self._name is None:
url = self.url
flickr_auth.get_api(url, self.cw)
if '/albums/' in url:
user, ps = find_ps(url)
self._name = u'{} (flickr_album_{}_{})'.format(ps.title, user.id, ps.id)
else:
user = flickr_api.Person.findByUrl(url)
self._name = u'{} (flickr_{})'.format(user.username, user.id)
return clean_title(self._name)
def read(self):
self.title = self.name
imgs = get_imgs(self.url, self.title, cw=self.cw)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
def get_imgs(url, title=None, cw=None):
flickr_auth.get_api(title, cw)
if not flickr_auth.isAuth:
raise Exception('No Auth')
if '/albums/' in url:
user, ps = find_ps(url)
handle = ps
else:
user = flickr_api.Person.findByUrl(url)
handle = user
photos = []
per_page = 500
for page in range(1, 200):
photos_new = handle.getPhotos(per_page=per_page, page=page)
photos += photos_new
if len(photos_new) < per_page:
break
msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(photos))
if cw:
if not cw.alive:
break
cw.setTitle(msg)
else:
print(msg)
imgs = []
for photo in photos:
img = Image(photo)
imgs.append(img)
return imgs

@@ -0,0 +1,131 @@
# uncompyle6 version 3.5.0
# Python bytecode 2.7 (62211)
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
# Embedded file name: imgur_downloader.pyo
# Compiled at: 2019-10-07 05:58:14
import downloader
from utils import Downloader, Soup, try_n, urljoin, get_max_range, clean_title, cut_pair
import ree as re, json, os
from timee import sleep
from translator import tr_
@Downloader.register
class Downloader_imgur(Downloader):
type = 'imgur'
URLS = ['imgur.com']
MAX_CORE = 16
def init(self):
self.info = get_info(self.url)
@property
def id_(self):
return re.find('imgur.com/.+?/([0-9a-zA-Z]+)', self.url)
@property
def name(self):
title = self.info['title'] or 'N/A'
return clean_title(title, n=100)
def read(self):
imgs = get_imgs(self.url, self.info, self.cw)
for img in imgs:
ext = os.path.splitext(img.split('?')[0])[1]
if len(imgs) > 1:
self.filenames[img] = (u'{:04}{}').format(len(self.urls), ext)
else:
self.filenames[img] = clean_title(self.name, n=-len(ext)) + ext
self.urls.append(img)
self.single = len(imgs) == 1
self.referer = self.url
self.title = u'{} (imgur_{})'.format(self.name, self.id_)
@try_n(4)
def get_info(url):
url = url.replace('/gallery/', '/a/')
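# Two cases: /r/<subreddit> listings are scraped page by page further below, while
# albums first try the legacy inline `image : {...}` JSON embedded in the HTML and
# fall back to the api.imgur.com/post/v1/albums endpoint when that is missing.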
if '/r/' in url and url.split('/r/')[1].strip('/').count('/') == 0:
title = re.find(r'/r/([^/]+)', url)
info = {}
info['title'] = title
info['type'] = 'r'
else:
try: # legacy
html = downloader.read_html(url, cookies={'over18':'1'})
s = re.find('image *: *({.+)', html)
info_raw = cut_pair(s)
except Exception as e: # new
print(e)
id_ = re.find(r'/a/([0-9a-zA-Z_]+)', url) or re.find(r'/r/[0-9a-zA-Z_]+/([0-9a-zA-Z_]+)', url, err='no id')
url_api = 'https://api.imgur.com/post/v1/albums/{}?client_id=546c25a59c58ad7&include=media%2Cadconfig%2Caccount'.format(id_)
info_raw = downloader.read_html(url_api, cookies={'over18':'1'})
info = json.loads(info_raw)
info['type'] = 'a'
return info
def get_imgs(url, info=None, cw=None):
print('get_imgs', url)
if info is None:
info = get_info(url)
imgs = []
# Range
max_pid = get_max_range(cw)
if info['type'] == 'a':
if 'album_images' in info: # legacy
imgs_ = info['album_images']['images']
elif 'media' in info: # new
imgs_ = info['media']
else: # legacy
imgs_ = [info]
for img in imgs_:
img_url = img.get('url') # new
if not img_url: # legacy
hash = img['hash']
ext = img['ext']
img_url = 'https://i.imgur.com/{}{}'.format(hash, ext)
if img_url in imgs:
continue
imgs.append(img_url)
elif info['type'] == 'r':
urls = set()
for p in range(100):
url_api = 'https://imgur.com/r/{}/new/page/{}/hit?scrolled'.format(info['title'], p)
print(url_api)
html = downloader.read_html(url_api, referer=url)
soup = Soup(html)
c = 0
for post in soup.findAll('div', class_='post'):
a = post.find('a', class_='image-list-link')
url_post = urljoin(url, a.attrs['href'])
if url_post in urls:
continue
urls.add(url_post)
c += 1
try: # for r18 images
imgs += get_imgs(url_post)
except Exception as e:
print(e)
s = (u'{} {} ({})').format(tr_(u'\uc77d\ub294 \uc911...'), info['title'], len(imgs))
if cw is not None:
if cw.alive:
cw.setTitle(s)
else:
return []
else:
print(s)
if c == 0:
print('same; break')
break
return imgs

@@ -0,0 +1,579 @@
#coding:utf8
import downloader
from timee import sleep, clock
from constants import clean_url
from utils import Downloader, LazyUrl, urljoin, get_max_range, Soup, Session, update_url_query, get_print, cut_pair, get_ext, clean_title, lazy, try_n, generate_csrf_token, check_alive
import urllib
from error_printer import print_error
import os, requests
from translator import tr_
import json
from datetime import datetime
import hashlib
import ree as re
from ratelimit import limits, sleep_and_retry
import clf2
import errors
FORMAT_PIN = r'/p/([0-9a-zA-Z-_]+)'
def get_session(url, cw=None):
#res = clf2.solve(url, cw=cw)
#return res['session']
session = Session()
sessionid = session.cookies._cookies.get('.instagram.com', {}).get('/',{}).get('sessionid')
if sessionid is None or sessionid.is_expired():
raise errors.LoginRequired()
session.headers['User-Agent'] = downloader.hdr['User-Agent']
if not session.cookies.get('csrftoken', domain='.instagram.com'):
csrf_token = generate_csrf_token()
print('csrf:', csrf_token)
session.cookies.set("csrftoken", csrf_token, domain='.instagram.com')
return session
@Downloader.register
class Downloader_insta(Downloader):
type = 'insta'
URLS = ['instagram.com']
MAX_CORE = 8
display_name = 'Instagram'
def init(self):
self.session = get_session(self.url, self.cw)
if '/p/' in self.url:
self.print_('single post')
elif '/stories/' in self.url:
self.print_('stories')
elif 'instagram.com' in self.url:
self.url = u'https://www.instagram.com/{}'.format(self.username)
@lazy
def username(self):
return get_username(self.url)
@classmethod
def fix_url(cls, url):
if 'instagram.com' not in url:
url = u'https://www.instagram.com/{}'.format(url)
return url.split('?')[0].split('#')[0].strip('/')
@classmethod
def key_id(cls, url):
return url.replace('://www.', '://')
@lazy
def name(self):
return get_name(self.url)
@property
def id_(self):
return u'{} (insta_{})'.format(clean_title(self.name), self.username)
def read(self):
cw = self.cw
title = self.id_
self.title = title
self.artist = self.name
ui_setting = self.ui_setting
if '/p/' in self.url:
self.print_('single')
iter = get_imgs_single(self.url, self.session, cw=cw)
elif '/stories/highlights/' in self.url:
iter = get_stories_single(self.url, session=self.session, cw=cw)
else:
s = ui_setting.instaStories.isChecked()
self.print_('stories: {}'.format(s))
iter = get_imgs_all(self.url, title, session=self.session, cw=cw, d=self, stories=s)
imgs = []
for img in iter:
if cw and not cw.alive:
return
self.urls.append(img.url)
self.title = title
def get_j(script):
s = script.string
if not s:
return
try:
s = s.replace('window._sharedData', '').strip()[1:-1].strip()
j = json.loads(s)
return j
except ValueError as e:
pass
def read_html(url, session, cw):
#res = clf2.solve(url, session=session, cw=cw)#
#return res['html']
return downloader.read_html(url, session=session)
def check_error(soup, cw, wait):
print_ = get_print(cw)
err = soup.find('div', class_='error-container')
if err:
err = err.text.strip()
if wait:
print_('err: {}'.format(err))
sleep(60*30, cw)
else:
raise Exception(err)
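# get_sd() extracts Instagram's embedded JSON state: window._sharedData from the
# page scripts, merged with any window.__additionalDataLoaded(...) payload, and
# raises LoginRequired when a Challenge or LoginAndSignupPage entry appears instead.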
def get_sd(url, session=None, html=None, cw=None, wait=True):
print_ = get_print(cw)
if html:
soup = Soup(html)
check_error(soup, cw, wait)
for script in soup.findAll('script'):
j = get_j(script)
if j:
break
else:
raise Exception('no _sharedData!!')
else:
for try_ in range(4):
_wait(cw)
html = read_html(url, session, cw)
soup = Soup(html)
check_error(soup, cw, wait)
for script in soup.findAll('script'):
j = get_j(script)
if j:
break
else:
continue
break
else:
raise Exception('no _sharedData')
for script in soup.findAll('script'):
s = script.string
if s and 'window.__additionalDataLoaded(' in s:
s = cut_pair(s)
j_add = json.loads(s)
try:
j['entry_data']['PostPage'][0].update(j_add)
except:
j['entry_data']['ProfilePage'][0].update(j_add) #2900
# Challenge
challenge = j['entry_data'].get('Challenge')
if challenge:
for cont in challenge[0]['extraData']['content']:
title = cont.get('title')
if title:
break
else:
title = 'Err'
raise errors.LoginRequired(title)
# LoginAndSignupPage
login = j['entry_data'].get('LoginAndSignupPage')
if login:
raise errors.LoginRequired()
return j
def get_id(url):
j = get_sd(url)
if '/p/' in url:
id = j['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['id']
elif '/stories/' in url:
id = j['entry_data']['StoriesPage'][0]['user']['username'] # ???
else:
id = j['entry_data']['ProfilePage'][0]['graphql']['user']['id']
return id
def get_username(url):
j = get_sd(url, wait=False)
if '/p/' in url:
id = j['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['username']
elif '/stories/' in url:
id = j['entry_data']['StoriesPage'][0]['user']['username']
else:
id = j['entry_data']['ProfilePage'][0]['graphql']['user']['username']
return id
def get_name(url):
j = get_sd(url)
if '/p/' in url:
name = j['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['full_name']
elif '/stories/' in url:
id = get_id(url)
url = 'https://www.instagram.com/{}/'.format(id)
return get_name(url)
else:
name = j['entry_data']['ProfilePage'][0]['graphql']['user']['full_name']
return name
class Image(object):
def __init__(self, url, referer, filename, id=None):
self._url = url
self.url = LazyUrl(referer, self.get, self)
self.filename = filename
self.id = id
def get(self, referer):
wait_download()
return self._url
class Image_lazy(object):
def __init__(self, url, session=None, cw=None):
self.url = url
self.session = session
self.cw = cw
self.url = LazyUrl(url, self.get, self)
@try_n(4)
def get(self, url):
cw = self.cw
if cw and not cw.alive:
raise Exception('cw is dead')
node = Node(url, session=self.session, cw=cw)
img = node.imgs[0]
ext = os.path.splitext(url)[1]
wait_download()
url_img = img.url()
self.filename = img.filename
return url_img
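# Rate limiting: _wait() is throttled to one call per 10 seconds by ratelimit's
# sleep_and_retry/limits decorators and is called before every page/GraphQL request;
# wait_download() is a currently disabled (no-op) hook for throttling downloads.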
@sleep_and_retry
@limits(1, 10)
def _wait(cw=None):
if cw and not cw.alive:
raise Exception('cw is dead while waiting')
##@sleep_and_retry
##@limits(1, 1)
def wait_download():
pass
@try_n(2)
def get_query(query_hash, variables, session, cw=None):
_wait(cw)
print_ = get_print(cw)
csrf_token = session.cookies.get('csrftoken', domain='.instagram.com')
if not csrf_token:
raise Exception('no csrftoken')
hdr = {
"X-CSRFToken" : csrf_token, #2849
"X-IG-App-ID" : "936619743392459",
"X-IG-WWW-Claim" : "0",
"X-Requested-With": "XMLHttpRequest",
}
url_ = update_url_query('https://www.instagram.com/graphql/query/', {'query_hash': query_hash, 'variables': json.dumps(variables)})
#print(len(edges), url_)
r = session.get(url_, headers=hdr)
try:
j = json.loads(r.text)
except Exception as e:
print(e)
j = {}
if not j or j.get('status') == 'fail':
msg = 'Fail: {} {}'.format(j.get('message') or 'Please wait a few minutes before you try again.', variables)
print_(msg)
sleep(60*30, cw)
raise Exception(msg)
return j
def get_imgs(url, n_max=2000, title=None, cw=None, session=None):
print_ = get_print(cw)
for try_ in range(4):
try:
html = read_html(url, session, cw)
m = re.search('"edge_owner_to_timeline_media":{"count":([0-9]+)', html)
if m is None:
raise Exception('Invalid page')
break
except Exception as e:
e_ = e
print_(print_error(e)[0])
else:
raise e_
n = int(m.groups()[0])
n = min(n, n_max)
data = get_sd(url, html=html, cw=cw)
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
csrf_token = data['config']['csrf_token']#
session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')
cursor = ''
edges = []
bad = 0
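# Page through the profile feed with the edge_owner_to_timeline_media GraphQL
# query (12 posts per request), following page_info.end_cursor; transient failures
# back off with increasing sleeps and give up after ~10 consecutive errors.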
while True:
check_alive(cw)
variables = {
'id': uploader_id,
'first': 12,
}
if cursor:
variables['after'] = cursor
#print_(variables)#
media = None
try:
j = get_query('003056d32c2554def87228bc3fd9668a', variables, session, cw)
media = j['data']['user']['edge_owner_to_timeline_media']
sleep(2)#
except Exception as e:
if bad > 10:
raise Exception('no media')
else:
print_(u'no media.. retry... ({}) {}'.format(bad+1, print_error(e)[0]))
sleep(12*bad, cw)
bad += 1
continue
bad = 0
edges_new = media.get('edges')
if not edges_new or not isinstance(edges_new, list):
print('no edges_new')
break
edges += edges_new
s = u'{} {} ({}/{})'.format(tr_(u'읽는 중...'), title, len(edges), n)
if cw is not None:
cw.setTitle(s)
if not cw.alive:
return []
else:
print(s)
if len(edges) >= n:
break
page_info = media.get('page_info')
if not page_info:
break
if not page_info.get('has_next_page'):
break
cursor = page_info.get('end_cursor')
if not cursor:
break
if len(edges) <= n/2:
raise Exception(u'Too short: {} / {}'.format(len(edges), n))
imgs = []
for edge in edges:
node = edge['node']
type = node['__typename']
id = node['shortcode']
url = u'https://www.instagram.com/p/{}/'.format(id)
## if type in ['GraphVideo', 'GraphImage']:
## single = True
## else:
## single = False
for img in Node(url, session=session, cw=cw, media=node).imgs:
imgs.append(img)
if len(imgs) >= n_max:
break
return imgs
class Node(object):
def __init__(self, url, format=u'[%y-%m-%d] id_ppage', session=None, cw=None, media=None):
print('Node', url)
print_ = get_print(cw)
self.id = re.search(FORMAT_PIN, url).groups()[0]
self.imgs = []
self.session = session
if not media:
if False: # Original
j = get_sd(url, self.session, cw=cw)
data = j['entry_data']['PostPage'][0]['graphql']
else:
variables = {
"shortcode" : self.id,
"child_comment_count" : 3,
"fetch_comment_count" : 40,
"parent_comment_count" : 24,
"has_threaded_comments": True,
}
j = get_query('a9441f24ac73000fa17fe6e6da11d59d', variables, session, cw)
data = j['data']
media = data['shortcode_media']
if 'video_url' in media:
urls = [
media['video_url']]
elif 'edge_sidecar_to_children' in media:
edges = media['edge_sidecar_to_children']['edges']
urls = []
for edge in edges:
node = edge['node']
if 'video_url' in node:
url_ = node['video_url']
else:
url_ = node['display_resources'][(-1)]['src']
urls.append(url_)
else:
urls = [media['display_resources'][(-1)]['src']]
time = media['taken_at_timestamp']
self.date = datetime.fromtimestamp(time)
self.timeStamp = self.date.strftime(format).replace(':', u'\uff1a')
for p, img in enumerate(urls):
ext = os.path.splitext(img.split('?')[0].split('#')[0])[1]
filename = ('{}{}').format(self.timeStamp, ext).replace('id', str(self.id)).replace('page', str(p))
img = Image(img, url, filename)
self.imgs.append(img)
def get_imgs_all(url, title=None, cw=None, d=None, session=None, stories=True):
max_pid = get_max_range(cw)
url = clean_url(url)
if stories:
imgs_str = get_stories(url, title, cw=cw, session=session)
else:
imgs_str = []
max_pid = max(0, max_pid - len(imgs_str))
imgs = get_imgs(url, max_pid, title=title, cw=cw, session=session)
return imgs_str + imgs[:max_pid]
def get_imgs_single(url, session=None, cw=None):
node = Node(url, session=session, cw=cw)
return node.imgs
def get_stories(url, title=None, cw=None, session=None):
print_ = get_print(cw)
html = downloader.read_html(url, session=session)
data = get_sd(url, html=html, cw=cw)
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
csrf_token = data['config']['csrf_token']#
session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')
print('uploader_id:', uploader_id)
variables = {
'user_id': uploader_id,
'include_chaining': True,
'include_reel': True,
'include_suggested_users': False,
'include_logged_out_extras': False,
'include_highlight_reels': True,
'include_live_status': True,
}
j = get_query('d4d88dc1500312af6f937f7b804c68c3', variables, session, cw)
imgs = []
ids = set()
data = j['data']
hs = data['user']['edge_highlight_reels']
edges = hs['edges']
edges.insert(0, str(uploader_id))
for i, edge in enumerate(edges):
if isinstance(edge, str):
id = edge
hid = None
url_str = url
else:
id = None
hid = edge['node']['id']
url_str = 'https://www.instagram.com/stories/highlights/{}/'.format(hid)
try:
imgs_new = get_stories_single(url_str, id=id, cw=cw, session=session)
for img in imgs_new:
if img.id in ids:
print('duplicate: {}'.format(img.id))
continue
ids.add(img.id)
imgs.append(img)
print_('stories: {}'.format(hid))
except Exception as e:
print_(u'Failed to get stories: {}'.format(hid))
print(e)
msg = u'{} {} ({}/{})'.format(tr_(u'스토리 읽는 중...'), title, i+1, len(edges))
if cw:
if not cw.alive:
return
cw.setTitle(msg)
else:
print(msg)
imgs = sort_str(imgs)
return imgs
def sort_str(imgs):
imgs = sorted(imgs, key=lambda img: int(img.id), reverse=True)
return imgs
def get_stories_single(url, id=None, cw=None, session=None):
j = get_sd(url, session=session, cw=cw)
hid = re.find('/stories/highlights/([0-9]+)', url)
reel_ids = []
highlight_reel_ids = []
if hid is None:
if id is None:
id = get_id(url) # ???
reel_ids.append(str(id))
else:
highlight_reel_ids.append(str(hid))
print(id, hid)
variables = {
"reel_ids":reel_ids,
"tag_names":[],
"location_ids":[],
"highlight_reel_ids":highlight_reel_ids,
"precomposed_overlay":False,
"show_story_viewer_list":True,
"story_viewer_fetch_count":50,
"story_viewer_cursor":"",
"stories_video_dash_manifest":False
}
print(variables)
j = get_query('f5dc1457da7a4d3f88762dae127e0238', variables, session, cw)
data = j['data']
m = data['reels_media'][0]
items = m['items']
if not items:
raise Exception('no items')
imgs = []
for item in items:
id = item['id']
rs = item.get('video_resources') or item['display_resources']
r = rs[-1]
src = r['src']
ext = get_ext(src)
filename = u'stories_{}{}'.format(id, ext)
img = Image(src, url, filename, id=id)
imgs.append(img)
imgs = sort_str(imgs)
return imgs

@@ -93,7 +93,13 @@ class Downloader_iwara(Downloader):
 def read_channel(url, type_, cw=None):
     print_ = get_print(cw)
-    username = re.find(r'/users/([^/]+)', url, err='no username')
+    html = downloader.read_html(url)
+    soup = Soup(html)
+    if soup.find('div', id='block-mainblocks-user-connect'):
+        username = re.find(r'''/messages/new\?user=(.+)['"]''', html, err='no username')
+    else:
+        username = re.find(r'/users/([^/]+)', url, err='no username')
+    print_('username: {}'.format(username))
     info = {}
     urls = []
     urls_set = set()

@@ -0,0 +1,79 @@
import downloader
from utils import Downloader, Soup, get_print, json_loads, compatstr, LazyUrl, format_filename, clean_title
import devtools
import js2py
import ree as re
from m3u8_tools import playlist2stream
from io import BytesIO
@Downloader.register
class Downloader_javfinder(Downloader):
type = 'javfinder'
URLS = ['javfinder.la']
single = True
display_name = 'JavFinder'
def read(self):
video = Video(self.url, cw=self.cw)
self.urls.append(video.url)
self.setIcon(video.thumb)
self.title = video.title
class Video(object):
def __init__(self, url, cw=None):
info = solve(url, cw=cw)
url_video = info['file']
stream = playlist2stream(url_video, n_thread=4)
self.url = LazyUrl(url, lambda x: stream, self)
self.title = info['title']
id = info['id']
self.filename = format_filename(self.title, id, '.mp4')
self.thumb = BytesIO()
downloader.download(info['url_thumb'], buffer=self.thumb)
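# solve() watches the page's network traffic (devtools.watch_network) to find the
# streamsb.net embed, then unpacks the player's eval(function(p,a,c,k,e,d)...)
# script with js2py to recover the HLS `sources` list.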
def solve(url, cw=None):
print_ = get_print(cw)
info = {}
res = devtools.watch_network(url, cw=cw)
#html = res['html']
html = downloader.read_html(url) # ???
soup = Soup(html)
info['title'] = soup.find('h1').text.strip()
info['url_thumb'] = soup.find('meta', {'property': 'og:image'})['content'].strip()
for r in res['rs']:
url_player = r.url()
if 'streamsb.net/embed-' in url_player:
break
else:
raise Exception('no player')
print_('player: {}'.format(url_player))
info['id'] = ''#
html = downloader.read_html(url_player, url)
soup = Soup(html)
for script in soup.findAll('script'):
script = script.string or ''
if 'function(p,a,c,k,e,d)' in script:
break
else:
raise Exception('no function(p,a,c,k,e,d)')
js = script.strip()[5:-1].replace('function(p,a,c,k,e,d)', 'function hack(p,a,c,k,e,d)').replace('return p}', 'return p};hack')
context = js2py.EvalJs()
t = context.eval(js)
sources = re.find(r'sources *: *(\[\{.+?\}\])', t, err='no sources')
sources = json_loads(sources)
info['file'] = sources[0]['file']
return info

@@ -0,0 +1,207 @@
import downloader
from utils import Soup, urljoin, Downloader, fix_title, Session, get_print, LazyUrl, clean_title, get_imgs_already
import ree as re
from timee import sleep
from translator import tr_
import os
from constants import try_n, clean_url
import urllib, page_selector
import bs4
PATTERN = r'jmana[0-9]*.*/(comic_list_title|book)\?book'
PATTERN_ALL = r'jmana[0-9]*.*/(comic_list_title|book|bookdetail)\?book'
PATTERN_ID = '[?&]bookdetailid=([0-9]+)'
class Image(object):
def __init__(self, url, page, p):
self.url = LazyUrl(page.url, lambda _: url, self)
ext = '.jpg'
name = (u'{:04}{}').format(p, ext)
self.filename = (u'{}/{}').format(page.title, name)
class Page(object):
def __init__(self, title, url):
self.title = clean_title(title)
self.url = url
self.id = int(re.find(PATTERN_ID, url))
@Downloader.register
class Downloader_jmana(Downloader):
type = 'jmana'
URLS = ['regex:'+PATTERN_ALL]
MAX_CORE = 8
_soup = None
def init(self):
self.url = clean_url(self.url)
self.session = Session()
if re.search(PATTERN_ID, self.url): #1799
select = self.soup.find('select', class_='bookselect')
for i, op in enumerate(select.findAll('option')[::-1]):
if 'selected' in op.attrs:
break
else:
raise Exception('no selected option')
for a in self.soup.findAll('a'):
url = urljoin(self.url, a.get('href') or '')
if re.search(PATTERN, url):
break
else:
raise Exception('list not found')
self.url = self.fix_url(url)
self._soup = None
for i, page in enumerate(get_pages(self.url, self.session, self.soup)):
if page.id == int(op['value']):
break
else:
raise Exception('can not find page')
self.cw.range_p = [i]
@classmethod
def fix_url(cls, url):
return url
@property
def soup(self):
if self._soup is None:
html = downloader.read_html(self.url, session=self.session)
soup = Soup(html)
self._soup = soup
return self._soup
@property
def name(self):
title = get_title(self.soup)
artist = get_artist(self.soup)
title = fix_title(self, title, artist)
return title
def read(self):
title = self.name
artist = get_artist(self.soup)
self.artist = artist
for img in get_imgs(self.url, title, self.session, soup=self.soup, cw=self.cw):
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = self.name
def get_title(soup):
a = soup.find('a', class_='tit')
if a:
return a.text.strip()
return re.find(r'제목 *: *(.+)', soup.text, err='no title')
def get_artist(soup):
return re.find(r'작가 *: *(.+)', soup.text, default='').strip() or 'N/A'
@try_n(4, sleep=60)
def get_imgs_page(page, referer, session, cw=None):
print_ = get_print(cw)
sleep(5, cw) #2017
html = downloader.read_html(page.url, referer, session=session)
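# The viewer page lists the positions of injected non-comic images in a JS variable
# `inserted`; collect those indices so the matching <img> tags are skipped below.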
inserted = re.find(r'''var *inserted *= *['"](.*?)['"]''', html)
print_('inserted: {}'.format(inserted))
inserted = set(int(i) for i in inserted.split(',')) if inserted else set()
soup = Soup(html)
view = soup.find(class_='pdf-wrap')
imgs = []
for i, img in enumerate(child for child in view.children if isinstance(child, bs4.element.Tag)):
src = img.get('data-src') or img.get('src') or ''
if i in inserted:
print_('remove: {}'.format(src))
continue
if not src:
continue
src = urljoin(page.url, src.strip())
if '/adimg/' in src:
print('adimg:', src)
continue
if '/notice' in src:
print('notice:', src)
continue
img = Image(src, page, len(imgs))
imgs.append(img)
return imgs
def get_pages(url, session=None, soup=None):
if soup is None:
html = downloader.read_html(url, session=session)
soup = Soup(html)
pages = []
for inner in soup.findAll('div', class_='inner'):
a = inner.find('a')
if not a:
continue
href = a.attrs.get('href', '')
if not re.search(PATTERN_ID, href):
continue
if a.find('img'):
print('skip img', a.attrs.get('href'))
continue
href = urljoin(url, href)
title_page = a.text
page = Page(title_page, href)
pages.append(page)
pages = list(reversed(pages))
return pages
@page_selector.register('jmana')
@try_n(4)
def f(url):
if re.search(PATTERN_ID, url):
raise Exception(tr_(u'목록 주소를 입력해주세요'))
session = Session()
pages = get_pages(url, session=session)
return pages
def get_imgs(url, title, session, soup=None, cw=None):
print_ = get_print(cw)
if soup is None:
html = downloader.read_html(url, session=session)
soup = Soup(html)
pages = get_pages(url, soup=soup)
print_('pages: {}'.format(len(pages)))
pages = page_selector.filter(pages, cw)
imgs = []
for i, page in enumerate(pages):
imgs_already = get_imgs_already('jmana', title, page, cw)
if imgs_already:
imgs += imgs_already
continue
imgs += get_imgs_page(page, url, session, cw)
if cw is not None:
if not cw.alive:
return
cw.setTitle((u'{} {} / {} ({} / {})').format(tr_(u'\uc77d\ub294 \uc911...'), title, page.title, i + 1, len(pages)))
if not imgs:
raise Exception('no imgs')
return imgs

@@ -0,0 +1,192 @@
import downloader
import ree as re
from utils import Session, LazyUrl, Soup, Downloader, try_n, get_print, clean_title, print_error, urljoin
from time import sleep
from translator import tr_
import page_selector
import json
UA = downloader.hdr['User-Agent']
class Page(object):
def __init__(self, id_, title):
self.id_ = id_
self.title = title
self.url = 'https://page.kakao.com/viewer?productId={}'.format(id_)
class Image(object):
def __init__(self, url, page, p):
self.url = LazyUrl('https://page.kakao.com/', lambda _: url, self)
ext = '.jpg'
self.filename = '{}/{:04}{}'.format(clean_title(page.title), p, ext)
@Downloader.register
class Downloader_kakaopage(Downloader):
type = 'kakaopage'
URLS = ['page.kakao.com/home']
MAX_CORE = 8
MAX_SPEED = 4.0
display_name = 'KakaoPage'
def init(self):
self.session = Session()
self.session.headers['User-Agent'] = UA
@classmethod
def fix_url(cls, url):
id = re.find('/home/.+?/([0-9]+)', url)
if id is not None:
url = id
if url.isdecimal():
url = 'https://page.kakao.com/home?seriesId={}'.format(url)
return url
def read(self):
info = get_info(self.url, self.session, cw=self.cw)
for img in info['imgs']:
self.urls.append(img.url)
self.artist = info['artist']
self.title = clean_title('[{}] {}'.format(info['artist'], info['title']))
def get_id(url):
id_ = re.find('seriesId=([0-9]+)', url, err='No seriesId')
return id_
def get_pages(url, session):
id_ = get_id(url)
pages = []
ids = set()
for p in range(100):
url_api = 'https://api2-page.kakao.com/api/v5/store/singles'
data = {
'seriesid': id_,
'page': str(p),
'direction': 'asc',
'page_size': '20',
'without_hidden': 'true',
}
r = session.post(url_api, data=data, headers={'Referer': url})
print(p, r)
data = r.json()
singles = data['singles']
if not singles:
print('no singles')
break
for single in singles:
title_page = single['title']
id_page = single['id']
if id_page in ids:
print('dup id')
continue
ids.add(id_page)
page = Page(id_page, title_page)
pages.append(page)
sleep(.5)
return pages
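# Each episode's images come from the inven/get_download_data/web API: the viewer
# page embeds a device id ("did") that must be POSTed back with the productId, and
# the returned secureUrl paths are resolved against page-edge-jz.kakao.com.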
@try_n(2)
def get_imgs_page(page, session):
html = downloader.read_html(page.url, session=session)
did = re.find('"did" *: *"(.+?)"', html, err='no did')
url_api = 'https://api2-page.kakao.com/api/v1/inven/get_download_data/web'
data = {
'productId': page.id_,
'device_mgr_uid': 'Windows - Chrome',
'device_model': 'Windows - Chrome',
'deviceId': did,
}
print(data)
r = session.post(url_api, data=data, headers={'Referer': page.url})
data = r.json()
if data['result_code']:
raise Exception(data['message'])
imgs = []
for file in data['downloadData']['members']['files']:
url = file['secureUrl']
url = urljoin('https://page-edge-jz.kakao.com/sdownload/resource/', url)
img = Image(url, page, len(imgs))
imgs.append(img)
return imgs
def get_info(url, session, cw=None):
print_ = get_print(cw)
pages = get_pages(url, session)
pages = page_selector.filter(pages, cw)
if not pages:
raise Exception('no pages')
info = {}
html = downloader.read_html(url, session=session)
soup = Soup(html)
__NEXT_DATA__ = soup.find('script', id='__NEXT_DATA__')
if __NEXT_DATA__:
data = json.loads(__NEXT_DATA__.string)
tid = data['props']['initialState']['common']['constant']['tid']
print_('tid: {}'.format(tid))
session.cookies['_kptid'] = tid
html = downloader.read_html(url, session=session)
soup = Soup(html)
title = soup.find('h2').text.strip()
info['title'] = title
artist = soup.find('meta', {'name': 'author'})['content']
for x in [' ,', ', ']:
while x in artist:
artist = artist.replace(x, ',')
artist = artist.replace(',', ', ')
info['artist'] = artist
imgs = []
for i, page in enumerate(pages):
if cw is not None:
if not cw.alive:
return
cw.setTitle('{} {} / {} ({} / {})'.format(tr_('읽는 중...'), title, page.title, i + 1, len(pages)))
try:
_imgs = get_imgs_page(page, session)
e_msg = None
except Exception as e:
_imgs = []
e_msg = print_error(e)[0]
print_('{} {}'.format(page.title, len(_imgs)))
if e_msg:
print_(e_msg)
imgs += _imgs
sleep(.2)
if not imgs:
raise Exception('no imgs')
info['imgs'] = imgs
return info
@page_selector.register('kakaopage')
@try_n(4)
def f(url):
if 'seriesId=' not in url:
raise Exception(tr_('목록 주소를 입력해주세요'))
pages = get_pages(url, Session())
return pages

@@ -0,0 +1,55 @@
import downloader
import ytdl
from utils import Downloader, try_n, LazyUrl, get_ext, format_filename
from io import BytesIO as IO
from m3u8_tools import M3u8_stream
@Downloader.register
class Downloader_vlive(Downloader):
type = 'kakaotv'
URLS = ['tv.kakao']
single = True
display_name = 'KakaoTV'
@classmethod
def fix_url(cls, url):
return url.split('?')[0].strip('/')
def read(self):
video = Video(self.url)
video.url()#
self.urls.append(video.url)
self.setIcon(video.thumb)
self.enableSegment()
self.title = video.title
class Video(object):
_url = None
def __init__(self, url):
self.url = LazyUrl(url, self.get, self)
@try_n(2)
def get(self, url):
if self._url:
return self._url
ydl = ytdl.YoutubeDL()
info = ydl.extract_info(url)
fs = [f for f in info['formats'] if f['ext'] == 'mp4']
f = sorted(fs, key=lambda f: f['height'])[-1]
self._url = f['url']
self.thumb_url = info['thumbnails'][0]['url']
self.thumb = IO()
downloader.download(self.thumb_url, buffer=self.thumb)
self.title = info['title']
ext = get_ext(self._url)
self.filename = format_filename(self.title, info['id'], ext)
return self._url

@@ -0,0 +1,72 @@
import downloader
from utils import Soup, urljoin, Downloader, LazyUrl, Session, try_n, format_filename, clean_title
from timee import sleep
import ree as re
from io import BytesIO
import clf2
@Downloader.register
class Downloader_kissjav(Downloader):
type = 'kissjav'
URLS = ['kissjav.com']
single = True
display_name = 'KissJAV'
def read(self):
video = get_video(self.url)
self.urls.append(video.url)
self.setIcon(video.thumb)
self.session = get_session(self.url, cw=self.cw)
self.enableSegment(1024*1024//2)
self.title = video.title
def get_video(url):
html = downloader.read_html(url)
soup = Soup(html)
view = soup.find('div', id='player-container-fluid')
src_best = None
res_best = -1
for source in view.findAll('source'):
src = urljoin(url, source.attrs['src'])
res = re.find('([0-9]+)p', source.attrs['title'])
res = int(res) if res else 0
if res > res_best:
src_best = src
res_best = res
if src_best is None:
raise Exception('No source')
title = soup.find('h1').text.strip()
id = soup.find('div', id='video').attrs['data-id']
url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
#src_best = downloader.real_url(src_best)
video = Video(src_best, url_thumb, url, title, id)
return video
class Video(object):
def __init__(self, url, url_thumb, referer, title, id):
self.title = title
self.filename = format_filename(title, id, '.mp4')
self.url = LazyUrl(referer, lambda x: url, self)
self.thumb = BytesIO()
self.url_thumb = url_thumb
downloader.download(url_thumb, buffer=self.thumb)
@try_n(2)
def get_session(url, cw=None):
session = Session()
clf2.solve(url, session=session, cw=cw)
return session

@@ -0,0 +1,165 @@
#coding:utf8
import downloader
from utils import Soup, urljoin, LazyUrl, Downloader, try_n, Session, clean_title, get_print
import os
from translator import tr_
import page_selector
import clf2
import utils
import base64
from image_reader import QPixmap
class Image(object):
def __init__(self, url, page, p):
self._url = url
self.url = LazyUrl(page.url, self.get, self)#, pp=self.pp)
ext = os.path.splitext(url)[1]
if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
ext = '.jpg'
self.filename = u'{}/{:04}{}'.format(page.title, p, ext)
def get(self, _):
return self._url
## def pp(self, filename):
## pixmap = QPixmap(filename)
## pixmap.save(filename)
## return filename
class Page(object):
def __init__(self, title, url):
self.title = clean_title(title)
self.url = url
@Downloader.register
class Downloader_lhscan(Downloader):
type = 'lhscan'
URLS = ['lhscan.net', 'loveheaven.net', 'lovehug.net']
MAX_CORE = 16
display_name = 'LHScan'
_soup = None
def init(self):
self.url = self.url.replace('lhscan.net', 'loveheaven.net')
self.session = Session()
#clf2.solve(self.url, session=self.session, cw=self.cw)
soup = self.soup
if not soup.find('ul', class_='manga-info'):
self.Invalid(u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url))
@property
def soup(self):
if self._soup is None:
for try_ in range(8):
try:
html = downloader.read_html(self.url, session=self.session)
break
except Exception as e:
print(e)
else:
raise
self._soup = Soup(html)
return self._soup
@property
def name(self):
title = self.soup.findAll('span', {'itemprop': 'name'})[-1].text.strip()
return clean_title(title)
def read(self):
self.title = tr_(u'읽는 중... {}').format(self.name)
imgs = get_imgs(self.url, self.name, self.session, self.soup, self.cw)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
@try_n(8)
def get_imgs_page(page, session, cw=None):
print_ = get_print(cw)
print_(page.title)
html = downloader.read_html(page.url, session=session)
soup = Soup(html)
view = soup.find('div', class_='chapter-content')
if not view:
raise Exception('no chapter-content')
imgs = []
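# Image URLs may sit in one of several lazy-load attributes and are sometimes
# base64-encoded; decode when possible and drop the site's credit/watermark images.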
for img in soup.findAll('img', class_='chapter-img'):
src = img.get('data-pagespeed-lazy-src') or img.get('data-src') or img.get('data-srcset') or img.get('data-aload') or img['src']
try:
src = base64.b64decode(src).strip().decode('utf8')
except:
pass
src = urljoin(page.url, src)
if 'Credit_LHScan_' in src or '5e1ad960d67b2_5e1ad962338c7' in src:
continue
if 'fe132b3d32acc39f5adcea9075bedad4LoveHeaven' in src:
continue
if 'LoveHug_600cfd96e98ff.jpg' in src:
continue
img = Image(src.strip(), page, len(imgs))
imgs.append(img)
return imgs
def get_pages(url, session, soup=None, cw=None):
if soup is None:
html = downloader.read_html(url, session=session)
soup = Soup(html)
tab = soup.find('ul', class_='list-chapters')
pages = []
for li in tab.findAll('li'):
text = li.find('div', class_='chapter-name').text.strip()
href = li.parent['href']
href = urljoin(url, href)
page = Page(text, href)
pages.append(page)
if not pages:
raise Exception('no pages')
return pages[::-1]
@page_selector.register('lhscan')
@try_n(4)
def f(url):
session = Session()
#clf2.solve(url, session=session)
pages = get_pages(url, session)
return pages
@try_n(2)
def get_imgs(url, title, session, soup=None, cw=None):
if soup is None:
html = downloader.read_html(url, session=session)
soup = Soup(html)
pages = get_pages(url, session, soup, cw)
pages = page_selector.filter(pages, cw)
imgs = []
for i, page in enumerate(pages):
imgs += get_imgs_page(page, session, cw)
s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))
if cw is not None:
if not cw.alive:
return
cw.setTitle(s)
else:
print(s)
return imgs

@@ -0,0 +1,119 @@
import downloader
from utils import Session, Downloader, get_ext, LazyUrl, get_print
import ree as re
import json
from io import BytesIO
from translator import tr_
@Downloader.register
class Downloader_likee(Downloader):
type = 'likee'
URLS = ['likee.video']
single = True
display_name = 'Likee'
def init(self):
self.session = Session()
def read(self):
info = get_info(self.url, self.session, self.cw)
self.print_('type: {}'.format(info['type']))
self.artist = info['artist']
if info['type'] != 'single':
video = self.process_playlist(info['title'], info['videos'])
else:
video = info['videos'][0]
video.url()
self.urls.append(video.url)
self.title = info['title']
thumb = BytesIO()
downloader.download(video.url_thumb, referer=self.url, buffer=thumb)
self.setIcon(thumb)
def get_info(url, session, cw=None):
print_ = get_print(cw)
info = {}
info['videos'] = []
if '/video/' in url:
info['type'] = 'single'
video = Video(url, session)
video.url()
info['videos'].append(video)
info['title'] = video.id_
info['artist'] = video.artist
return info
info['type'] = 'channel'
html = downloader.read_html(url, session=session)
data_raw = html.split('window.data = ')[1].split('};')[0]+'}'
data = json.loads(data_raw)
info['uid'] = data['userinfo']['uid']
info['username'] = data['userinfo']['yyuid']
info['artist'] = data['userinfo']['nick_name']
info['title'] = '{} (likee_{})'.format(info['artist'], info['username'])
lastPostId = ''
urls = set()
while True:
url_api = 'https://likee.video/official_website/VideoApi/getUserVideo'
r = session.post(url_api, data={'uid': info['uid'], 'count': '30', 'lastPostId': lastPostId})
data = json.loads(r.text)
videos = data['data']['videoList']
if not videos:
break
for data in videos:
url_post = 'https://likee.video/@{}/video/{}'.format(data['likeeId'], data['postId'])
if url_post in urls:
print_('duplicate: {}'.format(url_post))
continue
urls.add(url_post)
video = Video(url_post, session, data)
video.url()
info['videos'].append(video)
lastPostId = data['postId']
msg = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(info['videos']))
if cw:
if not cw.alive:
return
cw.setTitle(msg)
else:
print(msg)
return info
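# Channel crawling above pages through a user's uploads by POSTing the last seen
# postId to the getUserVideo endpoint; an empty videoList ends the loop and
# duplicate post URLs are skipped. The response is assumed to look roughly like:
#   {"data": {"videoList": [{"postId": "...", "likeeId": "...", "videoUrl": "...", ...}]}}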
class Video(object):
def __init__(self, url, session, data=None):
self.id_ = re.find('/video/([0-9]+)', url, err='no id')
self._session = session
self._data = data
self.url = LazyUrl(url, self.get, self)
def get(self, url):
if self._data:
video = self._data
else:
url_api = 'https://likee.video/official_website/VideoApi/getVideoInfo'
r = self._session.post(url_api, data={'postIds': str(self.id_)})
data = json.loads(r.text)
video = data['data']['videoList'][0]
url_video = video['videoUrl']
self.url_thumb = video['coverUrl']
self.artist = video['nickname']
ext = get_ext(url_video)
self.title = self.id_
self.filename = '{}{}'.format(self.id_, ext)
return url_video

View File

@ -0,0 +1,145 @@
#coding:utf8
import downloader
from utils import Soup, Downloader, LazyUrl, urljoin, try_n, get_outdir, clean_title
import ree as re
import os
from timee import sleep
from translator import tr_
from io import BytesIO
import json
class Image(object):
def __init__(self, item, referer):
self.item = item
self.id = str(item['id'])
self.referer = referer
self.url = LazyUrl(referer, self.get, self)
def get(self, url):
img = urljoin(url, self.item['url_to_original'])
ext = os.path.splitext(img.split('?')[0])[1]
self.filename = u'{}{}'.format(self.id, ext)
return img
class Video(object):
def __init__(self, url, title, url_thumb):
self.url = url
self.title = title
ext = os.path.splitext(url.split('?')[0])[1]
self.filename = u'{}{}'.format(clean_title(title), ext)
self.url_thumb = url_thumb
self.thumb = BytesIO()
downloader.download(self.url_thumb, buffer=self.thumb)
@Downloader.register
class Downloader_luscious(Downloader):
type = 'luscious'
URLS = ['luscious.net']
MAX_CORE = 4
@classmethod
def fix_url(cls, url):
url = url.replace('members.luscious.', 'www.luscious.')
return url
def read(self):
url = fix_url(self.url)
for try_ in range(8):
try:
html = downloader.read_html(url)
break
except Exception as e:
print(e)
self.print_('retry...')
else:
raise
soup = Soup(html)
title = clean_title(get_title(soup))
self.title = tr_(u'읽는 중... {}').format(title)
if '/videos/' in url:
video = get_video(url, soup)
imgs = [video]
self.setIcon(video.thumb)
else:
imgs = get_imgs(url, soup, self.cw)
dir = os.path.join(get_outdir(self.type), title)
names = {}
try:
for name in os.listdir(dir):
id = os.path.splitext(name)[0]
names[id] = name
except:
pass
for img in imgs:
if img.id in names:
url = os.path.join(dir, names[img.id])
else:
url = img.url
self.urls.append(url)
self.title = title#
def update(cw, title, imgs):
s = u'{} {} ({})'.format(tr_(u'읽는 중...'), title, len(imgs))
if cw is not None:
cw.setTitle(s)
else:
print(s)
def fix_url(url):
url = re.sub(r'[^./]+\.luscious', 'legacy.luscious', url)
return url
def get_imgs(url, soup=None, cw=None):
url = fix_url(url)
if soup is None:
html = downloader.read_html(url)
soup = Soup(html)
title = get_title(soup)
imgs = []
for p in range(1, 81):
imgs_new = get_imgs_p(url, p)
if not imgs_new:
break
imgs += imgs_new
update(cw, title, imgs)
return imgs
@try_n(4, sleep=30)
def get_imgs_p(url, p=1):
id = re.find('/albums/[^/]+?([0-9]+)/', url+'/')
print(url, id)
url_api = 'https://api.luscious.net/graphql/nobatch/?operationName=AlbumListOwnPictures&query=+query+AlbumListOwnPictures%28%24input%3A+PictureListInput%21%29+%7B+picture+%7B+list%28input%3A+%24input%29+%7B+info+%7B+...FacetCollectionInfo+%7D+items+%7B+...PictureStandardWithoutAlbum+%7D+%7D+%7D+%7D+fragment+FacetCollectionInfo+on+FacetCollectionInfo+%7B+page+has_next_page+has_previous_page+total_items+total_pages+items_per_page+url_complete+%7D+fragment+PictureStandardWithoutAlbum+on+Picture+%7B+__typename+id+title+created+like_status+number_of_comments+number_of_favorites+status+width+height+resolution+aspect_ratio+url_to_original+url_to_video+is_animated+position+tags+%7B+category+text+url+%7D+permissions+url+thumbnails+%7B+width+height+size+url+%7D+%7D+&variables=%7B%22input%22%3A%7B%22filters%22%3A%5B%7B%22name%22%3A%22album_id%22%2C%22value%22%3A%22{}%22%7D%5D%2C%22display%22%3A%22position%22%2C%22page%22%3A{}%7D%7D'.format(id, p)
data_raw = downloader.read_html(url_api, referer=url)
data = json.loads(data_raw)
has_next_page = data['data']['picture']['list']['info']['has_next_page']
imgs = []
for item in data['data']['picture']['list']['items']:
img = Image(item, url)
imgs.append(img)
return imgs
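# The URL above is the public Luscious GraphQL endpoint with the
# AlbumListOwnPictures operation fully URL-encoded into the query string; only the
# album_id filter and the page number are substituted per request. Each returned
# item carries 'url_to_original', which Image() later resolves to the full-size file.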
def get_video(url, soup):
url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
title = re.find('videos/([^/]+)', url)
video = soup.find('video')
url = video.source.attrs['src']
video = Video(url, title, url_thumb)
return video
def get_title(soup):
return soup.find('h2').text.strip()

View File

@ -0,0 +1,33 @@
from utils import Downloader, LazyUrl, clean_title
from m3u8_tools import playlist2stream, M3u8_stream
import os
@Downloader.register
class Downloader_m3u8(Downloader):
type = 'm3u8'
URLS = ['.m3u8']
single = True
display_name = 'M3U8'
def init(self):
if '://' not in self.url:
self.url = 'http://' + self.url
def read(self):
video = Video(self.url)
self.urls.append(video.url)
self.title = video.title
class Video(object):
def __init__(self, url):
try:
m = playlist2stream(url)
except:
m = M3u8_stream(url)
self.url = LazyUrl(url, lambda _: m, self)
self.title = os.path.splitext(os.path.basename(url))[0]
self.filename = clean_title(self.title, n=-4) + '.mp4'
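# playlist2stream() is tried first on the assumption that the URL is a master
# playlist (picking a variant and returning a downloadable stream); if that fails,
# M3u8_stream() treats the URL as a plain media playlist. Both come from
# m3u8_tools, whose internals are not shown here, so this reading is the apparent intent.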

View File

@ -0,0 +1,211 @@
#coding:utf8
import downloader
from utils import Soup, urljoin, LazyUrl, Downloader, query_url, try_n, Session, get_print, clean_title
import os
from translator import tr_
from timee import sleep
import requests
import ree as re
import clf2#
class Image(object):
def __init__(self, url, p, page):
ext = os.path.splitext(url)[1]
if ext.lower()[1:] not in ['jpg', 'jpeg', 'bmp', 'png', 'gif', 'webm', 'webp']:
ext = '.jpg'
self.filename = u'{:04}{}'.format(p, ext)
if page.title is not None:
self.filename = u'{}/{}'.format(page.title, self.filename)
def f(_):
return url
self.url = LazyUrl(page.url, f, self)
class Page(object):
def __init__(self, title, url, soup=None):
self.title = clean_title(title)
self.url = url
self.soup = soup
@Downloader.register
class Downloader_mrm(Downloader):
type = 'mrm'
URLS = ['myreadingmanga.info']
_soup = None
MAX_CORE = 16
display_name = 'MyReadingManga'
def init(self):
self.session = get_session(self.url, self.cw)
@classmethod
def fix_url(cls, url):
return re.find('https?://myreadingmanga.info/[^/]+', url, err='err')
@property
def soup(self):
if self._soup is None:
for try_ in range(8):
try:
html = read_html(self.url, session=self.session, cw=self.cw)
break
except Exception as e:
e_ = e
self.print_(e)
else:
raise e_
self._soup = Soup(html)
return self._soup
@property
def name(self):
title = get_title(self.soup)
return title
def read(self):
self.title = u'읽는 중... {}'.format(self.name)
imgs = get_imgs(self.url, self.soup, self.session, self.cw)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
def get_title(soup):
title = soup.find('h1', class_='entry-title').text.strip()
title = fix_title(title)
title = clean_title(title)
return title
def get_imgs(url, soup=None, session=None, cw=None):
if soup is None:
html = read_html(url, session=session, cw=cw)
soup = Soup(html)
title = get_title(soup)
pagination = soup.find('div', class_='pagination')
if pagination is None:
page = Page(None, url, soup)
imgs = get_imgs_page(page, session=session)
else:
pages = get_pages(url, soup, session=session)
imgs = []
for i, page in enumerate(pages):
s = u'{} {} / {} ({} / {})'.format(tr_(u'읽는 중...'), title, page.title, i+1, len(pages))
if cw:
if not cw.alive:
return
cw.setTitle(s)
else:
print(s)
imgs += get_imgs_page(page, session=session)
if not imgs:
raise Exception('no imgs')
return imgs
def get_pages(url, soup=None, session=None):
if soup is None:
html = read_html(url, session=session, cw=None)
soup = Soup(html)
pagination = soup.find('div', class_='pagination')
pages = []
hrefs = set()
for a in pagination.findAll('a'):
href = a.attrs.get('href', '')
href = urljoin(url, href)
if not href.startswith(url):
print('not match', href)
continue
while href.endswith('/'):
href = href[:-1]
if href in hrefs:
print('duplicate', href)
continue
hrefs.add(href)
text = a.text.strip()
page = Page(text, href)
pages.append(page)
if url not in hrefs:
page = Page('1', url, soup)
pages.insert(0, page)
return pages
@try_n(4)
def get_imgs_page(page, session=None):
url = page.url
soup = page.soup
if soup is None:
html = read_html(url, session=session, cw=None)
soup = Soup(html)
page.soup = soup
view = soup.find('div', class_='entry-content')
imgs = []
for img in view.findAll('img'):
img = img.attrs.get('data-lazy-src') or img.attrs.get('data-src')
if img is None:
continue
img = urljoin(url, img)
img = Image(img, len(imgs), page)
imgs.append(img)
print(page.title, len(imgs), page.url)
return imgs
def fix_title(title):
title = re.sub(r'\(?[^()]*?c\.[^() ]+\)?', '', title)
while ' ' in title:
title = title.replace(' ', ' ')
return title
def read_html(url, session, cw):
## html = downloader.read_html(url, session=session)
## soup = Soup(html)
##
## cf = soup.find('div', class_='cf-browser-verification')
## if cf is None:
## return html
r = clf2.solve(url, cw=cw, session=session)
return r['html']
@try_n(4)
def get_session(url, cw=None):
print_ = get_print(cw)
## html = downloader.read_html(url)
## soup = Soup(html)
##
## cf = soup.find('div', class_='cf-browser-verification')
## if cf is None:
## print_('no cf protection')
## return None
print_('cf protection')
r = clf2.solve(url, cw=cw)
session = r['session']
return session
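# clf2.solve() appears to drive an embedded browser to pass the Cloudflare
# challenge and hands back the resulting cookies as a requests-compatible session;
# the commented-out blocks above are the older path that only probed for the
# 'cf-browser-verification' marker before deciding whether to solve.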

View File

@ -0,0 +1,170 @@
#coding:utf-8
import downloader
import re
from utils import urljoin, Downloader, Soup, LazyUrl, clean_title
import json
from timee import sleep
import collections
PATTERNS = ['.*blog.naver.com/(?P<username>.+)/(?P<pid>[0-9]+)',
'.*blog.naver.com/.+?blogId=(?P<username>[^&]+).+?logNo=(?P<pid>[0-9]+)',
'.*?(?P<username>[0-9a-zA-Z_-]+)\.blog\.me/(?P<pid>[0-9]+)']
HDR = {
'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'ko, en-US; q=0.7, en; q=0.3',
'Connection': 'Keep-Alive',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
}
def get_id(url):
for pattern in PATTERNS:
m = re.match(pattern, url)
if m is None:
continue
username = m.group('username')
pid = m.group('pid')
break
else:
username, pid = None, None
return username, pid
@Downloader.register
class Downloader_naver(Downloader):
type = 'naver'
URLS = ['blog.naver.', '.blog.me']
display_name = 'Naver Blog'
def init(self):
username, pid = get_id(self.url)
if username is None:
return self.Invalid('Invalid format')
self.url = 'https://blog.naver.com/{}/{}'.format(username, pid)
self.headers = {'User-Agent': downloader.hdr['User-Agent']}
@property
def name(self):
username, pid = get_id(self.url)
return clean_title(u'{}/{}'.format(username, pid))
def read(self):
self.title = u'읽는 중... {}'.format(self.name)
imgs = get_imgs(self.url)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
class Image(object):
def __init__(self, url):
self.url = url
class Video(object):
def __init__(self, url, referer, p):
self.url = LazyUrl(referer, lambda _: url, self)
self.filename = 'video_{}.mp4'.format(p)
def read_page(url, depth=0):
print('read_page', url, depth)
if depth > 10:
raise Exception('Too deep')
html = downloader.read_html(url, header=HDR)
if len(html) < 5000:
id = re.findall('logNo=([0-9]+)', html)[0]
usernames = re.findall('blog.naver.com/([0-9a-zA-Z]+)', url)
if not usernames:
usernames = re.findall('blogId=([0-9a-zA-Z]+)', url)
username = usernames[0]
url = 'https://m.blog.naver.com/PostView.nhn?blogId={}&logNo={}&proxyReferer='.format(username, id)
print('###', username, id, url)
soup = Soup(html)
if soup.find('div', {'id': 'viewTypeSelector'}):
return url, soup
frame = soup.find('frame')
if frame is None:
print('frame is None')
return read_page(url, depth+1)
return read_page(urljoin('https://blog.naver.com', frame.attrs['src']), depth+1)
def get_imgs(url):
url = url.replace('blog.naver', 'm.blog.naver')
url_frame, soup = read_page(url)
imgs = []
urls = set()
view = soup.find('div', {'id': 'viewTypeSelector'})
print('view', view is not None)
imgs_ = view.findAll('span', class_='_img') + view.findAll('img')
for img in imgs_:
url = img.attrs.get('src', None)
if url is None:
url = img.attrs.get('thumburl', None)
if url is None:
print(u'invalid img: {}'.format(url))
continue
if 'ssl.pstatic.net' in url: #
continue
if 'blogpfthumb-phinf.pstatic.net' in url: # profile
continue
if 'dthumb-phinf.pstatic.net' in url: # link
continue
if 'storep-phinf.pstatic.net' in url: # emoticon
continue
url = url.replace('mblogthumb-phinf', 'blogfiles')
#url = re.sub('\?type=[a-zA-Z0-9]*', '?type=w1@2x', url)
#url = re.sub('\?type=[a-zA-Z0-9]*', '', url)
url = url.split('?')[0]
if url in urls:
print('### Duplicate:', url)
continue
urls.add(url)
#url = url.split('?type=')[0]
img = Image(url)
imgs.append(img)
pairs = []
for video in soup.findAll('span', class_='_naverVideo'):
vid = video.attrs['vid']
key = video.attrs['key']
pairs.append((vid, key))
for script in soup.findAll('script', class_='__se_module_data'):
data_raw = script['data-module']
data = json.loads(data_raw)['data']
vid = data.get('vid')
if not vid:
continue
key = data['inkey']
pairs.append((vid, key))
videos = []
for vid, key in pairs:
url_api = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'.format(vid, key)
data_raw = downloader.read_html(url_api)
data = json.loads(data_raw)
fs = data['videos']['list']
fs = sorted(fs, key=lambda f: f['size'], reverse=True)
video = Video(fs[0]['source'], url_frame, len(videos))
videos.append(video)
return imgs + videos
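# Videos embedded in a post are resolved separately from images: each player
# exposes a (vid, key) pair, either as attributes on span._naverVideo or inside
# the __se_module_data JSON, and the rmcnmv play API is then queried for the
# file list, from which the largest file is taken.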

View File

@ -0,0 +1,244 @@
# uncompyle6 version 3.5.0
# Python bytecode 2.7 (62211)
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
# Embedded file name: navertoon_downloader.pyo
# Compiled at: 2019-10-03 10:19:35
import downloader
from utils import Soup, urljoin, Downloader, LazyUrl, get_imgs_already, clean_title, get_ext, get_print
from constants import try_n
import ree as re, os
from timee import sleep
import page_selector
from translator import tr_
import json
class Page(object):
def __init__(self, url, title, p):
self.url = url
self.title = title
self.p = p
class Image(object):
def __init__(self, url, page, p):
ext = get_ext(url)
self.filename = (u'{}/{:04}{}').format(clean_title(page.title), p, ext)
self.url = LazyUrl(page.url, lambda _: url, self)
class Info(object):
def __init__(self, id, title, artist):
self.id = id
self.title = title
self.artist = artist
@Downloader.register
class Downloader_navertoon(Downloader):
type = 'navertoon'
URLS = ['comic.naver.com']
MAX_CORE = 8
MAX_SPEED = 4.0
display_name = 'Naver Webtoon'
def init(self):
self.url = get_main(self.url)
self.__info, _ = get_pages(self.url, self.cw)
@property
def name(self):
id = self.__info.id
title = self.__info.title
artist = self.__info.artist
title = self.format_title('N/A', id, title, artist, 'N/A', 'N/A', 'Korean', prefix='navertoon_')
return clean_title(title)
def read(self):
self.title = tr_(u'읽는 중... {}').format(self.name)

imgs = get_imgs_all(self.url, self.name, cw=self.cw)
for img in imgs:
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = self.name
def get_main(url):
url_main = re.sub('[?&]page=[0-9]+', '', re.sub('[?&]no=[0-9]+', '', url)).replace('detail.nhn', 'list.nhn').replace('m.comic.naver.', 'comic.naver.')
while url_main.endswith('#'):
url_main = url_main[:-1]
return url_main
def set_no(url, p):
if '&no=' not in url:
url = url + ('&no={}').format(p)
return url
url = re.sub('&no=[0-9]+', ('&no={}').format(p), url)
return url
def get_id(url):
return int(url.lower().split('titleid=')[1].split('&')[0])
def set_page(url, p):
if '&page=' in url:
url = re.sub('&page=[0-9]+', ('&page={}').format(p), url)
else:
url += ('&page={}').format(p)
return url
@try_n(4)
def get_pages(url, cw=None):
print_ = get_print(cw)
url = get_main(url).replace('comic.naver.', 'm.comic.naver.')
id = get_id(url)
print('id:', id)
print(url)
html = downloader.read_html(url)
soup = Soup(html)
try:
info = soup.find('div', class_='area_info')
artist = info.find('span', class_='author').text.strip()
except Exception as e:
print(e)
try:
title = ('\n').join(soup.find('div', class_='title').text.strip().split('\n')[:-1]).strip()
except:
title = 'artist not found'
raise Exception(title)
print('artist:', artist)
title = soup.find('meta', {'property': 'og:title'}).attrs['content']
pages = []
nos = set()
for p in range(1, 100):
if p == 1:
url_page = url
else:
url_page = set_page(url, p)
html = downloader.read_html(url_page)
print('read page:', url_page)
soup = Soup(html)
view = soup.findAll('ul', class_='section_episode_list')[(-1)]
for lst in view.findAll('li'):
url_page = urljoin(url, lst.find('a').attrs['href'])
if 'detail.nhn' not in url_page.lower():
continue
print_('url_page: {}'.format(url_page))
text = lst.find('strong', class_='title').find('span', class_='name').text.strip()
no = int(re.findall('[?&]no=([0-9]+)', url_page)[0])
if no in nos:
print('duplicate no: {}'.format(no))
continue
nos.add(no)
text = '{:04} - {}'.format(no, text)
page = Page(url_page, text, p)
pages.append(page)
btn_next = soup.find('a', class_='btn_next')
if btn_next is None or btn_next.attrs['href'] == '#':
print('end of page')
break
info = Info(id, title, artist)
return (
info, pages)
@page_selector.register('navertoon')
@try_n(4)
def f(url):
url = get_main(url)
info, pages = get_pages(url)
return pages
@try_n(6)
def get_imgs(page, cw=None):
print_ = get_print(cw)
html = downloader.read_html(page.url)
soup = Soup(html)
type_ = re.find('''webtoonType *: *['"](.+?)['"]''', html)
print_('type: {}'.format(type_))
imgs = []
if type_ == 'DEFAULT': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=715772
view = soup.find('div', class_='toon_view_lst')
for img in view.findAll('img'):
img = img.attrs.get('data-src')
if not img:
continue
img = urljoin(page.url, img)
img = Image(img, page, len(imgs))
imgs.append(img)
elif type_ == 'CUTTOON': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=752803
view = soup.find('div', class_='swiper-wrapper')
for div in view.findAll('div', class_='swiper-slide'):
if div.parent != view:
continue
if div.find('div', class_='cut_viewer_last'):
print('cut_viewer_last')
continue
if div.find('div', class_='cut_viewer_recomm'):
print('cut_viewer_recomm')
continue
img = div.find('img')
img = img.attrs['data-src']
img = urljoin(page.url, img)
img = Image(img, page, len(imgs))
imgs.append(img)
elif type_ == 'EFFECTTOON': #2313; https://m.comic.naver.com/webtoon/detail.nhn?titleId=670144
img_base = re.find('''imageUrl *: *['"](.+?)['"]''', html) + '/'
print('img_base:', img_base)
url_api = re.find('''documentUrl *: *['"](.+?)['"]''', html)
data_raw = downloader.read_html(url_api, page.url)
data = json.loads(data_raw)
for img in data['assets']['stillcut'].values(): # ordered in python3.7+
img = urljoin(img_base, img)
img = Image(img, page, len(imgs))
imgs.append(img)
else:
_imgs = re.findall('sImageUrl *: *[\'"](.+?)[\'"]', html)
if not _imgs:
raise Exception('no imgs')
for img in _imgs:
img = urljoin(page.url, img)
img = Image(img, page, len(imgs))
imgs.append(img)
return imgs
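# get_imgs() branches on the page's webtoonType:
# - DEFAULT: plain <img data-src> list inside div.toon_view_lst
# - CUTTOON: one image per swiper slide, skipping the trailing last/recommendation cuts
# - EFFECTTOON: a JSON document (documentUrl) listing stillcuts relative to imageUrl
# - otherwise: fall back to the sImageUrl entries scraped from the inline script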
def get_imgs_all(url, title, cw=None):
print_ = get_print(cw)
info, pages = get_pages(url, cw)
pages = page_selector.filter(pages, cw)
imgs = []
for p, page in enumerate(pages):
imgs_already = get_imgs_already('navertoon', title, page, cw)
if imgs_already:
imgs += imgs_already
continue
imgs_new = get_imgs(page, cw)
print_('{}: {}'.format(page.title, len(imgs_new)))
imgs += imgs_new
if cw is not None:
cw.setTitle(tr_(u'읽는 중... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
if not cw.alive:
break
return imgs

View File

@ -0,0 +1,63 @@
import downloader
import ree as re
from io import BytesIO as IO
import os
from constants import try_n
from error_printer import print_error
from utils import Downloader, compatstr, LazyUrl, get_ext, format_filename, clean_title
import ytdl
@Downloader.register
class Downloader_navertv(Downloader):
type = 'navertv'
single = True
URLS = ['tv.naver.com']
display_name = 'Naver TV'
def init(self):
if not re.match('https?://.+', self.url, re.IGNORECASE):
self.url = 'https://tv.naver.com/v/{}'.format(self.url)
def read(self):
video = Video(self.url)
video.url()#
self.urls.append(video.url)
self.setIcon(video.thumb)
self.enableSegment()
self.title = video.title
class Video(object):
_url = None
def __init__(self, url):
self.url = LazyUrl(url, self.get, self)
@try_n(4)
def get(self, url):
if self._url:
return self._url
ydl = ytdl.YoutubeDL()
info = ydl.extract_info(url)
fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
fs = sorted(fs, key=lambda f: int(f.get('width', 0)), reverse=True)
if not fs:
raise Exception('No MP4 videos')
f = fs[0]
self._url = f['url']
self.thumb_url = info['thumbnails'][0]['url']
self.thumb = IO()
downloader.download(self.thumb_url, buffer=self.thumb)
self.title = info['title']
id = info['id']
ext = get_ext(self._url)
self.filename = format_filename(self.title, id, ext)
return self._url
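# ytdl (presumably a bundled youtube-dl fork) supplies the format list; only
# direct http/https formats are kept and the widest one is chosen, so DASH/HLS
# entries are ignored here.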

View File

@ -0,0 +1,97 @@
#coding:utf8
import downloader
import nndownload
from io import BytesIO
import ree as re
from utils import Downloader, get_print, compatstr, format_filename, clean_title, try_n
from nico_login import login, logout
def get_id(url):
if '/watch/' in url:
id = re.findall('/watch/([a-zA-Z0-9]+)', url)[0]
else:
id = url
return id
class Video(object):
def __init__(self, session, info):
self.session = session
self.info = info
self.url = info['url']
self.title = info['title']
self.ext = info['ext']
self.id = info['id']
self.fileName = format_filename(self.title, self.id, self.ext)
self.url_thumb = info['thumbnail_url']
print('thumb:', self.url_thumb)
self.thumb = BytesIO()
downloader.download(self.url_thumb, buffer=self.thumb)
def __repr__(self):
return u'Video({})'.format(self.id)
@Downloader.register
class Downloader_nico(Downloader):
type = 'nico'
single = True
URLS = ['nicovideo.jp']
display_name = 'Niconico'
def init(self):
if not re.match('https?://.+', self.url, re.IGNORECASE):
self.url = 'https://www.nicovideo.jp/watch/{}'.format(self.url)
@property
def id_(self):
return get_id(self.url)
def read(self):
ui_setting = self.ui_setting
if ui_setting.nicoBox.isChecked():
username = compatstr(ui_setting.nico_id.text())
password = compatstr(ui_setting.nico_pw.text())
else:
username = ''
password = ''
try:
session = login(username, password)
except Exception as e:
logout()
return self.Invalid(u'Failed to login: {}'.format(self.url), fail=True)
self.session = session
try:
video = get_video(session, self.id_, cw=self.cw)
except Exception as e:
logout()
raise
self.urls.append(video.url)
self.filenames[video.url] = video.fileName
self.setIcon(video.thumb)
self.enableSegment()
self.title = video.title
@try_n(2)
def get_video(session, id, cw=None):
print_ = get_print(cw)
try:
info = nndownload.request_video(session, id)
except:
raise Exception('Err')
video = Video(session, info)
return video
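# nndownload.request_video() is assumed to return a dict with at least 'url',
# 'title', 'ext', 'id' and 'thumbnail_url'; Video() consumes exactly those keys,
# and login()/logout() manage the account session around it.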

View File

@ -0,0 +1,164 @@
#coding: utf-8
import downloader
from utils import Downloader, urljoin, get_max_range, query_url, Soup, Session, LazyUrl, get_print, clean_title, try_n, get_ext
from translator import tr_
from constants import clean_url
import ree as re
from errors import LoginRequired
def get_id(url):
return re.find('id=([0-9]+)', url)
def get_name(soup):
return soup.find('p', class_='user_icon').find('a', class_='name').text.strip()
def isLogin(soup):
if soup.find('ul', id="sub-menu"):
return True
return False
@Downloader.register
class Downloader_nijie(Downloader):
type = 'nijie'
URLS = ['nijie.info']
MAX_CORE = 4
display_name = 'ニジエ'
def init(self):
if 'members.php' not in self.url and 'members_illust.php' not in self.url:
raise NotImplementedError()
id = get_id(self.url)
html = downloader.read_html('https://nijie.info/members.php?id={}'.format(id))
self.soup = Soup(html)
if not isLogin(self.soup):
raise LoginRequired()
@classmethod
def fix_url(cls, url):
if 'nijie.info' not in url.lower():
url = 'https://nijie.info/members.php?id={}'.format(url)
return url.replace('http://', 'https://')
@property
def name(self):
name = u'{} (nijie_{})'.format(get_name(self.soup), get_id(self.url))
return clean_title(name)
def read(self):
self.title = self.name
imgs = get_imgs(self.url, self.name, cw=self.cw)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
class Image(object):
def __init__(self, id, url, p, lazy=True, img=None):
self.id = id
self.p = p
if lazy:
self.url = LazyUrl(url, self.get_single, self)
else:
self.url = LazyUrl(url, lambda _:img, self)
ext = get_ext(img)
self.filename = '{}_p{}{}'.format(id, p, ext)
def get_single(self, url): # single
img = get_imgs_post(self.id, url)[0].url()
ext = get_ext(img)
self.filename = '{}_p{}{}'.format(self.id, self.p, ext)
return img
@try_n(8, sleep=10)
def get_imgs_post(id, url):
#print('get_imgs_post', id, url)
html = downloader.read_html(url)
soup = Soup(html)
view = soup.find('div', id='gallery')
imgs = []
for img in view.findAll(class_='mozamoza'):
url_img = urljoin(url, img['src'])
url_img = re.sub('__rs_l[0-9]+x[0-9]+/', '', url_img)
img = Image(id, url, len(imgs), False, url_img)
imgs.append(img)
return imgs
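# The 'mozamoza' <img> nodes carry resized URLs such as
#   .../__rs_l120x120/nijie_picture/12345_0.png   (hypothetical example)
# and stripping the '__rs_lWxH/' segment yields the original-resolution file.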
def setPage(url, page):
# Always use HTTPS
url = url.replace('http://', 'https://')
# Change the page
if 'p=' in url:
url = re.sub('p=[0-9]*', 'p={}'.format(page), url)
else:
url += '&p={}'.format(page)
return url
def get_imgs(url, title=None, cw=None):
print_ = get_print(cw)
url = clean_url(url)
id = get_id(url)
url = u'https://nijie.info/members_illust.php?id={}'.format(id)
# Range
max_pid = get_max_range(cw)
imgs = []
url_imgs = set()
for p in range(1, 1+100):
url = setPage(url, p)
print_(url)
html = downloader.read_html(url)
soup = Soup(html)
posts = soup.findAll('div', class_='nijie')
if not posts:
print('no posts')
break
c = 0
for post in posts:
url_img = urljoin(url, post.a.attrs['href'])
if url_img in url_imgs:
print('duplicate:', url_img)
continue
url_imgs.add(url_img)
id = int(re.find('[?&]id=([0-9]+)', url_img))
multi = post.find('div', class_='thumbnail-icon')
if multi:
imgs_ = get_imgs_post(id, url_img)#
else:
imgs_ = [Image(id, url_img, 0)]
imgs += imgs_
c += 1
if len(imgs) >= max_pid:
break
msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
if cw:
if not cw.alive:
return
cw.setTitle(msg)
else:
print(msg)
if len(imgs) >= max_pid or c == 0:
break
return imgs

View File

@ -0,0 +1,109 @@
import downloader
from utils import Session, Soup, LazyUrl, get_print, Downloader, get_ext, try_n, format_filename, clean_title
import ree as re
import json
from io import BytesIO
class EmbedUrlError(Exception): pass
@Downloader.register
class Downloader_pandoratv(Downloader):
type = 'pandoratv'
URLS = ['pandora.tv']
single = True
display_name = 'Pandora TV'
@classmethod
def fix_url(cls, url):
return url.split('#')[0]
def read(self):
video = Video(self.url, format, cw=self.cw)
try:
video.url()#
except EmbedUrlError as e:
return self.Invalid(e.args[0])
self.urls.append(video.url)
self.setIcon(video.thumb)
self.enableSegment()
self.title = video.title
def extract(name, html, cw=None):
print_ = get_print(cw)
value = re.find(r'''{} *= *['"](.*?)['"]'''.format(name), html)
if value is None:
value = json.loads(re.find(r'''{} *= *(\[.*?\])'''.format(name), html))
print_('{}: {}'.format(name, value))
if value is None:
raise Exception('No {}'.format(name))
return value
class Video(object):
_url_video = None
def __init__(self, url, format='title', cw=None):
self.url = LazyUrl(url, self.get, self)
self.format = format
self.cw = cw
@try_n(2)
def get(self, url):
if self._url_video:
return self._url_video
cw = self.cw
print_ = get_print(cw)
html = downloader.read_html(url)
soup = Soup(html)
embedUrl = extract('embedUrl', html, cw)
if embedUrl:
raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))
uid = extract('strLocalChUserId', html, cw)
pid = extract('nLocalPrgId', html, cw)
fid = extract('strFid', html, cw)
resolType = extract('strResolType', html, cw)
resolArr = extract('strResolArr', html, cw)
vodSvr = extract('nVodSvr', html, cw)
resols = extract('nInfo', html, cw)
runtime = extract('runtime', html, cw)
url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
data = {
'userId': uid,
'prgId': pid,
'fid': fid,
'resolType': resolType,
'resolArr': ','.join(map(str, resolArr)),
'vodSvr': vodSvr,
'resol': max(resols),
'runtime': runtime,
'tvbox': 'false',
'defResol': 'true',
'embed': 'false',
}
session = Session()
r = session.post(url_api, headers={'Referer': url}, data=data)
data = json.loads(r.text)
self._url_video = data['src']
self.title = soup.find('meta', {'property': 'og:description'})['content']
ext = get_ext(self._url_video)
self.filename = format_filename(self.title, pid, ext)
self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
self.thumb = BytesIO()
downloader.download(self.url_thumb, buffer=self.thumb)
return self._url_video
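# The player page inlines all parameters (strLocalChUserId, nLocalPrgId, strFid,
# the resolution arrays, ...) as JS variables; extract() pulls them out with a
# regex, and a POST to getVodUrl with the highest value in nInfo as 'resol'
# returns the direct video URL in data['src'].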

View File

@ -0,0 +1,216 @@
# uncompyle6 version 3.5.0
# Python bytecode 2.7 (62211)
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
# Embedded file name: pinter_downloader.pyo
# Compiled at: 2019-10-21 07:44:55
import downloader
from utils import Session, Downloader, LazyUrl, clean_url, try_n, Soup, clean_title
import json, os, ree as re
from timee import sleep
from translator import tr_
import urllib
import constants
from ratelimit import limits, sleep_and_retry
BASE_URL = 'https://www.pinterest.com'
def get_info(username, board, api):
if '/' in board:
section = (u'/').join(board.split('/')[1:])
board = board.split('/')[0]
info = api.board(username, board)
for s in api.board_sections(info['id']):
print(s['slug'].lower(), section)
if s['slug'].lower() == section.lower():
break
else:
raise Exception('Invalid section')
title = s['title']
info.update(s)
info['name'] = (u'{}/{}').format(info['name'], title)
print('section_id:', info['id'])
else:
info = api.board(username, board)
#info = board_info(username, board)
return info
def board_info(username, board):
url = u'https://www.pinterest.com/{}/{}/'.format(username, board)
html = downloader.read_html(url)
soup = Soup(html)
data = soup.find('script', id='initial-state').text
data = json.loads(data)['resourceResponses']
info = data[0]['response']['data']
return info
@Downloader.register
class Downloader_pinter(Downloader):
type = 'pinter'
URLS = ['pinterest.']
type_pinter = 'board'
display_name = 'Pinterest'
@try_n(4)
def init(self):
if 'pinterest.' not in self.url:
self.url = u'https://www.pinterest.com/{}'.format(self.url)
self.api = PinterestAPI()
username, board = get_username_board(self.url)
if '/' in board:
self.type_pinter = 'section'
self.print_(('type: {}').format(self.type_pinter))
self.info = get_info(username, board, self.api)
@property
def name(self):
username = self.info['owner']['username']
name = self.info['name']
return clean_title((u'{}/{}').format(username, name))
def read(self):
self.title = self.name
id = self.info['id']
imgs = get_imgs(id, self.api, cw=self.cw, title=self.name, type=self.type_pinter)
for img in imgs:
self.urls.append(img.url)
self.title = self.name
class PinterestAPI:
HEADERS = {'Accept': 'application/json, text/javascript, */*, q=0.01',
'Accept-Language': 'en-US,en;q=0.5',
'X-Pinterest-AppState': 'active',
'X-APP-VERSION': 'cb1c7f9',
'X-Requested-With': 'XMLHttpRequest',
'Origin': BASE_URL + '/'}
def __init__(self):
self.session = Session()
self.session.headers.update(self.HEADERS)
def pin(self, pin_id):
options = {'id': pin_id, 'field_set_key': 'detailed'}
return self._call('Pin', options)['resource_response']['data']
def pin_related(self, pin_id):
options = {'pin': pin_id, 'add_vase': True, 'pins_only': True}
return self._pagination('RelatedPinFeed', options)
def board(self, user, board):
options = {'slug': board, 'username': user, 'field_set_key': 'detailed'}
return self._call('Board', options)['resource_response']['data']
def board_pins(self, board_id):
options = {'board_id': board_id}
return self._pagination('BoardFeed', options)
def board_related(self, board_id):
options = {'board_id': board_id, 'add_vase': True}
return self._pagination('BoardRelatedPixieFeed', options)
def board_sections(self, board_id):
options = {'board_id': board_id}
return self._pagination('BoardSections', options)
def board_section_pins(self, section_id):
options = {'section_id': section_id}
return self._pagination('BoardSectionPins', options)
@try_n(4)
@sleep_and_retry
@limits(1, 4) # 1 call per 4 seconds (Pinterest allows roughly 1000 calls per hour)
def _call(self, resource, options):
url = ('{}/resource/{}Resource/get/').format(BASE_URL, resource)
params = {'data': json.dumps({'options': options}), 'source_url': ''}
print('_call: {}, {}'.format(url, params))
r = self.session.get(url, params=params)
print(r)
s = r.text
status_code = r.status_code
try:
data = json.loads(s)
except ValueError:
data = {}
else:
if status_code < 400 and not r.history:
return data
if status_code == 404 or r.history:
raise Exception('Not Found')
raise Exception('API request failed: {}'.format(status_code))
def _pagination(self, resource, options):
while True:
data = self._call(resource, options)
for x in data['resource_response']['data']:
yield x
try:
bookmarks = data['resource']['options']['bookmarks']
if not bookmarks or bookmarks[0] == '-end-' or bookmarks[0].startswith('Y2JOb25lO'):
return
options['bookmarks'] = bookmarks
except KeyError:
return
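# Pinterest resources are paged with opaque 'bookmarks' tokens: each response
# carries the token for the next page, and '-end-' (or a token starting with
# 'Y2JOb25lO', which base64-decodes to a value beginning with 'cbNone') marks
# the last page, ending the generator.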
class Image(object):
def __init__(self, img):
self.id = img['id']
print(self.id)
self.url0 = img['images']['orig']['url']
def f(_):
return self.url0
self.url = LazyUrl(('{}/pin/{}/').format(BASE_URL, self.id), f, self)
ext = os.path.splitext(self.url0.split('?')[0].split('#')[0])[1]
self.filename = ('{}{}').format(self.id, ext)
def get_imgs(id, api, cw=None, title=None, type='board'):
imgs = []
ids = set()
print('get_imgs: type={}'.format(type))
if type == 'board':
gen = api.board_pins(id)
elif type == 'section':
gen = api.board_section_pins(id)
else:
raise Exception((u'Type "{}" is not supported').format(type))
for img in gen:
if 'images' not in img:
print('skip img:', img['id'])
continue
img = Image(img)
if img.id in ids:
print('duplicate:', img.id)
continue
ids.add(img.id)
print(img.url)
print(img.filename)
print()
imgs.append(img)
if cw is not None:
if not cw.alive:
return []
cw.setTitle((u'{} {} ({})').format(tr_(u'읽는 중...'), title, len(imgs)))
return imgs
def get_username_board(url):
url = clean_url(url)
m = re.search('pinterest.[a-zA-Z.]+?/([^/]+)/([^#\\?]+)', url)
username, board = m.groups()
board = urllib.parse.unquote(board).strip()
while board.endswith('/'):
board = board[:-1].strip()
return (username, board)

View File

@ -14,6 +14,10 @@ except ImportError:
import constants
from datetime import datetime
import requests
+ from timee import sleep
+ from collections import deque
+ from locker import lock
+ import threading
FORCE_LOGIN = True
LIMIT = 48
for header in ['pixiv_illust', 'pixiv_bmk', 'pixiv_search', 'pixiv_following', 'pixiv_following_r18']:
@ -27,6 +31,7 @@ class Downloader_pixiv(Downloader):
type = 'pixiv'
MAX_CORE = 16
keep_date = True
+ STEP = 8, 32

@classmethod
def fix_url(cls, url):
@ -107,10 +112,10 @@
def profile(self, id_):
return self.call('user/{}/profile/all?lang=en'.format(id_))

- def bookmarks(self, id_, offset=0, limit=None):
+ def bookmarks(self, id_, offset=0, limit=None, rest='show'):
if limit is None:
limit = LIMIT
- return self.call('user/{}/illusts/bookmarks?tag=&offset={}&limit={}&rest=show&lang=en'.format(id_, offset, limit))
+ return self.call('user/{}/illusts/bookmarks?tag=&offset={}&limit={}&rest={}&lang=en'.format(id_, offset, limit, rest))

def search(self, q, order='date_d', mode='all', p=1, s_mode='s_tag', type_='all'):
return self.call('search/artworks/{0}?word={0}&order={1}&mode={2}&p={3}&s_mode={4}&type={5}&lang=en'.format(quote(q), order, mode, p, s_mode, type_))
@ -254,13 +259,17 @@ def get_info(url, cw=None, depth=0):
id_ = api.user_id(url)
if id_ is None: #
id_ = my_id()
+ if id_ == my_id():
+ rest = 'all'
+ else:
+ rest = 'show'
process_user(id_, info, api)
info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
ids = []
ids_set = set()
offset = 0
while len(ids) < max_pid:
- data = api.bookmarks(id_, offset)
+ data = api.bookmarks(id_, offset, rest=rest)
c = 0
for id in [work['id'] for work in data['works']]:
if id in ids_set:
@ -359,15 +368,54 @@ def process_user(id_, info, api):
def process_ids(ids, info, imgs, cw, depth=0):
print_ = get_print(cw)
max_pid = get_max_range(cw)
- for i, id_illust in enumerate(ids):
- try:
- info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_illust), cw, depth=depth+1)
- except Exception as e:
- if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired): # logout during extraction
- raise e
- print_('process_ids error ({}):\n{}'.format(depth, print_error(e)[0]))
- continue
- imgs += info_illust['imgs']
+ class Thread(threading.Thread):
+ alive = True
+ rem = 0
+ def __init__(self, queue):
+ super().__init__(daemon=True)
+ self.queue = queue
+ @classmethod
+ @lock
+ def add_rem(cls, x):
+ cls.rem += x
+ def run(self):
+ while self.alive:
+ try:
+ id_, res, i = self.queue.popleft()
+ except Exception as e:
+ sleep(.1)
+ continue
+ try:
+ info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_), cw, depth=depth+1)
+ res[i] = info_illust['imgs']
+ except Exception as e:
+ if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired): # logout during extraction
+ res[i] = e
+ print_('process_ids error ({}):\n{}'.format(depth, print_error(e)[0]))
+ finally:
+ Thread.add_rem(-1)
+ queue = deque()
+ n, step = Downloader_pixiv.STEP
+ print_('{} / {}'.format(n, step))
+ ts = []
+ for i in range(n):
+ t = Thread(queue)
+ t.start()
+ ts.append(t)
+ for i in range(0, len(ids), step):
+ res = [[]]*step
+ for j, id_illust in enumerate(ids[i:i+step]):
+ queue.append((id_illust, res, j))
+ Thread.add_rem(1)
+ while Thread.rem:
+ sleep(.001, cw)
+ for imgs_ in res:
+ if isinstance(imgs_, Exception):
+ raise imgs_
+ imgs += imgs_
s = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(imgs))
if cw:
cw.setTitle(s)
@ -377,3 +425,5 @@ def process_ids(ids, info, imgs, cw, depth=0):
break
if depth == 0:
check_alive(cw)
+ for t in ts:
+ t.alive = False
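# Note on the rewritten process_ids(): work is fanned out to a small pool of
# daemon threads (STEP = 8 workers, 32 ids per batch); each batch writes its
# results into a preallocated list, any stored exception is re-raised on the
# main thread, and the workers are stopped by clearing t.alive at the end.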

View File

@ -0,0 +1,530 @@
#coding:utf8
'''
Pornhub Downloader
'''
from __future__ import division, print_function, unicode_literals
from io import BytesIO
import os
import js2py
import downloader
import ree as re
from utils import (Downloader, Soup, try_n, LazyUrl, urljoin, get_print,
Session, get_max_range, filter_range, get_ext,
lock, format_filename, clean_title, get_resolution)
import clf2
import utils
from m3u8_tools import playlist2stream, M3u8_stream
class File(object):
'''
File
'''
def __init__(self, id_, title, url, url_thumb):
self.id_ = id_
self.title = clean_title('{}'.format(title))
self.url = url
ext = get_ext(self.url)
if ext.lower() == '.m3u8':
try:
self.url = playlist2stream(self.url, n_thread=4)
except:
self.url = M3u8_stream(self.url, n_thread=4)
self.url_thumb = url_thumb
self.thumb = BytesIO()
downloader.download(self.url_thumb, buffer=self.thumb)
if ext.lower() == '.m3u8':
ext = '.mp4'
self.filename = format_filename(self.title, self.id_, ext)
print('filename:', self.filename)
class Video(object):
'''
Video
'''
_url = None
filename = None
thumb = None
def __init__(self, url, cw, session):
self.url = LazyUrl(url, self.get, self)
self.cw = cw
self.session = session
def get(self, url):
'''
get
'''
cw = self.cw
session = self.session
print_ = get_print(cw)
if self._url:
return self._url
id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
re.find(r'/embed/(\w+)', url, re.IGNORECASE)
print('id: {}'.format(id_))
if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))
html = downloader.read_html(url, session=session)
soup = Soup(html)
soup = fix_soup(soup, url, session, cw)
html = str(soup)
# removed
if soup.find('div', class_='removed'):
raise Exception('removed')
gif = soup.find('div', {'id': 'gifImageSection'})
if gif:
print_('GIF')
id_ = url.split('/gif/')[1]
id_ = re.findall('[0-9a-zA-Z]+', id_)[0]
jss = list(gif.children)
for js in jss:
if 'data-mp4' in getattr(js, 'attrs', {}):
break
else:
raise Exception('gif mp4 url not found')
title = js['data-gif-title']
url = js['data-mp4']
url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
file = File('gif_{}'.format(id_), title, url, url_thumb)
else:
if id_ is None:
raise Exception('no id')
print_('Video')
j = decode(html, cw)
# 1968
#title = j['video_title']
title = soup.find('h1', class_='title').text.strip()
url_thumb = j['image_url']
videos = []
for video in j['mediaDefinitions']:
url_ = video.get('videoUrl').strip()
ext = get_ext(url_)
if ext.lower() not in ['.mp4', '.m3u8']:
print('not mp4: {}'.format(ext))
continue
quality = video.get('quality', 0)
if isinstance(quality, list):
quality = quality[0]
video['quality'] = int(quality)
print_('[{}p] {}'.format(quality, url_))
videos.append(video)
if not videos:
raise Exception('No videos')
videos = sorted(videos, key=lambda video: video['quality'])
res = get_resolution()
videos_good = [video for video in videos if video['quality'] <= res]
if videos_good:
video = videos_good[-1]
else:
video = videos[0]
print_('\n[{}p] {}'.format(video['quality'], video['videoUrl']))
file = File(id_, title, video['videoUrl'].strip(), url_thumb)
self._url = file.url
self.title = file.title
self.filename = file.filename
self.thumb = file.thumb
return self._url
def is_login(session, cw=None, n=2):
'''
is_login
'''
print_ = get_print(cw)
print_('is_login {}'.format(n))
if n <= 0:
return False
url = 'https://www.pornhubpremium.com'
soup = downloader.read_soup(url, session=session)
soup = fix_soup(soup, url, session, cw)
html = str(soup)
if soup.find('ul', id='profileMenuDropdown'):
return True
return is_login(session, cw, n-1)
@Downloader.register
class Downloader_pornhub(Downloader):
'''
Downloader
'''
type = 'pornhub'
single = True
strip_header = False
URLS = ['pornhub.com', 'pornhubpremium.com']
def init(self):
self.session = Session() # 1791
if 'pornhub_gif_' in self.url:
self.url = 'https://www.pornhub.com/gif/{}'.format(
self.url.replace('pornhub_gif_', ''))
elif 'pornhub_album_' in self.url:
self.url = 'https://www.pornhub.com/album/{}'.format(
self.url.replace('pornhub_album_', ''))
elif 'pornhub_' in self.url:
self.url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
.format(self.url.replace('pornhub_', ''))
if 'pornhubpremium.com' in self.url.lower() and\
not is_login(self.session, self.cw):
return self.Invalid('[Pornhub] Login cookies required')
@classmethod
def key_id(cls, url):
for domain in cls.URLS:
if domain in url:
id_ = domain + url.split(domain)[1]
break
else:
raise Exception('no id')
return id_.split('#')[0]
def read(self):
cw = self.cw
session = self.session
videos = []
tab = ''.join(self.url.replace('pornhubpremium.com', 'pornhub.com', 1).split('?')[0].split('#')[0].split('pornhub.com/')[-1].split('/')[2:3])
if '/album/' in self.url:
self.print_('Album')
info = read_album(self.url, session=session)
self.single = False
for photo in info['photos']:
self.urls.append(photo.url)
self.title = clean_title(info['title'])
elif '/photo/' in self.url:
self.print_('Photo')
info = read_photo(self.url, session=session)
for photo in info['photos']:
self.urls.append(photo.url)
self.title = info['title']
elif tab not in ['', 'videos']:
raise NotImplementedError(tab)
elif 'viewkey=' not in self.url.lower() and\
'/embed/' not in self.url.lower() and\
'/gif/' not in self.url.lower():
self.print_('videos')
info = get_videos(self.url, cw)
hrefs = info['hrefs']
self.print_('videos: {}'.format(len(hrefs)))
if not hrefs:
raise Exception('no hrefs')
videos = [Video(href, cw, session) for href in hrefs]
video = self.process_playlist(info['title'], videos)
self.setIcon(video.thumb)
self.enableSegment()
else:
video = Video(self.url, cw, session)
video.url()
self.urls.append(video.url)
self.setIcon(video.thumb)
self.title = video.title
self.enableSegment()
def fix_soup(soup, url, session=None, cw=None):
'''
fix_soup
'''
print_ = get_print(cw)
if soup.find('div', class_='logo'):
return soup
print_('invalid soup: {}'.format(url))
res = clf2.solve(url, session=session, cw=cw)
return Soup(res['html'])
class Photo(object):
'''
Photo
'''
def __init__(self, id_, url, referer):
self.id_ = id_
self.url = LazyUrl(referer, lambda x: url, self)
ext = os.path.splitext(url.split('?')[0])[1]
self.filename = '{}{}'.format(id_, ext)
@try_n(8)
def read_album(url, session=None):
'''
read_album
'''
soup = downloader.read_soup(url, session=session)
id_album = re.find('/album/([0-9]+)', url, err='no album id')
url_json = 'https://www.pornhub.com/album/show_album_json?album={}'.format(id_album)
data = downloader.read_json(url_json, url, session=session)
block = soup.find('div', class_='photoAlbumListBlock')
href = block.a.attrs['href']
id_ = re.find('/photo/([0-9]+)', href, err='no photo id')
ids = [id_]
while True:
item = data[id_]
id_ = item['next']
if id_ in ids:
break
ids.append(id_)
photos = []
for id_ in ids:
item = data[id_]
img = item['img_large']
referer = 'https://www.pornhub.com/photo/{}'.format(id_)
photo = Photo(id_, img, referer)
photos.append(photo)
info = {}
title = clean_title(soup.find('h1', class_='photoAlbumTitleV2').text)
info['title'] = format_filename(title, 'album_{}'.format(id_album))
info['photos'] = photos
return info
@try_n(8)
def read_photo(url, session=None):
'''
read_photo
'''
id_ = re.find('/photo/([0-9]+)', url, err='no photo id')
soup = downloader.read_soup(url, session=session)
div = soup.find('div', id='thumbSlider')
href = urljoin(url, div.find('a').attrs['href'])
info = read_album(href)
photos = []
for photo in info['photos']:
if str(photo.id_) == id_:
photos.append(photo)
info['photos'] = photos
info['title'] = '{} - {}'.format(info['title'], photos[0].filename)
return info
@try_n(4)
def get_videos(url, cw=None):
'''
get_videos
'''
print_ = get_print(cw)
if '/users/' in url:
mode = 'users'
username = url.split('/users/')[1].split('/')[0]
elif '/pornstar/' in url:
mode = 'pornstar'
username = url.split('/pornstar/')[1].split('/')[0]
elif '/model/' in url:
mode = 'model'
username = url.split('/model/')[1].split('/')[0]
elif '/channels/' in url:
mode = 'channels'
username = url.split('/channels/')[1].split('/')[0]
elif '/playlist/' in url:
mode = 'playlist'
username = url.split('/playlist/')[1].split('/')[0]
else:
raise Exception('Not supported url')
username = username.split('?')[0].split('#')[0]
session = Session()
if mode in ['pornstar']:
url_main = 'https://www.pornhub.com/{}/{}'.format(mode, username)
html = downloader.read_html(url_main, session=session)
soup = Soup(html)
soup = fix_soup(soup, url_main, session, cw)
for a in soup.findAll('a'):
if '/{}/{}/videos/upload'.format(mode, username) in a.attrs.get('href', ''):
free = True
break
else:
free = False
print_('free: {}'.format(free))
# Range
max_pid = get_max_range(cw, 500)
max_pid = min(max_pid, 2000)#
html = downloader.read_html(url, session=session)
soup = fix_soup(Soup(html), url, session, cw)
info = {}
# get title
h1 = soup.find('h1')
if h1:
header = 'Playlist'
title = h1.find(id='watchPlaylist')
else:
title = None
if not title:
header = 'Channel'
profile = soup.find('div', class_='profileUserName')
wrapper = soup.find('div', class_='titleWrapper')
bio = soup.find('div', class_='withBio')
title = soup.find('h1', {'itemprop':'name'})
if not title and profile:
title = profile.a
if not title and wrapper:
title = wrapper.h1
if not title and bio:
title = bio.h1
if not title:
raise Exception('No title')
#print(title)
info['title'] = '[{}] {}'.format(header, title.text.strip())
token = re.find('''token *= *['"](.*?)['"]''', html)
print_('token: {}'.format(token))
# get links
hrefs = []
fail = 0
for p in range(1, 1+100):
try:
if mode in ['users', 'model']:
if mode == 'users':
url_api = 'https://www.pornhub.com/users/{}/videos/public/'\
'ajax?o=mr&page={}'.format(username, p)
elif mode == 'model':
url_api = 'https://www.pornhub.com/model/{}/videos/upload/'\
'ajax?o=mr&page={}'.format(username, p)
r = session.post(url_api)
soup = Soup(r.text)
if soup.find('h1'):
print('break: h1')
break
elif mode in ['pornstar']:
if free:
url_api = 'https://www.pornhub.com/{}/{}/videos/upload'\
'?page={}'.format(mode, username, p)
soup = downloader.read_soup(url_api, session=session)
soup = fix_soup(soup, url_api, session, cw)
soup = soup.find('div', class_='videoUList')
else:
url_api = 'https://www.pornhub.com/{}/{}?page={}'.format(mode, username, p)
soup = downloader.read_soup(url_api, session=session)
soup = fix_soup(soup, url_api, session, cw)
soup = soup.find('ul', class_='pornstarsVideos')
elif mode in ['channels']:
url_api = 'https://www.pornhub.com/{}/{}/videos?page={}'.format(mode, username, p)
soup = downloader.read_soup(url_api, session=session)
soup = fix_soup(soup, url_api, session, cw)
try:
soup = soup.find('div', {'id': 'channelsBody'}).find('div', class_='rightSide')
except:
break
elif mode in ['playlist']:
#url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(username, len(hrefs))
if token is None:
raise Exception('no token')
url_api = 'https://www.pornhub.com/playlist/viewChunked?id={}&token={}&page={}'.format(username, token, p)
soup = downloader.read_soup(url_api, session=session)
else:
raise NotImplementedError(mode)
fail = 0
except Exception as e:
print_(e)
fail += 1
if fail < 2:
continue
else:
break
finally:
print_('{} ({})'.format(url_api, len(hrefs)))
if cw and not cw.alive:
return
lis = soup.findAll('li', class_='videoblock')
if not lis:
print_('break: no lis')
break
if getattr(soup.find('title'), 'text', '').strip() == 'Page Not Found':
print_('Page Not Found')
break
c = 0
for li in lis:
a = li.find('a')
href = a.attrs['href']
href = urljoin(url, href)
if href in hrefs:
continue
c += 1
if href.startswith('javascript:'): # Remove Pornhub Premium
print(href)
continue
hrefs.append(href)
if c == 0:
print('c==0')
break
print(c) # 1320
if len(hrefs) >= max_pid:
break
if cw:
hrefs = filter_range(hrefs, cw.range)
info['hrefs'] = hrefs
return info
@lock
def decode(html, cw=None):
'''
decode
'''
print_ = get_print(cw)
print_('decode')
soup = Soup(html)
for script in soup.findAll('script'):
script = script.text or script.string or ''
script = script.strip()
if 'videoUrl' in script:
break
else:
raise Exception('No script')
flashvars = script.split()[1]
script = 'playerObjList={};' + script
context = js2py.EvalJs()
context.execute(script)
return context.eval(flashvars).to_dict()
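# decode() locates the inline player script that defines the flashvars_XXXX
# object, prepends a dummy playerObjList so the script can run standalone,
# evaluates it with js2py, and reads the resulting object back as a dict
# (consumed above for 'mediaDefinitions' and 'image_url').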

View File

@ -0,0 +1,133 @@
import downloader
import ree as re
import os
from utils import Downloader, urljoin, query_url, Soup, get_max_range, get_print, clean_title
from translator import tr_
try:
from urllib import quote # python2
except:
from urllib.parse import quote # python3
import sys
from timee import sleep
from constants import clean_url
LIMIT = 100
def get_tags(url):
url = clean_url(url)
qs = query_url(url)
if 'page=favorites' in url:
id = qs.get('id', ['N/A'])[0]
id = u'fav_{}'.format(id)
else:
tags = qs.get('tags', [])
tags.sort()
id = u' '.join(tags)
if not id:
id = u'N/A'
return id
@Downloader.register
class Downloader_rule34_xxx(Downloader):
type = 'rule34_xxx'
URLS = ['rule34.xxx']
MAX_CORE = 8
display_name = 'Rule34.xxx'
_name = None
def init(self):
if 'rule34.xxx' in self.url.lower():
self.url = self.url.replace('http://', 'https://')
else:
url = self.url
url = url.replace(' ', '+')
while '++' in url:
url = url.replace('++', '+')
url = quote(url)
url = url.replace('%2B', '+')
self.url = u'https://rule34.xxx/index.php?page=post&s=list&tags={}'.format(url)
@property
def name(self):
if self._name is None:
tags = get_tags(self.url)
self._name = tags
return clean_title(self._name)
def read(self):
self.title = self.name
imgs = get_imgs(self.url, self.name, cw=self.cw)
for img in imgs:
self.urls.append(img.url)
self.filenames[img.url] = img.filename
self.title = self.name
class Image(object):
def __init__(self, id_, url):
self.url = url
ext = os.path.splitext(url)[1]
self.filename = u'{}{}'.format(id_, ext)
def setPage(url, page):
# Always use HTTPS
url = url.replace('http://', 'https://')
# Change the page
if 'pid=' in url:
url = re.sub('pid=[0-9]*', 'pid={}'.format(page), url)
else:
url += '&pid={}'.format(page)
return url
def get_imgs(url, title=None, cw=None):
url = clean_url(url)
if 's=view' in url and 'page=favorites' not in url:
raise NotImplementedError('Not Implemented')
if 'page=dapi' not in url.lower():
tags = get_tags(url)
tags = quote(tags, safe='/')
tags = tags.replace('%20', '+')
url = "https://rule34.xxx/index.php?page=dapi&s=post&q=index&tags={}&pid={}&limit={}".format(tags, 0, LIMIT)
print_ = get_print(cw)
# Range
max_pid = get_max_range(cw)
imgs = []
ids = set()
for p in range(500): #1017
url = setPage(url, p)
print_(url)
html = downloader.read_html(url)
soup = Soup(html)
posts = soup.findAll('post')
if not posts:
break
for post in posts:
id_ = post.attrs['id']
if id_ in ids:
print('duplicate:', id_)
continue
ids.add(id_)
url_img = post.attrs['file_url']
img = Image(id_, url_img)
imgs.append(img)
if len(imgs) >= max_pid:
break
if cw is not None:
if not cw.alive:
break
cw.setTitle(u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs)))
return imgs
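# Listings are read through the public DAPI endpoint
# (index.php?page=dapi&s=post&q=index) rather than the HTML pages: each request
# returns up to LIMIT (100) <post> elements with a file_url attribute, and the
# 'pid' parameter selects the page.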

View File

@ -0,0 +1,180 @@
#coding: utf8
import downloader
import json
from io import BytesIO
from utils import Downloader, LazyUrl, get_print, try_n, lock, clean_title
from error_printer import print_error
import os
from timee import sleep
import ffmpeg
import ytdl
from m3u8_tools import M3u8_stream
CLIENT_ID = None
@lock
def get_cid(force=False):
global CLIENT_ID
if CLIENT_ID is None or force:
print('update cid...')
d = ytdl.YoutubeDL()
e = ytdl.extractor.soundcloud.SoundcloudIE(d)
e._update_client_id()
CLIENT_ID = e._CLIENT_ID
return CLIENT_ID
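# SoundCloud's API requires a client_id that rotates periodically; rather than
# hard-coding one, this borrows ytdl's SoundcloudIE to obtain a fresh id and
# caches it in the module-level CLIENT_ID (refreshed when force=True).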
class Audio(object):
_url = None
def __init__(self, info, album_art, cw=None):
self.info = info
self.album_art = album_art
self.cw = cw
self.url = LazyUrl(info['webpage_url'], self.get, self, pp=self.pp)
def get(self, url):
print_ = get_print(self.cw)
if self._url:
return self._url
info = self.info
## ydl = ytdl.YoutubeDL()
## info = ydl.extract_info(url)
formats = info['formats']
print(formats)
formats = sorted(formats, key=lambda x: int(x.get('abr', 0)), reverse=True)
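# Prefer a direct HTTP(S) format (highest abr first); fall back to an HLS stream via M3u8_stream if none is found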
url_audio = None
for format in formats:
protocol = format['protocol']
print_(u'【{}】 format【{}】 abr【{}】'.format(protocol, format['format'], format.get('abr', 0)))
if not url_audio and protocol in ['http', 'https']:
url_audio = format['url']
if not url_audio:
url_audio = M3u8_stream(formats[0]['url'])
self.album_art = False#
self.username = info['uploader']
self.title = u'{} - {}'.format(self.username, info['title'])
self.filename = u'{}{}'.format(clean_title(self.title, allow_dot=True, n=-4), '.mp3')
thumb = None
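# Pick a thumbnail between 100 and 500 px wide (scanning the list in reverse) to use as album art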
for t in info['thumbnails'][::-1]:
width = t.get('width', 1080)
if not 100 <= width <= 500:
continue
url_thumb = t['url']
thumb = BytesIO()
try:
downloader.download(url_thumb, buffer=thumb)
break
except Exception as e:
print(e)
thumb = None
self.thumb = thumb
self._url = url_audio
return self._url
def pp(self, filename):
cw = self.cw
with cw.convert(self):
return self._pp(filename)
def _pp(self, filename):
if self.thumb and self.album_art:
self.thumb.seek(0)#
ffmpeg.add_cover(filename, self.thumb, {'artist':self.username, 'title':self.info['title']}, cw=self.cw)
@Downloader.register
class Downloader_soundcloud(Downloader):
type = 'soundcloud'
single = True
URLS = ['soundcloud.com']
#lock = True
audio = None
display_name = 'SoundCloud'
def init(self):
if 'soundcloud.com' in self.url.lower():
self.url = self.url.replace('http://', 'https://')
else:
self.url = 'https://soundcloud.com/{}'.format(self.url)
def read(self):
album_art = self.ui_setting.albumArt.isChecked()
info = get_audios(self.url, self.cw, album_art)
audios = info['audios']
if not audios:
raise Exception('no audios')
# first audio must be valid
while audios:
audio = audios[0]
try:
audio.url()
break
except Exception as e:
e_ = e
print(e)
audios.remove(audio)
else:
raise e_
if len(audios) > 1:
audio = self.process_playlist(info['title'], audios)
else:
self.urls.append(audio.url)
self.title = audio.title
self.artist = audio.username
self.setIcon(audio.thumb)
@try_n(2)
def get_audios(url, cw, album_art):
print_ = get_print(cw)
url = url.rstrip('/')
if url.count('/') == 3:
url += '/tracks'
info = {
#'extract_flat': True,
}
ydl = ytdl.YoutubeDL()
info = ydl.extract_info(url)
if 'entries' in info:
entries = info['entries']
title = info['title']
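# Infer the playlist kind from the '(...)' suffix in the extracted title; default to 'Playlist'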
for _type in ['All', 'Tracks', 'Albums', 'Sets', 'Reposts', 'Likes', 'Spotlight']:
x = '({})'.format(_type)
if x in title:
title = title.replace(x, '')
kind = _type
break
else:
kind = 'Playlist'
print_(u'kind: {}'.format(kind))
info['title'] = u'[{}] {}'.format(kind.capitalize(), title)
else:
entries = [info]
audios = []
for e in entries:
if '/sets/' in e['webpage_url']:
continue
audio = Audio(e, album_art, cw=cw)
audios.append(audio)
info['audios'] = audios
return info

View File

@@ -0,0 +1,250 @@
from __future__ import division, print_function, unicode_literals
import downloader
import ree as re
from utils import urljoin, Soup, LazyUrl, Downloader, try_n, compatstr, get_print, clean_title, Session, get_max_range
import os
import json
import ast
from io import BytesIO
import random
import clf2
from translator import tr_
from timee import sleep
from error_printer import print_error
import devtools
HDR = {'User-Agent': downloader.hdr['User-Agent']}
PATTERN_VID = '/(v|video)/(?P<id>[0-9]+)'
def is_captcha(soup):
return soup.find('div', class_="verify-wrap") is not None
@Downloader.register
class Downloader_tiktok(Downloader):
type = 'tiktok'
single = True
URLS = ['tiktok.com']
display_name = 'TikTok'
def init(self):
cw = self.cw
self.session = Session()
res = clf2.solve(self.url, self.session, cw)
soup = Soup(res['html'])
if is_captcha(soup):
def f(html):
return not is_captcha(Soup(html))
clf2.solve(self.url, self.session, cw, show=True, f=f)
@classmethod
def fix_url(cls, url):
url = url.split('?')[0].split('#')[0].strip('/')
if 'tiktok.com' not in url.lower():
url = 'https://www.tiktok.com/@{}'.format(url)
return url
def read(self):
format = compatstr(self.ui_setting.youtubeFormat.currentText()).lower().strip()
if re.search(PATTERN_VID, self.url) is None:
info = read_channel(self.url, self.session, self.cw)
items = info['items']
videos = [Video('https://www.tiktok.com/@{}/video/{}'.format(info['uid'], item['id']), self.session, format) for item in items]
title = '{} (tiktok_{})'.format(info['nickname'], info['uid'])
video = self.process_playlist(title, videos)
else:
video = Video(self.url, self.session, format)
video.url()
self.urls.append(video.url)
self.title = clean_title(video.title)
self.setIcon(video.thumb)
class Video(object):
_url = None
def __init__(self, url, session, format='title (id)'):
self.url = LazyUrl(url, self.get, self)
self.session = session
self.format = format
@try_n(2)
def get(self, url):
if self._url:
return self._url
m = re.search(PATTERN_VID, url)
id = m.group('id')
ext = '.mp4'
self.title = id#
self.filename = '{}{}'.format(clean_title(self.title, n=-len(ext)), ext)
html = downloader.read_html(url, session=self.session)
soup = Soup(html)
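# The page embeds its state as JSON in the __NEXT_DATA__ script tag; the download URL and cover come from itemInfo.itemStruct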
data = soup.find(id='__NEXT_DATA__')
props = data.contents[0]
data_encode = json.dumps(props)
ast_le = ast.literal_eval(data_encode)
data = json.loads(ast_le)
#info = data['props']['pageProps']['videoData']['itemInfos']
info = data['props']['pageProps']['itemInfo']['itemStruct']
self._url = info['video']['downloadAddr']
self.url_thumb = info['video']['cover']
self.thumb = BytesIO()
downloader.download(self.url_thumb, referer=url, buffer=self.thumb)
return self._url
def read_channel(url, session, cw=None):
print_ = get_print(cw)
info = {}
info['items'] = []
ids = set()
info['items'] = []
sd = {
'count_empty': 0,
'shown': False,
}
max_pid = get_max_range(cw)
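# f() runs on each page snapshot from clf2: it scrolls the profile, collects video ids,
# and returns True to stop once max_pid items are collected or several scrolls add nothing new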
def f(html, browser=None):
soup = Soup(html)
if is_captcha(soup):
print('captcha')
browser.show()
sd['shown'] = True
elif sd['shown']:
browser.hide()
sd['shown'] = False
try:
info['uid'] = soup.find('h2', class_='share-title').text.strip()
info['nickname'] = soup.find('h1', class_='share-sub-title').text.strip()
except Exception as e:
print_(print_error(e)[0])
c = 0
ids_now = set()
for div in soup.findAll('div', class_='video-feed-item'):
a = div.find('a')
if a is None:
continue
href = a['href']
if not href:
continue
m = re.search(PATTERN_VID, href)
if m is None:
continue
id_video = int(m.group('id'))
ids_now.add(id_video)
if id_video in ids:
continue
ids.add(id_video)
info['items'].append({'id': id_video})
c += 1
print_('items: {}'.format(len(info['items'])))
if len(info['items']) >= max_pid:
info['items'] = info['items'][:max_pid]
return True
browser.runJavaScript('window.scrollTo(0, document.body.scrollHeight);')
sleep(15, cw)
if c or (ids_now and min(ids_now) > min(ids)):
sd['count_empty'] = 0
else:
print_('empty')
sd['count_empty'] += 1
msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info.get('nickname'), info.get('uid'), len(info['items']))
if cw:
if not cw.alive:
raise Exception('cw dead')
cw.setTitle(msg)
else:
print(msg)
return sd['count_empty'] > 4
res = clf2.solve(url, session, cw, f=f, timeout=1800, show=True)
if not info['items']:
raise Exception('no items')
return info
@try_n(2)
def read_channel_legacy(url, session, cw=None):
print_ = get_print(cw)
html = downloader.read_html(url, session=session, headers=HDR)
uid = re.find('//user/profile/([0-9]+)', html, err='no uid')
secUid = re.find('"secUid" *: *"([^"]+?)"', html, err='no secUid')
verifyFp = ''.join(random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789') for i in range(16))
maxCursor = 0
info = {}
info['items'] = []
ids = set()
for i in range(100):
url_api = 'https://t.tiktok.com/api/item_list/?count=30&id={uid}&type=1&secUid={secUid}&maxCursor={maxCursor}&minCursor=0&sourceType=8&appId=1180&region=US&language=en&verifyFp={verifyFp}'.format(uid=uid, secUid=secUid, verifyFp=verifyFp, maxCursor=maxCursor)
js = 'window.byted_acrawler.sign({url:"{}"});'.replace('{}', url_api)
print(js)
for try_ in range(4):
try:
sign = devtools.eval_js(url, js, session)['output']
break
except Exception as e:
print(e)
e_ = e
else:
raise e_
url_api += '&_signature=' + sign
print_(url_api)
data_raw = downloader.read_html(url_api, url, session=session, headers=HDR)
data = json.loads(data_raw)
items = []
for item in data.get('items', []):
id_video = item['id']
if id_video in ids:
print('duplicate:', id_video)
continue
ids.add(id_video)
items.append(item)
if not items:
print('no items')
break
info['items'] += items
if i == 0:
info['uid'] = items[0]['author']['uniqueId']
info['nickname'] = items[0]['author']['nickname']
msg = '{} {} (tiktok_{}) - {}'.format(tr_('읽는 중...'), info['nickname'], info['uid'], len(info['items']))
if cw:
if not cw.alive:
break
cw.setTitle(msg)
else:
print(msg)
if not data['hasMore']:
break
maxCursor = data['maxCursor']
if not info['items']:
raise Exception('no items')
return info

View File

@@ -0,0 +1,100 @@
#coding:utf8
import downloader
from utils import Soup, urljoin, Downloader, cut_pair, LazyUrl, clean_title
from timee import sleep
from translator import tr_
from io import BytesIO
import ree as re
import os
@Downloader.register
class Downloader_tokyomotion(Downloader):
type = 'tokyomotion'
URLS = ['tokyomotion.net']
single = True
_type = None
display_name = 'TOKYO Motion'
def init(self):
html = downloader.read_html(self.url)
self.soup = Soup(html)
if '/album/' in self.url:
self._type = 'album'
else:
self._type = 'video'
@property
def name(self):
title = get_title(self.soup)
return clean_title(title)
def read(self):
if self._type == 'video':
video = get_video(self.url, self.soup)
self.urls.append(video.url)
self.setIcon(video.thumb)
elif self._type == 'album':
imgs = get_imgs(self.url)
for img in imgs:
self.urls.append(img.url)
self.single = False
else:
raise NotImplementedError('Unknown type: {}'.format(self._type))
self.title = self.name
class Video(object):
def __init__(self, url, url_thumb, referer, filename):
self.url = LazyUrl(referer, lambda x: url, self)
self.url_thumb = url_thumb
self.thumb = BytesIO()
downloader.download(url_thumb, referer=referer, buffer=self.thumb)
self.filename = filename
def get_title(soup):
video = soup.find('video', id='vjsplayer')
if video:
title = soup.find('h3').text.strip()
else:
title = soup.find('title').text.split(' Album - ')[0].strip()
return title
def get_video(url, soup=None):
if soup is None:
html = downloader.read_html(url)
soup = Soup(html)
video = soup.find('video', id='vjsplayer').find('source').attrs['src']
url_thumb = soup.find('video', id='vjsplayer').attrs['poster']
title = get_title(soup)
filename = u'{}.mp4'.format(clean_title(title))
video = Video(video, url_thumb, url, filename)
return video
class Image(object):
def __init__(self, url, referer):
self.url = LazyUrl(referer, lambda x: url, self)
self.filename = os.path.basename(url.split('?')[0])
def get_imgs(url):
id = re.find('album/.*?([0-9]+)', url)
print('id:', id)
url = 'https://www.tokyomotion.net/album/slideshow/{}'.format(id)
html = downloader.read_html(url)
soup = Soup(html)
imgs = []
for a in soup.findAll('a', {'data-lightbox': 'slideshow-{}'.format(id)}):
img = a.find('img').attrs['src']
img = img.replace('/tmb/', '/')
img = Image(img, url)
imgs.append(img)
return imgs

View File

@@ -1,4 +1,4 @@
-from utils import Downloader, speed_text, clean_title
+from utils import Downloader, clean_title
import constants, os, downloader
from size import Size
try:
@@ -54,9 +54,10 @@ class Downloader_torrent(Downloader):
if not files:
raise Exception('No files')
cw.single = self.single = len(files) == 1
-for file in files:
-filename = os.path.join(self.dir, file)
-cw.imgs.append(filename)
+if not cw.imgs:
+for file in files:
+filename = os.path.join(self.dir, file)
+cw.imgs.append(filename)
def start_(self):
cw = self.cw
@@ -81,8 +82,11 @@ class Downloader_torrent(Downloader):
if cw.alive:
cw.setSpeed('')
if cw.pause_lock and cw.pbar.value() < cw.pbar.maximum():
-cw.pause_data = {'type': self.type, 'url': self.url,
-'filesize': self._filesize_prev}
+cw.pause_data = {
+'type': self.type,
+'url': self.url,
+'filesize': self._filesize_prev,
+}
cw.paused = True
cw.pause_lock = False
self.update_tools_buttons()
@@ -110,8 +114,8 @@ class Downloader_torrent(Downloader):
cw.dones.add(file)
file = constants.compact(file).replace('\\', '/')
files = file.split('/')
-file = (u' / ').join(files[1:])
-msg = (u'Completed: {}').format(file)
+file = ' / '.join(files[1:])
+msg = 'Completed: {}'.format(file)
self.print_(msg)
if i == 0:
for try_ in range(4):
@@ -126,20 +130,20 @@ class Downloader_torrent(Downloader):
downloader.total_download_size += d_size
cw.pbar.setValue(s.progress * MAX_PBAR)
if s.state_str == 'queued':
-title_ = (u'Waiting... {}').format(title)
+title_ = 'Waiting... {}'.format(title)
elif s.state_str == 'checking files':
-title_ = (u'Checking files... {}').format(title)
+title_ = 'Checking files... {}'.format(title)
self._filesize_prev = filesize
elif s.state_str == 'downloading':
-title_ = (u'{} (p: {}, s: {})').format(title, s.num_peers, s.num_seeds)
+title_ = '{} (p: {}, s: {})'.format(title, s.num_peers, s.num_seeds)
cw.setFileSize(filesize)
text = self.size.speed_text()
cw.setSpeed(text)
elif s.state_str == 'seeding':
-title_ = (u'{}').format(title)
+title_ = '{}'.format(title)
cw.setFileSize(filesize)
else:
-title_ = (u'{}... {}').format(s.state_str.capitalize(), title)
+title_ = '{}... {}'.format(s.state_str.capitalize(), title)
cw.setTitle(title_, update_filter=False)
else:
return 'abort'

View File

@@ -0,0 +1,204 @@
#coding:utf8
import downloader
from translator import tr_
from utils import Soup, Session, query_url, get_max_range, Downloader, clean_title, update_url_query, get_print, get_ext, LazyUrl
import ree as re
import errors
from ratelimit import limits, sleep_and_retry
from error_printer import print_error
class Image(object):
def __init__(self, url, id, p=0, cw=None):
self._url = url
self.id_ = id
self.p = p
self.cw = cw
self.url = LazyUrl(url, self.get, self)
def get(self, _):
print_ = get_print(self.cw)
url = self._url
ext = get_ext(url)
if ext.lower() == '.gif':
print_('get_ext: {}, {}'.format(self.id_, url))
try:
ext = downloader.get_ext(url)
except Exception as e: #3235
print_('Err: {}, {}\n'.format(self.id_, url)+print_error(e)[0])
self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
return url
@Downloader.register
class Downloader_tumblr(Downloader):
type = 'tumblr'
URLS = ['tumblr.com']
def init(self):
if u'tumblr.com/post/' in self.url:
return self.Invalid(tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url))
self.session = Session()
@classmethod
def fix_url(cls, url):
id = get_id(url)
return 'https://{}.tumblr.com'.format(id)
def read(self):
username = get_id(self.url)
name = get_name(username, self.session)
for img in get_imgs(username, self.session, cw=self.cw):
self.urls.append(img.url)
self.title = clean_title('{} (tumblr_{})'.format(name, username))
class TumblrAPI(object):
_url_base = 'https://www.tumblr.com/api'
_hdr = {
'referer': 'https://www.tumblr.com',
'authorization': 'Bearer aIcXSOoTtqrzR8L8YEIOmBeW94c3FmbSNSWAUbxsny9KKx5VFh',
}
_qs = {
'fields[blogs]': 'name,avatar,title,url,is_adult,?is_member,description_npf,uuid,can_be_followed,?followed,?advertiser_name,is_paywall_on,theme,subscription_plan,?primary,share_likes,share_following,can_subscribe,subscribed,ask,?can_submit,?is_blocked_from_primary,?tweet,?admin,can_message,?analytics_url,?top_tags,paywall_access',
'npf': 'true',
'reblog_info': 'false',
'include_pinned_posts': 'false',
#'page_number': None,
}
def __init__(self, session, cw=None):
self.session = session
self.cw = cw
def print_(self, s):
get_print(self.cw)(s)
@sleep_and_retry
@limits(1, 1)
def call(self, path, qs, default_qs=True):
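# Rate-limited (one request per second) call to the Tumblr web API; the default query string is merged in unless default_qs is False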
if default_qs:
qs_new = qs
qs = self._qs.copy()
qs.update(qs_new)
url = self._url_base + path
url = update_url_query(url, qs)
r = self.session.get(url, headers=self._hdr)
data = r.json()
errs = data.get('errors', [])
if errs:
code = int(errs[0]['code'])
if code == 0:
raise Exception('Not found')
elif code == 4012:
raise errors.LoginRequired(errs[0]['detail'])
r.raise_for_status()
return data['response']
def name(self, username):
path = '/v2/blog/{}/posts'.format(username)
data = self.call(path, {})
return data['blog']['title'] or data['blog']['name']
def posts(self, username):
path = '/v2/blog/{}/posts'.format(username)
qs = {}
ids = set()
default_qs = True
while True:
if self.cw and not self.cw.alive:
break
data = self.call(path, qs, default_qs=default_qs)
for post in data['posts']:
id_ = post['id']
if id_ in ids:
self.print_('duplicate: {}'.format(id_))
continue
ids.add(id_)
yield Post(post, self.cw)
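# Pagination follows the next-page href returned by the API; it is used as-is, so default_qs is disabled for subsequent requests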
try:
links = data.get('links') or data['_links']
path_next = links['next']['href']
except:
path_next = None
if path_next:
path = path_next
default_qs = False
else:
break
class Post(object):
def __init__(self, data, cw=None):
id_ = data['id']
self.imgs = []
cs = data['content']
for trail in data['trail']:
cs += trail['content']
for c in cs:
if c['type'] in ['image', 'video']:
media = c.get('media')
if not media: #2859
continue
if isinstance(media, list):
media = media[0]
img = media['url']
self.imgs.append(Image(img, id_, len(self.imgs), cw))
elif c['type'] in ['text', 'link', 'audio']:
continue
else:
raise NotImplementedError(id_, c)
def get_name(username, session):
return TumblrAPI(session).name(username)
def get_imgs(username, session, cw=None):
print_ = get_print(cw)
artist = get_name(username, session)
imgs = []
error_count = 0
max_pid = get_max_range(cw)
api = TumblrAPI(session, cw)
for post in api.posts(username):
imgs += post.imgs
s = '{} {} (tumblr_{}) - {}'.format(tr_(u'읽는 중...'), artist, username, len(imgs))
if cw:
if not cw.alive:
return
cw.setTitle(s)
else:
print(s)
if len(imgs) > max_pid:
break
return imgs[:max_pid]
def get_id(url):
if '/dashboard/blog/' in url:
url = re.find('/dashboard/blog/([0-9a-zA-Z_-]+)', url)
if '/login_required/' in url:
url = url.split('/login_required/')[1].split('?')[0].split('/')[0]
if 'tumblr.com/blog/view/' in url:
url = url.split('tumblr.com/blog/view/')[1]
if 'tumblr.com' in url:
if 'www.tumblr.com' in url:
qs = query_url(url)
url = qs.get('url', [url])[0]
url = url.split('.tumblr.com')[0].split('/')[-1]
if url == 'www':
raise Exception('no id')
return url

View File

@@ -275,6 +275,7 @@ class TwitterAPI(object):
return
params["cursor"] = cursor
if params.get("cursor") is None: # nothing
+print_('no cursor')
break
@@ -328,7 +329,8 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
names[id_].append(name)
else:
names[id_] = [name]
-max_id = max(ids) if ids else 0
+ids_sure = sorted(ids)[:-100]
+max_id = max(ids_sure) if ids_sure else 0 #3201
# 2303
imgs_old = []
@@ -341,23 +343,23 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
imgs_new = []
enough = False
+c_old = 0
for tweet in TwitterAPI(session, cw).timeline_media(username):
id_ = int(tweet['id_str'])
if id_ < max_id:
print_('enough')
enough = True
break
-imgs_ = get_imgs_from_tweet(tweet, session, types, format, cw)
if id_ in ids:
print_('duplicate: {}'.format(id_))
+c_old += 1
continue
ids.add(id_)
-imgs_new += imgs_
+imgs_new += get_imgs_from_tweet(tweet, session, types, format, cw)
-if len(imgs_old) + len(imgs_new) >= n:
+if len(imgs_new) + c_old >= n: #3201
break
msg = '{} {} - {}'.format(tr_('읽는 중...'), title, len(imgs_new))
@@ -368,7 +370,7 @@ def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage',
else:
print(msg)
-if not enough and not imgs_new:
+if not enough and not imgs_new and c_old == 0:
raise Exception('no imgs')
imgs = sorted(imgs_old + imgs_new, key=lambda img: img.id, reverse=True)

View File

@@ -0,0 +1,103 @@
#coding:utf8
from __future__ import division, print_function, unicode_literals
import downloader
from utils import Soup, get_ext, LazyUrl, Downloader, try_n, clean_title, get_print
import ree as re
from translator import tr_
from timee import sleep
import errors
def setPage(url, p):
url = url.split('?')[0]
if p > 1:
url += '?page={}'.format(p)
return url
def getPage(url):
p = re.find('page=([0-9]+)', url)
return int(p or 1)
class Image(object):
def __init__(self, url, referer, p):
self.url = LazyUrl(referer, lambda x: url, self)
ext = get_ext(url)
self.filename = '{:04}{}'.format(p, ext)
@Downloader.register
class Downloader_v2ph(Downloader):
type = 'v2ph'
URLS = ['v2ph.com/album/']
MAX_CORE = 4
display_name = 'V2PH'
@classmethod
def fix_url(cls, url):
return url.split('?')[0]
def read(self):
info = get_info(self.url)
for img in get_imgs(self.url, info['title'], self.cw):
self.urls.append(img.url)
self.title = clean_title(info['title'])
@try_n(2)
def get_info(url):
html = downloader.read_html(url)
soup = Soup(html)
info = {}
info['title'] = soup.find('h1').text.strip()
return info
def get_imgs(url, title, cw=None):
print_ = get_print(cw)
imgs = []
for p in range(1, 1001):
url = setPage(url, p)
print_(url)
for try_ in range(4):
try:
html = downloader.read_html(url, user_agent=downloader.hdr['User-Agent'])
#sleep(1)
break
except Exception as e:
print(e)
else:
raise
soup = Soup(html)
view = soup.find('div', class_='photos-list')
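# A missing photo list means login is required on the first page; on later pages it marks the end of what a guest can see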
if view is None:
if p == 1:
raise errors.LoginRequired()
else:
break # Guest user
for img in view.findAll('img'):
img = img.attrs['data-src']
img = Image(img, url, len(imgs))
imgs.append(img)
pgn = soup.find('ul', class_='pagination')
ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')]
if p >= max(ps):
print('max p')
break
msg = '{} {} ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
if cw:
cw.setTitle(msg)
else:
print(msg)
return imgs

View File

@@ -0,0 +1,58 @@
import downloader
import ree as re
from io import BytesIO as IO
from error_printer import print_error
from utils import Downloader, LazyUrl, get_ext, format_filename, try_n
import ytdl
@Downloader.register
class Downloader_vimeo(Downloader):
type = 'vimeo'
URLS = ['vimeo.com']
single = True
def init(self):
if 'vimeo.com' not in self.url.lower():
self.url = u'https://vimeo.com/{}'.format(self.url)
def read(self):
video = Video(self.url)
video.url()#
self.urls.append(video.url)
self.setIcon(video.thumb)
self.enableSegment()
self.title = video.title
class Video(object):
_url = None
def __init__(self, url):
self.url = LazyUrl(url, self.get, self)
@try_n(4)
def get(self, url):
if self._url:
return self._url
ydl = ytdl.YoutubeDL()
info = ydl.extract_info(url)
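# Keep only direct HTTP(S) formats and pick the widest one; streaming-only formats are ignored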
fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
fs = sorted(fs, key=lambda f: int(f.get('width', 0)), reverse=True)
if not fs:
raise Exception('No MP4 videos')
f = fs[0]
self._url = f['url']
self.thumb_url = info['thumbnails'][0]['url']
self.thumb = IO()
downloader.download(self.thumb_url, buffer=self.thumb)
self.title = info['title']
ext = get_ext(self._url)
self.filename = format_filename(self.title, info['id'], ext)
return self._url

View File

@@ -0,0 +1,76 @@
import downloader
import ytdl
from utils import Downloader, try_n, LazyUrl, get_ext, format_filename, clean_title
from io import BytesIO
import ree as re
from m3u8_tools import M3u8_stream
import os
@Downloader.register
class Downloader_vlive(Downloader):
type = 'vlive'
URLS = ['vlive.tv']
single = True
display_name = 'V LIVE'
def init(self):
if 'channels.vlive.tv' in self.url:
raise NotImplementedError('channel')
def read(self):
video = get_video(self.url)
self.urls.append(video.url)
self.setIcon(video.thumb)
self.enableSegment()
self.title = clean_title(video.title)
@try_n(4)
def get_video(url):
options = {
'noplaylist': True,
}
ydl = ytdl.YoutubeDL(options)
info = ydl.extract_info(url)
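# Rank mp4 formats by vbr, falling back to the NNNp tag in the format name, and take the highest quality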
fs = []
for f in info['formats']:
if f['ext'] != 'mp4':
continue
f['quality'] = f.get('vbr') or re.find('([0-9]+)p', f['format'], re.IGNORECASE)
print(f['format'], f['quality'])
fs.append(f)
if not fs:
raise Exception('No videos')
f = sorted(fs, key=lambda f:f['quality'])[-1]
video = Video(f, info)
return video
class Video(object):
def __init__(self, f, info):
self.title = title = info['title']
self.id = info['id']
self.url = f['url']
self.thumb = BytesIO()
downloader.download(info['thumbnail'], buffer=self.thumb)
ext = get_ext(self.url)
if ext.lower() == '.m3u8':
raise NotImplementedError('stream')#
url = M3u8_stream(self.url, n_thread=4)
else:
url = self.url
self.url = LazyUrl(self.url, lambda x: url, self)
self.filename = format_filename(title, self.id, ext)

View File

@@ -0,0 +1,147 @@
import downloader
from utils import Soup, LazyUrl, clean_title, get_ext, get_imgs_already, urljoin, try_n, Downloader
import os
import page_selector
from translator import tr_
import ree as re
@Downloader.register
class Downloader_webtoon(Downloader):
type = 'webtoon'
URLS = ['webtoon.com', 'webtoons.com']
MAX_CORE = 8
MAX_SPEED = 4.0
display_name = 'WEBTOON'
def init(self):
self.url = get_main(self.url)
self.soup = downloader.read_soup(self.url)
@classmethod
def fix_url(cls, url):
return url.replace('webtoon.com', 'webtoons.com')
def read(self):
title = clean_title(self.soup.find('h1').text.strip())
self.title = tr_(u'읽는 중... {}').format(title)
imgs = get_imgs_all(self.url, title, cw=self.cw)
for img in imgs:
if isinstance(img, Image):
self.urls.append(img.url)
else:
self.urls.append(img)
self.title = title
class Page(object):
def __init__(self, url, title):
self.url = url
self.title = title
class Image(object):
def __init__(self, url, page, p):
ext = get_ext(url) or downloader.get_ext(url, referer=page.url)
self.filename = '{}/{:04}{}'.format(clean_title(page.title), p, ext)
self.url = LazyUrl(page.url, lambda _: url, self)
@try_n(2)
def get_imgs(page):
html = downloader.read_html(page.url)
if 'window.__motiontoonViewerState__' in html:
raise NotImplementedError('motiontoon')
soup = Soup(html)
view = soup.find('div', class_='viewer_img')
imgs = []
for img in view.findAll('img'):
src = img.get('data-url') or img['src']
img = Image(urljoin(page.url, src), page, len(imgs))
imgs.append(img)
return imgs
def get_main(url):
if 'episode_no=' in url:
soup = downloader.read_soup(url)
url = urljoin(url, soup.find('div', class_='subj_info').find('a')['href'])
return url
def set_page(url, p):
if '&page=' not in url:
url = url + '&page={}'.format(p)
else:
url = re.sub('&page=[0-9]+', '&page={}'.format(p), url)
if p == 1:
url = url.replace('&page=1', '')
return url
def get_pages(url):
pages = []
urls = set()
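# Walk the paginated episode list until a page adds no new episodes; pages are returned oldest first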
for p in range(1, 101):
url_page = set_page(url, p)
print(url_page)
for try_ in range(4):
try:
soup = downloader.read_soup(url_page)
view = soup.find('ul', id='_listUl')
if view is None:
raise Exception('no view')
break
except Exception as e:
e_ = e
print(e)
else:
raise e_
pages_new = []
for li in view.findAll('li', recursive=False):
href = urljoin(url, li.find('a')['href'])
title = li.find('span', class_='subj').text.strip()
if href in urls:
continue
urls.add(href)
no = int(li['data-episode-no'])
title = '{:04} - {}'.format(no, title)
page = Page(href, title)
pages_new.append(page)
if not pages_new:
break
pages += pages_new
return pages[::-1]
@page_selector.register('webtoon')
@try_n(4)
def f(url):
url = get_main(url)
return get_pages(url)
def get_imgs_all(url, title, cw=None):
pages = get_pages(url)
pages = page_selector.filter(pages, cw)
imgs = []
for p, page in enumerate(pages):
imgs_already = get_imgs_already('webtoon', title, page, cw)
if imgs_already:
imgs += imgs_already
continue
imgs += get_imgs(page)
msg = tr_(u'읽는 중... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages))
if cw is not None:
cw.setTitle(msg)
if not cw.alive:
break
else:
print(msg)
return imgs

View File

@@ -0,0 +1,180 @@
#coding:utf8
import downloader
import ree as re
from timee import sleep, clock, time
from constants import clean_url
from utils import Downloader, urljoin, try_n, Session, get_print, clean_title, Soup, fix_protocol
import os
from translator import tr_
import json
from datetime import datetime
import constants
import clf2
import errors
@Downloader.register
class Downloader_weibo(Downloader):
type = 'weibo'
URLS = ['weibo.com', 'weibo.cn']
def init(self):
self.session = Session()
@classmethod
def fix_url(cls, url):
url = url.replace('weibo.cn', 'weibo.com').split('?')[0]
if 'weibo.com/p/' in url:
id = re.findall('weibo.com/p/([^/]+)', url)[0]
url = 'https://weibo.com/p/{}'.format(id)
elif 'weibo.com/u/' in url:
id = re.findall('weibo.com/u/([^/]+)', url)[0]
url = 'https://weibo.com/u/{}'.format(id)
elif 'weibo.com/' in url:
id = re.findall('weibo.com/([^/]+)', url)[0]
url = 'https://weibo.com/{}'.format(id)
else:
id = url
url = 'https://weibo.com/u/{}'.format(id)
url = fix_protocol(url)
return url
def read(self):
checkLogin(self.session)
uid, oid, name = get_id(self.url, self.cw)
title = clean_title('{} (weibo_{})'.format(name, uid))
for img in get_imgs(uid, oid, title, self.session, cw=self.cw, d=self, parent=self.mainWindow):
self.urls.append(img.url)
self.filenames[img.url] = img.filename
self.title = title
def checkLogin(session):
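# A missing or expired SUBP cookie for .weibo.com means the session is not logged in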
c = session.cookies._cookies.get('.weibo.com', {}).get('/',{}).get('SUBP')
if not c or c.is_expired():
raise errors.LoginRequired()
class Album(object):
def __init__(self, id, type):
self.id = id
self.type = type
class Image(object):
def __init__(self, url, filename=None, timestamp=0):
self.url = url
if filename is None:
filename = os.path.basename(url)
self.filename = filename
self.timestamp = timestamp
def _get_page_id(html):
m = re.search("CONFIG\\['page_id'\\]='([0-9]+?)'", html)
return m
def get_id(url, cw=None):
for try_ in range(2):
try:
res = clf2.solve(url, cw=cw, f=_get_page_id)
html = res['html']
soup = Soup(html)
if soup.find('div', class_='gn_login'):
raise errors.LoginRequired()
m = _get_page_id(html)
if not m:
raise Exception('no page_id')
oid = m.groups()[0]
uids = re.findall('uid=([0-9]+)', html)
uid = max(set(uids), key=uids.count)
name = re.findall("CONFIG\\['onick'\\]='(.+?)'", html)[0]
break
except errors.LoginRequired as e:
raise
except Exception as e:
e_ = e
print(e)
else:
raise e_
return uid, oid, name
def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):
print_ = get_print(cw)
print_('uid: {}, oid:{}'.format(uid, oid))
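# Enumerate every album via albums/get_all, then page through each album's photos with photos/get_all (30 per page)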
@try_n(4)
def get_album_imgs(album, page):
url = 'https://photo.weibo.com/photos/get_all?uid={}&album_id={}&count=30&page={}&type={}&__rnd={}'.format(uid, album.id, page, album.type, int(time()*1000))
referer = 'https://photo.weibo.com/{}/talbum/index'.format(uid)
html = downloader.read_html(url, referer, session=session, timeout=30)
j = json.loads(html)
data = j['data']
imgs = []
for photo in data['photo_list']:
host = photo['pic_host']
name = photo['pic_name']
id = photo['photo_id']
timestamp = photo['timestamp']
date = datetime.fromtimestamp(timestamp)
t = '{:02}-{:02}-{:02}'.format(date.year % 100, date.month, date.day)
url = '{}/large/{}'.format(host, name)
ext = os.path.splitext(name)[1]
filename = '[{}] {}{}'.format(t, id, ext)
img = Image(url, filename, timestamp)
imgs.append(img)
return imgs
def get_albums(page):
url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(uid, page, int(time()*1000))
referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
html = downloader.read_html(url, referer, session=session)
j = json.loads(html)
data = j['data']
albums = []
for album in data['album_list']:
id = album['album_id']
type = album['type']
album = Album(id, type)
albums.append(album)
return albums
albums = []
for p in range(1, 101):
albums_new = get_albums(p)
albums += albums_new
print_('p:{}, albums:{}'.format(p, len(albums)))
if not albums_new:
break
imgs = []
for album in albums:
print('Album:', album.id, album.type)
for p in range(1, 101):
imgs_new = get_album_imgs(album, p)
imgs += imgs_new
s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
if cw:
if not cw.alive:
return []
cw.setTitle(s)
else:
print(s)
if not imgs_new:
break
sleep(1)
imgs = sorted(imgs, key=lambda img: img.timestamp, reverse=True)
return imgs