#coding:utf8
|
|
import downloader
|
|
import ree as re
|
|
from timee import sleep, clock, time
|
|
from constants import clean_url
|
|
from utils import Downloader, urljoin, try_n, Session, get_print, clean_title, Soup, fix_protocol
|
|
import os
|
|
from translator import tr_
|
|
import json
|
|
from datetime import datetime
|
|
import constants
|
|
import clf2
|
|
import errors
|
|
|
|
|
|
@Downloader.register
class Downloader_weibo(Downloader):
    """Downloader plugin for weibo.com / weibo.cn user photo pages."""
    type = 'weibo'
    URLS = ['weibo.com', 'weibo.cn']

    def init(self):
        # Fresh HTTP session per task; cookies are filled in elsewhere.
        self.session = Session()

    @classmethod
    def fix_url(cls, url):
        """Canonicalize any accepted weibo url (or bare id) to https://weibo.com/..."""
        # Normalize the mobile domain and strip the query string.
        url = url.replace('weibo.cn', 'weibo.com').split('?')[0]
        rules = [
            ('weibo.com/p/', 'weibo.com/p/([^/]+)', 'https://weibo.com/p/{}'),
            ('weibo.com/u/', 'weibo.com/u/([^/]+)', 'https://weibo.com/u/{}'),
            ('weibo.com/', 'weibo.com/([^/]+)', 'https://weibo.com/{}'),
        ]
        for marker, pattern, template in rules:
            if marker in url:
                url = template.format(re.findall(pattern, url)[0])
                break
        else:
            # No domain at all: treat the whole input as a user id.
            url = 'https://weibo.com/u/{}'.format(url)
        return fix_protocol(url)

    def read(self):
        # Fail fast with a login prompt before doing any scraping.
        checkLogin(self.session)

        uid, oid, name = get_id(self.url, self.cw)
        title = clean_title('{} (weibo_{})'.format(name, uid))

        for img in get_imgs(uid, oid, title, self.session, cw=self.cw, d=self, parent=self.mainWindow):
            self.urls.append(img.url)
            self.filenames[img.url] = img.filename

        self.title = title
|
|
|
|
|
|
def checkLogin(session):
    """Raise errors.LoginRequired unless the session holds a live weibo SUBP cookie."""
    subp = session.cookies._cookies.get('.weibo.com', {}).get('/', {}).get('SUBP')
    if not subp or subp.is_expired():
        raise errors.LoginRequired()
|
|
|
|
|
|
class Album(object):
    """Lightweight reference to one weibo photo album (its id and type code)."""

    def __init__(self, id, type):
        self.id, self.type = id, type
|
|
|
|
|
|
class Image(object):
    """One downloadable photo: url, target filename, and upload timestamp."""

    def __init__(self, url, filename=None, timestamp=0):
        self.url = url
        # Fall back to the url's basename when no explicit name is given.
        self.filename = os.path.basename(url) if filename is None else filename
        self.timestamp = timestamp
|
|
|
|
|
|
def _get_page_id(html):
|
|
m = re.search("CONFIG\\['page_id'\\]='([0-9]+?)'", html)
|
|
return m
|
|
|
|
|
|
def get_id(url, cw=None):
    """Resolve a weibo page url to (uid, oid, nickname).

    Loads the page through clf2 (up to 2 attempts), then scrapes:
    - oid: CONFIG['page_id'] from the html,
    - uid: the most frequently occurring 'uid=...' value in the html,
    - name: CONFIG['onick'].
    Raises errors.LoginRequired when the page shows a login box; re-raises
    the last scrape error if both attempts fail.
    """
    for try_ in range(2):
        try:
            # f=_get_page_id lets clf2 wait until the page_id config
            # actually appears in the rendered html.
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                # Login box present -> session cookies missing/expired.
                raise errors.LoginRequired()
            m = _get_page_id(html)
            if not m:
                raise Exception('no page_id')
            oid = m.groups()[0]
            # Assume the page owner's uid is the one mentioned most often.
            uids = re.findall('uid=([0-9]+)', html)
            uid = max(set(uids), key=uids.count)
            name = re.findall("CONFIG\\['onick'\\]='(.+?)'", html)[0]
            break
        except errors.LoginRequired as e:
            raise  # not retryable; surface immediately
        except Exception as e:
            e_ = e  # keep the last error for the for-else below
            print(e)
    else:
        # Both attempts failed.
        raise e_
    return uid, oid, name
|
|
|
|
|
|
|
|
def get_imgs(uid, oid, title, session, cw=None, d=None, parent=None):
    """Collect Image objects for every photo in every album of user *uid*.

    Pages through the photo.weibo.com JSON APIs (20 albums / 30 photos per
    request, both capped at 100 pages) and returns the images sorted
    newest-first by timestamp. Returns [] early if the UI window *cw* is
    closed while reading. *oid*, *d* and *parent* are currently unused here
    beyond logging / signature compatibility.
    """
    print_ = get_print(cw)
    print_('uid: {}, oid:{}'.format(uid, oid))

    @try_n(4)  # network call; retry up to 4 times
    def get_album_imgs(album, page):
        # One page (30 items) of an album; __rnd is a cache-busting ms timestamp.
        url = 'https://photo.weibo.com/photos/get_all?uid={}&album_id={}&count=30&page={}&type={}&__rnd={}'.format(uid, album.id, page, album.type, int(time()*1000))
        referer = 'https://photo.weibo.com/{}/talbum/index'.format(uid)
        html = downloader.read_html(url, referer, session=session, timeout=30)
        j = json.loads(html)
        data = j['data']
        imgs = []
        for photo in data['photo_list']:
            host = photo['pic_host']
            name = photo['pic_name']
            id = photo['photo_id']
            timestamp = photo['timestamp']
            date = datetime.fromtimestamp(timestamp)
            # Filename prefix: two-digit year-month-day of the upload date.
            t = '{:02}-{:02}-{:02}'.format(date.year % 100, date.month, date.day)
            # 'large' path segment selects the full-resolution variant.
            url = '{}/large/{}'.format(host, name)
            ext = os.path.splitext(name)[1]
            filename = '[{}] {}{}'.format(t, id, ext)
            img = Image(url, filename, timestamp)
            imgs.append(img)

        return imgs

    def get_albums(page):
        # One page (20 items) of the user's album list.
        url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(uid, page, int(time()*1000))
        referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
        html = downloader.read_html(url, referer, session=session)
        j = json.loads(html)
        data = j['data']
        albums = []
        for album in data['album_list']:
            id = album['album_id']
            type = album['type']
            album = Album(id, type)
            albums.append(album)

        return albums

    # Enumerate albums until an empty page (hard cap: 100 pages).
    albums = []
    for p in range(1, 101):
        albums_new = get_albums(p)
        albums += albums_new
        print_('p:{}, albums:{}'.format(p, len(albums)))
        if not albums_new:
            break

    # Walk each album's photo pages until an empty page (same 100-page cap).
    imgs = []
    for album in albums:
        print('Album:', album.id, album.type)
        for p in range(1, 101):
            imgs_new = get_album_imgs(album, p)
            imgs += imgs_new
            s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
            if cw:
                if not cw.alive:
                    # UI window was closed; abandon the read.
                    return []
                cw.setTitle(s)
            else:
                print(s)
            if not imgs_new:
                break
            sleep(1)  # throttle between page requests

    imgs = sorted(imgs, key=lambda img: img.timestamp, reverse=True)
    return imgs
|
|
|
|
|