Hitomi-Downloader/src/extractor/pinter_downloader.py

241 lines
7.4 KiB
Python

import downloader
from utils import Session, Downloader, LazyUrl, clean_url, try_n, Soup, clean_title, get_ext, get_max_range, get_print
import json, os, ree as re
from timee import sleep
from translator import tr_
import urllib
import constants
from ratelimit import limits, sleep_and_retry
from m3u8_tools import playlist2stream, M3u8_stream
# Root of the Pinterest site; used below to build the resource API endpoint,
# the pin page URLs, and the Referer/Origin request headers.
BASE_URL = 'https://www.pinterest.com'
class Downloader_pinter(Downloader):
    """Downloader for Pinterest boards, board sections and single pins."""

    type = 'pinter'
    URLS = ['pinterest.']
    # What the URL points at: 'board' (the default), 'section' or 'pin'.
    type_pinter = 'board'
    display_name = 'Pinterest'

    @try_n(4)
    def init(self):
        """Classify ``self.url`` and, for boards/sections, fetch ``self.info``."""
        self.api = PinterestAPI()
        self._pin_id = re.find(r'https?://.*pinterest\.[^/]+/pin/([0-9]+)', self.url)
        if self._pin_id is None:
            # Not a pin URL: it must name a board, optionally with a section
            # appended after a slash.
            username, board = get_username_board(self.url)
            if '/' in board:
                self.type_pinter = 'section'
        else:
            self.type_pinter = 'pin'
        self.print_('type: {}'.format(self.type_pinter))

        kind = self.type_pinter
        if kind == 'pin':
            return  #5132
        if kind not in ('board', 'section'):
            raise NotImplementedError(kind)
        self.info = get_info(username, board, self.api)

    @classmethod
    def fix_url(cls, url):
        """Prefix inputs that do not mention ``pinterest.`` with the site root."""
        if 'pinterest.' in url:
            return url
        return 'https://www.pinterest.com/{}'.format(url)

    @property
    def name(self):
        """Pin id for a single pin, otherwise the cleaned ``<owner>/<board name>``."""
        if self.type_pinter == 'pin':
            return self._pin_id
        owner = self.info['owner']['username']
        board_name = self.info['name']
        return clean_title(u'{}/{}'.format(owner, board_name))

    def read(self):
        """Collect the media URLs for the pin/board/section into ``self.urls``."""
        is_pin = self.type_pinter == 'pin'
        if is_pin:
            self.single = True
        target_id = self._pin_id if is_pin else self.info['id']

        self.title = self.name
        found = get_imgs(target_id, self.api, cw=self.cw, title=self.name, type=self.type_pinter)
        for item in found:
            self.urls.append(item.url)
        self.title = self.name
def get_info(username, board, api):
    """Return the metadata dict for a board or for one section of a board.

    ``board`` is either a plain board slug or ``<board slug>/<section slug>``.
    For a section, the board data is fetched first, the section whose slug
    matches (case-insensitively) is merged on top of it, and ``name`` becomes
    ``<board name>/<section title>``.

    Raises:
        Exception: ``'Invalid section'`` when no section slug matches.
    """
    if '/' not in board:
        return api.board(username, board)

    # Everything after the first slash is the section slug.
    board, _, section = board.partition('/')
    info = api.board(username, board)

    wanted = section.lower()
    for candidate in api.board_sections(info['id']):
        slug = candidate['slug'].lower()
        print(slug, section)
        if slug == wanted:
            break
    else:
        # The loop finished without finding a matching slug.
        raise Exception('Invalid section')

    section_title = candidate['title']
    info.update(candidate)
    info['name'] = u'{}/{}'.format(info['name'], section_title)
    print('section_id:', info['id'])
    return info
class PinterestAPI:
    """Small client for the ``/resource/<Name>Resource/get/`` endpoints under BASE_URL."""

    # Headers installed on the session for every request.
    HEADERS = {
        'Accept': 'application/json, text/javascript, */*, q=0.01',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': BASE_URL + '/',
        'X-Requested-With': 'XMLHttpRequest',
        'X-APP-VERSION' : '31461e0',
        'X-Pinterest-AppState': 'active',
        'Origin': BASE_URL,
    }

    def __init__(self):
        self.session = Session('chrome')
        self.session.headers.update(self.HEADERS)

    def pin(self, pin_id):
        """Return the detailed data dict of a single pin."""
        response = self._call('Pin', {'id': pin_id, 'field_set_key': 'detailed'})
        return response['resource_response']['data']

    def pin_related(self, pin_id):
        """Iterate over the 'RelatedPinFeed' entries of a pin."""
        return self._pagination(
            'RelatedPinFeed',
            {'pin': pin_id, 'add_vase': True, 'pins_only': True},
        )

    def board(self, user, board):
        """Return the detailed data dict of ``user``'s board with slug ``board``."""
        response = self._call(
            'Board',
            {'slug': board, 'username': user, 'field_set_key': 'detailed'},
        )
        return response['resource_response']['data']

    def board_pins(self, board_id):
        """Iterate over the 'BoardFeed' entries of a board."""
        return self._pagination('BoardFeed', {'board_id': board_id})

    def board_related(self, board_id):
        """Iterate over the 'BoardRelatedPixieFeed' entries of a board."""
        return self._pagination(
            'BoardRelatedPixieFeed',
            {'board_id': board_id, 'add_vase': True},
        )

    def board_sections(self, board_id):
        """Iterate over the 'BoardSections' entries of a board."""
        return self._pagination('BoardSections', {'board_id': board_id})

    def board_section_pins(self, section_id):
        """Iterate over the 'BoardSectionPins' entries of a section."""
        return self._pagination('BoardSectionPins', {'section_id': section_id})

    @try_n(4)
    @sleep_and_retry
    @limits(1, 4)  # limits(1, 4); the original note here read "1000 calls per hour"
    def _call(self, resource, options):
        """GET one resource endpoint and return its decoded JSON body.

        A body that is not valid JSON decodes to ``{}``. The decoded data is
        returned only when the status is below 400 and ``history`` is empty;
        a 404 or a non-empty ``history`` raises ``Exception('Not Found')``,
        and any other status raises a generic ``Exception``.
        """
        # NOTE(review): the last response is published as module global ``R``;
        # this looks like a debugging aid -- confirm nothing reads it.
        global R
        endpoint = '{}/resource/{}Resource/get/'.format(BASE_URL, resource)
        query = {'data': json.dumps({'options': options}), 'source_url': ''}
        print('_call: {}, {}'.format(endpoint, query))
        response = self.session.get(endpoint, params=query)
        print(response)
        R = response

        body = response.text
        status = response.status_code
        try:
            payload = json.loads(body)
        except ValueError:
            payload = {}

        if status >= 400 or response.history:
            if status == 404 or response.history:
                raise Exception('Not Found')
            raise Exception('API request failed: {}'.format(status))
        return payload

    def _pagination(self, resource, options):
        """Yield every item of a paged resource, following its bookmarks.

        ``options`` is updated in place with the bookmarks for the next page.
        Iteration stops when the response carries no bookmarks, or the first
        bookmark is ``'-end-'`` or starts with ``'Y2JOb25lO'``.
        """
        while True:
            page = self._call(resource, options)
            yield from page['resource_response']['data']
            try:
                cursor = page['resource']['options']['bookmarks']
                finished = (
                    not cursor
                    or cursor[0] == '-end-'
                    or cursor[0].startswith('Y2JOb25lO')
                )
                if finished:
                    return
                options['bookmarks'] = cursor
            except KeyError:
                return
class Image:
    """One downloadable pin, either a still image or a video.

    Attributes set by ``__init__``:
        id: the pin id, copied from ``img['id']``.
        url: a ``LazyUrl`` built from the pin page URL and a getter that
            returns the media source.
        filename: ``<id><ext>``; the extension is ``.mp4`` for m3u8 streams.
    """

    def __init__(self, img):
        """Build the entry from a pin data dict.

        ``img`` must contain ``id`` and, unless it carries a non-empty
        ``videos['video_list']``, ``images['orig']['url']``.
        """
        self.id = img['id']
        print(self.id)

        videos = img.get('videos')
        video_list = videos['video_list'] if videos and 'video_list' in videos else None
        if video_list:
            # Use the first listed rendition, as before.
            src = next(iter(video_list.values()))['url']
        else:
            # No videos, or an empty/null video list (which used to raise):
            # fall back to the original-size image.
            src = img['images']['orig']['url']

        ext = get_ext(src)
        if ext.lower() == '.m3u8':
            try:
                src = playlist2stream(src)
            except Exception:
                # Best-effort fallback, but no longer a bare ``except`` so
                # KeyboardInterrupt/SystemExit still propagate.
                src = M3u8_stream(src)
            ext = '.mp4'

        self.url = LazyUrl('{}/pin/{}/'.format(BASE_URL, self.id), lambda _: src, self)
        self.filename = '{}{}'.format(self.id, ext)
def get_imgs(id, api, cw=None, title=None, type='board'):
    """Collect ``Image`` objects for a board, a board section or a single pin.

    Args:
        id: board id, section id or pin id, depending on ``type``.
        api: object providing ``board_pins``, ``board_section_pins`` and ``pin``.
        cw: optional object passed to ``get_print``/``get_max_range``; when given,
            its ``alive`` flag is checked and its title is updated per image.
        title: text shown in the progress title.
        type: ``'board'``, ``'section'`` or ``'pin'``.

    Returns:
        The collected images (at most ``get_max_range(cw)``), or ``[]`` if
        ``cw`` stopped being alive part-way through.

    Raises:
        Exception: for an unsupported ``type``.
        AssertionError: when a single-pin lookup returns a different id.
    """
    print_ = get_print(cw)  # call kept from the original; the result is unused
    limit = get_max_range(cw)
    print('get_imgs: type={}'.format(type))

    fetchers = {
        'board': lambda: api.board_pins(id),
        'section': lambda: api.board_section_pins(id),
        'pin': lambda: [api.pin(id)],
    }
    fetch = fetchers.get(type)
    if fetch is None:
        raise Exception(u'Type "{}" is not supported'.format(type))
    source = fetch()

    collected = []
    seen = set()
    for entry in source:
        if 'images' not in entry:
            print('skip img:', entry['id'])
            continue

        image = Image(entry)
        if type == 'pin' and image.id != id:
            raise AssertionError('id mismatch')
        if image.id in seen:
            print('duplicate:', image.id)
            continue

        seen.add(image.id)
        print(image.url)
        print(image.filename)
        print()
        collected.append(image)
        if len(collected) >= limit:
            break

        if cw is None:
            continue
        if not cw.alive:
            return []
        cw.setTitle(u'{} {} ({})'.format(tr_(u'\uc77d\ub294 \uc911...'), title, len(collected)))
    return collected
def get_username_board(url):
    """Extract ``(username, board)`` from a Pinterest board or section URL.

    ``board`` is everything after the username up to the first ``#`` or ``?``,
    percent-decoded, with surrounding whitespace and trailing slashes removed;
    it still contains a ``/`` when the URL names a section.

    Raises:
        ValueError: if no ``/<username>/<board>`` path follows the Pinterest host.
    """
    # ``import urllib`` alone does not guarantee the ``parse`` submodule is
    # loaded, so import the function explicitly.
    from urllib.parse import unquote

    url = clean_url(url)
    m = re.search(r'pinterest\.[a-zA-Z.]+?/([^/]+)/([^#\?]+)', url)
    if m is None:
        # Previously this surfaced as an AttributeError on ``None.groups()``.
        raise ValueError('Not a Pinterest board URL: {}'.format(url))
    username, board = m.groups()

    board = unquote(board).strip()
    while board.endswith('/'):
        board = board[:-1].strip()
    return (username, board)