[FIX] google videos thumbnails

This commit is contained in:
Venca24 2019-01-04 15:48:22 +01:00
parent cee15f0375
commit cf26aba93b
1 changed file with 17 additions and 3 deletions

View File

@@ -7,15 +7,16 @@
@using-api no
@results HTML
@stable no
@parse url, title, content
@parse url, title, content, thumbnail
"""
from datetime import date, timedelta
from json import loads
from lxml import html
from searx.engines import logger
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
import re
# engine dependent config
categories = ['videos']
@@ -73,11 +74,24 @@ def response(resp):
url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]'))
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
# append result
results.append({'url': url,
'title': title,
'content': content,
'thumbnail': '',
'thumbnail': thumbnail,
'template': 'videos.html'})
return results