exclude disambiguation pages from wikipedia infobox

This commit is contained in:
Marc Abonce Seguin 2019-08-25 22:23:37 -07:00
parent 34ad3d6b34
commit c18048e045
1 changed file with 3 additions and 2 deletions

View File

@@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
'action=query'\
'&format=json'\
'&{query}'\
-'&prop=extracts|pageimages'\
+'&prop=extracts|pageimages|pageprops'\
+'&ppprop=disambiguation'\
'&exintro'\
'&explaintext'\
'&pithumbsize=300'\
@@ -87,7 +88,7 @@ def response(resp):
if int(article_id) > 0:
break
-if int(article_id) < 0:
+if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
return []
title = page.get('title')