urltitel: tpb torrent name support. some typing refactor.

This commit is contained in:
soratobuneko 2021-01-22 16:22:30 +01:00
parent d911c327f2
commit 3b2b8f39fc
1 changed files with 64 additions and 16 deletions

View File

@ -1,16 +1,17 @@
# TODO option ignore chan # TODO option ignore chan
import html import html
import json
import re import re
import weechat import weechat
from socket import timeout from socket import timeout
from typing import List, Optional from typing import List, NamedTuple, Optional, Tuple
from urllib.error import URLError from urllib.error import URLError
from urllib.parse import quote, urlsplit, urlunsplit from urllib.parse import ParseResult, quote, urlparse, urlunparse
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
SCRIPT_NAME = "urltitel" SCRIPT_NAME = "urltitel"
SCRIPT_AUTHOR = "soratobuneko" SCRIPT_AUTHOR = "soratobuneko"
SCRIPT_VERSION = "8dev" SCRIPT_VERSION = "8"
SCRIPT_LICENCE = "WTFPL" SCRIPT_LICENCE = "WTFPL"
SCRIPT_DESCRIPTION = ( SCRIPT_DESCRIPTION = (
"Display or send titles of URLs from incoming and outcoming messages. " "Display or send titles of URLs from incoming and outcoming messages. "
@ -35,6 +36,7 @@ script_options = {
"sendfromme": ("off", "Alway send titles for URLs sent by ourself."), "sendfromme": ("off", "Alway send titles for URLs sent by ourself."),
"urlbuffer": ("off", "Create a buffer to collect the URLs with their titles."), "urlbuffer": ("off", "Create a buffer to collect the URLs with their titles."),
"debug": ("off", "Show debug messages"), "debug": ("off", "Show debug messages"),
"http_rewrite": ("on", "Rewrite HTTP URL to HTTPS")
} }
url_buffer = None url_buffer = None
@ -56,13 +58,24 @@ def error(message: str) -> None:
weechat.prnt("", f"{weechat.prefix('error')}{SCRIPT_NAME}: {message}") weechat.prnt("", f"{weechat.prefix('error')}{SCRIPT_NAME}: {message}")
def fetch_html(url: str) -> Optional[str]: class Document(NamedTuple):
url: ParseResult
src: str
def fetch_html(url: str) -> Optional[Document]:
# IRI to URL (unicode to ascii) # IRI to URL (unicode to ascii)
url_split = urlsplit(url) url_parsed = urlparse(url)
url_list = list(url_split) url_urlencoded = ParseResult(
url_list[1] = quote(url_list[1]) # URL encode domain scheme=("https" if script_options["http_rewrite"]
url_list[2] = quote(url_list[2]) # URL encode path and url_parsed.scheme == "http"
url = urlunsplit(url_list) else url_parsed.scheme),
netloc=quote(url_parsed.netloc),
path=quote(url_parsed.path),
params=url_parsed.params,
query=url_parsed.query,
fragment=url_parsed.fragment
)
url = urlunparse(url_urlencoded)
request = Request(url, data=None, headers={"User-Agent": UA}) request = Request(url, data=None, headers={"User-Agent": UA})
tries = 2 if script_options["retry"] == "on" else 1 tries = 2 if script_options["retry"] == "on" else 1
@ -73,7 +86,7 @@ def fetch_html(url: str) -> Optional[str]:
if is_html: if is_html:
debug(f"Got an HTML document. Reading at most {script_options['maxdownload']} bytes.") debug(f"Got an HTML document. Reading at most {script_options['maxdownload']} bytes.")
html_doc_head = res.read(int(script_options["maxdownload"])).decode(errors="ignore") html_doc_head = res.read(int(script_options["maxdownload"])).decode(errors="ignore")
return html_doc_head return Document(url=url_urlencoded, src=html_doc_head)
else: else:
debug("Not an HTML document.") debug("Not an HTML document.")
return None return None
@ -98,9 +111,10 @@ def find_urls(message: str) -> List[str]:
_re_whitespace = re.compile(r"\s") _re_whitespace = re.compile(r"\s")
def get_title(html_doc: str) -> Optional[str]: def get_title(html_doc: Document) -> Optional[str]:
title = None title = None
title_match = re.search(r"(?i)<title ?[^<>]*>([^<>]*)</title>", html_doc) title_match = re.search(r"(?i)<title ?[^<>]*>([^<>]*)</title>",
html_doc.src)
if title_match is None: if title_match is None:
debug("No <title> found.") debug("No <title> found.")
return None return None
@ -116,8 +130,38 @@ def get_title(html_doc: str) -> Optional[str]:
stripped_title += " " stripped_title += " "
stripped_title = stripped_title.strip() stripped_title = stripped_title.strip()
if stripped_title.find("The Pirate Bay - The galaxy's most resilient bittorrent site") == 0:
torrent = tpb_get_torrent_by_url(html_doc.url)
if torrent is not None:
stripped_title = f"TPB torrent: {torrent.name}"
return stripped_title return stripped_title
class Torrent(NamedTuple):
    """Immutable record identifying one torrent by numeric id and name."""

    # Numeric torrent identifier.
    id: int
    # Name of the torrent.
    name: str
def tpb_get_torrent(id: int) -> Torrent:
    """Look up a torrent by id through the apibay.org JSON endpoint.

    Sends a GET request to ``https://apibay.org/t.php?id=<id>`` using the
    script's User-Agent and the configured ``timeout`` option, decodes the
    response body as JSON and builds a ``Torrent`` from its ``"name"`` entry.

    No exception is handled here: anything raised by ``urlopen`` or
    ``json.load``, and the ``KeyError`` raised when the decoded object has no
    ``"name"`` key, propagates to the caller.
    """
    api_url = f"https://apibay.org/t.php?id={id}"
    api_request = Request(api_url, data=None, headers={"User-Agent": UA})
    max_wait = int(script_options["timeout"])
    with urlopen(url=api_request, timeout=max_wait) as api_response:
        payload = json.load(api_response)
    return Torrent(id=id, name=payload["name"])
# Matches a URL query string containing an all-digit "id" parameter at any
# position; group 1 captures the digits.  The "$" anchor sits at the very end
# of the pattern so that parameters following "id" (e.g. "id=123&foo=bar")
# are accepted as well.
_re_query_id = re.compile(r"^(?:[^&]*&)*id=([0-9]+)(?:&[^&]*)*$")


def tpb_get_torrent_by_url(url: ParseResult) -> Optional[Torrent]:
    """Return the torrent referenced by a ``description.php?id=<n>`` URL.

    Args:
        url: The parsed URL of the page whose title is being resolved.

    Returns:
        The ``Torrent`` returned by ``tpb_get_torrent`` for the numeric ``id``
        query parameter, or ``None`` when the path does not end with
        ``description.php``, when the query has no numeric ``id`` parameter,
        or when the lookup itself fails.
    """
    if not url.path.endswith("description.php"):
        return None

    id_match = _re_query_id.match(url.query)
    if id_match is None:
        return None

    try:
        return tpb_get_torrent(id=int(id_match.group(1)))
    except (OSError, ValueError, KeyError, TypeError):
        # The lookup is a best-effort enrichment: on a network failure
        # (URLError and socket timeouts are OSError subclasses), undecodable
        # JSON (ValueError) or an unexpected payload shape (KeyError /
        # TypeError), report "no torrent" so the caller can fall back to the
        # title it already has.
        return None
def on_config_change(data, option, value): def on_config_change(data, option, value):
key = option.split(".")[-1] key = option.split(".")[-1]
@ -134,7 +178,8 @@ def on_buffer_close(data, buffer):
def on_privmsg(data, signal, signal_data): def on_privmsg(data, signal, signal_data):
global url_buffer global url_buffer
server = signal.split(",")[0] server = signal.split(",")[0]
msg = weechat.info_get_hashtable("irc_message_parse", {"message": signal_data}) msg = weechat.info_get_hashtable("irc_message_parse",
{"message": signal_data})
srvchan = f"{server},{msg['channel']}" srvchan = f"{server},{msg['channel']}"
# Parse only messages from configured server/channels # Parse only messages from configured server/channels
@ -168,7 +213,8 @@ def on_privmsg(data, signal, signal_data):
url_buffer, url_buffer,
f"<{nick}{weechat.color('red')}@{weechat.color('default')}{server}/{msg['channel']}>\t{msg['text']}", f"<{nick}{weechat.color('red')}@{weechat.color('default')}{server}/{msg['channel']}>\t{msg['text']}",
) )
force_send = script_options["sendfromme"] == "on" and len(msg["nick"]) == 0 force_send = (script_options["sendfromme"] == "on"
and len(msg["nick"]) == 0)
show_urls_title(srvchan, titles, force_send) show_urls_title(srvchan, titles, force_send)
return weechat.WEECHAT_RC_OK return weechat.WEECHAT_RC_OK
@ -179,7 +225,8 @@ def show_urls_title(srvchan: str, titles: List[str], force_send: bool) -> None:
buffer = weechat.info_get("irc_buffer", srvchan) buffer = weechat.info_get("irc_buffer", srvchan)
action = ( action = (
(ACTION_SEND, "to") (ACTION_SEND, "to")
if force_send or srvchan_in_list(srvchan, script_options["replyto"].split("|")) if force_send or srvchan_in_list(srvchan,
script_options["replyto"].split("|"))
else ("Displaying", "on") else ("Displaying", "on")
) )
if buffer is not None: if buffer is not None:
@ -231,6 +278,7 @@ if script_options["urlbuffer"] == "on":
create_buffer() create_buffer()
weechat.hook_config("plugins.var.python." + SCRIPT_NAME + ".*", "on_config_change", "") weechat.hook_config("plugins.var.python." + SCRIPT_NAME + ".*",
"on_config_change", "")
weechat.hook_signal("*,irc_in2_privmsg", "on_privmsg", "") weechat.hook_signal("*,irc_in2_privmsg", "on_privmsg", "")
weechat.hook_signal("*,irc_out1_privmsg", "on_privmsg", "") weechat.hook_signal("*,irc_out1_privmsg", "on_privmsg", "")