urltitel: tpb torrent name support. some typing refactor.
This commit is contained in:
parent
d911c327f2
commit
3b2b8f39fc
80
urltitel.py
80
urltitel.py
|
@ -1,16 +1,17 @@
|
||||||
# TODO option ignore chan
|
# TODO option ignore chan
|
||||||
import html
|
import html
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import weechat
|
import weechat
|
||||||
from socket import timeout
|
from socket import timeout
|
||||||
from typing import List, Optional
|
from typing import List, NamedTuple, Optional, Tuple
|
||||||
from urllib.error import URLError
|
from urllib.error import URLError
|
||||||
from urllib.parse import quote, urlsplit, urlunsplit
|
from urllib.parse import ParseResult, quote, urlparse, urlunparse
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
SCRIPT_NAME = "urltitel"
|
SCRIPT_NAME = "urltitel"
|
||||||
SCRIPT_AUTHOR = "soratobuneko"
|
SCRIPT_AUTHOR = "soratobuneko"
|
||||||
SCRIPT_VERSION = "8dev"
|
SCRIPT_VERSION = "8"
|
||||||
SCRIPT_LICENCE = "WTFPL"
|
SCRIPT_LICENCE = "WTFPL"
|
||||||
SCRIPT_DESCRIPTION = (
|
SCRIPT_DESCRIPTION = (
|
||||||
"Display or send titles of URLs from incoming and outcoming messages. "
|
"Display or send titles of URLs from incoming and outcoming messages. "
|
||||||
|
@ -35,6 +36,7 @@ script_options = {
|
||||||
"sendfromme": ("off", "Alway send titles for URLs sent by ourself."),
|
"sendfromme": ("off", "Alway send titles for URLs sent by ourself."),
|
||||||
"urlbuffer": ("off", "Create a buffer to collect the URLs with their titles."),
|
"urlbuffer": ("off", "Create a buffer to collect the URLs with their titles."),
|
||||||
"debug": ("off", "Show debug messages"),
|
"debug": ("off", "Show debug messages"),
|
||||||
|
"http_rewrite": ("on", "Rewrite HTTP URL to HTTPS")
|
||||||
}
|
}
|
||||||
|
|
||||||
url_buffer = None
|
url_buffer = None
|
||||||
|
@ -56,13 +58,24 @@ def error(message: str) -> None:
|
||||||
weechat.prnt("", f"{weechat.prefix('error')}{SCRIPT_NAME}: {message}")
|
weechat.prnt("", f"{weechat.prefix('error')}{SCRIPT_NAME}: {message}")
|
||||||
|
|
||||||
|
|
||||||
def fetch_html(url: str) -> Optional[str]:
|
class Document(NamedTuple):
    """A fetched document paired with the URL it was requested from."""

    # Parsed form of the URL that was requested.
    url: ParseResult
    # Decoded text of the document.
    src: str
|
||||||
|
|
||||||
|
def fetch_html(url: str) -> Optional[Document]:
|
||||||
# IRI to URL (unicode to ascii)
|
# IRI to URL (unicode to ascii)
|
||||||
url_split = urlsplit(url)
|
url_parsed = urlparse(url)
|
||||||
url_list = list(url_split)
|
url_urlencoded = ParseResult(
|
||||||
url_list[1] = quote(url_list[1]) # URL encode domain
|
scheme=("https" if script_options["http_rewrite"]
|
||||||
url_list[2] = quote(url_list[2]) # URL encode path
|
and url_parsed.scheme == "http"
|
||||||
url = urlunsplit(url_list)
|
else url_parsed.scheme),
|
||||||
|
netloc=quote(url_parsed.netloc),
|
||||||
|
path=quote(url_parsed.path),
|
||||||
|
params=url_parsed.params,
|
||||||
|
query=url_parsed.query,
|
||||||
|
fragment=url_parsed.fragment
|
||||||
|
)
|
||||||
|
url = urlunparse(url_urlencoded)
|
||||||
request = Request(url, data=None, headers={"User-Agent": UA})
|
request = Request(url, data=None, headers={"User-Agent": UA})
|
||||||
|
|
||||||
tries = 2 if script_options["retry"] == "on" else 1
|
tries = 2 if script_options["retry"] == "on" else 1
|
||||||
|
@ -73,7 +86,7 @@ def fetch_html(url: str) -> Optional[str]:
|
||||||
if is_html:
|
if is_html:
|
||||||
debug(f"Got an HTML document. Reading at most {script_options['maxdownload']} bytes.")
|
debug(f"Got an HTML document. Reading at most {script_options['maxdownload']} bytes.")
|
||||||
html_doc_head = res.read(int(script_options["maxdownload"])).decode(errors="ignore")
|
html_doc_head = res.read(int(script_options["maxdownload"])).decode(errors="ignore")
|
||||||
return html_doc_head
|
return Document(url=url_urlencoded, src=html_doc_head)
|
||||||
else:
|
else:
|
||||||
debug("Not an HTML document.")
|
debug("Not an HTML document.")
|
||||||
return None
|
return None
|
||||||
|
@ -98,9 +111,10 @@ def find_urls(message: str) -> List[str]:
|
||||||
_re_whitespace = re.compile(r"\s")
|
_re_whitespace = re.compile(r"\s")
|
||||||
|
|
||||||
|
|
||||||
def get_title(html_doc: str) -> Optional[str]:
|
def get_title(html_doc: Document) -> Optional[str]:
|
||||||
title = None
|
title = None
|
||||||
title_match = re.search(r"(?i)<title ?[^<>]*>([^<>]*)</title>", html_doc)
|
title_match = re.search(r"(?i)<title ?[^<>]*>([^<>]*)</title>",
|
||||||
|
html_doc.src)
|
||||||
if title_match is None:
|
if title_match is None:
|
||||||
debug("No <title> found.")
|
debug("No <title> found.")
|
||||||
return None
|
return None
|
||||||
|
@ -116,8 +130,38 @@ def get_title(html_doc: str) -> Optional[str]:
|
||||||
stripped_title += " "
|
stripped_title += " "
|
||||||
stripped_title = stripped_title.strip()
|
stripped_title = stripped_title.strip()
|
||||||
|
|
||||||
|
if stripped_title.find("The Pirate Bay - The galaxy's most resilient bittorrent site") == 0:
|
||||||
|
torrent = tpb_get_torrent_by_url(html_doc.url)
|
||||||
|
if torrent is not None:
|
||||||
|
stripped_title = f"TPB torrent: {torrent.name}"
|
||||||
|
|
||||||
return stripped_title
|
return stripped_title
|
||||||
|
|
||||||
|
class Torrent(NamedTuple):
    """Identifier and display name of a torrent."""

    # Numeric torrent id.
    id: int
    # Torrent name.
    name: str
|
||||||
|
|
||||||
|
def tpb_get_torrent(id: int) -> Torrent:
    """Fetch a torrent's name from the apibay.org API.

    Sends a GET request to ``https://apibay.org/t.php`` with the script's
    User-Agent and the configured ``timeout`` option, then reads the
    ``"name"`` field of the JSON response.

    Args:
        id: Numeric torrent id to look up.

    Returns:
        A ``Torrent`` carrying the given id and the name from the response.

    Nothing is caught here: errors raised by ``urlopen``, ``json.load`` or
    the ``"name"`` lookup propagate to the caller.
    """
    api_request = Request(
        f"https://apibay.org/t.php?id={id}",
        data=None,
        headers={"User-Agent": UA},
    )
    request_timeout = int(script_options["timeout"])
    with urlopen(url=api_request, timeout=request_timeout) as response:
        payload = json.load(response)
    return Torrent(id=id, name=payload["name"])
|
||||||
|
|
||||||
|
|
||||||
|
_re_query_id = re.compile(r"^(?:[^&]*[&])*id=([0-9]+)$(?:[&][^&]*)*")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def tpb_get_torrent_by_url(url: ParseResult) -> Optional[Torrent]:
    """Resolve a Pirate Bay description-page URL to its torrent.

    Only URLs whose path ends with ``description.php`` and whose query
    string matches ``_re_query_id`` are looked up.

    Args:
        url: Parsed URL of the page being examined.

    Returns:
        The ``Torrent`` returned by ``tpb_get_torrent``, or ``None`` when the
        URL is not a description page, carries no usable ``id`` parameter,
        or the lookup fails.
    """
    if not url.path.endswith("description.php"):
        return None

    id_match = _re_query_id.match(url.query)
    if id_match is None:
        return None

    try:
        return tpb_get_torrent(id=int(id_match.group(1)))
    except (URLError, timeout, ValueError, KeyError, TypeError) as exc:
        # The lookup only enriches a title the caller already has, so a
        # network failure, malformed JSON (ValueError), a response without
        # "name" (KeyError) or a non-object JSON body (TypeError) falls back
        # to None instead of raising into the caller.
        debug(f"TPB torrent lookup failed: {exc}")
        return None
|
||||||
|
|
||||||
def on_config_change(data, option, value):
|
def on_config_change(data, option, value):
|
||||||
key = option.split(".")[-1]
|
key = option.split(".")[-1]
|
||||||
|
@ -134,7 +178,8 @@ def on_buffer_close(data, buffer):
|
||||||
def on_privmsg(data, signal, signal_data):
|
def on_privmsg(data, signal, signal_data):
|
||||||
global url_buffer
|
global url_buffer
|
||||||
server = signal.split(",")[0]
|
server = signal.split(",")[0]
|
||||||
msg = weechat.info_get_hashtable("irc_message_parse", {"message": signal_data})
|
msg = weechat.info_get_hashtable("irc_message_parse",
|
||||||
|
{"message": signal_data})
|
||||||
srvchan = f"{server},{msg['channel']}"
|
srvchan = f"{server},{msg['channel']}"
|
||||||
|
|
||||||
# Parse only messages from configured server/channels
|
# Parse only messages from configured server/channels
|
||||||
|
@ -168,7 +213,8 @@ def on_privmsg(data, signal, signal_data):
|
||||||
url_buffer,
|
url_buffer,
|
||||||
f"<{nick}{weechat.color('red')}@{weechat.color('default')}{server}/{msg['channel']}>\t{msg['text']}",
|
f"<{nick}{weechat.color('red')}@{weechat.color('default')}{server}/{msg['channel']}>\t{msg['text']}",
|
||||||
)
|
)
|
||||||
force_send = script_options["sendfromme"] == "on" and len(msg["nick"]) == 0
|
force_send = (script_options["sendfromme"] == "on"
|
||||||
|
and len(msg["nick"]) == 0)
|
||||||
show_urls_title(srvchan, titles, force_send)
|
show_urls_title(srvchan, titles, force_send)
|
||||||
|
|
||||||
return weechat.WEECHAT_RC_OK
|
return weechat.WEECHAT_RC_OK
|
||||||
|
@ -179,7 +225,8 @@ def show_urls_title(srvchan: str, titles: List[str], force_send: bool) -> None:
|
||||||
buffer = weechat.info_get("irc_buffer", srvchan)
|
buffer = weechat.info_get("irc_buffer", srvchan)
|
||||||
action = (
|
action = (
|
||||||
(ACTION_SEND, "to")
|
(ACTION_SEND, "to")
|
||||||
if force_send or srvchan_in_list(srvchan, script_options["replyto"].split("|"))
|
if force_send or srvchan_in_list(srvchan,
|
||||||
|
script_options["replyto"].split("|"))
|
||||||
else ("Displaying", "on")
|
else ("Displaying", "on")
|
||||||
)
|
)
|
||||||
if buffer is not None:
|
if buffer is not None:
|
||||||
|
@ -231,6 +278,7 @@ if script_options["urlbuffer"] == "on":
|
||||||
create_buffer()
|
create_buffer()
|
||||||
|
|
||||||
|
|
||||||
weechat.hook_config("plugins.var.python." + SCRIPT_NAME + ".*", "on_config_change", "")
|
weechat.hook_config("plugins.var.python." + SCRIPT_NAME + ".*",
|
||||||
|
"on_config_change", "")
|
||||||
weechat.hook_signal("*,irc_in2_privmsg", "on_privmsg", "")
|
weechat.hook_signal("*,irc_in2_privmsg", "on_privmsg", "")
|
||||||
weechat.hook_signal("*,irc_out1_privmsg", "on_privmsg", "")
|
weechat.hook_signal("*,irc_out1_privmsg", "on_privmsg", "")
|
||||||
|
|
Loading…
Reference in New Issue