urltitel: get URL title.
Display or send titles of URLs from incoming and outcoming messages. Also features an optional URL buffer
This commit is contained in:
commit
d7c61c3466
|
@ -0,0 +1 @@
|
|||
/test.py
|
|
@ -0,0 +1,261 @@
|
|||
import html
|
||||
import re
|
||||
import weechat
|
||||
from urllib.error import URLError
|
||||
from urllib.parse import quote, urlsplit, urlunsplit
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
SCRIPT_NAME = "urltitel"
|
||||
SCRIPT_AUTHOR = "soratobuneko"
|
||||
SCRIPT_VERSION = "4"
|
||||
SCRIPT_LICENCE = "WTFPL"
|
||||
SCRIPT_DESCRIPTION = (
|
||||
"Display or send titles of URLs from incoming and outcoming messages. "
|
||||
+ "Also features an optional URL buffer"
|
||||
)
|
||||
UA = "Mozilla/5.0 (Python) weechat {}".format(SCRIPT_NAME)
|
||||
|
||||
script_options = {
|
||||
"timeout": ("3", "Maximum time to wait to fetch URL."),
|
||||
"retry": ("off", "Retry fetching URL if it fails the first time."),
|
||||
"maxlength": ("200", "Maximum length of title."),
|
||||
"maxdownload": ("262144", "Maximum size (Bytes) to fetch from URL."),
|
||||
"serverchans": (
|
||||
"*,*",
|
||||
'"|" separated list of server,#channel to parse. for instance: "server0,#channel1|server0,#channel2"',
|
||||
),
|
||||
"replyto": (
|
||||
"",
|
||||
'"|" separated list of server,#channel for which instead of displaying localy a message we send it to the channel.',
|
||||
),
|
||||
"sendfromme": ("off", "Alway send titles for URLs sent by ourself."),
|
||||
"urlbuffer": ("off", "Create a buffer to collect the URLs with their titles."),
|
||||
"debug": ("off", "Show debug messages"),
|
||||
}
|
||||
|
||||
|
||||
def create_buffer():
|
||||
global url_buffer
|
||||
BUFFER_NAME = "{}".format(SCRIPT_NAME)
|
||||
url_buffer = weechat.buffer_new(BUFFER_NAME, "", "", "on_buffer_close", "")
|
||||
weechat.buffer_set(
|
||||
url_buffer, "title", "URL buffer ({} v{})".format(SCRIPT_NAME, SCRIPT_VERSION)
|
||||
)
|
||||
|
||||
|
||||
def debug(message):
|
||||
if script_options["debug"] == "on":
|
||||
weechat.prnt("", "{}: {}".format(SCRIPT_NAME, message))
|
||||
|
||||
|
||||
def error(message):
|
||||
weechat.prnt("", "{}{}: {}".format(weechat.prefix("error"), SCRIPT_NAME, message))
|
||||
|
||||
|
||||
def fetch_html(url):
|
||||
# IRI to URL (unicode to ascii)
|
||||
url = urlsplit(url)
|
||||
url = list(url)
|
||||
url[1] = quote(url[1]) # URL encode domain
|
||||
url[2] = quote(url[2]) # URL encode path
|
||||
url = urlunsplit(url)
|
||||
request = Request(url, data=None, headers={"User-Agent": UA})
|
||||
|
||||
tries = 2 if script_options["retry"] == "on" else 1
|
||||
for i in range(0, tries):
|
||||
try:
|
||||
with urlopen(request, timeout=int(script_options["timeout"])) as res:
|
||||
is_html = bool(re.match(".*/html.*", res.info()["Content-Type"]))
|
||||
if is_html:
|
||||
debug(
|
||||
"Got an HTML document. Reading at most {} bytes.".format(
|
||||
script_options["maxdownload"]
|
||||
),
|
||||
)
|
||||
html_doc_head = res.read(int(script_options["maxdownload"])).decode(
|
||||
"utf-8", "replace"
|
||||
)
|
||||
return html.unescape(html_doc_head)
|
||||
else:
|
||||
debug("Not an HTML document.")
|
||||
return None
|
||||
except URLError as err:
|
||||
error(
|
||||
"Cannot fetch URL. {}".format(err.reason),
|
||||
)
|
||||
|
||||
|
||||
_re_url = re.compile(r"https?://[\w0-9@:%._\+~#=()?&/\-]+")
|
||||
|
||||
|
||||
def find_urls(message):
|
||||
# Found URLs with title [["http://perdu.com", "Vous Etes Perdu ?"], ...]
|
||||
# If URL point to a non HTML document the list element is None. If the
|
||||
# HTML doc has no <title> the list element is ["https://..", None]
|
||||
urls = []
|
||||
urls_count = 0
|
||||
|
||||
if re.match(r"^url\|\d+\): ", message):
|
||||
return (0, ())
|
||||
|
||||
if re.match(r"https?://[^ ]", message) and not re.match(_re_url, message):
|
||||
debug("Failling to match URL in message: {}".format(message))
|
||||
|
||||
for url in re.findall(_re_url, message):
|
||||
debug("Fetching title for URL: {}".format(url))
|
||||
html = fetch_html(url)
|
||||
if html != None:
|
||||
title = get_title(html)
|
||||
if title != None and len(title):
|
||||
urls_count += 1
|
||||
debug("Found title: {}".format(title))
|
||||
if len(title) > int(script_options["maxlength"]):
|
||||
urls.append([url, title[0 : int(script_options["maxlength"])]])
|
||||
else:
|
||||
urls.append([url, title])
|
||||
else:
|
||||
urls.append(None)
|
||||
|
||||
return (urls_count, urls)
|
||||
|
||||
|
||||
_re_whitespace = re.compile(r"\s")
|
||||
|
||||
|
||||
def get_title(html):
|
||||
title = re.search(r"(?i)<title ?[^<>]*>([^<>]*)</title>", html)
|
||||
if title == None:
|
||||
debug("No <title> found.")
|
||||
return None
|
||||
else:
|
||||
title = title.group(1)
|
||||
|
||||
# many whitespace to one space
|
||||
stripped_title = ""
|
||||
for i, char in enumerate(title):
|
||||
if not re.match(_re_whitespace, char):
|
||||
stripped_title += char
|
||||
elif i > 0 and not re.match(_re_whitespace, title[i - 1]):
|
||||
stripped_title += " "
|
||||
stripped_title = stripped_title.strip()
|
||||
|
||||
return stripped_title
|
||||
|
||||
|
||||
def on_config_change(data, option, value):
|
||||
key = option.split(".")[-1]
|
||||
script_options[key] = value
|
||||
return weechat.WEECHAT_RC_OK
|
||||
|
||||
|
||||
def on_buffer_close(data, buffer):
|
||||
global url_buffer
|
||||
url_buffer = None
|
||||
return weechat.WEECHAT_RC_OK
|
||||
|
||||
|
||||
def on_privmsg(data, signal, signal_data):
|
||||
server = signal.split(",")[0]
|
||||
msg = weechat.info_get_hashtable("irc_message_parse", {"message": signal_data})
|
||||
srvchan = "{},{}".format(server, msg["channel"])
|
||||
|
||||
# Parse only messages from configured server/channels
|
||||
if not srvchan_in_list(srvchan, script_options["serverchans"].split("|")):
|
||||
debug("Ignoring message from {}/{}".format(server, msg["channel"]))
|
||||
return weechat.WEECHAT_RC_OK
|
||||
|
||||
urls_found = find_urls(msg["text"])
|
||||
if script_options["urlbuffer"] == "on" and len(urls_found[1]):
|
||||
nick = msg["nick"]
|
||||
if not len(nick):
|
||||
nick = "{}{}{}".format(
|
||||
weechat.color("*white"),
|
||||
weechat.info_get("irc_nick", server),
|
||||
weechat.color("default"),
|
||||
)
|
||||
if not url_buffer:
|
||||
create_buffer()
|
||||
weechat.prnt(
|
||||
url_buffer,
|
||||
"<{}{}@{}{}/{}>\t{}".format(
|
||||
nick,
|
||||
weechat.color("red"),
|
||||
weechat.color("default"),
|
||||
server,
|
||||
msg["channel"],
|
||||
msg["text"],
|
||||
),
|
||||
)
|
||||
if urls_found[0]:
|
||||
force_send = (
|
||||
True
|
||||
if script_options["sendfromme"] == "on" and not len(msg["nick"])
|
||||
else False
|
||||
)
|
||||
show_urls_title(srvchan, urls_found[1], force_send)
|
||||
|
||||
return weechat.WEECHAT_RC_OK
|
||||
|
||||
|
||||
def show_urls_title(srvchan, urls, force_send):
|
||||
ACTION_SEND = "Sending"
|
||||
buffer = weechat.info_get("irc_buffer", srvchan)
|
||||
action = (
|
||||
(ACTION_SEND, "to")
|
||||
if force_send or srvchan_in_list(srvchan, script_options["replyto"].split("|"))
|
||||
else ("Displaying", "on")
|
||||
)
|
||||
if buffer:
|
||||
for i, url in enumerate(urls):
|
||||
if url != None:
|
||||
debug(
|
||||
"{} title(s) {} {}".format(action[0], action[1], srvchan),
|
||||
)
|
||||
if action[0] == ACTION_SEND:
|
||||
weechat.command(buffer, "url|{}): {}".format(i + 1, url[1]))
|
||||
else: # We have already checked script_options["serverchans"] in on_privmsg
|
||||
weechat.prnt(buffer, "{}:\t{}".format(i + 1, url[1]))
|
||||
if script_options["urlbuffer"] == "on":
|
||||
if not url_buffer:
|
||||
create_buffer()
|
||||
weechat.prnt(url_buffer, "{}:\t{}".format(i + 1, url[1]))
|
||||
|
||||
|
||||
def srvchan_in_list(srvchan, srvchan_list):
|
||||
srvchan = srvchan.lower().split(",")
|
||||
for _srvchan in srvchan_list:
|
||||
_srvchan = _srvchan.lower().split(",")
|
||||
if (_srvchan[0] == "*" or srvchan[0] == _srvchan[0]) and (
|
||||
_srvchan[1] == "*" or srvchan[1] == _srvchan[1]
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
weechat.register(
|
||||
SCRIPT_NAME,
|
||||
SCRIPT_AUTHOR,
|
||||
SCRIPT_VERSION,
|
||||
SCRIPT_LICENCE,
|
||||
SCRIPT_DESCRIPTION,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
|
||||
for option, default_value in list(script_options.items()):
|
||||
if not weechat.config_is_set_plugin(option):
|
||||
weechat.config_set_plugin(option, default_value[0])
|
||||
script_options[option] = default_value[0]
|
||||
else:
|
||||
script_options[option] = weechat.config_get_plugin(option)
|
||||
weechat.config_set_desc_plugin(
|
||||
option, "{} (default: {})".format(default_value[1], default_value[0])
|
||||
)
|
||||
|
||||
if script_options["urlbuffer"] == "on":
|
||||
create_buffer()
|
||||
|
||||
|
||||
weechat.hook_config("plugins.var.python." + SCRIPT_NAME + ".*", "on_config_change", "")
|
||||
weechat.hook_signal("*,irc_in2_privmsg", "on_privmsg", "")
|
||||
weechat.hook_signal("*,irc_out1_privmsg", "on_privmsg", "")
|
Loading…
Reference in New Issue