This commit is contained in:
夜坂雅 2023-02-15 07:25:09 +08:00
parent 308396df74
commit 12e35aca98
2 changed files with 7 additions and 5 deletions

View File

@@ -216,11 +216,13 @@ async def parse_wordcloud_args(
return sender, days
# Pattern matching web URLs embedded in free text. A match starts with an
# explicit http(s) scheme, a "www."-style prefix, or a bare "domain.tld/"
# prefix; the body may contain parenthesised groups (nested up to two levels)
# and the match may not end in common trailing punctuation.
# Compiled once at import time so strip_urls() reuses the same pattern object
# on every call.
RE_DATA = re.compile(
    r"""(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
)


def strip_urls(input: str) -> str:
    """Return *input* with every substring matched by ``RE_DATA`` removed.

    Only the matched URL text is deleted: surrounding whitespace, and any
    trailing punctuation the pattern refuses to consume, stay in place.
    """
    # NOTE: ``input`` shadows the builtin; the name is kept because it is part
    # of the function's signature.
    return RE_DATA.sub("", input)
def should_record_message_content(room_features, room_id: str) -> bool:

View File

@@ -44,8 +44,8 @@ def make_image(freqs, bytesio):
image = (
WordCloud(
font_path=FONT,
width=800,
height=400,
width=1600,
height=800,
)
.generate_from_frequencies(freqs)
.to_image()
@@ -181,7 +181,7 @@ def gather_messages(
# XXX: Special case for Arch Linux CN
if msg_item.sender == "@matterbridge:nichi.co":
data = re.sub(r"^\[.*\] ", "", msg_item.body)
print(strip_tags(data.strip()), file=stringio)
print(strip_urls(data.strip()), file=stringio)
else:
print(strip_urls(msg_item.body), file=stringio)
else: