Add word cloud functions
This commit is contained in:
parent
584d131bfb
commit
2e0f2dc902
|
@ -0,0 +1,101 @@
|
||||||
|
THIRD PARTY FILE LICENSES:
|
||||||
|
|
||||||
|
nyx_bot/wordcloud_font.ttf:
|
||||||
|
This file was originally named `SourceHanSansSC-Regular.otf` and is available under the following license:
|
||||||
|
|
||||||
|
Copyright 2014-2021 Adobe (http://www.adobe.com/), with Reserved Font
|
||||||
|
Name 'Source'. Source is a trademark of Adobe in the United States
|
||||||
|
and/or other countries.
|
||||||
|
|
||||||
|
This Font Software is licensed under the SIL Open Font License,
|
||||||
|
Version 1.1.
|
||||||
|
|
||||||
|
This license is copied below, and is also available with a FAQ at:
|
||||||
|
http://scripts.sil.org/OFL
|
||||||
|
|
||||||
|
-----------------------------------------------------------
|
||||||
|
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
|
||||||
|
-----------------------------------------------------------
|
||||||
|
|
||||||
|
PREAMBLE
|
||||||
|
The goals of the Open Font License (OFL) are to stimulate worldwide
|
||||||
|
development of collaborative font projects, to support the font
|
||||||
|
creation efforts of academic and linguistic communities, and to
|
||||||
|
provide a free and open framework in which fonts may be shared and
|
||||||
|
improved in partnership with others.
|
||||||
|
|
||||||
|
The OFL allows the licensed fonts to be used, studied, modified and
|
||||||
|
redistributed freely as long as they are not sold by themselves. The
|
||||||
|
fonts, including any derivative works, can be bundled, embedded,
|
||||||
|
redistributed and/or sold with any software provided that any reserved
|
||||||
|
names are not used by derivative works. The fonts and derivatives,
|
||||||
|
however, cannot be released under any other type of license. The
|
||||||
|
requirement for fonts to remain under this license does not apply to
|
||||||
|
any document created using the fonts or their derivatives.
|
||||||
|
|
||||||
|
DEFINITIONS
|
||||||
|
"Font Software" refers to the set of files released by the Copyright
|
||||||
|
Holder(s) under this license and clearly marked as such. This may
|
||||||
|
include source files, build scripts and documentation.
|
||||||
|
|
||||||
|
"Reserved Font Name" refers to any names specified as such after the
|
||||||
|
copyright statement(s).
|
||||||
|
|
||||||
|
"Original Version" refers to the collection of Font Software
|
||||||
|
components as distributed by the Copyright Holder(s).
|
||||||
|
|
||||||
|
"Modified Version" refers to any derivative made by adding to,
|
||||||
|
deleting, or substituting -- in part or in whole -- any of the
|
||||||
|
components of the Original Version, by changing formats or by porting
|
||||||
|
the Font Software to a new environment.
|
||||||
|
|
||||||
|
"Author" refers to any designer, engineer, programmer, technical
|
||||||
|
writer or other person who contributed to the Font Software.
|
||||||
|
|
||||||
|
PERMISSION & CONDITIONS
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of the Font Software, to use, study, copy, merge, embed,
|
||||||
|
modify, redistribute, and sell modified and unmodified copies of the
|
||||||
|
Font Software, subject to the following conditions:
|
||||||
|
|
||||||
|
1) Neither the Font Software nor any of its individual components, in
|
||||||
|
Original or Modified Versions, may be sold by itself.
|
||||||
|
|
||||||
|
2) Original or Modified Versions of the Font Software may be bundled,
|
||||||
|
redistributed and/or sold with any software, provided that each copy
|
||||||
|
contains the above copyright notice and this license. These can be
|
||||||
|
included either as stand-alone text files, human-readable headers or
|
||||||
|
in the appropriate machine-readable metadata fields within text or
|
||||||
|
binary files as long as those fields can be easily viewed by the user.
|
||||||
|
|
||||||
|
3) No Modified Version of the Font Software may use the Reserved Font
|
||||||
|
Name(s) unless explicit written permission is granted by the
|
||||||
|
corresponding Copyright Holder. This restriction only applies to the
|
||||||
|
primary font name as presented to the users.
|
||||||
|
|
||||||
|
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
||||||
|
Software shall not be used to promote, endorse or advertise any
|
||||||
|
Modified Version, except to acknowledge the contribution(s) of the
|
||||||
|
Copyright Holder(s) and the Author(s) or with their explicit written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
5) The Font Software, modified or unmodified, in part or in whole,
|
||||||
|
must be distributed entirely under this license, and must not be
|
||||||
|
distributed under any other license. The requirement for fonts to
|
||||||
|
remain under this license does not apply to any document created using
|
||||||
|
the Font Software.
|
||||||
|
|
||||||
|
TERMINATION
|
||||||
|
This license becomes null and void if any of the above conditions are
|
||||||
|
not met.
|
||||||
|
|
||||||
|
DISCLAIMER
|
||||||
|
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||||
|
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
||||||
|
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
||||||
|
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
||||||
|
OTHER DEALINGS IN THE FONT SOFTWARE.
|
11
SETUP.md
11
SETUP.md
|
@ -106,6 +106,17 @@ pip install -e ".[postgres]"
|
||||||
|
|
||||||
[Sarasa Gothic](https://github.com/be5invis/Sarasa-Gothic) should be installed for best quote image results. It is also recommended to install the Noto Color Emoji font on the machine running the bot.
|
[Sarasa Gothic](https://github.com/be5invis/Sarasa-Gothic) should be installed for best quote image results. It is also recommended to install the Noto Color Emoji font on the machine running the bot.
|
||||||
|
|
||||||
|
## Build word segmenter
|
||||||
|
|
||||||
|
Use Rust to build the segmenter:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd cutword
|
||||||
|
cargo build --release
|
||||||
|
```
|
||||||
|
|
||||||
|
Copy `target/release/nyx_bot-cutword` to a directory that is on the `PATH` available to the bot.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Copy the sample configuration file to a new `config.yaml` file.
|
Copy the sample configuration file to a new `config.yaml` file.
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
/target
|
|
@ -0,0 +1,158 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "byteorder"
|
||||||
|
version = "1.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cedarwood"
|
||||||
|
version = "0.4.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90"
|
||||||
|
dependencies = [
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fxhash"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jieba-rs"
|
||||||
|
version = "0.6.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37228e06c75842d1097432d94d02f37fe3ebfca9791c2e8fef6e9db17ed128c1"
|
||||||
|
dependencies = [
|
||||||
|
"cedarwood",
|
||||||
|
"fxhash",
|
||||||
|
"hashbrown",
|
||||||
|
"lazy_static",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nyx_bot-cutword"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"jieba-rs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c"
|
||||||
|
dependencies = [
|
||||||
|
"phf_shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_codegen"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_generator"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
|
||||||
|
dependencies = [
|
||||||
|
"phf_shared",
|
||||||
|
"rand",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_shared"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676"
|
||||||
|
dependencies = [
|
||||||
|
"siphasher",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||||
|
dependencies = [
|
||||||
|
"rand_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_core"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "siphasher"
|
||||||
|
version = "0.3.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
|
@ -0,0 +1,9 @@
|
||||||
|
[package]
|
||||||
|
name = "nyx_bot-cutword"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
jieba-rs = "*"
|
|
@ -0,0 +1,60 @@
|
||||||
|
use jieba_rs::Jieba;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::io::{Result, Write};
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let jieba = Jieba::new();
|
||||||
|
let stdin = std::io::stdin();
|
||||||
|
let mut result = HashMap::new();
|
||||||
|
for line in stdin.lines() {
|
||||||
|
match line {
|
||||||
|
Ok(line) => {
|
||||||
|
if line.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if line.starts_with('/') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for tag in jieba.tag(&line, true) {
|
||||||
|
if STOP_FLAGS.contains(&tag.tag) || tag.word.len() > 21 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
.entry(tag.word.to_lowercase())
|
||||||
|
.and_modify(|c| *c += 1)
|
||||||
|
.or_insert(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = std::io::stdout();
|
||||||
|
let mut stdout = stdout.lock();
|
||||||
|
for (k, v) in result {
|
||||||
|
writeln!(stdout, "{}\t{}", k, v)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Part-of-speech tags whose words are left out of the frequency count.
const STOP_FLAGS: &[&str] = &[
    "d",  // adverb
    "f",  // locative noun
    "x",  // punctuation (the docs say `w`, but in testing it is actually `x`)
    "p",  // preposition
    "t",  // time word
    "q",  // measure word
    "m",  // numeral / quantity word
    "nr", // personal names, and "you / me / him"
    "r",  // pronoun
    "c",  // conjunction
    "e",  // undocumented; looks like interjections / modal particles
    "xc", // other function words
    "zg", // undocumented; no pattern found in the words it yields, but none are wanted
    "y",  // undocumented; looks like modal particles
    // Tags starting with `u` are all auxiliary particles; the docs do not
    // describe the finer sub-categories.
    "uj", "ug", "ul", "ud",
];
|
|
@ -24,6 +24,7 @@ from nyx_bot.config import Config
|
||||||
from nyx_bot.errors import NyxBotRuntimeError, NyxBotValueError
|
from nyx_bot.errors import NyxBotRuntimeError, NyxBotValueError
|
||||||
from nyx_bot.storage import MatrixMessage, MembershipUpdates, UserTag
|
from nyx_bot.storage import MatrixMessage, MembershipUpdates, UserTag
|
||||||
from nyx_bot.utils import make_divergence, parse_matrixdotto_link
|
from nyx_bot.utils import make_divergence, parse_matrixdotto_link
|
||||||
|
from nyx_bot.wordcloud import send_wordcloud
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -106,6 +107,8 @@ class Command:
|
||||||
await self._last_message()
|
await self._last_message()
|
||||||
elif self.command == "divergence":
|
elif self.command == "divergence":
|
||||||
await self._divergence()
|
await self._divergence()
|
||||||
|
elif self.command == "wordcloud":
|
||||||
|
await self._wordcloud()
|
||||||
else:
|
else:
|
||||||
await self._unknown_command()
|
await self._unknown_command()
|
||||||
|
|
||||||
|
@ -536,3 +539,18 @@ Outside of a reply, send the avatar of the command sender.\
|
||||||
raise NyxBotRuntimeError(f"Failed to fetch event: {error}")
|
raise NyxBotRuntimeError(f"Failed to fetch event: {error}")
|
||||||
sender = target_event.event.sender
|
sender = target_event.event.sender
|
||||||
UserTag.delete_user_tag(self.room.room_id, sender)
|
UserTag.delete_user_tag(self.room.room_id, sender)
|
||||||
|
|
||||||
|
async def _wordcloud(self):
    """Send a word cloud built from one user's messages in this room.

    When the command replies to another event, the word cloud is made for
    the sender of that event; otherwise it is made for the command sender.

    Raises:
        NyxBotRuntimeError: If the replied-to event cannot be fetched.
    """
    if not self.reply_to:
        sender = self.event.sender
    else:
        target_event = await self.client.room_get_event(
            self.room.room_id, self.reply_to
        )
        if isinstance(target_event, RoomGetEventError):
            error = target_event.message
            raise NyxBotRuntimeError(f"Failed to fetch event: {error}")
        sender = target_event.event.sender

    await self.client.room_typing(self.room.room_id)
    try:
        await send_wordcloud(self.client, self.room, self.event, sender)
    finally:
        # Always clear the typing notification, even when generating or
        # sending the image fails; otherwise it lingers until it times out.
        await self.client.room_typing(self.room.room_id, False)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from io import BytesIO
|
from html.parser import HTMLParser
|
||||||
|
from io import BytesIO, StringIO
|
||||||
from random import Random
|
from random import Random
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from urllib.parse import unquote, urlparse
|
from urllib.parse import unquote, urlparse
|
||||||
|
@ -213,3 +214,24 @@ def make_divergence(room: MatrixRoom):
|
||||||
result = first_value
|
result = first_value
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class MLStripper(HTMLParser):
    """HTML parser that throws away markup and keeps only the text content.

    Feed it HTML with ``feed()`` and read the collected text back with
    ``get_data()``. Character references are converted to the characters
    they stand for.
    """

    def __init__(self):
        # The base initializer already resets the parser state, so no
        # separate ``reset()`` call is needed afterwards.
        super().__init__(convert_charrefs=True)
        self.strict = False
        # Accumulates every text fragment reported by the parser.
        self.text = StringIO()

    def handle_data(self, data):
        """Collect a run of text found between tags."""
        self.text.write(data)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return self.text.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    Args:
        html: A string containing an HTML fragment.

    Returns:
        The concatenated text found between the tags, with character
        references converted to the characters they stand for.
    """
    s = MLStripper()
    s.feed(html)
    # Flush buffered input. Without this, trailing text that contains an
    # unterminated "&" (e.g. "R&D") is held back by the parser, which waits
    # for more data, and would be missing from the result.
    s.close()
    return s.get_data()
|
||||||
|
|
|
@ -0,0 +1,137 @@
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from asyncio import create_subprocess_exec
|
||||||
|
from asyncio.subprocess import PIPE
|
||||||
|
from io import BytesIO, StringIO
|
||||||
|
|
||||||
|
from nio import AsyncClient, MatrixRoom, RoomMessageText, UploadResponse
|
||||||
|
from wand.image import Image
|
||||||
|
from wordcloud import WordCloud
|
||||||
|
|
||||||
|
import nyx_bot
|
||||||
|
from nyx_bot.storage import MatrixMessage
|
||||||
|
from nyx_bot.utils import strip_tags
|
||||||
|
|
||||||
|
CUTWORDS_EXE = "nyx_bot-cutword"
|
||||||
|
FONT = os.path.join(nyx_bot.__path__[0], "wordcloud_font.ttf")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_word_freqs(text):
    """Count word frequencies in *text* using the external segmenter.

    The text is piped as UTF-8 to the ``CUTWORDS_EXE`` program, whose
    standard output is parsed as one ``word<TAB>count`` record per line.

    Args:
        text: The text to segment.

    Returns:
        A dict mapping each word to its integer count. Lines that are not
        valid records are skipped.
    """
    proc = await create_subprocess_exec(
        CUTWORDS_EXE,
        stdin=PIPE,
        stdout=PIPE,
    )

    stdout, _ = await proc.communicate(input=text.encode("utf-8"))
    if proc.returncode != 0:
        # Whatever was printed before the failure is still parsed below.
        logger.warning("%s exited with status %s", CUTWORDS_EXE, proc.returncode)

    freqs = {}
    for line in stdout.decode("utf-8").splitlines():
        # Split on the last tab: the count is always the final field, so
        # the word is kept intact whatever characters it contains.
        word, sep, freq = line.rpartition("\t")
        if not sep or not word:
            continue
        try:
            freqs[word] = int(freq)
        except ValueError:
            logger.debug("Skipping malformed segmenter output: %r", line)

    return freqs
|
||||||
|
|
||||||
|
|
||||||
|
def make_image(freqs, bytesio):
    """Render *freqs* as an 800x400 word cloud and write it as PNG.

    Args:
        freqs: Mapping of word to frequency.
        bytesio: Writable binary stream that receives the PNG data.
    """
    cloud = WordCloud(font_path=FONT, width=800, height=400)
    cloud.generate_from_frequencies(freqs)
    rendered = cloud.to_image()
    rendered.save(bytesio, "PNG")
|
||||||
|
|
||||||
|
|
||||||
|
async def send_wordcloud(
    client: AsyncClient,
    room: MatrixRoom,
    event: RoomMessageText,
    sender: str,
):
    """Build a word cloud of *sender*'s messages and post it to *room*.

    The image is rendered in an executor so the event loop is not blocked,
    uploaded to the homeserver, and sent as an ``m.image`` reply to *event*.
    If the upload fails, the failure is logged and nothing is sent.

    Args:
        client: Client used for uploading and sending.
        room: Room whose stored messages are used and which receives the image.
        event: The event the image replies to.
        sender: User ID whose messages are turned into the word cloud.
    """
    bytesio = BytesIO()
    texts = gather_messages(room, sender)
    freqs = await get_word_freqs(texts)

    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, make_image, freqs, bytesio)

    length = bytesio.getbuffer().nbytes
    bytesio.seek(0)

    # Only the dimensions are needed; release the image right afterwards.
    with Image(file=bytesio) as image:
        (width, height) = (image.width, image.height)

    # Seek again so the upload starts from the beginning of the data.
    bytesio.seek(0)
    resp, maybe_keys = await client.upload(
        bytesio,
        content_type="image/png",
        filename="image.png",
        filesize=length,
    )
    if not isinstance(resp, UploadResponse):
        # There is no content URI to reference without a successful upload.
        logger.error("Failed to upload image. Failure response: %s", resp)
        return
    logger.debug("Image was uploaded successfully to server.")

    content = {
        "body": "[Wordcloud]",
        "info": {
            "size": length,
            "mimetype": "image/png",
            # The uploaded image doubles as its own thumbnail.
            "thumbnail_info": {
                "mimetype": "image/png",
                "size": length,
                "w": width,  # width in pixel
                "h": height,  # height in pixel
            },
            "w": width,  # width in pixel
            "h": height,  # height in pixel
            "thumbnail_url": resp.content_uri,
        },
        "msgtype": "m.image",
        "url": resp.content_uri,
    }

    content["m.relates_to"] = {"m.in_reply_to": {"event_id": event.event_id}}

    # Add custom data for tracking bot message.
    content["io.github.shadowrz.nyx_bot"] = {
        "in_reply_to": event.event_id,
        "type": "image",
    }

    await client.room_send(room.room_id, message_type="m.room.message", content=content)
|
||||||
|
|
||||||
|
|
||||||
|
def gather_messages(
    room: MatrixRoom,
    sender: str,
):
    """Collect the stored message text of *sender* in *room*.

    For messages with a formatted body, the ``<mx-reply>`` element is
    removed first and the remaining HTML is reduced to plain text, so text
    quoted from other users is not attributed to *sender*. Otherwise the
    plain body is used. Messages with neither are skipped.

    Args:
        room: Room whose messages are selected.
        sender: User ID whose messages are selected.

    Returns:
        The message texts, newest first, one per line.
    """
    # Non-greedy and DOTALL so that a reply fallback spanning several lines
    # is removed in full, and nothing after its closing tag is swallowed.
    reply_fallback = re.compile(r"<mx-reply>.*?</mx-reply>", re.DOTALL)

    stringio = StringIO()
    msg_items = (
        MatrixMessage.select()
        .where(
            (MatrixMessage.room_id == room.room_id) & (MatrixMessage.sender == sender)
        )
        .order_by(MatrixMessage.origin_server_ts.desc())
    )
    for msg_item in msg_items:
        if msg_item.formatted_body is not None:
            string = reply_fallback.sub("", msg_item.formatted_body)
            print(strip_tags(string), file=stringio)
        elif msg_item.body is not None:
            print(msg_item.body, file=stringio)

    return stringio.getvalue()
|
Binary file not shown.
Loading…
Reference in New Issue