985 lines
41 KiB
Python
985 lines
41 KiB
Python
from __future__ import annotations
|
|
from typing import Optional, Union
|
|
from typing_extensions import Final
|
|
from abc import ABC, abstractmethod
|
|
from collections.abc import Callable, Awaitable
|
|
|
|
import asyncio
|
|
import re
|
|
from io import BytesIO
|
|
from collections import defaultdict
|
|
from telethon.tl.functions.messages import UploadMediaRequest
|
|
from telethon.tl.types import InputMediaPhotoExternal, InputMediaDocumentExternal, \
|
|
MessageMediaPhoto, MessageMediaDocument, InputFile, InputFileBig, InputMediaUploadedPhoto
|
|
from telethon.errors import FloodWaitError, SlowModeWaitError, ServerError, BadRequestError
|
|
from urllib.parse import urlencode, urlparse
|
|
|
|
from .. import env, log, web, locks
|
|
from .html_node import Code, Link, Br, Text, HtmlTree
|
|
from .utils import isAbsoluteHttpLink
|
|
from ..errors_collection import InvalidMediaErrors, ExternalMediaFetchFailedErrors, UserBlockedErrors
|
|
|
|
# Module-level logger; all log output of this file is emitted under the name 'RSStT.medium'.
logger = log.getLogger('RSStT.medium')
|
|
|
|
# --- sinaimg ---------------------------------------------------------------------------------------------------------
# Size names substituted into a sinaimg URL, in the order they are listed here.
sinaimg_sizes: Final = ('large', 'mw2048', 'mw1024', 'mw720', 'middle')
# Splits a sinaimg picture URL into <domain>, <size> and <filename>.
sinaimg_size_parser: Final = re.compile(
    r'(?P<domain>^https?://wx\d\.sinaimg\.\w+/)'
    r'(?P<size>\w+)'
    r'(?P<filename>/\w+\.\w+$)'
).match
# Splits a sinaimg URL around the single-digit server id that follows "wx".
sinaimg_server_parser: Final = re.compile(
    r'(?P<url_prefix>^https?://wx)'
    r'(?P<server_id>\d)'
    r'(?P<url_suffix>\.sinaimg\.\S+$)'
).match

# --- pixiv (i.pixiv.cat / i.pixiv.re) --------------------------------------------------------------------------------
# Size names substituted into a pixiv proxy URL, in the order they are listed here.
pixiv_sizes: Final = ('original', 'master')
# Splits a pixiv proxy URL into <url_prefix>, <size>, <url_infix>, <filename> and <file_ext>.
pixiv_size_parser: Final = re.compile(
    r'(?P<url_prefix>^https?://i\.pixiv\.(cat|re)/img-)'
    r'(?P<size>\w+)'
    r'(?P<url_infix>/img/\d{4}/(\d{2}/){5})'
    r'(?P<filename>\d+_p\d+)'
    r'(?P<file_ext>\.\w+$)'
).match

# --- lizhi.fm --------------------------------------------------------------------------------------------------------
# lizhi_sizes: Final = ('ud.mp3', 'hd.mp3', 'sd.m4a')  # ud.mp3 is rare
lizhi_sizes: Final = ('hd.mp3', 'sd.m4a')
# Known CDN server ids; the empty string stands for the plain "cdn" host without a number.
lizhi_server_id: Final = ('1', '2', '5', '')
# Splits a lizhi CDN URL into <url_prefix>, <server_id>, <url_infix> and <size_suffix>.
lizhi_parser: Final = re.compile(
    r'(?P<url_prefix>^https?://cdn)'
    r'(?P<server_id>[125]?)'
    r'(?P<url_infix>\.lizhi\.fm/[\w/]+)'
    r'(?P<size_suffix>([uh]d\.mp3|sd\.m4a)$)'
).match

# Truthy for URLs hosted on telesco.pe (optionally with one subdomain level); Medium.validate() switches such
# URLs to the relay server right away.
isTelegramCannotFetch: Final = re.compile(r'^https?://(\w+\.)?telesco\.pe').match
|
|
|
|
# Type tags carried by each medium class (``type`` attribute) and returned next to every uploaded medium.
IMAGE: Final = 'image'
VIDEO: Final = 'video'
ANIMATION: Final = 'animation'
AUDIO: Final = 'audio'
FILE: Final = 'file'
MEDIUM_BASE_CLASS: Final = 'medium'  # tag of the generic Medium base class
TypeMedium = Union[IMAGE, VIDEO, AUDIO, ANIMATION, FILE]

# Tag used for a message that bundles several media (an album).
MEDIA_GROUP: Final = 'media_group'
TypeMessage = Union[MEDIA_GROUP, TypeMedium]

# Telegram-side objects handled by this module.
TypeMessageMedia = Union[MessageMediaPhoto, MessageMediaDocument, InputMediaUploadedPhoto]
TypeInputFile = Union[InputFile, InputFileBig]
TypeTelegramMedia = Union[TypeMessageMedia, TypeInputFile]

# Size limits in bytes: 5 MiB for media sent as images, 20 MiB for every other medium.
IMAGE_MAX_SIZE: Final = 5 * 1024 * 1024
MEDIA_MAX_SIZE: Final = 20 * 1024 * 1024
|
|
|
|
|
|
# Note:
|
|
# One message can have 10 media at most, but there are some exceptions.
|
|
# 1. A GIF (Animation) and WEBP (sent as a file) must occupy a SINGLE message.
|
|
# 1a. A WEBP sent as a file will be shown just like a sticker.
|
|
# 1b. Since some time in 2022, Telegram DC will convert any WEBP sent as an image to a JPG. Before that, same as (1a).
|
|
# 2. Videos and Images can be mixed in a media group, but any other type of media cannot be in the same message.
|
|
# 3. Images uploaded as MessageMediaPhoto are treated as images, whereas images uploaded as MessageMediaDocument are treated as files.
|
|
# 4. Any other type of media except Image must be uploaded as MessageMediaDocument.
|
|
# 5. Telegram will not take notice of attributes provided if it already decoded the necessary metadata of a media.
|
|
# 6. Because of (5), we can't force send GIFs and videos as ordinary files.
|
|
# 7. Audios can be sent in a media group, but can not be mixed with other types of media.
|
|
# 8. Other files (including images sent as files) should be able to be mixed in a media group.
|
|
#
|
|
# Type fallback notes:
|
|
# 1. A video can fall back to an image if its poster is available.
|
|
# 2. An image can fall back to a file if it is: 5MB < size <= 20MB, width + height >= 10000.
|
|
# 3. A GIF does not need any fallback, because of (5) above.
|
|
# 4. The only possible fallback chain is: video -> image(poster) -> file.
|
|
# 5. If an image falls back to a file, the remaining images must fall back to files too!
|
|
|
|
class AbstractMedium(ABC):
    """
    Interface shared by every medium plus the generic upload routine.

    Subclasses supply validation, fallback and description logic; ``upload()`` drives them to obtain a
    Telegram media object for a given chat, caching the result per chat in ``uploaded_bucket``.
    """
    type: str = ''

    def __init__(self):
        # NOTE: ``valid`` must be assigned before ``drop_silently``. UploadedImage implements ``drop_silently``
        # as a property whose setter reads ``self.valid``.
        self.valid: Optional[bool] = None  # None: not validated yet
        self.drop_silently: bool = False  # if True, will not be included in invalid media
        self.type_fallback_medium: Optional[AbstractMedium] = None
        self.need_type_fallback: bool = False
        # chat_id -> (uploaded media, type it was uploaded as); missing chats read as None
        self.uploaded_bucket: defaultdict[int, Optional[tuple[TypeMessageMedia, TypeMedium]]] \
            = defaultdict(lambda: None)
        self.uploading_lock = asyncio.Lock()
        self.validating_lock = asyncio.Lock()

    @abstractmethod
    def telegramize(self) -> Optional[TypeMessageMedia]:
        """Return the object passed to ``UploadMediaRequest`` for this medium."""

    @abstractmethod
    async def validate(self, flush: bool = False, reason: Union[Exception, str] = None) -> bool:
        """Validate the medium (re-validate if ``flush``) and return whether it is valid."""

    @abstractmethod
    async def fallback(self, reason: Union[Exception, str] = None) -> bool:
        """Try to fall back after a failure; ``upload()`` retries when this returns True and gives up otherwise."""

    @abstractmethod
    def type_fallback_chain(self) -> Optional[AbstractMedium]:
        """Return the medium that should actually be uploaded, or None if there is nothing to upload."""

    @abstractmethod
    def get_link_html_node(self) -> Optional[Text]:
        """Return an HTML node referring to this medium, or None if it has none."""

    @property
    @abstractmethod
    def hash(self) -> str:
        """String summarising the current state of the medium."""

    @abstractmethod
    async def change_server(self) -> bool:
        """Try to serve the medium from another server; ``upload()`` retries when this returns True."""

    @property
    @abstractmethod
    def info(self) -> str:
        """Short human-readable summary of the medium."""

    @property
    @abstractmethod
    def describe(self) -> str:
        """Longer human-readable description, used in log messages."""

    def _cached_upload(self, chat_id: int, medium_type: str, force_upload: bool) \
            -> Optional[tuple[TypeMessageMedia, TypeMedium]]:
        """Return the cached upload for ``chat_id`` if it may be reused (same type, no forced upload)."""
        cached = self.uploaded_bucket[chat_id]
        if cached and not force_upload and cached[1] == medium_type:
            return cached
        return None

    async def upload(self, chat_id: int, force_upload: bool = False) \
            -> tuple[Optional[TypeMessageMedia], Optional[TypeMedium]]:
        """
        Upload the medium (or its type fallback) to Telegram for ``chat_id``.

        :param chat_id: chat the medium is uploaded for; also the key of the per-chat cache and locks.
        :param force_upload: if True, ignore a previously cached upload for this chat.
        :return: ``(uploaded media, medium type)``, or ``(None, None)`` if the medium had to be dropped.
        """
        if self.valid is None:
            await self.validate()
        medium_to_upload = self.type_fallback_chain()
        if medium_to_upload is None:
            return None, None
        cached = self._cached_upload(chat_id, medium_to_upload.type, force_upload)
        if cached is not None:
            return cached

        tries = 0
        error_tries = 0
        max_tries = 10
        flood_lock = locks.user_flood_lock(chat_id)
        user_media_upload_semaphore = locks.user_media_upload_semaphore(chat_id)
        ctm = locks.ContextTimeoutManager(timeout=7 * 60)
        while True:
            peer = await env.bot.get_input_entity(chat_id)
            try:
                async with ctm(flood_lock):
                    pass  # wait for flood wait

                async with ctm(user_media_upload_semaphore), ctm(self.uploading_lock):
                    # The state may have changed while we were waiting for the locks: check again.
                    medium_to_upload = self.type_fallback_chain()
                    if medium_to_upload is None:
                        return None, None
                    cached = self._cached_upload(chat_id, medium_to_upload.type, force_upload)
                    if cached is not None:
                        return cached
                    while True:
                        # fallback()/change_server() below may have switched the medium to upload
                        medium_to_upload = self.type_fallback_chain()
                        if medium_to_upload is None:
                            return None, None
                        tries += 1
                        if tries > max_tries:
                            logger.debug('Medium dropped due to too many upload retries: '
                                         f'{self.describe}')
                            self.valid = False
                            self.need_type_fallback = False
                            return None, None
                        try:
                            async with flood_lock:
                                pass  # wait for flood wait

                            uploaded_media = await env.bot(
                                UploadMediaRequest(peer, medium_to_upload.telegramize())
                            )
                            self.uploaded_bucket[chat_id] = uploaded_media, medium_to_upload.type
                            return uploaded_media, medium_to_upload.type

                        # errors caused by invalid img/video(s)
                        except InvalidMediaErrors as e:
                            if not await self.fallback(reason=e):
                                self.valid = False
                                return None, None

                        # errors caused by server or network instability between img server and telegram server
                        except ExternalMediaFetchFailedErrors as e:
                            # try another server first, a fallback is only attempted if that is impossible
                            if not (await self.change_server() or await self.fallback(reason=e)):
                                self.valid = False
                                return None, None

            except locks.ContextTimeoutError:
                logger.error(f'Medium dropped due to lock acquisition timeout ({chat_id}): '
                             f'{self.describe}')
                return None, None
            except (FloodWaitError, SlowModeWaitError) as e:
                # telethon has retried for us, but we release locks and retry again here to see if it will be better
                if error_tries >= 1:
                    logger.error(f'Medium dropped due to too many flood control retries ({chat_id}): '
                                 f'{self.describe}')
                    return None, None

                error_tries += 1
                await locks.user_flood_wait_background(chat_id, seconds=e.seconds)  # acquire a flood wait
            except ServerError as e:
                # telethon has retried for us, so we just retry once more
                if error_tries >= 1:
                    logger.error(f'Medium dropped due to Telegram internal server error '
                                 f'({chat_id}, {e.message if type(e) is ServerError else type(e).__name__}): '
                                 f'{self.describe}')
                    return None, None

                error_tries += 1
|
|
|
|
|
|
class Medium(AbstractMedium):
    """
    A medium referenced by one or more candidate URLs.

    ``validate()`` walks through ``urls`` until one of them is acceptable and stores it in ``chosen_url``.
    If none is, the medium may fall back to another medium type (``typeFallbackTo``).
    """
    type = MEDIUM_BASE_CLASS
    maxSize = MEDIA_MAX_SIZE
    # noinspection PyTypeChecker
    typeFallbackTo: Optional[type[Medium]] = None
    typeFallbackAllowSelfUrls: bool = False
    # noinspection PyTypeChecker
    inputMediaExternalType: Optional[Union[type[InputMediaPhotoExternal], type[InputMediaDocumentExternal]]] = None

    def __init__(self, urls: Union[str, list[str]], type_fallback_urls: Optional[Union[str, list[str]]] = None):
        """
        :param urls: one URL or a list of candidate URLs, tried in the given order.
        :param type_fallback_urls: URL(s) handed to the type fallback medium.
        """
        super().__init__()
        urls = urls if isinstance(urls, list) else [urls]
        # dedup, should not use a set because sequence is important (dict keys keep insertion order)
        self.urls: list[str] = list(dict.fromkeys(urls))
        self.original_urls: tuple[str, ...] = tuple(self.urls)
        self.chosen_url: Optional[str] = self.urls[0]
        self._server_change_count: int = 0
        self.size = self.width = self.height = None
        self.max_width = self.max_height = -1  # use for long pic judgment
        # use for fallback if not type_fallback_allow_self_urls
        if isinstance(type_fallback_urls, list):
            self.type_fallback_urls: list[str] = type_fallback_urls
        elif type_fallback_urls and isinstance(type_fallback_urls, str):
            self.type_fallback_urls = [type_fallback_urls]
        else:
            self.type_fallback_urls = []
        self.content_type: Optional[str] = None

    def telegramize(self) -> Optional[Union[InputMediaPhotoExternal, InputMediaDocumentExternal]]:
        """
        Wrap ``chosen_url`` into the external input media type of this class.

        :raises NotImplementedError: if the class does not define ``inputMediaExternalType``.
        """
        if self.inputMediaExternalType is None:
            raise NotImplementedError
        return self.inputMediaExternalType(self.chosen_url)

    def type_fallback_chain(self) -> Optional[Medium]:
        """Return this medium if valid, else the end of its type fallback chain, else None."""
        if self.drop_silently:
            return None
        if self.valid:
            return self
        if self.need_type_fallback and self.type_fallback_medium is not None:
            return self.type_fallback_medium.type_fallback_chain()
        return None

    def get_link_html_node(self) -> Text:
        """Return a link to the first original URL, or a text node quoting it if it is not an absolute HTTP link."""
        url = self.original_urls[0]
        if isAbsoluteHttpLink(url):
            return Link(self.type, param=self.original_urls[0])
        return Text([Text(f'{self.type} ('), Code(url), Text(')')])

    async def validate(self, flush: bool = False, reason: Union[Exception, str] = None) -> bool:
        """
        Pick the first acceptable URL out of ``urls``; fall back to another medium type if there is none.

        Candidate URLs are consumed (popped) while validating, so a flush continues with the remaining ones.

        :param flush: re-validate even if the medium has already been validated.
        :param reason: why a re-validation was requested; only used for logging and passed to the type fallback.
        :return: True if this medium, or its type fallback, is usable.
        """
        def flushed_log():
            logger.debug(f'Medium chosen URL ({self.describe}, formerly chosen: {formerly_chosen_url}) flushed'
                         + (f': {type(reason).__name__} ({reason})'
                            if isinstance(reason, Exception)
                            else (f': {reason}' if reason else '')))

        async with self.validating_lock:
            if self.valid is not None and not flush:  # already validated
                return self.valid

            if self.drop_silently:
                return False

            self.valid = False
            formerly_chosen_url = self.chosen_url

            invalid_reasons = []
            if not self.urls:
                invalid_reasons.append('no urls')

            while self.urls:
                url = self.urls.pop(0)
                if not isAbsoluteHttpLink(url):  # bypass non-http links
                    invalid_reasons.append('non-http link')
                    continue
                if (
                        # let Telegram DC to determine the validity of media
                        env.LAZY_MEDIA_VALIDATION
                        # images from images.weserv.nl are considered always valid
                        # but if the dimension of the image has not been extracted yet, let it continue
                        or (env.TRAFFIC_SAVING
                            and url.startswith(env.IMAGES_WESERV_NL)
                            and min(self.max_width, self.max_height) != -1)
                ):
                    self.valid = True
                    self.chosen_url = url
                    self._server_change_count = 0
                    if flush:
                        flushed_log()
                    return True
                medium_info = await web.get_medium_info(url)
                if medium_info is None:
                    if url.startswith(env.IMAGES_WESERV_NL) or url.startswith(env.IMG_RELAY_SERVER):
                        invalid_reasons.append('fetch failed')
                        continue
                    # the direct fetch failed: retry once through the relay server
                    medium_info = await web.get_medium_info(env.IMG_RELAY_SERVER + url)
                    if medium_info is None:
                        invalid_reasons.append('both original and relayed image fetch failed')
                        continue
                self.size, self.width, self.height, self.content_type = medium_info
                if self.type == IMAGE and self.size <= self.maxSize and min(self.width, self.height) == -1 \
                        and self.content_type and self.content_type.startswith('image') \
                        and all(keyword not in self.content_type for keyword in ('webp', 'svg', 'application')) \
                        and not url.startswith(env.IMAGES_WESERV_NL):
                    # enforcing dimension detection for images
                    self.width, self.height = await detect_image_dimension_via_images_weserv_nl(url)
                self.max_width = max(self.max_width, self.width)
                self.max_height = max(self.max_height, self.height)

                if self.type == IMAGE:
                    # drop icons and emoticons (a non-positive dimension is not treated as small here)
                    # FIX: both dimensions now use the same inclusive 30px bound (height was `< 30`).
                    if 0 < self.width <= 30 or 0 < self.height <= 30:
                        self.valid = False
                        self.drop_silently = True
                        return False
                    # force convert SVG to PNG
                    if (
                            self.content_type
                            and any(keyword in self.content_type for keyword in ('svg', 'application'))
                    ):
                        # immediately fall back to 'images.weserv.nl'
                        self.urls = [url for url in self.urls if url.startswith(env.IMAGES_WESERV_NL)]
                        invalid_reasons.append('force convert SVG to PNG')
                        continue
                    # always invalid
                    if self.width + self.height > 10000:
                        invalid_reasons.append('width + height > 10000')
                        self.valid = False
                    # always invalid
                    elif self.size > self.maxSize:
                        invalid_reasons.append(f'size > {self.maxSize}')
                        self.valid = False
                    # Telegram accepts 0.05 < w/h < 20. But after downsized, it will be ugly. Narrow the range down
                    elif 0.4 <= self.width / self.height <= 2.5:
                        self.valid = True
                    elif (
                            # ensure the image is valid
                            0.05 < self.width / self.height < 20
                            and
                            # Telegram downsizes images to fit 1280x1280. If not downsized a lot, passing
                            0 < max(self.max_width, self.max_height) <= 1280 * 1.5
                    ):
                        self.valid = True
                    # let long images fall back to file
                    else:
                        invalid_reasons.append('long image')
                        self.valid = False
                        self.urls = []  # clear the urls, force fall back to file
                elif self.size <= self.maxSize:  # valid
                    self.valid = True
                else:
                    invalid_reasons.append(f'size > {self.maxSize}')
                    self.valid = False

                # some images cannot be sent as file directly, if so, images.weserv.nl may help
                if self.type == FILE and self.content_type and self.content_type.startswith('image') \
                        and not url.startswith(env.IMAGES_WESERV_NL):
                    self.urls.append(construct_images_weserv_nl_url_convert_to_jpg(url))

                if self.valid:
                    self.chosen_url = url
                    if flush:
                        flushed_log()
                    self._server_change_count = 0
                    if isTelegramCannotFetch(self.chosen_url):
                        await self.change_server()
                    return True

                # dimensions are known by now: under traffic saving only images.weserv.nl candidates are kept
                if env.TRAFFIC_SAVING and min(self.max_width, self.max_height) != -1 and self.urls:
                    self.urls = [url for url in self.urls if url.startswith(env.IMAGES_WESERV_NL)]

            self.valid = False
            return await self.type_fallback(reason=reason or ', '.join(invalid_reasons))

    async def type_fallback(self, reason: Union[Exception, str] = None) -> bool:
        """
        Mark this medium invalid and switch to (or advance) its type fallback medium.

        :param reason: why the fallback is needed; used for logging and forwarded to the fallback medium.
        :return: True if a usable fallback exists afterwards, False if the medium is dropped.
        """
        fallback_urls = self.type_fallback_urls + (list(self.original_urls) if self.typeFallbackAllowSelfUrls else [])
        self.valid = False
        if not self.need_type_fallback and self.type_fallback_medium is None and fallback_urls and self.typeFallbackTo:
            # create type fallback medium
            self.type_fallback_medium = self.typeFallbackTo(fallback_urls)
            if await self.type_fallback_medium.validate():
                logger.debug(
                    f"Medium ({self.describe}) type fallback to "
                    + (
                        f'({self.type_fallback_medium.type})'
                        if self.typeFallbackAllowSelfUrls
                        else f'({self.type_fallback_medium.describe})'
                    )
                    + (
                        f': {type(reason).__name__} ({reason})'
                        if isinstance(reason, Exception)
                        else (f': {reason}' if reason else '')
                    )
                )
                self.need_type_fallback = True
                # self.type_fallback_medium.type = self.type
                # self.type_fallback_medium.original_urls = self.original_urls
                return True
        elif self.need_type_fallback and self.type_fallback_medium is not None:
            # already falling back: let the fallback medium itself fall back further
            if await self.type_fallback_medium.fallback(reason=reason):
                return True
        self.need_type_fallback = False
        logger.debug(
            f'Dropped medium ({self.describe}): '
            + (
                f'{type(reason).__name__} ({reason})'
                if isinstance(reason, Exception)
                else reason or 'invalid or fetch failed'
            )
        )
        return False

    async def fallback(self, reason: Union[Exception, str] = None) -> bool:
        """
        Move on to the next candidate URL (or along the type fallback chain).

        :return: True if the state of the medium changed, False if it was already invalid.
        """
        if self.need_type_fallback:
            await self.type_fallback(reason=reason)
            return True
        urls_len = len(self.urls)
        formerly_valid = self.valid
        if formerly_valid is False:
            return False
        await self.validate(flush=True, reason=reason)
        return (self.valid != formerly_valid
                or (self.valid and urls_len != len(self.urls))
                or self.need_type_fallback)

    async def change_server(self) -> bool:
        """
        Route ``chosen_url`` through the relay server; allowed once per chosen URL.

        :return: True if the URL was changed, False if the server had already been changed.
        """
        if self._server_change_count >= 1:
            return False
        self._server_change_count += 1
        self.chosen_url = env.IMG_RELAY_SERVER + self.chosen_url
        if not env.TRAFFIC_SAVING:
            # noinspection PyBroadException
            try:
                await web.get(url=self.chosen_url, semaphore=False, max_size=0)  # let the img relay sever cache the img
            except Exception:
                pass  # best effort only: a failed warm-up request must not fail the server change
        return True

    def __bool__(self):
        """
        Truth value equals ``valid``.

        :raises RuntimeError: if the medium has not been validated yet.
        """
        if self.valid is None:
            raise RuntimeError('You must validate a medium before judging its validation')
        return self.valid

    def __eq__(self, other):
        """Two media are equal if they have the same class and the same set of original URLs."""
        return type(self) == type(other) and set(self.original_urls) == set(other.original_urls)

    @property
    def hash(self) -> str:
        """'|'-joined state (validity, chosen URL, type fallback state); empty if dropped silently."""
        if self.drop_silently:
            return ''
        return '|'.join(
            str(s) for s in (self.valid,
                             self.chosen_url,
                             self.need_type_fallback,
                             self.type_fallback_medium.hash if self.need_type_fallback else None)
        )

    @property
    def info(self) -> str:
        """Type, size in MB and dimension of the medium, as far as they are known."""
        size_part = f'{self.size / 1024 / 1024:.2f}MB, ' if self.size not in {-1, None} else ''
        dimension_known = self.width not in {-1, None} and self.height not in {-1, None}
        dimension_part = f'{self.width}x{self.height}' if dimension_known else ''
        return (f'{self.type}, ' + size_part + dimension_part).rstrip(', ')

    @property
    def describe(self) -> str:
        """``info`` plus the number of URLs, the first original URL and the chosen URL if it differs."""
        return (
                f'{self.info}, '
                + (f'{len(self.original_urls)}URLs, ' if len(self.original_urls) > 1 else '')
                + f'{self.original_urls[0]}, '
                + (f'chosen: {self.chosen_url}' if self.chosen_url and self.chosen_url != self.original_urls[0] else '')
        ).rstrip(', ')
|
|
|
|
|
|
class File(Medium):
    """Medium sent as a plain document; it is the end of the fallback chain (no further type fallback)."""
    type = FILE
    inputMediaExternalType = InputMediaDocumentExternal
    maxSize = MEDIA_MAX_SIZE
    # no type fallback
    typeFallbackTo = None
    typeFallbackAllowSelfUrls = False
|
|
|
|
|
|
class Image(Medium):
    """Medium sent as a photo; falls back to File using its own URLs."""
    type = IMAGE
    maxSize = IMAGE_MAX_SIZE
    typeFallbackTo = File
    typeFallbackAllowSelfUrls = True
    inputMediaExternalType = InputMediaPhotoExternal

    def __init__(self, urls: Union[str, list[str]]):
        """
        Expand sinaimg / pixiv URLs into their size variants and append images.weserv.nl fallbacks.

        :param urls: one URL or a list of candidate URLs.
        """
        super().__init__(urls)
        candidates: list[str] = []

        def push(candidate: str) -> None:
            # keep the first occurrence only
            if candidate not in candidates:
                candidates.append(candidate)

        for url in self.urls:
            sinaimg_match = sinaimg_size_parser(url)
            pixiv_match = pixiv_size_parser(url)
            if sinaimg_match:
                parts = sinaimg_match.groupdict()  # is a sinaimg img
                for size_name in sinaimg_sizes:
                    push(parts['domain'] + size_name + parts['filename'])
            elif pixiv_match:
                parts = pixiv_match.groupdict()  # is a pixiv img
                for size_name in pixiv_sizes:
                    tail = '_master1200.jpg' if size_name == 'master' else parts['file_ext']
                    push(parts['url_prefix'] + size_name + parts['url_infix'] + parts['filename'] + tail)
            else:
                # an ordinary URL is taken over as is
                candidates.append(url)
                continue
            # the URL as given comes after its generated variants (if it is not one of them)
            push(url)

        self.urls = candidates
        urls_not_images_weserv_nl = [url for url in self.urls if not url.startswith(env.IMAGES_WESERV_NL)]
        # use for final fallback: proxied versions of the first three non-proxied candidates
        self.urls.extend(construct_images_weserv_nl_url(url) for url in urls_not_images_weserv_nl[:3])
        self.chosen_url = self.urls[0]

    async def change_server(self) -> bool:
        """
        Switch a sinaimg URL to the next server (wx1..wx4, wrapping around); other URLs use the base behaviour.

        :return: True if ``chosen_url`` was changed.
        """
        sinaimg_server_match = sinaimg_server_parser(self.chosen_url)
        if sinaimg_server_match is None:  # is not a sinaimg img
            return await super().change_server()

        if self._server_change_count >= 1:
            return False
        self._server_change_count += 1

        parts = sinaimg_server_match.groupdict()
        next_server_id = int(parts['server_id']) + 1
        if next_server_id > 4:
            next_server_id = 1
        self.chosen_url = f"{parts['url_prefix']}{next_server_id}{parts['url_suffix']}"
        return True
|
|
|
|
|
|
class Video(Medium):
    """Medium sent as a video; falls back to Image, which is built from ``type_fallback_urls`` only."""
    type = VIDEO
    inputMediaExternalType = InputMediaDocumentExternal
    maxSize = MEDIA_MAX_SIZE
    # type fallback: an Image created from the dedicated fallback URLs, never from the video URLs themselves
    typeFallbackTo = Image
    typeFallbackAllowSelfUrls = False
|
|
|
|
|
|
class Audio(Medium):
    """Medium sent as an audio document; it has no type fallback."""
    type = AUDIO
    maxSize = MEDIA_MAX_SIZE
    typeFallbackTo = None
    typeFallbackAllowSelfUrls = False
    inputMediaExternalType = InputMediaDocumentExternal

    def __init__(self, urls: Union[str, list[str]]):
        """
        Expand lizhi.fm URLs into their quality variants (see ``lizhi_sizes``).

        :param urls: one URL or a list of candidate URLs.
        """
        super().__init__(urls)
        candidates: list[str] = []
        for url in self.urls:
            lizhi_match = lizhi_parser(url)
            if lizhi_match is None:
                # an ordinary URL is taken over as is
                candidates.append(url)
                continue
            parts = lizhi_match.groupdict()  # is a lizhi audio
            stem = parts['url_prefix'] + parts['server_id'] + parts['url_infix']
            # generated variants first, then the URL as given (if it is not one of them)
            for candidate in (*(stem + size_suffix for size_suffix in lizhi_sizes), url):
                if candidate not in candidates:
                    candidates.append(candidate)
        self.urls = candidates

    async def change_server(self) -> bool:
        """
        Switch a lizhi.fm URL to the previous entry of ``lizhi_server_id``; other URLs use the base behaviour.

        :return: True if ``chosen_url`` was changed.
        """
        lizhi_match = lizhi_parser(self.chosen_url)
        if lizhi_match is None:  # is not a lizhi audio
            return await super().change_server()

        if self._server_change_count >= 1:
            return False
        self._server_change_count += 1

        parts = lizhi_match.groupdict()
        try:
            # index - 1 wraps around: the first entry is followed by the last one
            new_server_id = lizhi_server_id[lizhi_server_id.index(parts['server_id']) - 1]
        except ValueError:  # unknown server id
            new_server_id = lizhi_server_id[0]
        self.chosen_url = f"{parts['url_prefix']}{new_server_id}{parts['url_infix']}{parts['size_suffix']}"
        return True
|
|
|
|
|
|
class Animation(Image):
    """Animated image (GIF) sent as a document; inherits the URL handling of Image but has no type fallback."""
    type = ANIMATION
    inputMediaExternalType = InputMediaDocumentExternal
    maxSize = MEDIA_MAX_SIZE
    # typeFallbackTo = Image
    # typeFallbackAllowSelfUrls = True
    typeFallbackTo = None
    typeFallbackAllowSelfUrls = False
|
|
|
|
|
|
class UploadedImage(AbstractMedium):
    """
    An image whose content is provided locally (bytes, BytesIO, or a callable / awaitable producing them)
    and uploaded to Telegram as a file instead of being fetched from a URL.
    """
    type: str = IMAGE

    def __init__(self, file: Union[bytes, BytesIO, Callable, Awaitable]):
        """
        :param file: the image content, or a callable / awaitable that produces it when validating.
        """
        super().__init__()
        self.file = file
        self.uploaded_file: Union[InputFile, InputFileBig, None] = None
        # last known size of the content in bytes; kept so that ``info`` still works once a BytesIO is closed
        self._size: Optional[int] = None

    def telegramize(self) -> Optional[InputMediaUploadedPhoto]:
        """
        Wrap the uploaded file into an ``InputMediaUploadedPhoto``; None if nothing has been uploaded.

        :raises RuntimeError: if the medium has not been validated yet.
        """
        if self.valid is None:
            raise RuntimeError('validate() must be called before telegramize()')
        if self.uploaded_file:
            return InputMediaUploadedPhoto(self.uploaded_file)
        return None

    @property
    def hash(self) -> str:
        """Stringified ``hash()`` of the underlying file object."""
        return str(hash(self.file))

    @property
    def drop_silently(self):
        """An uploaded image is dropped silently exactly when it has been validated and found invalid."""
        return False if self.valid is None else not self.valid

    @drop_silently.setter
    def drop_silently(self, value):
        # Assigning a falsy value before validation is a no-op, so that ``valid`` stays None (pending).
        if not value and self.valid is None:
            return
        self.valid = not value

    def type_fallback_chain(self) -> Optional[UploadedImage]:
        """Return self if valid, otherwise None (there is no type fallback)."""
        return self if not self.drop_silently and self.valid else None

    def get_link_html_node(self) -> None:
        """An uploaded image has no URL, hence no link node."""
        return None

    async def fallback(self, reason: Union[Exception, str] = None) -> bool:
        """
        There is nothing to fall back to: a valid image is invalidated.

        :return: True if the image was valid before (i.e. the state changed), False otherwise.
        """
        if self.valid:
            self.valid = False
            logger.debug(
                f'Dropped uploaded medium ({self.describe})'
                + (
                    f': {type(reason).__name__} ({reason})'
                    if isinstance(reason, Exception)
                    else (f': {reason}' if reason else '')
                )
            )
            return True
        return False

    # there is no alternative server either
    change_server = fallback

    def _current_size(self) -> Optional[int]:
        """Return the size of the content in bytes, or None if the content is not available (yet)."""
        if isinstance(self.file, bytes):
            return len(self.file)
        if isinstance(self.file, BytesIO) and not self.file.closed:
            # the view is released right away, so that the buffer can be closed later
            with self.file.getbuffer() as view:
                self._size = view.nbytes
        # closed buffer: last size seen; pending producer: None
        return self._size

    async def validate(self, flush: bool = False, *_, **__) -> bool:
        """
        Materialize the file content if needed and upload it to Telegram.

        :param flush: if True, a valid image is invalidated; an invalid or pending one is (re-)uploaded.
        :return: whether the image is valid afterwards.
        """
        if flush and self.valid:
            self.valid = False
            return False
        if not flush and self.valid is not None:
            return self.valid
        async with self.validating_lock:
            # another task may have finished the validation while we were waiting for the lock
            if not flush and self.valid is not None:
                return self.valid
            producer = self.file
            try:
                if isinstance(producer, Awaitable):
                    self.file = await producer
                elif isinstance(producer, Callable):
                    self.file = producer()
            except Exception as e:
                self.valid = False
                # not every callable has a __name__ (e.g. functools.partial objects)
                producer_name = getattr(producer, '__name__', None) or repr(producer)
                logger.error(f'Failed to generate file for {producer_name}', exc_info=e)
                return False
            try:
                if not isinstance(self.file, (bytes, BytesIO)):
                    raise ValueError(f'File must be bytes or BytesIO, got {type(self.file)}')
                self._current_size()  # remember the size before the buffer is closed below
                if isinstance(self.file, BytesIO):
                    self.file.seek(0)
                self.uploaded_file = await env.bot.upload_file(self.file)
                if isinstance(self.file, BytesIO):
                    self.file.close()
                self.valid = True
            except (BadRequestError, ValueError) as e:
                logger.debug(f'Failed to upload file ({self.describe})', exc_info=e)
                self.valid = False
            return self.valid

    @property
    def info(self) -> str:
        """Size of the content in MB, or 'Pending' if it is empty or not available yet."""
        size = self._current_size()
        return f'{size / 1024 / 1024:.2f}MB' if size else 'Pending'

    @property
    def describe(self) -> str:
        """Same as ``info``."""
        return self.info
|
|
|
|
|
|
class Media:
    """
    Ordered collection of media belonging to one post.

    It validates and uploads its media and arranges the results into Telegram messages (single media or
    media groups of up to 10 items).
    """

    def __init__(self):
        self._media: list[AbstractMedium] = []
        self.modify_lock = asyncio.Lock()
        # grouping options, see upload_all()
        self.allow_mixing_images_and_videos: bool = True
        self.consider_videos_as_gifs: bool = False
        self.allow_files_sent_as_album: bool = True

    def add(self, medium: AbstractMedium):
        """Append ``medium`` unless an equal medium is already present."""
        if medium in self._media:
            return
        self._media.append(medium)

    def url_exists(self, url: str, loose: bool = False) -> Optional[Medium]:
        """
        Find the medium one of whose original URLs matches ``url``.

        :param url: URL to look for.
        :param loose: if False, ``url`` must equal an original URL; if True, it is enough that host + path of
            ``url`` (or ``url`` itself if both are empty) is contained in an original URL.
        :return: the first matching medium, or None.
        """
        # must check if medium is Medium and not UploadedImage
        url_media = (medium for medium in self._media if isinstance(medium, Medium))
        if not loose:
            return next((medium for medium in url_media if url in medium.original_urls), None)
        url_obj = urlparse(url)
        # magnet:?xt=... -> (scheme='magnet', netloc='', path='', params='', query='xt=...', fragment='')
        url_part = (url_obj.netloc + url_obj.path) or url
        for medium in url_media:
            if any(url_part in original_url for original_url in medium.original_urls):
                return medium
        return None

    async def fallback_all(self) -> bool:
        """Call ``fallback()`` on every medium not dropped silently; True if any of them returned True."""
        if not self._media:
            return False
        fallback_flag = False
        for medium in self._media:
            if not medium.drop_silently and await medium.fallback():
                fallback_flag = True
        return fallback_flag

    def invalidate_all(self) -> bool:
        """
        Invalidate every medium that is currently usable.

        :return: True if at least one medium was invalidated.
        """
        invalidated_some_flag = False
        for medium in self._media:
            # Parentheses only spell out the operator precedence of the original condition.
            # NOTE(review): a medium with need_type_fallback is invalidated even if drop_silently is set;
            # confirm that this is intended.
            if (not medium.drop_silently and medium.valid) or medium.need_type_fallback:
                medium.valid = False
                medium.need_type_fallback = False
                invalidated_some_flag = True
        return invalidated_some_flag

    async def validate(self, flush: bool = False):
        """Validate all media not dropped silently, concurrently."""
        if not self._media:
            return
        await asyncio.gather(*(medium.validate(flush=flush) for medium in self._media if not medium.drop_silently))

    async def upload_all(
            self, chat_id: Optional[int]
    ) -> tuple[
        list[
            tuple[
                Union[
                    tuple[
                        Union[
                            TypeMessageMedia,  # uploaded media
                            Medium,  # origin media (if chat_id is None)
                        ],
                        ...,
                    ],  # uploaded media list of the media group
                    Union[
                        TypeMessageMedia,  # uploaded media
                        Medium,  # origin media (if chat_id is None)
                    ],
                ],
                TypeMessage,  # message type
            ]
        ],
        Optional[HtmlTree],
    ]:
        """
        Upload all media to telegram.

        :param chat_id: chat_id to upload to. If None, the origin media will be returned.
        :return: ((uploaded/original medium, medium type), invalid media html node)
        """
        await self.validate()

        # FIX: remember which media are processed. The results below only cover media not dropped silently,
        # so pairing them with the complete ``self._media`` list would attribute them to the wrong medium
        # whenever a dropped medium precedes a kept one.
        candidates = [medium for medium in self._media if not medium.drop_silently]

        media_and_types: tuple[
            Union[tuple[Union[TypeMessageMedia, Medium, None], Optional[TypeMedium]], BaseException],
            ...]
        if chat_id:
            # tuple[Union[tuple[Optional[TypeMessageMedia], Optional[TypeMedium]], BaseException], ...]
            media_and_types = await asyncio.gather(
                *(medium.upload(chat_id) for medium in candidates),
                return_exceptions=True
            )
        else:
            # tuple[tuple[Optional[Medium], Optional[TypeMedium]], ...]
            chain_ends = (medium.type_fallback_chain() for medium in candidates)
            media_and_types = tuple((end, end.type) if end is not None else (None, None) for end in chain_ends)

        media: list[tuple[Union[TypeMessageMedia, Image, Video], Union[IMAGE, VIDEO]]] = []
        images: list[tuple[Union[MessageMediaPhoto, Image], Union[IMAGE]]] = []
        videos: list[tuple[Union[MessageMediaDocument, Video], Union[VIDEO]]] = []
        gifs: list[tuple[Union[MessageMediaDocument, Animation], ANIMATION]] = []
        audios: list[tuple[Union[MessageMediaDocument, Audio], AUDIO]] = []
        files: list[tuple[Union[MessageMediaDocument, File], FILE]] = []
        bucket_of_type = {IMAGE: images, VIDEO: videos, ANIMATION: gifs, AUDIO: audios, FILE: files}

        link_nodes: list[Text] = []
        for medium, medium_and_type in zip(candidates, media_and_types):
            if isinstance(medium_and_type, Exception):
                if type(medium_and_type) in UserBlockedErrors:  # user blocked, let it go
                    raise medium_and_type
                logger.debug('Upload media failed:', exc_info=medium_and_type)
                link_nodes.append(medium.get_link_html_node())
                continue
            file, file_type = medium_and_type
            bucket = bucket_of_type.get(file_type)
            if bucket is None:
                # nothing usable came out of this medium: list it as invalid
                link_nodes.append(medium.get_link_html_node())
            else:
                if file_type in (IMAGE, VIDEO):
                    media.append(medium_and_type)  # images and videos are additionally kept in mixed order
                bucket.append(medium_and_type)
            # a video sent as its fallback (image/file) is still listed, so that the video itself stays reachable
            if file_type in {IMAGE, FILE} and isinstance(medium, Video) and file_type != medium.type:
                link_nodes.append(medium.get_link_html_node())

        ret = []
        allow_in_group = (
                ((media,) if self.allow_mixing_images_and_videos and not self.consider_videos_as_gifs else (images,))
                + (tuple() if self.consider_videos_as_gifs or self.allow_mixing_images_and_videos else (videos,))
                + (audios,)
                + ((files,) if self.allow_files_sent_as_album else tuple())
        )
        disallow_in_group = (
                ((videos,) if self.consider_videos_as_gifs else tuple())
                + (gifs,)
                + (tuple() if self.allow_files_sent_as_album else (files,))
        )
        for list_to_process in allow_in_group:
            # one message can hold 10 media at most
            for start in range(0, len(list_to_process), 10):
                batch = list_to_process[start:start + 10]
                if len(batch) == 1:
                    ret.append(batch[0])
                else:
                    # media group
                    media_group = tuple(medium_and_type[0] for medium_and_type in batch)
                    ret.append((media_group, MEDIA_GROUP))
        for list_to_process in disallow_in_group:
            # each of these must occupy a message of its own
            ret.extend(list_to_process)

        html_nodes = []
        invalid_html_node: Optional[HtmlTree] = None
        for link in link_nodes:
            if not link:
                continue
            html_nodes.extend((link, Br()))
        if html_nodes:
            html_nodes.pop()  # drop the trailing line break
            html_nodes.insert(0, Text('Invalid media:\n'))
            invalid_html_node = HtmlTree(html_nodes)

        return ret, invalid_html_node

    async def estimate_message_counts(self):
        """Return the number of messages the media would be arranged into (nothing is uploaded)."""
        media = await self.upload_all(chat_id=None)
        return sum(1 for _ in media[0])

    def __len__(self) -> int:
        """Number of media not dropped silently."""
        return sum(not medium.drop_silently for medium in self._media)

    def __bool__(self) -> bool:
        """True if at least one medium is not dropped silently."""
        return any(not medium.drop_silently for medium in self._media)

    @property
    def valid_count(self):
        """Number of valid media (dropped ones excluded)."""
        return sum(medium.valid is True and not medium.drop_silently for medium in self._media)

    @property
    def invalid_count(self):
        """Number of invalid media (dropped ones excluded)."""
        return sum(medium.valid is False and not medium.drop_silently for medium in self._media)

    @property
    def pending_count(self):
        """Number of media not validated yet (dropped ones excluded)."""
        return sum(medium.valid is None and not medium.drop_silently for medium in self._media)

    @property
    def need_type_fallback_count(self):
        """Number of media currently represented by their type fallback (dropped ones excluded)."""
        return sum(
            medium.need_type_fallback
            and medium.type_fallback_medium is not None
            and not medium.drop_silently
            for medium in self._media
        )

    def stat(self):
        """
        Take a snapshot of the counters above.

        :return: an object with the attributes ``valid``, ``invalid``, ``pending`` and ``need_type_fallback``;
            two snapshots compare equal if all four counters are equal.
        """
        class MediaStat:
            valid = self.valid_count
            invalid = self.invalid_count
            pending = self.pending_count
            need_type_fallback = self.need_type_fallback_count

            _fields = ('valid', 'invalid', 'pending', 'need_type_fallback')

            def __eq__(self, other):
                # FIX: the former ``isinstance(self, other)`` raised TypeError for every comparison. A class
                # check would not help either, since each stat() call creates a new class: compare by value.
                try:
                    return all(getattr(self, name) == getattr(other, name) for name in self._fields)
                except AttributeError:
                    return NotImplemented

        return MediaStat()

    @property
    def hash(self):
        """'|'-joined hashes of all media."""
        return '|'.join(medium.hash for medium in self._media)
|
|
|
|
|
|
def construct_images_weserv_nl_url(url: str,
                                   width: Optional[int] = 2560,
                                   height: Optional[int] = 2560,
                                   fit: Optional[str] = 'inside',
                                   output_format: Optional[str] = 'png',
                                   without_enlargement: Optional[bool] = True,
                                   default_image: Optional[str] = None) -> str:
    """
    Build an images.weserv.nl URL that serves ``url`` with the given transformation.

    Every option set to None (or, for ``without_enlargement``, any falsy value) is left out of the query.

    :param url: URL of the source image (``url`` query parameter).
    :param width: value of the ``w`` query parameter.
    :param height: value of the ``h`` query parameter.
    :param fit: value of the ``fit`` query parameter.
    :param output_format: value of the ``output`` query parameter.
    :param without_enlargement: if truthy, ``we=1`` is added.
    :param default_image: value of the ``default`` query parameter.
    :return: ``env.IMAGES_WESERV_NL`` followed by the URL-encoded query string.
    """
    candidates = (
        ('url', url),
        ('w', width),
        ('h', height),
        ('fit', fit),
        ('output', output_format),
        ('we', '1' if without_enlargement else None),
        ('default', default_image),
    )
    query_string = urlencode({key: value for key, value in candidates if value is not None})
    return f'{env.IMAGES_WESERV_NL}?{query_string}'
|
|
|
|
|
|
def construct_images_weserv_nl_url_convert_to_jpg(url: str) -> str:
    """Build an images.weserv.nl URL that only converts ``url`` to JPG (no resizing options are sent)."""
    return construct_images_weserv_nl_url(
        url,
        width=None,
        height=None,
        fit=None,
        output_format='jpg',
        without_enlargement=None,
    )
|
|
|
|
|
|
async def detect_image_dimension_via_images_weserv_nl(url: str) -> tuple[int, int]:
    """
    Detect the dimension of an image by fetching the medium info of its JPG version on images.weserv.nl.

    :param url: URL of the original image.
    :return: ``(width, height)``, or ``(-1, -1)`` if no medium info could be obtained.
    """
    medium_info = await web.get_medium_info(construct_images_weserv_nl_url_convert_to_jpg(url))
    if medium_info:
        # medium info is (size, width, height, content type); only the dimension matters here
        return medium_info[1], medium_info[2]
    return -1, -1
|