From 9115a44be687e627a2cbac7a6ffe5cbabef9a547 Mon Sep 17 00:00:00 2001 From: Alex Root Junior Date: Wed, 1 Jan 2020 16:39:31 +0200 Subject: [PATCH] Backport of text decoration utils from 3.0 --- aiogram/bot/base.py | 5 + aiogram/types/message.py | 35 +------ aiogram/types/message_entity.py | 10 +- aiogram/utils/markdown.py | 158 ++++++++++++++++-------------- aiogram/utils/text_decorations.py | 143 +++++++++++++++++++++++++++ 5 files changed, 242 insertions(+), 109 deletions(-) create mode 100644 aiogram/utils/text_decorations.py diff --git a/aiogram/bot/base.py b/aiogram/bot/base.py index 8cc64e33..0b7468be 100644 --- a/aiogram/bot/base.py +++ b/aiogram/bot/base.py @@ -3,6 +3,7 @@ import contextlib import io import ssl import typing +import warnings from contextvars import ContextVar from typing import Dict, List, Optional, Union @@ -269,6 +270,10 @@ class BaseBot: if value not in ParseMode.all(): raise ValueError(f"Parse mode must be one of {ParseMode.all()}") setattr(self, '_parse_mode', value) + if value == 'markdown': + warnings.warn("Parse mode `Markdown` is legacy since Telegram Bot API 4.5, " + "retained for backward compatibility. Use `MarkdownV2` instead.\n" + "https://core.telegram.org/bots/api#markdown-style", stacklevel=3) @parse_mode.deleter def parse_mode(self): diff --git a/aiogram/types/message.py b/aiogram/types/message.py index 8fdece2b..dbe35738 100644 --- a/aiogram/types/message.py +++ b/aiogram/types/message.py @@ -2,7 +2,6 @@ from __future__ import annotations import datetime import functools -import sys import typing from . import base @@ -32,6 +31,7 @@ from .video_note import VideoNote from .voice import Voice from ..utils import helper from ..utils import markdown as md +from ..utils.text_decorations import html_decoration, markdown_decoration class Message(base.TelegramObject): @@ -200,38 +200,10 @@ class Message(base.TelegramObject): if text is None: raise TypeError("This message doesn't have any text.") - quote_fn = md.quote_html if as_html else md.escape_md - entities = self.entities or self.caption_entities - if not entities: - return quote_fn(text) + text_decorator = html_decoration if as_html else markdown_decoration - if not sys.maxunicode == 0xffff: - text = text.encode('utf-16-le') - - result = '' - offset = 0 - - for entity in sorted(entities, key=lambda item: item.offset): - entity_text = entity.parse(text, as_html=as_html) - - if sys.maxunicode == 0xffff: - part = text[offset:entity.offset] - result += quote_fn(part) + entity_text - else: - part = text[offset * 2:entity.offset * 2] - result += quote_fn(part.decode('utf-16-le')) + entity_text - - offset = entity.offset + entity.length - - if sys.maxunicode == 0xffff: - part = text[offset:] - result += quote_fn(part) - else: - part = text[offset * 2:] - result += quote_fn(part.decode('utf-16-le')) - - return result + return text_decorator.unparse(text, entities) @property def md_text(self) -> str: @@ -1798,4 +1770,5 @@ class ParseMode(helper.Helper): mode = helper.HelperMode.lowercase MARKDOWN = helper.Item() + MARKDOWN_V2 = helper.Item() HTML = helper.Item() diff --git a/aiogram/types/message_entity.py b/aiogram/types/message_entity.py index f0ad75d6..98191e43 100644 --- a/aiogram/types/message_entity.py +++ b/aiogram/types/message_entity.py @@ -4,6 +4,7 @@ from . import base from . import fields from .user import User from ..utils import helper, markdown +from ..utils.deprecated import deprecated class MessageEntity(base.TelegramObject): @@ -36,6 +37,7 @@ class MessageEntity(base.TelegramObject): entity_text = entity_text[self.offset * 2:(self.offset + self.length) * 2] return entity_text.decode('utf-16-le') + @deprecated("This method doesn't work with nested entities and will be removed in aiogram 3.0") def parse(self, text, as_html=True): """ Get entity value with markup @@ -87,6 +89,8 @@ class MessageEntityType(helper.Helper): :key: ITALIC :key: CODE :key: PRE + :key: UNDERLINE + :key: STRIKETHROUGH :key: TEXT_LINK :key: TEXT_MENTION """ @@ -101,7 +105,9 @@ class MessageEntityType(helper.Helper): PHONE_NUMBER = helper.Item() # phone_number BOLD = helper.Item() # bold - bold text ITALIC = helper.Item() # italic - italic text - CODE = helper.Item() # code - monowidth string - PRE = helper.Item() # pre - monowidth block + CODE = helper.Item() # code - monowidth string + PRE = helper.Item() # pre - monowidth block + UNDERLINE = helper.Item() # underline + STRIKETHROUGH = helper.Item() # strikethrough TEXT_LINK = helper.Item() # text_link - for clickable text URLs TEXT_MENTION = helper.Item() # text_mention - for users without usernames diff --git a/aiogram/utils/markdown.py b/aiogram/utils/markdown.py index 89a23d94..7b217b4f 100644 --- a/aiogram/utils/markdown.py +++ b/aiogram/utils/markdown.py @@ -1,59 +1,28 @@ -LIST_MD_SYMBOLS = '*_`[' +from .text_decorations import html_decoration, markdown_decoration + +LIST_MD_SYMBOLS = "*_`[" MD_SYMBOLS = ( (LIST_MD_SYMBOLS[0], LIST_MD_SYMBOLS[0]), (LIST_MD_SYMBOLS[1], LIST_MD_SYMBOLS[1]), (LIST_MD_SYMBOLS[2], LIST_MD_SYMBOLS[2]), - (LIST_MD_SYMBOLS[2] * 3 + '\n', '\n' + LIST_MD_SYMBOLS[2] * 3), - ('', ''), - ('', ''), - ('', ''), - ('
', '
'), + (LIST_MD_SYMBOLS[2] * 3 + "\n", "\n" + LIST_MD_SYMBOLS[2] * 3), + ("", ""), + ("", ""), + ("", ""), + ("
", "
"), ) -HTML_QUOTES_MAP = { - '<': '<', - '>': '>', - '&': '&', - '"': '"' -} +HTML_QUOTES_MAP = {"<": "<", ">": ">", "&": "&", '"': """} _HQS = HTML_QUOTES_MAP.keys() # HQS for HTML QUOTES SYMBOLS -def _join(*content, sep=' '): +def _join(*content, sep=" "): return sep.join(map(str, content)) -def _escape(s, symbols=LIST_MD_SYMBOLS): - for symbol in symbols: - s = s.replace(symbol, '\\' + symbol) - return s - - -def _md(string, symbols=('', '')): - start, end = symbols - return start + string + end - - -def quote_html(content): - """ - Quote HTML symbols - - All <, >, & and " symbols that are not a part of a tag or - an HTML entity must be replaced with the corresponding HTML entities - (< with < > with > & with & and " with "). - - :param content: str - :return: str - """ - new_content = '' - for symbol in content: - new_content += HTML_QUOTES_MAP[symbol] if symbol in _HQS else symbol - return new_content - - -def text(*content, sep=' '): +def text(*content, sep=" "): """ Join all elements with a separator @@ -64,7 +33,7 @@ def text(*content, sep=' '): return _join(*content, sep=sep) -def bold(*content, sep=' '): +def bold(*content, sep=" "): """ Make bold text (Markdown) @@ -72,10 +41,10 @@ def bold(*content, sep=' '): :param sep: :return: """ - return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[0]) + return markdown_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep))) -def hbold(*content, sep=' '): +def hbold(*content, sep=" "): """ Make bold text (HTML) @@ -83,10 +52,10 @@ def hbold(*content, sep=' '): :param sep: :return: """ - return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[4]) + return html_decoration.bold.format(value=html_decoration.quote(_join(*content, sep=sep))) -def italic(*content, sep=' '): +def italic(*content, sep=" "): """ Make italic text (Markdown) @@ -94,10 +63,10 @@ def italic(*content, sep=' '): :param sep: :return: """ - return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[1]) + return markdown_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep))) -def hitalic(*content, sep=' '): +def hitalic(*content, sep=" "): """ Make italic text (HTML) @@ -105,10 +74,10 @@ def hitalic(*content, sep=' '): :param sep: :return: """ - return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[5]) + return html_decoration.italic.format(value=html_decoration.quote(_join(*content, sep=sep))) -def code(*content, sep=' '): +def code(*content, sep=" "): """ Make mono-width text (Markdown) @@ -116,10 +85,10 @@ def code(*content, sep=' '): :param sep: :return: """ - return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[2]) + return markdown_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep))) -def hcode(*content, sep=' '): +def hcode(*content, sep=" "): """ Make mono-width text (HTML) @@ -127,10 +96,10 @@ def hcode(*content, sep=' '): :param sep: :return: """ - return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[6]) + return html_decoration.code.format(value=html_decoration.quote(_join(*content, sep=sep))) -def pre(*content, sep='\n'): +def pre(*content, sep="\n"): """ Make mono-width text block (Markdown) @@ -138,10 +107,10 @@ def pre(*content, sep='\n'): :param sep: :return: """ - return _md(_join(*content, sep=sep), symbols=MD_SYMBOLS[3]) + return markdown_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep))) -def hpre(*content, sep='\n'): +def hpre(*content, sep="\n"): """ Make mono-width text block (HTML) @@ -149,10 +118,60 @@ def hpre(*content, sep='\n'): :param sep: :return: """ - return _md(quote_html(_join(*content, sep=sep)), symbols=MD_SYMBOLS[7]) + return html_decoration.pre.format(value=html_decoration.quote(_join(*content, sep=sep))) -def link(title, url): +def underline(*content, sep=" "): + """ + Make underlined text (Markdown) + + :param content: + :param sep: + :return: + """ + return markdown_decoration.underline.format( + value=markdown_decoration.quote(_join(*content, sep=sep)) + ) + + +def hunderline(*content, sep=" "): + """ + Make underlined text (HTML) + + :param content: + :param sep: + :return: + """ + return html_decoration.underline.format(value=html_decoration.quote(_join(*content, sep=sep))) + + +def strikethrough(*content, sep=" "): + """ + Make strikethrough text (Markdown) + + :param content: + :param sep: + :return: + """ + return markdown_decoration.strikethrough.format( + value=markdown_decoration.quote(_join(*content, sep=sep)) + ) + + +def hstrikethrough(*content, sep=" "): + """ + Make strikethrough text (HTML) + + :param content: + :param sep: + :return: + """ + return html_decoration.strikethrough.format( + value=html_decoration.quote(_join(*content, sep=sep)) + ) + + +def link(title: str, url: str) -> str: """ Format URL (Markdown) @@ -160,10 +179,10 @@ def link(title, url): :param url: :return: """ - return "[{0}]({1})".format(title, url) + return markdown_decoration.link.format(value=html_decoration.quote(title), link=url) -def hlink(title, url): +def hlink(title: str, url: str) -> str: """ Format URL (HTML) @@ -171,23 +190,10 @@ def hlink(title, url): :param url: :return: """ - return '{1}'.format(url, quote_html(title)) + return html_decoration.link.format(value=html_decoration.quote(title), link=url) -def escape_md(*content, sep=' '): - """ - Escape markdown text - - E.g. for usernames - - :param content: - :param sep: - :return: - """ - return _escape(_join(*content, sep=sep)) - - -def hide_link(url): +def hide_link(url: str) -> str: """ Hide URL (HTML only) Can be used for adding an image to a text message diff --git a/aiogram/utils/text_decorations.py b/aiogram/utils/text_decorations.py new file mode 100644 index 00000000..5b2cf51c --- /dev/null +++ b/aiogram/utils/text_decorations.py @@ -0,0 +1,143 @@ +from __future__ import annotations +import html +import re +import struct +from dataclasses import dataclass +from typing import TYPE_CHECKING, AnyStr, Callable, Generator, Iterable, List, Optional + +if TYPE_CHECKING: + from aiogram.types import MessageEntity + +__all__ = ( + "TextDecoration", + "html_decoration", + "markdown_decoration", + "add_surrogate", + "remove_surrogate", +) + + +@dataclass +class TextDecoration: + link: str + bold: str + italic: str + code: str + pre: str + underline: str + strikethrough: str + quote: Callable[[AnyStr], AnyStr] + + def apply_entity(self, entity: MessageEntity, text: str) -> str: + """ + Apply single entity to text + + :param entity: + :param text: + :return: + """ + if entity.type in ( + "bold", + "italic", + "code", + "pre", + "underline", + "strikethrough", + ): + return getattr(self, entity.type).format(value=text) + elif entity.type == "text_mention": + return self.link.format(value=text, link=f"tg://user?id={entity.user.id}") + elif entity.type == "text_link": + return self.link.format(value=text, link=entity.url) + elif entity.type == "url": + return text + return self.quote(text) + + def unparse(self, text, entities: Optional[List[MessageEntity]] = None) -> str: + """ + Unparse message entities + + :param text: raw text + :param entities: Array of MessageEntities + :return: + """ + text = add_surrogate(text) + result = "".join( + self._unparse_entities( + text, sorted(entities, key=lambda item: item.offset) if entities else [] + ) + ) + return remove_surrogate(result) + + def _unparse_entities( + self, + text: str, + entities: Iterable[MessageEntity], + offset: Optional[int] = None, + length: Optional[int] = None, + ) -> Generator[str, None, None]: + offset = offset or 0 + length = length or len(text) + + for index, entity in enumerate(entities): + if entity.offset < offset: + continue + if entity.offset > offset: + yield self.quote(text[offset : entity.offset]) + start = entity.offset + offset = entity.offset + entity.length + + sub_entities = list( + filter(lambda e: e.offset < offset, entities[index + 1 :]) + ) + yield self.apply_entity( + entity, + "".join( + self._unparse_entities( + text, sub_entities, offset=start, length=offset + ) + ), + ) + + if offset < length: + yield self.quote(text[offset:length]) + + +html_decoration = TextDecoration( + link='{value}', + bold="{value}", + italic="{value}", + code="{value}", + pre="
{value}
", + underline="{value}", + strikethrough="{value}", + quote=html.escape, +) + +MARKDOWN_QUOTE_PATTERN = re.compile(r"([_*\[\]()~`>#+\-|{}.!])") + +markdown_decoration = TextDecoration( + link="[{value}]({link})", + bold="*{value}*", + italic="_{value}_\r", + code="`{value}`", + pre="```{value}```", + underline="__{value}__", + strikethrough="~{value}~", + quote=lambda text: re.sub( + pattern=MARKDOWN_QUOTE_PATTERN, repl=r"\\\1", string=text + ), +) + + +def add_surrogate(text: str) -> str: + return "".join( + "".join(chr(d) for d in struct.unpack(" str: + return text.encode("utf-16", "surrogatepass").decode("utf-16")