mirror of
https://github.com/aiogram/aiogram.git
synced 2026-04-08 16:37:47 +00:00
Add text decorations utility. Provide nested message entities
This commit is contained in:
parent
3910c51ddc
commit
6275bf885c
2 changed files with 277 additions and 0 deletions
127
aiogram/utils/text_decorations.py
Normal file
127
aiogram/utils/text_decorations.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
import html
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from struct import unpack
|
||||
from typing import AnyStr, Callable, Generator, Iterable, List, Optional
|
||||
|
||||
from aiogram.api.types import MessageEntity
|
||||
|
||||
# Names exported by a star-import of this module.
# NOTE: "html" here is the TextDecoration instance defined below, which rebinds
# the name of the standard library module imported at the top of the file.
__all__ = (
    "TextDecoration",
    "html",
    "markdown",
    "add_surrogates",
    "remove_surrogates",
)
|
||||
|
||||
|
||||
@dataclass
class TextDecoration:
    """
    Set of markup templates used to render message entities as formatted text.

    Every template is a ``str.format`` pattern: ``link`` receives ``value`` and
    ``link``, all the other templates receive only ``value``. ``quote`` is the
    callable used to escape plain text so it is not interpreted as markup.
    """

    link: str
    bold: str
    italic: str
    code: str
    pre: str
    underline: str
    strikethrough: str
    quote: Callable[[AnyStr], AnyStr]

    def apply_entity(self, entity: "MessageEntity", text: str) -> str:
        """
        Apply single entity to text

        :param entity: entity whose ``type`` selects the template to use
        :param text: content of the entity; it is inserted as-is, so it must
            already be quoted by the caller (``_unparse_entities`` does that)
        :return: decorated text; for entity types without a template the text
            is returned unchanged
        """
        if entity.type in ("bold", "italic", "code", "pre", "underline", "strikethrough"):
            return getattr(self, entity.type).format(value=text)
        if entity.type == "text_mention":
            return self.link.format(value=text, link=f"tg://user?id={entity.user.id}")
        if entity.type == "text_link":
            return self.link.format(value=text, link=entity.url)
        # Entities without markup of their own (url, hashtag, bot_command, ...).
        # The text must not be quoted here: every other branch inserts it as-is
        # and _unparse_entities has already quoted it, so quoting again would
        # double-escape it (e.g. "/my_command" -> "/my\\\\_command" in Markdown).
        return text

    def unparse(self, text: str, entities: Optional[List["MessageEntity"]] = None) -> str:
        """
        Unparse message entities

        :param text: raw text
        :param entities: Array of MessageEntities
        :return: text with the entities rendered using this decoration
        """
        # Entity offsets are applied to the surrogate-expanded string, where
        # every code point above U+FFFF occupies two positions.
        text = add_surrogates(text)
        result = "".join(self._unparse_entities(text, entities))
        return remove_surrogates(result)

    def _unparse_entities(
        self,
        text: str,
        entities: Optional[Iterable["MessageEntity"]],
        offset: Optional[int] = None,
        length: Optional[int] = None,
    ) -> Generator[str, None, None]:
        """
        Yield quoted and decorated chunks of ``text[offset:length]``

        :param text: text with surrogate pairs already expanded
        :param entities: entities to apply; any iterable (or None) is accepted
        :param offset: position in text to start from, defaults to 0
        :param length: position in text to stop at (an end index, despite the
            name), defaults to the end of text
        :return: generator of text chunks in document order
        """
        # Materialise the iterable (it is sliced below) and make sure entities
        # are visited in document order; the sort is stable, so an outer entity
        # listed before an inner one with the same offset stays first.
        entities = sorted(entities or [], key=lambda item: item.offset)
        # Compare with None explicitly: 0 is a valid boundary and must not be
        # replaced by the default (a zero-length entity at offset 0 passes 0).
        if offset is None:
            offset = 0
        if length is None:
            length = len(text)

        for index, entity in enumerate(entities):
            if entity.offset < offset:
                # Already rendered as a nested entity of a previous one
                continue
            if entity.offset > offset:
                # Plain text between the previous entity and this one
                yield self.quote(text[offset : entity.offset])
            start = entity.offset
            end = start + entity.length

            # Entities starting inside the current one are rendered recursively
            sub_entities = [
                candidate for candidate in entities[index + 1 :] if start <= candidate.offset < end
            ]
            yield self.apply_entity(
                entity,
                "".join(self._unparse_entities(text, sub_entities, offset=start, length=end)),
            )
            offset = end

        if offset < length:
            # Plain text after the last entity
            yield self.quote(text[offset:length])
|
||||
|
||||
|
||||
# Decoration that renders entities as HTML tags.
# NOTE: this assignment rebinds the module-level name ``html`` from the standard
# library module to the decoration instance; ``html.escape`` is looked up while
# the arguments are evaluated, i.e. before the rebinding takes place.
html = TextDecoration(
    quote=html.escape,
    link='<a href="{link}">{value}</a>',
    bold="<b>{value}</b>",
    italic="<i>{value}</i>",
    underline="<u>{value}</u>",
    strikethrough="<s>{value}</s>",
    code="<code>{value}</code>",
    pre="<pre>{value}</pre>",
)
|
||||
|
||||
# Decoration that renders entities as Markdown.
# Markdown is not recommended for usage. Use HTML instead
markdown = TextDecoration(
    # Prefix "*", "_", "`" and "[" with a backslash. Is not always helpful
    quote=lambda text: re.sub(r"([*_`\[])", r"\\\1", text),
    link="[{value}]({link})",
    bold="*{value}*",
    italic="_{value}_",
    underline="--{value}--",  # Is not supported
    strikethrough="~~{value}~~",  # Is not supported
    code="`{value}`",
    pre="```{value}```",
)
|
||||
|
||||
# The surrogate utilities were copied from Pyrogram code, which is under the GPL v3 License.
|
||||
# Source: https://github.com/pyrogram/pyrogram/blob/c5cc85f0076149fc6f3a6fc1d482affb01eeab21/pyrogram/client/parser/utils.py#L19-L37
|
||||
|
||||
# Matches every single code point in the range U+10000..U+10FFFF, i.e. the
# characters outside the Basic Multilingual Plane that UTF-16 encodes with two
# code units: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def _as_surrogate_pair(match):
    """Return the two UTF-16 surrogates encoding the single matched code point."""
    high, low = unpack("<HH", match.group().encode("utf-16le"))
    return chr(high) + chr(low)


def add_surrogates(text: str) -> str:
    """
    Replace every code point matched by SMP_RE with its UTF-16 surrogate pair

    After the replacement each such character occupies two positions in the
    string, one per UTF-16 code unit.

    :param text: text to expand
    :return: text where the matched characters are split into two surrogates
    """
    return SMP_RE.sub(_as_surrogate_pair, text)
|
||||
|
||||
|
||||
def remove_surrogates(text: str) -> str:
    """
    Replace each surrogate pair with the code point it encodes

    :param text: text that may contain UTF-16 surrogate pairs
    :return: text where every surrogate pair is merged into one character
    """
    # "surrogatepass" lets the surrogates through the encoder; decoding the
    # resulting UTF-16 bytes then joins each pair into a single code point.
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")
|
||||
150
tests/test_utils/test_text_decorations.py
Normal file
150
tests/test_utils/test_text_decorations.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
from typing import List, Optional
|
||||
|
||||
import pytest
|
||||
from aiogram.api.types import MessageEntity, User
|
||||
from aiogram.utils.text_decorations import TextDecoration, html, markdown
|
||||
|
||||
|
||||
class TestTextDecoration:
    """Tests for the ``html`` and ``markdown`` text decorations."""

    @pytest.mark.parametrize(
        "decorator,entity,result",
        [
            [html, MessageEntity(type="url", offset=0, length=5), "test"],
            [
                html,
                MessageEntity(type="text_link", offset=0, length=5, url="https://aiogram.dev"),
                '<a href="https://aiogram.dev">test</a>',
            ],
            [html, MessageEntity(type="bold", offset=0, length=5), "<b>test</b>"],
            [html, MessageEntity(type="italic", offset=0, length=5), "<i>test</i>"],
            [html, MessageEntity(type="code", offset=0, length=5), "<code>test</code>"],
            [html, MessageEntity(type="pre", offset=0, length=5), "<pre>test</pre>"],
            [html, MessageEntity(type="underline", offset=0, length=5), "<u>test</u>"],
            [html, MessageEntity(type="strikethrough", offset=0, length=5), "<s>test</s>"],
            [html, MessageEntity(type="hashtag", offset=0, length=5), "test"],
            [html, MessageEntity(type="cashtag", offset=0, length=5), "test"],
            [html, MessageEntity(type="bot_command", offset=0, length=5), "test"],
            [html, MessageEntity(type="email", offset=0, length=5), "test"],
            [html, MessageEntity(type="phone_number", offset=0, length=5), "test"],
            [
                html,
                MessageEntity(
                    type="text_mention",
                    offset=0,
                    length=5,
                    user=User(id=42, first_name="Test", is_bot=False),
                ),
                '<a href="tg://user?id=42">test</a>',
            ],
            # The url / text_link cases used to be repeated for html here;
            # they now cover the markdown decoration instead.
            [markdown, MessageEntity(type="url", offset=0, length=5), "test"],
            [
                markdown,
                MessageEntity(type="text_link", offset=0, length=5, url="https://aiogram.dev"),
                "[test](https://aiogram.dev)",
            ],
            [markdown, MessageEntity(type="bold", offset=0, length=5), "*test*"],
            [markdown, MessageEntity(type="italic", offset=0, length=5), "_test_"],
            [markdown, MessageEntity(type="code", offset=0, length=5), "`test`"],
            [markdown, MessageEntity(type="pre", offset=0, length=5), "```test```"],
            [markdown, MessageEntity(type="underline", offset=0, length=5), "--test--"],
            [markdown, MessageEntity(type="strikethrough", offset=0, length=5), "~~test~~"],
            [markdown, MessageEntity(type="hashtag", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="cashtag", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="bot_command", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="email", offset=0, length=5), "test"],
            [markdown, MessageEntity(type="phone_number", offset=0, length=5), "test"],
            [
                markdown,
                MessageEntity(
                    type="text_mention",
                    offset=0,
                    length=5,
                    user=User(id=42, first_name="Test", is_bot=False),
                ),
                "[test](tg://user?id=42)",
            ],
        ],
    )
    def test_apply_single_entity(
        self, decorator: TextDecoration, entity: MessageEntity, result: str
    ):
        """A single entity wraps the given text with the matching template."""
        assert decorator.apply_entity(entity, "test") == result

    @pytest.mark.parametrize(
        "decorator,before,after",
        [
            [html, "test", "test"],
            # html.escape turns the markup characters into entities
            [html, "test < test", "test &lt; test"],
            [html, "test > test", "test &gt; test"],
            [html, "test & test", "test &amp; test"],
            [html, "test @ test", "test @ test"],
            [markdown, "test", "test"],
            [markdown, "[test]", "\\[test]"],
            [markdown, "test ` test", "test \\` test"],
            [markdown, "test * test", "test \\* test"],
            [markdown, "test _ test", "test \\_ test"],
        ],
    )
    def test_quote(self, decorator: TextDecoration, before: str, after: str):
        """Plain text is escaped according to the decoration's markup."""
        assert decorator.quote(before) == after

    @pytest.mark.parametrize(
        "decorator,text,entities,result",
        [
            [html, "test", None, "test"],
            [
                html,
                "test1 test2 test3 test4 test5 test6 test7",
                [
                    MessageEntity(type="bold", offset=6, length=29),
                    MessageEntity(type="underline", offset=12, length=5),
                    MessageEntity(type="italic", offset=24, length=5),
                ],
                "test1 <b>test2 <u>test3</u> test4 <i>test5</i> test6</b> test7",
            ],
            [
                html,
                "test1 test2 test3 test4 test5",
                [
                    MessageEntity(type="bold", offset=6, length=17),
                    MessageEntity(type="underline", offset=12, length=5),
                ],
                "test1 <b>test2 <u>test3</u> test4</b> test5",
            ],
            [
                html,
                "test1 test2 test3 test4",
                [
                    MessageEntity(type="bold", offset=6, length=11),
                    MessageEntity(type="underline", offset=12, length=5),
                ],
                "test1 <b>test2 <u>test3</u></b> test4",
            ],
            [
                # The entity covers "test2 " including the trailing space, so
                # the space ends up inside the tag and nothing separates the
                # closing tag from "test3".
                html,
                "test1 test2 test3",
                [MessageEntity(type="bold", offset=6, length=6)],
                "test1 <b>test2 </b>test3",
            ],
            [
                html,
                "test1 test2",
                [MessageEntity(type="bold", offset=0, length=5)],
                "<b>test1</b> test2",
            ],
            [
                # Offsets and lengths are counted in UTF-16 code units: each of
                # the two emoji code points takes 2 units, so "te👍🏿st" is 8.
                html,
                "test te👍🏿st test",
                [MessageEntity(type="bold", offset=5, length=8, url=None, user=None)],
                "test <b>te👍🏿st</b> test",
            ],
        ],
    )
    def test_unparse(
        self,
        decorator: TextDecoration,
        text: str,
        entities: Optional[List[MessageEntity]],
        result: str,
    ):
        """Entities, including nested ones, are rendered into the raw text."""
        assert decorator.unparse(text, entities) == result
|
||||
Loading…
Add table
Add a link
Reference in a new issue