From 17eb8a56d06954a1540ebef0bdca80c8dd9cff4c Mon Sep 17 00:00:00 2001 From: unintended Date: Fri, 4 Sep 2020 18:08:15 +0300 Subject: [PATCH] Fix #413 parse entities positioning (#414) * fix entity positioning in parse_entities() #413 * add tests and small fixes --- aiogram/utils/text_decorations.py | 26 +++++++++++++++-------- tests/test_utils/test_text_decorations.py | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 9 deletions(-) create mode 100644 tests/test_utils/test_text_decorations.py diff --git a/aiogram/utils/text_decorations.py b/aiogram/utils/text_decorations.py index 4b3109af..81592465 100644 --- a/aiogram/utils/text_decorations.py +++ b/aiogram/utils/text_decorations.py @@ -57,14 +57,14 @@ class TextDecoration(ABC): """ result = "".join( self._unparse_entities( - text, sorted(entities, key=lambda item: item.offset) if entities else [] + self._add_surrogates(text), sorted(entities, key=lambda item: item.offset) if entities else [] ) ) return result def _unparse_entities( self, - text: str, + text: bytes, entities: List[MessageEntity], offset: Optional[int] = None, length: Optional[int] = None, @@ -74,15 +74,15 @@ class TextDecoration(ABC): length = length or len(text) for index, entity in enumerate(entities): - if entity.offset < offset: + if entity.offset * 2 < offset: continue - if entity.offset > offset: - yield self.quote(text[offset : entity.offset]) - start = entity.offset - offset = entity.offset + entity.length + if entity.offset * 2 > offset: + yield self.quote(self._remove_surrogates(text[offset : entity.offset * 2])) + start = entity.offset * 2 + offset = entity.offset * 2 + entity.length * 2 sub_entities = list( - filter(lambda e: e.offset < (offset or 0), entities[index + 1 :]) + filter(lambda e: e.offset * 2 < (offset or 0), entities[index + 1 :]) ) yield self.apply_entity( entity, @@ -94,7 +94,15 @@ class TextDecoration(ABC): ) if offset < length: - yield self.quote(text[offset:length]) + yield self.quote(self._remove_surrogates(text[offset:length])) + + @staticmethod + def _add_surrogates(text: str): + return text.encode('utf-16-le') + + @staticmethod + def _remove_surrogates(text: bytes): + return text.decode('utf-16-le') @abstractmethod def link(self, value: str, link: str) -> str: # pragma: no cover diff --git a/tests/test_utils/test_text_decorations.py b/tests/test_utils/test_text_decorations.py new file mode 100644 index 00000000..dd0e595d --- /dev/null +++ b/tests/test_utils/test_text_decorations.py @@ -0,0 +1,25 @@ +from aiogram.types import MessageEntity, MessageEntityType +from aiogram.utils import text_decorations + + +class TestTextDecorations: + def test_unparse_entities_normal_text(self): + assert text_decorations.markdown_decoration.unparse( + "hi i'm bold and italic and still bold", + entities=[ + MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD), + MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC), + ] + ) == "hi *i'm bold _and italic_\r and still bold*" + + def test_unparse_entities_emoji_text(self): + """ + emoji is encoded as two chars in json + """ + assert text_decorations.markdown_decoration.unparse( + "🚀 i'm bold and italic and still bold", + entities=[ + MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD), + MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC), + ] + ) == "🚀 *i'm bold _and italic_\r and still bold*"