Fix #413 parse entities positioning (#414)

* fix entity positioning in parse_entities() #413
* add tests and small fixes
This commit is contained in:
unintended 2020-09-04 18:08:15 +03:00 committed by GitHub
parent d8c6214170
commit 17eb8a56d0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 9 deletions

View file

@ -57,14 +57,14 @@ class TextDecoration(ABC):
"""
result = "".join(
self._unparse_entities(
text, sorted(entities, key=lambda item: item.offset) if entities else []
self._add_surrogates(text), sorted(entities, key=lambda item: item.offset) if entities else []
)
)
return result
def _unparse_entities(
self,
text: str,
text: bytes,
entities: List[MessageEntity],
offset: Optional[int] = None,
length: Optional[int] = None,
@ -74,15 +74,15 @@ class TextDecoration(ABC):
length = length or len(text)
for index, entity in enumerate(entities):
if entity.offset < offset:
if entity.offset * 2 < offset:
continue
if entity.offset > offset:
yield self.quote(text[offset : entity.offset])
start = entity.offset
offset = entity.offset + entity.length
if entity.offset * 2 > offset:
yield self.quote(self._remove_surrogates(text[offset : entity.offset * 2]))
start = entity.offset * 2
offset = entity.offset * 2 + entity.length * 2
sub_entities = list(
filter(lambda e: e.offset < (offset or 0), entities[index + 1 :])
filter(lambda e: e.offset * 2 < (offset or 0), entities[index + 1 :])
)
yield self.apply_entity(
entity,
@ -94,7 +94,15 @@ class TextDecoration(ABC):
)
if offset < length:
yield self.quote(text[offset:length])
yield self.quote(self._remove_surrogates(text[offset:length]))
@staticmethod
def _add_surrogates(text: str):
    """Return ``text`` encoded as UTF-16 little-endian bytes.

    In this encoding every UTF-16 code unit takes exactly two bytes, and
    characters outside the Basic Multilingual Plane (e.g. most emoji)
    become a surrogate pair, i.e. four bytes.
    """
    utf16_bytes = text.encode(encoding='utf-16-le')
    return utf16_bytes
@staticmethod
def _remove_surrogates(text: bytes):
    """Decode UTF-16 little-endian ``text`` bytes back into a ``str``.

    Raises ``UnicodeDecodeError`` when the bytes are not valid UTF-16-LE,
    for instance when a slice cuts a surrogate pair in half.
    """
    decoded = text.decode(encoding='utf-16-le')
    return decoded
@abstractmethod
def link(self, value: str, link: str) -> str: # pragma: no cover

View file

@ -0,0 +1,25 @@
from aiogram.types import MessageEntity, MessageEntityType
from aiogram.utils import text_decorations
class TestTextDecorations:
    """Checks that ``markdown_decoration.unparse`` places entity markup correctly."""

    def test_unparse_entities_normal_text(self):
        """An italic entity nested inside a bold one, over plain ASCII text."""
        bold = MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD)
        italic = MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC)

        rendered = text_decorations.markdown_decoration.unparse(
            "hi i'm bold and italic and still bold",
            entities=[bold, italic],
        )

        assert rendered == "hi *i'm bold _and italic_\r and still bold*"

    def test_unparse_entities_emoji_text(self):
        """
        Same nested entities, but the text starts with an emoji.

        The emoji is encoded as two chars in JSON, so the entity offsets
        are the same as in the plain-text case even though the emoji is a
        single Python character.
        """
        bold = MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD)
        italic = MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC)

        rendered = text_decorations.markdown_decoration.unparse(
            "🚀 i'm bold and italic and still bold",
            entities=[bold, italic],
        )

        assert rendered == "🚀 *i'm bold _and italic_\r and still bold*"