From 17eb8a56d06954a1540ebef0bdca80c8dd9cff4c Mon Sep 17 00:00:00 2001
From: unintended <unintended@users.noreply.github.com>
Date: Fri, 4 Sep 2020 18:08:15 +0300
Subject: [PATCH] Fix #413 parse entities positioning (#414)

* fix entity positioning in parse_entities() #413
* add tests and small fixes
---
 aiogram/utils/text_decorations.py         | 26 +++++++++++++++--------
 tests/test_utils/test_text_decorations.py | 25 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_utils/test_text_decorations.py

diff --git a/aiogram/utils/text_decorations.py b/aiogram/utils/text_decorations.py
index 4b3109af..81592465 100644
--- a/aiogram/utils/text_decorations.py
+++ b/aiogram/utils/text_decorations.py
@@ -57,14 +57,14 @@ class TextDecoration(ABC):
         """
         result = "".join(
             self._unparse_entities(
-                text, sorted(entities, key=lambda item: item.offset) if entities else []
+                self._add_surrogates(text), sorted(entities, key=lambda item: item.offset) if entities else []
             )
         )
         return result
 
     def _unparse_entities(
         self,
-        text: str,
+        text: bytes,
         entities: List[MessageEntity],
         offset: Optional[int] = None,
         length: Optional[int] = None,
@@ -74,15 +74,15 @@ class TextDecoration(ABC):
         length = length or len(text)
 
         for index, entity in enumerate(entities):
-            if entity.offset < offset:
+            if entity.offset * 2 < offset:
                 continue
-            if entity.offset > offset:
-                yield self.quote(text[offset : entity.offset])
-            start = entity.offset
-            offset = entity.offset + entity.length
+            if entity.offset * 2 > offset:
+                yield self.quote(self._remove_surrogates(text[offset : entity.offset * 2]))
+            start = entity.offset * 2
+            offset = entity.offset * 2 + entity.length * 2
 
             sub_entities = list(
-                filter(lambda e: e.offset < (offset or 0), entities[index + 1 :])
+                filter(lambda e: e.offset * 2 < (offset or 0), entities[index + 1 :])
             )
             yield self.apply_entity(
                 entity,
@@ -94,7 +94,15 @@ class TextDecoration(ABC):
             )
 
         if offset < length:
-            yield self.quote(text[offset:length])
+            yield self.quote(self._remove_surrogates(text[offset:length]))
+
+    @staticmethod
+    def _add_surrogates(text: str):
+        return text.encode('utf-16-le')  # 2 bytes per UTF-16 code unit, hence the "* 2" on entity offsets/lengths
+
+    @staticmethod
+    def _remove_surrogates(text: bytes):
+        return text.decode('utf-16-le')  # inverse of _add_surrogates: UTF-16-LE bytes back to str
 
     @abstractmethod
     def link(self, value: str, link: str) -> str:  # pragma: no cover
diff --git a/tests/test_utils/test_text_decorations.py b/tests/test_utils/test_text_decorations.py
new file mode 100644
index 00000000..dd0e595d
--- /dev/null
+++ b/tests/test_utils/test_text_decorations.py
@@ -0,0 +1,25 @@
+from aiogram.types import MessageEntity, MessageEntityType
+from aiogram.utils import text_decorations
+
+
+class TestTextDecorations:
+  def test_unparse_entities_normal_text(self):
+    assert text_decorations.markdown_decoration.unparse(
+      "hi i'm bold and italic and still bold",
+      entities=[
+        MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD),  # "i'm bold ... still bold"
+        MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC),  # nested "and italic"
+      ]
+    ) == "hi *i'm bold _and italic_\r and still bold*"
+
+  def test_unparse_entities_emoji_text(self):
+    """
+    The rocket emoji counts as two UTF-16 code units, so offset=3 still points at "i'm".
+    """
+    assert text_decorations.markdown_decoration.unparse(
+      "🚀 i'm bold and italic and still bold",
+      entities=[
+        MessageEntity(offset=3, length=34, type=MessageEntityType.BOLD),  # "i'm bold ... still bold"
+        MessageEntity(offset=12, length=10, type=MessageEntityType.ITALIC),  # nested "and italic"
+      ]
+    ) == "🚀 *i'm bold _and italic_\r and still bold*"