You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@ignite.apache.org by is...@apache.org on 2021/01/29 00:16:20 UTC

[ignite-python-thin-client] branch ignite-14059 created (now 7e53681)

This is an automated email from the ASF dual-hosted git repository.

isapego pushed a change to branch ignite-14059
in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git.


      at 7e53681  IGNITE-14059: Fix comments

This branch includes the following new commits:

     new 42df688  IGNITE-14059: Fix hashing of complex objects
     new 7e53681  IGNITE-14059: Fix comments

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[ignite-python-thin-client] 01/02: IGNITE-14059: Fix hashing of complex objects

Posted by is...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

isapego pushed a commit to branch ignite-14059
in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git

commit 42df688fa936fa91b96a0f84c9343d1d2a2ac2fb
Author: Igor Sapego <ig...@gmail.com>
AuthorDate: Fri Nov 20 11:46:25 2020 +0300

    IGNITE-14059: Fix hashing of complex objects
---
 pyignite/utils.py    | 30 ++++++++++++++++++++++--------
 tests/test_binary.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/pyignite/utils.py b/pyignite/utils.py
index ebe5501..57eedda 100644
--- a/pyignite/utils.py
+++ b/pyignite/utils.py
@@ -106,20 +106,34 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
     return result
 
 
-def hashcode(string: Union[str, bytes]) -> int:
+def hashcode(data: Union[str, bytes]) -> int:
     """
     Calculate hash code used for identifying objects in Ignite binary API.
 
     :param string: UTF-8-encoded string identifier of binary buffer,
     :return: hash code.
     """
-    result = 1 if isinstance(string, (bytes, bytearray)) else 0
-    for char in string:
-        try:
-            char = ord(char)
-        except TypeError:
-            pass
-        result = int_overflow(31 * result + char)
+    if isinstance(data, str):
+        """
+        For strings we iterate over code point which are of the int type
+        and can take up to 4 bytes and can only be positive.
+        """
+        result = 0
+        for char in data:
+            try:
+                char_val = ord(char)
+                result = int_overflow(31 * result + char_val)
+            except TypeError:
+                pass
+    else:
+        """
+        For byte array we iterate over bytes which only take 1 byte and can
+        be negative. For this reason we use ctypes.c_byte() to 
+        """
+        result = 1
+        for byte in data:
+            byte = ctypes.c_byte(byte).value
+            result = int_overflow(31 * result + byte)
     return result
 
 
diff --git a/tests/test_binary.py b/tests/test_binary.py
index 5190a6a..4c45afb 100644
--- a/tests/test_binary.py
+++ b/tests/test_binary.py
@@ -304,3 +304,54 @@ def test_complex_object_names(client):
     obj = cache.get(key)
     assert obj.type_name == type_name, 'Complex type name mismatch'
     assert obj.field == data, 'Complex object data failure'
+
+
+def test_complex_object_hash(client):
+    """
+    Test that Python client correctly calculates hash of the binary
+    object that contains negative bytes.
+    """
+    class Internal(
+        metaclass=GenericObjectMeta,
+        type_name='Internal',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+        ])
+    ):
+        pass
+
+    class TestObject(
+        metaclass=GenericObjectMeta,
+        type_name='TestObject',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+            ('internal', BinaryObject),
+        ])
+    ):
+        pass
+
+    obj_ascii = TestObject()
+    obj_ascii.id = 1
+    obj_ascii.str = 'test_string'
+
+    obj_ascii.internal = Internal()
+    obj_ascii.internal.id = 2
+    obj_ascii.internal.str = 'lorem ipsum'
+
+    hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
+
+    assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
+
+    obj_utf8 = TestObject()
+    obj_utf8.id = 1
+    obj_utf8.str = 'юникод'
+
+    obj_utf8.internal = Internal()
+    obj_utf8.internal.id = 2
+    obj_utf8.internal.str = 'ユニコード'
+
+    hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
+
+    assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'

[ignite-python-thin-client] 02/02: IGNITE-14059: Fix comments

Posted by is...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

isapego pushed a commit to branch ignite-14059
in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git

commit 7e536811408277903eaf7720541f975ef253cbca
Author: Igor Sapego <ig...@gmail.com>
AuthorDate: Tue Jan 26 13:22:06 2021 +0300

    IGNITE-14059: Fix comments
---
 pyignite/utils.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pyignite/utils.py b/pyignite/utils.py
index 57eedda..a58f357 100644
--- a/pyignite/utils.py
+++ b/pyignite/utils.py
@@ -110,7 +110,7 @@ def hashcode(data: Union[str, bytes]) -> int:
     """
     Calculate hash code used for identifying objects in Ignite binary API.
 
-    :param string: UTF-8-encoded string identifier of binary buffer,
+    :param data: UTF-8-encoded string identifier of binary buffer or byte array
     :return: hash code.
     """
     if isinstance(data, str):
@@ -127,8 +127,11 @@ def hashcode(data: Union[str, bytes]) -> int:
                 pass
     else:
         """
-        For byte array we iterate over bytes which only take 1 byte and can
-        be negative. For this reason we use ctypes.c_byte() to 
+        For byte array we iterate over bytes which only take 1 byte. But
+        according to protocol, bytes during hashing should be treated as signed
+        integer numbers 8 bits long. On other hand elements in Python's `bytes`
+        are unsigned. For this reason we use ctypes.c_byte() to make them
+        signed.
         """
         result = 1
         for byte in data: