You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by is...@apache.org on 2021/02/02 09:17:21 UTC
[ignite-python-thin-client] branch master updated: IGNITE-14059: Fix hashing of complex objects

This is an automated email from the ASF dual-hosted git repository.

isapego pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git


The following commit(s) were added to refs/heads/master by this push:
     new e0c22ef  IGNITE-14059: Fix hashing of complex objects
e0c22ef is described below

commit e0c22ef3aef39ea8a42ddb6b4495b7bcaa479417
Author: Igor Sapego <ig...@gmail.com>
AuthorDate: Tue Feb 2 12:16:43 2021 +0300

    IGNITE-14059: Fix hashing of complex objects
    
    This closes #5
---
 pyignite/utils.py    | 35 ++++++++++++++++++++++++++---------
 tests/test_binary.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/pyignite/utils.py b/pyignite/utils.py
index ebe5501..ce00d53 100644
--- a/pyignite/utils.py
+++ b/pyignite/utils.py
@@ -106,20 +106,37 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
     return result
 
 
-def hashcode(string: Union[str, bytes]) -> int:
+def hashcode(data: Union[str, bytes]) -> int:
     """
     Calculate hash code used for identifying objects in Ignite binary API.
 
-    :param string: UTF-8-encoded string identifier of binary buffer,
+    :param data: UTF-8-encoded string identifier of binary buffer or byte array
     :return: hash code.
     """
-    result = 1 if isinstance(string, (bytes, bytearray)) else 0
-    for char in string:
-        try:
-            char = ord(char)
-        except TypeError:
-            pass
-        result = int_overflow(31 * result + char)
+    if isinstance(data, str):
+        """
+        For strings we iterate over code points, which are of the int type,
+        can take up to 4 bytes and can only be positive.
+        """
+        result = 0
+        for char in data:
+            try:
+                char_val = ord(char)
+                result = int_overflow(31 * result + char_val)
+            except TypeError:
+                pass
+    else:
+        """
+        For byte arrays we iterate over bytes, which take only 1 byte each.
+        But according to the protocol, bytes must be treated as signed 8-bit
+        integers during hashing. On the other hand, elements of Python's
+        `bytes` are unsigned. For this reason we use ctypes.c_byte() to make
+        them signed.
+        """
+        result = 1
+        for byte in data:
+            byte = ctypes.c_byte(byte).value
+            result = int_overflow(31 * result + byte)
     return result
 
 
diff --git a/tests/test_binary.py b/tests/test_binary.py
index 5190a6a..4c45afb 100644
--- a/tests/test_binary.py
+++ b/tests/test_binary.py
@@ -304,3 +304,54 @@ def test_complex_object_names(client):
     obj = cache.get(key)
     assert obj.type_name == type_name, 'Complex type name mismatch'
     assert obj.field == data, 'Complex object data failure'
+
+
+def test_complex_object_hash(client):
+    """
+    Test that the Python client correctly calculates the hash of a binary
+    object that contains negative bytes.
+    """
+    class Internal(
+        metaclass=GenericObjectMeta,
+        type_name='Internal',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+        ])
+    ):
+        pass
+
+    class TestObject(
+        metaclass=GenericObjectMeta,
+        type_name='TestObject',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+            ('internal', BinaryObject),
+        ])
+    ):
+        pass
+
+    obj_ascii = TestObject()
+    obj_ascii.id = 1
+    obj_ascii.str = 'test_string'
+
+    obj_ascii.internal = Internal()
+    obj_ascii.internal.id = 2
+    obj_ascii.internal.str = 'lorem ipsum'
+
+    hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
+
+    assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
+
+    obj_utf8 = TestObject()
+    obj_utf8.id = 1
+    obj_utf8.str = 'юникод'
+
+    obj_utf8.internal = Internal()
+    obj_utf8.internal.id = 2
+    obj_utf8.internal.str = 'ユニコード'
+
+    hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
+
+    assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'