You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by is...@apache.org on 2021/02/02 09:17:21 UTC
[ignite-python-thin-client] branch master updated: IGNITE-14059: Fix hashing of complex objects
This is an automated email from the ASF dual-hosted git repository.
isapego pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git
The following commit(s) were added to refs/heads/master by this push:
new e0c22ef IGNITE-14059: Fix hashing of complex objects
e0c22ef is described below
commit e0c22ef3aef39ea8a42ddb6b4495b7bcaa479417
Author: Igor Sapego <ig...@gmail.com>
AuthorDate: Tue Feb 2 12:16:43 2021 +0300
IGNITE-14059: Fix hashing of complex objects
This closes #5
---
pyignite/utils.py | 35 ++++++++++++++++++++++++++---------
tests/test_binary.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+), 9 deletions(-)
diff --git a/pyignite/utils.py b/pyignite/utils.py
index ebe5501..ce00d53 100644
--- a/pyignite/utils.py
+++ b/pyignite/utils.py
@@ -106,20 +106,37 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
return result
-def hashcode(string: Union[str, bytes]) -> int:
+def hashcode(data: Union[str, bytes]) -> int:
"""
Calculate hash code used for identifying objects in Ignite binary API.
- :param string: UTF-8-encoded string identifier of binary buffer,
+ :param data: UTF-8-encoded string identifier of binary buffer or byte array
:return: hash code.
"""
- result = 1 if isinstance(string, (bytes, bytearray)) else 0
- for char in string:
- try:
- char = ord(char)
- except TypeError:
- pass
- result = int_overflow(31 * result + char)
+ if isinstance(data, str):
+ """
+ For strings we iterate over code points, which are of the int type,
+ can take up to 4 bytes and can only be positive.
+ """
+ result = 0
+ for char in data:
+ try:
+ char_val = ord(char)
+ result = int_overflow(31 * result + char_val)
+ except TypeError:
+ pass
+ else:
+ """
+ For byte arrays we iterate over bytes, each of which takes only 1 byte.
+ But according to the protocol, bytes should be treated as signed 8-bit
+ integers during hashing. On the other hand, elements of Python's `bytes`
+ are unsigned. For this reason we use ctypes.c_byte() to make them
+ signed.
+ """
+ result = 1
+ for byte in data:
+ byte = ctypes.c_byte(byte).value
+ result = int_overflow(31 * result + byte)
return result
diff --git a/tests/test_binary.py b/tests/test_binary.py
index 5190a6a..4c45afb 100644
--- a/tests/test_binary.py
+++ b/tests/test_binary.py
@@ -304,3 +304,54 @@ def test_complex_object_names(client):
obj = cache.get(key)
assert obj.type_name == type_name, 'Complex type name mismatch'
assert obj.field == data, 'Complex object data failure'
+
+
+def test_complex_object_hash(client):
+ """
+ Test that the Python client correctly calculates the hash of a binary
+ object that contains negative bytes.
+ """
+ class Internal(
+ metaclass=GenericObjectMeta,
+ type_name='Internal',
+ schema=OrderedDict([
+ ('id', IntObject),
+ ('str', String),
+ ])
+ ):
+ pass
+
+ class TestObject(
+ metaclass=GenericObjectMeta,
+ type_name='TestObject',
+ schema=OrderedDict([
+ ('id', IntObject),
+ ('str', String),
+ ('internal', BinaryObject),
+ ])
+ ):
+ pass
+
+ obj_ascii = TestObject()
+ obj_ascii.id = 1
+ obj_ascii.str = 'test_string'
+
+ obj_ascii.internal = Internal()
+ obj_ascii.internal.id = 2
+ obj_ascii.internal.str = 'lorem ipsum'
+
+ hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
+
+ assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
+
+ obj_utf8 = TestObject()
+ obj_utf8.id = 1
+ obj_utf8.str = 'юникод'
+
+ obj_utf8.internal = Internal()
+ obj_utf8.internal.id = 2
+ obj_utf8.internal.str = 'ユニコード'
+
+ hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
+
+ assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'