You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2019/05/31 18:38:52 UTC
[avro] branch master updated: AVRO-1928: Simplify Python float/double encoding (#528)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new b990042 AVRO-1928: Simplify Python float/double encoding (#528)
b990042 is described below
commit b990042604b8bf83db3a137de0f54d726c1af9f0
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Fri May 31 14:38:46 2019 -0400
AVRO-1928: Simplify Python float/double encoding (#528)
Switch to using built-in little endian support in the struct
module, instead of explicit bit manipulation.
---
lang/py/src/avro/io.py | 39 +++++++--------------------------------
lang/py3/avro/io.py | 37 ++++++-------------------------------
2 files changed, 13 insertions(+), 63 deletions(-)
diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py
index 2901660..d978716 100644
--- a/lang/py/src/avro/io.py
+++ b/lang/py/src/avro/io.py
@@ -71,15 +71,14 @@ else:
return struct.unpack(self.format, *args)
struct_class = SimpleStruct
-STRUCT_INT = struct_class('!I') # big-endian unsigned int
-STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long
-STRUCT_FLOAT = struct_class('!f') # big-endian float
-STRUCT_DOUBLE = struct_class('!d') # big-endian double
+STRUCT_FLOAT = struct_class('<f') # little-endian float
+STRUCT_DOUBLE = struct_class('<d') # little-endian double
STRUCT_CRC32 = struct_class('>I') # big-endian unsigned int
STRUCT_SIGNED_SHORT = struct_class('>h') # big-endian signed short
STRUCT_SIGNED_INT = struct_class('>i') # big-endian signed int
STRUCT_SIGNED_LONG = struct_class('>q') # big-endian signed long
+
#
# Exceptions
#
@@ -210,11 +209,7 @@ class BinaryDecoder(object):
The float is converted into a 32-bit integer using a method equivalent to
Java's floatToIntBits and then encoded in little-endian format.
"""
- bits = (((ord(self.read(1)) & 0xffL)) |
- ((ord(self.read(1)) & 0xffL) << 8) |
- ((ord(self.read(1)) & 0xffL) << 16) |
- ((ord(self.read(1)) & 0xffL) << 24))
- return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
+ return STRUCT_FLOAT.unpack(self.read(4))[0]
def read_double(self):
"""
@@ -222,15 +217,7 @@ class BinaryDecoder(object):
The double is converted into a 64-bit integer using a method equivalent to
Java's doubleToLongBits and then encoded in little-endian format.
"""
- bits = (((ord(self.read(1)) & 0xffL)) |
- ((ord(self.read(1)) & 0xffL) << 8) |
- ((ord(self.read(1)) & 0xffL) << 16) |
- ((ord(self.read(1)) & 0xffL) << 24) |
- ((ord(self.read(1)) & 0xffL) << 32) |
- ((ord(self.read(1)) & 0xffL) << 40) |
- ((ord(self.read(1)) & 0xffL) << 48) |
- ((ord(self.read(1)) & 0xffL) << 56))
- return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+ return STRUCT_DOUBLE.unpack(self.read(8))[0]
def read_decimal_from_bytes(self, precision, scale):
"""
@@ -367,11 +354,7 @@ class BinaryEncoder(object):
The float is converted into a 32-bit integer using a method equivalent to
Java's floatToIntBits and then encoded in little-endian format.
"""
- bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
- self.write(chr((bits) & 0xFF))
- self.write(chr((bits >> 8) & 0xFF))
- self.write(chr((bits >> 16) & 0xFF))
- self.write(chr((bits >> 24) & 0xFF))
+ self.write(STRUCT_FLOAT.pack(datum))
def write_double(self, datum):
"""
@@ -379,15 +362,7 @@ class BinaryEncoder(object):
The double is converted into a 64-bit integer using a method equivalent to
Java's doubleToLongBits and then encoded in little-endian format.
"""
- bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
- self.write(chr((bits) & 0xFF))
- self.write(chr((bits >> 8) & 0xFF))
- self.write(chr((bits >> 16) & 0xFF))
- self.write(chr((bits >> 24) & 0xFF))
- self.write(chr((bits >> 32) & 0xFF))
- self.write(chr((bits >> 40) & 0xFF))
- self.write(chr((bits >> 48) & 0xFF))
- self.write(chr((bits >> 56) & 0xFF))
+ self.write(STRUCT_DOUBLE.pack(datum))
def write_decimal_bytes(self, datum, scale):
"""
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index 7b56dff..db00be3 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -61,9 +61,8 @@ LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
STRUCT_INT = struct.Struct('!I') # big-endian unsigned int
-STRUCT_LONG = struct.Struct('!Q') # big-endian unsigned long long
-STRUCT_FLOAT = struct.Struct('!f') # big-endian float
-STRUCT_DOUBLE = struct.Struct('!d') # big-endian double
+STRUCT_FLOAT = struct.Struct('<f') # little-endian float
+STRUCT_DOUBLE = struct.Struct('<d') # little-endian double
STRUCT_CRC32 = struct.Struct('>I') # big-endian unsigned int
@@ -219,11 +218,7 @@ class BinaryDecoder(object):
The float is converted into a 32-bit integer using a method equivalent to
Java's floatToIntBits and then encoded in little-endian format.
"""
- bits = (((ord(self.read(1)) & 0xff)) |
- ((ord(self.read(1)) & 0xff) << 8) |
- ((ord(self.read(1)) & 0xff) << 16) |
- ((ord(self.read(1)) & 0xff) << 24))
- return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
+ return STRUCT_FLOAT.unpack(self.read(4))[0]
def read_double(self):
"""
@@ -231,15 +226,7 @@ class BinaryDecoder(object):
The double is converted into a 64-bit integer using a method equivalent to
Java's doubleToLongBits and then encoded in little-endian format.
"""
- bits = (((ord(self.read(1)) & 0xff)) |
- ((ord(self.read(1)) & 0xff) << 8) |
- ((ord(self.read(1)) & 0xff) << 16) |
- ((ord(self.read(1)) & 0xff) << 24) |
- ((ord(self.read(1)) & 0xff) << 32) |
- ((ord(self.read(1)) & 0xff) << 40) |
- ((ord(self.read(1)) & 0xff) << 48) |
- ((ord(self.read(1)) & 0xff) << 56))
- return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+ return STRUCT_DOUBLE.unpack(self.read(8))[0]
def read_bytes(self):
"""
@@ -362,11 +349,7 @@ class BinaryEncoder(object):
The float is converted into a 32-bit integer using a method equivalent to
Java's floatToIntBits and then encoded in little-endian format.
"""
- bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
- self.WriteByte((bits) & 0xFF)
- self.WriteByte((bits >> 8) & 0xFF)
- self.WriteByte((bits >> 16) & 0xFF)
- self.WriteByte((bits >> 24) & 0xFF)
+ self.write(STRUCT_FLOAT.pack(datum))
def write_double(self, datum):
"""
@@ -374,15 +357,7 @@ class BinaryEncoder(object):
The double is converted into a 64-bit integer using a method equivalent to
Java's doubleToLongBits and then encoded in little-endian format.
"""
- bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
- self.WriteByte((bits) & 0xFF)
- self.WriteByte((bits >> 8) & 0xFF)
- self.WriteByte((bits >> 16) & 0xFF)
- self.WriteByte((bits >> 24) & 0xFF)
- self.WriteByte((bits >> 32) & 0xFF)
- self.WriteByte((bits >> 40) & 0xFF)
- self.WriteByte((bits >> 48) & 0xFF)
- self.WriteByte((bits >> 56) & 0xFF)
+ self.write(STRUCT_DOUBLE.pack(datum))
def write_bytes(self, datum):
"""