You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2019/05/31 18:38:52 UTC

[avro] branch master updated: AVRO-1928: Simplify Python float/double encoding (#528)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new b990042  AVRO-1928: Simplify Python float/double encoding (#528)
b990042 is described below

commit b990042604b8bf83db3a137de0f54d726c1af9f0
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Fri May 31 14:38:46 2019 -0400

    AVRO-1928: Simplify Python float/double encoding (#528)
    
    Switch to using built-in little endian support in the struct
    module, instead of explicit bit manipulation.
---
 lang/py/src/avro/io.py | 39 +++++++--------------------------------
 lang/py3/avro/io.py    | 37 ++++++-------------------------------
 2 files changed, 13 insertions(+), 63 deletions(-)

diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py
index 2901660..d978716 100644
--- a/lang/py/src/avro/io.py
+++ b/lang/py/src/avro/io.py
@@ -71,15 +71,14 @@ else:
       return struct.unpack(self.format, *args)
   struct_class = SimpleStruct
 
-STRUCT_INT = struct_class('!I')             # big-endian unsigned int
-STRUCT_LONG = struct_class('!Q')            # big-endian unsigned long long
-STRUCT_FLOAT = struct_class('!f')           # big-endian float
-STRUCT_DOUBLE = struct_class('!d')          # big-endian double
+STRUCT_FLOAT = struct_class('<f')           # little-endian float
+STRUCT_DOUBLE = struct_class('<d')          # little-endian double
 STRUCT_CRC32 = struct_class('>I')           # big-endian unsigned int
 STRUCT_SIGNED_SHORT = struct_class('>h')    # big-endian signed short
 STRUCT_SIGNED_INT = struct_class('>i')      # big-endian signed int
 STRUCT_SIGNED_LONG = struct_class('>q')     # big-endian signed long
 
+
 #
 # Exceptions
 #
@@ -210,11 +209,7 @@ class BinaryDecoder(object):
     The float is converted into a 32-bit integer using a method equivalent to
     Java's floatToIntBits and then encoded in little-endian format.
     """
-    bits = (((ord(self.read(1)) & 0xffL)) |
-      ((ord(self.read(1)) & 0xffL) <<  8) |
-      ((ord(self.read(1)) & 0xffL) << 16) |
-      ((ord(self.read(1)) & 0xffL) << 24))
-    return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
+    return STRUCT_FLOAT.unpack(self.read(4))[0]
 
   def read_double(self):
     """
@@ -222,15 +217,7 @@ class BinaryDecoder(object):
     The double is converted into a 64-bit integer using a method equivalent to
     Java's doubleToLongBits and then encoded in little-endian format.
     """
-    bits = (((ord(self.read(1)) & 0xffL)) |
-      ((ord(self.read(1)) & 0xffL) <<  8) |
-      ((ord(self.read(1)) & 0xffL) << 16) |
-      ((ord(self.read(1)) & 0xffL) << 24) |
-      ((ord(self.read(1)) & 0xffL) << 32) |
-      ((ord(self.read(1)) & 0xffL) << 40) |
-      ((ord(self.read(1)) & 0xffL) << 48) |
-      ((ord(self.read(1)) & 0xffL) << 56))
-    return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+    return STRUCT_DOUBLE.unpack(self.read(8))[0]
 
   def read_decimal_from_bytes(self, precision, scale):
     """
@@ -367,11 +354,7 @@ class BinaryEncoder(object):
     The float is converted into a 32-bit integer using a method equivalent to
     Java's floatToIntBits and then encoded in little-endian format.
     """
-    bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
-    self.write(chr((bits) & 0xFF))
-    self.write(chr((bits >> 8) & 0xFF))
-    self.write(chr((bits >> 16) & 0xFF))
-    self.write(chr((bits >> 24) & 0xFF))
+    self.write(STRUCT_FLOAT.pack(datum))
 
   def write_double(self, datum):
     """
@@ -379,15 +362,7 @@ class BinaryEncoder(object):
     The double is converted into a 64-bit integer using a method equivalent to
     Java's doubleToLongBits and then encoded in little-endian format.
     """
-    bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
-    self.write(chr((bits) & 0xFF))
-    self.write(chr((bits >> 8) & 0xFF))
-    self.write(chr((bits >> 16) & 0xFF))
-    self.write(chr((bits >> 24) & 0xFF))
-    self.write(chr((bits >> 32) & 0xFF))
-    self.write(chr((bits >> 40) & 0xFF))
-    self.write(chr((bits >> 48) & 0xFF))
-    self.write(chr((bits >> 56) & 0xFF))
+    self.write(STRUCT_DOUBLE.pack(datum))
 
   def write_decimal_bytes(self, datum, scale):
     """
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index 7b56dff..db00be3 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -61,9 +61,8 @@ LONG_MIN_VALUE = -(1 << 63)
 LONG_MAX_VALUE = (1 << 63) - 1
 
 STRUCT_INT = struct.Struct('!I')     # big-endian unsigned int
-STRUCT_LONG = struct.Struct('!Q')    # big-endian unsigned long long
-STRUCT_FLOAT = struct.Struct('!f')   # big-endian float
-STRUCT_DOUBLE = struct.Struct('!d')  # big-endian double
+STRUCT_FLOAT = struct.Struct('<f')   # little-endian float
+STRUCT_DOUBLE = struct.Struct('<d')  # little-endian double
 STRUCT_CRC32 = struct.Struct('>I')   # big-endian unsigned int
 
 
@@ -219,11 +218,7 @@ class BinaryDecoder(object):
     The float is converted into a 32-bit integer using a method equivalent to
     Java's floatToIntBits and then encoded in little-endian format.
     """
-    bits = (((ord(self.read(1)) & 0xff)) |
-      ((ord(self.read(1)) & 0xff) <<  8) |
-      ((ord(self.read(1)) & 0xff) << 16) |
-      ((ord(self.read(1)) & 0xff) << 24))
-    return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0]
+    return STRUCT_FLOAT.unpack(self.read(4))[0]
 
   def read_double(self):
     """
@@ -231,15 +226,7 @@ class BinaryDecoder(object):
     The double is converted into a 64-bit integer using a method equivalent to
     Java's doubleToLongBits and then encoded in little-endian format.
     """
-    bits = (((ord(self.read(1)) & 0xff)) |
-      ((ord(self.read(1)) & 0xff) <<  8) |
-      ((ord(self.read(1)) & 0xff) << 16) |
-      ((ord(self.read(1)) & 0xff) << 24) |
-      ((ord(self.read(1)) & 0xff) << 32) |
-      ((ord(self.read(1)) & 0xff) << 40) |
-      ((ord(self.read(1)) & 0xff) << 48) |
-      ((ord(self.read(1)) & 0xff) << 56))
-    return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0]
+    return STRUCT_DOUBLE.unpack(self.read(8))[0]
 
   def read_bytes(self):
     """
@@ -362,11 +349,7 @@ class BinaryEncoder(object):
     The float is converted into a 32-bit integer using a method equivalent to
     Java's floatToIntBits and then encoded in little-endian format.
     """
-    bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0]
-    self.WriteByte((bits) & 0xFF)
-    self.WriteByte((bits >> 8) & 0xFF)
-    self.WriteByte((bits >> 16) & 0xFF)
-    self.WriteByte((bits >> 24) & 0xFF)
+    self.write(STRUCT_FLOAT.pack(datum))
 
   def write_double(self, datum):
     """
@@ -374,15 +357,7 @@ class BinaryEncoder(object):
     The double is converted into a 64-bit integer using a method equivalent to
     Java's doubleToLongBits and then encoded in little-endian format.
     """
-    bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0]
-    self.WriteByte((bits) & 0xFF)
-    self.WriteByte((bits >> 8) & 0xFF)
-    self.WriteByte((bits >> 16) & 0xFF)
-    self.WriteByte((bits >> 24) & 0xFF)
-    self.WriteByte((bits >> 32) & 0xFF)
-    self.WriteByte((bits >> 40) & 0xFF)
-    self.WriteByte((bits >> 48) & 0xFF)
-    self.WriteByte((bits >> 56) & 0xFF)
+    self.write(STRUCT_DOUBLE.pack(datum))
 
   def write_bytes(self, datum):
     """