You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/12/20 17:54:33 UTC
[avro] branch branch-1.11 updated (bdded91 -> ad63664)
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a change to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git.
from bdded91 AVRO-3263: Fix warning in Perl encoder when validating a long field (#1422)
new ae7b140 AVRO-3252: Add hasattr check for mode attr in DataFileReader (#1406)
new 6aa3b0a AVRO-3721 Support Python 3.10; Fix Annotations (#1432)
new 9b2de4a AVRO-3218 Fix parsing of logicalType = decimal (#1350)
new ad63664 AVRO-3243: Workaround JDK-8161372 - perf issue in ConcurrentHashMap#computeIfAbsent() (#1392)
The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.github/workflows/test-lang-py.yml | 6 ++--
BUILD.md | 2 +-
.../java/org/apache/avro/reflect/ReflectData.java | 6 ++--
.../org/apache/avro/specific/SpecificData.java | 6 ++--
.../ClassValueCache.java => MapUtil.java} | 40 ++++++++++------------
.../org/apache/avro/grpc/ServiceDescriptor.java | 5 +--
lang/py/avro/__main__.py | 5 ++-
lang/py/avro/datafile.py | 14 ++++----
lang/py/avro/io.py | 14 ++++----
lang/py/avro/schema.py | 2 +-
lang/py/avro/test/test_schema.py | 4 +--
lang/py/setup.cfg | 1 +
lang/py/tox.ini | 1 +
share/docker/Dockerfile | 9 ++---
14 files changed, 63 insertions(+), 52 deletions(-)
copy lang/java/avro/src/main/java/org/apache/avro/util/{internal/ClassValueCache.java => MapUtil.java} (57%)
[avro] 01/04: AVRO-3252: Add hasattr check for mode attr in DataFileReader (#1406)
Posted by rs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
commit ae7b140272165106412acc309787eb0da37642ee
Author: Chris Johns <ch...@ter0.net>
AuthorDate: Sat Dec 18 00:45:12 2021 +0000
AVRO-3252: Add hasattr check for mode attr in DataFileReader (#1406)
The DataFileWriter class constructor checks for the presence of a mode attribute before accessing it, but the DataFileReader constructor does not. This means that it is possible to use the DataFileWriter to write data to in-memory binary streams using io.BytesIO, but it is not possible to read from them in the same manner. This commit adds the same check to the DataFileReader class.
---
lang/py/avro/datafile.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lang/py/avro/datafile.py b/lang/py/avro/datafile.py
index 22bf2dd..11d9a3c 100644
--- a/lang/py/avro/datafile.py
+++ b/lang/py/avro/datafile.py
@@ -315,7 +315,7 @@ class DataFileReader(_DataFileMetadata):
# TODO(hammer): allow user to specify the encoder
def __init__(self, reader: IO[AnyStr], datum_reader: avro.io.DatumReader) -> None:
- if "b" not in reader.mode:
+ if hasattr(reader, "mode") and "b" not in reader.mode:
warnings.warn(avro.errors.AvroWarning(f"Reader binary data from a reader {reader!r} that's opened for text"))
bytes_reader = getattr(reader, "buffer", reader)
self._reader = bytes_reader
[avro] 03/04: AVRO-3218 Fix parsing of logicalType = decimal (#1350)
Posted by rs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
commit 9b2de4ac2febea6f18c4f7877164027350224f91
Author: Vegard Solberg <39...@users.noreply.github.com>
AuthorDate: Sat Dec 18 02:59:46 2021 +0100
AVRO-3218 Fix parsing of logicalType = decimal (#1350)
Co-authored-by: Vegard Solberg <ve...@statnett.no>
---
lang/py/avro/schema.py | 2 +-
lang/py/avro/test/test_schema.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index 1d3b369..73371d5 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -1067,7 +1067,7 @@ def get_other_props(all_props: Mapping[str, object], reserved_props: Sequence[st
def make_bytes_decimal_schema(other_props):
"""Make a BytesDecimalSchema from just other_props."""
- return BytesDecimalSchema(other_props.get("precision"), other_props.get("scale", 0))
+ return BytesDecimalSchema(other_props.get("precision"), other_props.get("scale", 0), other_props)
def make_logical_schema(logical_type, type_, other_props):
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index 2015727..2542617 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -642,16 +642,16 @@ class TestMisc(unittest.TestCase):
}
)
- bytes_decimal_schema = ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4})
-
fixed_decimal = fixed_decimal_schema.parse()
self.assertEqual(4, fixed_decimal.get_prop("precision"))
self.assertEqual(2, fixed_decimal.get_prop("scale"))
self.assertEqual(2, fixed_decimal.get_prop("size"))
+ bytes_decimal_schema = ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4})
bytes_decimal = bytes_decimal_schema.parse()
self.assertEqual(4, bytes_decimal.get_prop("precision"))
self.assertEqual(0, bytes_decimal.get_prop("scale"))
+ self.assertEqual("decimal", bytes_decimal.get_prop("logicalType"))
def test_fixed_decimal_valid_max_precision(self):
# An 8 byte number can represent any 18 digit number.
[avro] 02/04: AVRO-3721 Support Python 3.10; Fix Annotations (#1432)
Posted by rs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
commit 6aa3b0a1806bbd3e3ec92378e56403e03f6bf0f1
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Fri Dec 17 20:41:16 2021 -0500
AVRO-3721 Support Python 3.10; Fix Annotations (#1432)
* Add Python 3.10 to Tox to test the latest version of Python.
* Fix Type Checks
* Implement Github Actions for Python 3.10
* Add Python 3.10 in Dockerfile, BUILD.md, and setup.cfg.
---
.github/workflows/test-lang-py.yml | 6 ++++--
BUILD.md | 2 +-
lang/py/avro/__main__.py | 5 ++++-
lang/py/avro/datafile.py | 12 ++++++------
lang/py/avro/io.py | 14 +++++++-------
lang/py/setup.cfg | 1 +
lang/py/tox.ini | 1 +
share/docker/Dockerfile | 9 +++++----
8 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml
index 19522c0..b8f9800 100644
--- a/.github/workflows/test-lang-py.yml
+++ b/.github/workflows/test-lang-py.yml
@@ -36,6 +36,7 @@ jobs:
fail-fast: false
matrix:
python:
+ - '3.10'
- '3.9'
- '3.8'
- '3.7'
@@ -64,11 +65,11 @@ jobs:
python3 -m pip install --upgrade pip setuptools tox-wheel
- name: Lint
- if: ${{ matrix.python == '3.9' }}
+ if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e lint
- name: Typechecks
- if: ${{ matrix.python == '3.9' }}
+ if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e typechecks
- name: Test
@@ -81,6 +82,7 @@ jobs:
fail-fast: false
matrix:
python:
+ - '3.10'
- '3.9'
- '3.8'
- '3.7'
diff --git a/BUILD.md b/BUILD.md
index c09994e..196456f 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -6,7 +6,7 @@ The following packages must be installed before Avro can be built:
- Java: JDK 1.8, Maven 3 or better, protobuf-compile
- PHP: php7, phpunit, php7-gmp
- - Python 3: 3.5 or greater
+ - Python 3: 3.6 or greater
- C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config
- C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev
- C#: .NET Core 2.2 SDK
diff --git a/lang/py/avro/__main__.py b/lang/py/avro/__main__.py
index 423de59..44fda88 100755
--- a/lang/py/avro/__main__.py
+++ b/lang/py/avro/__main__.py
@@ -171,7 +171,10 @@ def convert_union(value: str, field: avro.schema.Field) -> Union[int, float, str
def iter_csv(info: IO[AnyStr], schema: avro.schema.RecordSchema) -> Generator[Dict[str, object], None, None]:
header = [field.name for field in schema.fields]
- for row in csv.reader(getattr(i, "decode", lambda: i)() for i in info):
+ # If i is bytes, decode into a string.
+ # If i is a string, no need to decode.
+ csv_data = (cast(str, getattr(i, "decode", lambda: i)()) for i in info)
+ for row in csv.reader(csv_data):
values = [convert(v, f) for v, f in zip(row, schema.fields)]
yield dict(zip(header, values))
diff --git a/lang/py/avro/datafile.py b/lang/py/avro/datafile.py
index 11d9a3c..d39a911 100644
--- a/lang/py/avro/datafile.py
+++ b/lang/py/avro/datafile.py
@@ -160,7 +160,7 @@ class DataFileWriter(_DataFileMetadata):
_datum_writer: avro.io.DatumWriter
_encoder: avro.io.BinaryEncoder
_header_written: bool
- _writer: BinaryIO
+ _writer: IO[bytes]
block_count: int
sync_marker: bytes
@@ -170,7 +170,7 @@ class DataFileWriter(_DataFileMetadata):
"""If the schema is not present, presume we're appending."""
if hasattr(writer, "mode") and "b" not in writer.mode:
warnings.warn(avro.errors.AvroWarning(f"Writing binary data to a writer {writer!r} that's opened for text"))
- bytes_writer = getattr(writer, "buffer", writer)
+ bytes_writer = cast(IO[bytes], getattr(writer, "buffer", writer))
self._writer = bytes_writer
self._encoder = avro.io.BinaryEncoder(bytes_writer)
self._datum_writer = datum_writer
@@ -202,7 +202,7 @@ class DataFileWriter(_DataFileMetadata):
self.datum_writer.writers_schema = writers_schema
@property
- def writer(self) -> BinaryIO:
+ def writer(self) -> IO[bytes]:
return self._writer
@property
@@ -307,7 +307,7 @@ class DataFileReader(_DataFileMetadata):
_datum_reader: avro.io.DatumReader
_file_length: int
_raw_decoder: avro.io.BinaryDecoder
- _reader: BinaryIO
+ _reader: IO[bytes]
block_count: int
sync_marker: bytes
@@ -317,7 +317,7 @@ class DataFileReader(_DataFileMetadata):
def __init__(self, reader: IO[AnyStr], datum_reader: avro.io.DatumReader) -> None:
if hasattr(reader, "mode") and "b" not in reader.mode:
warnings.warn(avro.errors.AvroWarning(f"Reader binary data from a reader {reader!r} that's opened for text"))
- bytes_reader = getattr(reader, "buffer", reader)
+ bytes_reader = cast(IO[bytes], getattr(reader, "buffer", reader))
self._reader = bytes_reader
self._raw_decoder = avro.io.BinaryDecoder(bytes_reader)
self._datum_decoder = None # Maybe reset at every block.
@@ -337,7 +337,7 @@ class DataFileReader(_DataFileMetadata):
return self
@property
- def reader(self) -> BinaryIO:
+ def reader(self) -> IO[bytes]:
return self._reader
@property
diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py
index d8b0f94..59628b0 100644
--- a/lang/py/avro/io.py
+++ b/lang/py/avro/io.py
@@ -90,7 +90,7 @@ import decimal
import struct
import warnings
from typing import (
- BinaryIO,
+ IO,
Deque,
Generator,
Iterable,
@@ -206,16 +206,16 @@ _ITERATORS["error"] = _ITERATORS["request"] = _ITERATORS["record"]
class BinaryDecoder:
"""Read leaf values."""
- _reader: BinaryIO
+ _reader: IO[bytes]
- def __init__(self, reader: BinaryIO) -> None:
+ def __init__(self, reader: IO[bytes]) -> None:
"""
reader is a Python object on which we can call read, seek, and tell.
"""
self._reader = reader
@property
- def reader(self) -> BinaryIO:
+ def reader(self) -> IO[bytes]:
return self._reader
def read(self, n: int) -> bytes:
@@ -410,16 +410,16 @@ class BinaryDecoder:
class BinaryEncoder:
"""Write leaf values."""
- _writer: BinaryIO
+ _writer: IO[bytes]
- def __init__(self, writer: BinaryIO) -> None:
+ def __init__(self, writer: IO[bytes]) -> None:
"""
writer is a Python object on which we can call write.
"""
self._writer = writer
@property
- def writer(self) -> BinaryIO:
+ def writer(self) -> IO[bytes]:
return self._writer
def write(self, datum: bytes) -> None:
diff --git a/lang/py/setup.cfg b/lang/py/setup.cfg
index 487b322..adbeb0f 100644
--- a/lang/py/setup.cfg
+++ b/lang/py/setup.cfg
@@ -37,6 +37,7 @@ classifiers =
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
+ Programming Language :: Python :: 3.10
Development Status :: 5 - Production/Stable
[bdist_wheel]
diff --git a/lang/py/tox.ini b/lang/py/tox.ini
index bbd42d6..ab2ceaa 100644
--- a/lang/py/tox.ini
+++ b/lang/py/tox.ini
@@ -24,6 +24,7 @@ envlist =
py37
py38
py39
+ py310
pypy3.6
pypy3.7
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index 619861e..7b8ce55 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -66,7 +66,11 @@ RUN apt-get -qqy update \
valgrind \
vim \
wget \
- python3.6 python3.7 python3.8 python3.9 \
+ python3.6 \
+ python3.7 \
+ python3.8 \
+ python3.9 \
+ python3.10 \
&& apt-get -qqy clean
# Install PHP
@@ -194,7 +198,4 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --de
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
ENV PATH $JAVA_HOME/bin:$PATH
-
-RUN apt-get -qqy install python3.6 python3.7 python3.9
-
CMD ["/bin/bash", "-i"]
[avro] 04/04: AVRO-3243: Workaround JDK-8161372 - perf issue in ConcurrentHashMap#computeIfAbsent() (#1392)
Posted by rs...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git
commit ad63664df02bf5bfb63a6a4b3fefccede0e19352
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Mon Dec 20 19:47:14 2021 +0200
AVRO-3243: Workaround JDK-8161372 - perf issue in ConcurrentHashMap#computeIfAbsent() (#1392)
---
.../java/org/apache/avro/reflect/ReflectData.java | 6 ++-
.../org/apache/avro/specific/SpecificData.java | 6 ++-
.../main/java/org/apache/avro/util/MapUtil.java | 45 ++++++++++++++++++++++
.../org/apache/avro/grpc/ServiceDescriptor.java | 5 ++-
4 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index 4ead6b8..1af2581 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -36,6 +36,7 @@ import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.FixedSize;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.util.ClassUtils;
+import org.apache.avro.util.MapUtil;
import java.io.IOException;
import java.lang.annotation.Annotation;
@@ -63,6 +64,7 @@ import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
/** Utilities to use existing Java classes and interfaces via reflection. */
public class ReflectData extends SpecificData {
@@ -826,11 +828,11 @@ public class ReflectData extends SpecificData {
}
}
- private static final Map<Class<?>, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>();
+ private static final ConcurrentMap<Class<?>, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>();
// Return of this class and its superclasses to serialize.
private static Field[] getCachedFields(Class<?> recordClass) {
- return FIELDS_CACHE.computeIfAbsent(recordClass, rc -> getFields(rc, true));
+ return MapUtil.computeIfAbsent(FIELDS_CACHE, recordClass, rc -> getFields(rc, true));
}
private static Field[] getFields(Class<?> recordClass, boolean excludeJava) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java
index 5b53939..8efd904 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java
@@ -30,6 +30,7 @@ import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.ClassUtils;
+import org.apache.avro.util.MapUtil;
import org.apache.avro.util.internal.ClassValueCache;
import java.io.ObjectInput;
@@ -48,6 +49,7 @@ import java.util.Map;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
/** Utilities for generated Java classes and interfaces. */
@@ -228,7 +230,7 @@ public class SpecificData extends GenericData {
return (datum instanceof Enum) ? getSchema(datum.getClass()) : super.getEnumSchema(datum);
}
- private Map<String, Class> classCache = new ConcurrentHashMap<>();
+ private final ConcurrentMap<String, Class> classCache = new ConcurrentHashMap<>();
private static final Class NO_CLASS = new Object() {
}.getClass();
@@ -251,7 +253,7 @@ public class SpecificData extends GenericData {
String name = schema.getFullName();
if (name == null)
return null;
- Class<?> c = classCache.computeIfAbsent(name, n -> {
+ Class<?> c = MapUtil.computeIfAbsent(classCache, name, n -> {
try {
return ClassUtils.forName(getClassLoader(), getClassName(schema));
} catch (ClassNotFoundException e) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java
new file mode 100644
index 0000000..1bdbfaf
--- /dev/null
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.util;
+
+import java.util.concurrent.ConcurrentMap;
+import java.util.function.Function;
+
+public class MapUtil {
+
+ private MapUtil() {
+ super();
+ }
+
+ /**
+ * A temporary workaround for Java 8 specific performance issue JDK-8161372
+ * .<br>
+ * This class should be removed once we drop Java 8 support.
+ *
+ * @see <a href=
+ * "https://bugs.openjdk.java.net/browse/JDK-8161372">JDK-8161372</a>
+ */
+ public static <K, V> V computeIfAbsent(ConcurrentMap<K, V> map, K key, Function<K, V> mappingFunction) {
+ V value = map.get(key);
+ if (value != null) {
+ return value;
+ }
+ return map.computeIfAbsent(key, mappingFunction::apply);
+ }
+
+}
diff --git a/lang/java/grpc/src/main/java/org/apache/avro/grpc/ServiceDescriptor.java b/lang/java/grpc/src/main/java/org/apache/avro/grpc/ServiceDescriptor.java
index 0984473..bfb8ec2 100644
--- a/lang/java/grpc/src/main/java/org/apache/avro/grpc/ServiceDescriptor.java
+++ b/lang/java/grpc/src/main/java/org/apache/avro/grpc/ServiceDescriptor.java
@@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import io.grpc.MethodDescriptor;
+import org.apache.avro.util.MapUtil;
import static io.grpc.MethodDescriptor.generateFullMethodName;
@@ -49,7 +50,7 @@ class ServiceDescriptor {
*/
public static ServiceDescriptor create(Class iface) {
String serviceName = AvroGrpcUtils.getServiceName(iface);
- return SERVICE_DESCRIPTORS.computeIfAbsent(serviceName, key -> new ServiceDescriptor(iface, serviceName));
+ return MapUtil.computeIfAbsent(SERVICE_DESCRIPTORS, serviceName, key -> new ServiceDescriptor(iface, serviceName));
}
/**
@@ -67,7 +68,7 @@ class ServiceDescriptor {
* @return a {@link MethodDescriptor}
*/
public MethodDescriptor<Object[], Object> getMethod(String methodName, MethodDescriptor.MethodType methodType) {
- return methods.computeIfAbsent(methodName,
+ return MapUtil.computeIfAbsent(methods, methodName,
key -> MethodDescriptor.<Object[], Object>newBuilder()
.setFullMethodName(generateFullMethodName(serviceName, methodName)).setType(methodType)
.setRequestMarshaller(new AvroRequestMarshaller(protocol.getMessages().get(methodName)))