You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2020/04/28 18:13:44 UTC
[avro] branch master updated: AVRO-2387: Type Check Python (#814)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 7d37293 AVRO-2387: Type Check Python (#814)
7d37293 is described below
commit 7d372932c08e10ef04e060c138949dfd7c714616
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Tue Apr 28 14:13:31 2020 -0400
AVRO-2387: Type Check Python (#814)
* AVRO-2387: Type-Ignore Setuptools
* AVRO-2387: Py3 Friendly av_bench
* AVRO-2387: Py3 Friendly Sample HTTP Server
* AVRO-2387: Fix a missed self parameter
* AVRO-2387: Hard-Code TaskType Protocol Symbols for Mypy
* AVRO-2387: Py3 Friendly avro.tool
* AVRO-2387: TestProtocol.valid is not always a property
* AVRO-2387: Correct Typing for avro.test.test_schema
* AVRO-2387: Run Mypy via Tox
* AVRO-2387: Fix urlparse calls
---
lang/py/avro/test/av_bench.py | 4 ++--
lang/py/avro/test/sample_http_server.py | 22 ++++++++++++---------
lang/py/avro/test/test_protocol.py | 3 ++-
lang/py/avro/test/test_schema.py | 25 +++++++++++++----------
lang/py/avro/tether/tether_task.py | 27 ++++++++++++-------------
lang/py/avro/tether/tether_task_runner.py | 2 +-
lang/py/avro/tool.py | 20 +++++++++++++------
lang/py/mypy.ini | 33 +++++++++++++++++++++++++++++++
lang/py/setup.py | 2 +-
lang/py/tox.ini | 12 +++++++++++
share/docker/Dockerfile | 1 +
11 files changed, 107 insertions(+), 44 deletions(-)
diff --git a/lang/py/avro/test/av_bench.py b/lang/py/avro/test/av_bench.py
index 1e6a05d..96c95d9 100644
--- a/lang/py/avro/test/av_bench.py
+++ b/lang/py/avro/test/av_bench.py
@@ -19,10 +19,10 @@
from __future__ import absolute_import, division, print_function
+import string
import sys
import time
from random import choice, randint, sample
-from string import lowercase
import avro.datafile
import avro.io
@@ -31,7 +31,7 @@ import avro.schema
types = ["A", "CNAME"]
def rand_name():
- return ''.join(sample(lowercase, 15))
+ return ''.join(sample(string.ascii_lowercase, 15))
def rand_ip():
return "%s.%s.%s.%s" %(randint(0,255), randint(0,255), randint(0,255), randint(0,255))
diff --git a/lang/py/avro/test/sample_http_server.py b/lang/py/avro/test/sample_http_server.py
index c680afb..4fa05c3 100644
--- a/lang/py/avro/test/sample_http_server.py
+++ b/lang/py/avro/test/sample_http_server.py
@@ -19,9 +19,13 @@
from __future__ import absolute_import, division, print_function
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+import avro.ipc
+import avro.protocol
-from avro import ipc, protocol
+try:
+ import BaseHTTPServer as http_server # type: ignore
+except ImportError:
+ import http.server as http_server # type: ignore
MAIL_PROTOCOL_JSON = """\
{"namespace": "example.proto",
@@ -49,12 +53,12 @@ MAIL_PROTOCOL_JSON = """\
}
}
"""
-MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+MAIL_PROTOCOL = avro.protocol.parse(MAIL_PROTOCOL_JSON)
SERVER_ADDRESS = ('localhost', 9090)
-class MailResponder(ipc.Responder):
+class MailResponder(avro.ipc.Responder):
def __init__(self):
- ipc.Responder.__init__(self, MAIL_PROTOCOL)
+ avro.ipc.Responder.__init__(self, MAIL_PROTOCOL)
def invoke(self, message, request):
if message.name == 'send':
@@ -65,19 +69,19 @@ class MailResponder(ipc.Responder):
elif message.name == 'replay':
return 'replay'
-class MailHandler(BaseHTTPRequestHandler):
+class MailHandler(http_server.BaseHTTPRequestHandler):
def do_POST(self):
self.responder = MailResponder()
- call_request_reader = ipc.FramedReader(self.rfile)
+ call_request_reader = avro.ipc.FramedReader(self.rfile)
call_request = call_request_reader.read_framed_message()
resp_body = self.responder.respond(call_request)
self.send_response(200)
self.send_header('Content-Type', 'avro/binary')
self.end_headers()
- resp_writer = ipc.FramedWriter(self.wfile)
+ resp_writer = avro.ipc.FramedWriter(self.wfile)
resp_writer.write_framed_message(resp_body)
if __name__ == '__main__':
- mail_server = HTTPServer(SERVER_ADDRESS, MailHandler)
+ mail_server = http_server.HTTPServer(SERVER_ADDRESS, MailHandler)
mail_server.allow_reuse_address = True
mail_server.serve_forever()
diff --git a/lang/py/avro/test/test_protocol.py b/lang/py/avro/test/test_protocol.py
index 551c667..323ccfc 100644
--- a/lang/py/avro/test/test_protocol.py
+++ b/lang/py/avro/test/test_protocol.py
@@ -264,7 +264,8 @@ EXAMPLES = [HELLO_WORLD, ValidTestProtocol({
}),
]
-VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]
+
class TestMisc(unittest.TestCase):
def test_inner_namespace_set(self):
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index 9fcc0b6..38e8360 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -37,6 +37,11 @@ try:
except NameError:
basestring = (bytes, unicode)
+try:
+ from typing import List
+except ImportError:
+ pass
+
class TestSchema(object):
"""A proxy for a schema string that provides useful test metadata."""
@@ -61,18 +66,17 @@ class ValidTestSchema(TestSchema):
valid = True
-class InvalidTestSchema(ValidTestSchema):
+class InvalidTestSchema(TestSchema):
"""A proxy for an invalid schema string that provides useful test metadata."""
valid = False
-PRIMITIVE_EXAMPLES = ([
- InvalidTestSchema('"True"'),
- InvalidTestSchema('True'),
- InvalidTestSchema('{"no_type": "test"}'),
- InvalidTestSchema('{"type": "panther"}'),
-] + [ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES]
- + [ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
+PRIMITIVE_EXAMPLES = [InvalidTestSchema('"True"')] # type: List[TestSchema]
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('True'))
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"no_type": "test"}'))
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"type": "panther"}'))
+PRIMITIVE_EXAMPLES.extend([ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES])
+PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
FIXED_EXAMPLES = [
ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}),
@@ -314,8 +318,9 @@ EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
EXAMPLES += IGNORED_LOGICAL_TYPE
-VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
-INVALID_EXAMPLES = [e for e in EXAMPLES if not e.valid]
+VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]
+INVALID_EXAMPLES = [e for e in EXAMPLES if not getattr(e, "valid", True)]
+
class TestMisc(unittest.TestCase):
"""Miscellaneous tests for schema"""
diff --git a/lang/py/avro/tether/tether_task.py b/lang/py/avro/tether/tether_task.py
index ef17835..58b4aa6 100644
--- a/lang/py/avro/tether/tether_task.py
+++ b/lang/py/avro/tether/tether_task.py
@@ -48,11 +48,13 @@ if (inputProtocol is None):
with open(pfile,'r') as hf:
prototxt=hf.read()
- inputProtocol=protocol.parse(prototxt)
+ inputProtocol = protocol.parse(prototxt)
# use a named tuple to represent the tasktype enumeration
- taskschema=inputProtocol.types_dict["TaskType"]
- _ttype=collections.namedtuple("_tasktype",taskschema.symbols)
+ taskschema = inputProtocol.types_dict["TaskType"]
+ # Mypy cannot statically type check a dynamically constructed named tuple.
+ # Since InputProtocol.avpr is hard-coded here, we can hard-code the symbols.
+ _ttype = collections.namedtuple("_tasktype", ("MAP", "REDUCE"))
TaskType=_ttype(*taskschema.symbols)
if (outputProtocol is None):
@@ -229,7 +231,7 @@ class TetherTask(object):
self.midCollector=None
self.outCollector=None
- self._partitions=None
+ self._partitions = None
# cache a list of the fields used by the reducer as the keys
# we need the fields to decide when we have finished processing all values for
@@ -339,18 +341,15 @@ class TetherTask(object):
estr= traceback.format_exc()
self.fail(estr)
- def set_partitions(self,npartitions):
-
- try:
- self._partitions=npartitions
- except Exception as e:
- estr= traceback.format_exc()
- self.fail(estr)
-
- def get_partitions():
- """ Return the number of map output partitions of this job."""
+ @property
+ def partitions(self):
+ """Return the number of map output partitions of this job."""
return self._partitions
+ @partitions.setter
+ def partitions(self, npartitions):
+ self._partitions = npartitions
+
def input(self,data,count):
""" Recieve input from the server
diff --git a/lang/py/avro/tether/tether_task_runner.py b/lang/py/avro/tether/tether_task_runner.py
index b83f351..7cdb2a0 100644
--- a/lang/py/avro/tether/tether_task_runner.py
+++ b/lang/py/avro/tether/tether_task_runner.py
@@ -68,7 +68,7 @@ class TaskRunnerResponder(ipc.Responder):
elif message.name=='partitions':
self.log.info("TetherTaskRunner: Recieved partitions")
try:
- self.task.set_partitions(request["partitions"])
+ self.task.partitions = request["partitions"]
except Exception as e:
self.log.error("Exception occured while processing the partitions message: Message:\n"+traceback.format_exc())
raise
diff --git a/lang/py/avro/tool.py b/lang/py/avro/tool.py
index 3b7311f..a2ee464 100644
--- a/lang/py/avro/tool.py
+++ b/lang/py/avro/tool.py
@@ -28,13 +28,21 @@ from __future__ import absolute_import, division, print_function
import os.path
import sys
import threading
-import urlparse
import warnings
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
import avro.io
from avro import datafile, ipc, protocol
+try:
+ import BaseHTTPServer as http_server # type: ignore
+except ImportError:
+ import http.server as http_server # type: ignore
+
+try:
+ from urllib.parse import urlparse
+except ImportError:
+ from urlparse import urlparse # type: ignore
+
class GenericResponder(ipc.Responder):
def __init__(self, proto, msg, datum):
@@ -51,7 +59,7 @@ class GenericResponder(ipc.Responder):
server_should_shutdown = True
return self.datum
-class GenericHandler(BaseHTTPRequestHandler):
+class GenericHandler(http_server.BaseHTTPRequestHandler):
def do_POST(self):
self.responder = responder
call_request_reader = ipc.FramedReader(self.rfile)
@@ -69,13 +77,13 @@ class GenericHandler(BaseHTTPRequestHandler):
quitter.start()
def run_server(uri, proto, msg, datum):
- url_obj = urlparse.urlparse(uri)
+ url_obj = urlparse(uri)
server_addr = (url_obj.hostname, url_obj.port)
global responder
global server_should_shutdown
server_should_shutdown = False
responder = GenericResponder(proto, msg, datum)
- server = HTTPServer(server_addr, GenericHandler)
+ server = http_server.HTTPServer(server_addr, GenericHandler)
print("Port: %s" % server.server_port)
sys.stdout.flush()
server.allow_reuse_address = True
@@ -83,7 +91,7 @@ def run_server(uri, proto, msg, datum):
server.serve_forever()
def send_message(uri, proto, msg, datum):
- url_obj = urlparse.urlparse(uri)
+ url_obj = urlparse(uri)
client = ipc.HTTPTransceiver(url_obj.hostname, url_obj.port)
proto_json = open(proto, 'rb').read()
requestor = ipc.Requestor(protocol.parse(proto_json), client)
diff --git a/lang/py/mypy.ini b/lang/py/mypy.ini
new file mode 100644
index 0000000..c0628e2
--- /dev/null
+++ b/lang/py/mypy.ini
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[mypy]
+files =
+ avro
+warn_return_any = True
+warn_unused_configs = True
+warn_unreachable = True
+
+[mypy-snappy]
+ignore_missing_imports = True
+
+[mypy-zstandard]
+ignore_missing_imports = True
+
+[mypy-zope.interface]
+ignore_missing_imports = True
+
+[mypy-twisted.*]
+ignore_missing_imports = True
diff --git a/lang/py/setup.py b/lang/py/setup.py
index 34b26c5..4281431 100755
--- a/lang/py/setup.py
+++ b/lang/py/setup.py
@@ -25,7 +25,7 @@ import glob
import os
import subprocess
-import setuptools
+import setuptools # type: ignore
_HERE = os.path.dirname(os.path.abspath(__file__))
_AVRO_DIR = os.path.join(_HERE, 'avro')
diff --git a/lang/py/tox.ini b/lang/py/tox.ini
index 87adcbc..dd22f91 100644
--- a/lang/py/tox.ini
+++ b/lang/py/tox.ini
@@ -17,7 +17,9 @@
# Remember to run tox --skip-missing-interpreters
# If you don't want to install all these interpreters.
envlist =
+ # Fastest checks first
lint
+ typechecks
py27
py35
py36
@@ -26,6 +28,7 @@ envlist =
pypy3.5
pypy3.6
+
[coverage:run]
source =
avro
@@ -62,6 +65,15 @@ commands =
pycodestyle
commands_post =
+[testenv:typechecks]
+deps =
+ coverage
+ mypy
+extras =
+ mypy
+commands =
+ mypy
+
[tool:isort]
line_length = 150
known_third_party=zope
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index 50b296d..5dbe528 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -50,6 +50,7 @@ RUN apt-get -qqy update \
libsnappy1v5 \
make \
maven \
+ mypy \
openjdk-8-jdk \
perl \
python \