You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2020/04/28 18:13:44 UTC

[avro] branch master updated: AVRO-2387: Type Check Python (#814)

This is an automated email from the ASF dual-hosted git repository.

kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d37293  AVRO-2387: Type Check Python (#814)
7d37293 is described below

commit 7d372932c08e10ef04e060c138949dfd7c714616
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Tue Apr 28 14:13:31 2020 -0400

    AVRO-2387: Type Check Python (#814)
    
    * AVRO-2387: Type-Ignore Setuptools
    
    * AVRO-2387: Py3 Friendly av_bench
    
    * AVRO-2387: Py3 Friendly Sample HTTP Server
    
    * AVRO-2387: Fix a missed self parameter
    
    * AVRO-2387: Hard-Code TaskType Protocol Symbols for Mypy
    
    * AVRO-2387: Py3 Friendly avro.tool
    
    * AVRO-2387: TestProtocol.valid is not always a property
    
    * AVRO-2387: Correct Typing for avro.test.test_schema
    
    * AVRO-2387: Run Mypy via Tox
    
    * AVRO-2387: Fix urlparse calls
---
 lang/py/avro/test/av_bench.py             |  4 ++--
 lang/py/avro/test/sample_http_server.py   | 22 ++++++++++++---------
 lang/py/avro/test/test_protocol.py        |  3 ++-
 lang/py/avro/test/test_schema.py          | 25 +++++++++++++----------
 lang/py/avro/tether/tether_task.py        | 27 ++++++++++++-------------
 lang/py/avro/tether/tether_task_runner.py |  2 +-
 lang/py/avro/tool.py                      | 20 +++++++++++++------
 lang/py/mypy.ini                          | 33 +++++++++++++++++++++++++++++++
 lang/py/setup.py                          |  2 +-
 lang/py/tox.ini                           | 12 +++++++++++
 share/docker/Dockerfile                   |  1 +
 11 files changed, 107 insertions(+), 44 deletions(-)

diff --git a/lang/py/avro/test/av_bench.py b/lang/py/avro/test/av_bench.py
index 1e6a05d..96c95d9 100644
--- a/lang/py/avro/test/av_bench.py
+++ b/lang/py/avro/test/av_bench.py
@@ -19,10 +19,10 @@
 
 from __future__ import absolute_import, division, print_function
 
+import string
 import sys
 import time
 from random import choice, randint, sample
-from string import lowercase
 
 import avro.datafile
 import avro.io
@@ -31,7 +31,7 @@ import avro.schema
 types = ["A", "CNAME"]
 
 def rand_name():
-    return ''.join(sample(lowercase, 15))
+    return ''.join(sample(string.ascii_lowercase, 15))
 
 def rand_ip():
     return "%s.%s.%s.%s" %(randint(0,255), randint(0,255), randint(0,255), randint(0,255))
diff --git a/lang/py/avro/test/sample_http_server.py b/lang/py/avro/test/sample_http_server.py
index c680afb..4fa05c3 100644
--- a/lang/py/avro/test/sample_http_server.py
+++ b/lang/py/avro/test/sample_http_server.py
@@ -19,9 +19,13 @@
 
 from __future__ import absolute_import, division, print_function
 
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+import avro.ipc
+import avro.protocol
 
-from avro import ipc, protocol
+try:
+  import BaseHTTPServer as http_server  # type: ignore
+except ImportError:
+  import http.server as http_server  # type: ignore
 
 MAIL_PROTOCOL_JSON = """\
 {"namespace": "example.proto",
@@ -49,12 +53,12 @@ MAIL_PROTOCOL_JSON = """\
  }
 }
 """
-MAIL_PROTOCOL = protocol.parse(MAIL_PROTOCOL_JSON)
+MAIL_PROTOCOL = avro.protocol.parse(MAIL_PROTOCOL_JSON)
 SERVER_ADDRESS = ('localhost', 9090)
 
-class MailResponder(ipc.Responder):
+class MailResponder(avro.ipc.Responder):
   def __init__(self):
-    ipc.Responder.__init__(self, MAIL_PROTOCOL)
+    avro.ipc.Responder.__init__(self, MAIL_PROTOCOL)
 
   def invoke(self, message, request):
     if message.name == 'send':
@@ -65,19 +69,19 @@ class MailResponder(ipc.Responder):
     elif message.name == 'replay':
       return 'replay'
 
-class MailHandler(BaseHTTPRequestHandler):
+class MailHandler(http_server.BaseHTTPRequestHandler):
   def do_POST(self):
     self.responder = MailResponder()
-    call_request_reader = ipc.FramedReader(self.rfile)
+    call_request_reader = avro.ipc.FramedReader(self.rfile)
     call_request = call_request_reader.read_framed_message()
     resp_body = self.responder.respond(call_request)
     self.send_response(200)
     self.send_header('Content-Type', 'avro/binary')
     self.end_headers()
-    resp_writer = ipc.FramedWriter(self.wfile)
+    resp_writer = avro.ipc.FramedWriter(self.wfile)
     resp_writer.write_framed_message(resp_body)
 
 if __name__ == '__main__':
-  mail_server = HTTPServer(SERVER_ADDRESS, MailHandler)
+  mail_server = http_server.HTTPServer(SERVER_ADDRESS, MailHandler)
   mail_server.allow_reuse_address = True
   mail_server.serve_forever()
diff --git a/lang/py/avro/test/test_protocol.py b/lang/py/avro/test/test_protocol.py
index 551c667..323ccfc 100644
--- a/lang/py/avro/test/test_protocol.py
+++ b/lang/py/avro/test/test_protocol.py
@@ -264,7 +264,8 @@ EXAMPLES = [HELLO_WORLD, ValidTestProtocol({
   }),
 ]
 
-VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
+VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]
+
 
 class TestMisc(unittest.TestCase):
   def test_inner_namespace_set(self):
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index 9fcc0b6..38e8360 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -37,6 +37,11 @@ try:
 except NameError:
   basestring = (bytes, unicode)
 
+try:
+  from typing import List
+except ImportError:
+  pass
+
 
 class TestSchema(object):
   """A proxy for a schema string that provides useful test metadata."""
@@ -61,18 +66,17 @@ class ValidTestSchema(TestSchema):
   valid = True
 
 
-class InvalidTestSchema(ValidTestSchema):
+class InvalidTestSchema(TestSchema):
   """A proxy for an invalid schema string that provides useful test metadata."""
   valid = False
 
 
-PRIMITIVE_EXAMPLES = ([
-  InvalidTestSchema('"True"'),
-  InvalidTestSchema('True'),
-  InvalidTestSchema('{"no_type": "test"}'),
-  InvalidTestSchema('{"type": "panther"}'),
-] + [ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES]
-  + [ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
+PRIMITIVE_EXAMPLES = [InvalidTestSchema('"True"')]  # type: List[TestSchema]
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('True'))
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"no_type": "test"}'))
+PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"type": "panther"}'))
+PRIMITIVE_EXAMPLES.extend([ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES])
+PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES])
 
 FIXED_EXAMPLES = [
   ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}),
@@ -314,8 +318,9 @@ EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
 EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
 EXAMPLES += IGNORED_LOGICAL_TYPE
 
-VALID_EXAMPLES = [e for e in EXAMPLES if e.valid]
-INVALID_EXAMPLES = [e for e in EXAMPLES if not e.valid]
+VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]
+INVALID_EXAMPLES = [e for e in EXAMPLES if not getattr(e, "valid", True)]
+
 
 class TestMisc(unittest.TestCase):
   """Miscellaneous tests for schema"""
diff --git a/lang/py/avro/tether/tether_task.py b/lang/py/avro/tether/tether_task.py
index ef17835..58b4aa6 100644
--- a/lang/py/avro/tether/tether_task.py
+++ b/lang/py/avro/tether/tether_task.py
@@ -48,11 +48,13 @@ if (inputProtocol is None):
   with open(pfile,'r') as hf:
     prototxt=hf.read()
 
-  inputProtocol=protocol.parse(prototxt)
+  inputProtocol = protocol.parse(prototxt)
 
   # use a named tuple to represent the tasktype enumeration
-  taskschema=inputProtocol.types_dict["TaskType"]
-  _ttype=collections.namedtuple("_tasktype",taskschema.symbols)
+  taskschema = inputProtocol.types_dict["TaskType"]
+  # Mypy cannot statically type check a dynamically constructed named tuple.
+  # Since InputProtocol.avpr is hard-coded here, we can hard-code the symbols.
+  _ttype = collections.namedtuple("_tasktype", ("MAP", "REDUCE"))
   TaskType=_ttype(*taskschema.symbols)
 
 if (outputProtocol is None):
@@ -229,7 +231,7 @@ class TetherTask(object):
     self.midCollector=None
     self.outCollector=None
 
-    self._partitions=None
+    self._partitions = None
 
     # cache a list of the fields used by the reducer as the keys
     # we need the fields to decide when we have finished processing all values for
@@ -339,18 +341,15 @@ class TetherTask(object):
       estr= traceback.format_exc()
       self.fail(estr)
 
-  def set_partitions(self,npartitions):
-
-    try:
-      self._partitions=npartitions
-    except Exception as e:
-      estr= traceback.format_exc()
-      self.fail(estr)
-
-  def get_partitions():
-    """ Return the number of map output partitions of this job."""
+  @property
+  def partitions(self):
+    """Return the number of map output partitions of this job."""
     return self._partitions
 
+  @partitions.setter
+  def partitions(self, npartitions):
+    self._partitions = npartitions
+
   def input(self,data,count):
     """ Recieve input from the server
 
diff --git a/lang/py/avro/tether/tether_task_runner.py b/lang/py/avro/tether/tether_task_runner.py
index b83f351..7cdb2a0 100644
--- a/lang/py/avro/tether/tether_task_runner.py
+++ b/lang/py/avro/tether/tether_task_runner.py
@@ -68,7 +68,7 @@ class TaskRunnerResponder(ipc.Responder):
       elif message.name=='partitions':
         self.log.info("TetherTaskRunner: Recieved partitions")
         try:
-          self.task.set_partitions(request["partitions"])
+          self.task.partitions = request["partitions"]
         except Exception as e:
           self.log.error("Exception occured while processing the partitions message: Message:\n"+traceback.format_exc())
           raise
diff --git a/lang/py/avro/tool.py b/lang/py/avro/tool.py
index 3b7311f..a2ee464 100644
--- a/lang/py/avro/tool.py
+++ b/lang/py/avro/tool.py
@@ -28,13 +28,21 @@ from __future__ import absolute_import, division, print_function
 import os.path
 import sys
 import threading
-import urlparse
 import warnings
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
 
 import avro.io
 from avro import datafile, ipc, protocol
 
+try:
+  import BaseHTTPServer as http_server  # type: ignore
+except ImportError:
+  import http.server as http_server  # type: ignore
+
+try:
+  from urllib.parse import urlparse
+except ImportError:
+  from urlparse import urlparse  # type: ignore
+
 
 class GenericResponder(ipc.Responder):
   def __init__(self, proto, msg, datum):
@@ -51,7 +59,7 @@ class GenericResponder(ipc.Responder):
       server_should_shutdown = True
       return self.datum
 
-class GenericHandler(BaseHTTPRequestHandler):
+class GenericHandler(http_server.BaseHTTPRequestHandler):
   def do_POST(self):
     self.responder = responder
     call_request_reader = ipc.FramedReader(self.rfile)
@@ -69,13 +77,13 @@ class GenericHandler(BaseHTTPRequestHandler):
       quitter.start()
 
 def run_server(uri, proto, msg, datum):
-  url_obj = urlparse.urlparse(uri)
+  url_obj = urlparse(uri)
   server_addr = (url_obj.hostname, url_obj.port)
   global responder
   global server_should_shutdown
   server_should_shutdown = False
   responder = GenericResponder(proto, msg, datum)
-  server = HTTPServer(server_addr, GenericHandler)
+  server = http_server.HTTPServer(server_addr, GenericHandler)
   print("Port: %s" % server.server_port)
   sys.stdout.flush()
   server.allow_reuse_address = True
@@ -83,7 +91,7 @@ def run_server(uri, proto, msg, datum):
   server.serve_forever()
 
 def send_message(uri, proto, msg, datum):
-  url_obj = urlparse.urlparse(uri)
+  url_obj = urlparse(uri)
   client = ipc.HTTPTransceiver(url_obj.hostname, url_obj.port)
   proto_json = open(proto, 'rb').read()
   requestor = ipc.Requestor(protocol.parse(proto_json), client)
diff --git a/lang/py/mypy.ini b/lang/py/mypy.ini
new file mode 100644
index 0000000..c0628e2
--- /dev/null
+++ b/lang/py/mypy.ini
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[mypy]
+files =
+    avro
+warn_return_any = True
+warn_unused_configs = True
+warn_unreachable = True
+
+[mypy-snappy]
+ignore_missing_imports = True
+
+[mypy-zstandard]
+ignore_missing_imports = True
+
+[mypy-zope.interface]
+ignore_missing_imports = True
+
+[mypy-twisted.*]
+ignore_missing_imports = True
diff --git a/lang/py/setup.py b/lang/py/setup.py
index 34b26c5..4281431 100755
--- a/lang/py/setup.py
+++ b/lang/py/setup.py
@@ -25,7 +25,7 @@ import glob
 import os
 import subprocess
 
-import setuptools
+import setuptools  # type: ignore
 
 _HERE = os.path.dirname(os.path.abspath(__file__))
 _AVRO_DIR = os.path.join(_HERE, 'avro')
diff --git a/lang/py/tox.ini b/lang/py/tox.ini
index 87adcbc..dd22f91 100644
--- a/lang/py/tox.ini
+++ b/lang/py/tox.ini
@@ -17,7 +17,9 @@
 # Remember to run tox --skip-missing-interpreters
 # If you don't want to install all these interpreters.
 envlist =
+    # Fastest checks first
     lint
+    typechecks
     py27
     py35
     py36
@@ -26,6 +28,7 @@ envlist =
     pypy3.5
     pypy3.6
 
+
 [coverage:run]
 source =
   avro
@@ -62,6 +65,15 @@ commands =
     pycodestyle
 commands_post =
 
+[testenv:typechecks]
+deps =
+    coverage
+    mypy
+extras =
+    mypy
+commands =
+    mypy
+
 [tool:isort]
 line_length = 150
 known_third_party=zope
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index 50b296d..5dbe528 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -50,6 +50,7 @@ RUN apt-get -qqy update \
                                                  libsnappy1v5 \
                                                  make \
                                                  maven \
+                                                 mypy \
                                                  openjdk-8-jdk \
                                                  perl \
                                                  python \