You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rv...@apache.org on 2015/09/22 21:14:14 UTC

[11/35] incubator-hawq git commit: SGA import. Now with files previously missing because of the .gitignore issue

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/pg8000/types.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/pg8000/types.py b/tools/bin/ext/pg8000/types.py
new file mode 100644
index 0000000..c622a7d
--- /dev/null
+++ b/tools/bin/ext/pg8000/types.py
@@ -0,0 +1,687 @@
+# vim: sw=4:expandtab:foldmethod=marker
+#
+# Copyright (c) 2007-2009, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+__author__ = "Mathieu Fenniak"
+
+import datetime
+import decimal
+import struct
+import math
+from errors import (NotSupportedError, ArrayDataParseError, InternalError,
+        ArrayContentEmptyError, ArrayContentNotHomogenousError,
+        ArrayContentNotSupportedError, ArrayDimensionsNotConsistentError)
+
+try:
+    from pytz import utc
+except ImportError:
+    ZERO = datetime.timedelta(0)
+    class UTC(datetime.tzinfo):
+        def utcoffset(self, dt):
+            return ZERO
+        def tzname(self, dt):
+            return "UTC"
+        def dst(self, dt):
+            return ZERO
+    utc = UTC()
+
+class Bytea(str):
+    pass
+
+class Interval(object):
+    def __init__(self, microseconds=0, days=0, months=0):
+        self.microseconds = microseconds
+        self.days = days
+        self.months = months
+
+    def _setMicroseconds(self, value):
+        if not isinstance(value, int) and not isinstance(value, long):
+            raise TypeError("microseconds must be an int or long")
+        elif not (min_int8 < value < max_int8):
+            raise OverflowError("microseconds must be representable as a 64-bit integer")
+        else:
+            self._microseconds = value
+
+    def _setDays(self, value):
+        if not isinstance(value, int) and not isinstance(value, long):
+            raise TypeError("days must be an int or long")
+        elif not (min_int4 < value < max_int4):
+            raise OverflowError("days must be representable as a 32-bit integer")
+        else:
+            self._days = value
+
+    def _setMonths(self, value):
+        if not isinstance(value, int) and not isinstance(value, long):
+            raise TypeError("months must be an int or long")
+        elif not (min_int4 < value < max_int4):
+            raise OverflowError("months must be representable as a 32-bit integer")
+        else:
+            self._months = value
+
+    microseconds = property(lambda self: self._microseconds, _setMicroseconds)
+    days = property(lambda self: self._days, _setDays)
+    months = property(lambda self: self._months, _setMonths)
+
+    def __repr__(self):
+        return "<Interval %s months %s days %s microseconds>" % (self.months, self.days, self.microseconds)
+
+    def __cmp__(self, other):
+        if other == None: return -1
+        c = cmp(self.months, other.months)
+        if c != 0: return c
+        c = cmp(self.days, other.days)
+        if c != 0: return c
+        return cmp(self.microseconds, other.microseconds)
+
+def pg_type_info(typ):
+    value = None
+    if isinstance(typ, dict):
+        value = typ["value"]
+        typ = typ["type"]
+
+    data = py_types.get(typ)
+    if data == None:
+        raise NotSupportedError("type %r not mapped to pg type" % typ)
+
+    # permit the type data to be determined by the value, if provided
+    inspect_func = data.get("inspect")
+    if value != None and inspect_func != None:
+        data = inspect_func(value)
+
+    type_oid = data.get("typeoid")
+    if type_oid == None:
+        raise InternalError("type %r has no type_oid" % typ)
+    elif type_oid == -1:
+        # special case: NULL values
+        return type_oid, 0
+
+    # prefer bin, but go with whatever exists
+    if data.get("bin_out"):
+        format = 1
+    elif data.get("txt_out"):
+        format = 0
+    else:
+        raise InternalError("no conversion function for type %r" % typ)
+
+    return type_oid, format
+
+def pg_value(value, fc, **kwargs):
+    typ = type(value)
+    data = py_types.get(typ)
+    if data == None:
+        raise NotSupportedError("type %r not mapped to pg type" % typ)
+
+    # permit the type conversion to be determined by the value, if provided
+    inspect_func = data.get("inspect")
+    if value != None and inspect_func != None:
+        data = inspect_func(value)
+
+    # special case: NULL values
+    if data.get("typeoid") == -1:
+        return None
+
+    if fc == 0:
+        func = data.get("txt_out")
+    elif fc == 1:
+        func = data.get("bin_out")
+    else:
+        raise InternalError("unrecognized format code %r" % fc)
+    if func == None:
+        raise NotSupportedError("type %r, format code %r not supported" % (typ, fc))
+    return func(value, **kwargs)
+
+def py_type_info(description, record_field_names):
+    type_oid = description['type_oid']
+    data = pg_types.get(type_oid)
+    if data == None:
+        record_data = record_field_names.get(type_oid)
+        if record_data != None:
+            # records are in bin format
+            return 1
+        raise NotSupportedError("type oid %r not mapped to py type" % type_oid)
+    # prefer bin, but go with whatever exists
+    if data.get("bin_in"):
+        format = 1
+    elif data.get("txt_in"):
+        format = 0
+    else:
+        raise InternalError("no conversion function for type oid %r" % type_oid)
+    return format
+
+def py_value(v, description, record_field_names, **kwargs):
+    if v == None:
+        # special case - NULL value
+        return None
+    type_oid = description['type_oid']
+    format = description['format']
+    data = pg_types.get(type_oid)
+    if data == None:
+        record_data = record_field_names.get(type_oid)
+        if record_data != None:
+            data = {"bin_in": record_recv(record_data)}
+    if data == None:
+        raise NotSupportedError("type oid %r not supported" % type_oid)
+    if format == 0:
+        func = data.get("txt_in")
+    elif format == 1:
+        func = data.get("bin_in")
+    else:
+        raise NotSupportedError("format code %r not supported" % format)
+    if func == None:
+        raise NotSupportedError("data response format %r, type %r not supported" % (format, type_oid))
+    return func(v, **kwargs)
+
+def voidrecv(data, **kwargs):
+    return None
+
+def voidsend(v, **kwargs):
+    return None
+
+def boolrecv(data, **kwargs):
+    return data == "\x01"
+
+def boolsend(v, **kwargs):
+    if v:
+        return "\x01"
+    else:
+        return "\x00"
+
+min_int2, max_int2 = -2 ** 15, 2 ** 15
+min_int4, max_int4 = -2 ** 31, 2 ** 31
+min_int8, max_int8 = -2 ** 63, 2 ** 63
+
+def int_inspect(value):
+    if min_int2 < value < max_int2:
+        return {"typeoid": 21, "bin_out": int2send}
+    elif min_int4 < value < max_int4:
+        return {"typeoid": 23, "bin_out": int4send}
+    elif min_int8 < value < max_int8:
+        return {"typeoid": 20, "bin_out": int8send}
+    else:
+        return {"typeoid": 1700, "bin_out": numeric_send}
+
+def int2recv(data, **kwargs):
+    return struct.unpack("!h", data)[0]
+
+def int2send(v, **kwargs):
+    return struct.pack("!h", v)
+
+def int4recv(data, **kwargs):
+    return struct.unpack("!i", data)[0]
+
+def int4send(v, **kwargs):
+    return struct.pack("!i", v)
+
+def int8recv(data, **kwargs):
+    return struct.unpack("!q", data)[0]
+
+def int8send(v, **kwargs):
+    return struct.pack("!q", v)
+
+def float4recv(data, **kwargs):
+    return struct.unpack("!f", data)[0]
+
+def float8recv(data, **kwargs):
+    return struct.unpack("!d", data)[0]
+
+def float8send(v, **kwargs):
+    return struct.pack("!d", v)
+
+def datetime_inspect(value):
+    if value.tzinfo != None:
+        # send as timestamptz if timezone is provided
+        return {"typeoid": 1184, "bin_out": timestamptz_send}
+    else:
+        # otherwise send as timestamp
+        return {"typeoid": 1114, "bin_out": timestamp_send}
+
+def timestamp_recv(data, integer_datetimes, **kwargs):
+    if integer_datetimes:
+        # data is 64-bit integer representing microseconds since 2000-01-01
+        val = struct.unpack("!q", data)[0]
+        return datetime.datetime(2000, 1, 1) + datetime.timedelta(microseconds = val)
+    else:
+        # data is double-precision float representing seconds since 2000-01-01
+        val = struct.unpack("!d", data)[0]
+        return datetime.datetime(2000, 1, 1) + datetime.timedelta(seconds = val)
+
+# return a timezone-aware datetime instance if we're reading from a
+# "timestamp with timezone" type.  The timezone returned will always be UTC,
+# but providing that additional information can permit conversion to local.
+def timestamptz_recv(data, **kwargs):
+    return timestamp_recv(data, **kwargs).replace(tzinfo=utc)
+
+def timestamp_send(v, integer_datetimes, **kwargs):
+    delta = v - datetime.datetime(2000, 1, 1)
+    val = delta.microseconds + (delta.seconds * 1000000) + (delta.days * 86400000000)
+    if integer_datetimes:
+        # data is 64-bit integer representing microseconds since 2000-01-01
+        return struct.pack("!q", val)
+    else:
+        # data is double-precision float representing seconds since 2000-01-01
+        return struct.pack("!d", val / 1000.0 / 1000.0)
+
+def timestamptz_send(v, **kwargs):
+    # timestamps should be sent as UTC.  If they have zone info,
+    # convert them.
+    return timestamp_send(v.astimezone(utc).replace(tzinfo=None), **kwargs)
+
+def date_in(data, **kwargs):
+    year = int(data[0:4])
+    month = int(data[5:7])
+    day = int(data[8:10])
+    return datetime.date(year, month, day)
+
+def date_out(v, **kwargs):
+    return v.isoformat()
+
+def time_in(data, **kwargs):
+    hour = int(data[0:2])
+    minute = int(data[3:5])
+    sec = decimal.Decimal(data[6:])
+    return datetime.time(hour, minute, int(sec), int((sec - int(sec)) * 1000000))
+
+def time_out(v, **kwargs):
+    return v.isoformat()
+
+def numeric_in(data, **kwargs):
+    if data.find(".") == -1:
+        return int(data)
+    else:
+        return decimal.Decimal(data)
+
+def numeric_recv(data, **kwargs):
+    num_digits, weight, sign, scale = struct.unpack("!hhhh", data[:8])
+    data = data[8:]
+    digits = struct.unpack("!" + ("h" * num_digits), data)
+    weight = decimal.Decimal(weight)
+    retval = 0
+    for d in digits:
+        d = decimal.Decimal(d)
+        retval += d * (10000 ** weight)
+        weight -= 1
+    if sign:
+        retval *= -1
+    return retval
+
+def numeric_send(v, **kwargs):
+    sign = 0
+    if v < 0:
+        sign = 16384
+        v *= -1
+    max_weight = decimal.Decimal(int(math.floor(math.log(v) / math.log(10000))))
+    weight = max_weight
+    digits = []
+    while v != 0:
+        digit = int(math.floor(v / (10000 ** weight)))
+        v = v - (digit * (10000 ** weight))
+        weight -= 1
+        digits.append(digit)
+    retval = struct.pack("!hhhh", len(digits), max_weight, sign, 0)
+    retval += struct.pack("!" + ("h" * len(digits)), *digits)
+    return retval
+
+def numeric_out(v, **kwargs):
+    return str(v)
+
+# PostgreSQL encodings:
+#   http://www.postgresql.org/docs/8.3/interactive/multibyte.html
+# Python encodings:
+#   http://www.python.org/doc/2.4/lib/standard-encodings.html
+#
+# Commented out encodings don't require a name change between PostgreSQL and
+# Python.  If the py side is None, then the encoding isn't supported.
+pg_to_py_encodings = {
+    # Not supported:
+    "mule_internal": None,
+    "euc_tw": None,
+
+    # Name fine as-is:
+    #"euc_jp",
+    #"euc_jis_2004",
+    #"euc_kr",
+    #"gb18030",
+    #"gbk",
+    #"johab",
+    #"sjis",
+    #"shift_jis_2004",
+    #"uhc",
+    #"utf8",
+
+    # Different name:
+    "euc_cn": "gb2312",
+    "iso_8859_5": "iso8859_5",
+    "iso_8859_6": "iso8859_6",
+    "iso_8859_7": "iso8859_7",
+    "iso_8859_8": "iso8859_8",
+    "koi8": "koi8_r",
+    "latin1": "iso8859-1",
+    "latin2": "iso8859_2",
+    "latin3": "iso8859_3",
+    "latin4": "iso8859_4",
+    "latin5": "iso8859_9",
+    "latin6": "iso8859_10",
+    "latin7": "iso8859_13",
+    "latin8": "iso8859_14",
+    "latin9": "iso8859_15",
+    "sql_ascii": "ascii",
+    "win866": "cp866",
+    "win874": "cp874",
+    "win1250": "cp1250",
+    "win1251": "cp1251",
+    "win1252": "cp1252",
+    "win1253": "cp1253",
+    "win1254": "cp1254",
+    "win1255": "cp1255",
+    "win1256": "cp1256",
+    "win1257": "cp1257",
+    "win1258": "cp1258",
+}
+
+def encoding_convert(encoding):
+    return pg_to_py_encodings.get(encoding.lower(), encoding)
+
+def varcharin(data, client_encoding, **kwargs):
+    return unicode(data, encoding_convert(client_encoding))
+
+def textout(v, client_encoding, **kwargs):
+    return v.encode(encoding_convert(client_encoding))
+
+def byteasend(v, **kwargs):
+    return str(v)
+
+def bytearecv(data, **kwargs):
+    return Bytea(data)
+
+# interval support does not provide a Python-usable interval object yet
+def interval_recv(data, integer_datetimes, **kwargs):
+    if integer_datetimes:
+        microseconds, days, months = struct.unpack("!qii", data)
+    else:
+        seconds, days, months = struct.unpack("!dii", data)
+        microseconds = int(seconds * 1000 * 1000)
+    return Interval(microseconds, days, months)
+
+def interval_send(data, integer_datetimes, **kwargs):
+    if integer_datetimes:
+        return struct.pack("!qii", data.microseconds, data.days, data.months)
+    else:
+        return struct.pack("!dii", data.microseconds / 1000.0 / 1000.0, data.days, data.months)
+
+def array_recv(data, **kwargs):
+    dim, hasnull, typeoid = struct.unpack("!iii", data[:12])
+    data = data[12:]
+
+    # get type conversion method for typeoid
+    conversion = pg_types[typeoid]["bin_in"]
+
+    # Read dimension info
+    dim_lengths = []
+    element_count = 1
+    for idim in range(dim):
+        dim_len, dim_lbound = struct.unpack("!ii", data[:8])
+        data = data[8:]
+        dim_lengths.append(dim_len)
+        element_count *= dim_len
+
+    # Read all array values
+    array_values = []
+    for i in range(element_count):
+        element_len, = struct.unpack("!i", data[:4])
+        data = data[4:]
+        if element_len == -1:
+            array_values.append(None)
+        else:
+            array_values.append(conversion(data[:element_len], **kwargs))
+            data = data[element_len:]
+    if data != "":
+        raise ArrayDataParseError("unexpected data left over after array read")
+
+    # at this point, {{1,2,3},{4,5,6}}::int[][] looks like [1,2,3,4,5,6].
+    # go through the dimensions and fix up the array contents to match
+    # expected dimensions
+    for dim_length in reversed(dim_lengths[1:]):
+        val = []
+        while array_values:
+            val.append(array_values[:dim_length])
+            array_values = array_values[dim_length:]
+        array_values = val
+
+    return array_values
+
+def array_inspect(value):
+    # Check if array has any values.  If not, we can't determine the proper
+    # array typeoid.
+    first_element = array_find_first_element(value)
+    if first_element == None:
+        raise ArrayContentEmptyError("array has no values")
+
+    # supported array output
+    typ = type(first_element)
+    if issubclass(typ, int) or issubclass(typ, long):
+        # special int array support -- send as smallest possible array type
+        special_int_support = True
+        int2_ok, int4_ok, int8_ok = True, True, True
+        for v in array_flatten(value):
+            if v == None:
+                continue
+            if min_int2 < v < max_int2:
+                continue
+            int2_ok = False
+            if min_int4 < v < max_int4:
+                continue
+            int4_ok = False
+            if min_int8 < v < max_int8:
+                continue
+            int8_ok = False
+        if int2_ok:
+            array_typeoid = 1005 # INT2[]
+        elif int4_ok:
+            array_typeoid = 1007 # INT4[]
+        elif int8_ok:
+            array_typeoid = 1016 # INT8[]
+        else:
+            raise ArrayContentNotSupportedError("numeric not supported as array contents")
+    else:
+        special_int_support = False
+        array_typeoid = py_array_types.get(typ)
+        if array_typeoid == None:
+            raise ArrayContentNotSupportedError("type %r not supported as array contents" % typ)
+
+    # check for homogeneous array
+    for v in array_flatten(value):
+        if v != None and not (isinstance(v, typ) or (typ == long and isinstance(v, int)) or (typ == int and isinstance(v, long))):
+            raise ArrayContentNotHomogenousError("not all array elements are of type %r" % typ)
+
+    # check that all array dimensions are consistent
+    array_check_dimensions(value)
+
+    type_data = py_types[typ]
+    if special_int_support:
+        if array_typeoid == 1005:
+            type_data = {"typeoid": 21, "bin_out": int2send}
+        elif array_typeoid == 1007:
+            type_data = {"typeoid": 23, "bin_out": int4send}
+        elif array_typeoid == 1016:
+            type_data = {"typeoid": 20, "bin_out": int8send}
+    else:
+        type_data = py_types[typ]
+    return {
+        "typeoid": array_typeoid,
+        "bin_out": array_send(type_data["typeoid"], type_data["bin_out"])
+    }
+
+def array_find_first_element(arr):
+    for v in array_flatten(arr):
+        if v != None:
+            return v
+    return None
+
+def array_flatten(arr):
+    for v in arr:
+        if isinstance(v, list):
+            for v2 in array_flatten(v):
+                yield v2
+        else:
+            yield v
+
+def array_check_dimensions(arr):
+    v0 = arr[0]
+    if isinstance(v0, list):
+        req_len = len(v0)
+        req_inner_lengths = array_check_dimensions(v0)
+        for v in arr:
+            inner_lengths = array_check_dimensions(v)
+            if len(v) != req_len or inner_lengths != req_inner_lengths:
+                raise ArrayDimensionsNotConsistentError("array dimensions not consistent")
+        retval = [req_len]
+        retval.extend(req_inner_lengths)
+        return retval
+    else:
+        # make sure nothing else at this level is a list
+        for v in arr:
+            if isinstance(v, list):
+                raise ArrayDimensionsNotConsistentError("array dimensions not consistent")
+        return []
+
+def array_has_null(arr):
+    for v in array_flatten(arr):
+        if v == None:
+            return True
+    return False
+
+def array_dim_lengths(arr):
+    v0 = arr[0]
+    if isinstance(v0, list):
+        retval = [len(v0)]
+        retval.extend(array_dim_lengths(v0))
+    else:
+        return [len(arr)]
+
+class array_send(object):
+    def __init__(self, typeoid, bin_out_func):
+        self.typeoid = typeoid
+        self.bin_out_func = bin_out_func
+
+    def __call__(self, arr, **kwargs):
+        has_null = array_has_null(arr)
+        dim_lengths = array_dim_lengths(arr)
+        data = struct.pack("!iii", len(dim_lengths), has_null, self.typeoid)
+        for i in dim_lengths:
+            data += struct.pack("!ii", i, 1)
+        for v in array_flatten(arr):
+            if v == None:
+                data += struct.pack("!i", -1)
+            else:
+                inner_data = self.bin_out_func(v, **kwargs)
+                data += struct.pack("!i", len(inner_data))
+                data += inner_data
+        return data
+
+class record_recv(object):
+    def __init__(self, record_field_names):
+        self.record_field_names = record_field_names
+
+    def __call__(self, data, **kwargs):
+        num_fields, = struct.unpack("!i", data[:4])
+        data = data[4:]
+        retval = {}
+        for i in range(num_fields):
+            typeoid, size = struct.unpack("!ii", data[:8])
+            data = data[8:]
+            conversion = pg_types[typeoid]["bin_in"]
+            value = conversion(data[:size], **kwargs)
+            data = data[size:]
+            retval[self.record_field_names[i]] = value
+        return retval
+
+py_types = {
+    bool: {"typeoid": 16, "bin_out": boolsend},
+    int: {"inspect": int_inspect},
+    long: {"inspect": int_inspect},
+    str: {"typeoid": 25, "bin_out": textout},
+    unicode: {"typeoid": 25, "bin_out": textout},
+    float: {"typeoid": 701, "bin_out": float8send},
+    decimal.Decimal: {"typeoid": 1700, "bin_out": numeric_send},
+    Bytea: {"typeoid": 17, "bin_out": byteasend},
+    datetime.datetime: {"typeoid": 1114, "bin_out": timestamp_send, "inspect": datetime_inspect},
+    datetime.date: {"typeoid": 1082, "txt_out": date_out},
+    datetime.time: {"typeoid": 1083, "txt_out": time_out},
+    Interval: {"typeoid": 1186, "bin_out": interval_send},
+    type(None): {"typeoid": -1},
+    list: {"inspect": array_inspect},
+}
+
+# py type -> pg array typeoid
+py_array_types = {
+    float: 1022,
+    bool: 1000,
+    str: 1009,      # TEXT[]
+    unicode: 1009,  # TEXT[]
+    decimal.Decimal: 1231, # NUMERIC[]
+}
+
+pg_types = {
+    16: {"bin_in": boolrecv},
+    17: {"bin_in": bytearecv},
+    18: {"txt_in": varcharin}, # char type (Greenplum)
+    19: {"bin_in": varcharin}, # name type
+    20: {"bin_in": int8recv},
+    21: {"bin_in": int2recv},
+    23: {"bin_in": int4recv},
+    24: {"txt_in": varcharin}, # regproc    (Greenplum)
+    25: {"bin_in": varcharin}, # TEXT type
+    26: {"txt_in": numeric_in}, # oid type
+    28: {"txt_in": numeric_in}, # xid type  (Greenplum)
+    700: {"bin_in": float4recv},
+    701: {"bin_in": float8recv},
+    829: {"txt_in": varcharin}, # MACADDR type
+    1000: {"bin_in": array_recv}, # BOOL[]
+    1003: {"bin_in": array_recv}, # NAME[]
+    1005: {"bin_in": array_recv}, # INT2[]
+    1007: {"bin_in": array_recv}, # INT4[]
+    1009: {"bin_in": array_recv}, # TEXT[]
+    1014: {"bin_in": array_recv}, # CHAR[]
+    1015: {"bin_in": array_recv}, # VARCHAR[]
+    1016: {"bin_in": array_recv}, # INT8[]
+    1021: {"bin_in": array_recv}, # FLOAT4[]
+    1022: {"bin_in": array_recv}, # FLOAT8[]
+    1042: {"bin_in": varcharin}, # CHAR type
+    1043: {"bin_in": varcharin}, # VARCHAR type
+    1082: {"txt_in": date_in},
+    1083: {"txt_in": time_in},
+    1114: {"bin_in": timestamp_recv},
+    1184: {"bin_in": timestamptz_recv}, # timestamp w/ tz
+    1186: {"bin_in": interval_recv},
+    1231: {"bin_in": array_recv}, # NUMERIC[]
+    1263: {"bin_in": array_recv}, # cstring[]
+    1700: {"bin_in": numeric_recv},
+    2275: {"bin_in": varcharin}, # cstring
+    2278: {"txt_in": voidrecv}, # void - This is to allow the code to handle the situation where a SQL function returns void
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/pg8000/util.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/pg8000/util.py b/tools/bin/ext/pg8000/util.py
new file mode 100644
index 0000000..d99421e
--- /dev/null
+++ b/tools/bin/ext/pg8000/util.py
@@ -0,0 +1,20 @@
+
+class MulticastDelegate(object):
+    def __init__(self):
+        self.delegates = []
+
+    def __iadd__(self, delegate):
+        self.add(delegate)
+        return self
+
+    def add(self, delegate):
+        self.delegates.append(delegate)
+
+    def __isub__(self, delegate):
+        self.delegates.remove(delegate)
+        return self
+
+    def __call__(self, *args, **kwargs):
+        for d in self.delegates:
+            d(*args, **kwargs)
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/pygresql/__init__.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/pygresql/__init__.py b/tools/bin/ext/pygresql/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/__init__.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/__init__.py b/tools/bin/ext/simplejson/__init__.py
new file mode 100755
index 0000000..38d6229
--- /dev/null
+++ b/tools/bin/ext/simplejson/__init__.py
@@ -0,0 +1,287 @@
+r"""
+A simple, fast, extensible JSON encoder and decoder
+
+JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+simplejson exposes an API familiar to users of the standard library
+marshal and pickle modules.
+
+Encoding basic Python object hierarchies::
+    
+    >>> import simplejson
+    >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print simplejson.dumps("\"foo\bar")
+    "\"foo\bar"
+    >>> print simplejson.dumps(u'\u1234')
+    "\u1234"
+    >>> print simplejson.dumps('\\')
+    "\\"
+    >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+    {"a": 0, "b": 0, "c": 0}
+    >>> from StringIO import StringIO
+    >>> io = StringIO()
+    >>> simplejson.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson
+    >>> simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson
+    >>> print simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    {
+        "4": 5, 
+        "6": 7
+    }
+
+Decoding JSON::
+    
+    >>> import simplejson
+    >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
+    [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> simplejson.loads('"\\"foo\\bar"')
+    u'"foo\x08ar'
+    >>> from StringIO import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> simplejson.load(io)
+    [u'streaming API']
+
+Specializing JSON object decoding::
+
+    >>> import simplejson
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ... 
+    >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+
+Extending JSONEncoder::
+    
+    >>> import simplejson
+    >>> class ComplexEncoder(simplejson.JSONEncoder):
+    ...     def default(self, obj):
+    ...         if isinstance(obj, complex):
+    ...             return [obj.real, obj.imag]
+    ...         return simplejson.JSONEncoder.default(self, obj)
+    ... 
+    >>> dumps(2 + 1j, cls=ComplexEncoder)
+    '[2.0, 1.0]'
+    >>> ComplexEncoder().encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> list(ComplexEncoder().iterencode(2 + 1j))
+    ['[', '2.0', ', ', '1.0', ']']
+    
+
+Note that the JSON produced by this module's default settings
+is a subset of YAML, so it may be used as a serializer for that as well.
+"""
+__version__ = '1.7.3'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONEncoder',
+]
+
+from decoder import JSONDecoder
+from encoder import JSONEncoder
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8'
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', **kw):
+    """
+    Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If ``check_circular`` is ``False``, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance of the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a non-negative integer, then JSON array elements and object
+    members will be pretty-printed with that indent level. An indent level
+    of 0 will only insert newlines. ``None`` is the most compact representation.
+
+    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+    then it will be used instead of the default ``(', ', ': ')`` separators.
+    ``(',', ':')`` is the most compact JSON representation.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg.
+    """
+    # cached encoder
+    if (skipkeys is False and ensure_ascii is True and
+        check_circular is True and allow_nan is True and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and not kw):
+        iterable = _default_encoder.iterencode(obj)
+    else:
+        if cls is None:
+            cls = JSONEncoder
+        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+            separators=separators, encoding=encoding, **kw).iterencode(obj)
+    # could accelerate with writelines in some versions of Python, at
+    # a debuggability cost
+    for chunk in iterable:
+        fp.write(chunk)
+
+
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        encoding='utf-8', **kw):
    """
    Serialize ``obj`` to a JSON formatted ``str``.

    ``skipkeys`` set to ``True`` makes non-basic ``dict`` keys (anything
    other than ``str``, ``unicode``, ``int``, ``long``, ``float``,
    ``bool``, ``None``) be silently dropped rather than raise ``TypeError``.

    With ``ensure_ascii`` set to ``False`` the result is a ``unicode``
    object instead of an ASCII-escaped ``str``.

    Disabling ``check_circular`` skips the circular-reference check for
    containers; a cycle then ends in an ``OverflowError`` (or worse).

    Disabling ``allow_nan`` makes out-of-range floats (``nan``, ``inf``,
    ``-inf``) raise ``ValueError`` instead of emitting the JavaScript
    names ``NaN``, ``Infinity`` and ``-Infinity``.

    A non-negative integer ``indent`` pretty-prints arrays and objects at
    that indent level (0 inserts only newlines); ``None`` is most compact.

    ``separators``, when given as an ``(item_separator, dict_separator)``
    tuple, replaces the default ``(', ', ': ')``; ``(',', ':')`` yields
    the most compact output.

    ``encoding`` is the character encoding for ``str`` instances
    (UTF-8 by default).

    A custom ``JSONEncoder`` subclass (e.g. one overriding ``.default()``)
    may be supplied via the ``cls`` kwarg.
    """
    # Fast path: when every argument matches the module defaults, reuse
    # the shared module-level encoder instead of constructing a new one.
    # Identity tests (``is``) are deliberate so e.g. skipkeys=1 still
    # takes the explicit-construction path.
    use_cached = (skipkeys is False and ensure_ascii is True and
                  check_circular is True and allow_nan is True and
                  cls is None and indent is None and separators is None and
                  encoding == 'utf-8' and not kw)
    if use_cached:
        return _default_encoder.encode(obj)
    encoder_class = JSONEncoder if cls is None else cls
    encoder = encoder_class(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
                            check_circular=check_circular,
                            allow_nan=allow_nan, indent=indent,
                            separators=separators, encoding=encoding, **kw)
    return encoder.encode(obj)
+
# Shared decoder used by loads()/load() when all arguments are defaults.
_default_decoder = JSONDecoder(encoding=None, object_hook=None)
+
def load(fp, encoding=None, cls=None, object_hook=None, **kw):
    """
    Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON document) to a Python object.

    When the contents of ``fp`` use an ASCII-based encoding other than
    utf-8 (e.g. latin-1), pass its name as ``encoding``.  Non-ASCII-based
    encodings (such as UCS-2) are not supported here; wrap the file with
    ``codecs.getreader(fp)(encoding)`` or decode to ``unicode`` yourself
    and call ``loads()``.

    ``object_hook``, if given, is called with every decoded object
    literal (a ``dict``) and its return value replaces the ``dict`` —
    useful for custom decoders such as JSON-RPC class hinting.

    A custom ``JSONDecoder`` subclass may be supplied via ``cls``.
    """
    # Slurp the whole document and delegate; loads() owns all the logic.
    document = fp.read()
    return loads(document, encoding=encoding, cls=cls,
                 object_hook=object_hook, **kw)
+
def loads(s, encoding=None, cls=None, object_hook=None, **kw):
    """
    Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a
    JSON document) to a Python object.

    When ``s`` is a ``str`` in an ASCII-based encoding other than utf-8
    (e.g. latin-1), pass its name as ``encoding``.  Encodings that are
    not ASCII-based (such as UCS-2) are not supported; decode to
    ``unicode`` first.

    ``object_hook``, if given, is called with every decoded object
    literal (a ``dict``) and its return value replaces the ``dict`` —
    useful for custom decoders such as JSON-RPC class hinting.

    A custom ``JSONDecoder`` subclass may be supplied via ``cls``.
    """
    # All-default calls reuse the shared module-level decoder.
    all_defaults = (cls is None and encoding is None and
                    object_hook is None and not kw)
    if all_defaults:
        return _default_decoder.decode(s)
    decoder_class = JSONDecoder if cls is None else cls
    if object_hook is not None:
        kw['object_hook'] = object_hook
    return decoder_class(encoding=encoding, **kw).decode(s)
+
def read(s):
    """
    Deprecated json-py API compatibility alias for ``loads(s)``.

    Emits a ``DeprecationWarning`` on every call.
    """
    import warnings
    warnings.warn(
        "simplejson.loads(s) should be used instead of read(s)",
        DeprecationWarning)
    return loads(s)
+
def write(obj):
    """
    Deprecated json-py API compatibility alias for ``dumps(obj)``.

    Emits a ``DeprecationWarning`` on every call.
    """
    import warnings
    warnings.warn(
        "simplejson.dumps(s) should be used instead of write(s)",
        DeprecationWarning)
    return dumps(obj)
+
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/_speedups.c
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/_speedups.c b/tools/bin/ext/simplejson/_speedups.c
new file mode 100644
index 0000000..8f290bb
--- /dev/null
+++ b/tools/bin/ext/simplejson/_speedups.c
@@ -0,0 +1,215 @@
+#include "Python.h"
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
+void init_speedups(void);
+
/* True for printable ASCII characters that need no escaping inside a
 * JSON string ('/' is escaped anyway to defuse '</script>' injection). */
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"')

/* Worst-case output bytes per input character: one \uXXXX escape, which
 * doubles on wide (UCS-4) builds where a non-BMP character expands to a
 * UTF-16 surrogate pair. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
+
/*
 * Append the JSON escape sequence for the single character `c` to
 * `output` at index `chars` and return the advanced index.  The caller
 * must guarantee at least MAX_EXPANSION bytes of headroom.
 */
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
    Py_UNICODE x;
    output[chars++] = '\\';
    switch (c) {
        /* Characters with a short two-byte escape form. */
        case '/': output[chars++] = (char)c; break;
        case '\\': output[chars++] = (char)c; break;
        case '"': output[chars++] = (char)c; break;
        case '\b': output[chars++] = 'b'; break;
        case '\f': output[chars++] = 'f'; break;
        case '\n': output[chars++] = 'n'; break;
        case '\r': output[chars++] = 'r'; break;
        case '\t': output[chars++] = 't'; break;
        default:
#ifdef Py_UNICODE_WIDE
            if (c >= 0x10000) {
                /* UTF-16 surrogate pair */
                Py_UNICODE v = c - 0x10000;
                c = 0xd800 | ((v >> 10) & 0x3ff);
                /* Emit \uXXXX for the high surrogate here; `c` is then
                 * rebound to the low surrogate, which falls through to
                 * the shared hex emitter below. */
                output[chars++] = 'u';
                x = (c & 0xf000) >> 12;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x0f00) >> 8;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x00f0) >> 4;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x000f);
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                c = 0xdc00 | (v & 0x3ff);
                output[chars++] = '\\';
            }
#endif
            /* Four-digit lowercase hex escape \uXXXX. */
            output[chars++] = 'u';
            x = (c & 0xf000) >> 12;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x0f00) >> 8;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x00f0) >> 4;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x000f);
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
    }
    return chars;
}
+
/*
 * Return a new str containing the double-quoted, ASCII-only JSON
 * representation of the unicode object `pystr`.  Returns NULL with an
 * exception set on allocation failure.
 */
static PyObject *
ascii_escape_unicode(PyObject *pystr) {
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    char *output;
    Py_UNICODE *input_unicode;

    input_chars = PyUnicode_GET_SIZE(pystr);
    input_unicode = PyUnicode_AS_UNICODE(pystr);
    /* One char input can be up to 6 chars output, estimate 4 of these */
    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    rval = PyString_FromStringAndSize(NULL, output_size);
    if (rval == NULL) {
        return NULL;
    }
    output = PyString_AS_STRING(rval);
    chars = 0;
    output[chars++] = '"';
    for (i = 0; i < input_chars; i++) {
        Py_UNICODE c = input_unicode[i];
        if (S_CHAR(c)) {
            output[chars++] = (char)c;
        } else {
            chars = ascii_escape_char(c, output, chars);
        }
        /* Keep headroom for a worst-case escape plus the closing quote. */
        if (output_size - chars < (1 + MAX_EXPANSION)) {
            /* There's more than four, so let's resize by a lot */
            output_size *= 2;
            /* This is an upper bound */
            if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
                output_size = 2 + (input_chars * MAX_EXPANSION);
            }
            if (_PyString_Resize(&rval, output_size) == -1) {
                return NULL;
            }
            output = PyString_AS_STRING(rval);
        }
    }
    output[chars++] = '"';
    /* Shrink the over-allocated buffer to the final length. */
    if (_PyString_Resize(&rval, chars) == -1) {
        return NULL;
    }
    return rval;
}
+
/*
 * Return a new str containing the double-quoted, ASCII-only JSON
 * representation of the byte string `pystr`.  If a non-ASCII byte is
 * seen, the whole input is re-decoded as UTF-8 and handed to
 * ascii_escape_unicode().  Returns NULL with an exception set on
 * allocation or decoding failure.
 */
static PyObject *
ascii_escape_str(PyObject *pystr) {
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    char *output;
    char *input_str;

    input_chars = PyString_GET_SIZE(pystr);
    input_str = PyString_AS_STRING(pystr);
    /* One char input can be up to 6 chars output, estimate 4 of these */
    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    rval = PyString_FromStringAndSize(NULL, output_size);
    if (rval == NULL) {
        return NULL;
    }
    output = PyString_AS_STRING(rval);
    chars = 0;
    output[chars++] = '"';
    for (i = 0; i < input_chars; i++) {
        Py_UNICODE c = (Py_UNICODE)input_str[i];
        if (S_CHAR(c)) {
            output[chars++] = (char)c;
        } else if (c > 0x7F) {
            /* We hit a non-ASCII character, bail to unicode mode */
            PyObject *uni;
            Py_DECREF(rval);
            uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
            if (uni == NULL) {
                return NULL;
            }
            rval = ascii_escape_unicode(uni);
            Py_DECREF(uni);
            return rval;
        } else {
            chars = ascii_escape_char(c, output, chars);
        }
        /* An ASCII char can't possibly expand to a surrogate! */
        if (output_size - chars < (1 + MIN_EXPANSION)) {
            /* There's more than four, so let's resize by a lot */
            output_size *= 2;
            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
                output_size = 2 + (input_chars * MIN_EXPANSION);
            }
            if (_PyString_Resize(&rval, output_size) == -1) {
                return NULL;
            }
            output = PyString_AS_STRING(rval);
        }
    }
    output[chars++] = '"';
    /* Shrink the over-allocated buffer to the final length. */
    if (_PyString_Resize(&rval, chars) == -1) {
        return NULL;
    }
    return rval;
}
+
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
    "encode_basestring_ascii(basestring) -> str\n"
    "\n"
    "..."
);

/*
 * Python-visible entry point (METH_O): dispatch a str or unicode
 * argument to the matching escape routine; TypeError for anything else.
 */
static PyObject *
py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
    /* METH_O */
    if (PyString_Check(pystr)) {
        return ascii_escape_str(pystr);
    } else if (PyUnicode_Check(pystr)) {
        return ascii_escape_unicode(pystr);
    }
    PyErr_SetString(PyExc_TypeError, "first argument must be a string");
    return NULL;
}
+
/* Expand a method-table entry: name string, C function `py_<n>`, call
 * convention `k`, and the matching `pydoc_<n>` docstring. */
#define DEFN(n, k) \
    {  \
        #n, \
        (PyCFunction)py_ ##n, \
        k, \
        pydoc_ ##n \
    }
static PyMethodDef speedups_methods[] = {
    DEFN(encode_basestring_ascii, METH_O),
    {}  /* zero-filled sentinel terminates the table */
};
#undef DEFN
+
/*
 * Python 2 module entry point: registers the `_speedups` module and its
 * method table.  NOTE(review): the Py_InitModule4 result is unchecked;
 * a void initfunc has no way to report failure anyway.
 */
void
init_speedups(void)
{
    PyObject *m;
    m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
}

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/decoder.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/decoder.py b/tools/bin/ext/simplejson/decoder.py
new file mode 100755
index 0000000..a1b53b2
--- /dev/null
+++ b/tools/bin/ext/simplejson/decoder.py
@@ -0,0 +1,273 @@
+"""
+Implementation of JSONDecoder
+"""
+import re
+
+from simplejson.scanner import Scanner, pattern
+
+FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
+def _floatconstants():
+    import struct
+    import sys
+    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
+    if sys.byteorder != 'big':
+        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
+    nan, inf = struct.unpack('dd', _BYTES)
+    return nan, inf, -inf
+
+NaN, PosInf, NegInf = _floatconstants()
+
def linecol(doc, pos):
    """
    Map character offset ``pos`` in ``doc`` to a ``(lineno, colno)``
    pair.  Lines are numbered from 1; the column is the 0-based offset
    on the first line and the 1-based distance from the preceding
    newline on later lines (matching historical behaviour).
    """
    newlines_before = doc.count('\n', 0, pos)
    if not newlines_before:
        return 1, pos
    last_newline = doc.rindex('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
+
def errmsg(msg, doc, pos, end=None):
    """
    Format a decoder error message with line/column context.  When
    ``end`` is given, a position range is reported instead of a single
    position.
    """
    lineno, colno = linecol(doc, pos)
    if end is None:
        return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
+
# Literal JSON/JavaScript constants mapped to their Python values.  The
# NaN/Infinity/-Infinity names are outside the JSON spec but accepted on
# input for JavaScript compatibility.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    'true': True,
    'false': False,
    'null': None,
}

# Scanner action: the whole match is a key of _CONSTANTS; the table is
# bound as a default argument for fast local lookup.
def JSONConstant(match, context, c=_CONSTANTS):
    return c[match.group(0)], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
+
# Scanner action for numbers.  Re-matches with the anchored number regex
# (attached by the pattern() decorator) to recover the capture groups,
# which the scanner's combined pattern does not expose.
def JSONNumber(match, context):
    match = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = match.groups()
    if frac or exp:
        # Any fractional or exponent part makes the value a float.
        res = float(integer + (frac or '') + (exp or ''))
    else:
        res = int(integer)
    return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
+
# A chunk of plain characters followed by the character that ends it:
# either the closing quote or a backslash starting an escape.
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
# One-character backslash escapes mapped to their decoded values.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"

def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Scan a JSON string literal in ``s``; ``end`` is the index just past
    the opening quote.  Returns ``(value, end)`` where ``value`` is the
    decoded ``unicode`` string and ``end`` is the index one past the
    closing quote.  Raises ``ValueError`` for unterminated strings or
    invalid escapes.  ``_b`` and ``_m`` are bound as defaults purely for
    lookup speed.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            # Byte strings are decoded lazily, only when a chunk actually
            # has content; assumes an ASCII-compatible encoding.
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Simple one-character escape looked up in BACKSLASH.
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # \uXXXX escape: exactly four hex digits.  The isalnum()
            # check rejects forms like '+12 ' that int(x, 16) accepts.
            esc = s[end + 1:end + 5]
            try:
                m = unichr(int(esc, 16))
                if len(esc) != 4 or not esc.isalnum():
                    raise ValueError
            except ValueError:
                raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
            end += 5
        _append(m)
    return u''.join(chunks), end
+
# Scanner action for '"': delegate to scanstring, honouring the decoding
# context's encoding when it has one.
def JSONString(match, context):
    encoding = getattr(context, 'encoding', None)
    return scanstring(match.string, match.end(), encoding)
pattern(r'"')(JSONString)
+
# Zero-or-more whitespace; its .match is bound as _w to skip between
# tokens cheaply.
WHITESPACE = re.compile(r'\s*', FLAGS)

def JSONObject(match, context, _w=WHITESPACE.match):
    # Scanner action for '{': parse key/value pairs until the matching
    # '}' and return (dict, end_index).
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # trivial empty object
    if nextchar == '}':
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            # Recurse through the shared scanner for the value.
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    # Let object_hook (if any) replace the finished dict.
    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
+            
def JSONArray(match, context, _w=WHITESPACE.match):
    # Scanner action for '[': parse comma-separated values until the
    # matching ']' and return (list, end_index).
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            # Recurse through the shared scanner for each element.
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
+ 
# Every value production, tried in order by the scanner; containers come
# first since their '{'/'[' prefixes are unambiguous.
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

# Module-level scanner shared by the recursive container actions above.
JSONScanner = Scanner(ANYTHING)
+
class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    Performs the following translations in decoding:
    
    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    # Instance-independent scanner; the decoder is passed as `context`
    # so the actions can read its encoding/object_hook.
    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None):
        """
        ``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.
        
        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).
        """
        self.encoding = encoding
        self.object_hook = object_hook

    def decode(self, s, _w=WHITESPACE.match):
        """
        Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)
        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        # Anything but trailing whitespace after the document is an error.
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, **kw):
        """
        Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
        with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.
        """
        # Default the scan context to this decoder so actions can see
        # self.encoding and self.object_hook.
        kw.setdefault('context', self)
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end
+
+__all__ = ['JSONDecoder']

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/encoder.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/encoder.py b/tools/bin/ext/simplejson/encoder.py
new file mode 100755
index 0000000..d29919a
--- /dev/null
+++ b/tools/bin/ext/simplejson/encoder.py
@@ -0,0 +1,371 @@
+"""
+Implementation of JSONEncoder
+"""
+import re
+try:
+    from simplejson import _speedups
+except ImportError:
+    _speedups = None
+
# Characters that must be replaced in a non-ASCII-escaped JSON string.
# BUG FIX: the range previously stopped at \x19, leaving the control
# characters U+001A..U+001F unescaped and producing invalid JSON; the
# JSON spec requires escaping all of U+0000..U+001F.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# For ASCII output: also escape '/', and everything outside printable
# ASCII (handled by the \uXXXX fallback in encode_basestring_ascii).
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
ESCAPE_DCT = {
    # escape all forward slashes to prevent </script> attack
    '/': '\\/',
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Fill in \uXXXX escapes for every remaining C0 control character.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+
# assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')

def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.  Finite values use
    ``repr``; the IEEE specials map to the JavaScript names ``NaN``,
    ``Infinity`` and ``-Infinity`` unless ``allow_nan`` is false, in
    which case they raise ``ValueError``.
    """
    # Detect specials without poking at platform internals: NaN is the
    # only float that compares unequal to itself, and the infinities
    # compare equal to the overflow constant above.
    if o == o and o != INFINITY and o != -INFINITY:
        return repr(o)
    if o != o:
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    else:
        text = '-Infinity'
    if allow_nan:
        return text
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
+
+
def encode_basestring(s):
    """
    Return the double-quoted JSON string literal for ``s``, escaping
    only the characters matched by ESCAPE (no ASCII-escaping of
    non-ASCII text).
    """
    def _sub(match):
        return ESCAPE_DCT[match.group(0)]
    return '"%s"' % (ESCAPE.sub(_sub, s),)
+
def encode_basestring_ascii(s):
    # Pure-Python fallback for the C _speedups version: return the
    # double-quoted, ASCII-only JSON literal for ``s``.
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            # Not a short escape: emit \uXXXX, splitting non-BMP
            # characters into a UTF-16 surrogate pair.
            n = ord(s)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+        
# Prefer the C implementation when the extension built.  The C version
# wants utf-8 bytes left undecoded, so _need_utf8 records which
# implementation is active for the encoding logic below.
try:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
    _need_utf8 = True
except AttributeError:
    _need_utf8 = False
+
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    # Class-level defaults; overridden per-instance when ``separators``
    # is passed to the constructor.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8'):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple. The default is (', ', ': '). To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        self.encoding = encoding

    def _newline_indent(self):
        # Newline plus indentation for the current nesting depth.
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        # Yield the JSON chunks for a list/tuple; `markers` (id -> obj)
        # tracks containers on the current path for cycle detection.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        # Yield the JSON chunks for a dict; see _iterencode_list for the
        # role of `markers`.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # BUG FIX: bool is a subclass of int, so the True/False/None
            # identity tests must come before the (int, long) check;
            # otherwise boolean keys were emitted as "True"/"False",
            # which is not valid JSON.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        # Dispatch on the type of `o` and yield its JSON chunks.
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Unknown type: let default() convert it, guarding against
            # a default() implementation that returns a cycle.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks...
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            # BUG FIX: the fast path previously ignored ensure_ascii and
            # always ASCII-escaped, contradicting the iterencode path.
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because it
        # sucks at reporting exceptions.  It's going to do this internally
        # anyway because it uses PySequence_Fast or similar.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
+
+__all__ = ['JSONEncoder']

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/jsonfilter.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/jsonfilter.py b/tools/bin/ext/simplejson/jsonfilter.py
new file mode 100755
index 0000000..01ca21d
--- /dev/null
+++ b/tools/bin/ext/simplejson/jsonfilter.py
@@ -0,0 +1,40 @@
+import simplejson
+import cgi
+
+class JSONFilter(object):
+    def __init__(self, app, mime_type='text/x-json'):
+        self.app = app
+        self.mime_type = mime_type
+
+    def __call__(self, environ, start_response):
+        # Read JSON POST input to jsonfilter.json if matching mime type
+        response = {'status': '200 OK', 'headers': []}
+        def json_start_response(status, headers):
+            response['status'] = status
+            response['headers'].extend(headers)
+        environ['jsonfilter.mime_type'] = self.mime_type
+        if environ.get('REQUEST_METHOD', '') == 'POST':
+            if environ.get('CONTENT_TYPE', '') == self.mime_type:
+                args = [_ for _ in [environ.get('CONTENT_LENGTH')] if _]
+                data = environ['wsgi.input'].read(*map(int, args))
+                environ['jsonfilter.json'] = simplejson.loads(data)
+        res = simplejson.dumps(self.app(environ, json_start_response))
+        jsonp = cgi.parse_qs(environ.get('QUERY_STRING', '')).get('jsonp')
+        if jsonp:
+            content_type = 'text/javascript'
+            res = ''.join(jsonp + ['(', res, ')'])
+        elif 'Opera' in environ.get('HTTP_USER_AGENT', ''):
+            # Opera has bunk XMLHttpRequest support for most mime types
+            content_type = 'text/plain'
+        else:
+            content_type = self.mime_type
+        headers = [
+            ('Content-type', content_type),
+            ('Content-length', len(res)),
+        ]
+        headers.extend(response['headers'])
+        start_response(response['status'], headers)
+        return [res]
+
+def factory(app, global_conf, **kw):
+    return JSONFilter(app, **kw)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/scanner.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/scanner.py b/tools/bin/ext/simplejson/scanner.py
new file mode 100755
index 0000000..64f4999
--- /dev/null
+++ b/tools/bin/ext/simplejson/scanner.py
@@ -0,0 +1,63 @@
+"""
+Iterator based sre token scanner
+"""
+import sre_parse, sre_compile, sre_constants
+from sre_constants import BRANCH, SUBPATTERN
+from re import VERBOSE, MULTILINE, DOTALL
+import re
+
+__all__ = ['Scanner', 'pattern']
+
+# Default regex flags applied to every lexicon pattern.
+FLAGS = (VERBOSE | MULTILINE | DOTALL)
+class Scanner(object):
+    # Compiles a list of @pattern-decorated callables into one regex
+    # alternation; each match dispatches back to the callable whose
+    # group matched.
+    def __init__(self, lexicon, flags=FLAGS):
+        # actions[0] is a placeholder: group indexes in the compiled
+        # pattern (m.lastindex) are 1-based.
+        self.actions = [None]
+        # combine phrases into a compound pattern
+        s = sre_parse.Pattern()
+        s.flags = flags
+        p = []
+        for idx, token in enumerate(lexicon):
+            phrase = token.pattern
+            try:
+                # Wrap each phrase in its own numbered group so lastindex
+                # identifies which token matched.
+                subpattern = sre_parse.SubPattern(s,
+                    [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
+            except sre_constants.error:
+                raise
+            p.append(subpattern)
+            self.actions.append(token)
+
+        # Single BRANCH (alternation) over all token subpatterns.
+        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+        self.scanner = sre_compile.compile(p)
+
+
+    def iterscan(self, string, idx=0, context=None):
+        """
+        Yield match, end_idx for each match
+        """
+        match = self.scanner.scanner(string, idx).match
+        actions = self.actions
+        lastend = idx
+        end = len(string)
+        while True:
+            m = match()
+            if m is None:
+                break
+            matchbegin, matchend = m.span()
+            if lastend == matchend:
+                # Zero-width progress: stop rather than loop forever.
+                break
+            action = actions[m.lastindex]
+            if action is not None:
+                rval, next_pos = action(m, context)
+                if next_pos is not None and next_pos != matchend:
+                    # "fast forward" the scanner
+                    matchend = next_pos
+                    match = self.scanner.scanner(string, matchend).match
+                yield rval, matchend
+            lastend = matchend
+            
+def pattern(pattern, flags=FLAGS):
+    def decorator(fn):
+        fn.pattern = pattern
+        fn.regex = re.compile(pattern, flags)
+        return fn
+    return decorator

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/__init__.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/__init__.py b/tools/bin/ext/simplejson/tests/__init__.py
new file mode 100755
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_attacks.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_attacks.py b/tools/bin/ext/simplejson/tests/test_attacks.py
new file mode 100755
index 0000000..8ecfed8
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_attacks.py
@@ -0,0 +1,6 @@
+def test_script_close_attack():
+    # '</script>' must never appear verbatim in encoder output: it could
+    # prematurely close a <script> tag when JSON is embedded in HTML.
+    import simplejson
+    res = simplejson.dumps('</script>')
+    assert '</script>' not in res
+    res = simplejson.dumps(simplejson.loads('"</script>"'))
+    assert '</script>' not in res

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_dump.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_dump.py b/tools/bin/ext/simplejson/tests/test_dump.py
new file mode 100755
index 0000000..b4e236e
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_dump.py
@@ -0,0 +1,10 @@
+from cStringIO import StringIO
+import simplejson as S
+
+def test_dump():
+    sio = StringIO()
+    S.dump({}, sio)
+    assert sio.getvalue() == '{}'
+    
+def test_dumps():
+    assert S.dumps({}) == '{}'

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_fail.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_fail.py b/tools/bin/ext/simplejson/tests/test_fail.py
new file mode 100755
index 0000000..a99d9c4
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_fail.py
@@ -0,0 +1,70 @@
+# Fri Dec 30 18:57:26 2005
+# Documents that the parser MUST reject (JSON_checker "fail" suite).
+JSONDOCS = [
+    # http://json.org/JSON_checker/test/fail1.json
+    '"A JSON payload should be an object or array, not a string."',
+    # http://json.org/JSON_checker/test/fail2.json
+    '["Unclosed array"',
+    # http://json.org/JSON_checker/test/fail3.json
+    '{unquoted_key: "keys must be quoted}',
+    # http://json.org/JSON_checker/test/fail4.json
+    '["extra comma",]',
+    # http://json.org/JSON_checker/test/fail5.json
+    '["double extra comma",,]',
+    # http://json.org/JSON_checker/test/fail6.json
+    '[   , "<-- missing value"]',
+    # http://json.org/JSON_checker/test/fail7.json
+    '["Comma after the close"],',
+    # http://json.org/JSON_checker/test/fail8.json
+    '["Extra close"]]',
+    # http://json.org/JSON_checker/test/fail9.json
+    '{"Extra comma": true,}',
+    # http://json.org/JSON_checker/test/fail10.json
+    '{"Extra value after close": true} "misplaced quoted value"',
+    # http://json.org/JSON_checker/test/fail11.json
+    '{"Illegal expression": 1 + 2}',
+    # http://json.org/JSON_checker/test/fail12.json
+    '{"Illegal invocation": alert()}',
+    # http://json.org/JSON_checker/test/fail13.json
+    '{"Numbers cannot have leading zeroes": 013}',
+    # http://json.org/JSON_checker/test/fail14.json
+    '{"Numbers cannot be hex": 0x14}',
+    # http://json.org/JSON_checker/test/fail15.json
+    '["Illegal backslash escape: \\x15"]',
+    # http://json.org/JSON_checker/test/fail16.json
+    '["Illegal backslash escape: \\\'"]',
+    # http://json.org/JSON_checker/test/fail17.json
+    '["Illegal backslash escape: \\017"]',
+    # http://json.org/JSON_checker/test/fail18.json
+    '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
+    # http://json.org/JSON_checker/test/fail19.json
+    '{"Missing colon" null}',
+    # http://json.org/JSON_checker/test/fail20.json
+    '{"Double colon":: null}',
+    # http://json.org/JSON_checker/test/fail21.json
+    '{"Comma instead of colon", null}',
+    # http://json.org/JSON_checker/test/fail22.json
+    '["Colon instead of comma": false]',
+    # http://json.org/JSON_checker/test/fail23.json
+    '["Bad value", truth]',
+    # http://json.org/JSON_checker/test/fail24.json
+    "['single quote']",
+]
+
+# 1-based indexes of documents this implementation deliberately accepts,
+# with the rationale for each deviation from JSON_checker.
+SKIPS = {
+    1: "why not have a string payload?",
+    18: "spec doesn't specify any nesting limitations",
+}
+
+def test_failures():
+    # Every non-skipped document must raise ValueError; skipped ones must
+    # parse cleanly.
+    import simplejson
+    for idx, doc in enumerate(JSONDOCS):
+        idx = idx + 1
+        if idx in SKIPS:
+            simplejson.loads(doc)
+            continue
+        try:
+            simplejson.loads(doc)
+        except ValueError:
+            pass
+        else:
+            assert False, "Expected failure for fail%d.json: %r" % (idx, doc)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_float.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_float.py b/tools/bin/ext/simplejson/tests/test_float.py
new file mode 100755
index 0000000..ee93358
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_float.py
@@ -0,0 +1,4 @@
+def test_floats():
+    import simplejson
+    for num in [1617161771.7650001]:
+        assert simplejson.dumps(num) == repr(num)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_indent.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_indent.py b/tools/bin/ext/simplejson/tests/test_indent.py
new file mode 100755
index 0000000..47dd4dc
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_indent.py
@@ -0,0 +1,41 @@
+
+
+
+def test_indent():
+    # indent=2 with explicit separators must produce exactly the pretty
+    # layout below, and the indented output must round-trip losslessly.
+    import simplejson
+    import textwrap
+    
+    h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+         {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+    expect = textwrap.dedent("""\
+    [
+      [
+        "blorpie"
+      ],
+      [
+        "whoops"
+      ],
+      [],
+      "d-shtaeou",
+      "d-nthiouh",
+      "i-vhbjkhnth",
+      {
+        "nifty": 87
+      },
+      {
+        "field": "yes",
+        "morefield": false
+      }
+    ]""")
+
+
+    d1 = simplejson.dumps(h)
+    d2 = simplejson.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+    h1 = simplejson.loads(d1)
+    h2 = simplejson.loads(d2)
+
+    assert h1 == h
+    assert h2 == h
+    assert d2 == expect

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_pass1.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_pass1.py b/tools/bin/ext/simplejson/tests/test_pass1.py
new file mode 100755
index 0000000..4eda192
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_pass1.py
@@ -0,0 +1,72 @@
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "":  23456789012E666,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[  ],
+        "object":{  },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5        ,          6           ,7        ],
+        "compact": [1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+def test_parse():
+    # test in/out equivalence and parsing
+    import simplejson
+    res = simplejson.loads(JSON)
+    out = simplejson.dumps(res)
+    assert res == simplejson.loads(out)
+    try:
+        simplejson.dumps(res, allow_nan=False)
+    except ValueError:
+        pass
+    else:
+        assert False, "23456789012E666 should be out of range"

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_pass2.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_pass2.py b/tools/bin/ext/simplejson/tests/test_pass2.py
new file mode 100755
index 0000000..ae74abb
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_pass2.py
@@ -0,0 +1,11 @@
+# from http://json.org/JSON_checker/test/pass2.json
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+def test_parse():
+    # test in/out equivalence and parsing
+    import simplejson
+    res = simplejson.loads(JSON)
+    out = simplejson.dumps(res)
+    assert res == simplejson.loads(out)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_pass3.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_pass3.py b/tools/bin/ext/simplejson/tests/test_pass3.py
new file mode 100755
index 0000000..d94893f
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_pass3.py
@@ -0,0 +1,16 @@
+# from http://json.org/JSON_checker/test/pass3.json
+JSON = r'''
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
+'''
+
+def test_parse():
+    # test in/out equivalence and parsing
+    import simplejson
+    res = simplejson.loads(JSON)
+    out = simplejson.dumps(res)
+    assert res == simplejson.loads(out)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_recursion.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_recursion.py b/tools/bin/ext/simplejson/tests/test_recursion.py
new file mode 100755
index 0000000..756b066
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_recursion.py
@@ -0,0 +1,62 @@
+import simplejson
+
+def test_listrecursion():
+    # A list containing itself, directly or through an intermediate list,
+    # must make dumps() raise ValueError instead of recursing forever.
+    x = []
+    x.append(x)
+    try:
+        simplejson.dumps(x)
+    except ValueError:
+        pass
+    else:
+        assert False, "didn't raise ValueError on list recursion"
+    x = []
+    y = [x]
+    x.append(y)
+    try:
+        simplejson.dumps(x)
+    except ValueError:
+        pass
+    else:
+        assert False, "didn't raise ValueError on alternating list recursion"
+    y = []
+    x = [y, y]
+    # ensure that the marker is cleared
+    # (shared but non-circular substructure must still encode cleanly)
+    simplejson.dumps(x)
+
+def test_dictrecursion():
+    x = {}
+    x["test"] = x
+    try:
+        simplejson.dumps(x)
+    except ValueError:
+        pass
+    else:
+        assert False, "didn't raise ValueError on dict recursion"
+    x = {}
+    y = {"a": x, "b": x}
+    # ensure that the marker is cleared
+    simplejson.dumps(x)
+
+class TestObject:
+    """Sentinel: the class object itself is what gets passed to the encoder."""
+    pass
+
+class RecursiveJSONEncoder(simplejson.JSONEncoder):
+    recurse = False
+    def default(self, o):
+        if o is TestObject:
+            if self.recurse:
+                return [TestObject]
+            else:
+                return 'TestObject'
+        simplejson.JSONEncoder.default(o)
+
+def test_defaultrecursion():
+    # When default() keeps returning a structure containing the same
+    # object, the encoder must detect the cycle and raise ValueError.
+    enc = RecursiveJSONEncoder()
+    assert enc.encode(TestObject) == '"TestObject"'
+    enc.recurse = True
+    try:
+        enc.encode(TestObject)
+    except ValueError:
+        pass
+    else:
+        assert False, "didn't raise ValueError on default recursion"

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_separators.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_separators.py b/tools/bin/ext/simplejson/tests/test_separators.py
new file mode 100755
index 0000000..a615354
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_separators.py
@@ -0,0 +1,41 @@
+
+
+
+def test_separators():
+    # Custom separators (' ,', ' : ') with indent=2 must produce exactly
+    # the layout below, and the output must still round-trip.
+    import simplejson
+    import textwrap
+    
+    h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+         {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+    expect = textwrap.dedent("""\
+    [
+      [
+        "blorpie"
+      ] ,
+      [
+        "whoops"
+      ] ,
+      [] ,
+      "d-shtaeou" ,
+      "d-nthiouh" ,
+      "i-vhbjkhnth" ,
+      {
+        "nifty" : 87
+      } ,
+      {
+        "field" : "yes" ,
+        "morefield" : false
+      }
+    ]""")
+
+
+    d1 = simplejson.dumps(h)
+    d2 = simplejson.dumps(h, indent=2, sort_keys=True, separators=(' ,', ' : '))
+
+    h1 = simplejson.loads(d1)
+    h2 = simplejson.loads(d2)
+
+    assert h1 == h
+    assert h2 == h
+    assert d2 == expect

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a485be47/tools/bin/ext/simplejson/tests/test_unicode.py
----------------------------------------------------------------------
diff --git a/tools/bin/ext/simplejson/tests/test_unicode.py b/tools/bin/ext/simplejson/tests/test_unicode.py
new file mode 100755
index 0000000..88d0939
--- /dev/null
+++ b/tools/bin/ext/simplejson/tests/test_unicode.py
@@ -0,0 +1,16 @@
+import simplejson as S
+
+def test_encoding1():
+    # A unicode string and its utf-8 byte-string form must encode to
+    # identical JSON when the encoder's encoding is 'utf-8'.
+    encoder = S.JSONEncoder(encoding='utf-8')
+    u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+    s = u.encode('utf-8')
+    ju = encoder.encode(u)
+    js = encoder.encode(s)
+    assert ju == js
+    
+def test_encoding2():
+    # Same unicode/byte-string equivalence via the module-level
+    # dumps(..., encoding='utf-8') API.
+    u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+    s = u.encode('utf-8')
+    ju = S.dumps(u, encoding='utf-8')
+    js = S.dumps(s, encoding='utf-8')
+    assert ju == js