You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/04/07 14:12:48 UTC

[kudu] branch master updated: [python] KUDU-2632 Add DATE type support

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 80fb9a8  [python] KUDU-2632 Add DATE type support
80fb9a8 is described below

commit 80fb9a8da031c2768a3bd7afbcf652a271a1759d
Author: Volodymyr Verovkin <ve...@cloudera.com>
AuthorDate: Fri Apr 3 15:21:10 2020 -0700

    [python] KUDU-2632 Add DATE type support
    
    There are two utility functions which perform conversion between
    Kudu DATE and Python datetime.date:
    
    unix_epoch_days_to_date() - converts the number of days since the
    Unix epoch to datetime.date()
    
    date_to_unix_epoch_days() - converts datetime.date() to the number
    of days since the Unix epoch
    
    Change-Id: I1f08946e9ba56dab5e5b43e2bf65bc535c26ab25
    Reviewed-on: http://gerrit.cloudera.org:8080/15645
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Reviewed-by: Grant Henke <gr...@apache.org>
---
 python/kudu/__init__.py             |  1 +
 python/kudu/client.pyx              | 26 ++++++++++++++++++++++++--
 python/kudu/libkudu_client.pxd      |  7 +++++++
 python/kudu/schema.pyx              | 14 +++++++++++---
 python/kudu/tests/test_scanner.py   |  9 +++++++--
 python/kudu/tests/test_scantoken.py |  4 ++++
 python/kudu/tests/test_schema.py    | 13 +++++++++++++
 python/kudu/tests/util.py           | 17 +++++++++++++----
 python/kudu/util.py                 | 33 +++++++++++++++++++++++++++++++++
 9 files changed, 113 insertions(+), 11 deletions(-)

diff --git a/python/kudu/__init__.py b/python/kudu/__init__.py
index 2dc5387..4c53905 100644
--- a/python/kudu/__init__.py
+++ b/python/kudu/__init__.py
@@ -40,6 +40,7 @@ from kudu.errors import (KuduException, KuduBadStatus, KuduNotFound,  # noqa
 from kudu.schema import (int8, int16, int32, int64, string_ as string,  # noqa
                          double_ as double, float_, float_ as float, binary,
                          unixtime_micros, bool_ as bool, decimal, varchar,
+                         date,
                          KuduType,
                          SchemaBuilder, ColumnSpec, Schema, ColumnSchema,
                          COMPRESSION_DEFAULT,
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 7bc8ce5..bb04895 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -32,7 +32,8 @@ from kudu.compat import tobytes, frombytes, dict_iter
 from kudu.schema cimport Schema, ColumnSchema, ColumnSpec, KuduValue, KuduType
 from kudu.errors cimport check_status
 from kudu.util import to_unixtime_micros, from_unixtime_micros, \
-    from_hybridtime, to_unscaled_decimal, from_unscaled_decimal
+    from_hybridtime, to_unscaled_decimal, from_unscaled_decimal, \
+    unix_epoch_days_to_date, date_to_unix_epoch_days
 from errors import KuduException
 
 import six
@@ -86,7 +87,8 @@ cdef dict _type_names = {
     KUDU_BINARY : "KUDU_BINARY",
     KUDU_UNIXTIME_MICROS : "KUDU_UNIXTIME_MICROS",
     KUDU_DECIMAL : "KUDU_DECIMAL",
-    KUDU_VARCHAR : "KUDU_VARCHAR"
+    KUDU_VARCHAR : "KUDU_VARCHAR",
+    KUDU_DATE : "KUDU_DATE"
 }
 
 # Range Partition Bound Type enums
@@ -733,6 +735,14 @@ cdef class UnixtimeMicrosVal(RawValue):
         self.val = to_unixtime_micros(obj)
         self.data = &self.val
 
+cdef class DateVal(RawValue):
+    cdef:
+        int32_t val
+
+    def __cinit__(self, obj):
+        self.val = date_to_unix_epoch_days(obj)
+        self.data = &self.val
+
 #----------------------------------------------------------------------
 cdef class TabletServer:
     """
@@ -1570,6 +1580,11 @@ cdef class Row:
         return cpython.PyBytes_FromStringAndSize(<char*> val.mutable_data(),
                                                  val.size())
 
+    cdef inline get_date(self, int i):
+        cdef int32_t val
+        check_status(self.row.GetDate(i, &val))
+        return unix_epoch_days_to_date(val)
+
     cdef inline get_slot(self, int i):
         cdef:
             Status s
@@ -1599,6 +1614,8 @@ cdef class Row:
             return self.get_decimal(i)
         elif t == KUDU_VARCHAR:
             return frombytes(self.get_varchar(i))
+        elif t == KUDU_DATE:
+            return self.get_date(i)
         else:
             raise TypeError("Cannot get kudu type <{0}>"
                                 .format(_type_names[t]))
@@ -2805,6 +2822,9 @@ cdef class PartialRow:
         elif t == KUDU_UNIXTIME_MICROS:
             check_status(self.row.SetUnixTimeMicros(i, <int64_t>
                 to_unixtime_micros(value)))
+        elif t == KUDU_DATE:
+            val = date_to_unix_epoch_days(value)
+            check_status(self.row.SetDate(i, <int32_t>val))
         elif t == KUDU_DECIMAL:
             IF PYKUDU_INT128_SUPPORTED == 1:
                 check_status(self.row.SetUnscaledDecimal(i, <int128_t>to_unscaled_decimal(value)))
@@ -2928,6 +2948,8 @@ cdef inline cast_pyvalue(DataType t, object o):
         return StringVal(o)
     elif t == KUDU_VARCHAR:
         return StringVal(o)
+    elif t == KUDU_DATE:
+        return DateVal(o)
     else:
         raise TypeError("Cannot cast kudu type <{0}>".format(_type_names[t]))
 
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index e599941..7080c07 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -126,6 +126,7 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
         KUDU_UNIXTIME_MICROS " kudu::client::KuduColumnSchema::UNIXTIME_MICROS"
         KUDU_DECIMAL " kudu::client::KuduColumnSchema::DECIMAL"
         KUDU_VARCHAR " kudu::client::KuduColumnSchema::VARCHAR"
+        KUDU_DATE " kudu::client::KuduColumnSchema::DATE"
 
     enum EncodingType" kudu::client::KuduColumnStorageAttributes::EncodingType":
         EncodingType_AUTO " kudu::client::KuduColumnStorageAttributes::AUTO_ENCODING"
@@ -271,6 +272,9 @@ cdef extern from "kudu/client/scan_batch.h" namespace "kudu::client" nogil:
         Status GetVarchar(const Slice& col_name, Slice* val)
         Status GetVarchar(int col_idx, Slice* val)
 
+        Status GetDate(Slice& col_name, int32_t* val)
+        Status GetDate(int col_idx, int32_t* val)
+
         const void* cell(int col_idx)
         string ToString()
 
@@ -369,6 +373,9 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
         Status SetVarchar(Slice& col_name, Slice& val)
         Status SetVarchar(int col_idx, Slice& val)
 
+        Status SetDate(Slice& col_name, int32_t val)
+        Status SetDate(int col_idx, int32_t val)
+
         Status SetNull(Slice& col_name)
         Status SetNull(int col_idx)
 
diff --git a/python/kudu/schema.pyx b/python/kudu/schema.pyx
index 35f7192..a71e153 100644
--- a/python/kudu/schema.pyx
+++ b/python/kudu/schema.pyx
@@ -26,7 +26,8 @@ from kudu.compat import tobytes, frombytes
 from kudu.schema cimport *
 from kudu.errors cimport check_status
 from kudu.client cimport PartialRow
-from kudu.util import get_decimal_scale, to_unixtime_micros, to_unscaled_decimal
+from kudu.util import get_decimal_scale, to_unixtime_micros, to_unscaled_decimal, \
+    unix_epoch_days_to_date, date_to_unix_epoch_days
 from errors import KuduException
 
 import six
@@ -46,6 +47,7 @@ FLOAT = KUDU_FLOAT
 DOUBLE = KUDU_DOUBLE
 
 UNIXTIME_MICROS = KUDU_UNIXTIME_MICROS
+DATE = KUDU_DATE
 BINARY = KUDU_BINARY
 
 DECIMAL = KUDU_DECIMAL
@@ -125,6 +127,7 @@ binary = KuduType(KUDU_BINARY)
 unixtime_micros = KuduType(KUDU_UNIXTIME_MICROS)
 decimal = KuduType(KUDU_DECIMAL)
 varchar = KuduType(KUDU_VARCHAR)
+date = KuduType(KUDU_DATE)
 
 
 cdef dict _type_names = {
@@ -139,7 +142,8 @@ cdef dict _type_names = {
     BINARY: 'binary',
     UNIXTIME_MICROS: 'unixtime_micros',
     DECIMAL: 'decimal',
-    VARCHAR: 'varchar'
+    VARCHAR: 'varchar',
+    DATE: 'date'
 }
 
 
@@ -157,7 +161,8 @@ cdef dict _type_to_obj = {
     BINARY: binary,
     UNIXTIME_MICROS: unixtime_micros,
     DECIMAL: decimal,
-    VARCHAR: varchar
+    VARCHAR: varchar,
+    DATE: date
 }
 
 
@@ -796,6 +801,9 @@ cdef class KuduValue:
         elif (type_.name == 'unixtime_micros'):
             value = to_unixtime_micros(value)
             self._value = C_KuduValue.FromInt(value)
+        elif (type_.name == 'date'):
+            val = date_to_unix_epoch_days(value)
+            self._value = C_KuduValue.FromInt(val)
         elif (type_.name == 'decimal'):
             IF PYKUDU_INT128_SUPPORTED == 1:
                 scale = get_decimal_scale(value)
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index b7e21b6..fa40625 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -315,6 +315,9 @@ class TestScanner(TestScanBase):
     def test_varchar_pred(self):
         self._test_varchar_pred()
 
+    def test_date_pred(self):
+        self._test_date_pred()
+
     def test_scan_selection(self):
         """
         This test confirms that setting the scan selection policy on the
@@ -352,7 +355,8 @@ class TestScanner(TestScanBase):
             self.assertEqual(types[6], np.int8)
             self.assertEqual(types[7], np.object)
             self.assertEqual(types[8], np.object)
-            self.assertEqual(types[9], np.float32)
+            self.assertEqual(types[9], np.object)
+            self.assertEqual(types[10], np.float32)
         else:
             self.assertEqual(types[0], np.int64)
             self.assertEqual(types[1], 'datetime64[ns, UTC]')
@@ -362,7 +366,8 @@ class TestScanner(TestScanBase):
             self.assertEqual(types[5], np.int8)
             self.assertEqual(types[6], np.object)
             self.assertEqual(types[7], np.object)
-            self.assertEqual(types[8], np.float32)
+            self.assertEqual(types[8], np.object)
+            self.assertEqual(types[9], np.float32)
 
     @pytest.mark.skipif(not (kudu.CLIENT_SUPPORTS_PANDAS),
                         reason="Pandas required to run this test.")
diff --git a/python/kudu/tests/test_scantoken.py b/python/kudu/tests/test_scantoken.py
index cfea352..0c1fda6 100644
--- a/python/kudu/tests/test_scantoken.py
+++ b/python/kudu/tests/test_scantoken.py
@@ -272,6 +272,10 @@ class TestScanToken(TestScanBase):
         # Test a varchar predicate
         self._test_varchar_pred()
 
+    def test_date_pred(self):
+        # Test a date predicate
+        self._test_date_pred()
+
     def test_scan_selection(self):
         """
         This test confirms that setting the scan selection policy on the
diff --git a/python/kudu/tests/test_schema.py b/python/kudu/tests/test_schema.py
index 1b81bbb..dca9ab5 100644
--- a/python/kudu/tests/test_schema.py
+++ b/python/kudu/tests/test_schema.py
@@ -182,6 +182,19 @@ class TestSchema(unittest.TestCase):
         with self.assertRaises(kudu.KuduInvalidArgument):
             builder.build()
 
+    def test_date(self):
+        builder = kudu.schema_builder()
+        (builder.add_column('key')
+         .type('date')
+         .primary_key()
+         .nullable(False))
+        schema = builder.build()
+
+        column = schema[0]
+        tp = column.type
+        assert tp.name == 'date'
+        assert tp.type == kudu.schema.DATE
+
     def test_varchar(self):
         builder = kudu.schema_builder()
         (builder.add_column('key')
diff --git a/python/kudu/tests/util.py b/python/kudu/tests/util.py
index 62f94d2..c3c91a5 100644
--- a/python/kudu/tests/util.py
+++ b/python/kudu/tests/util.py
@@ -73,6 +73,7 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
         builder.add_column('int8_val', type_=kudu.int8)
         builder.add_column('binary_val', type_='binary', compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
         builder.add_column('varchar_val', type_=kudu.varchar, length=10)
+        builder.add_column('date_val', type_=kudu.date)
         builder.add_column('float_val', type_=kudu.float)
         builder.set_primary_keys(['key', 'unixtime_micros_val'])
         schema = builder.build()
@@ -104,22 +105,22 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
                 (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc), Decimal('111.11'),
                  "Test One", True, 1.7976931348623157 * (10^308), 127,
                  b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
-                 "Test One", 3.402823 * (10^38)),
+                 "Test One", datetime.date(1970,1,1), 3.402823 * (10^38)),
                 (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), Decimal('0.99'),
                  "测试二", False, 200.1, -1,
                  b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
-                 "测试二", -150.2)
+                 "测试二", datetime.date(2020,1,1), -150.2)
             ]
         else:
             self.type_test_rows = [
                 (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
                  "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
                  b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
-                 "Test One", 3.402823 * (10 ^ 38)),
+                 "Test One", datetime.date(1970,1,1), 3.402823 * (10 ^ 38)),
                 (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
                  "测试二", False, 200.1, -1,
                  b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
-                 "测试二", -150.2)
+                 "测试二", datetime.date(2020,1,1), -150.2)
             ]
         session = self.client.new_session()
         for row in self.type_test_rows:
@@ -264,3 +265,11 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
             ],
             row_indexes=slice(0, 1)
         )
+
+    def _test_date_pred(self):
+        self.verify_pred_type_scans(
+            preds=[
+                self.type_table['date_val'] == datetime.date(1970, 1, 1)
+            ],
+            row_indexes=slice(0, 1)
+        )
diff --git a/python/kudu/util.py b/python/kudu/util.py
index 61062fb..68c8669 100644
--- a/python/kudu/util.py
+++ b/python/kudu/util.py
@@ -180,3 +180,36 @@ def get_decimal_scale(decimal):
        int : The calculated scale
        """
     return max(0, -decimal.as_tuple().exponent)
+
+def unix_epoch_days_to_date(ndays):
+    """
+    Convert a number of days since the unix epoch into a datetime.date in UTC.
+    The number of days is converted into a timestamp by multiplying it by the
+    number of seconds per day (86400).
+
+    Parameters
+    ---------
+    ndays : integer
+      The number of days since the unix epoch
+
+    Returns
+    -------
+    datetime.date : calendar date for "ndays" days since unix epoch
+    """
+    return datetime.datetime.utcfromtimestamp(ndays * 86400).date()
+
+def date_to_unix_epoch_days(date):
+    """
+    Convert a datetime.date value to an integer representing
+    the number of days since the unix epoch.
+
+    Parameters
+    ---------
+    date : datetime.date
+
+    Returns
+    -------
+    int : Number of days since unix epoch
+    """
+    delta = date - datetime.datetime.utcfromtimestamp(0).date()
+    return delta.days
\ No newline at end of file