You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2020/04/07 14:12:48 UTC
[kudu] branch master updated: [python] KUDU-2632 Add DATE type
support
This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 80fb9a8 [python] KUDU-2632 Add DATE type support
80fb9a8 is described below
commit 80fb9a8da031c2768a3bd7afbcf652a271a1759d
Author: Volodymyr Verovkin <ve...@cloudera.com>
AuthorDate: Fri Apr 3 15:21:10 2020 -0700
[python] KUDU-2632 Add DATE type support
There are two utility functions which perform conversion between
Kudu DATE and Python datetime.date:
unix_epoch_days_to_date() - converts number of days since Unix
epoch to datetime.date()
date_to_unix_epoch_days() - converts datetime.date() to number
of days since Unix epoch
Change-Id: I1f08946e9ba56dab5e5b43e2bf65bc535c26ab25
Reviewed-on: http://gerrit.cloudera.org:8080/15645
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Grant Henke <gr...@apache.org>
---
python/kudu/__init__.py | 1 +
python/kudu/client.pyx | 26 ++++++++++++++++++++++++--
python/kudu/libkudu_client.pxd | 7 +++++++
python/kudu/schema.pyx | 14 +++++++++++---
python/kudu/tests/test_scanner.py | 9 +++++++--
python/kudu/tests/test_scantoken.py | 4 ++++
python/kudu/tests/test_schema.py | 13 +++++++++++++
python/kudu/tests/util.py | 17 +++++++++++++----
python/kudu/util.py | 33 +++++++++++++++++++++++++++++++++
9 files changed, 113 insertions(+), 11 deletions(-)
diff --git a/python/kudu/__init__.py b/python/kudu/__init__.py
index 2dc5387..4c53905 100644
--- a/python/kudu/__init__.py
+++ b/python/kudu/__init__.py
@@ -40,6 +40,7 @@ from kudu.errors import (KuduException, KuduBadStatus, KuduNotFound, # noqa
from kudu.schema import (int8, int16, int32, int64, string_ as string, # noqa
double_ as double, float_, float_ as float, binary,
unixtime_micros, bool_ as bool, decimal, varchar,
+ date,
KuduType,
SchemaBuilder, ColumnSpec, Schema, ColumnSchema,
COMPRESSION_DEFAULT,
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 7bc8ce5..bb04895 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -32,7 +32,8 @@ from kudu.compat import tobytes, frombytes, dict_iter
from kudu.schema cimport Schema, ColumnSchema, ColumnSpec, KuduValue, KuduType
from kudu.errors cimport check_status
from kudu.util import to_unixtime_micros, from_unixtime_micros, \
- from_hybridtime, to_unscaled_decimal, from_unscaled_decimal
+ from_hybridtime, to_unscaled_decimal, from_unscaled_decimal, \
+ unix_epoch_days_to_date, date_to_unix_epoch_days
from errors import KuduException
import six
@@ -86,7 +87,8 @@ cdef dict _type_names = {
KUDU_BINARY : "KUDU_BINARY",
KUDU_UNIXTIME_MICROS : "KUDU_UNIXTIME_MICROS",
KUDU_DECIMAL : "KUDU_DECIMAL",
- KUDU_VARCHAR : "KUDU_VARCHAR"
+ KUDU_VARCHAR : "KUDU_VARCHAR",
+ KUDU_DATE : "KUDU_DATE"
}
# Range Partition Bound Type enums
@@ -733,6 +735,14 @@ cdef class UnixtimeMicrosVal(RawValue):
self.val = to_unixtime_micros(obj)
self.data = &self.val
+cdef class DateVal(RawValue):
+ cdef:
+ int32_t val
+
+ def __cinit__(self, obj):
+ self.val = date_to_unix_epoch_days(obj)
+ self.data = &self.val
+
#----------------------------------------------------------------------
cdef class TabletServer:
"""
@@ -1570,6 +1580,11 @@ cdef class Row:
return cpython.PyBytes_FromStringAndSize(<char*> val.mutable_data(),
val.size())
+ cdef inline get_date(self, int i):
+ cdef int32_t val
+ check_status(self.row.GetDate(i, &val))
+ return unix_epoch_days_to_date(val)
+
cdef inline get_slot(self, int i):
cdef:
Status s
@@ -1599,6 +1614,8 @@ cdef class Row:
return self.get_decimal(i)
elif t == KUDU_VARCHAR:
return frombytes(self.get_varchar(i))
+ elif t == KUDU_DATE:
+ return self.get_date(i)
else:
raise TypeError("Cannot get kudu type <{0}>"
.format(_type_names[t]))
@@ -2805,6 +2822,9 @@ cdef class PartialRow:
elif t == KUDU_UNIXTIME_MICROS:
check_status(self.row.SetUnixTimeMicros(i, <int64_t>
to_unixtime_micros(value)))
+ elif t == KUDU_DATE:
+ val = date_to_unix_epoch_days(value)
+ check_status(self.row.SetDate(i, <int32_t>val))
elif t == KUDU_DECIMAL:
IF PYKUDU_INT128_SUPPORTED == 1:
check_status(self.row.SetUnscaledDecimal(i, <int128_t>to_unscaled_decimal(value)))
@@ -2928,6 +2948,8 @@ cdef inline cast_pyvalue(DataType t, object o):
return StringVal(o)
elif t == KUDU_VARCHAR:
return StringVal(o)
+ elif t == KUDU_DATE:
+ return DateVal(o)
else:
raise TypeError("Cannot cast kudu type <{0}>".format(_type_names[t]))
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index e599941..7080c07 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -126,6 +126,7 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
KUDU_UNIXTIME_MICROS " kudu::client::KuduColumnSchema::UNIXTIME_MICROS"
KUDU_DECIMAL " kudu::client::KuduColumnSchema::DECIMAL"
KUDU_VARCHAR " kudu::client::KuduColumnSchema::VARCHAR"
+ KUDU_DATE " kudu::client::KuduColumnSchema::DATE"
enum EncodingType" kudu::client::KuduColumnStorageAttributes::EncodingType":
EncodingType_AUTO " kudu::client::KuduColumnStorageAttributes::AUTO_ENCODING"
@@ -271,6 +272,9 @@ cdef extern from "kudu/client/scan_batch.h" namespace "kudu::client" nogil:
Status GetVarchar(const Slice& col_name, Slice* val)
Status GetVarchar(int col_idx, Slice* val)
+ Status GetDate(Slice& col_name, int32_t* val)
+ Status GetDate(int col_idx, int32_t* val)
+
const void* cell(int col_idx)
string ToString()
@@ -369,6 +373,9 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
Status SetVarchar(Slice& col_name, Slice& val)
Status SetVarchar(int col_idx, Slice& val)
+ Status SetDate(Slice& col_name, int32_t val)
+ Status SetDate(int col_idx, int32_t val)
+
Status SetNull(Slice& col_name)
Status SetNull(int col_idx)
diff --git a/python/kudu/schema.pyx b/python/kudu/schema.pyx
index 35f7192..a71e153 100644
--- a/python/kudu/schema.pyx
+++ b/python/kudu/schema.pyx
@@ -26,7 +26,8 @@ from kudu.compat import tobytes, frombytes
from kudu.schema cimport *
from kudu.errors cimport check_status
from kudu.client cimport PartialRow
-from kudu.util import get_decimal_scale, to_unixtime_micros, to_unscaled_decimal
+from kudu.util import get_decimal_scale, to_unixtime_micros, to_unscaled_decimal, \
+ unix_epoch_days_to_date, date_to_unix_epoch_days
from errors import KuduException
import six
@@ -46,6 +47,7 @@ FLOAT = KUDU_FLOAT
DOUBLE = KUDU_DOUBLE
UNIXTIME_MICROS = KUDU_UNIXTIME_MICROS
+DATE = KUDU_DATE
BINARY = KUDU_BINARY
DECIMAL = KUDU_DECIMAL
@@ -125,6 +127,7 @@ binary = KuduType(KUDU_BINARY)
unixtime_micros = KuduType(KUDU_UNIXTIME_MICROS)
decimal = KuduType(KUDU_DECIMAL)
varchar = KuduType(KUDU_VARCHAR)
+date = KuduType(KUDU_DATE)
cdef dict _type_names = {
@@ -139,7 +142,8 @@ cdef dict _type_names = {
BINARY: 'binary',
UNIXTIME_MICROS: 'unixtime_micros',
DECIMAL: 'decimal',
- VARCHAR: 'varchar'
+ VARCHAR: 'varchar',
+ DATE: 'date'
}
@@ -157,7 +161,8 @@ cdef dict _type_to_obj = {
BINARY: binary,
UNIXTIME_MICROS: unixtime_micros,
DECIMAL: decimal,
- VARCHAR: varchar
+ VARCHAR: varchar,
+ DATE: date
}
@@ -796,6 +801,9 @@ cdef class KuduValue:
elif (type_.name == 'unixtime_micros'):
value = to_unixtime_micros(value)
self._value = C_KuduValue.FromInt(value)
+ elif (type_.name == 'date'):
+ val = date_to_unix_epoch_days(value)
+ self._value = C_KuduValue.FromInt(val)
elif (type_.name == 'decimal'):
IF PYKUDU_INT128_SUPPORTED == 1:
scale = get_decimal_scale(value)
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index b7e21b6..fa40625 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -315,6 +315,9 @@ class TestScanner(TestScanBase):
def test_varchar_pred(self):
self._test_varchar_pred()
+ def test_date_pred(self):
+ self._test_date_pred()
+
def test_scan_selection(self):
"""
This test confirms that setting the scan selection policy on the
@@ -352,7 +355,8 @@ class TestScanner(TestScanBase):
self.assertEqual(types[6], np.int8)
self.assertEqual(types[7], np.object)
self.assertEqual(types[8], np.object)
- self.assertEqual(types[9], np.float32)
+ self.assertEqual(types[9], np.object)
+ self.assertEqual(types[10], np.float32)
else:
self.assertEqual(types[0], np.int64)
self.assertEqual(types[1], 'datetime64[ns, UTC]')
@@ -362,7 +366,8 @@ class TestScanner(TestScanBase):
self.assertEqual(types[5], np.int8)
self.assertEqual(types[6], np.object)
self.assertEqual(types[7], np.object)
- self.assertEqual(types[8], np.float32)
+ self.assertEqual(types[8], np.object)
+ self.assertEqual(types[9], np.float32)
@pytest.mark.skipif(not (kudu.CLIENT_SUPPORTS_PANDAS),
reason="Pandas required to run this test.")
diff --git a/python/kudu/tests/test_scantoken.py b/python/kudu/tests/test_scantoken.py
index cfea352..0c1fda6 100644
--- a/python/kudu/tests/test_scantoken.py
+++ b/python/kudu/tests/test_scantoken.py
@@ -272,6 +272,10 @@ class TestScanToken(TestScanBase):
# Test a varchar predicate
self._test_varchar_pred()
+ def test_date_pred(self):
+ # Test a date predicate
+ self._test_date_pred()
+
def test_scan_selection(self):
"""
This test confirms that setting the scan selection policy on the
diff --git a/python/kudu/tests/test_schema.py b/python/kudu/tests/test_schema.py
index 1b81bbb..dca9ab5 100644
--- a/python/kudu/tests/test_schema.py
+++ b/python/kudu/tests/test_schema.py
@@ -182,6 +182,19 @@ class TestSchema(unittest.TestCase):
with self.assertRaises(kudu.KuduInvalidArgument):
builder.build()
+ def test_date(self):
+ builder = kudu.schema_builder()
+ (builder.add_column('key')
+ .type('date')
+ .primary_key()
+ .nullable(False))
+ schema = builder.build()
+
+ column = schema[0]
+ tp = column.type
+ assert tp.name == 'date'
+ assert tp.type == kudu.schema.DATE
+
def test_varchar(self):
builder = kudu.schema_builder()
(builder.add_column('key')
diff --git a/python/kudu/tests/util.py b/python/kudu/tests/util.py
index 62f94d2..c3c91a5 100644
--- a/python/kudu/tests/util.py
+++ b/python/kudu/tests/util.py
@@ -73,6 +73,7 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
builder.add_column('int8_val', type_=kudu.int8)
builder.add_column('binary_val', type_='binary', compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
builder.add_column('varchar_val', type_=kudu.varchar, length=10)
+ builder.add_column('date_val', type_=kudu.date)
builder.add_column('float_val', type_=kudu.float)
builder.set_primary_keys(['key', 'unixtime_micros_val'])
schema = builder.build()
@@ -104,22 +105,22 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
(1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc), Decimal('111.11'),
"Test One", True, 1.7976931348623157 * (10^308), 127,
b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
- "Test One", 3.402823 * (10^38)),
+ "Test One", datetime.date(1970,1,1), 3.402823 * (10^38)),
(2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), Decimal('0.99'),
"测试二", False, 200.1, -1,
b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
- "测试二", -150.2)
+ "测试二", datetime.date(2020,1,1), -150.2)
]
else:
self.type_test_rows = [
(1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
"Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
- "Test One", 3.402823 * (10 ^ 38)),
+ "Test One", datetime.date(1970,1,1), 3.402823 * (10 ^ 38)),
(2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
"测试二", False, 200.1, -1,
b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
- "测试二", -150.2)
+ "测试二", datetime.date(2020,1,1), -150.2)
]
session = self.client.new_session()
for row in self.type_test_rows:
@@ -264,3 +265,11 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
],
row_indexes=slice(0, 1)
)
+
+ def _test_date_pred(self):
+ self.verify_pred_type_scans(
+ preds=[
+ self.type_table['date_val'] == datetime.date(1970, 1, 1)
+ ],
+ row_indexes=slice(0, 1)
+ )
diff --git a/python/kudu/util.py b/python/kudu/util.py
index 61062fb..68c8669 100644
--- a/python/kudu/util.py
+++ b/python/kudu/util.py
@@ -180,3 +180,36 @@ def get_decimal_scale(decimal):
int : The calculated scale
"""
return max(0, -decimal.as_tuple().exponent)
+
+def unix_epoch_days_to_date(ndays):
+ """
+ Convert number of days since the unix epoch into datetime.date in UTC timezone.
+ Number of days converted into timestamp by multiplying days on number
+ of seconds per day (86400).
+
+ Parameters
+ ---------
+ ndays : integer
+ The number of days since the unix epoch
+
+ Returns
+ -------
+ datetime.date : calendar date for "ndays" days since unix epoch
+ """
+ return datetime.datetime.utcfromtimestamp(ndays * 86400).date()
+
+def date_to_unix_epoch_days(date):
+ """
+ Convert datetime.date value to a integer representing
+ the number of days since the unix epoch.
+
+ Parameters
+ ---------
+ date : datetime.date
+
+ Returns
+ -------
+ int : Number of days since unix epoch
+ """
+ delta = date - datetime.datetime.utcfromtimestamp(0).date()
+ return delta.days
\ No newline at end of file