You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2022/01/04 20:00:09 UTC
[orc] branch main updated: ORC-1055: [C++] Add the timezone option for the csv-import tool (#975)
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 9a66348 ORC-1055: [C++] Add the timezone option for the csv-import tool (#975)
9a66348 is described below
commit 9a663489f0a79224a3ac3e9d93d90afaf8a7f747
Author: coderex2522 <re...@gmail.com>
AuthorDate: Wed Jan 5 03:59:20 2022 +0800
ORC-1055: [C++] Add the timezone option for the csv-import tool (#975)
### What changes were proposed in this pull request?
This pull request adds a timezone option to the csv-import tool.
### Why are the changes needed?
This new option mitigates the situation described in ORC-1055.
### How was this patch tested?
The unit test is TestCSVFileImport.testTimezoneOption in TestCSVFileImport.cc.
---
examples/TestCSVFileImport.testTimezoneOption.csv | 1 +
tools/src/CSVFileImport.cc | 10 +++++-
tools/test/TestCSVFileImport.cc | 37 +++++++++++++++++++++++
3 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/examples/TestCSVFileImport.testTimezoneOption.csv b/examples/TestCSVFileImport.testTimezoneOption.csv
new file mode 100644
index 0000000..9e87f23
--- /dev/null
+++ b/examples/TestCSVFileImport.testTimezoneOption.csv
@@ -0,0 +1 @@
+2021-12-27 00:00:00.000
\ No newline at end of file
diff --git a/tools/src/CSVFileImport.cc b/tools/src/CSVFileImport.cc
index 3857adc..7bbc9c6 100644
--- a/tools/src/CSVFileImport.cc
+++ b/tools/src/CSVFileImport.cc
@@ -280,8 +280,10 @@ void usage() {
<< " [-s <size>] [--stripe=<size>]\n"
<< " [-c <size>] [--block=<size>]\n"
<< " [-b <size>] [--batch=<size>]\n"
+ << " [-t <string>] [--timezone=<string>]\n"
<< " <schema> <input> <output>\n"
<< "Import CSV file into an Orc file using the specified schema.\n"
+ << "The timezone is writer timezone of timestamp types.\n"
<< "Compound types are not yet supported.\n";
}
@@ -289,6 +291,7 @@ int main(int argc, char* argv[]) {
std::string input;
std::string output;
std::string schema;
+ std::string timezoneName="GMT";
uint64_t stripeSize = (128 << 20); // 128M
uint64_t blockSize = 64 << 10; // 64K
uint64_t batchSize = 1024;
@@ -300,13 +303,14 @@ int main(int argc, char* argv[]) {
{"stripe", required_argument, ORC_NULLPTR, 'p'},
{"block", required_argument, ORC_NULLPTR, 'c'},
{"batch", required_argument, ORC_NULLPTR, 'b'},
+ {"timezone", required_argument, ORC_NULLPTR, 't'},
{ORC_NULLPTR, 0, ORC_NULLPTR, 0}
};
bool helpFlag = false;
int opt;
char *tail;
do {
- opt = getopt_long(argc, argv, "i:o:s:b:c:p:h", longOptions, ORC_NULLPTR);
+ opt = getopt_long(argc, argv, "i:o:s:b:c:p:t:h", longOptions, ORC_NULLPTR);
switch (opt) {
case '?':
case 'h':
@@ -337,6 +341,9 @@ int main(int argc, char* argv[]) {
return 1;
}
break;
+ case 't':
+ timezoneName = std::string(optarg);
+ break;
}
} while (opt != -1);
@@ -364,6 +371,7 @@ int main(int argc, char* argv[]) {
options.setStripeSize(stripeSize);
options.setCompressionBlockSize(blockSize);
options.setCompression(compression);
+ options.setTimezoneName(timezoneName);
ORC_UNIQUE_PTR<orc::OutputStream> outStream = orc::writeLocalFile(output);
ORC_UNIQUE_PTR<orc::Writer> writer =
diff --git a/tools/test/TestCSVFileImport.cc b/tools/test/TestCSVFileImport.cc
index 79bc3de..b4cd967 100644
--- a/tools/test/TestCSVFileImport.cc
+++ b/tools/test/TestCSVFileImport.cc
@@ -53,3 +53,40 @@ TEST (TestCSVFileImport, test10rows) {
EXPECT_EQ(expected, output);
EXPECT_EQ("", error);
}
+
+TEST (TestCSVFileImport, testTimezoneOption) {
+ // Run csv-import on the one-row example CSV with two different --timezone values and check what orc-contents prints for each resulting ORC file.
+ const std::string pgm1 = findProgram("tools/src/csv-import"); // importer under test
+ const std::string pgm2 = findProgram("tools/src/orc-contents"); // its output is compared against the expected text
+ const std::string csvFile =
+ findExample("TestCSVFileImport.testTimezoneOption.csv");
+ const std::string schema = "'struct<_a:timestamp>'";
+ std::string output; // passed to every runProgram call below
+ std::string error; // passed to every runProgram call below; expected to be "" after each run
+ { // case 1: --timezone=America/Los_Angeles
+ std::string orcFile = "/tmp/test_csv_import_test_timezone_option1.orc";
+ std::string option = "--timezone=America/Los_Angeles";
+ EXPECT_EQ(0, runProgram({pgm1, option, schema, csvFile, orcFile},
+ output, error));
+ EXPECT_EQ("", error);
+ // verify the ORC file content: the expected text is 8 hours before the CSV value 2021-12-27 00:00:00.000
+ const std::string expected =
+ "{\"_a\": \"2021-12-26 16:00:00.0\"}\n";
+ EXPECT_EQ(0, runProgram({pgm2, orcFile}, output, error));
+ EXPECT_EQ(expected, output);
+ EXPECT_EQ("", error);
+ }
+ { // case 2: --timezone=Europe/Paris, same CSV and schema, separate output file
+ std::string orcFile = "/tmp/test_csv_import_test_timezone_option2.orc";
+ std::string option = "--timezone=Europe/Paris";
+ EXPECT_EQ(0, runProgram({pgm1, option, schema, csvFile, orcFile},
+ output, error));
+ EXPECT_EQ("", error);
+ // verify the ORC file content: the expected text is 1 hour after the CSV value 2021-12-27 00:00:00.000
+ const std::string expected =
+ "{\"_a\": \"2021-12-27 01:00:00.0\"}\n";
+ EXPECT_EQ(0, runProgram({pgm2, orcFile}, output, error));
+ EXPECT_EQ(expected, output);
+ EXPECT_EQ("", error);
+ }
+}