You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2024/02/26 17:42:33 UTC
(orc) branch branch-2.0 updated: ORC-1633: Add test for `sizes` command
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 4e06dd8aa ORC-1633: Add test for `sizes` command
4e06dd8aa is described below
commit 4e06dd8aaabdad04c5f251a85d27283312bd5aab
Author: sychen <sy...@ctrip.com>
AuthorDate: Mon Feb 26 09:42:18 2024 -0800
ORC-1633: Add test for `sizes` command
### What changes were proposed in this pull request?
Add unit test for `sizes` command of `orc-tools`.
### Why are the changes needed?
Improve code test coverage and avoid regressions.
### How was this patch tested?
Add UT.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #1818 from cxzl25/ORC-1633.
Authored-by: sychen <sy...@ctrip.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit 71f18e163b8819e3cce4744bb37ddf5c89327239)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../apache/orc/tools/sizes/TestColumnSizes.java | 100 +++++++++++++++++++++
1 file changed, 100 insertions(+)
diff --git a/java/tools/src/test/org/apache/orc/tools/sizes/TestColumnSizes.java b/java/tools/src/test/org/apache/orc/tools/sizes/TestColumnSizes.java
new file mode 100644
index 000000000..4b1c57bc4
--- /dev/null
+++ b/java/tools/src/test/org/apache/orc/tools/sizes/TestColumnSizes.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.tools.sizes;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.tools.ColumnSizes;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Paths;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestColumnSizes {
+ private Path workDir = new Path(
+ Paths.get(System.getProperty("test.tmp.dir"), "orc-test-sizes").toString());
+ private Configuration conf;
+ private FileSystem fs;
+
+ @BeforeEach
+ public void openFileSystem() throws Exception {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ fs.mkdirs(workDir);
+ fs.deleteOnExit(workDir);
+ }
+
+ @Test
+ public void testSizes() throws Exception {
+ TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");
+ Map<String, Integer> fileToRowCountMap = new LinkedHashMap<>();
+ fileToRowCountMap.put("test-sizes-1.orc", 10000);
+ fileToRowCountMap.put("test-sizes-2.orc", 20000);
+ for (Map.Entry<String, Integer> fileToRowCount : fileToRowCountMap.entrySet()) {
+ Writer writer = OrcFile.createWriter(new Path(fileToRowCount.getKey()),
+ OrcFile.writerOptions(conf)
+ .setSchema(schema));
+ VectorizedRowBatch batch = schema.createRowBatch();
+ LongColumnVector x = (LongColumnVector) batch.cols[0];
+ BytesColumnVector y = (BytesColumnVector) batch.cols[1];
+ for (int r = 0; r < fileToRowCount.getValue(); ++r) {
+ int row = batch.size++;
+ x.vector[row] = r;
+ byte[] buffer = ("byte-" + r).getBytes();
+ y.setRef(row, buffer, 0, buffer.length);
+ if (batch.size == batch.getMaxSize()) {
+ writer.addRowBatch(batch);
+ batch.reset();
+ }
+ }
+ if (batch.size != 0) {
+ writer.addRowBatch(batch);
+ }
+ writer.close();
+ }
+
+ PrintStream origOut = System.out;
+ ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+ // replace stdout and run command
+ System.setOut(new PrintStream(myOut, false, StandardCharsets.UTF_8));
+ ColumnSizes.main(conf, new String[]{"--summary", workDir.toString()});
+ System.out.flush();
+ System.setOut(origOut);
+ String output = myOut.toString(StandardCharsets.UTF_8);
+ assertTrue(output.contains("Total Files: 2"));
+ assertTrue(output.contains("Total Rows: 30000"));
+ assertTrue(output.contains("Percent Bytes/Row Name"));
+ assertTrue(output.contains("_stripe_footer"));
+ }
+}