You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/12/03 21:06:03 UTC
[orc] branch branch-1.4 updated: ORC-231: Configurable capability
to overwrite the file if it exists.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.4
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.4 by this push:
new a22c501 ORC-231: Configurable capability to overwrite the file if it exists.
a22c501 is described below
commit a22c501af3d88955b66880f5bb895f6313b095a5
Author: Ajay Yadava <aj...@apache.org>
AuthorDate: Mon Aug 21 23:49:54 2017 -0700
ORC-231: Configurable capability to overwrite the file if it exists.
The default value of this flag is false and this change is backward
compatible.
Fixes #162
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/src/java/org/apache/orc/OrcConf.java | 4 +-
java/core/src/java/org/apache/orc/OrcFile.java | 15 +++++
.../java/org/apache/orc/impl/PhysicalFsWriter.java | 2 +-
.../test/org/apache/orc/impl/TestWriterImpl.java | 71 ++++++++++++++++++++++
4 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 06b444b..ae9c48b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -144,7 +144,9 @@ public enum OrcConf {
"orc.force.positional.evolution", false,
"Require schema evolution to match the top level columns using position\n" +
"rather than column names. This provides backwards compatibility with\n" +
- "Hive 2.1.")
+ "Hive 2.1."),
+ OVERWRITE_OUTPUT_FILE("orc.overwrite.output.file", "orc.overwrite.output.file", false,
+ "A boolean flag to enable overwriting of the output file if it already exists.\n")
;
private final String attribute;
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 11281cb..30ac557 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -312,10 +312,12 @@ public class OrcFile {
private BloomFilterVersion bloomFilterVersion;
private PhysicalWriter physicalWriter;
private WriterVersion writerVersion = CURRENT_WRITER;
+ private boolean overwrite;
protected WriterOptions(Properties tableProperties, Configuration conf) {
configuration = conf;
memoryManagerValue = getStaticMemoryManager(conf);
+ overwrite = OrcConf.OVERWRITE_OUTPUT_FILE.getBoolean(tableProperties, conf);
stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
rowIndexStrideValue =
@@ -373,6 +375,15 @@ public class OrcFile {
}
/**
+ * Sets whether the output file is overwritten if it already exists.
+ * Defaults to the orc.overwrite.output.file setting (false unless configured); when false, the write fails if the file exists.
+ */
+ public WriterOptions overwrite(boolean value) {
+ overwrite = value;
+ return this;
+ }
+
+ /**
* Set the stripe size for the file. The writer stores the contents of the
* stripe in memory until this memory limit is reached and the stripe
* is flushed to the HDFS file and the next stripe started.
@@ -559,6 +570,10 @@ public class OrcFile {
return bloomFilterColumns;
}
+ public boolean getOverwrite() {
+ return overwrite;
+ }
+
public FileSystem getFileSystem() {
return fileSystemValue;
}
diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
index 1769182..06e762f 100644
--- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
@@ -88,7 +88,7 @@ public class PhysicalFsWriter implements PhysicalWriter {
LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
" compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
compress, bufferSize);
- rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
+ rawWriter = fs.create(path, opts.getOverwrite(), HDFS_BUFFER_SIZE,
fs.getDefaultReplication(path), blockSize);
codec = OrcCodecPool.getCodec(compress);
writer = new OutStream("metadata", bufferSize, codec,
diff --git a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
new file mode 100644
index 0000000..22bb1d3
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.orc.OrcConf;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWriterImpl {
+
+ Path workDir = new Path(System.getProperty("test.tmp.dir"));
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+ TypeDescription schema;
+
+ @Before
+ public void openFileSystem() throws Exception { // per-test fixture: local FS rooted at workDir with a pre-existing output file
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ testFilePath = new Path("testWriterImpl.orc");
+ fs.create(testFilePath, true).close(); // only the file's existence matters; close the returned stream so it is not leaked
+ schema = TypeDescription.fromString("struct<x:int,y:int>");
+ }
+
+ @After
+ public void deleteTestFile() throws Exception {
+ fs.delete(testFilePath, false);
+ }
+
+ @Test(expected = FileAlreadyExistsException.class)
+ public void testDefaultOverwriteFlagForWriter() throws Exception {
+ // default value of the overwrite flag is false, so this should fail
+ Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).setSchema(schema));
+ w.close();
+ }
+
+ @Test
+ public void testOverriddenOverwriteFlagForWriter() throws Exception {
+ // overriding the flag should result in a successful write (no exception)
+ conf.set(OrcConf.OVERWRITE_OUTPUT_FILE.getAttribute(), "true");
+ Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).setSchema(schema));
+ w.close();
+ }
+}