You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/12/03 21:06:03 UTC

[orc] branch branch-1.4 updated: ORC-231: Configurable capability to overwrite the file if it exists.

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.4
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.4 by this push:
     new a22c501  ORC-231: Configurable capability to overwrite the file if it exists.
a22c501 is described below

commit a22c501af3d88955b66880f5bb895f6313b095a5
Author: Ajay Yadava <aj...@apache.org>
AuthorDate: Mon Aug 21 23:49:54 2017 -0700

    ORC-231: Configurable capability to overwrite the file if it exists.
    
    The default value of this flag is false and this change is backward
    compatible.
    
    Fixes #162
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java     |  4 +-
 java/core/src/java/org/apache/orc/OrcFile.java     | 15 +++++
 .../java/org/apache/orc/impl/PhysicalFsWriter.java |  2 +-
 .../test/org/apache/orc/impl/TestWriterImpl.java   | 71 ++++++++++++++++++++++
 4 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 06b444b..ae9c48b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -144,7 +144,9 @@ public enum OrcConf {
       "orc.force.positional.evolution", false,
       "Require schema evolution to match the top level columns using position\n" +
       "rather than column names. This provides backwards compatibility with\n" +
-      "Hive 2.1.")
+      "Hive 2.1."),
+  OVERWRITE_OUTPUT_FILE("orc.overwrite.output.file", "orc.overwrite.output.file", false,
+    "A boolean flag to enable overwriting of the output file if it already exists.\n")
   ;
 
   private final String attribute;
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 11281cb..30ac557 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -312,10 +312,12 @@ public class OrcFile {
     private BloomFilterVersion bloomFilterVersion;
     private PhysicalWriter physicalWriter;
     private WriterVersion writerVersion = CURRENT_WRITER;
+    private boolean overwrite;
 
     protected WriterOptions(Properties tableProperties, Configuration conf) {
       configuration = conf;
       memoryManagerValue = getStaticMemoryManager(conf);
+      overwrite = OrcConf.OVERWRITE_OUTPUT_FILE.getBoolean(tableProperties, conf);
       stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
       blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
       rowIndexStrideValue =
@@ -373,6 +375,15 @@ public class OrcFile {
     }
 
     /**
+     * Set whether to overwrite the output file if it already exists; when false,
+     * the write fails instead. Defaults to OrcConf.OVERWRITE_OUTPUT_FILE (false).
+     */
+    public WriterOptions overwrite(boolean value) {
+      overwrite = value;
+      return this;
+    }
+
+    /**
      * Set the stripe size for the file. The writer stores the contents of the
      * stripe in memory until this memory limit is reached and the stripe
      * is flushed to the HDFS file and the next stripe started.
@@ -559,6 +570,10 @@ public class OrcFile {
       return bloomFilterColumns;
     }
 
+    public boolean getOverwrite() {
+      return overwrite;  // passed to FileSystem.create as its overwrite argument
+    }
+
     public FileSystem getFileSystem() {
       return fileSystemValue;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
index 1769182..06e762f 100644
--- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
@@ -88,7 +88,7 @@ public class PhysicalFsWriter implements PhysicalWriter {
     LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
         " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
         compress, bufferSize);
-    rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
+    rawWriter = fs.create(path, opts.getOverwrite(), HDFS_BUFFER_SIZE,
         fs.getDefaultReplication(path), blockSize);
     codec = OrcCodecPool.getCodec(compress);
     writer = new OutStream("metadata", bufferSize, codec,
diff --git a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
new file mode 100644
index 0000000..22bb1d3
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.orc.OrcConf;
+import org.apache.orc.OrcFile;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestWriterImpl {
+  // Checks the writer's overwrite flag against a file that already exists before each test.
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+  TypeDescription schema;
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("testWriterImpl.orc");
+    fs.create(testFilePath, true).close();  // close at once so the stream is not leaked
+    schema = TypeDescription.fromString("struct<x:int,y:int>");
+  }
+
+  @After
+  public void deleteTestFile() throws Exception {
+    fs.delete(testFilePath, false);
+  }
+
+  @Test(expected = FileAlreadyExistsException.class)
+  public void testDefaultOverwriteFlagForWriter() throws Exception {
+    // default value of the overwrite flag is false, so this should fail
+    Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).setSchema(schema));
+    w.close();
+  }
+
+  @Test
+  public void testOverriddenOverwriteFlagForWriter() throws Exception {
+    // overriding the flag should result in a successful write (no exception)
+    conf.set(OrcConf.OVERWRITE_OUTPUT_FILE.getAttribute(), "true");
+    Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).setSchema(schema));
+    w.close();
+  }
+}