You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/06/03 16:57:53 UTC
[orc] branch master updated: ORC-485. Add API to write encrypted
files.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new f3931fe ORC-485. Add API to write encrypted files.
f3931fe is described below
commit f3931fe8fe38724b87827149ea16977ae197ec09
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Fri Apr 19 21:54:46 2019 -0700
ORC-485. Add API to write encrypted files.
Fixes #386
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/pom.xml | 4 +
java/core/src/java/org/apache/orc/DataMask.java | 55 +++++---
.../MaskProvider.java => DataMaskDescription.java} | 38 +++---
.../mask/MaskProvider.java => EncryptionKey.java} | 44 ++++---
.../src/java/org/apache/orc/EncryptionVariant.java | 60 +++++++++
.../src/java/org/apache/orc/InMemoryKeystore.java | 4 +-
java/core/src/java/org/apache/orc/OrcFile.java | 128 +++++++++++++++++-
java/core/src/java/org/apache/orc/Reader.java | 17 +++
.../org/apache/orc/impl/MaskDescriptionImpl.java | 146 +++++++++++++++++++++
.../src/java/org/apache/orc/impl/ReaderImpl.java | 21 +++
.../java/org/apache/orc/impl/mask/MaskFactory.java | 43 +++---
.../org/apache/orc/impl/mask/MaskProvider.java | 11 +-
.../test/org/apache/orc/TestInMemoryKeystore.java | 26 ++--
.../test/org/apache/orc/impl/TestCryptoUtils.java | 2 +-
.../src/test/org/apache/orc/impl/TestInStream.java | 4 +-
.../test/org/apache/orc/impl/TestOutStream.java | 4 +-
.../org/apache/orc/impl/mask/TestDataMask.java | 18 ++-
java/pom.xml | 5 +
.../java/org/apache/orc/EncryptionAlgorithm.java | 4 +-
.../org/apache/orc/impl/HadoopShimsPre2_7.java | 4 +-
.../org/apache/orc/impl/TestHadoopShimsPre2_7.java | 6 +-
.../test/resources/orc-file-dump-bloomfilter.out | 2 +-
.../test/resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
java/tools/src/test/resources/orc-file-dump.json | 2 +-
java/tools/src/test/resources/orc-file-dump.out | 2 +-
.../tools/src/test/resources/orc-file-has-null.out | 2 +-
27 files changed, 538 insertions(+), 118 deletions(-)
diff --git a/java/core/pom.xml b/java/core/pom.xml
index 9ebb63d..c6de022 100644
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -68,6 +68,10 @@
<artifactId>hive-storage-api</artifactId>
</dependency>
<dependency>
+ <groupId>org.jetbrains</groupId>
+ <artifactId>annotations</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
diff --git a/java/core/src/java/org/apache/orc/DataMask.java b/java/core/src/java/org/apache/orc/DataMask.java
index 1abc031..a6b9416 100644
--- a/java/core/src/java/org/apache/orc/DataMask.java
+++ b/java/core/src/java/org/apache/orc/DataMask.java
@@ -18,6 +18,7 @@
package org.apache.orc;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.orc.impl.MaskDescriptionImpl;
import java.util.ServiceLoader;
@@ -47,12 +48,21 @@ public interface DataMask {
private final String name;
+ /**
+ * Get the name of the predefined data mask.
+ * @return the standard name
+ */
public String getName() {
return name;
}
- public DataMask build(TypeDescription schema, String... params) {
- return Factory.build(name, schema, params);
+ /**
+ * Build a DataMaskDescription given the name and a set of parameters.
+ * @param params the parameters
+ * @return a MaskDescription with the given parameters
+ */
+ public DataMaskDescription getDescription(String... params) {
+ return new MaskDescriptionImpl(name, params);
}
}
@@ -68,6 +78,18 @@ public interface DataMask {
/**
+ * An interface to provide override data masks for sub-columns.
+ */
+ interface MaskOverrides {
+ /**
+ * Should the current mask be overridden on a sub-column?
+ * @param type the subfield
+ * @return the new mask description or null to continue using the same one
+ */
+ DataMaskDescription hasOverride(TypeDescription type);
+ }
+
+ /**
* Providers can provide one or more kinds of data masks.
* Because they are discovered using a service loader, they may be added
* by third party jars.
@@ -75,12 +97,14 @@ public interface DataMask {
interface Provider {
/**
* Build a mask with the given parameters.
- * @param name the kind of masking
+ * @param description the description of the data mask
* @param schema the type of the field
- * @param params the list of parameters with the name in params[0]
+ * @param overrides a function to override this mask on a sub-column
* @return the new data mask or null if this name is unknown
*/
- DataMask build(String name, TypeDescription schema, String... params);
+ DataMask build(DataMaskDescription description,
+ TypeDescription schema,
+ MaskOverrides overrides);
}
/**
@@ -95,31 +119,24 @@ public interface DataMask {
/**
* Build a new DataMask instance.
- * @param name the name of the mask
+ * @param mask the description of the data mask
* @param schema the type of the field
- * @param params a list of parameters to the mask
+ * @param overrides sub-columns where the mask is overridden
* @return a new DataMask
* @throws IllegalArgumentException if no such kind of data mask was found
*
* @see org.apache.orc.impl.mask.MaskProvider for the standard provider
*/
- public static DataMask build(String name,
- TypeDescription schema,
- String... params) {
+ public static DataMask build(DataMaskDescription mask,
+ TypeDescription schema,
+ MaskOverrides overrides) {
for(Provider provider: LOADER) {
- DataMask result = provider.build(name, schema, params);
+ DataMask result = provider.build(mask, schema, overrides);
if (result != null) {
return result;
}
}
- StringBuilder msg = new StringBuilder();
- msg.append("Can't find data mask - ");
- msg.append(name);
- for(int i=0; i < params.length; ++i) {
- msg.append(", ");
- msg.append(params[i]);
- }
- throw new IllegalArgumentException(msg.toString());
+ throw new IllegalArgumentException("Can't find data mask - " + mask);
}
}
}
diff --git a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java b/java/core/src/java/org/apache/orc/DataMaskDescription.java
similarity index 53%
copy from java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
copy to java/core/src/java/org/apache/orc/DataMaskDescription.java
index 737a7bf..d70eaba 100644
--- a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
+++ b/java/core/src/java/org/apache/orc/DataMaskDescription.java
@@ -15,27 +15,29 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.orc.impl.mask;
-import org.apache.orc.DataMask;
-import org.apache.orc.TypeDescription;
+package org.apache.orc;
/**
- * The Provider for all of the built-in data masks.
+ * Information about the DataMask used to mask the unencrypted data.
*/
-public class MaskProvider implements DataMask.Provider {
+public interface DataMaskDescription {
- @Override
- public DataMask build(String name,
- TypeDescription schema,
- String... params) {
- if (name.equals(DataMask.Standard.NULLIFY.getName())) {
- return new NullifyMask();
- } else if (name.equals(DataMask.Standard.REDACT.getName())) {
- return new RedactMaskFactory(params).build(schema);
- } else if(name.equals(DataMask.Standard.SHA256.getName())) {
- return new SHA256MaskFactory(params).build(schema);
- }
- return null;
- }
+ /**
+ * The name of the mask.
+ * @return the name
+ */
+ String getName();
+
+ /**
+ * The parameters for the mask
+ * @return the array of parameters
+ */
+ String[] getParameters();
+
+ /**
+ * Get the list of columns that use this mask.
+ * @return the list of columns
+ */
+ TypeDescription[] getColumns();
}
diff --git a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java b/java/core/src/java/org/apache/orc/EncryptionKey.java
similarity index 53%
copy from java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
copy to java/core/src/java/org/apache/orc/EncryptionKey.java
index 737a7bf..24153b8 100644
--- a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
+++ b/java/core/src/java/org/apache/orc/EncryptionKey.java
@@ -15,27 +15,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.orc.impl.mask;
-import org.apache.orc.DataMask;
-import org.apache.orc.TypeDescription;
+package org.apache.orc;
/**
- * The Provider for all of the built-in data masks.
+ * Information about a key used for column encryption in an ORC file.
*/
-public class MaskProvider implements DataMask.Provider {
+public interface EncryptionKey extends Comparable<EncryptionKey> {
- @Override
- public DataMask build(String name,
- TypeDescription schema,
- String... params) {
- if (name.equals(DataMask.Standard.NULLIFY.getName())) {
- return new NullifyMask();
- } else if (name.equals(DataMask.Standard.REDACT.getName())) {
- return new RedactMaskFactory(params).build(schema);
- } else if(name.equals(DataMask.Standard.SHA256.getName())) {
- return new SHA256MaskFactory(params).build(schema);
- }
- return null;
- }
+ /**
+ * The name of the key.
+ * @return the name
+ */
+ String getKeyName();
+
+ /**
+ * The version of the key.
+ * @return the version, which for most KeyProviders start at 0.
+ */
+ int getKeyVersion();
+
+ /**
+ * The encryption algorithm for this key.
+ * @return the encryption algorithm
+ */
+ EncryptionAlgorithm getAlgorithm();
+
+ /**
+ * The columns that are encrypted with this key.
+ * @return the list of columns
+ */
+ EncryptionVariant[] getEncryptionRoots();
}
diff --git a/java/core/src/java/org/apache/orc/EncryptionVariant.java b/java/core/src/java/org/apache/orc/EncryptionVariant.java
new file mode 100644
index 0000000..e0e91d8
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/EncryptionVariant.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc;
+
+import java.io.IOException;
+import java.security.Key;
+
+/**
+ * Information about a key used for column encryption in an ORC file.
+ */
+public interface EncryptionVariant extends Comparable<EncryptionVariant> {
+
+ /**
+ * Get the key description for this column. This description is global to the
+ * file and is passed to the KeyProvider along with various encrypted local
+ * keys for the stripes or file footer so that it can decrypt them.
+ * @return the encryption key description
+ */
+ EncryptionKey getKeyDescription();
+
+ /**
+ * Get the root column for this encryption.
+ * @return the root column type
+ */
+ TypeDescription getRoot();
+
+ /**
+ * Get the encryption variant id within the file.
+ */
+ int getVariantId();
+
+ /**
+ * Get the local key for the footer.
+ * @return the local decrypted key or null if it isn't available
+ */
+ Key getFileFooterKey() throws IOException;
+
+ /**
+ * Get the local key for a stripe's data or footer.
+ * @param stripe the stripe within the file (0 to N-1)
+ * @return the local decrypted key or null if it isn't available
+ */
+ Key getStripeKey(long stripe) throws IOException;
+}
diff --git a/java/core/src/java/org/apache/orc/InMemoryKeystore.java b/java/core/src/java/org/apache/orc/InMemoryKeystore.java
index d2d39d1..2dd8f95 100644
--- a/java/core/src/java/org/apache/orc/InMemoryKeystore.java
+++ b/java/core/src/java/org/apache/orc/InMemoryKeystore.java
@@ -282,8 +282,8 @@ public class InMemoryKeystore implements HadoopShims.KeyProvider {
byte[] masterKey) throws IOException {
/* Test whether the platform supports the algorithm */
- if (!SUPPORTS_AES_256 && (algorithm != EncryptionAlgorithm.AES_128)) {
- algorithm = EncryptionAlgorithm.AES_128;
+ if (!SUPPORTS_AES_256 && (algorithm != EncryptionAlgorithm.AES_CTR_128)) {
+ algorithm = EncryptionAlgorithm.AES_CTR_128;
}
final byte[] buffer = new byte[algorithm.keyLength()];
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index a04b9a9..bf3d0bd 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -172,6 +172,7 @@ public class OrcFile {
ORC_101(WriterImplementation.ORC_JAVA, 5), // bloom filters use utf8
ORC_135(WriterImplementation.ORC_JAVA, 6), // timestamp stats use utc
ORC_203(WriterImplementation.ORC_JAVA, 7), // trim long strings & record they were trimmed
+ ORC_14(WriterImplementation.ORC_JAVA, 8), // column encryption added
// C++ ORC Writer
ORC_CPP_ORIGINAL(WriterImplementation.ORC_CPP, 6),
@@ -255,7 +256,7 @@ public class OrcFile {
/**
* The WriterVersion for this version of the software.
*/
- public static final WriterVersion CURRENT_WRITER = WriterVersion.ORC_203;
+ public static final WriterVersion CURRENT_WRITER = WriterVersion.ORC_14;
public enum EncodingStrategy {
SPEED, COMPRESSION
@@ -273,6 +274,7 @@ public class OrcFile {
private FileSystem filesystem;
private long maxLength = Long.MAX_VALUE;
private OrcTail orcTail;
+ private HadoopShims.KeyProvider keyProvider;
// TODO: We can generalize FileMetada interface. Make OrcTail implement FileMetadata interface
// and remove this class altogether. Both footer caching and llap caching just needs OrcTail.
// For now keeping this around to avoid complex surgery
@@ -298,6 +300,16 @@ public class OrcFile {
return this;
}
+ /**
+ * Set the KeyProvider to override the default for getting keys.
+ * @param provider
+ * @return
+ */
+ public ReaderOptions setKeyProvider(HadoopShims.KeyProvider provider) {
+ this.keyProvider = provider;
+ return this;
+ }
+
public Configuration getConfiguration() {
return conf;
}
@@ -314,6 +326,10 @@ public class OrcFile {
return orcTail;
}
+ public HadoopShims.KeyProvider getKeyProvider() {
+ return keyProvider;
+ }
+
public ReaderOptions fileMetadata(final FileMetadata metadata) {
fileMetadata = metadata;
return this;
@@ -380,6 +396,40 @@ public class OrcFile {
}
/**
+ * An internal class that describes how to encrypt a column.
+ */
+ public static class EncryptionOption {
+ private final String columnNames;
+ private final String keyName;
+ private final String mask;
+ private final String[] maskParameters;
+
+ EncryptionOption(String columnNames, String keyName, String mask,
+ String... maskParams) {
+ this.columnNames = columnNames;
+ this.keyName = keyName;
+ this.mask = mask;
+ this.maskParameters = maskParams;
+ }
+
+ public String getColumnNames() {
+ return columnNames;
+ }
+
+ public String getKeyName() {
+ return keyName;
+ }
+
+ public String getMask() {
+ return mask;
+ }
+
+ public String[] getMaskParameters() {
+ return maskParameters;
+ }
+ }
+
+ /**
* Options for creating ORC file writers.
*/
public static class WriterOptions implements Cloneable {
@@ -409,6 +459,8 @@ public class OrcFile {
private boolean writeVariableLengthBlocks;
private HadoopShims shims;
private String directEncodingColumns;
+ private List<EncryptionOption> encryption = new ArrayList<>();
+ private HadoopShims.KeyProvider provider;
protected WriterOptions(Properties tableProperties, Configuration conf) {
configuration = conf;
@@ -461,8 +513,7 @@ public class OrcFile {
public WriterOptions clone() {
try {
return (WriterOptions) super.clone();
- }
- catch(CloneNotSupportedException ex) {
+ } catch (CloneNotSupportedException ex) {
throw new AssertionError("Expected super.clone() to work");
}
}
@@ -573,6 +624,7 @@ public class OrcFile {
/**
* Specify the false positive probability for bloom filter.
+ *
* @param fpp - false positive probability
* @return this
*/
@@ -591,6 +643,7 @@ public class OrcFile {
/**
* Set the schema for the file. This is a required parameter.
+ *
* @param schema the schema for the file.
* @return this
*/
@@ -609,6 +662,7 @@ public class OrcFile {
/**
* Add a listener for when the stripe and file are about to be closed.
+ *
* @param callback the object to be called when the stripe is closed
* @return this
*/
@@ -627,7 +681,7 @@ public class OrcFile {
/**
* Change the physical writer of the ORC file.
- *
+ * <p>
* SHOULD ONLY BE USED BY LLAP.
*
* @param writer the writer to control the layout and persistence
@@ -671,6 +725,7 @@ public class OrcFile {
/**
* Manually set the writer version.
* This is an internal API.
+ *
* @param version the version to write
* @return this
*/
@@ -701,6 +756,67 @@ public class OrcFile {
return this;
}
+ /**
+ * Encrypt a set of columns with a key.
+ * For readers without access to the key, they will read nulls.
+ * @param columnNames the columns to encrypt
+ * @param keyName the key name to encrypt the data with
+ * @return this
+ */
+ public WriterOptions encryptColumn(String columnNames,
+ String keyName) {
+ return encryptColumn(columnNames, keyName,
+ DataMask.Standard.NULLIFY.getName());
+ }
+
+ /**
+ * Encrypt a set of columns with a key.
+ * The data is also masked and stored unencrypted in the file. Readers
+ * without access to the key will instead get the masked data.
+ * @param columnNames the column names to encrypt
+ * @param keyName the key name to encrypt the data with
+ * @param mask the kind of masking
+ * @param maskParameters the parameters to the mask
+ * @return this
+ */
+ public WriterOptions encryptColumn(String columnNames,
+ String keyName,
+ String mask,
+ String... maskParameters) {
+ encryption.add(new EncryptionOption(columnNames, keyName, mask,
+ maskParameters));
+ return this;
+ }
+
+ /**
+ * Set a different mask on a subtree that is already being encrypted.
+ * @param columnNames the column names to change the mask on
+ * @param mask the name of the mask
+ * @param maskParameters the parameters for the mask
+ * @return this
+ */
+ public WriterOptions maskColumn(String columnNames,
+ String mask,
+ String... maskParameters) {
+ encryption.add(new EncryptionOption(columnNames, null,
+ mask, maskParameters));
+ return this;
+ }
+
+ /**
+ * Set the key provider to override the default for getting keys.
+ * @param provider
+ * @return
+ */
+ public WriterOptions setKeyProvider(HadoopShims.KeyProvider provider) {
+ this.provider = provider;
+ return this;
+ }
+
+ public HadoopShims.KeyProvider getKeyProvider() {
+ return provider;
+ }
+
public boolean getBlockPadding() {
return blockPaddingValue;
}
@@ -804,6 +920,10 @@ public class OrcFile {
public String getDirectEncodingColumns() {
return directEncodingColumns;
}
+
+ public List<EncryptionOption> getEncryption() {
+ return encryption;
+ }
}
/**
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 6d6e04b..5d026b1 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -147,6 +147,23 @@ public interface Reader extends Closeable {
OrcProto.FileTail getFileTail();
/**
+ * Get the list of encryption keys for column encryption.
+ * @return the set of encryption keys
+ */
+ EncryptionKey[] getColumnEncryptionKeys();
+
+ /**
+ * Get the data masks for the unencrypted variant of the data.
+ * @return the lists of data masks
+ */
+ DataMaskDescription[] getDataMasks();
+
+ /**
+ * Get the list of encryption variants for the data.
+ */
+ EncryptionVariant[] getEncryptionVariants();
+
+ /**
* Options for creating a RecordReader.
*/
public static class Options implements Cloneable {
diff --git a/java/core/src/java/org/apache/orc/impl/MaskDescriptionImpl.java b/java/core/src/java/org/apache/orc/impl/MaskDescriptionImpl.java
new file mode 100644
index 0000000..ce92551
--- /dev/null
+++ b/java/core/src/java/org/apache/orc/impl/MaskDescriptionImpl.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.impl;
+
+import org.apache.orc.DataMaskDescription;
+import org.apache.orc.OrcProto;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.DataMask;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
+
+public class MaskDescriptionImpl implements DataMaskDescription,
+ Comparable<MaskDescriptionImpl> {
+ private int id;
+ private final String name;
+ private final String[] parameters;
+ private final Set<TypeDescription> columns = new HashSet<>();
+
+ public MaskDescriptionImpl(String name,
+ String... parameters) {
+ this.name = name;
+ this.parameters = parameters == null ? new String[0] : parameters;
+ }
+
+ public MaskDescriptionImpl(int id,
+ OrcProto.DataMask mask) {
+ this.id = id;
+ this.name = mask.getName();
+ this.parameters = new String[mask.getMaskParametersCount()];
+ for(int p=0; p < parameters.length; ++p) {
+ parameters[p] = mask.getMaskParameters(p);
+ }
+
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null || other.getClass() != getClass()) {
+ return false;
+ } else {
+ return compareTo((MaskDescriptionImpl) other) == 0;
+ }
+ }
+
+ public void addColumn(TypeDescription column) {
+ columns.add(column);
+ }
+
+ public void setId(int id) {
+ this.id = id;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String[] getParameters() {
+ return parameters;
+ }
+
+ @Override
+ public TypeDescription[] getColumns() {
+ TypeDescription[] result = columns.toArray(new TypeDescription[columns.size()]);
+ // sort the columns by their ids
+ Arrays.sort(result, Comparator.comparingInt(TypeDescription::getId));
+ return result;
+ }
+
+ public int getId() {
+ return id;
+ }
+
+ public DataMask create(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
+ return DataMask.Factory.build(this, schema, overrides);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append("mask ");
+ buffer.append(getName());
+ buffer.append('(');
+ String[] parameters = getParameters();
+ if (parameters != null) {
+ for(int p=0; p < parameters.length; ++p) {
+ if (p != 0) {
+ buffer.append(", ");
+ }
+ buffer.append(parameters[p]);
+ }
+ }
+ buffer.append(')');
+ return buffer.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ int result = name.hashCode();
+ for (String p: parameters) {
+ result = result * 101 + p.hashCode();
+ }
+ return result;
+ }
+
+ @Override
+ public int compareTo(@NotNull MaskDescriptionImpl other) {
+ if (other == this) {
+ return 0;
+ }
+ int result = name.compareTo(other.name);
+ int p = 0;
+ while (result == 0 &&
+ p < parameters.length && p < other.parameters.length) {
+ result = parameters[p].compareTo(other.parameters[p]);
+ p += 1;
+ }
+ if (result == 0) {
+ result = Integer.compare(parameters.length, other.parameters.length);
+ }
+ return result;
+ }
+}
+
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 79ed82b..8edfaee 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -28,6 +28,9 @@ import java.util.Set;
import org.apache.hadoop.fs.FileStatus;
import org.apache.orc.CompressionKind;
+import org.apache.orc.DataMaskDescription;
+import org.apache.orc.EncryptionKey;
+import org.apache.orc.EncryptionVariant;
import org.apache.orc.FileMetadata;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcUtils;
@@ -217,6 +220,24 @@ public class ReaderImpl implements Reader {
}
@Override
+ public EncryptionKey[] getColumnEncryptionKeys() {
+ // TODO
+ return new EncryptionKey[0];
+ }
+
+ @Override
+ public DataMaskDescription[] getDataMasks() {
+ // TODO
+ return new DataMaskDescription[0];
+ }
+
+ @Override
+ public EncryptionVariant[] getEncryptionVariants() {
+ // TODO
+ return new EncryptionVariant[0];
+ }
+
+ @Override
public int getRowIndexStride() {
return rowIndexStride;
}
diff --git a/java/core/src/java/org/apache/orc/impl/mask/MaskFactory.java b/java/core/src/java/org/apache/orc/impl/mask/MaskFactory.java
index 9a77c4a..e1be9bd 100644
--- a/java/core/src/java/org/apache/orc/impl/mask/MaskFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/mask/MaskFactory.java
@@ -18,6 +18,7 @@
package org.apache.orc.impl.mask;
import org.apache.orc.DataMask;
+import org.apache.orc.DataMaskDescription;
import org.apache.orc.TypeDescription;
import java.util.List;
@@ -38,7 +39,8 @@ public abstract class MaskFactory {
protected abstract DataMask buildTimestampMask(TypeDescription schema);
protected abstract DataMask buildBinaryMask(TypeDescription schema);
- public DataMask build(TypeDescription schema) {
+ public DataMask build(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
switch(schema.getCategory()) {
case BOOLEAN:
return buildBooleanMask(schema);
@@ -63,39 +65,50 @@ public abstract class MaskFactory {
case BINARY:
return buildBinaryMask(schema);
case UNION:
- return buildUnionMask(schema);
+ return buildUnionMask(schema, overrides);
case STRUCT:
- return buildStructMask(schema);
+ return buildStructMask(schema, overrides);
case LIST:
- return buildListMask(schema);
+ return buildListMask(schema, overrides);
case MAP:
- return buildMapMask(schema);
+ return buildMapMask(schema, overrides);
default:
throw new IllegalArgumentException("Unhandled type " + schema);
}
}
- protected DataMask[] buildChildren(List<TypeDescription> children) {
+ protected DataMask[] buildChildren(List<TypeDescription> children,
+ DataMask.MaskOverrides overrides) {
DataMask[] result = new DataMask[children.size()];
for(int i = 0; i < result.length; ++i) {
- result[i] = build(children.get(i));
+ TypeDescription child = children.get(i);
+ DataMaskDescription over = overrides.hasOverride(child);
+ if (over != null) {
+ result[i] = DataMask.Factory.build(over, child, overrides);
+ } else {
+ result[i] = build(child, overrides);
+ }
}
return result;
}
- protected DataMask buildStructMask(TypeDescription schema) {
- return new StructIdentity(buildChildren(schema.getChildren()));
+ protected DataMask buildStructMask(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
+ return new StructIdentity(buildChildren(schema.getChildren(), overrides));
}
- DataMask buildListMask(TypeDescription schema) {
- return new ListIdentity(buildChildren(schema.getChildren()));
+ DataMask buildListMask(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
+ return new ListIdentity(buildChildren(schema.getChildren(), overrides));
}
- DataMask buildMapMask(TypeDescription schema) {
- return new MapIdentity(buildChildren(schema.getChildren()));
+ DataMask buildMapMask(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
+ return new MapIdentity(buildChildren(schema.getChildren(), overrides));
}
- DataMask buildUnionMask(TypeDescription schema) {
- return new UnionIdentity(buildChildren(schema.getChildren()));
+ DataMask buildUnionMask(TypeDescription schema,
+ DataMask.MaskOverrides overrides) {
+ return new UnionIdentity(buildChildren(schema.getChildren(), overrides));
}
}
diff --git a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java b/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
index 737a7bf..b5d9ca1 100644
--- a/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
+++ b/java/core/src/java/org/apache/orc/impl/mask/MaskProvider.java
@@ -18,6 +18,7 @@
package org.apache.orc.impl.mask;
import org.apache.orc.DataMask;
+import org.apache.orc.DataMaskDescription;
import org.apache.orc.TypeDescription;
/**
@@ -26,15 +27,17 @@ import org.apache.orc.TypeDescription;
public class MaskProvider implements DataMask.Provider {
@Override
- public DataMask build(String name,
+ public DataMask build(DataMaskDescription description,
TypeDescription schema,
- String... params) {
+ DataMask.MaskOverrides overrides) {
+ String name = description.getName();
if (name.equals(DataMask.Standard.NULLIFY.getName())) {
return new NullifyMask();
} else if (name.equals(DataMask.Standard.REDACT.getName())) {
- return new RedactMaskFactory(params).build(schema);
+ return new RedactMaskFactory(description.getParameters())
+ .build(schema, overrides);
} else if(name.equals(DataMask.Standard.SHA256.getName())) {
- return new SHA256MaskFactory(params).build(schema);
+ return new SHA256MaskFactory().build(schema, overrides);
}
return null;
}
diff --git a/java/core/src/test/org/apache/orc/TestInMemoryKeystore.java b/java/core/src/test/org/apache/orc/TestInMemoryKeystore.java
index 2cdd76c..d9bea99 100644
--- a/java/core/src/test/org/apache/orc/TestInMemoryKeystore.java
+++ b/java/core/src/test/org/apache/orc/TestInMemoryKeystore.java
@@ -45,9 +45,9 @@ public class TestInMemoryKeystore {
Random random = new Random(2);
memoryKeystore =
new InMemoryKeystore(random)
- .addKey("key128", EncryptionAlgorithm.AES_128, "123".getBytes())
- .addKey("key256", EncryptionAlgorithm.AES_256, "secret123".getBytes())
- .addKey("key256short", EncryptionAlgorithm.AES_256, "5".getBytes());
+ .addKey("key128", EncryptionAlgorithm.AES_CTR_128, "123".getBytes())
+ .addKey("key256", EncryptionAlgorithm.AES_CTR_256, "secret123".getBytes())
+ .addKey("key256short", EncryptionAlgorithm.AES_CTR_256, "5".getBytes());
}
@@ -72,9 +72,9 @@ public class TestInMemoryKeystore {
Assert.assertEquals("key256", metadata.getKeyName());
if (InMemoryKeystore.SUPPORTS_AES_256) {
- Assert.assertEquals(EncryptionAlgorithm.AES_256, metadata.getAlgorithm());
+ Assert.assertEquals(EncryptionAlgorithm.AES_CTR_256, metadata.getAlgorithm());
} else {
- Assert.assertEquals(EncryptionAlgorithm.AES_128, metadata.getAlgorithm());
+ Assert.assertEquals(EncryptionAlgorithm.AES_CTR_128, metadata.getAlgorithm());
}
Assert.assertEquals(0, metadata.getVersion());
@@ -133,7 +133,7 @@ public class TestInMemoryKeystore {
Assert.assertEquals(0,
memoryKeystore.getCurrentKeyVersion("key128").getVersion());
- memoryKeystore.addKey("key128", 1, EncryptionAlgorithm.AES_128, "NewSecret".getBytes());
+ memoryKeystore.addKey("key128", 1, EncryptionAlgorithm.AES_CTR_128, "NewSecret".getBytes());
Assert.assertEquals(1,
memoryKeystore.getCurrentKeyVersion("key128").getVersion());
}
@@ -141,7 +141,7 @@ public class TestInMemoryKeystore {
@Test
public void testDuplicateKeyNames() {
try {
- memoryKeystore.addKey("key128", 0, EncryptionAlgorithm.AES_128,
+ memoryKeystore.addKey("key128", 0, EncryptionAlgorithm.AES_CTR_128,
"exception".getBytes());
Assert.fail("Keys with same name cannot be added.");
} catch (IOException e) {
@@ -161,32 +161,32 @@ public class TestInMemoryKeystore {
public void testMultipleVersion() throws IOException {
Assert.assertEquals(0,
memoryKeystore.getCurrentKeyVersion("key256").getVersion());
- memoryKeystore.addKey("key256", 1, EncryptionAlgorithm.AES_256, "NewSecret".getBytes());
+ memoryKeystore.addKey("key256", 1, EncryptionAlgorithm.AES_CTR_256, "NewSecret".getBytes());
Assert.assertEquals(1,
memoryKeystore.getCurrentKeyVersion("key256").getVersion());
try {
- memoryKeystore.addKey("key256", 1, EncryptionAlgorithm.AES_256,
+ memoryKeystore.addKey("key256", 1, EncryptionAlgorithm.AES_CTR_256,
"BadSecret".getBytes());
Assert.fail("Keys with smaller version should not be added.");
} catch (final IOException e) {
Assert.assertTrue(e.toString().contains("equal or higher version"));
}
- memoryKeystore.addKey("key256", 2, EncryptionAlgorithm.AES_256,
+ memoryKeystore.addKey("key256", 2, EncryptionAlgorithm.AES_CTR_256,
"NewerSecret".getBytes());
Assert.assertEquals(2,
memoryKeystore.getCurrentKeyVersion("key256").getVersion());
// make sure that all 3 versions of key256 exist and have different secrets
Key key0 = memoryKeystore.decryptLocalKey(
- new HadoopShims.KeyMetadata("key256", 0, EncryptionAlgorithm.AES_256),
+ new HadoopShims.KeyMetadata("key256", 0, EncryptionAlgorithm.AES_CTR_256),
new byte[16]);
Key key1 = memoryKeystore.decryptLocalKey(
- new HadoopShims.KeyMetadata("key256", 1, EncryptionAlgorithm.AES_256),
+ new HadoopShims.KeyMetadata("key256", 1, EncryptionAlgorithm.AES_CTR_256),
new byte[16]);
Key key2 = memoryKeystore.decryptLocalKey(
- new HadoopShims.KeyMetadata("key256", 2, EncryptionAlgorithm.AES_256),
+ new HadoopShims.KeyMetadata("key256", 2, EncryptionAlgorithm.AES_CTR_256),
new byte[16]);
Assert.assertNotEquals(new BytesWritable(key0.getEncoded()).toString(),
new BytesWritable(key1.getEncoded()).toString());
diff --git a/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java b/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java
index 203d3e7..a6d2867 100644
--- a/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java
+++ b/java/core/src/test/org/apache/orc/impl/TestCryptoUtils.java
@@ -31,7 +31,7 @@ public class TestCryptoUtils {
@Test
public void testCreateStreamIv() throws Exception {
- byte[] iv = CryptoUtils.createIvForStream(EncryptionAlgorithm.AES_128,
+ byte[] iv = CryptoUtils.createIvForStream(EncryptionAlgorithm.AES_CTR_128,
new StreamName(0x234567,
OrcProto.Stream.Kind.BLOOM_FILTER_UTF8), 0x123456);
assertEquals(16, iv.length);
diff --git a/java/core/src/test/org/apache/orc/impl/TestInStream.java b/java/core/src/test/org/apache/orc/impl/TestInStream.java
index 78098b2..a7e023a 100644
--- a/java/core/src/test/org/apache/orc/impl/TestInStream.java
+++ b/java/core/src/test/org/apache/orc/impl/TestInStream.java
@@ -127,7 +127,7 @@ public class TestInStream {
final long DATA_CONST = 0x1_0000_0003L;
final int ROW_COUNT = 1024;
OutputCollector collect = new OutputCollector();
- EncryptionAlgorithm algorithm = EncryptionAlgorithm.AES_128;
+ EncryptionAlgorithm algorithm = EncryptionAlgorithm.AES_CTR_128;
byte[] rawKey = new byte[algorithm.keyLength()];
for(int i=0; i < rawKey.length; ++i) {
rawKey[i] = (byte) i;
@@ -181,7 +181,7 @@ public class TestInStream {
final long DATA_CONST = 0x1_0000_0003L;
final int ROW_COUNT = 1024;
OutputCollector collect = new OutputCollector();
- EncryptionAlgorithm algorithm = EncryptionAlgorithm.AES_128;
+ EncryptionAlgorithm algorithm = EncryptionAlgorithm.AES_CTR_128;
byte[] rawKey = new byte[algorithm.keyLength()];
for(int i=0; i < rawKey.length; ++i) {
rawKey[i] = (byte) i;
diff --git a/java/core/src/test/org/apache/orc/impl/TestOutStream.java b/java/core/src/test/org/apache/orc/impl/TestOutStream.java
index 87c1fce..707a407 100644
--- a/java/core/src/test/org/apache/orc/impl/TestOutStream.java
+++ b/java/core/src/test/org/apache/orc/impl/TestOutStream.java
@@ -82,7 +82,7 @@ public class TestOutStream {
@Test
public void testEncryption() throws Exception {
TestInStream.OutputCollector receiver = new TestInStream.OutputCollector();
- EncryptionAlgorithm aes128 = EncryptionAlgorithm.AES_128;
+ EncryptionAlgorithm aes128 = EncryptionAlgorithm.AES_CTR_128;
byte[] keyBytes = new byte[aes128.keyLength()];
for(int i=0; i < keyBytes.length; ++i) {
keyBytes[i] = (byte) i;
@@ -177,7 +177,7 @@ public class TestOutStream {
// disable test if AES_256 is not available
Assume.assumeTrue(TEST_AES_256);
TestInStream.OutputCollector receiver = new TestInStream.OutputCollector();
- EncryptionAlgorithm aes256 = EncryptionAlgorithm.AES_256;
+ EncryptionAlgorithm aes256 = EncryptionAlgorithm.AES_CTR_256;
byte[] keyBytes = new byte[aes256.keyLength()];
for(int i=0; i < keyBytes.length; ++i) {
keyBytes[i] = (byte) (i * 13);
diff --git a/java/core/src/test/org/apache/orc/impl/mask/TestDataMask.java b/java/core/src/test/org/apache/orc/impl/mask/TestDataMask.java
index bbf313c..070e319 100644
--- a/java/core/src/test/org/apache/orc/impl/mask/TestDataMask.java
+++ b/java/core/src/test/org/apache/orc/impl/mask/TestDataMask.java
@@ -18,7 +18,6 @@
package org.apache.orc.impl.mask;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
@@ -27,7 +26,6 @@ import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.orc.DataMask;
import org.apache.orc.TypeDescription;
@@ -43,7 +41,8 @@ public class TestDataMask {
public void testNullFactory() throws Exception {
TypeDescription schema = TypeDescription.fromString("struct<x:int>");
// take the first column's type
- DataMask mask = DataMask.Standard.NULLIFY.build(schema.findSubtype(1));
+ DataMask mask = DataMask.Factory.build(DataMask.Standard.NULLIFY.getDescription(),
schema.findSubtype(1), (type) -> null);
assertEquals(NullifyMask.class.toString(), mask.getClass().toString());
LongColumnVector cv = (LongColumnVector) schema.createRowBatch().cols[0];
LongColumnVector masked = (LongColumnVector) schema.createRowBatch().cols[0];
@@ -59,7 +58,8 @@ public class TestDataMask {
public void testRedactFactory() throws Exception {
TypeDescription schema =
TypeDescription.fromString("struct<s:struct<x:int,y:string>>");
- DataMask mask = DataMask.Standard.REDACT.build(schema.findSubtype(1));
+ DataMask mask = DataMask.Factory.build(DataMask.Standard.REDACT.getDescription(),
+ schema.findSubtype(1), (type) -> null);
assertEquals(StructIdentity.class.toString(), mask.getClass().toString());
StructColumnVector cv = (StructColumnVector)schema.createRowBatch().cols[0];
StructColumnVector masked = (StructColumnVector)schema.createRowBatch().cols[0];
@@ -83,10 +83,14 @@ public class TestDataMask {
TypeDescription schema =
TypeDescription.fromString("struct<s:struct<a:decimal(18,6),b:double," +
"c:array<int>,d:map<timestamp,date>,e:uniontype<int,binary>,f:string>>");
- DataMask nullify = DataMask.Standard.NULLIFY.build(schema.findSubtype(1));
+ DataMask nullify =
+ DataMask.Factory.build(DataMask.Standard.NULLIFY.getDescription(),
+ schema.findSubtype(1), (type) -> null);
// create a redact mask that passes everything though
- DataMask identity = DataMask.Standard.REDACT.build(schema.findSubtype(1),
- "__________", "_ _ _ _ _ _");
+ DataMask identity =
+ DataMask.Factory.build(DataMask.Standard.REDACT
+ .getDescription("__________", "_ _ _ _ _ _"),
+ schema.findSubtype(1), (type) -> null);
// allow easier access to fields
StructColumnVector cv = (StructColumnVector)schema.createRowBatch().cols[0];
diff --git a/java/pom.xml b/java/pom.xml
index 18d2461..10e2458 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -643,6 +643,11 @@
</exclusions>
</dependency>
<dependency>
+ <groupId>org.jetbrains</groupId>
+ <artifactId>annotations</artifactId>
+ <version>17.0.0</version>
+ </dependency>
+ <dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
diff --git a/java/shims/src/java/org/apache/orc/EncryptionAlgorithm.java b/java/shims/src/java/org/apache/orc/EncryptionAlgorithm.java
index 5bdccfd..7da37fa 100644
--- a/java/shims/src/java/org/apache/orc/EncryptionAlgorithm.java
+++ b/java/shims/src/java/org/apache/orc/EncryptionAlgorithm.java
@@ -28,8 +28,8 @@ import java.security.NoSuchAlgorithmException;
* This class can't reference any of the newer Hadoop classes.
*/
public enum EncryptionAlgorithm {
- AES_128("AES", "CTR/NoPadding", 16, 1),
- AES_256("AES", "CTR/NoPadding", 32, 2);
+ AES_CTR_128("AES", "CTR/NoPadding", 16, 1),
+ AES_CTR_256("AES", "CTR/NoPadding", 32, 2);
private final String algorithm;
private final String mode;
diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java
index 17296ab..e49d389 100644
--- a/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java
+++ b/java/shims/src/java/org/apache/orc/impl/HadoopShimsPre2_7.java
@@ -203,13 +203,13 @@ public class HadoopShimsPre2_7 implements HadoopShims {
if (cipher.startsWith("AES/")) {
int bitLength = meta.getBitLength();
if (bitLength == 128) {
- return EncryptionAlgorithm.AES_128;
+ return EncryptionAlgorithm.AES_CTR_128;
} else {
if (bitLength != 256) {
LOG.info("ORC column encryption does not support " + bitLength +
" bit keys. Using 256 bits instead.");
}
- return EncryptionAlgorithm.AES_256;
+ return EncryptionAlgorithm.AES_CTR_256;
}
}
throw new IllegalArgumentException("ORC column encryption only supports" +
diff --git a/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
index 5ed4407..95249b3 100644
--- a/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
+++ b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPre2_7.java
@@ -54,17 +54,17 @@ public class TestHadoopShimsPre2_7 {
KeyProvider.Metadata meta = new KMSClientProvider.KMSMetadata(
"AES/CTR/NoPadding", 128, "", new HashMap<String, String>(),
new Date(0), 1);
- assertEquals(EncryptionAlgorithm.AES_128,
+ assertEquals(EncryptionAlgorithm.AES_CTR_128,
HadoopShimsPre2_7.findAlgorithm(meta));
meta = new KMSClientProvider.KMSMetadata(
"AES/CTR/NoPadding", 256, "", new HashMap<String, String>(),
new Date(0), 1);
- assertEquals(EncryptionAlgorithm.AES_256,
+ assertEquals(EncryptionAlgorithm.AES_CTR_256,
HadoopShimsPre2_7.findAlgorithm(meta));
meta = new KMSClientProvider.KMSMetadata(
"AES/CTR/NoPadding", 512, "", new HashMap<String, String>(),
new Date(0), 1);
- assertEquals(EncryptionAlgorithm.AES_256,
+ assertEquals(EncryptionAlgorithm.AES_CTR_256,
HadoopShimsPre2_7.findAlgorithm(meta));
}
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index da79120..3887059 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_203
+File Version: 0.12 with ORC_14
Rows: 21000
Compression: ZLIB
Compression size: 4096
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 4ec83bf..b63b4e7 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_203
+File Version: 0.12 with ORC_14
Rows: 21000
Compression: ZLIB
Compression size: 4096
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index 14e9ac3..1ae53df 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_203
+File Version: 0.12 with ORC_14
Rows: 21000
Compression: ZLIB
Compression size: 4096
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 91c1a2b..b153cae 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -1,7 +1,7 @@
{
"fileName": "TestFileDump.testDump.orc",
"fileVersion": "0.12",
- "writerVersion": "ORC_203",
+ "writerVersion": "ORC_14",
"numberOfRows": 21000,
"compression": "ZLIB",
"compressionBufferSize": 4096,
diff --git a/java/tools/src/test/resources/orc-file-dump.out b/java/tools/src/test/resources/orc-file-dump.out
index d988155..dd7291c 100644
--- a/java/tools/src/test/resources/orc-file-dump.out
+++ b/java/tools/src/test/resources/orc-file-dump.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_203
+File Version: 0.12 with ORC_14
Rows: 21000
Compression: ZLIB
Compression size: 4096
diff --git a/java/tools/src/test/resources/orc-file-has-null.out b/java/tools/src/test/resources/orc-file-has-null.out
index 4fb7d69..65f8814 100644
--- a/java/tools/src/test/resources/orc-file-has-null.out
+++ b/java/tools/src/test/resources/orc-file-has-null.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with ORC_203
+File Version: 0.12 with ORC_14
Rows: 20000
Compression: ZLIB
Compression size: 4096