You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by pv...@apache.org on 2017/06/26 17:16:46 UTC

nifi git commit: NIFI-4055: Add a compression option to AvroRecordSetWriter

Repository: nifi
Updated Branches:
  refs/heads/master 91ed96f8c -> 68b42c9e5


NIFI-4055: Add a compression option to AvroRecordSetWriter

This closes #1909.

Signed-off-by: Pierre Villard <pi...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/68b42c9e
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/68b42c9e
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/68b42c9e

Branch: refs/heads/master
Commit: 68b42c9e547608d78d301d5b3a3856fe4853b4cd
Parents: 91ed96f
Author: Steve Champagne <ch...@gmail.com>
Authored: Fri Jun 9 18:39:16 2017 +0000
Committer: Pierre Villard <pi...@gmail.com>
Committed: Mon Jun 26 19:16:31 2017 +0200

----------------------------------------------------------------------
 .../apache/nifi/avro/AvroRecordSetWriter.java   | 66 +++++++++++++++++++-
 .../nifi/avro/WriteAvroResultWithSchema.java    |  4 +-
 .../avro/TestWriteAvroResultWithSchema.java     |  3 +-
 3 files changed, 70 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/68b42c9e/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java
index fd09961..a8459a9 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java
@@ -20,6 +20,7 @@ package org.apache.nifi.avro;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.EnumSet;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -28,10 +29,13 @@ import java.util.Optional;
 import java.util.Set;
 
 import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.exception.ProcessException;
@@ -48,6 +52,23 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement
     private static final Set<SchemaField> requiredSchemaFields = EnumSet.of(SchemaField.SCHEMA_TEXT, SchemaField.SCHEMA_TEXT_FORMAT);
     private static final int MAX_AVRO_SCHEMA_CACHE_SIZE = 20;
 
+    private enum CodecType {
+        BZIP2,
+        DEFLATE,
+        NONE,
+        SNAPPY,
+        LZO // NOTE(review): getCodecFactory maps this constant to CodecFactory.xzCodec(...), i.e. XZ rather than LZO - confirm the intended name
+    }
+
+    private static final PropertyDescriptor COMPRESSION_FORMAT = new PropertyDescriptor.Builder()
+        .name("compression-format")
+        .displayName("Compression Format")
+        .description("Compression type to use when writing Avro files. Default is None.")
+        .allowableValues(CodecType.values()) // choices come from the CodecType constants; getCodecFactory parses the selection back with CodecType.valueOf
+        .defaultValue(CodecType.NONE.toString())
+        .required(true)
+        .build();
+
     private final Map<String, Schema> compiledAvroSchemaCache = new LinkedHashMap<String, Schema>() {
         @Override
         protected boolean removeEldestEntry(final Map.Entry<String, Schema> eldest) {
@@ -61,6 +82,7 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement
     @Override
     public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema recordSchema, final FlowFile flowFile, final OutputStream out) throws IOException {
         final String strategyValue = getConfigurationContext().getProperty(getSchemaWriteStrategyDescriptor()).getValue();
+        final String compressionFormat = getConfigurationContext().getProperty(COMPRESSION_FORMAT).getValue();
 
         try {
             final Schema avroSchema;
@@ -80,7 +102,7 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement
             }
 
             if (AVRO_EMBEDDED.getValue().equals(strategyValue)) {
-                return new WriteAvroResultWithSchema(avroSchema, out);
+                return new WriteAvroResultWithSchema(avroSchema, out, getCodecFactory(compressionFormat));
             } else {
                 return new WriteAvroResultWithExternalSchema(avroSchema, recordSchema, getSchemaAccessWriter(recordSchema), out);
             }
@@ -113,6 +135,30 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement
         }
     }
 
+    private CodecFactory getCodecFactory(String property) { // translates the configured Compression Format value into an Avro CodecFactory
+        CodecType type = CodecType.valueOf(property); // valueOf throws for null or for a name that is not a CodecType constant
+        switch (type) {
+        case BZIP2:
+            return CodecFactory.bzip2Codec();
+        case DEFLATE:
+            return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
+        case LZO: // NOTE(review): the label says LZO but the codec returned below is XZ - confirm which one is intended
+            return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
+        case SNAPPY:
+            return CodecFactory.snappyCodec();
+        case NONE:
+        default: // NONE and any constant without its own case fall back to the null (no compression) codec
+            return CodecFactory.nullCodec();
+        }
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        final List<PropertyDescriptor> properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); // copy the inherited descriptors into a new list
+        properties.add(COMPRESSION_FORMAT); // expose the compression option after the inherited properties
+        return properties;
+    }
+
     @Override
     protected List<AllowableValue> getSchemaWriteStrategyValues() {
         final List<AllowableValue> allowableValues = new ArrayList<>();
@@ -135,4 +181,22 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement
 
         return super.getRequiredSchemaFields(validationContext);
     }
+
+    @Override
+    protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
+        final List<ValidationResult> results = new ArrayList<>(super.customValidate(validationContext)); // start from the inherited validation results
+        final String writeStrategyValue = validationContext.getProperty(getSchemaWriteStrategyDescriptor()).getValue();
+        final String compressionFormatValue = validationContext.getProperty(COMPRESSION_FORMAT).getValue();
+        if (!AVRO_EMBEDDED.getValue().equalsIgnoreCase(writeStrategyValue) // constant as receiver, as in createWriter: a null strategy value cannot throw here
+                && !CodecType.NONE.toString().equals(compressionFormatValue)) { // any real codec without the embedded-schema strategy is rejected
+            results.add(new ValidationResult.Builder()
+                    .subject(COMPRESSION_FORMAT.getName())
+                    .valid(false)
+                    .explanation("Avro compression codecs are stored in the header of the Avro file and therefore "
+                        + "requires the header to be embedded into the content.")
+                    .build());
+        }
+
+        return results;
+    }
 }

http://git-wip-us.apache.org/repos/asf/nifi/blob/68b42c9e/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/WriteAvroResultWithSchema.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/WriteAvroResultWithSchema.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/WriteAvroResultWithSchema.java
index ae2f109..ea327a4 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/WriteAvroResultWithSchema.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/WriteAvroResultWithSchema.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.Map;
 
 import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
@@ -34,12 +35,13 @@ public class WriteAvroResultWithSchema extends AbstractRecordSetWriter {
     private final DataFileWriter<GenericRecord> dataFileWriter;
     private final Schema schema;
 
-    public WriteAvroResultWithSchema(final Schema schema, final OutputStream out) throws IOException {
+    public WriteAvroResultWithSchema(final Schema schema, final OutputStream out, final CodecFactory codec) throws IOException {
         super(out);
         this.schema = schema;
 
         final GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
         dataFileWriter = new DataFileWriter<>(datumWriter);
+        dataFileWriter.setCodec(codec); // applied before create(schema, out) - NOTE(review): presumably required so the codec lands in the file header; confirm against DataFileWriter docs
         dataFileWriter.create(schema, out);
     }
 

http://git-wip-us.apache.org/repos/asf/nifi/blob/68b42c9e/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestWriteAvroResultWithSchema.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestWriteAvroResultWithSchema.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestWriteAvroResultWithSchema.java
index 9761076..b3eecde 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestWriteAvroResultWithSchema.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestWriteAvroResultWithSchema.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.io.OutputStream;
 
 import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericData.StringType;
@@ -33,7 +34,7 @@ public class TestWriteAvroResultWithSchema extends TestWriteAvroResult {
 
     @Override
     protected RecordSetWriter createWriter(final Schema schema, final OutputStream out) throws IOException {
-        return new WriteAvroResultWithSchema(schema, out);
+        return new WriteAvroResultWithSchema(schema, out, CodecFactory.nullCodec()); // same codec AvroRecordSetWriter.getCodecFactory returns for the NONE setting
     }
 
     @Override