You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by mattyb149 <gi...@git.apache.org> on 2018/06/07 02:37:56 UTC

[GitHub] nifi pull request #2711: NIFI-1705 - Adding AttributesToCSV processor

Github user mattyb149 commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/2711#discussion_r193611811
  
    --- Diff: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AttributesToCSV.java ---
    @@ -0,0 +1,335 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.nifi.processors.standard;
    +
    +import org.apache.commons.text.StringEscapeUtils;
    +import org.apache.commons.lang3.StringUtils;
    +import org.apache.nifi.annotation.behavior.WritesAttribute;
    +import org.apache.nifi.annotation.behavior.WritesAttributes;
    +import org.apache.nifi.annotation.behavior.SideEffectFree;
    +import org.apache.nifi.annotation.behavior.SupportsBatching;
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.annotation.lifecycle.OnScheduled;
    +import org.apache.nifi.components.AllowableValue;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.expression.ExpressionLanguageScope;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.flowfile.attributes.CoreAttributes;
    +import org.apache.nifi.processor.AbstractProcessor;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessorInitializationContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.util.StandardValidators;
    +
    +import java.util.Map;
    +import java.util.Set;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.LinkedHashMap;
    +import java.util.LinkedHashSet;
    +import java.util.regex.Pattern;
    +import java.util.stream.Collectors;
    +import java.util.Collections;
    +import java.util.Arrays;
    +import java.util.ArrayList;
    +
    +@EventDriven
    +@SideEffectFree
    +@SupportsBatching
    +@Tags({"csv", "attributes", "flowfile"})
    +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
    +@CapabilityDescription("Generates a CSV representation of the input FlowFile Attributes. The resulting CSV " +
    +        "can be written to either a newly generated attribute named 'CSVAttributes' or written to the FlowFile as content.  " +
    +        "If the attribute value contains a comma, newline or double quote, then the attribute value will be " +
    +        "escaped with double quotes.  Any double quote characters in the attribute value are escaped with " +
    +        "another double quote.")
    +@WritesAttributes({
    +        @WritesAttribute(attribute = "CSVSchema", description = "CSV representation of the Schema"),
    +        @WritesAttribute(attribute = "CSVData", description = "CSV representation of Attributes")
    +})
    +
    +public class AttributesToCSV extends AbstractProcessor {
    +    private static final String DATA_ATTRIBUTE_NAME = "CSVData";
    +    private static final String SCHEMA_ATTRIBUTE_NAME = "CSVSchema";
    +    private static final String OUTPUT_SEPARATOR = ",";
    +    private static final String OUTPUT_MIME_TYPE = "text/csv";
    +    private static final String SPLIT_REGEX = OUTPUT_SEPARATOR + "(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)";
    +
    +    static final AllowableValue OUTPUT_OVERWRITE_CONTENT = new AllowableValue("flowfile-content", "flowfile-content", "The resulting CSV string will be placed into the content of the flowfile." +
    +            "Existing flowfile context will be overwritten. 'CSVData' will not be written to at all (neither null nor empty string).");
    +    static final AllowableValue OUTPUT_NEW_ATTRIBUTE= new AllowableValue("flowfile-attribute", "flowfile-attribute", "The resulting CSV string will be placed into a new flowfile" +
    +            " attribute named 'CSVData'.  The content of the flowfile will not be changed.");
    +
    +    public static final PropertyDescriptor ATTRIBUTES_LIST = new PropertyDescriptor.Builder()
    +            .name("attribute-list")
    +            .displayName("Attribute List")
    +            .description("Comma separated list of attributes to be included in the resulting CSV. If this value " +
    +                    "is left empty then all existing Attributes will be included. This list of attributes is " +
    +                    "case sensitive and supports attribute names that contain commas. If an attribute specified in the list is not found it will be emitted " +
    +                    "to the resulting CSV with an empty string or null depending on the 'Null Value' property. " +
    +                    "If a core attribute is specified in this list " +
    +                    "and the 'Include Core Attributes' property is false, the core attribute will be included. The attribute list " +
    +                    "ALWAYS wins.")
    +            .required(false)
    +            .addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
    +            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
    +            .build();
    +
    +    public static final PropertyDescriptor ATTRIBUTES_REGEX = new PropertyDescriptor.Builder()
    +            .name("attributes-regex")
    +            .displayName("Attributes Regular Expression")
    +            .description("Regular expression that will be evaluated against the flow file attributes to select "
    +                    + "the matching attributes. This property can be used in combination with the attributes "
    +                    + "list property.  The final output will contain a combination of matches found in the ATTRIBUTE_LIST and ATTRIBUTE_REGEX.")
    +            .required(false)
    +            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
    +            .addValidator(StandardValidators.createRegexValidator(0, Integer.MAX_VALUE, true))
    +            .addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor DESTINATION = new PropertyDescriptor.Builder()
    +            .name("destination")
    +            .displayName("Destination")
    +            .description("Control if CSV value is written as a new flowfile attribute 'CSVData' " +
    +                    "or written in the flowfile content.")
    +            .required(true)
    +            .allowableValues(OUTPUT_NEW_ATTRIBUTE, OUTPUT_OVERWRITE_CONTENT)
    +            .defaultValue(OUTPUT_NEW_ATTRIBUTE.getDisplayName())
    --- End diff --
    
    The "write to attribute" option is a cool concept. I thought it might also be beneficial in other areas (including UpdateAttribute), so I wrote up https://issues.apache.org/jira/browse/NIFI-5276 to cover it (and will soon have a PR). If that change were included, would it preclude you from adding such a property to this processor? If not, then no worries; I just wanted to mention it in case the other Jira would take care of that use case instead.


---