You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by MikeThomsen <gi...@git.apache.org> on 2018/06/02 20:05:16 UTC
[GitHub] nifi pull request #2737: NIFI-5231 Added RecordStats processor.
Github user MikeThomsen commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2737#discussion_r192569021
--- Diff: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/RecordStats.java ---
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.standard;
+
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.Validator;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.record.path.FieldValue;
+import org.apache.nifi.record.path.RecordPath;
+import org.apache.nifi.record.path.RecordPathResult;
+import org.apache.nifi.record.path.util.RecordPathCache;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.record.Record;
+
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+public class RecordStats extends AbstractProcessor {
+ /**
+  * Controller service used to turn the incoming FlowFile content into records.
+  * Marked as required because getStats resolves this property to a
+  * RecordReaderFactory and calls it without a null check, so leaving it unset
+  * must be rejected at validation time rather than failing while processing.
+  */
+ static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
+     .name("record-stats-reader")
+     .displayName("Record Reader")
+     .description("A record reader to use for reading the records.")
+     .addValidator(Validator.VALID)
+     .identifiesControllerService(RecordReaderFactory.class)
+     .required(true)
+     .build();
+
+ /** Destination for FlowFiles that onTrigger annotated with statistics without an exception. */
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+     .description("If a flowfile is successfully processed, it goes here.")
+     .name("success")
+     .build();
+
+ /** Destination for FlowFiles for which onTrigger caught an exception. */
+ static final Relationship REL_FAILURE = new Relationship.Builder()
+     .description("If a flowfile fails to be processed, it goes here.")
+     .name("failure")
+     .build();
+
+ /**
+  * Builds the descriptor for a user-defined (dynamic) property.
+  * getRecordPaths later compiles the value of every dynamic property as a
+  * record path, so blank values are rejected here.
+  *
+  * @param propertyDescriptorName the name the user gave the dynamic property
+  * @return a dynamic, non-blank-validated descriptor carrying that name
+  */
+ @Override
+ protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+     return new PropertyDescriptor.Builder()
+         .name(propertyDescriptorName)
+         .displayName(propertyDescriptorName)
+         .dynamic(true)
+         .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+         .build();
+ }
+
+ /** Record path cache consulted by getRecordPaths; replaced whenever the scheduling hook below runs. */
+ private RecordPathCache cache;
+
+ /**
+  * Scheduling hook (annotated with OnScheduled despite its name) that installs
+  * a new record path cache.
+  * NOTE(review): the constructor argument 25 is presumably the maximum number
+  * of cached paths - confirm against RecordPathCache.
+  *
+  * @param context the process context; not read by this method
+  */
+ @OnScheduled
+ public void onEnabled(ProcessContext context) {
+     this.cache = new RecordPathCache(25);
+ }
+
+ /**
+  * Lists the relationships this processor can route to.
+  * A plain HashSet is populated instead of using double-brace initialization,
+  * which would create an anonymous HashSet subclass holding a reference to
+  * this processor on every call.
+  *
+  * @return a new mutable set containing REL_SUCCESS and REL_FAILURE
+  */
+ @Override
+ public Set<Relationship> getRelationships() {
+     final Set<Relationship> relationships = new HashSet<>();
+     relationships.add(REL_SUCCESS);
+     relationships.add(REL_FAILURE);
+     return relationships;
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+ FlowFile input = session.get();
+ if (input == null) {
+ return;
+ }
+
+ try {
+ Map<String, RecordPath> paths = getRecordPaths(context);
+ Map<String, String> stats = getStats(input, paths, context, session);
+
+ input = session.putAllAttributes(input, stats);
+
+ session.transfer(input, REL_SUCCESS);
+
+ } catch (Exception ex) {
+ getLogger().error("Error processing stats.", ex);
+ session.transfer(input, REL_FAILURE);
+ }
+
+ }
+
+ protected Map<String, RecordPath> getRecordPaths(ProcessContext context) {
+ return context.getProperties().keySet()
+ .stream().filter(p -> p.isDynamic() && !p.getName().contains(RECORD_READER.getName()))
+ .collect(Collectors.toMap(
+ e -> e.getName(),
+ e -> {
+ String val = context.getProperty(e).getValue();
+ return cache.getCompiled(val);
+ })
+ );
+ }
+
+ protected Map<String, String> getStats(FlowFile input, Map<String, RecordPath> paths, ProcessContext context, ProcessSession session) {
+ try (InputStream is = session.read(input)) {
+ RecordReaderFactory factory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
+ RecordReader reader = factory.createRecordReader(input, is, getLogger());
+
+ Map<String, Integer> retVal = new HashMap<>();
+ Record record;
+
+ int recordCount = 0;
+ while ((record = reader.nextRecord()) != null) {
+ for (Map.Entry<String, RecordPath> entry : paths.entrySet()) {
+ RecordPathResult result = entry.getValue().evaluate(record);
+ Optional<FieldValue> value = result.getSelectedFields().findFirst();
+ if (value.isPresent() && value.get().getValue() != null) {
+ FieldValue fieldValue = value.get();
+ String approxValue = fieldValue.getValue().toString();
+ String key = String.format("%s.%s", entry.getKey(), approxValue);
+ Integer stat = retVal.containsKey(key) ? retVal.get(key) : 0;
+ Integer baseStat = retVal.containsKey(entry.getKey()) ? retVal.get(entry.getKey()) : 0;
+ stat++;
+ baseStat++;
+
+ retVal.put(key, stat);
+ retVal.put(entry.getKey(), baseStat);
+ }
+ }
+
+ recordCount++;
+ }
--- End diff --
Done.
---