You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by GitBox <gi...@apache.org> on 2022/08/23 09:28:59 UTC

[GitHub] [nifi] tpalfy commented on a diff in pull request #6279: NIFI-10230 added FetchSmb

tpalfy commented on code in PR #6279:
URL: https://github.com/apache/nifi/pull/6279#discussion_r952377790


##########
nifi-nar-bundles/nifi-smb-bundle/nifi-smb-processors/src/main/java/org/apache/nifi/processors/smb/FetchSmb.java:
##########
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.smb;
+
+import static java.util.Arrays.asList;
+import static org.apache.nifi.processor.util.StandardValidators.NON_EMPTY_EL_VALIDATOR;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.PropertyDescriptor.Builder;
+import org.apache.nifi.expression.ExpressionLanguageScope;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.services.smb.SmbClientProviderService;
+import org.apache.nifi.services.smb.SmbClientService;
+import org.apache.nifi.services.smb.SmbException;
+
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@Tags({"samba, smb, cifs, files", "fetch"})
+@CapabilityDescription("Fetches files from a SMB Share. Designed to be used in tandem with ListSmb.")
+@SeeAlso({ListSmb.class, PutSmbFile.class, GetSmbFile.class})
+@WritesAttributes({
+        @WritesAttribute(attribute = FetchSmb.ERROR_CODE_ATTRIBUTE, description = "The error code returned by SMB when the fetch of a file fails"),
+        @WritesAttribute(attribute = FetchSmb.ERROR_MESSAGE_ATTRIBUTE, description = "The error message returned by SMB when the fetch of a file fails")
+})
+public class FetchSmb extends AbstractProcessor {
+
+    public static final String ERROR_CODE_ATTRIBUTE = "error.code";
+    public static final String ERROR_MESSAGE_ATTRIBUTE = "error.message";
+
+    public static final PropertyDescriptor FILE_ID = new PropertyDescriptor
+            .Builder().name("file-id")
+            .displayName("File ID")
+            .description("The identifier of the file to fetch.")
+            .required(true)
+            .defaultValue("${identifier}")
+            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .addValidator(NON_EMPTY_EL_VALIDATOR)
+            .build();
+
+    public static final PropertyDescriptor SMB_CLIENT_PROVIDER_SERVICE = new Builder()
+            .name("smb-client-provider-service")
+            .displayName("SMB Client Provider Service")
+            .description("Specifies the SMB client provider to use for creating SMB connections.")
+            .required(true)
+            .identifiesControllerService(SmbClientProviderService.class)
+            .build();
+
+    public static final Relationship REL_SUCCESS =
+            new Relationship.Builder()
+                    .name("success")
+                    .description("A flowfile will be routed here for each successfully fetched File.")
+                    .build();
+    public static final Relationship REL_FAILURE =
+            new Relationship.Builder().name("failure")
+                    .description(
+                            "A flowfile will be routed here for each File for which fetch was attempted but failed.")
+                    .build();
+    public static final Relationship REL_INPUT_FAILURE =
+            new Relationship.Builder().name("input_failure")
+                    .description("The incoming flowfile will be routed here if its content could not be processed.")
+                    .build();
+    public static final Set<Relationship> relationships = Collections.unmodifiableSet(new HashSet<>(asList(
+            REL_SUCCESS,
+            REL_FAILURE,
+            REL_INPUT_FAILURE
+    )));
+    static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
+            .name("record-reader")
+            .displayName("Record Reader")
+            .description(
+                    "Specifies the Controller Service to use for reading incoming NiFi Records. Each record should contain \"identifier\""
+                            + " attribute set to the path and name of the file to fetch."
+                            + " If not set, the Processor expects as attributes of a separate flowfile for each File to fetch.")
+            .identifiesControllerService(RecordReaderFactory.class)
+            .required(false)
+            .build();
+    private static final List<PropertyDescriptor> PROPERTIES = asList(
+            FILE_ID,
+            SMB_CLIENT_PROVIDER_SERVICE,
+            RECORD_READER
+    );
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return relationships;
+    }
+
+    @Override
+    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+        final FlowFile flowFile = session.get();
+        if (flowFile == null) {
+            return;
+        }
+
+        if (context.getProperty(RECORD_READER).isSet()) {
+            final RecordReaderFactory recordReaderFactory =
+                    context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
+
+            try (InputStream inFlowFile = session.read(flowFile)) {
+                final Map<String, String> flowFileAttributes = flowFile.getAttributes();
+                final RecordReader
+                        reader =
+                        recordReaderFactory.createRecordReader(flowFileAttributes, inFlowFile, flowFile.getSize(),
+                                getLogger());
+
+                Record record;
+                while ((record = reader.nextRecord()) != null) {
+                    final String fileName = record.getAsString("identifier");

Review Comment:
   The main purpose of including the record reader is to facilitate more performant cooperation between the List and Fetch processors.
   There are multiple technical challenges that would need to be solved if we wanted to properly expose the record-based configuration to the user (among which the `File ID` is just one of many).
   
   I think the best option would be to leave it as it is, with a small improvement. When an individual fetch - based on a single record - fails, it generates a flowfile whose attributes are the former record fields. We could make it so that those flowfiles can be retried by the processor without stopping and reconfiguring it.
   We could check whether the `File ID` attribute is present on the flowfile and, based on that, decide whether to process the flowfile with the record reader or by just parsing the attributes.
   
   This would also eliminate the problem where the `File ID` is not used when the record reader is set.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@nifi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org