You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ex...@apache.org on 2024/04/25 04:05:51 UTC

(nifi) branch main updated: NIFI-12960 Support reading password-protected files in ExcelReader

This is an automated email from the ASF dual-hosted git repository.

exceptionfactory pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new d54e85fab2 NIFI-12960 Support reading password-protected files in ExcelReader
d54e85fab2 is described below

commit d54e85fab2464f72935421be58ccb76d14dfd49d
Author: dan-s1 <ds...@gmail.com>
AuthorDate: Mon Apr 15 19:38:56 2024 +0000

    NIFI-12960 Support reading password-protected files in ExcelReader
    
    This closes #8658
    
    Signed-off-by: David Handermann <ex...@apache.org>
---
 .../java/org/apache/nifi/excel/ExcelReader.java    | 52 ++++++++++++
 .../org/apache/nifi/excel/ExcelRecordReader.java   |  2 +-
 .../nifi/excel/ExcelRecordReaderConfiguration.java | 25 ++++++
 .../org/apache/nifi/excel/ExcelRecordSource.java   |  8 +-
 .../java/org/apache/nifi/excel/RowIterator.java    |  7 +-
 .../apache/nifi/excel/TestExcelRecordReader.java   | 95 ++++++++++++++++++++++
 6 files changed, 185 insertions(+), 4 deletions(-)

diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
index 25fcc449c9..24d47f3a75 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
@@ -21,6 +21,7 @@ import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.annotation.lifecycle.OnEnabled;
 import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.DescribedValue;
 import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.context.PropertyContext;
 import org.apache.nifi.controller.ConfigurationContext;
@@ -61,6 +62,34 @@ import java.util.Map;
         + "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.")
 public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory {
 
+    public enum ProtectionType implements DescribedValue {
+        UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"),
+        PASSWORD("Password Protected", "An Excel spreadsheet protected by a password");
+
+        ProtectionType(String displayName, String description) {
+            this.displayName = displayName;
+            this.description = description;
+        }
+
+        private final String displayName;
+        private final String description;
+
+        @Override
+        public String getValue() {
+            return name();
+        }
+
+        @Override
+        public String getDisplayName() {
+            return displayName;
+        }
+
+        @Override
+        public String getDescription() {
+            return description;
+        }
+    }
+
     public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor
             .Builder().name("Required Sheets")
             .displayName("Required Sheets")
@@ -83,6 +112,25 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
             .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
             .build();
 
+    public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor
+            .Builder().name("Protection Type")
+            .displayName("Protection Type")
+            .description("Specifies whether an Excel spreadsheet is protected by a password or not.")
+            .required(true)
+            .allowableValues(ProtectionType.class)
+            .defaultValue(ProtectionType.UNPROTECTED)
+            .build();
+
+    public static final PropertyDescriptor PASSWORD = new PropertyDescriptor
+            .Builder().name("Password")
+            .displayName("Password")
+            .description("The password for a password protected Excel spreadsheet")
+            .required(true)
+            .sensitive(true)
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD)
+            .build();
+
     private volatile ConfigurationContext configurationContext;
     private volatile String dateFormat;
     private volatile String timeFormat;
@@ -106,6 +154,7 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
 
         final List<String> requiredSheets = getRequiredSheets(variables);
         final int firstRow = getStartingRow(variables);
+        final String password = configurationContext.getProperty(PASSWORD).getValue();
         final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
                 .withDateFormat(dateFormat)
                 .withRequiredSheets(requiredSheets)
@@ -113,6 +162,7 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
                 .withSchema(schema)
                 .withTimeFormat(timeFormat)
                 .withTimestampFormat(timestampFormat)
+                .withPassword(password)
                 .build();
 
         return new ExcelRecordReader(configuration, in, logger);
@@ -123,6 +173,8 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
         final List<PropertyDescriptor> properties = new ArrayList<>(super.getSupportedPropertyDescriptors());
         properties.add(STARTING_ROW);
         properties.add(REQUIRED_SHEETS);
+        properties.add(PROTECTION_TYPE);
+        properties.add(PASSWORD);
         properties.add(DateTimeUtils.DATE_FORMAT);
         properties.add(DateTimeUtils.TIME_FORMAT);
         properties.add(DateTimeUtils.TIMESTAMP_FORMAT);
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
index 78a7768679..f9df73a38c 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
@@ -68,7 +68,7 @@ public class ExcelRecordReader implements RecordReader {
         }
 
         try {
-            this.rowIterator = new RowIterator(inputStream, configuration.getRequiredSheets(), configuration.getFirstRow(), logger);
+            this.rowIterator = new RowIterator(inputStream, configuration, logger);
         } catch (RuntimeException e) {
             throw new MalformedRecordException("Read initial Record from Excel XLSX failed", e);
         }
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
index 2275b91333..6faa826ad5 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
@@ -28,6 +28,8 @@ public class ExcelRecordReaderConfiguration {
     private String dateFormat;
     private String timeFormat;
     private String timestampFormat;
+    private String password;
+    private boolean avoidTempFiles;
 
     private ExcelRecordReaderConfiguration() {
     }
@@ -56,6 +58,14 @@ public class ExcelRecordReaderConfiguration {
         return timestampFormat;
     }
 
+    public String getPassword() {
+        return password;
+    }
+
+    public boolean isAvoidTempFiles() {
+        return avoidTempFiles;
+    }
+
     public static final class Builder {
         private RecordSchema schema;
         private List<String> requiredSheets;
@@ -63,6 +73,8 @@ public class ExcelRecordReaderConfiguration {
         private String dateFormat;
         private String timeFormat;
         private String timestampFormat;
+        private String password;
+        private boolean avoidTempFiles;
 
         public Builder withSchema(RecordSchema schema) {
             this.schema = schema;
@@ -94,6 +106,16 @@ public class ExcelRecordReaderConfiguration {
             return this;
         }
 
+        public Builder withPassword(String password) {
+            this.password = password;
+            return this;
+        }
+
+        public Builder withAvoidTempFiles(boolean avoidTempFiles) {
+            this.avoidTempFiles = avoidTempFiles;
+            return this;
+        }
+
         public ExcelRecordReaderConfiguration build() {
             ExcelRecordReaderConfiguration excelRecordReaderConfiguration = new ExcelRecordReaderConfiguration();
             excelRecordReaderConfiguration.schema = this.schema;
@@ -102,6 +124,9 @@ public class ExcelRecordReaderConfiguration {
             excelRecordReaderConfiguration.requiredSheets = this.requiredSheets == null ? Collections.emptyList() : this.requiredSheets;
             excelRecordReaderConfiguration.dateFormat = this.dateFormat;
             excelRecordReaderConfiguration.firstRow = this.firstRow;
+            excelRecordReaderConfiguration.password = password;
+            excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles;
+
             return excelRecordReaderConfiguration;
         }
     }
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
index 9ad00882a4..24f4a9fcff 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
@@ -35,7 +35,13 @@ public class ExcelRecordSource implements RecordSource<Row> {
         final Integer rawFirstRow = context.getProperty(ExcelReader.STARTING_ROW).evaluateAttributeExpressions(variables).asInteger();
         final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow;
         final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow);
-        this.rowIterator = new RowIterator(in, requiredSheets, zeroBasedFirstRow, logger);
+        final String password = context.getProperty(ExcelReader.PASSWORD).getValue();
+        final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+                .withRequiredSheets(requiredSheets)
+                .withFirstRow(zeroBasedFirstRow)
+                .withPassword(password)
+                .build();
+        this.rowIterator = new RowIterator(in, configuration, logger);
     }
 
     @Override
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
index 2b5f4e987a..733697efd1 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
@@ -41,12 +41,15 @@ class RowIterator implements Iterator<Row>, Closeable {
     private Iterator<Row> currentRows;
     private Row currentRow;
 
-    RowIterator(final InputStream in, final List<String> requiredSheets, final int firstRow, final ComponentLog logger) {
+    RowIterator(final InputStream in, final ExcelRecordReaderConfiguration configuration, final ComponentLog logger) {
         this.workbook = StreamingReader.builder()
                 .rowCacheSize(100)
                 .bufferSize(4096)
+                .password(configuration.getPassword())
+                .setAvoidTempFiles(configuration.isAvoidTempFiles())
                 .open(in);
 
+        final List<String> requiredSheets = configuration.getRequiredSheets();
         if (requiredSheets == null || requiredSheets.isEmpty()) {
             this.sheets = this.workbook.iterator();
         } else {
@@ -65,7 +68,7 @@ class RowIterator implements Iterator<Row>, Closeable {
                     .collect(Collectors.toList()).iterator();
         }
 
-        this.firstRow = firstRow;
+        this.firstRow = configuration.getFirstRow();
         this.logger = logger;
         setCurrent();
     }
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
index 6fde3d07f1..6f5a0afeed 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
@@ -26,6 +26,16 @@ import org.apache.nifi.serialization.record.RecordField;
 import org.apache.nifi.serialization.record.RecordFieldType;
 import org.apache.nifi.serialization.record.RecordSchema;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.crypt.EncryptionMode;
+import org.apache.poi.poifs.crypt.Encryptor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -33,7 +43,10 @@ import org.junit.jupiter.params.provider.ValueSource;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.InputStream;
+import java.io.OutputStream;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -50,10 +63,60 @@ public class TestExcelRecordReader {
 
     private static final String DATA_FORMATTING_FILE = "dataformatting.xlsx";
     private static final String MULTI_SHEET_FILE = "twoSheets.xlsx";
+    private static final String PASSWORD = "nifi";
+    private static final ByteArrayOutputStream PASSWORD_PROTECTED = new ByteArrayOutputStream();
+    private static final Object[][] DATA = {
+            {"ID", "Name"},
+            {1, "Manny"},
+            {2, "Moe"},
+            {3, "Jack"},
+    };
 
     @Mock
     ComponentLog logger;
 
+    @BeforeAll
+    static void setUpBeforeAll() throws Exception {
+        //Generate an Excel file and populate it with data
+        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        try (XSSFWorkbook workbook = new XSSFWorkbook()) {
+            final XSSFSheet sheet = workbook.createSheet("User Info");
+            populateSheet(sheet);
+            workbook.write(outputStream);
+        }
+
+        //Protect the Excel file with a password
+        try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
+            EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile);
+            Encryptor encryptor = encryptionInfo.getEncryptor();
+            encryptor.confirmPassword(PASSWORD);
+
+            try (OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(outputStream.toByteArray()));
+                 OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
+                opc.save(os);
+            }
+            poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED);
+        }
+    }
+
+    private static void populateSheet(XSSFSheet sheet) {
+        //Adding the data to the Excel worksheet
+        int rowCount = 0;
+        for (Object[] dataRow : DATA) {
+            Row row = sheet.createRow(rowCount++);
+            int columnCount = 0;
+
+            for (Object field : dataRow) {
+                Cell cell = row.createCell(columnCount++);
+                if (field instanceof String) {
+                    cell.setCellValue((String) field);
+                } else if (field instanceof Integer) {
+                    cell.setCellValue((Integer) field);
+                }
+            }
+        }
+    }
+
     @Test
     public void testNonExcelFile() {
         ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
@@ -220,4 +283,36 @@ public class TestExcelRecordReader {
 
         assertEquals(7, records.size());
     }
+
+    @Test
+    void testPasswordProtected() throws Exception {
+        RecordSchema schema = getPasswordProtectedSchema();
+        ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+                .withSchema(schema)
+                .withPassword(PASSWORD)
+                .withAvoidTempFiles(true)
+                .build();
+
+        InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
+        ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger);
+        List<Record> records = getRecords(recordReader, false, false);
+
+        assertEquals(DATA.length, records.size());
+    }
+
+    @Test
+    void testPasswordProtectedWithoutPassword() {
+        RecordSchema schema = getPasswordProtectedSchema();
+        ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+                .withSchema(schema)
+                .build();
+
+        InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
+        assertThrows(Exception.class, () -> new ExcelRecordReader(configuration, inputStream, logger));
+    }
+
+    private RecordSchema getPasswordProtectedSchema() {
+        return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()),
+                new RecordField("name", RecordFieldType.STRING.getDataType())));
+    }
 }