You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ex...@apache.org on 2024/04/25 04:05:51 UTC
(nifi) branch main updated: NIFI-12960 Support reading password-protected files in ExcelReader
This is an automated email from the ASF dual-hosted git repository.
exceptionfactory pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new d54e85fab2 NIFI-12960 Support reading password-protected files in ExcelReader
d54e85fab2 is described below
commit d54e85fab2464f72935421be58ccb76d14dfd49d
Author: dan-s1 <ds...@gmail.com>
AuthorDate: Mon Apr 15 19:38:56 2024 +0000
NIFI-12960 Support reading password-protected files in ExcelReader
This closes #8658
Signed-off-by: David Handermann <ex...@apache.org>
---
.../java/org/apache/nifi/excel/ExcelReader.java | 52 ++++++++++++
.../org/apache/nifi/excel/ExcelRecordReader.java | 2 +-
.../nifi/excel/ExcelRecordReaderConfiguration.java | 25 ++++++
.../org/apache/nifi/excel/ExcelRecordSource.java | 8 +-
.../java/org/apache/nifi/excel/RowIterator.java | 7 +-
.../apache/nifi/excel/TestExcelRecordReader.java | 95 ++++++++++++++++++++++
6 files changed, 185 insertions(+), 4 deletions(-)
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
index 25fcc449c9..24d47f3a75 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java
@@ -21,6 +21,7 @@ import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.DescribedValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.controller.ConfigurationContext;
@@ -61,6 +62,34 @@ import java.util.Map;
+ "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.")
public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory {
+ public enum ProtectionType implements DescribedValue {
+ UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"),
+ PASSWORD("Password Protected", "An Excel spreadsheet protected by a password");
+
+ ProtectionType(String displayName, String description) {
+ this.displayName = displayName;
+ this.description = description;
+ }
+
+ private final String displayName;
+ private final String description;
+
+ @Override
+ public String getValue() {
+ return name();
+ }
+
+ @Override
+ public String getDisplayName() {
+ return displayName;
+ }
+
+ @Override
+ public String getDescription() {
+ return description;
+ }
+ }
+
public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor
.Builder().name("Required Sheets")
.displayName("Required Sheets")
@@ -83,6 +112,25 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.build();
+ public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor
+ .Builder().name("Protection Type")
+ .displayName("Protection Type")
+ .description("Specifies whether an Excel spreadsheet is protected by a password or not.")
+ .required(true)
+ .allowableValues(ProtectionType.class)
+ .defaultValue(ProtectionType.UNPROTECTED)
+ .build();
+
+ public static final PropertyDescriptor PASSWORD = new PropertyDescriptor
+ .Builder().name("Password")
+ .displayName("Password")
+ .description("The password for a password protected Excel spreadsheet")
+ .required(true)
+ .sensitive(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD)
+ .build();
+
private volatile ConfigurationContext configurationContext;
private volatile String dateFormat;
private volatile String timeFormat;
@@ -106,6 +154,7 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
final List<String> requiredSheets = getRequiredSheets(variables);
final int firstRow = getStartingRow(variables);
+ final String password = configurationContext.getProperty(PASSWORD).getValue();
final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
.withDateFormat(dateFormat)
.withRequiredSheets(requiredSheets)
@@ -113,6 +162,7 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
.withSchema(schema)
.withTimeFormat(timeFormat)
.withTimestampFormat(timestampFormat)
+ .withPassword(password)
.build();
return new ExcelRecordReader(configuration, in, logger);
@@ -123,6 +173,8 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
final List<PropertyDescriptor> properties = new ArrayList<>(super.getSupportedPropertyDescriptors());
properties.add(STARTING_ROW);
properties.add(REQUIRED_SHEETS);
+ properties.add(PROTECTION_TYPE);
+ properties.add(PASSWORD);
properties.add(DateTimeUtils.DATE_FORMAT);
properties.add(DateTimeUtils.TIME_FORMAT);
properties.add(DateTimeUtils.TIMESTAMP_FORMAT);
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
index 78a7768679..f9df73a38c 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java
@@ -68,7 +68,7 @@ public class ExcelRecordReader implements RecordReader {
}
try {
- this.rowIterator = new RowIterator(inputStream, configuration.getRequiredSheets(), configuration.getFirstRow(), logger);
+ this.rowIterator = new RowIterator(inputStream, configuration, logger);
} catch (RuntimeException e) {
throw new MalformedRecordException("Read initial Record from Excel XLSX failed", e);
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
index 2275b91333..6faa826ad5 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java
@@ -28,6 +28,8 @@ public class ExcelRecordReaderConfiguration {
private String dateFormat;
private String timeFormat;
private String timestampFormat;
+ private String password;
+ private boolean avoidTempFiles;
private ExcelRecordReaderConfiguration() {
}
@@ -56,6 +58,14 @@ public class ExcelRecordReaderConfiguration {
return timestampFormat;
}
+ public String getPassword() {
+ return password;
+ }
+
+ public boolean isAvoidTempFiles() {
+ return avoidTempFiles;
+ }
+
public static final class Builder {
private RecordSchema schema;
private List<String> requiredSheets;
@@ -63,6 +73,8 @@ public class ExcelRecordReaderConfiguration {
private String dateFormat;
private String timeFormat;
private String timestampFormat;
+ private String password;
+ private boolean avoidTempFiles;
public Builder withSchema(RecordSchema schema) {
this.schema = schema;
@@ -94,6 +106,16 @@ public class ExcelRecordReaderConfiguration {
return this;
}
+ public Builder withPassword(String password) {
+ this.password = password;
+ return this;
+ }
+
+ public Builder withAvoidTempFiles(boolean avoidTempFiles) {
+ this.avoidTempFiles = avoidTempFiles;
+ return this;
+ }
+
public ExcelRecordReaderConfiguration build() {
ExcelRecordReaderConfiguration excelRecordReaderConfiguration = new ExcelRecordReaderConfiguration();
excelRecordReaderConfiguration.schema = this.schema;
@@ -102,6 +124,9 @@ public class ExcelRecordReaderConfiguration {
excelRecordReaderConfiguration.requiredSheets = this.requiredSheets == null ? Collections.emptyList() : this.requiredSheets;
excelRecordReaderConfiguration.dateFormat = this.dateFormat;
excelRecordReaderConfiguration.firstRow = this.firstRow;
+ excelRecordReaderConfiguration.password = password;
+ excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles;
+
return excelRecordReaderConfiguration;
}
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
index 9ad00882a4..24f4a9fcff 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java
@@ -35,7 +35,13 @@ public class ExcelRecordSource implements RecordSource<Row> {
final Integer rawFirstRow = context.getProperty(ExcelReader.STARTING_ROW).evaluateAttributeExpressions(variables).asInteger();
final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow;
final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow);
- this.rowIterator = new RowIterator(in, requiredSheets, zeroBasedFirstRow, logger);
+ final String password = context.getProperty(ExcelReader.PASSWORD).getValue();
+ final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+ .withRequiredSheets(requiredSheets)
+ .withFirstRow(zeroBasedFirstRow)
+ .withPassword(password)
+ .build();
+ this.rowIterator = new RowIterator(in, configuration, logger);
}
@Override
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
index 2b5f4e987a..733697efd1 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java
@@ -41,12 +41,15 @@ class RowIterator implements Iterator<Row>, Closeable {
private Iterator<Row> currentRows;
private Row currentRow;
- RowIterator(final InputStream in, final List<String> requiredSheets, final int firstRow, final ComponentLog logger) {
+ RowIterator(final InputStream in, final ExcelRecordReaderConfiguration configuration, final ComponentLog logger) {
this.workbook = StreamingReader.builder()
.rowCacheSize(100)
.bufferSize(4096)
+ .password(configuration.getPassword())
+ .setAvoidTempFiles(configuration.isAvoidTempFiles())
.open(in);
+ final List<String> requiredSheets = configuration.getRequiredSheets();
if (requiredSheets == null || requiredSheets.isEmpty()) {
this.sheets = this.workbook.iterator();
} else {
@@ -65,7 +68,7 @@ class RowIterator implements Iterator<Row>, Closeable {
.collect(Collectors.toList()).iterator();
}
- this.firstRow = firstRow;
+ this.firstRow = configuration.getFirstRow();
this.logger = logger;
setCurrent();
}
diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
index 6fde3d07f1..6f5a0afeed 100644
--- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
+++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java
@@ -26,6 +26,16 @@ import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.crypt.EncryptionMode;
+import org.apache.poi.poifs.crypt.Encryptor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
@@ -33,7 +43,10 @@ import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.InputStream;
+import java.io.OutputStream;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
@@ -50,10 +63,60 @@ public class TestExcelRecordReader {
private static final String DATA_FORMATTING_FILE = "dataformatting.xlsx";
private static final String MULTI_SHEET_FILE = "twoSheets.xlsx";
+ private static final String PASSWORD = "nifi";
+ private static final ByteArrayOutputStream PASSWORD_PROTECTED = new ByteArrayOutputStream();
+ private static final Object[][] DATA = {
+ {"ID", "Name"},
+ {1, "Manny"},
+ {2, "Moe"},
+ {3, "Jack"},
+ };
@Mock
ComponentLog logger;
+ @BeforeAll
+ static void setUpBeforeAll() throws Exception {
+ //Generate an Excel file and populate it with data
+ final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ try (XSSFWorkbook workbook = new XSSFWorkbook()) {
+ final XSSFSheet sheet = workbook.createSheet("User Info");
+ populateSheet(sheet);
+ workbook.write(outputStream);
+ }
+
+ //Protect the Excel file with a password
+ try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
+ EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile);
+ Encryptor encryptor = encryptionInfo.getEncryptor();
+ encryptor.confirmPassword(PASSWORD);
+
+ try (OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(outputStream.toByteArray()));
+ OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
+ opc.save(os);
+ }
+ poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED);
+ }
+ }
+
+ private static void populateSheet(XSSFSheet sheet) {
+ //Adding the data to the Excel worksheet
+ int rowCount = 0;
+ for (Object[] dataRow : DATA) {
+ Row row = sheet.createRow(rowCount++);
+ int columnCount = 0;
+
+ for (Object field : dataRow) {
+ Cell cell = row.createCell(columnCount++);
+ if (field instanceof String) {
+ cell.setCellValue((String) field);
+ } else if (field instanceof Integer) {
+ cell.setCellValue((Integer) field);
+ }
+ }
+ }
+ }
+
@Test
public void testNonExcelFile() {
ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
@@ -220,4 +283,36 @@ public class TestExcelRecordReader {
assertEquals(7, records.size());
}
+
+ @Test
+ void testPasswordProtected() throws Exception {
+ RecordSchema schema = getPasswordProtectedSchema();
+ ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+ .withSchema(schema)
+ .withPassword(PASSWORD)
+ .withAvoidTempFiles(true)
+ .build();
+
+ InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
+ ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger);
+ List<Record> records = getRecords(recordReader, false, false);
+
+ assertEquals(DATA.length, records.size());
+ }
+
+ @Test
+ void testPasswordProtectedWithoutPassword() {
+ RecordSchema schema = getPasswordProtectedSchema();
+ ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
+ .withSchema(schema)
+ .build();
+
+ InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
+ assertThrows(Exception.class, () -> new ExcelRecordReader(configuration, inputStream, logger));
+ }
+
+ private RecordSchema getPasswordProtectedSchema() {
+ return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()),
+ new RecordField("name", RecordFieldType.STRING.getDataType())));
+ }
}