You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ar...@apache.org on 2019/12/10 13:17:16 UTC
[metamodel] 07/10: MM-82 Detect Column Types - Refactoring based on
comments in PR 3
This is an automated email from the ASF dual-hosted git repository.
arjansh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metamodel.git
commit f65d06fee73e62ee92f0f8e76f28f0cf150f96e8
Author: Gerard Dellemann <g....@quadient.com>
AuthorDate: Mon Dec 9 15:08:40 2019 +0100
MM-82 Detect Column Types - Refactoring based on comments in PR 3
---
.../excel/DefaultSpreadsheetReaderDelegate.java | 212 +++++++++++----------
.../org/apache/metamodel/excel/ExcelUtils.java | 135 +++++--------
2 files changed, 157 insertions(+), 190 deletions(-)
diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
index 3672de1..98cdefe 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
@@ -225,7 +225,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
// Now we need the first data row
row = findTheFirstNonEmptyRow(iterator);
if (row != null) {
- detectColumnTypes(row, iterator, columnTypes);
+ new ColumnTypeScanner(sheet).detectColumnTypes(row, iterator, columnTypes);
}
} else {
if (_configuration.getColumnNameLineNumber() == ExcelConfiguration.NO_COLUMN_NAME_LINE) {
@@ -247,109 +247,6 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
return null;
}
- private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator,
- final ColumnType[] columnTypes) {
- detectColumnTypesFirstRow(firstRow, columnTypes);
- detectColumnTypesOtherRows(dataRowIterator, columnTypes);
-
- // If all cells are null, then this loop sets the column type to the default
- for (int i = 0; i < columnTypes.length; i++) {
- if (columnTypes[i] == null) {
- columnTypes[i] = DEFAULT_COLUMN_TYPE;
- }
- }
- }
-
- private void detectColumnTypesFirstRow(final Row firstRow, final ColumnType[] columnTypes) {
- if (firstRow != null && firstRow.getLastCellNum() > 0) {
- for (int i = getColumnOffset(firstRow); i < columnTypes.length; i++) {
- if (firstRow.getCell(i) != null) {
- columnTypes[i] = determineColumnTypeFromCell(firstRow.getCell(i));
- }
- }
- }
- }
-
- private void detectColumnTypesOtherRows(final Iterator<Row> dataRowIterator, final ColumnType[] columnTypes) {
- int numberOfLinesToScan = _configuration.getNumberOfLinesToScan() - 1;
-
- while (dataRowIterator.hasNext() && numberOfLinesToScan-- > 0) {
- final Row currentRow = dataRowIterator.next();
- if (currentRow != null && currentRow.getLastCellNum() > 0) {
- for (int i = getColumnOffset(currentRow); i < columnTypes.length; i++) {
- final ColumnType detectNewColumnType = detectNewColumnTypeCell(columnTypes[i], currentRow
- .getCell(i));
- if (detectNewColumnType != null) {
- columnTypes[i] = detectNewColumnType;
- }
- }
- }
- }
- }
-
- /**
- * Tries to detect a new {@link ColumnType} for a cell.
- * @param currentColumnType
- * @param cell
- * @return Returns a new {@link ColumnType} when detected. Otherwise null is returned.
- */
- private static ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) {
- // Can't detect something new if it's already on the default.
- if (currentColumnType != null && currentColumnType.equals(DEFAULT_COLUMN_TYPE)) {
- return null;
- }
- // Skip if the cell is null. This way 1 missing cell can't influence the column type of all other cells.
- if (cell == null) {
- return null;
- }
-
- final ColumnType detectedColumnType = determineColumnTypeFromCell(cell);
- if (currentColumnType == null) {
- return detectedColumnType;
- } else if (!currentColumnType.equals(detectedColumnType)) {
- // If the column type is Double and a Integer is detected, then don't set it to Integer
- if (currentColumnType.equals(ColumnType.INTEGER) && detectedColumnType.equals(ColumnType.DOUBLE)) {
- // If the column type is Integer and a Double is detected, then set it to Double
- return detectedColumnType;
- } else if (currentColumnType.equals(ColumnType.DOUBLE) && detectedColumnType.equals(ColumnType.INTEGER)) {
- return null;
- } else {
- return DEFAULT_COLUMN_TYPE;
- }
- }
- return null;
- }
-
- private static ColumnType determineColumnTypeFromCell(final Cell cell) {
- switch (cell.getCellType()) {
- case NUMERIC:
- if (DateUtil.isCellDateFormatted(cell)) {
- return ColumnType.DATE;
- } else {
- return cell.getNumericCellValue() % 1 == 0 ? ColumnType.INTEGER : ColumnType.DOUBLE;
- }
- case BOOLEAN:
- return ColumnType.BOOLEAN;
- case FORMULA:
- FormulaEvaluator formulaEvaluator = cell
- .getSheet()
- .getWorkbook()
- .getCreationHelper()
- .createFormulaEvaluator();
- return determineColumnTypeFromCell(formulaEvaluator.evaluateInCell(cell));
- case STRING:
- // fall through
- case BLANK:
- // fall through
- case _NONE:
- // fall through
- case ERROR:
- // fall through
- default:
- return DEFAULT_COLUMN_TYPE;
- }
- }
-
/**
* Builds columns based on row/cell values.
*
@@ -400,4 +297,111 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
}
return offset;
}
+
+ private class ColumnTypeScanner {
+ final FormulaEvaluator formulaEvaluator;
+
+ ColumnTypeScanner(final Sheet sheet) {
+ formulaEvaluator = sheet.getWorkbook().getCreationHelper().createFormulaEvaluator();
+ }
+
+ private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator,
+ final ColumnType[] columnTypes) {
+ detectColumnTypesFirstRow(firstRow, columnTypes);
+ detectColumnTypesOtherRows(dataRowIterator, columnTypes);
+
+ // If all cells are null, then this loop sets the column type to the default
+ for (int i = 0; i < columnTypes.length; i++) {
+ if (columnTypes[i] == null) {
+ columnTypes[i] = DEFAULT_COLUMN_TYPE;
+ }
+ }
+ }
+
+ private void detectColumnTypesFirstRow(final Row firstRow, final ColumnType[] columnTypes) {
+ if (firstRow != null && firstRow.getLastCellNum() > 0) {
+ for (int i = getColumnOffset(firstRow); i < columnTypes.length; i++) {
+ if (firstRow.getCell(i) != null) {
+ columnTypes[i] = determineColumnTypeFromCell(firstRow.getCell(i));
+ }
+ }
+ }
+ }
+
+ private void detectColumnTypesOtherRows(final Iterator<Row> dataRowIterator, final ColumnType[] columnTypes) {
+ int numberOfLinesToScan = _configuration.getNumberOfLinesToScan() - 1;
+
+ while (dataRowIterator.hasNext() && numberOfLinesToScan-- > 0) {
+ final Row currentRow = dataRowIterator.next();
+ if (currentRow != null && currentRow.getLastCellNum() > 0) {
+ for (int i = getColumnOffset(currentRow); i < columnTypes.length; i++) {
+ final ColumnType detectNewColumnType = detectNewColumnTypeCell(columnTypes[i], currentRow
+ .getCell(i));
+ if (detectNewColumnType != null) {
+ columnTypes[i] = detectNewColumnType;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Tries to detect a new {@link ColumnType} for a cell.
+ * @param currentColumnType
+ * @param cell
+ * @return Returns a new {@link ColumnType} when detected. Otherwise null is returned.
+ */
+ private ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) {
+ // Can't detect something new if it's already on the default.
+ if (currentColumnType != null && currentColumnType.equals(DEFAULT_COLUMN_TYPE)) {
+ return null;
+ }
+ // Skip if the cell is null. This way 1 missing cell can't influence the column type of all other cells.
+ if (cell == null) {
+ return null;
+ }
+
+ final ColumnType detectedColumnType = determineColumnTypeFromCell(cell);
+ if (currentColumnType == null) {
+ return detectedColumnType;
+ } else if (!currentColumnType.equals(detectedColumnType)) {
+ // If the column type is Double and a Integer is detected, then don't set it to Integer
+ if (currentColumnType.equals(ColumnType.INTEGER) && detectedColumnType.equals(ColumnType.DOUBLE)) {
+ // If the column type is Integer and a Double is detected, then set it to Double
+ return detectedColumnType;
+ } else if (currentColumnType.equals(ColumnType.DOUBLE) && detectedColumnType
+ .equals(ColumnType.INTEGER)) {
+ return null;
+ } else {
+ return DEFAULT_COLUMN_TYPE;
+ }
+ }
+ return null;
+ }
+
+ private ColumnType determineColumnTypeFromCell(final Cell cell) {
+ switch (cell.getCellType()) {
+ case NUMERIC:
+ if (DateUtil.isCellDateFormatted(cell)) {
+ return ColumnType.DATE;
+ } else {
+ return cell.getNumericCellValue() % 1 == 0 ? ColumnType.INTEGER : ColumnType.DOUBLE;
+ }
+ case BOOLEAN:
+ return ColumnType.BOOLEAN;
+ case FORMULA:
+ return determineColumnTypeFromCell(formulaEvaluator.evaluateInCell(cell));
+ case STRING:
+ // fall through
+ case BLANK:
+ // fall through
+ case _NONE:
+ // fall through
+ case ERROR:
+ // fall through
+ default:
+ return DEFAULT_COLUMN_TYPE;
+ }
+ }
+ }
}
diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
index a86a646..c242f9b 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
@@ -46,7 +46,6 @@ import org.apache.metamodel.util.Resource;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
-import org.apache.poi.ss.formula.FormulaParseException;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
@@ -186,27 +185,7 @@ final class ExcelUtils {
result = Boolean.toString(cell.getBooleanCellValue());
break;
case ERROR:
- String errorResult;
- try {
- byte errorCode = cell.getErrorCellValue();
- FormulaError formulaError = FormulaError.forInt(errorCode);
- errorResult = formulaError.getString();
- } catch (RuntimeException e) {
- logger
- .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(),
- e.getMessage());
- if (cell instanceof XSSFCell) {
- // hack to get error string, which is available
- String value = ((XSSFCell) cell).getErrorCellString();
- errorResult = value;
- } else {
- logger
- .error("Couldn't handle unexpected error scenario in cell: (" + cell.getRowIndex() + ","
- + cell.getColumnIndex() + ")", e);
- throw e;
- }
- }
- result = errorResult;
+ result = (String) getErrorResult(cell);
break;
case FORMULA:
result = getFormulaCellValue(wb, cell);
@@ -251,24 +230,7 @@ final class ExcelUtils {
result = Boolean.valueOf(cell.getBooleanCellValue());
break;
case ERROR:
- String errorResult;
- try {
- errorResult = FormulaError.forInt(cell.getErrorCellValue()).getString();
- } catch (final RuntimeException e) {
- logger
- .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(),
- e.getMessage());
- if (cell instanceof XSSFCell) {
- // hack to get error string, which is available
- errorResult = ((XSSFCell) cell).getErrorCellString();
- } else {
- logger
- .error("Couldn't handle unexpected error scenario in cell: (" + cell.getRowIndex() + ","
- + cell.getColumnIndex() + ")", e);
- throw e;
- }
- }
- result = errorResult;
+ result = getErrorResult(cell);
break;
case FORMULA:
result = getFormulaCellValueAsObject(workbook, cell);
@@ -292,6 +254,27 @@ final class ExcelUtils {
return result;
}
+ private static Object getErrorResult(final Cell cell) {
+ String errorResult;
+ try {
+ errorResult = FormulaError.forInt(cell.getErrorCellValue()).getString();
+ } catch (final RuntimeException e) {
+ logger
+ .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(), e
+ .getMessage());
+ if (cell instanceof XSSFCell) {
+ // hack to get error string, which is available
+ errorResult = ((XSSFCell) cell).getErrorCellString();
+ } else {
+ logger
+ .error("Couldn't handle unexpected error scenario in cell: ({},{})", cell.getRowIndex(), cell
+ .getColumnIndex());
+ throw e;
+ }
+ }
+ return errorResult;
+ }
+
private static Object getCellValueChecked(final Workbook workbook, final Cell cell,
final ColumnType expectedColumnType) {
final Object value = getCellValueAsObject(workbook, cell);
@@ -303,16 +286,15 @@ final class ExcelUtils {
if (!(value.getClass().equals(Integer.class) && expectedColumnType
.getJavaEquivalentClass()
.equals(Double.class)) && logger.isWarnEnabled()) {
- final String warning = String
- .format("Cell (%s,%s) has the value '%s' of data type '%s', which doesn't match the detected "
- + "column's data type '%s'. This cell gets value NULL in the DataSet.", cell.getRowIndex(),
+ logger
+ .warn("Cell ({},{}) has the value '{}' of data type '{}', which doesn't match the detected "
+ + "column's data type '{}'. This cell gets value NULL in the DataSet.", cell.getRowIndex(),
cell.getColumnIndex(), value, value.getClass().getSimpleName(), expectedColumnType);
- logger.warn(warning);
}
return null;
}
- private static String getFormulaCellValue(Workbook wb, Cell cell) {
+ private static String getFormulaCellValue(Workbook workbook, Cell cell) {
// first try with a cached/precalculated value
try {
double numericCellValue = cell.getNumericCellValue();
@@ -324,36 +306,13 @@ final class ExcelUtils {
}
// evaluate cell first, if possible
- try {
- if (logger.isInfoEnabled()) {
- logger
- .info("cell ({},{}) is a formula. Attempting to evaluate: {}", cell.getRowIndex(), cell
- .getColumnIndex(), cell.getCellFormula());
- }
-
- final FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
-
- // calculates the formula and puts it's value back into the cell
- final Cell evaluatedCell = evaluator.evaluateInCell(cell);
-
- return getCellValue(wb, evaluatedCell);
- } catch (RuntimeException e) {
- logger
- .warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(),
- cell.getColumnIndex(), e.getMessage());
- // Some exceptions we simply log - result will be then be the
- // actual formula
- if (e instanceof FormulaParseException) {
- logger.warn("Parse exception occurred while evaluating cell formula: " + cell, e);
- } else if (e instanceof IllegalArgumentException) {
- logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e);
- } else {
- logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e);
- }
+ final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
+ if (evaluatedCell != null) {
+ return getCellValue(workbook, evaluatedCell);
+ } else {
+ // last resort: return the string formula
+ return cell.getCellFormula();
}
-
- // last resort: return the string formula
- return cell.getCellFormula();
}
private static Object getFormulaCellValueAsObject(final Workbook workbook, final Cell cell) {
@@ -367,6 +326,16 @@ final class ExcelUtils {
}
// evaluate cell first, if possible
+ final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
+ if (evaluatedCell != null) {
+ return getCellValueAsObject(workbook, evaluatedCell);
+ } else {
+ // last resort: return the string formula
+ return cell.getCellFormula();
+ }
+ }
+
+ private static Cell getEvaluatedCell(final Workbook workbook, final Cell cell) {
try {
if (logger.isInfoEnabled()) {
logger
@@ -377,19 +346,13 @@ final class ExcelUtils {
final FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
// calculates the formula and puts it's value back into the cell
- final Cell evaluatedCell = evaluator.evaluateInCell(cell);
-
- return getCellValueAsObject(workbook, evaluatedCell);
- } catch (final FormulaParseException e) {
- logger.warn("Parse exception occurred while evaluating cell formula: " + cell, e);
- } catch (final IllegalArgumentException e) {
- logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e);
- } catch (final RuntimeException e) {
- logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e);
+ return evaluator.evaluateInCell(cell);
+ } catch (RuntimeException e) {
+ logger
+ .warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(),
+ cell.getColumnIndex(), e.getMessage());
}
-
- // last resort: return the string formula
- return cell.getCellFormula();
+ return null;
}
private static Number getDoubleAsNumber(final double value) {