You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ar...@apache.org on 2019/12/10 13:17:16 UTC

[metamodel] 07/10: MM-82 Detect Column Types - Refactoring based on comments in PR 3

This is an automated email from the ASF dual-hosted git repository.

arjansh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metamodel.git

commit f65d06fee73e62ee92f0f8e76f28f0cf150f96e8
Author: Gerard Dellemann <g....@quadient.com>
AuthorDate: Mon Dec 9 15:08:40 2019 +0100

    MM-82 Detect Column Types - Refactoring based on comments in PR 3
---
 .../excel/DefaultSpreadsheetReaderDelegate.java    | 212 +++++++++++----------
 .../org/apache/metamodel/excel/ExcelUtils.java     | 135 +++++--------
 2 files changed, 157 insertions(+), 190 deletions(-)

diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
index 3672de1..98cdefe 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java
@@ -225,7 +225,7 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
             // Now we need the first data row
             row = findTheFirstNonEmptyRow(iterator);
             if (row != null) {
-                detectColumnTypes(row, iterator, columnTypes);
+                new ColumnTypeScanner(sheet).detectColumnTypes(row, iterator, columnTypes);
             }
         } else {
             if (_configuration.getColumnNameLineNumber() == ExcelConfiguration.NO_COLUMN_NAME_LINE) {
@@ -247,109 +247,6 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
         return null;
     }
 
-    private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator,
-            final ColumnType[] columnTypes) {
-        detectColumnTypesFirstRow(firstRow, columnTypes);
-        detectColumnTypesOtherRows(dataRowIterator, columnTypes);
-
-        // If all cells are null, then this loop sets the column type to the default
-        for (int i = 0; i < columnTypes.length; i++) {
-            if (columnTypes[i] == null) {
-                columnTypes[i] = DEFAULT_COLUMN_TYPE;
-            }
-        }
-    }
-
-    private void detectColumnTypesFirstRow(final Row firstRow, final ColumnType[] columnTypes) {
-        if (firstRow != null && firstRow.getLastCellNum() > 0) {
-            for (int i = getColumnOffset(firstRow); i < columnTypes.length; i++) {
-                if (firstRow.getCell(i) != null) {
-                    columnTypes[i] = determineColumnTypeFromCell(firstRow.getCell(i));
-                }
-            }
-        }
-    }
-
-    private void detectColumnTypesOtherRows(final Iterator<Row> dataRowIterator, final ColumnType[] columnTypes) {
-        int numberOfLinesToScan = _configuration.getNumberOfLinesToScan() - 1;
-
-        while (dataRowIterator.hasNext() && numberOfLinesToScan-- > 0) {
-            final Row currentRow = dataRowIterator.next();
-            if (currentRow != null && currentRow.getLastCellNum() > 0) {
-                for (int i = getColumnOffset(currentRow); i < columnTypes.length; i++) {
-                    final ColumnType detectNewColumnType = detectNewColumnTypeCell(columnTypes[i], currentRow
-                            .getCell(i));
-                    if (detectNewColumnType != null) {
-                        columnTypes[i] = detectNewColumnType;
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * Tries to detect a new {@link ColumnType} for a cell.
-     * @param currentColumnType
-     * @param cell
-     * @return Returns a new {@link ColumnType} when detected. Otherwise null is returned.
-     */
-    private static ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) {
-        // Can't detect something new if it's already on the default.
-        if (currentColumnType != null && currentColumnType.equals(DEFAULT_COLUMN_TYPE)) {
-            return null;
-        }
-        // Skip if the cell is null. This way 1 missing cell can't influence the column type of all other cells.
-        if (cell == null) {
-            return null;
-        }
-
-        final ColumnType detectedColumnType = determineColumnTypeFromCell(cell);
-        if (currentColumnType == null) {
-            return detectedColumnType;
-        } else if (!currentColumnType.equals(detectedColumnType)) {
-            // If the column type is Double and a Integer is detected, then don't set it to Integer
-            if (currentColumnType.equals(ColumnType.INTEGER) && detectedColumnType.equals(ColumnType.DOUBLE)) {
-                // If the column type is Integer and a Double is detected, then set it to Double
-                return detectedColumnType;
-            } else if (currentColumnType.equals(ColumnType.DOUBLE) && detectedColumnType.equals(ColumnType.INTEGER)) {
-                return null;
-            } else {
-                return DEFAULT_COLUMN_TYPE;
-            }
-        }
-        return null;
-    }
-
-    private static ColumnType determineColumnTypeFromCell(final Cell cell) {
-        switch (cell.getCellType()) {
-        case NUMERIC:
-            if (DateUtil.isCellDateFormatted(cell)) {
-                return ColumnType.DATE;
-            } else {
-                return cell.getNumericCellValue() % 1 == 0 ? ColumnType.INTEGER : ColumnType.DOUBLE;
-            }
-        case BOOLEAN:
-            return ColumnType.BOOLEAN;
-        case FORMULA:
-            FormulaEvaluator formulaEvaluator = cell
-                    .getSheet()
-                    .getWorkbook()
-                    .getCreationHelper()
-                    .createFormulaEvaluator();
-            return determineColumnTypeFromCell(formulaEvaluator.evaluateInCell(cell));
-        case STRING:
-            // fall through
-        case BLANK:
-            // fall through
-        case _NONE:
-            // fall through
-        case ERROR:
-            // fall through
-        default:
-            return DEFAULT_COLUMN_TYPE;
-        }
-    }
-
     /**
      * Builds columns based on row/cell values.
      * 
@@ -400,4 +297,111 @@ final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegat
         }
         return offset;
     }
+    
+    private class ColumnTypeScanner {
+        final FormulaEvaluator formulaEvaluator;
+
+        ColumnTypeScanner(final Sheet sheet) {
+            formulaEvaluator = sheet.getWorkbook().getCreationHelper().createFormulaEvaluator();
+        }
+
+        private void detectColumnTypes(final Row firstRow, final Iterator<Row> dataRowIterator,
+                final ColumnType[] columnTypes) {
+            detectColumnTypesFirstRow(firstRow, columnTypes);
+            detectColumnTypesOtherRows(dataRowIterator, columnTypes);
+
+            // If all cells are null, then this loop sets the column type to the default
+            for (int i = 0; i < columnTypes.length; i++) {
+                if (columnTypes[i] == null) {
+                    columnTypes[i] = DEFAULT_COLUMN_TYPE;
+                }
+            }
+        }
+
+        private void detectColumnTypesFirstRow(final Row firstRow, final ColumnType[] columnTypes) {
+            if (firstRow != null && firstRow.getLastCellNum() > 0) {
+                for (int i = getColumnOffset(firstRow); i < columnTypes.length; i++) {
+                    if (firstRow.getCell(i) != null) {
+                        columnTypes[i] = determineColumnTypeFromCell(firstRow.getCell(i));
+                    }
+                }
+            }
+        }
+
+        private void detectColumnTypesOtherRows(final Iterator<Row> dataRowIterator, final ColumnType[] columnTypes) {
+            int numberOfLinesToScan = _configuration.getNumberOfLinesToScan() - 1;
+
+            while (dataRowIterator.hasNext() && numberOfLinesToScan-- > 0) {
+                final Row currentRow = dataRowIterator.next();
+                if (currentRow != null && currentRow.getLastCellNum() > 0) {
+                    for (int i = getColumnOffset(currentRow); i < columnTypes.length; i++) {
+                        final ColumnType detectNewColumnType = detectNewColumnTypeCell(columnTypes[i], currentRow
+                                .getCell(i));
+                        if (detectNewColumnType != null) {
+                            columnTypes[i] = detectNewColumnType;
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Tries to detect a new {@link ColumnType} for a cell.
+         * @param currentColumnType
+         * @param cell
+         * @return Returns a new {@link ColumnType} when detected. Otherwise null is returned.
+         */
+        private ColumnType detectNewColumnTypeCell(final ColumnType currentColumnType, final Cell cell) {
+            // Can't detect something new if it's already on the default.
+            if (currentColumnType != null && currentColumnType.equals(DEFAULT_COLUMN_TYPE)) {
+                return null;
+            }
+            // Skip if the cell is null. This way 1 missing cell can't influence the column type of all other cells.
+            if (cell == null) {
+                return null;
+            }
+
+            final ColumnType detectedColumnType = determineColumnTypeFromCell(cell);
+            if (currentColumnType == null) {
+                return detectedColumnType;
+            } else if (!currentColumnType.equals(detectedColumnType)) {
+                // If the column type is Double and a Integer is detected, then don't set it to Integer
+                if (currentColumnType.equals(ColumnType.INTEGER) && detectedColumnType.equals(ColumnType.DOUBLE)) {
+                    // If the column type is Integer and a Double is detected, then set it to Double
+                    return detectedColumnType;
+                } else if (currentColumnType.equals(ColumnType.DOUBLE) && detectedColumnType
+                        .equals(ColumnType.INTEGER)) {
+                    return null;
+                } else {
+                    return DEFAULT_COLUMN_TYPE;
+                }
+            }
+            return null;
+        }
+
+        private ColumnType determineColumnTypeFromCell(final Cell cell) {
+            switch (cell.getCellType()) {
+            case NUMERIC:
+                if (DateUtil.isCellDateFormatted(cell)) {
+                    return ColumnType.DATE;
+                } else {
+                    return cell.getNumericCellValue() % 1 == 0 ? ColumnType.INTEGER : ColumnType.DOUBLE;
+                }
+            case BOOLEAN:
+                return ColumnType.BOOLEAN;
+            case FORMULA:
+                return determineColumnTypeFromCell(formulaEvaluator.evaluateInCell(cell));
+            case STRING:
+                // fall through
+            case BLANK:
+                // fall through
+            case _NONE:
+                // fall through
+            case ERROR:
+                // fall through
+            default:
+                return DEFAULT_COLUMN_TYPE;
+            }
+        }
+    }
 }
diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
index a86a646..c242f9b 100644
--- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
+++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java
@@ -46,7 +46,6 @@ import org.apache.metamodel.util.Resource;
 import org.apache.poi.hssf.usermodel.HSSFFont;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hssf.util.HSSFColor;
-import org.apache.poi.ss.formula.FormulaParseException;
 import org.apache.poi.ss.usermodel.BuiltinFormats;
 import org.apache.poi.ss.usermodel.Cell;
 import org.apache.poi.ss.usermodel.CellStyle;
@@ -186,27 +185,7 @@ final class ExcelUtils {
             result = Boolean.toString(cell.getBooleanCellValue());
             break;
         case ERROR:
-            String errorResult;
-            try {
-                byte errorCode = cell.getErrorCellValue();
-                FormulaError formulaError = FormulaError.forInt(errorCode);
-                errorResult = formulaError.getString();
-            } catch (RuntimeException e) {
-                logger
-                        .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(),
-                                e.getMessage());
-                if (cell instanceof XSSFCell) {
-                    // hack to get error string, which is available
-                    String value = ((XSSFCell) cell).getErrorCellString();
-                    errorResult = value;
-                } else {
-                    logger
-                            .error("Couldn't handle unexpected error scenario in cell: (" + cell.getRowIndex() + ","
-                                    + cell.getColumnIndex() + ")", e);
-                    throw e;
-                }
-            }
-            result = errorResult;
+            result = (String) getErrorResult(cell);
             break;
         case FORMULA:
             result = getFormulaCellValue(wb, cell);
@@ -251,24 +230,7 @@ final class ExcelUtils {
             result = Boolean.valueOf(cell.getBooleanCellValue());
             break;
         case ERROR:
-            String errorResult;
-            try {
-                errorResult = FormulaError.forInt(cell.getErrorCellValue()).getString();
-            } catch (final RuntimeException e) {
-                logger
-                        .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(),
-                                e.getMessage());
-                if (cell instanceof XSSFCell) {
-                    // hack to get error string, which is available
-                    errorResult = ((XSSFCell) cell).getErrorCellString();
-                } else {
-                    logger
-                            .error("Couldn't handle unexpected error scenario in cell: (" + cell.getRowIndex() + ","
-                                    + cell.getColumnIndex() + ")", e);
-                    throw e;
-                }
-            }
-            result = errorResult;
+            result = getErrorResult(cell);
             break;
         case FORMULA:
             result = getFormulaCellValueAsObject(workbook, cell);
@@ -292,6 +254,27 @@ final class ExcelUtils {
         return result;
     }
 
+    private static Object getErrorResult(final Cell cell) {
+        String errorResult;
+        try {
+            errorResult = FormulaError.forInt(cell.getErrorCellValue()).getString();
+        } catch (final RuntimeException e) {
+            logger
+                    .debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(), e
+                            .getMessage());
+            if (cell instanceof XSSFCell) {
+                // hack to get error string, which is available
+                errorResult = ((XSSFCell) cell).getErrorCellString();
+            } else {
+                logger
+                        .error("Couldn't handle unexpected error scenario in cell: ({},{})", cell.getRowIndex(), cell
+                                .getColumnIndex());
+                throw e;
+            }
+        }
+        return errorResult;
+    }
+
     private static Object getCellValueChecked(final Workbook workbook, final Cell cell,
             final ColumnType expectedColumnType) {
         final Object value = getCellValueAsObject(workbook, cell);
@@ -303,16 +286,15 @@ final class ExcelUtils {
         if (!(value.getClass().equals(Integer.class) && expectedColumnType
                 .getJavaEquivalentClass()
                 .equals(Double.class)) && logger.isWarnEnabled()) {
-            final String warning = String
-                    .format("Cell (%s,%s) has the value '%s' of data type '%s', which doesn't match the detected "
-                            + "column's data type '%s'. This cell gets value NULL in the DataSet.", cell.getRowIndex(),
+            logger
+                    .warn("Cell ({},{}) has the value '{}' of data type '{}', which doesn't match the detected "
+                            + "column's data type '{}'. This cell gets value NULL in the DataSet.", cell.getRowIndex(),
                             cell.getColumnIndex(), value, value.getClass().getSimpleName(), expectedColumnType);
-            logger.warn(warning);
         }
         return null;
     }
 
-    private static String getFormulaCellValue(Workbook wb, Cell cell) {
+    private static String getFormulaCellValue(Workbook workbook, Cell cell) {
         // first try with a cached/precalculated value
         try {
             double numericCellValue = cell.getNumericCellValue();
@@ -324,36 +306,13 @@ final class ExcelUtils {
         }
 
         // evaluate cell first, if possible
-        try {
-            if (logger.isInfoEnabled()) {
-                logger
-                        .info("cell ({},{}) is a formula. Attempting to evaluate: {}", cell.getRowIndex(), cell
-                                .getColumnIndex(), cell.getCellFormula());
-            }
-
-            final FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
-
-            // calculates the formula and puts it's value back into the cell
-            final Cell evaluatedCell = evaluator.evaluateInCell(cell);
-
-            return getCellValue(wb, evaluatedCell);
-        } catch (RuntimeException e) {
-            logger
-                    .warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(),
-                            cell.getColumnIndex(), e.getMessage());
-            // Some exceptions we simply log - result will be then be the
-            // actual formula
-            if (e instanceof FormulaParseException) {
-                logger.warn("Parse exception occurred while evaluating cell formula: " + cell, e);
-            } else if (e instanceof IllegalArgumentException) {
-                logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e);
-            } else {
-                logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e);
-            }
+        final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
+        if (evaluatedCell != null) {
+            return getCellValue(workbook, evaluatedCell);
+        } else {
+            // last resort: return the string formula
+            return cell.getCellFormula();
         }
-
-        // last resort: return the string formula
-        return cell.getCellFormula();
     }
 
     private static Object getFormulaCellValueAsObject(final Workbook workbook, final Cell cell) {
@@ -367,6 +326,16 @@ final class ExcelUtils {
         }
 
         // evaluate cell first, if possible
+        final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
+        if (evaluatedCell != null) {
+            return getCellValueAsObject(workbook, evaluatedCell);
+        } else {
+            // last resort: return the string formula
+            return cell.getCellFormula();
+        }
+    }
+
+    private static Cell getEvaluatedCell(final Workbook workbook, final Cell cell) {
         try {
             if (logger.isInfoEnabled()) {
                 logger
@@ -377,19 +346,13 @@ final class ExcelUtils {
             final FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
 
             // calculates the formula and puts it's value back into the cell
-            final Cell evaluatedCell = evaluator.evaluateInCell(cell);
-
-            return getCellValueAsObject(workbook, evaluatedCell);
-        } catch (final FormulaParseException e) {
-            logger.warn("Parse exception occurred while evaluating cell formula: " + cell, e);
-        } catch (final IllegalArgumentException e) {
-            logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e);
-        } catch (final RuntimeException e) {
-            logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e);
+            return evaluator.evaluateInCell(cell);
+        } catch (RuntimeException e) {
+            logger
+                    .warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(),
+                            cell.getColumnIndex(), e.getMessage());
         }
-
-        // last resort: return the string formula
-        return cell.getCellFormula();
+        return null;
     }
 
     private static Number getDoubleAsNumber(final double value) {