You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/03/26 22:04:45 UTC

svn commit: r641575 - /incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Author: jukka
Date: Wed Mar 26 14:04:41 2008
New Revision: 641575

URL: http://svn.apache.org/viewvc?rev=641575&view=rev
Log:
TIKA-132: Refactor Excel extractor to parse per sheet and add hyperlink support
    - Removed the insideWorksheet flag
    - Improved javadocs
    - Extracted PointComparator to an explicit utility class

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=641575&r1=641574&r2=641575&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Wed Mar 26 14:04:41 2008
@@ -170,21 +170,23 @@
         private SAXException exception = null;
 
         private SSTRecord sstRecord;
+
+        /**
+         * List of worksheet names.
+         */
         private List<String> sheetNames = new ArrayList<String>();
-        private short currentSheetIndex;
 
-        private boolean insideWorksheet = false;
+        /**
+         * Index of the current worksheet within the workbook.
+         * Used to find the worksheet name in the {@link #sheetNames} list.
+         */
+        private short currentSheetIndex;
 
-        private SortedMap<Point, Cell> currentSheet =
-            new TreeMap<Point, Cell>(new Comparator<Point> () {
-                public int compare(Point a, Point b) {
-                    int diff = a.y - b.y;
-                    if (diff == 0) {
-                        diff = a.x - b.x;
-                    }
-                    return diff;
-                }
-            });
+        /**
+         * Content of the current worksheet, or <code>null</code> if no
+         * worksheet is currently active.
+         */
+        private SortedMap<Point, Cell> currentSheet = null;
 
         /**
          * Contstruct a new listener instance outputting parsed data to
@@ -228,16 +230,16 @@
                     currentSheetIndex = -1;
                 } else if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
                     currentSheetIndex++;
-                    currentSheet.clear();
-                    insideWorksheet = true;
+                    currentSheet =
+                        new TreeMap<Point, Cell>(new PointComparator());
                 }
                 break;
 
             case EOFRecord.sid: // end of workbook, worksheet etc. records
-                if (insideWorksheet && !currentSheet.isEmpty()) {
+                if (currentSheet != null && !currentSheet.isEmpty()) {
                     processSheet();
                 }
-                insideWorksheet = false;
+                currentSheet = null;
                 break;
 
             case BoundSheetRecord.sid: // Worksheet index record
@@ -277,12 +279,14 @@
 
             // FIXME - requires POI release
             // case HyperlinkRecord.sid: // holds a URL associated with a cell
-            //     HyperlinkRecord link = (HyperlinkRecord) record;
-            //     Point point =
-            //         new Point(link.getFirstColumn(), link.getFirstRow());
-            //     Cell cell = currentSheet.get(point);
-            //     if (cell != null) {
-            //         addCell(record, new LinkedCell(cell, link.getAddress()));
+            //     if (currentSheet != null) {
+            //         HyperlinkRecord link = (HyperlinkRecord) record;
+            //         Point point =
+            //             new Point(link.getFirstColumn(), link.getFirstRow());
+            //         Cell cell = currentSheet.get(point);
+            //         if (cell != null) {
+            //             addCell(record, new LinkedCell(cell, link.getAddress()));
+            //         }
             //     }
             //     break;
             }
@@ -296,7 +300,7 @@
          * @param cell cell value (or <code>null</code>)
          */
         private void addCell(Record record, Cell cell) {
-            if (!insideWorksheet) {
+            if (currentSheet == null) {
                 // Ignore cells outside sheets
             } else if (cell == null) {
                 // Ignore empty cells
@@ -373,6 +377,21 @@
             handler.endElement("div");
             handler.characters("\n");
         }
+    }
+
+    /**
+     * Comparator that orders points by y coordinate first, then by x.
+     */
+    private static class PointComparator implements Comparator<Point> {
+
+        public int compare(Point a, Point b) {
+            int diff = a.y - b.y;
+            if (diff == 0) {
+                diff = a.x - b.x;
+            }
+            return diff;
+        }
+
     }
 
 }