You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/24 14:20:08 UTC

tika git commit: TIKA-2019 -- clean up -- move state variables to inner classes, convert protected to package private, add @Override on parse

Repository: tika
Updated Branches:
  refs/heads/master 81279a1e0 -> 2031de70c


TIKA-2019 -- clean up -- move state variables to inner classes, convert protected to package private, add @Override on parse


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/2031de70
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/2031de70
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/2031de70

Branch: refs/heads/master
Commit: 2031de70c117fdabf793008fe22dd9c97c82d2c9
Parents: 81279a1
Author: tballison <ta...@mitre.org>
Authored: Fri Jun 24 10:19:59 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jun 24 10:19:59 2016 -0400

----------------------------------------------------------------------
 .../microsoft/xml/AbstractXML2003Parser.java    | 45 ++++++++++----------
 .../microsoft/xml/SpreadsheetMLParser.java      |  3 +-
 .../tika/parser/microsoft/xml/WordMLParser.java |  8 ++--
 3 files changed, 28 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
index 637b4d6..4e05d0e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/AbstractXML2003Parser.java
@@ -43,31 +43,31 @@ import java.io.InputStream;
 
 public abstract class AbstractXML2003Parser extends AbstractParser {
 
-    protected final static String MS_OFFICE_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
-    protected final static String MS_DOC_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
-    protected final static String MS_SPREADSHEET_URN = "urn:schemas-microsoft-com:office:spreadsheet";
-    protected final static String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
-    protected final static Attributes EMPTY_ATTRS = new AttributesImpl();
+    final static String MS_OFFICE_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
+    final static String MS_DOC_PROPERTIES_URN = "urn:schemas-microsoft-com:office:office";
+    final static String MS_SPREADSHEET_URN = "urn:schemas-microsoft-com:office:spreadsheet";
+    final static String WORD_ML_URL = "http://schemas.microsoft.com/office/word/2003/wordml";
+    final static Attributes EMPTY_ATTRS = new AttributesImpl();
 
-    protected final static String DOCUMENT_PROPERTIES = "DocumentProperties";
-    protected final static String PICT = "pict";
-    protected final static String BIN_DATA = "binData";
+    final static String DOCUMENT_PROPERTIES = "DocumentProperties";
+    final static String PICT = "pict";
+    final static String BIN_DATA = "binData";
 
-    protected final static String A = "a";
-    protected final static String BODY = "body";
-    protected final static String CDATA = "cdata";
-    protected final static String DIV = "div";
-    protected final static String HREF = "href";
-    protected final static String IMG = "img";
-    protected final static String P = "p";
-    protected final static String TD = "td";
-    protected final static String TR = "tr";
-    protected final static String TABLE = "table";
-    protected final static String TBODY = "tbody";
+    final static String A = "a";
+    final static String BODY = "body";
+    final static String CDATA = "cdata";
+    final static String DIV = "div";
+    final static String HREF = "href";
+    final static String IMG = "img";
+    final static String P = "p";
+    final static String TD = "td";
+    final static String TR = "tr";
+    final static String TABLE = "table";
+    final static String TBODY = "tbody";
 
-    protected final static String HLINK = "hlink";
-    protected final static String HLINK_DEST = "dest";
-    protected final static String NAME_ATTR = "name";
+    final static String HLINK = "hlink";
+    final static String HLINK_DEST = "dest";
+    final static String NAME_ATTR = "name";
 
 
     private static ContentHandler getMSPropertiesHandler(
@@ -77,6 +77,7 @@ public abstract class AbstractXML2003Parser extends AbstractParser {
                 metadata, property);
     }
 
+    @Override
     public void parse(
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)

http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
index 0cf7520..c442453 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/SpreadsheetMLParser.java
@@ -52,8 +52,6 @@ public class SpreadsheetMLParser extends AbstractXML2003Parser {
             Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
                     MEDIA_TYPE)));
 
-    private boolean inBody = false;
-
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
@@ -78,6 +76,7 @@ public class SpreadsheetMLParser extends AbstractXML2003Parser {
         StringBuilder buffer = new StringBuilder();
         String href = null;
         boolean inData = false;
+        private boolean inBody = false;
 
         public ExcelMLHandler(ContentHandler handler) {
             this.handler = handler;

http://git-wip-us.apache.org/repos/asf/tika/blob/2031de70/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
index 6bd51da..16b8c46 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
@@ -62,7 +62,6 @@ public class WordMLParser extends AbstractXML2003Parser {
     private static final Set<MediaType> SUPPORTED_TYPES =
             Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
                     MEDIA_TYPE)));
-    private boolean inBody = false;
 
     static {
         WORDML_TO_XHTML.put(P, P);
@@ -107,6 +106,7 @@ public class WordMLParser extends AbstractXML2003Parser {
     private class WordMLHandler extends DefaultHandler {
         private final ContentHandler handler;
         private boolean ignoreCharacters;
+        private boolean inBody = false;
 
         //use inP to keep track of whether the handler is
         //in a paragraph or not. <p><p></p></p> was allowed
@@ -128,7 +128,7 @@ public class WordMLParser extends AbstractXML2003Parser {
                 }
                 String html = WORDML_TO_XHTML.get(localName);
                 if (html != null) {
-                    if ("p".equals(localName)) {
+                    if (P.equals(localName)) {
                         //close p if already in a p to prevent nested <p>
                         if (inP) {
                             handler.endElement(XHTMLContentHandler.XHTML, P, P);
@@ -165,13 +165,13 @@ public class WordMLParser extends AbstractXML2003Parser {
                     if (html.equals(TABLE)) {
                         handler.endElement(XHTMLContentHandler.XHTML, TBODY, TBODY);
                     }
-                    if ("p".equals(html) && !inP) {
+                    if (P.equals(html) && !inP) {
                         //start p if not already in one to prevent non-matching <p>
                         handler.startElement(XHTMLContentHandler.XHTML, P, P, EMPTY_ATTRS);
                     }
                     handler.endElement(XHTMLContentHandler.XHTML, html, html);
 
-                    if ("p".equals(html)) {
+                    if (P.equals(html)) {
                         inP = false;
                     }
                 }