You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bd...@apache.org on 2007/09/26 07:28:54 UTC

svn commit: r579477 - in /incubator/tika/trunk/src/main/java/org/apache/tika/parser: opendocument/OpenOfficeParser.java xml/XMLParser.java

Author: bdelacretaz
Date: Tue Sep 25 22:28:51 2007
New Revision: 579477

URL: http://svn.apache.org/viewvc?rev=579477&view=rev
Log:
TIKA-32 - remove useless CDATA clauses and clean up code - contributed by Keith R. Bennett, thanks!

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=579477&r1=579476&r2=579477&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java Tue Sep 25 22:28:51 2007
@@ -61,8 +61,8 @@
             xmlMeta = builder.build((InputStream) files.get(1));
             Element rootMeta = xmlMeta.getRootElement();
             Element meta = null;
-            List ls = new ArrayList();
-            if ((ls = rootMeta.getChildren()).size() > 0) {
+            List ls = rootMeta.getChildren();
+            if (! ls.isEmpty()) {
                 meta = (Element) ls.get(0);
             }
             xmlDoc.getRootElement().addContent(meta.detach());
@@ -85,7 +85,7 @@
                 xp.extractContent(xmlDoc, content);
             }
         }
-        return xp.concatOccurance(xmlDoc, "//*", " ");
+        return xp.concatOccurrence(xmlDoc, "//*", " ");
     }
 
     public List unzip(InputStream is) {

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=579477&r1=579476&r2=579477&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java Tue Sep 25 22:28:51 2007
@@ -27,12 +27,12 @@
 import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.Utils;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
 import org.jaxen.JaxenException;
 import org.jaxen.SimpleNamespaceContext;
 import org.jaxen.jdom.JDOMXPath;
 import org.jdom.Attribute;
-import org.jdom.CDATA;
 import org.jdom.Comment;
 import org.jdom.Document;
 import org.jdom.Element;
@@ -60,12 +60,12 @@
                 }
             }
         }
-        return concatOccurance(xmlDoc, "//*", " ");
+        return concatOccurrence(xmlDoc, "//*", " ");
     }
 
-    public String concatOccurance(Object xmlDoc, String xpath, String concatSep) {
+    public String concatOccurrence(Object xmlDoc, String xpath, String concatSep) {
 
-        StringBuffer chaineConcat = new StringBuffer();
+        StringBuilder chaineConcat = new StringBuilder();
         try {
             JDOMXPath xp = new JDOMXPath(xpath);
             xp.setNamespaceContext(nsc);
@@ -75,7 +75,7 @@
             while (i.hasNext()) {
                 j++;
                 String text = "";
-                Object obj = (Object) i.next();
+                Object obj = i.next();
                 if (obj instanceof Element) {
                     Element elem = (Element) obj;
                     text = elem.getText().trim();
@@ -85,9 +85,6 @@
                 } else if (obj instanceof Text) {
                     Text txt = (Text) obj;
                     text = txt.getText().trim();
-                } else if (obj instanceof CDATA) {
-                    CDATA cdata = (CDATA) obj;
-                    text = cdata.getText().trim();
                 } else if (obj instanceof Comment) {
                     Comment com = (Comment) obj;
                     text = com.getText().trim();
@@ -98,15 +95,16 @@
                     EntityRef er = (EntityRef) obj;
                     text = er.toString().trim();
                 }
-                if (text != "") {
+                if (StringUtils.isNotEmpty(text)) {
+                    chaineConcat.append(text);
                     if (ls.size() == 1) {
-                        chaineConcat.append(text);
                         return chaineConcat.toString().trim();
                     } else {
-                        if (ls.size() == j)
-                            chaineConcat.append(text);
-                        else
-                            chaineConcat.append(text + " " + concatSep + " ");
+                        if (ls.size() != j) {
+                            chaineConcat.append(' ')
+                                    .append(concatSep)
+                                    .append(' ');
+                        }
                     }
                 }
             }
@@ -125,8 +123,8 @@
     private boolean exist(List nsLs, String nsUri) {
         if (nsLs.isEmpty())
             return false;
-        for (int i = 0; i < nsLs.size(); i++) {
-            if (((String) nsLs.get(i)).equals(nsUri)) {
+        for (Object nsL : nsLs) {
+            if (nsL.equals(nsUri)) {
                 return true;
             }
         }
@@ -134,7 +132,7 @@
     }
 
     private void processChildren(Element elem, List ns) {
-        Namespace nsCourent = (Namespace) elem.getNamespace();
+        Namespace nsCourent = elem.getNamespace();
         String nsUri = (nsCourent.getURI());
         if (!exist(ns, nsUri)) {
             ns.add(nsUri.trim());
@@ -145,15 +143,15 @@
             copyNsList(additionalNs, ns);
         if (elem.getChildren().size() > 0) {
             List elemChildren = elem.getChildren();
-            for (int i = 0; i < elemChildren.size(); i++) {
-                processChildren((Element) elemChildren.get(i), ns);
+            for (Object anElemChildren : elemChildren) {
+                processChildren((Element) anElemChildren, ns);
             }
         }
     }
 
     private void copyNsList(List nsElem, List nsRes) {
-        for (int i = 0; i < nsElem.size(); i++) {
-            Namespace ns = (Namespace) nsElem.get(i);
+        for (Object aNsElem : nsElem) {
+            Namespace ns = (Namespace) aNsElem;
             nsc.addNamespace(ns.getPrefix(), ns.getURI());
             nsRes.add(ns.getURI().trim());
         }
@@ -171,8 +169,7 @@
                 Object node = nodes.next();
                 if (node instanceof Element) {
                     Element elem = (Element) node;
-                    if (elem.getText().trim() != null
-                            && elem.getText().trim() != "") {
+                    if (StringUtils.isNotBlank(elem.getText())) {
                         values[i] = elem.getText().trim();
                     }
                 } else if (node instanceof Attribute) {
@@ -181,9 +178,6 @@
                 } else if (node instanceof Text) {
                     Text text = (Text) node;
                     values[i] = text.getText();
-                } else if (node instanceof CDATA) {
-                    CDATA cdata = (CDATA) node;
-                    values[i] = cdata.getText();
                 } else if (node instanceof Comment) {
                     Comment com = (Comment) node;
                     values[i] = com.getText();