You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bd...@apache.org on 2007/09/26 07:28:54 UTC
svn commit: r579477 - in /incubator/tika/trunk/src/main/java/org/apache/tika/parser: opendocument/OpenOfficeParser.java xml/XMLParser.java
Author: bdelacretaz
Date: Tue Sep 25 22:28:51 2007
New Revision: 579477
URL: http://svn.apache.org/viewvc?rev=579477&view=rev
Log:
TIKA-32 - Remove the useless CDATA clauses (CDATA is a subclass of Text, so the preceding Text checks already match it) and clean up the code - contributed by Keith R. Bennett, thanks!
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=579477&r1=579476&r2=579477&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java Tue Sep 25 22:28:51 2007
@@ -61,8 +61,8 @@
xmlMeta = builder.build((InputStream) files.get(1));
Element rootMeta = xmlMeta.getRootElement();
Element meta = null;
- List ls = new ArrayList();
- if ((ls = rootMeta.getChildren()).size() > 0) {
+ List ls = rootMeta.getChildren();
+ if (! ls.isEmpty()) {
meta = (Element) ls.get(0);
}
xmlDoc.getRootElement().addContent(meta.detach());
@@ -85,7 +85,7 @@
xp.extractContent(xmlDoc, content);
}
}
- return xp.concatOccurance(xmlDoc, "//*", " ");
+ return xp.concatOccurrence(xmlDoc, "//*", " ");
}
public List unzip(InputStream is) {
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=579477&r1=579476&r2=579477&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java Tue Sep 25 22:28:51 2007
@@ -27,12 +27,12 @@
import org.apache.tika.parser.Parser;
import org.apache.tika.utils.Utils;
+import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jaxen.JaxenException;
import org.jaxen.SimpleNamespaceContext;
import org.jaxen.jdom.JDOMXPath;
import org.jdom.Attribute;
-import org.jdom.CDATA;
import org.jdom.Comment;
import org.jdom.Document;
import org.jdom.Element;
@@ -60,12 +60,12 @@
}
}
}
- return concatOccurance(xmlDoc, "//*", " ");
+ return concatOccurrence(xmlDoc, "//*", " ");
}
- public String concatOccurance(Object xmlDoc, String xpath, String concatSep) {
+ public String concatOccurrence(Object xmlDoc, String xpath, String concatSep) {
- StringBuffer chaineConcat = new StringBuffer();
+ StringBuilder chaineConcat = new StringBuilder();
try {
JDOMXPath xp = new JDOMXPath(xpath);
xp.setNamespaceContext(nsc);
@@ -75,7 +75,7 @@
while (i.hasNext()) {
j++;
String text = "";
- Object obj = (Object) i.next();
+ Object obj = i.next();
if (obj instanceof Element) {
Element elem = (Element) obj;
text = elem.getText().trim();
@@ -85,9 +85,6 @@
} else if (obj instanceof Text) {
Text txt = (Text) obj;
text = txt.getText().trim();
- } else if (obj instanceof CDATA) {
- CDATA cdata = (CDATA) obj;
- text = cdata.getText().trim();
} else if (obj instanceof Comment) {
Comment com = (Comment) obj;
text = com.getText().trim();
@@ -98,15 +95,16 @@
EntityRef er = (EntityRef) obj;
text = er.toString().trim();
}
- if (text != "") {
+ if (StringUtils.isNotEmpty(text)) {
+ chaineConcat.append(text);
if (ls.size() == 1) {
- chaineConcat.append(text);
return chaineConcat.toString().trim();
} else {
- if (ls.size() == j)
- chaineConcat.append(text);
- else
- chaineConcat.append(text + " " + concatSep + " ");
+ if (ls.size() != j) {
+ chaineConcat.append(' ')
+ .append(concatSep)
+ .append(' ');
+ }
}
}
}
@@ -125,8 +123,8 @@
private boolean exist(List nsLs, String nsUri) {
if (nsLs.isEmpty())
return false;
- for (int i = 0; i < nsLs.size(); i++) {
- if (((String) nsLs.get(i)).equals(nsUri)) {
+ for (Object nsL : nsLs) {
+ if (nsL.equals(nsUri)) {
return true;
}
}
@@ -134,7 +132,7 @@
}
private void processChildren(Element elem, List ns) {
- Namespace nsCourent = (Namespace) elem.getNamespace();
+ Namespace nsCourent = elem.getNamespace();
String nsUri = (nsCourent.getURI());
if (!exist(ns, nsUri)) {
ns.add(nsUri.trim());
@@ -145,15 +143,15 @@
copyNsList(additionalNs, ns);
if (elem.getChildren().size() > 0) {
List elemChildren = elem.getChildren();
- for (int i = 0; i < elemChildren.size(); i++) {
- processChildren((Element) elemChildren.get(i), ns);
+ for (Object anElemChildren : elemChildren) {
+ processChildren((Element) anElemChildren, ns);
}
}
}
private void copyNsList(List nsElem, List nsRes) {
- for (int i = 0; i < nsElem.size(); i++) {
- Namespace ns = (Namespace) nsElem.get(i);
+ for (Object aNsElem : nsElem) {
+ Namespace ns = (Namespace) aNsElem;
nsc.addNamespace(ns.getPrefix(), ns.getURI());
nsRes.add(ns.getURI().trim());
}
@@ -171,8 +169,7 @@
Object node = nodes.next();
if (node instanceof Element) {
Element elem = (Element) node;
- if (elem.getText().trim() != null
- && elem.getText().trim() != "") {
+ if (StringUtils.isNotBlank(elem.getText())) {
values[i] = elem.getText().trim();
}
} else if (node instanceof Attribute) {
@@ -181,9 +178,6 @@
} else if (node instanceof Text) {
Text text = (Text) node;
values[i] = text.getText();
- } else if (node instanceof CDATA) {
- CDATA cdata = (CDATA) node;
- values[i] = cdata.getText();
} else if (node instanceof Comment) {
Comment com = (Comment) node;
values[i] = com.getText();