You are viewing a plain text version of this content. The canonical link was not preserved in this plain text copy; the revision URL given below identifies the commit.
Posted to commits@tika.apache.org by ju...@apache.org on 2007/09/25 00:55:11 UTC
svn commit: r579006 - in
/incubator/tika/trunk/src/main/java/org/apache/tika/parser: ./ html/
msexcel/ mspowerpoint/ msword/ opendocument/ pdf/ rtf/ txt/ xml/
Author: jukka
Date: Mon Sep 24 15:55:03 2007
New Revision: 579006
URL: http://svn.apache.org/viewvc?rev=579006&view=rev
Log:
TIKA-26 - Implemented Parser.getContent(String) in the base class
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/Parser.java Mon Sep 24 15:55:03 2007
@@ -76,7 +76,14 @@
* It could be a document metadata, XPath selection, regex selection or
* fulltext
*/
- public abstract Content getContent(String name);
+ public final Content getContent(String name) {
+ for (Content content : getContents()) {
+ if (name.equals(content.getName())) {
+ return content;
+ }
+ }
+ return null;
+ }
/**
* Get a List of contents objects, this objects are configured from the
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java Mon Sep 24 15:55:03 2007
@@ -46,22 +46,11 @@
private String contentStr;
- private Map<String, Content> contentsMap;
-
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
-
if (ctt == null) {
return new ArrayList<Content>(0);
@@ -91,7 +80,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/msexcel/MsExcelParser.java Mon Sep 24 15:55:03 2007
@@ -39,23 +39,13 @@
private String contentStr;
- private Map<String, Content> contentsMap;
-
static Logger logger = Logger.getRootLogger();
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -76,7 +66,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/mspowerpoint/MsPowerPointParser.java Mon Sep 24 15:55:03 2007
@@ -39,23 +39,13 @@
private String contentStr;
- private Map<String, Content> contentsMap;
-
static Logger logger = Logger.getRootLogger();
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -76,7 +66,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/msword/MsWordParser.java Mon Sep 24 15:55:03 2007
@@ -40,23 +40,13 @@
private String contentStr;
- private Map<String, Content> contentsMap;
-
static Logger logger = Logger.getRootLogger();
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -77,7 +67,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java Mon Sep 24 15:55:03 2007
@@ -58,8 +58,6 @@
private org.jdom.Document xmlDoc;
- private Map<String, Content> contentsMap;
-
private String contentStr;
public org.jdom.Document parse(InputStream is) {
@@ -98,12 +96,10 @@
List<String> documentNs = xp.getAllDocumentNs(xmlDoc);
List<Content> ctt = super.getContents();
Iterator it = ctt.iterator();
- contentsMap = new HashMap<String, Content>();
-
while (it.hasNext()) {
Content content = (Content) it.next();
if (content.getXPathSelect() != null) {
- xp.extractContent(xmlDoc, content, contentsMap);
+ xp.extractContent(xmlDoc, content);
} else if (content.getRegexSelect() != null) {
try {
List<String> valuesLs = RegexUtils.extract(contentStr,
@@ -152,13 +148,6 @@
logger.error(e.getMessage());
}
return res;
- }
-
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
}
protected void copyInputStream(InputStream in, OutputStream out)
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Mon Sep 24 15:55:03 2007
@@ -47,8 +47,6 @@
private PDDocument pdfDocument = null;
- private Map<String, Content> contentsMap;
-
public String getStrContent() {
try {
@@ -86,7 +84,6 @@
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -180,17 +177,9 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
- }
-
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
}
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Mon Sep 24 15:55:03 2007
@@ -42,23 +42,13 @@
static Logger logger = Logger.getRootLogger();
- private Map<String, Content> contentsMap;
-
private String contentStr;
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -79,7 +69,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/txt/TXTParser.java Mon Sep 24 15:55:03 2007
@@ -41,23 +41,13 @@
static Logger logger = Logger.getRootLogger();
- private Map<String, Content> contentsMap;
-
private String contentStr;
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public List<Content> getContents() {
if (contentStr == null) {
contentStr = getStrContent();
}
List<Content> ctt = super.getContents();
- contentsMap = new HashMap<String, Content>();
Iterator i = ctt.iterator();
while (i.hasNext()) {
Content ct = (Content) i.next();
@@ -78,7 +68,6 @@
logger.error(e.getMessage());
}
}
- contentsMap.put(ct.getName(), ct);
}
return ctt;
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=579006&r1=579005&r2=579006&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/xml/XMLParser.java Mon Sep 24 15:55:03 2007
@@ -54,17 +54,8 @@
private SimpleNamespaceContext nsc = new SimpleNamespaceContext();
- private Map<String, Content> contentsMap;
-
private String contentStr;
- public Content getContent(String name) {
- if (contentsMap == null || contentsMap.isEmpty()) {
- getContents();
- }
- return contentsMap.get(name);
- }
-
public String getStrContent() {
if (xmlDoc == null)
xmlDoc = Utils.parse(getInputStream());
@@ -81,12 +72,11 @@
List<String> documentNs = getAllDocumentNs(xmlDoc);
List<Content> ctt = super.getContents();
Iterator it = ctt.iterator();
- contentsMap = new HashMap<String, Content>();
if (exist(documentNs, getNamespace())) {
while (it.hasNext()) {
Content content = (Content) it.next();
if (content.getXPathSelect() != null) {
- extractContent(xmlDoc, content, contentsMap);
+ extractContent(xmlDoc, content);
} else if (content.getRegexSelect() != null) {
try {
List<String> valuesLs = RegexUtils.extract(contentStr,
@@ -201,8 +191,7 @@
}
}
- public void extractContent(Document xmlDoc, Content content,
- Map<String, Content> contentsMap) {
+ public void extractContent(Document xmlDoc, Content content) {
try {
JDOMXPath xp = new JDOMXPath(content.getXPathSelect());
xp.setNamespaceContext(nsc);
@@ -243,7 +232,6 @@
if (values.length > 0) {
content.setValue(values[0]);
content.setValues(values);
- contentsMap.put(content.getName(), content);
}
} catch (JaxenException e) {
logger.error(e.getMessage());