You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2007/09/24 18:29:13 UTC
svn commit: r578871 - in /incubator/tika/trunk: ./
src/main/java/org/apache/tika/config/ src/test/java/org/apache/tika/
Author: jukka
Date: Mon Sep 24 09:29:11 2007
New Revision: 578871
URL: http://svn.apache.org/viewvc?rev=578871&view=rev
Log:
TIKA-21 - Simplified configuration code
- LiusConfig is now instantiated as: new LiusConfig("config.file");
- Dropped use of static caching and maps for config objects
- Made configuration objects immutable (except for Content values)
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java
incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Mon Sep 24 09:29:11 2007
@@ -37,4 +37,4 @@
17. TIKA-22 - Remove @author tags from the java source (mattmann)
-
+18. TIKA-21 - Simplified configuration code (jukka)
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java Mon Sep 24 09:29:11 2007
@@ -16,57 +16,63 @@
*/
package org.apache.tika.config;
+import org.jdom.Element;
+
/**
- * Content object, used to configure and store data pupulated from XPATH, Regex,
+ * Content object, used to configure and store data populated from XPATH, Regex,
* Document fulltext, document metadata etc.
- *
*/
public class Content {
-
- private String name;
+
+ private final String name;
+
+ private final String textSelect;
+
+ private final String xPathSelect;
+
+ private final String regexSelect;
+
private String value;
+
private String[] values;
- private String textSelect;
- private String xPathSelect;
- private String regexSelect;
-
-
+
+ public Content(Element element) {
+ name = element.getAttributeValue("name");
+ xPathSelect = element.getAttributeValue("xpathSelect");
+ textSelect = element.getAttributeValue("textSelect");
+ regexSelect = element.getChildTextTrim("regexSelect");
+ }
+
public String getName() {
return name;
}
- public void setName(String name) {
- this.name = name;
- }
+
public String getRegexSelect() {
return regexSelect;
}
- public void setRegexSelect(String regexSelect) {
- this.regexSelect = regexSelect;
- }
+
public String getTextSelect() {
return textSelect;
}
- public void setTextSelect(String select) {
- this.textSelect = select;
+
+ public String getXPathSelect() {
+ return xPathSelect;
}
+
public String getValue() {
return value;
}
+
public void setValue(String value) {
this.value = value;
}
- public String getXPathSelect() {
- return xPathSelect;
- }
- public void setXPathSelect(String pathSelect) {
- xPathSelect = pathSelect;
- }
+
public String[] getValues() {
return values;
}
+
public void setValues(String[] values) {
this.values = values;
}
-
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java Mon Sep 24 09:29:11 2007
@@ -18,12 +18,9 @@
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
-import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
@@ -31,128 +28,25 @@
import org.jdom.xpath.XPath;
/**
- * Parse xml config file. Use cache mecanisme to store multiple configs
- *
+ * Parse xml config file.
*/
public class LiusConfig {
- static Logger logger = Logger.getRootLogger();
+ private final Map<String, ParserConfig> configs =
+ new HashMap<String, ParserConfig>();
- private static Map configsCache = new HashMap<String, LiusConfig>();
-
- private static List<ParserConfig> parsersConfigs;
-
- private static LiusConfig tc;
-
- private LiusConfig() {
- }
-
- private static String currentFile;
-
- public static LiusConfig getInstance(String configFile) throws JDOMException,IOException {
-
- if (configsCache.containsKey(configFile)) {
- return (LiusConfig) configsCache.get(configFile);
-
- } else {
- Document doc = parse(configFile);
-
- tc = new LiusConfig();
-
- populateConfig(doc, tc);
-
- configsCache.put(configFile, tc);
- }
- currentFile = configFile;
- return tc;
- }
-
- public List<ParserConfig> getParsersConfigs() {
- return parsersConfigs;
- }
-
- public void setParsersConfigs(List<ParserConfig> parsersConfigs) {
- this.parsersConfigs = parsersConfigs;
- }
-
- public ParserConfig getParserConfig(String mimeType) {
- ParserConfig pc = null;
- for (int i = 0; i < parsersConfigs.size(); i++) {
- if (((ParserConfig) parsersConfigs.get(i)).getMimes().containsKey(
- mimeType)) {
- return (ParserConfig) parsersConfigs.get(i);
+ public LiusConfig(String file) throws JDOMException, IOException {
+ Document document = new SAXBuilder().build(new File(file));
+ for (Object element : XPath.selectNodes(document, "//parser")) {
+ ParserConfig pc = new ParserConfig((Element) element);
+ for (Object child : ((Element) element).getChildren("mime")) {
+ configs.put(((Element) child).getTextTrim(), pc);
}
}
- return pc;
}
- private static Document parse(String file) throws JDOMException,IOException {
- org.jdom.Document xmlDoc = new org.jdom.Document();
- try {
- SAXBuilder builder = new SAXBuilder();
- xmlDoc = builder.build(new File(file));
- } catch (JDOMException jde) {
- logger.error(jde.getMessage(),jde);
- throw jde;
- } catch(IOException ioe) {
- logger.error(ioe.getMessage(),ioe);
- throw ioe;
- }
- return xmlDoc;
-
- }
-
- private static void populateConfig(Document doc, LiusConfig tc) {
- parsersConfigs = new ArrayList<ParserConfig>();
- try {
- List parsersList = XPath.selectNodes(doc, "//parser");
- for (int i = 0; i < parsersList.size(); i++) {
- ParserConfig pc = new ParserConfig();
- Element parserElem = (Element) parsersList.get(i);
- pc.setName(parserElem.getAttributeValue("name"));
- pc.setParserClass(parserElem.getAttributeValue("class"));
- if (parserElem.getChild("namespace") != null) {
- pc.setNameSpace(parserElem.getChild("namespace")
- .getTextTrim());
- }
- Map<String, String> mimes = new HashMap<String, String>();
- List mimesElems = parserElem.getChildren("mime");
- for (int j = 0; j < mimesElems.size(); j++) {
- String mime = ((Element) mimesElems.get(j)).getTextTrim();
- mimes.put(mime, null);
- }
- pc.setMimes(mimes);
- List<Content> contents = new ArrayList<Content>();
- if (parserElem.getChild("extract") != null) {
- List contentsElems = parserElem.getChild("extract")
- .getChildren();
- for (int j = 0; j < contentsElems.size(); j++) {
- Content content = new Content();
- Element contentElem = (Element) contentsElems.get(j);
- content.setName(contentElem.getAttributeValue("name"));
- if (contentElem.getAttribute("xpathSelect") != null) {
- content.setXPathSelect(contentElem
- .getAttributeValue("xpathSelect"));
- }
- if (contentElem.getAttribute("textSelect") != null) {
- content.setTextSelect(contentElem
- .getAttributeValue("textSelect"));
- }
- if (contentElem.getChild("regexSelect") != null) {
- content.setRegexSelect(contentElem.getChild(
- "regexSelect").getTextTrim());
- }
- contents.add(content);
- }
- }
- pc.setContents(contents);
- parsersConfigs.add(pc);
- }
- } catch (JDOMException e) {
- logger.error(e.getMessage());
- }
- tc.setParsersConfigs(parsersConfigs);
-
+ public ParserConfig getParserConfig(String mimeType) {
+ return configs.get(mimeType);
}
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java Mon Sep 24 09:29:11 2007
@@ -16,63 +16,52 @@
*/
package org.apache.tika.config;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
-import java.util.Map;
+
+import org.jdom.Element;
/**
- * Store all the informations regarding specific parser
- *
+ * Stores all the information regarding a specific parser
*/
public class ParserConfig {
- private String name;
-
- private String parserClass;
-
- private Map<String, String> mimes;
-
- private String nameSpace;
+ private final String name;
- private List<Content> contents;
-
- public List<Content> getContents() {
- return contents;
- }
+ private final String parserClass;
- public void setContents(List<Content> contents) {
- this.contents = contents;
- }
+ private final String nameSpace;
- public Map<String, String> getMimes() {
- return mimes;
- }
+ private final List<Content> contents = new ArrayList<Content>();
+;
- public void setMimes(Map<String, String> mimes) {
- this.mimes = mimes;
+ public ParserConfig(Element element) {
+ name = element.getAttributeValue("name");
+ parserClass = element.getAttributeValue("class");
+ nameSpace = element.getChildTextTrim("namespace");
+ Element extract = element.getChild("extract");
+ if (extract != null) {
+ for (Object child : extract.getChildren()) {
+ contents.add(new Content((Element) child));
+ }
+ }
}
public String getName() {
return name;
}
- public void setName(String name) {
- this.name = name;
- }
-
public String getNameSpace() {
return nameSpace;
}
- public void setNameSpace(String nameSpace) {
- this.nameSpace = nameSpace;
- }
-
public String getParserClass() {
return parserClass;
}
- public void setParserClass(String parserClass) {
- this.parserClass = parserClass;
+ public List<Content> getContents() {
+ return Collections.unmodifiableList(contents);
}
}
Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Mon Sep 24 09:29:11 2007
@@ -59,7 +59,7 @@
final String log4jPropertiesFilename = "target/classes/log4j/log4j.properties";
testFilesBaseDir = new File("src/test/resources/test-documents");
- tc = LiusConfig.getInstance(liusConfigFilename);
+ tc = new LiusConfig(liusConfigFilename);
LiusLogger.setLoggerConfigFile(log4jPropertiesFilename);