You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2007/09/24 18:29:13 UTC

svn commit: r578871 - in /incubator/tika/trunk: ./ src/main/java/org/apache/tika/config/ src/test/java/org/apache/tika/

Author: jukka
Date: Mon Sep 24 09:29:11 2007
New Revision: 578871

URL: http://svn.apache.org/viewvc?rev=578871&view=rev
Log:
TIKA-21 - Simplified configuration code
    - LiusConfig is now instantiated as: new LiusConfig("config.file");
    - Dropped use of static caching and maps for config objects
    - Made configuration objects immutable (except for Content values)

Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java
    incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
    incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
    incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Mon Sep 24 09:29:11 2007
@@ -37,4 +37,4 @@
 
 17. TIKA-22 - Remove @author tags from the java source (mattmann)
 
-
+18. TIKA-21 - Simplified configuration code (jukka)

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/Content.java Mon Sep 24 09:29:11 2007
@@ -16,57 +16,63 @@
  */
 package org.apache.tika.config;
 
+import org.jdom.Element;
+
 /**
- * Content object, used to configure and store data pupulated from XPATH, Regex,
+ * Content object, used to configure and store data populated from XPATH, Regex,
  * Document fulltext, document metadata etc.
- * 
  */
 public class Content {
-    
-    private String name;
+
+    private final String name;
+
+    private final String textSelect;
+
+    private final String xPathSelect;
+
+    private final String regexSelect;
+
     private String value;
+
     private String[] values;
-    private String textSelect;
-    private String xPathSelect;
-    private String regexSelect;
-    
-    
+
+    public Content(Element element) {
+        name = element.getAttributeValue("name");
+        xPathSelect = element.getAttributeValue("xpathSelect");
+        textSelect = element.getAttributeValue("textSelect");
+        regexSelect = element.getChildTextTrim("regexSelect");
+    }
+
     public String getName() {
         return name;
     }
-    public void setName(String name) {
-        this.name = name;
-    }
+
     public String getRegexSelect() {
         return regexSelect;
     }
-    public void setRegexSelect(String regexSelect) {
-        this.regexSelect = regexSelect;
-    }
+
     public String getTextSelect() {
         return textSelect;
     }
-    public void setTextSelect(String select) {
-        this.textSelect = select;
+
+    public String getXPathSelect() {
+        return xPathSelect;
     }
+
     public String getValue() {
         return value;
     }
+
     public void setValue(String value) {
         this.value = value;
     }
-    public String getXPathSelect() {
-        return xPathSelect;
-    }
-    public void setXPathSelect(String pathSelect) {
-        xPathSelect = pathSelect;
-    }
+
     public String[] getValues() {
         return values;
     }
+
     public void setValues(String[] values) {
         this.values = values;
     }
-
 
 }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java Mon Sep 24 09:29:11 2007
@@ -18,12 +18,9 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
-import org.apache.log4j.Logger;
 import org.jdom.Document;
 import org.jdom.Element;
 import org.jdom.JDOMException;
@@ -31,128 +28,25 @@
 import org.jdom.xpath.XPath;
 
 /**
- * Parse xml config file. Use cache mecanisme to store multiple configs
- *  
+ * Parses an XML config file.
  */
 public class LiusConfig {
 
-    static Logger logger = Logger.getRootLogger();
+    private final Map<String, ParserConfig> configs =
+        new HashMap<String, ParserConfig>();
 
-    private static Map configsCache = new HashMap<String, LiusConfig>();
-
-    private static List<ParserConfig> parsersConfigs;
-
-    private static LiusConfig tc;
-
-    private LiusConfig() {
-    }
-
-    private static String currentFile;
-
-    public static LiusConfig getInstance(String configFile) throws JDOMException,IOException {
-
-        if (configsCache.containsKey(configFile)) {
-            return (LiusConfig) configsCache.get(configFile);
-
-        } else {
-            Document doc = parse(configFile);
-
-            tc = new LiusConfig();
-
-            populateConfig(doc, tc);
-
-            configsCache.put(configFile, tc);
-        }
-        currentFile = configFile;
-        return tc;
-    }
-
-    public List<ParserConfig> getParsersConfigs() {
-        return parsersConfigs;
-    }
-
-    public void setParsersConfigs(List<ParserConfig> parsersConfigs) {
-        this.parsersConfigs = parsersConfigs;
-    }
-
-    public ParserConfig getParserConfig(String mimeType) {
-        ParserConfig pc = null;
-        for (int i = 0; i < parsersConfigs.size(); i++) {
-            if (((ParserConfig) parsersConfigs.get(i)).getMimes().containsKey(
-                    mimeType)) {
-                return (ParserConfig) parsersConfigs.get(i);
+    public LiusConfig(String file) throws JDOMException, IOException {
+        Document document = new SAXBuilder().build(new File(file));
+        for (Object element : XPath.selectNodes(document, "//parser")) {
+            ParserConfig pc = new ParserConfig((Element) element);
+            for (Object child : ((Element) element).getChildren("mime")) {
+                configs.put(((Element) child).getTextTrim(), pc);
             }
         }
-        return pc;
     }
 
-    private static Document parse(String file) throws JDOMException,IOException {
-        org.jdom.Document xmlDoc = new org.jdom.Document();
-        try {
-            SAXBuilder builder = new SAXBuilder();
-            xmlDoc = builder.build(new File(file));
-        } catch (JDOMException jde) {
-            logger.error(jde.getMessage(),jde);
-            throw jde;
-        } catch(IOException ioe) {
-          logger.error(ioe.getMessage(),ioe);
-          throw ioe;
-        }
-        return xmlDoc;
-
-    }
-
-    private static void populateConfig(Document doc, LiusConfig tc) {
-        parsersConfigs = new ArrayList<ParserConfig>();
-        try {
-            List parsersList = XPath.selectNodes(doc, "//parser");
-            for (int i = 0; i < parsersList.size(); i++) {
-                ParserConfig pc = new ParserConfig();
-                Element parserElem = (Element) parsersList.get(i);
-                pc.setName(parserElem.getAttributeValue("name"));
-                pc.setParserClass(parserElem.getAttributeValue("class"));
-                if (parserElem.getChild("namespace") != null) {
-                    pc.setNameSpace(parserElem.getChild("namespace")
-                            .getTextTrim());
-                }
-                Map<String, String> mimes = new HashMap<String, String>();
-                List mimesElems = parserElem.getChildren("mime");
-                for (int j = 0; j < mimesElems.size(); j++) {
-                    String mime = ((Element) mimesElems.get(j)).getTextTrim();
-                    mimes.put(mime, null);
-                }
-                pc.setMimes(mimes);
-                List<Content> contents = new ArrayList<Content>();
-                if (parserElem.getChild("extract") != null) {
-                    List contentsElems = parserElem.getChild("extract")
-                            .getChildren();
-                    for (int j = 0; j < contentsElems.size(); j++) {
-                        Content content = new Content();
-                        Element contentElem = (Element) contentsElems.get(j);
-                        content.setName(contentElem.getAttributeValue("name"));
-                        if (contentElem.getAttribute("xpathSelect") != null) {
-                            content.setXPathSelect(contentElem
-                                    .getAttributeValue("xpathSelect"));
-                        }
-                        if (contentElem.getAttribute("textSelect") != null) {
-                            content.setTextSelect(contentElem
-                                    .getAttributeValue("textSelect"));
-                        }
-                        if (contentElem.getChild("regexSelect") != null) {
-                            content.setRegexSelect(contentElem.getChild(
-                                    "regexSelect").getTextTrim());
-                        }
-                        contents.add(content);
-                    }
-                }
-                pc.setContents(contents);
-                parsersConfigs.add(pc);
-            }
-        } catch (JDOMException e) {
-            logger.error(e.getMessage());
-        }
-        tc.setParsersConfigs(parsersConfigs);
-
+    public ParserConfig getParserConfig(String mimeType) {
+        return configs.get(mimeType);
     }
 
 }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java Mon Sep 24 09:29:11 2007
@@ -16,63 +16,52 @@
  */
 package org.apache.tika.config;
 
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
-import java.util.Map;
+
+import org.jdom.Element;
 
 /**
- * Store all the informations regarding specific parser   
- *   
+ * Stores all the information regarding a specific parser
  */
 public class ParserConfig {
 
-    private String name;
-
-    private String parserClass;
-
-    private Map<String, String> mimes;
-
-    private String nameSpace;
+    private final String name;
 
-    private List<Content> contents;
-
-    public List<Content> getContents() {
-        return contents;
-    }
+    private final String parserClass;
 
-    public void setContents(List<Content> contents) {
-        this.contents = contents;
-    }
+    private final String nameSpace;
 
-    public Map<String, String> getMimes() {
-        return mimes;
-    }
+    private final List<Content> contents = new ArrayList<Content>();
+;
 
-    public void setMimes(Map<String, String> mimes) {
-        this.mimes = mimes;
+    public ParserConfig(Element element) {
+        name = element.getAttributeValue("name");
+        parserClass = element.getAttributeValue("class");
+        nameSpace = element.getChildTextTrim("namespace");
+        Element extract = element.getChild("extract");
+        if (extract != null) {
+            for (Object child : extract.getChildren()) {
+                contents.add(new Content((Element) child));
+            }
+        }
     }
 
     public String getName() {
         return name;
     }
 
-    public void setName(String name) {
-        this.name = name;
-    }
-
     public String getNameSpace() {
         return nameSpace;
     }
 
-    public void setNameSpace(String nameSpace) {
-        this.nameSpace = nameSpace;
-    }
-
     public String getParserClass() {
         return parserClass;
     }
 
-    public void setParserClass(String parserClass) {
-        this.parserClass = parserClass;
+    public List<Content> getContents() {
+        return Collections.unmodifiableList(contents);
     }
 
 }

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=578871&r1=578870&r2=578871&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Mon Sep 24 09:29:11 2007
@@ -59,7 +59,7 @@
         final String log4jPropertiesFilename = "target/classes/log4j/log4j.properties";
         testFilesBaseDir = new File("src/test/resources/test-documents");
         
-        tc = LiusConfig.getInstance(liusConfigFilename);
+        tc = new LiusConfig(liusConfigFilename);
 
         LiusLogger.setLoggerConfigFile(log4jPropertiesFilename);