You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/10/21 13:43:37 UTC

svn commit: r1025960 - in /nutch/trunk: ./ src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/ src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/ src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilte...

Author: ab
Date: Thu Oct 21 11:43:37 2010
New Revision: 1025960

URL: http://svn.apache.org/viewvc?rev=1025960&view=rev
Log:
NUTCH-921 Reduce dependency of Nutch on config files.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
    nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java
    nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
    nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
    nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
    nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
    nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
    nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 21 11:43:37 2010
@@ -72,6 +72,8 @@ Release 2.0 - Current Development
 
 * NUTCH-832 Website menu has lots of broken links - in particular the API docs (Alex McLintock via mattmann)
 
+* NUTCH-921 Reduce dependency of Nutch on config files (ab)
+
 
 Release 1.1 - 2010-06-06
 

Modified: nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java (original)
+++ nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java Thu Oct 21 11:43:37 2010
@@ -17,11 +17,13 @@
 package org.apache.nutch.urlfilter.api;
 
 // JDK imports
+import java.io.File;
 import java.io.Reader;
 import java.io.FileReader;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.List;
 import java.util.ArrayList;
 
@@ -74,10 +76,21 @@ public abstract class RegexURLFilterBase
    * Constructs a new RegexURLFilter and init it with a file of rules.
    * @param filename is the name of rules file.
    */
-  public RegexURLFilterBase(String filename)
+  public RegexURLFilterBase(File filename)
     throws IOException, IllegalArgumentException {
     this(new FileReader(filename));
   }
+  
+  /**
+   * Constructs a new RegexURLFilter and inits it with a list of rules.
+   * @param rules string with a list of rules, one rule per line
+   * @throws IOException
+   * @throws IllegalArgumentException
+   */
+  public RegexURLFilterBase(String rules) throws IOException,
+      IllegalArgumentException {
+    this(new StringReader(rules));
+  }
 
   /**
    * Constructs a new RegexURLFilter and init it with a Reader of rules.
@@ -85,7 +98,7 @@ public abstract class RegexURLFilterBase
    */
   protected RegexURLFilterBase(Reader reader)
     throws IOException, IllegalArgumentException {
-    rules = readRulesFile(reader);
+    rules = readRules(reader);
   }
   
   /**
@@ -102,9 +115,9 @@ public abstract class RegexURLFilterBase
    * Returns the name of the file of rules to use for
    * a particular implementation.
    * @param conf is the current configuration.
-   * @return the name of the file of rules to use.
+   * @return the name of the resource containing the rules to use.
    */
-  protected abstract String getRulesFile(Configuration conf);
+  protected abstract Reader getRulesReader(Configuration conf) throws IOException;
   
   
   /* -------------------------- *
@@ -132,18 +145,18 @@ public abstract class RegexURLFilterBase
   
   public void setConf(Configuration conf) {
     this.conf = conf;
-    String file = getRulesFile(conf);
-    Reader reader = conf.getConfResourceAsReader(file);
-    if (reader == null) {
-      if (LOG.isErrorEnabled()) { LOG.error("Can't find resource: " + file); }
-    } else {
-      try {
-        rules = readRulesFile(reader);
-      } catch (IOException e) {
-        if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
-        //TODO mb@media-style.com: throw Exception? Because broken api.
-        throw new RuntimeException(e.getMessage(), e);
-      }
+    Reader reader = null;
+    try {
+      reader = getRulesReader(conf);
+    } catch (Exception e) {
+      if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
+      throw new RuntimeException(e.getMessage(), e);      
+    }
+    try {
+      rules = readRules(reader);
+    } catch (IOException e) {
+      if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
+      throw new RuntimeException(e.getMessage(), e);
     }
   }
 
@@ -161,7 +174,7 @@ public abstract class RegexURLFilterBase
    * @param reader is a reader of regular expressions rules.
    * @return the corresponding {@RegexRule rules}.
    */
-  private RegexRule[] readRulesFile(Reader reader)
+  private RegexRule[] readRules(Reader reader)
     throws IOException, IllegalArgumentException {
 
     BufferedReader in = new BufferedReader(reader);

Modified: nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java Thu Oct 21 11:43:37 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.urlfilter.autom
 // JDK imports
 import java.io.Reader;
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.regex.PatternSyntaxException;
 
 // Hadoop imports
@@ -41,6 +42,8 @@ import org.apache.nutch.urlfilter.api.Re
  * @see <a href="http://www.brics.dk/automaton/">dk.brics.automaton</a>
  */
 public class AutomatonURLFilter extends RegexURLFilterBase {
+  public static final String URLFILTER_AUTOMATON_FILE = "urlfilter.automaton.file";
+  public static final String URLFILTER_AUTOMATON_RULES = "urlfilter.automaton.rules";
 
   public AutomatonURLFilter() {
     super();
@@ -61,9 +64,17 @@ public class AutomatonURLFilter extends 
    * <implementation:RegexURLFilterBase> *
    * ----------------------------------- */
   
-  // Inherited Javadoc
-  protected String getRulesFile(Configuration conf) {
-    return conf.get("urlfilter.automaton.file");
+  /**
+   * Rules specified as a config property will override rules specified
+   * as a config file.
+   */
+  protected Reader getRulesReader(Configuration conf) throws IOException {
+    String stringRules = conf.get(URLFILTER_AUTOMATON_RULES);
+    if (stringRules != null) {
+      return new StringReader(stringRules);
+    }
+    String fileRules = conf.get(URLFILTER_AUTOMATON_FILE);
+    return conf.getConfResourceAsReader(fileRules);
   }
 
   // Inherited Javadoc

Modified: nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java Thu Oct 21 11:43:37 2010
@@ -20,6 +20,7 @@ import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.LinkedHashSet;
 import java.util.Set;
 
@@ -69,7 +70,7 @@ public class DomainURLFilter
   private String domainFile = null;
   private Set<String> domainSet = new LinkedHashSet<String>();
 
-  private void readConfigurationFile(Reader configReader)
+  private void readConfiguration(Reader configReader)
     throws IOException {
 
     // read the configuration file, line by line
@@ -139,21 +140,24 @@ public class DomainURLFilter
 
     // domain file and attribute "file" take precedence if defined
     String file = conf.get("urlfilter.domain.file");    
+    String stringRules = conf.get("urlfilter.domain.rules");
     if (domainFile != null) {
       file = domainFile;
     }
     else if (attributeFile != null) {
       file = attributeFile;
     }
-
-    // get the file as a classpath resource and populate the domain set with
-    // the domains from the file
+    Reader reader = null;
+    if (stringRules != null) { // takes precedence over files
+      reader = new StringReader(stringRules);
+    } else {
+      reader = conf.getConfResourceAsReader(file);
+    }
     try {
-      Reader reader = conf.getConfResourceAsReader(file);
       if (reader == null) {
         reader = new FileReader(file);
       }
-      readConfigurationFile(reader);
+      readConfiguration(reader);
     }
     catch (IOException e) {
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));

Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java Thu Oct 21 11:43:37 2010
@@ -36,6 +36,7 @@ import java.io.FileReader;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.StringReader;
 
 import java.util.List;
 import java.util.ArrayList;
@@ -63,8 +64,8 @@ public class PrefixURLFilter implements 
    
   }
 
-  public PrefixURLFilter(String filename) throws IOException {
-    trie = readConfigurationFile(new FileReader(filename));
+  public PrefixURLFilter(String stringRules) throws IOException {
+    trie = readConfiguration(new StringReader(stringRules));
   }
 
   public String filter(String url) {
@@ -74,7 +75,7 @@ public class PrefixURLFilter implements 
       return url;
   }
 
-  private TrieStringMatcher readConfigurationFile(Reader reader)
+  private TrieStringMatcher readConfiguration(Reader reader)
     throws IOException {
     
     BufferedReader in=new BufferedReader(reader);
@@ -144,16 +145,22 @@ public class PrefixURLFilter implements 
     }
 
     String file = conf.get("urlfilter.prefix.file");
+    String stringRules = conf.get("urlfilter.prefix.rules");
     // attribute "file" takes precedence if defined
     if (attributeFile != null)
       file = attributeFile;
-    Reader reader = conf.getConfResourceAsReader(file);
+    Reader reader = null;
+    if (stringRules != null) { // takes precedence over files
+      reader = new StringReader(stringRules);
+    } else {
+      reader = conf.getConfResourceAsReader(file);
+    }
 
     if (reader == null) {
       trie = new PrefixStringMatcher(new String[0]);
     } else {
       try {
-        trie = readConfigurationFile(reader);
+        trie = readConfiguration(reader);
       } catch (IOException e) {
         if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
         // TODO mb@media-style.com: throw Exception? Because broken api.

Modified: nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java Thu Oct 21 11:43:37 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.urlfilter.regex
 // JDK imports
 import java.io.Reader;
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -35,6 +36,9 @@ import org.apache.nutch.util.NutchConfig
  * {@link java.util.regex Java Regex implementation}.
  */
 public class RegexURLFilter extends RegexURLFilterBase {
+  
+  public static final String URLFILTER_REGEX_FILE = "urlfilter.regex.file";
+  public static final String URLFILTER_REGEX_RULES = "urlfilter.regex.rules";
 
   public RegexURLFilter() {
     super();
@@ -55,9 +59,17 @@ public class RegexURLFilter extends Rege
    * <implementation:RegexURLFilterBase> *
    * ----------------------------------- */
   
-  // Inherited Javadoc
-  protected String getRulesFile(Configuration conf) {
-    return conf.get("urlfilter.regex.file");
+  /**
+   * Rules specified as a config property will override rules specified
+   * as a config file.
+   */
+  protected Reader getRulesReader(Configuration conf) throws IOException {
+    String stringRules = conf.get(URLFILTER_REGEX_RULES);
+    if (stringRules != null) {
+      return new StringReader(stringRules);
+    }
+    String fileRules = conf.get(URLFILTER_REGEX_FILE);
+    return conf.getConfResourceAsReader(fileRules);
   }
 
   // Inherited Javadoc

Modified: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java Thu Oct 21 11:43:37 2010
@@ -34,6 +34,7 @@ import java.io.FileReader;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.StringReader;
 
 import java.util.List;
 import java.util.ArrayList;
@@ -139,7 +140,7 @@ public class SuffixURLFilter implements 
   }
 
   public SuffixURLFilter(Reader reader) throws IOException {
-    readConfigurationFile(reader);
+    readConfiguration(reader);
   }
 
   public String filter(String url) {
@@ -167,7 +168,7 @@ public class SuffixURLFilter implements 
     }
   }
 
-  public void readConfigurationFile(Reader reader) throws IOException {
+  public void readConfiguration(Reader reader) throws IOException {
 
     // handle missing config file
     if (reader == null) {
@@ -269,12 +270,18 @@ public class SuffixURLFilter implements 
     }
 
     String file = conf.get("urlfilter.suffix.file");
+    String stringRules = conf.get("urlfilter.suffix.rules");
     // attribute "file" takes precedence if defined
     if (attributeFile != null) file = attributeFile;
-    Reader reader = conf.getConfResourceAsReader(file);
+    Reader reader = null;
+    if (stringRules != null) { // takes precedence over files
+      reader = new StringReader(stringRules);
+    } else {
+      reader = conf.getConfResourceAsReader(file);
+    }
 
     try {
-      readConfigurationFile(reader);
+      readConfiguration(reader);
     } catch (IOException e) {
       if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
       throw new RuntimeException(e.getMessage(), e);

Modified: nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java (original)
+++ nutch/trunk/src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java Thu Oct 21 11:43:37 2010
@@ -20,8 +20,11 @@ package org.apache.nutch.net.urlnormaliz
 import java.net.URL;
 import java.net.MalformedURLException;
 import java.io.FileInputStream;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
 
 import java.util.Collections;
 import java.util.HashMap;
@@ -40,6 +43,7 @@ import org.apache.nutch.util.NutchConfig
 
 import javax.xml.parsers.*;
 import org.w3c.dom.*;
+import org.xml.sax.InputSource;
 import org.apache.oro.text.regex.*;
 
 /**
@@ -106,17 +110,23 @@ public class RegexURLNormalizer extends 
     // the default constructor was called
     if (this.scopedRules == null) {
       String filename = getConf().get("urlnormalizer.regex.file");
+      String stringRules = getConf().get("urlnormalizer.regex.rules");
       scopedRules = new HashMap();
-      URL url = getConf().getResource(filename);
+      Reader reader = null;
+      if (stringRules != null) {
+        reader = new StringReader(stringRules);
+      } else {
+        reader = getConf().getConfResourceAsReader(filename);
+      }
       List rules = null;
-      if (url == null) {
-        LOG.warn("Can't load the default config file! " + filename);
+      if (reader == null) {
+        LOG.warn("Can't load the default rules! ");
         rules = EMPTY_RULES;
       } else {
         try {
-          rules = readConfiguration(url.openStream());
+          rules = readConfiguration(reader);
         } catch (Exception e) {
-          LOG.warn("Couldn't read default config from '" + url + "': " + e);
+          LOG.warn("Couldn't read default config: " + e);
           rules = EMPTY_RULES;
         }
       }
@@ -125,8 +135,8 @@ public class RegexURLNormalizer extends 
   }
 
   // used in JUnit test.
-  void setConfiguration(InputStream is, String scope) {
-    List rules = readConfiguration(is);
+  void setConfiguration(Reader reader, String scope) {
+    List rules = readConfiguration(reader);
     scopedRules.put(scope, rules);
     LOG.debug("Set config for scope '" + scope + "': " + rules.size() + " rules.");
   }
@@ -141,17 +151,16 @@ public class RegexURLNormalizer extends 
       // try to populate
       String configFile = getConf().get("urlnormalizer.regex.file." + scope);
       if (configFile != null) {
-        URL resource = getConf().getResource(configFile);
-        LOG.debug("resource for scope '" + scope + "': " + resource);
-        if (resource == null) {
+        LOG.debug("resource for scope '" + scope + "': " + configFile);
+        if (configFile == null) {
           LOG.warn("Can't load resource for config file: " + configFile);
         } else {
           try {
-            InputStream is = resource.openStream();
-            curRules = readConfiguration(resource.openStream());
+            Reader reader = getConf().getConfResourceAsReader(configFile);
+            curRules = readConfiguration(reader);
             scopedRules.put(scope, curRules);
           } catch (Exception e) {
-            LOG.warn("Couldn't load resource '" + resource + "': " + e);
+            LOG.warn("Couldn't load resource '" + configFile + "': " + e);
           }
         }
       }
@@ -185,22 +194,22 @@ public class RegexURLNormalizer extends 
       LOG.info("loading " + filename);
     }
     try {
-      FileInputStream fis = new FileInputStream(filename);
-      return readConfiguration(fis);
+      FileReader reader = new FileReader(filename);
+      return readConfiguration(reader);
     } catch (Exception e) {
       LOG.error("Error loading rules from '" + filename + "': " + e);
       return EMPTY_RULES;
     }
   }
   
-  private List readConfiguration(InputStream is) {
+  private List readConfiguration(Reader reader) {
     Perl5Compiler compiler = new Perl5Compiler();
     List rules = new ArrayList();
     try {
 
       // borrowed heavily from code in Configuration.java
       Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-              .parse(is);
+              .parse(new InputSource(reader));
       Element root = doc.getDocumentElement();
       if ((!"regex-normalize".equals(root.getTagName()))
               && (LOG.isErrorEnabled())) {

Modified: nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java?rev=1025960&r1=1025959&r2=1025960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java (original)
+++ nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java Thu Oct 21 11:43:37 2010
@@ -21,6 +21,7 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileFilter;
 import java.io.FileInputStream;
+import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -64,10 +65,10 @@ public class TestRegexURLNormalizer exte
     });
     for (int i = 0; i < configs.length; i++) {
       try {
-        FileInputStream fis = new FileInputStream(configs[i]);
+        FileReader reader = new FileReader(configs[i]);
         String cname = configs[i].getName();
         cname = cname.substring(16, cname.indexOf(".xml"));
-        normalizer.setConfiguration(fis, cname);
+        normalizer.setConfiguration(reader, cname);
         NormalizedURL[] urls = readTestFile(cname);
         testData.put(cname, urls);
       } catch (Exception e) {