You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2015/07/01 07:59:59 UTC

svn commit: r1688553 - in /nutch/trunk: ./ src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/ src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/ src/plugin/scoring-similarity/src/java/org/apache/nutch/scoring/sim...

Author: mattmann
Date: Wed Jul  1 05:59:59 2015
New Revision: 1688553

URL: http://svn.apache.org/r1688553
Log:
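Revert NUTCH-2052 (configurable delimiters for index-static) and remove the scoring-similarity cosine package so that the unit tests for NUTCH-2038 pass.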

Removed:
    nutch/trunk/src/plugin/scoring-similarity/src/java/org/apache/nutch/scoring/similarity/cosine/
Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
    nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1688553&r1=1688552&r2=1688553&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Jul  1 05:59:59 2015
@@ -2,8 +2,6 @@ Nutch Change Log
   
 Nutch Current Development 1.11-SNAPSHOT
 
-* NUTCH-2052 Enhance index-static to allow configurable delimiters (Peter Ciuffetti via mattmann)
-
 * NUTCH-2038 fix for NUTCH-2038: Naive Bayes classifier based html Parse filter (for filtering outlinks) 
   (Asitang Mishra, snagel via mattmann)
 

Modified: nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java?rev=1688553&r1=1688552&r2=1688553&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java (original)
+++ nutch/trunk/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java Wed Jul  1 05:59:59 2015
@@ -40,9 +40,6 @@ public class StaticFieldIndexer implemen
   private Configuration conf;
   private HashMap<String, String[]> fields;
   private boolean addStaticFields = false;
-  private String fieldSep = ",";
-  private String kevSep = ":";
-  private String valueSep = " ";
 
   /**
    * The {@link StaticFieldIndexer} filter object which adds fields as per
@@ -88,10 +85,10 @@ public class StaticFieldIndexer implemen
      * The format is very easy, it's a comma-separated list of fields in the
      * form <name>:<value>
      */
-    for (String field : fieldsString.split(this.fieldSep)) {
-      String[] entry = field.split(this.kevSep);
+    for (String field : fieldsString.split(",")) {
+      String[] entry = field.split(":");
       if (entry.length == 2)
-        fields.put(entry[0].trim(), entry[1].trim().split(this.valueSep));
+        fields.put(entry[0].trim(), entry[1].trim().split(" "));
     }
 
     return fields;
@@ -102,12 +99,6 @@ public class StaticFieldIndexer implemen
    */
   public void setConf(Configuration conf) {
     this.conf = conf;
-
-    // NUTCH-2052: Allow user-defined delimiters in index.static
-    this.fieldSep = conf.get("index.static.fieldsep", ",");
-    this.kevSep = conf.get("index.static.keysep", ":");
-    this.valueSep = conf.get("index.static.valuesep", " ");
-
     String fieldsString = conf.get("index.static", null);
     if (fieldsString != null) {
       this.addStaticFields = true;

Modified: nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java?rev=1688553&r1=1688552&r2=1688553&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java (original)
+++ nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java Wed Jul  1 05:59:59 2015
@@ -115,42 +115,4 @@ public class TestStaticFieldIndexerTest
     Assert.assertTrue("test if doc has field4", doc.getField("field4")
         .getValues().contains("val4"));
   }
-
-  /**
-   * Test for NUTCH-2052 custom delimiters in index.static.
-   *
-   * @throws Exception
-   */
-  @Test
-  public void testCustomDelimiters() throws Exception {
-
-    conf.set("index.static.fieldsep", ">");
-    conf.set("index.static.keysep", "=");
-    conf.set("index.static.valuesep", "|");
-    conf.set("index.static",
-        "field1=val1>field2    =      val2|val3     >field3>field4 =val4 > ");
-    Assert.assertNotNull(filter);
-    filter.setConf(conf);
-
-    NutchDocument doc = new NutchDocument();
-
-    try {
-      filter.filter(doc, parse, url, crawlDatum, inlinks);
-    } catch (Exception e) {
-      e.printStackTrace();
-      Assert.fail(e.getMessage());
-    }
-
-    Assert.assertNotNull(doc);
-    Assert.assertFalse("test if doc is not empty", doc.getFieldNames()
-        .isEmpty());
-    Assert.assertEquals("test if doc has 3 fields", 3, doc.getFieldNames()
-        .size());
-    Assert.assertTrue("test if doc has field1", doc.getField("field1")
-        .getValues().contains("val1"));
-    Assert.assertTrue("test if doc has field2", doc.getField("field2")
-        .getValues().contains("val2"));
-    Assert.assertTrue("test if doc has field4", doc.getField("field4")
-        .getValues().contains("val4"));
-  }
 }