You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2015/01/30 09:38:25 UTC

svn commit: r1655960 - in /nutch/trunk: CHANGES.txt src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java

Author: jnioche
Date: Fri Jan 30 08:38:25 2015
New Revision: 1655960

URL: http://svn.apache.org/r1655960
Log:
NUTCH-1889 Store all values from Tika metadata in Nutch metadata

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1655960&r1=1655959&r2=1655960&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jan 30 08:38:25 2015
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1889 Store all values from Tika metadata in Nutch metadata (jnioche)
+
 * NUTCH-865 Format source code in unique style (lewismc)
 
 * NUTCH-1893 Parse-tika failes to parse feed files (Mengying Wang via snagel)

Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1655960&r1=1655959&r2=1655960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java Fri Jan 30 08:38:25 2015
@@ -157,8 +157,9 @@ public class TikaParser implements org.a
     for (String tikaMDName : TikaMDNames) {
       if (tikaMDName.equalsIgnoreCase(Metadata.TITLE))
         continue;
-      // TODO what if multivalued?
-      nutchMetadata.add(tikaMDName, tikamd.get(tikaMDName));
+      String[] values = tikamd.getValues(tikaMDName);
+      for (String v : values)
+        nutchMetadata.add(tikaMDName, v);
     }
 
     // no outlinks? try OutlinkExtractor e.g works for mime types where no