You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2015/01/30 09:38:25 UTC
svn commit: r1655960 - in /nutch/trunk: CHANGES.txt
src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
Author: jnioche
Date: Fri Jan 30 08:38:25 2015
New Revision: 1655960
URL: http://svn.apache.org/r1655960
Log:
NUTCH-1889 Store all values from Tika metadata in Nutch metadata
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1655960&r1=1655959&r2=1655960&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jan 30 08:38:25 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1889 Store all values from Tika metadata in Nutch metadata (jnioche)
+
* NUTCH-865 Format source code in unique style (lewismc)
* NUTCH-1893 Parse-tika fails to parse feed files (Mengying Wang via snagel)
Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1655960&r1=1655959&r2=1655960&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java Fri Jan 30 08:38:25 2015
@@ -157,8 +157,9 @@ public class TikaParser implements org.a
for (String tikaMDName : TikaMDNames) {
if (tikaMDName.equalsIgnoreCase(Metadata.TITLE))
continue;
- // TODO what if multivalued?
- nutchMetadata.add(tikaMDName, tikamd.get(tikaMDName));
+ String[] values = tikamd.getValues(tikaMDName);
+ for (String v : values)
+ nutchMetadata.add(tikaMDName, v);
}
// no outlinks? try OutlinkExtractor e.g. works for mime types where no