You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2008/06/04 15:40:19 UTC

svn commit: r663092 - in /lucene/nutch/trunk: CHANGES.txt conf/tika-mimetypes.xml src/java/org/apache/nutch/util/MimeUtil.java

Author: mattmann
Date: Wed Jun  4 06:40:19 2008
New Revision: 663092

URL: http://svn.apache.org/viewvc?rev=663092&view=rev
Log:
- fix for NUTCH-618: Tika error "Media type alias already exists"

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/conf/tika-mimetypes.xml
    lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=663092&r1=663091&r2=663092&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Jun  4 06:40:19 2008
@@ -250,6 +250,8 @@
 
 91. NUTCH-596 - ParseSegments parse content even if its not
     CrawlDatum.STATUS_FETCH_SUCCESS (dogacan)
+    
+92. NUTCH-618 - Tika error "Media type alias already exists" (mattmann,kubes)
 
 Release 0.9 - 2007-04-02
 

Modified: lucene/nutch/trunk/conf/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/tika-mimetypes.xml?rev=663092&r1=663091&r2=663092&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/tika-mimetypes.xml (original)
+++ lucene/nutch/trunk/conf/tika-mimetypes.xml Wed Jun  4 06:40:19 2008
@@ -58,7 +58,6 @@
 	</mime-type>
 
 	<mime-type type="application/xhtml+xml">
-		<sub-class-of type="text/xml" />
 		<glob pattern="*.xhtml" />
 		<root-XML namespaceURI='http://www.w3.org/1999/xhtml'
 			localName='html' />
@@ -245,7 +244,7 @@
 		</mime-type> -->
 
 	<mime-type type="application/x-ms-dos-executable">
-		<alias type="application/x-dosexec;exe" />
+		<alias type="application/x-dosexec" />
 	</mime-type>
 
 	<mime-type type="application/ogg">

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=663092&r1=663091&r2=663092&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Wed Jun  4 06:40:19 2008
@@ -55,8 +55,17 @@
   private static final Logger LOG = Logger.getLogger(MimeUtil.class.getName());
 
   public MimeUtil(Configuration conf) {
-    this.mimeTypes = MimeTypesFactory.create(conf
-        .getConfResourceAsInputStream(conf.get("mime.types.file")));
+    ObjectCache objectCache = ObjectCache.get(conf);
+    MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
+        .getName());
+    if (mimeTypez == null) {
+      mimeTypez = MimeTypesFactory.create(conf
+          .getConfResourceAsInputStream(conf.get("mime.types.file")));
+      objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
+
+    }
+    
+    this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }