You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by do...@apache.org on 2009/01/21 14:09:50 UTC

svn commit: r736307 - in /lucene/nutch/trunk: ./ src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/ src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/

Author: dogacan
Date: Wed Jan 21 05:09:48 2009
New Revision: 736307

URL: http://svn.apache.org/viewvc?rev=736307&view=rev
Log:
NUTCH-681 - parse-mp3 compilation problem. Patch by Wildan Maulana.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Jan 21 05:09:48 2009
@@ -313,6 +313,9 @@
 
 117. NUTCH-678 - Hadoop 0.19 requires an update of jets3t.
                  (julien nioche via dogacan)
+
+118. NUTCH-681 - parse-mp3 compilation problem. 
+                 (Wildan Maulana via dogacan)
      
 Release 0.9 - 2007-04-02
 

Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java Wed Jan 21 05:09:48 2009
@@ -37,17 +37,16 @@
 
 // Nutch imports
 import org.apache.nutch.metadata.Metadata;
-import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseResult;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
 
-
 /**
  * A parser for MP3 audio files
+ * 
  * @author Andy Hedges
  */
 public class MP3Parser implements Parser {
@@ -55,12 +54,12 @@
   private MetadataCollector metadataCollector;
   private Configuration conf;
 
-  public Parse getParse(Content content) {
+  public ParseResult getParse(Content content) {
 
-    Parse parse = null;
+    ParseResult parse = null;
     byte[] raw = content.getContent();
     File tmp = null;
-    
+
     try {
       tmp = File.createTempFile("nutch", ".mp3");
       FileOutputStream fos = new FileOutputStream(tmp);
@@ -69,49 +68,50 @@
       MP3File mp3 = new MP3File(tmp);
 
       if (mp3.hasID3v2Tag()) {
-        parse = getID3v2Parse(mp3, content.getMetadata());
+        parse = getID3v2Parse(mp3, content.getMetadata(), content);
       } else if (mp3.hasID3v1Tag()) {
-        parse = getID3v1Parse(mp3, content.getMetadata());
+        parse = getID3v1Parse(mp3, content.getMetadata(), content);
       } else {
-        return new ParseStatus(ParseStatus.FAILED,
-                               ParseStatus.FAILED_MISSING_CONTENT,
-                               "No textual content available").getEmptyParse(conf);
+        return new ParseStatus().getEmptyParseResult(content.getUrl(),
+            getConf());
       }
     } catch (IOException e) {
-      return new ParseStatus(ParseStatus.FAILED,
-                             ParseStatus.FAILED_EXCEPTION,
-                             "Couldn't create temporary file:" + e).getEmptyParse(conf);
+      return new ParseStatus().getEmptyParseResult(content.getUrl(),
+          getConf());
     } catch (TagException e) {
-      return new ParseStatus(ParseStatus.FAILED,
-                             ParseStatus.FAILED_EXCEPTION,
-                             "ID3 Tags could not be parsed:" + e).getEmptyParse(conf);
-    } finally{
+      return new ParseStatus().getEmptyParseResult(content.getUrl(),
+          getConf());
+    } finally {
       tmp.delete();
     }
+
     return parse;
   }
 
-  private Parse getID3v1Parse(MP3File mp3, Metadata contentMeta)
-  throws MalformedURLException {
+  private ParseResult getID3v1Parse(MP3File mp3, Metadata contentMeta,
+      Content content) throws MalformedURLException {
 
     ID3v1 tag = mp3.getID3v1Tag();
     metadataCollector.notifyProperty("TALB-Text", tag.getAlbum());
     metadataCollector.notifyProperty("TPE1-Text", tag.getArtist());
     metadataCollector.notifyProperty("COMM-Text", tag.getComment());
-    metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + ")");
+    metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre()
+        + ")");
     metadataCollector.notifyProperty("TIT2-Text", tag.getTitle());
     metadataCollector.notifyProperty("TYER-Text", tag.getYear());
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-                                        metadataCollector.getTitle(),
-                                        metadataCollector.getOutlinks(),
-                                        contentMeta,
-                                        metadataCollector.getData());
-    return new ParseImpl(metadataCollector.getText(), parseData);
+        metadataCollector.getTitle(), metadataCollector.getOutlinks(),
+        contentMeta, metadataCollector.getData());
+    ParseResult parseResult = ParseResult.createParseResult(content
+        .getUrl(),
+        new ParseImpl(metadataCollector.getText(), parseData));
+
+    return parseResult;
   }
 
-  public Parse getID3v2Parse(MP3File mp3, Metadata contentMeta)
-  throws IOException {
-    
+  public ParseResult getID3v2Parse(MP3File mp3, Metadata contentMeta,
+      Content content) throws IOException {
+
     AbstractID3v2 tag = mp3.getID3v2Tag();
     Iterator it = tag.iterator();
     while (it.hasNext()) {
@@ -120,23 +120,26 @@
       if (!name.equals("APIC")) {
         Iterator itBody = frame.getBody().iterator();
         while (itBody.hasNext()) {
-          AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody.next();
+          AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody
+          .next();
           String bodyName = mp3Obj.getIdentifier();
           if (!bodyName.equals("Picture data")) {
             String bodyValue = mp3Obj.getValue().toString();
-            metadataCollector.notifyProperty(name + "-" + bodyName, bodyValue);
+            metadataCollector.notifyProperty(name + "-" + bodyName,
+                bodyValue);
           }
         }
       }
     }
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-                                        metadataCollector.getTitle(),
-                                        metadataCollector.getOutlinks(),
-                                        contentMeta,
-                                        metadataCollector.getData());
-    return new ParseImpl(metadataCollector.getText(), parseData);
-  }
+        metadataCollector.getTitle(), metadataCollector.getOutlinks(),
+        contentMeta, metadataCollector.getData());
+    ParseResult parseResult = ParseResult.createParseResult(content
+        .getUrl(),
+        new ParseImpl(metadataCollector.getText(), parseData));
 
+    return parseResult;
+  }
 
   public void setConf(Configuration conf) {
     this.conf = conf;

Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java Wed Jan 21 05:09:48 2009
@@ -34,7 +34,7 @@
   private String title = null;
   private String artist = null;
   private String album = null;
-  private ArrayList links = new ArrayList();
+  private ArrayList<Outlink> links = new ArrayList<Outlink>();
   private String text = "";
   private Configuration conf;
 
@@ -51,7 +51,7 @@
       setArtist(value);
 
     if (name.indexOf("URL Link") > -1) {
-      links.add(new Outlink(value, "", this.conf));
+      links.add(new Outlink(value, ""));
     } else if (name.indexOf("Text") > -1) {
       text += value + "\n";
     }

Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Wed Jan 21 05:09:48 2009
@@ -71,7 +71,7 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
     Metadata metadata = parse.getData().getParseMeta();
     assertEquals("postgresql comment id3v2", metadata.get("COMM-Text"));
     assertEquals("postgresql composer id3v2", metadata.get("TCOM-Text"));
@@ -103,7 +103,7 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
 
     Metadata metadata = parse.getData().getParseMeta();
     assertEquals("postgresql comment id3v1", metadata.get("COMM-Text"));
@@ -130,7 +130,7 @@
     protocol = new ProtocolFactory(conf).getProtocol(urlString);
     content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                       .getContent();
-    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
 //    Metadata metadata = parse.getData().getParseMeta();
     if (parse.getData().getStatus().isSuccess()) {
       fail("Expected ParseException");