You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by do...@apache.org on 2009/01/21 14:09:50 UTC
svn commit: r736307 - in /lucene/nutch/trunk: ./
src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/
src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/
Author: dogacan
Date: Wed Jan 21 05:09:48 2009
New Revision: 736307
URL: http://svn.apache.org/viewvc?rev=736307&view=rev
Log:
NUTCH-681 - parse-mp3 compilation problem. Patch by Wildan Maulana.
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Jan 21 05:09:48 2009
@@ -313,6 +313,9 @@
117. NUTCH-678 - Hadoop 0.19 requires an update of jets3t.
(julien nioche via dogacan)
+
+118. NUTCH-681 - parse-mp3 compilation problem.
+ (Wildan Maulana via dogacan)
Release 0.9 - 2007-04-02
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java Wed Jan 21 05:09:48 2009
@@ -37,17 +37,16 @@
// Nutch imports
import org.apache.nutch.metadata.Metadata;
-import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseException;
import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
-
/**
* A parser for MP3 audio files
+ *
* @author Andy Hedges
*/
public class MP3Parser implements Parser {
@@ -55,12 +54,12 @@
private MetadataCollector metadataCollector;
private Configuration conf;
- public Parse getParse(Content content) {
+ public ParseResult getParse(Content content) {
- Parse parse = null;
+ ParseResult parse = null;
byte[] raw = content.getContent();
File tmp = null;
-
+
try {
tmp = File.createTempFile("nutch", ".mp3");
FileOutputStream fos = new FileOutputStream(tmp);
@@ -69,49 +68,50 @@
MP3File mp3 = new MP3File(tmp);
if (mp3.hasID3v2Tag()) {
- parse = getID3v2Parse(mp3, content.getMetadata());
+ parse = getID3v2Parse(mp3, content.getMetadata(), content);
} else if (mp3.hasID3v1Tag()) {
- parse = getID3v1Parse(mp3, content.getMetadata());
+ parse = getID3v1Parse(mp3, content.getMetadata(), content);
} else {
- return new ParseStatus(ParseStatus.FAILED,
- ParseStatus.FAILED_MISSING_CONTENT,
- "No textual content available").getEmptyParse(conf);
+ return new ParseStatus().getEmptyParseResult(content.getUrl(),
+ getConf());
}
} catch (IOException e) {
- return new ParseStatus(ParseStatus.FAILED,
- ParseStatus.FAILED_EXCEPTION,
- "Couldn't create temporary file:" + e).getEmptyParse(conf);
+ return new ParseStatus().getEmptyParseResult(content.getUrl(),
+ getConf());
} catch (TagException e) {
- return new ParseStatus(ParseStatus.FAILED,
- ParseStatus.FAILED_EXCEPTION,
- "ID3 Tags could not be parsed:" + e).getEmptyParse(conf);
- } finally{
+ return new ParseStatus().getEmptyParseResult(content.getUrl(),
+ getConf());
+ } finally {
tmp.delete();
}
+
return parse;
}
- private Parse getID3v1Parse(MP3File mp3, Metadata contentMeta)
- throws MalformedURLException {
+ private ParseResult getID3v1Parse(MP3File mp3, Metadata contentMeta,
+ Content content) throws MalformedURLException {
ID3v1 tag = mp3.getID3v1Tag();
metadataCollector.notifyProperty("TALB-Text", tag.getAlbum());
metadataCollector.notifyProperty("TPE1-Text", tag.getArtist());
metadataCollector.notifyProperty("COMM-Text", tag.getComment());
- metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + ")");
+ metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre()
+ + ")");
metadataCollector.notifyProperty("TIT2-Text", tag.getTitle());
metadataCollector.notifyProperty("TYER-Text", tag.getYear());
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
- metadataCollector.getTitle(),
- metadataCollector.getOutlinks(),
- contentMeta,
- metadataCollector.getData());
- return new ParseImpl(metadataCollector.getText(), parseData);
+ metadataCollector.getTitle(), metadataCollector.getOutlinks(),
+ contentMeta, metadataCollector.getData());
+ ParseResult parseResult = ParseResult.createParseResult(content
+ .getUrl(),
+ new ParseImpl(metadataCollector.getText(), parseData));
+
+ return parseResult;
}
- public Parse getID3v2Parse(MP3File mp3, Metadata contentMeta)
- throws IOException {
-
+ public ParseResult getID3v2Parse(MP3File mp3, Metadata contentMeta,
+ Content content) throws IOException {
+
AbstractID3v2 tag = mp3.getID3v2Tag();
Iterator it = tag.iterator();
while (it.hasNext()) {
@@ -120,23 +120,26 @@
if (!name.equals("APIC")) {
Iterator itBody = frame.getBody().iterator();
while (itBody.hasNext()) {
- AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody.next();
+ AbstractMP3Object mp3Obj = (AbstractMP3Object) itBody
+ .next();
String bodyName = mp3Obj.getIdentifier();
if (!bodyName.equals("Picture data")) {
String bodyValue = mp3Obj.getValue().toString();
- metadataCollector.notifyProperty(name + "-" + bodyName, bodyValue);
+ metadataCollector.notifyProperty(name + "-" + bodyName,
+ bodyValue);
}
}
}
}
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
- metadataCollector.getTitle(),
- metadataCollector.getOutlinks(),
- contentMeta,
- metadataCollector.getData());
- return new ParseImpl(metadataCollector.getText(), parseData);
- }
+ metadataCollector.getTitle(), metadataCollector.getOutlinks(),
+ contentMeta, metadataCollector.getData());
+ ParseResult parseResult = ParseResult.createParseResult(content
+ .getUrl(),
+ new ParseImpl(metadataCollector.getText(), parseData));
+ return parseResult;
+ }
public void setConf(Configuration conf) {
this.conf = conf;
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java Wed Jan 21 05:09:48 2009
@@ -34,7 +34,7 @@
private String title = null;
private String artist = null;
private String album = null;
- private ArrayList links = new ArrayList();
+ private ArrayList<Outlink> links = new ArrayList<Outlink>();
private String text = "";
private Configuration conf;
@@ -51,7 +51,7 @@
setArtist(value);
if (name.indexOf("URL Link") > -1) {
- links.add(new Outlink(value, "", this.conf));
+ links.add(new Outlink(value, ""));
} else if (name.indexOf("Text") > -1) {
text += value + "\n";
}
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java?rev=736307&r1=736306&r2=736307&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Wed Jan 21 05:09:48 2009
@@ -71,7 +71,7 @@
protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
.getContent();
- parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+ parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
Metadata metadata = parse.getData().getParseMeta();
assertEquals("postgresql comment id3v2", metadata.get("COMM-Text"));
assertEquals("postgresql composer id3v2", metadata.get("TCOM-Text"));
@@ -103,7 +103,7 @@
protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
.getContent();
- parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+ parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
Metadata metadata = parse.getData().getParseMeta();
assertEquals("postgresql comment id3v1", metadata.get("COMM-Text"));
@@ -130,7 +130,7 @@
protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
.getContent();
- parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
+ parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
// Metadata metadata = parse.getData().getParseMeta();
if (parse.getData().getStatus().isSuccess()) {
fail("Expected ParseException");