You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/09/30 00:06:44 UTC
svn commit: r1628329 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/crawl/CrawlDbReader.java
Author: lewismc
Date: Mon Sep 29 22:06:44 2014
New Revision: 1628329
URL: http://svn.apache.org/r1628329
Log:
NUTCH-1857 readdb -dump -format csv should use comma
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1628329&r1=1628328&r2=1628329&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Sep 29 22:06:44 2014
@@ -1,6 +1,8 @@
Nutch Change Log
-Nutch Current Development
+Nutch Current Development 1.10-SNAPSHOT
+
+* NUTCH-1857 readdb -dump -format csv should use comma (lewismc)
* NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl (lewismc)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=1628329&r1=1628328&r2=1628329&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Mon Sep 29 22:06:44 2014
@@ -30,10 +30,10 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.TreeMap;
+
// Commons Logging imports
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -100,7 +100,7 @@ public class CrawlDbReader implements Cl
public LineRecordWriter(DataOutputStream out) {
this.out = out;
try {
- out.writeBytes("Url;Status code;Status name;Fetch Time;Modified Time;Retries since fetch;Retry interval seconds;Retry interval days;Score;Signature;Metadata\n");
+ out.writeBytes("Url,Status code,Status name,Fetch Time,Modified Time,Retries since fetch,Retry interval seconds,Retry interval days,Score,Signature,Metadata\n");
} catch (IOException e) {}
}
@@ -108,29 +108,29 @@ public class CrawlDbReader implements Cl
out.writeByte('"');
out.writeBytes(key.toString());
out.writeByte('"');
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(Integer.toString(value.getStatus()));
- out.writeByte(';');
+ out.writeByte(',');
out.writeByte('"');
out.writeBytes(CrawlDatum.getStatusName(value.getStatus()));
out.writeByte('"');
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(new Date(value.getFetchTime()).toString());
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(new Date(value.getModifiedTime()).toString());
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(Integer.toString(value.getRetriesSinceFetch()));
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(Float.toString(value.getFetchInterval()));
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(Float.toString((value.getFetchInterval() / FetchSchedule.SECONDS_PER_DAY)));
- out.writeByte(';');
+ out.writeByte(',');
out.writeBytes(Float.toString(value.getScore()));
- out.writeByte(';');
+ out.writeByte(',');
out.writeByte('"');
out.writeBytes(value.getSignature() != null ? StringUtil.toHexString(value.getSignature()): "null");
out.writeByte('"');
- out.writeByte(';');
+ out.writeByte(',');
out.writeByte('"');
if (value.getMetaData() != null) {
for (Entry<Writable, Writable> e : value.getMetaData().entrySet()) {
@@ -540,6 +540,7 @@ public class CrawlDbReader implements Cl
}
public static void main(String[] args) throws IOException {
+ @SuppressWarnings("resource")
CrawlDbReader dbr = new CrawlDbReader();
if (args.length < 2) {