Posted to commits@nutch.apache.org by cu...@apache.org on 2005/07/26 23:09:43 UTC

svn commit: r225395 - in /lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs: ./ TestNutchFileSystem.java

Author: cutting
Date: Tue Jul 26 14:09:36 2005
New Revision: 225395

URL: http://svn.apache.org/viewcvs?rev=225395&view=rev
Log:
Add file system tester.

Added:
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/TestNutchFileSystem.java

Added: lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/TestNutchFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/TestNutchFileSystem.java?rev=225395&view=auto
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/TestNutchFileSystem.java (added)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/fs/TestNutchFileSystem.java Tue Jul 26 14:09:36 2005
@@ -0,0 +1,271 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.fs;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.TestCase;
+import java.util.logging.*;
+
+import org.apache.nutch.fs.*;
+import org.apache.nutch.mapred.*;
+import org.apache.nutch.mapred.lib.*;
+import org.apache.nutch.io.*;
+import org.apache.nutch.util.*;
+
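+/**
+ * Exercises a NutchFileSystem implementation end to end: a control file of
+ * (name, size) pairs is generated, one map-reduce job writes a pseudo-random
+ * file per entry, and a second job re-reads each file and verifies its
+ * contents byte for byte.
+ */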
+public class TestNutchFileSystem extends TestCase {
+  private static final Logger LOG = InputFormatBase.LOG;
+
+  private static final long MEGA = 1024 * 1024;
+
+  private static String ROOT = System.getProperty("test.build.data",".");
+  private static File CONTROL_DIR = new File(ROOT, "fs_control");
+  private static File WRITE_DIR = new File(ROOT, "fs_write");
+  private static File READ_DIR = new File(ROOT, "fs_read");
+  private static File DATA_DIR = new File(ROOT, "fs_data");
+
+  public void testFs() throws Exception {
+    testFs(10 * MEGA, 100, 0);
+  }
+
+  public static void testFs(long megaBytes, int numFiles, long seed)
+    throws Exception {
+
+    NutchFileSystem fs = NutchFileSystem.get();
+
+    if (seed == 0)
+      seed = new Random().nextLong();
+
+    LOG.info("seed = "+seed);
+
+    createControlFile(fs, megaBytes, numFiles, seed);
+    writeTest(fs);
+    readTest(fs);
+  }
+
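+  /**
+   * Writes a SequenceFile of (file name, size) pairs.  Each name is a random
+   * long that later doubles as the seed for that file's contents; sizes are
+   * random and accumulate to roughly the requested total, which is given in
+   * bytes despite the parameter name.
+   */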
+  public static void createControlFile(NutchFileSystem fs,
+                                       long megaBytes, int numFiles,
+                                       long seed) throws Exception {
+
+    LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");
+
+    File controlFile = new File(CONTROL_DIR, "files");
+    fs.delete(controlFile);
+    Random random = new Random(seed);
+
+    SequenceFile.Writer writer =
+      new SequenceFile.Writer(fs, controlFile.toString(),
+                              UTF8.class, LongWritable.class);
+
+    long totalSize = 0;
+    long maxSize = ((megaBytes / numFiles) * 2) + 1;
+    try {
+      while (totalSize < megaBytes) {
+        UTF8 name = new UTF8(Long.toString(random.nextLong()));
+
+        long size = random.nextLong();
+        if (size < 0)
+          size = -size;
+        size = size % maxSize;
+
+        //LOG.info(" adding: name="+name+" size="+size);
+
+        writer.append(name, new LongWritable(size));
+
+        totalSize += size;
+      }
+    } finally {
+      writer.close();
+    }
+    LOG.info("created control file for: "+totalSize+" bytes");
+  }
+
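+  /** Writes each named file, filling it with bytes from a Random seeded by the file's name. */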
+  public static class WriteMapper extends NutchConfigured implements Mapper {
+    private Random random = new Random();
+    private byte[] buffer = new byte[8192];
+    private NutchFileSystem fs;
+
+    {
+      try {
+        fs = NutchFileSystem.get();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public WriteMapper() { super(null); }
+    
+    public WriteMapper(NutchConf conf) { super(conf); }
+
+    public void configure(JobConf job) {
+      setConf(job);
+    }
+
+    public void map(WritableComparable key, Writable value,
+                    OutputCollector collector) throws IOException {
+      String name = ((UTF8)key).toString();
+      long size = ((LongWritable)value).get();
+      long seed = Long.parseLong(name);
+
+      random.setSeed(seed);
+      //LOG.info("writing: name="+name+" size="+size);
+
+      OutputStream out = fs.create(new File(DATA_DIR, name));
+
+      long written = 0;
+      try {
+        while (written < size) {
+          random.nextBytes(buffer);
+          long remains = size - written;
+          int length = (remains<=buffer.length) ? (int)remains : buffer.length;
+          out.write(buffer, 0, length);
+          written += length;
+        }
+      } finally {
+        out.close();
+      }
+
+      collector.collect(new UTF8("bytes"), new LongWritable(written));
+    }
+  }
+
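+  /** Runs a map-reduce job that writes the files listed in the control file and sums the bytes written. */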
+  public static void writeTest(NutchFileSystem fs)
+    throws Exception {
+
+    fs.delete(WRITE_DIR);
+
+    JobConf job = new JobConf(NutchConf.get());
+
+    job.setInputDir(CONTROL_DIR);
+    job.setInputFormat(SequenceFileInputFormat.class);
+    job.setInputKeyClass(UTF8.class);
+    job.setInputValueClass(LongWritable.class);
+
+    job.setMapperClass(WriteMapper.class);
+    job.setReducerClass(LongSumReducer.class);
+
+    job.setOutputDir(WRITE_DIR);
+    job.setOutputKeyClass(UTF8.class);
+    job.setOutputValueClass(LongWritable.class);
+    job.setNumReduceTasks(1);
+    JobClient.runJob(job);
+  }
+
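+  /**
+   * Re-reads each named file and checks that its contents match the byte
+   * stream regenerated from the same seed, zero-padding the final partial
+   * buffer before comparing.
+   */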
+  public static class ReadMapper extends NutchConfigured implements Mapper {
+    private Random random = new Random();
+    private byte[] buffer = new byte[8192];
+    private byte[] check  = new byte[8192];
+    private NutchFileSystem fs;
+
+    {
+      try {
+        fs = NutchFileSystem.get();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public ReadMapper() { super(null); }
+    
+    public ReadMapper(NutchConf conf) { super(conf); }
+
+    public void configure(JobConf job) {
+      setConf(job);
+    }
+
+    public void map(WritableComparable key, Writable value,
+                    OutputCollector collector) throws IOException {
+      String name = ((UTF8)key).toString();
+      long size = ((LongWritable)value).get();
+      long seed = Long.parseLong(name);
+
+      random.setSeed(seed);
+      //LOG.info("reading: name="+name+" size="+size);
+
+      InputStream in = fs.open(new File(DATA_DIR, name));
+
+      long read = 0;
+      try {
+        while (read < size) {
+          long remains = size - read;
+          int req = (remains<=buffer.length) ? (int)remains : buffer.length;
+          int got = in.read(buffer, 0, req);
+          assertEquals(req, got);
+          read += got;
+          random.nextBytes(check);
+          if (got != buffer.length) {
+            Arrays.fill(buffer, got, buffer.length, (byte)0);
+            Arrays.fill(check, got, check.length, (byte)0);
+          }
+          assertTrue(Arrays.equals(buffer, check));
+        }
+      } finally {
+        in.close();
+      }
+
+      collector.collect(new UTF8("bytes"), new LongWritable(read));
+    }
+  }
+
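+  /** Runs a map-reduce job that re-reads and verifies the files and sums the bytes read. */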
+  public static void readTest(NutchFileSystem fs)
+    throws Exception {
+
+    fs.delete(READ_DIR);
+
+    JobConf job = new JobConf(NutchConf.get());
+
+    job.setInputDir(CONTROL_DIR);
+    job.setInputFormat(SequenceFileInputFormat.class);
+    job.setInputKeyClass(UTF8.class);
+    job.setInputValueClass(LongWritable.class);
+
+    job.setMapperClass(ReadMapper.class);
+    job.setReducerClass(LongSumReducer.class);
+
+    job.setOutputDir(READ_DIR);
+    job.setOutputKeyClass(UTF8.class);
+    job.setOutputValueClass(LongWritable.class);
+    job.setNumReduceTasks(1);
+    JobClient.runJob(job);
+  }
+
+
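+  /** Standalone entry point, e.g. {@code TestNutchFileSystem -files 100 -megaBytes 10}. */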
+  public static void main(String[] args) throws Exception {
+    int megaBytes = 10;
+    int files = 100;
+    boolean check = true;
+
+    String usage = "Usage: TestNutchFileSystem -files N -megaBytes M";
+    
+    if (args.length == 0) {
+        System.err.println(usage);
+        System.exit(-1);
+    }
+    for (int i = 0; i < args.length; i++) {       // parse command line
+      if (args[i].equals("-files")) {
+        files = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-megaBytes")) {
+        megaBytes = Integer.parseInt(args[++i]);
+      }
+    }
+    LOG.info("files = " + files);
+    LOG.info("megaBytes = " + megaBytes);
+    LOG.info("check = " + check);
+  
+    testFs(megaBytes * MEGA, files, 0);
+  }
+}