You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by cd...@apache.org on 2009/08/21 03:29:45 UTC

svn commit: r806408 - in /hadoop/mapreduce/trunk: CHANGES.txt src/examples/org/apache/hadoop/examples/RandomTextWriter.java

Author: cdouglas
Date: Fri Aug 21 01:29:45 2009
New Revision: 806408

URL: http://svn.apache.org/viewvc?rev=806408&view=rev
Log:
MAPREDUCE-712. Minor efficiency tweaks to RandomTextWriter.

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/RandomTextWriter.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=806408&r1=806407&r2=806408&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Aug 21 01:29:45 2009
@@ -216,6 +216,8 @@
     MAPREDUCE-767. Remove the dependence on the CLI 2.0 snapshot.
     (Amar Kamat via omalley)
 
+    MAPREDUCE-712. Minor efficiency tweaks to RandomTextWriter. (cdouglas)
+
   BUG FIXES
 
     MAPREDUCE-878. Rename fair scheduler design doc to 

Modified: hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/RandomTextWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/RandomTextWriter.java?rev=806408&r1=806407&r2=806408&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/RandomTextWriter.java (original)
+++ hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/RandomTextWriter.java Fri Aug 21 01:29:45 2009
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Date;
+import java.util.Formatter;
 import java.util.List;
 import java.util.Random;
 
@@ -96,8 +97,16 @@
     private int wordsInKeyRange;
     private int minWordsInValue;
     private int wordsInValueRange;
-    private Random random = new Random();
-    
+
+    private final Random random = new Random();
+    private final Text keyWords = new Text();
+    private final Text valueWords = new Text();
+    private final String STATUS_MSG = "wrote record %d. %d bytes left.";
+    private final Formatter statusFormat = new Formatter(new StringBuilder());
+
+    private Counter byteCounter;
+    private Counter recordCounter;
+
     /**
      * Save the configuration value that we need to write the data.
      */
@@ -115,6 +124,8 @@
       wordsInValueRange = 
         (conf.getInt("test.randomtextwrite.max_words_value", 100) - 
          minWordsInValue);
+      byteCounter = context.getCounter(Counters.BYTES_WRITTEN);
+      recordCounter = context.getCounter(Counters.RECORDS_WRITTEN);
     }
     
     /**
@@ -125,38 +136,39 @@
       int itemCount = 0;
       while (numBytesToWrite > 0) {
         // Generate the key/value 
-        int noWordsKey = minWordsInKey + 
+        final int noWordsKey = minWordsInKey +
           (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
-        int noWordsValue = minWordsInValue + 
+        final int noWordsValue = minWordsInValue +
           (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
-        Text keyWords = generateSentence(noWordsKey);
-        Text valueWords = generateSentence(noWordsValue);
-        
+
+        int recordBytes = generateSentence(keyWords, noWordsKey);
+        recordBytes += generateSentence(valueWords, noWordsValue);
+        numBytesToWrite -= recordBytes;
+
         // Write the sentence 
         context.write(keyWords, valueWords);
-        
-        numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());
-        
+
         // Update counters, progress etc.
-        context.getCounter(Counters.BYTES_WRITTEN).increment(
-                  keyWords.getLength() + valueWords.getLength());
-        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
-        if (++itemCount % 200 == 0) {
-          context.setStatus("wrote record " + itemCount + ". " + 
-                             numBytesToWrite + " bytes left.");
+        recordCounter.increment(1);
+        byteCounter.increment(recordBytes);
+
+        if (++itemCount % 1000 == 0) {
+          ((StringBuilder)statusFormat.out()).setLength(0);
+          context.setStatus(statusFormat.format(STATUS_MSG,
+                itemCount, numBytesToWrite).toString());
         }
       }
       context.setStatus("done with " + itemCount + " records.");
     }
     
-    private Text generateSentence(int noWords) {
-      StringBuffer sentence = new StringBuffer();
-      String space = " ";
+    private int generateSentence(Text txt, int noWords) {
+      txt.clear();
       for (int i=0; i < noWords; ++i) {
-        sentence.append(words[random.nextInt(words.length)]);
-        sentence.append(space);
+        final Text word = words[random.nextInt(words.length)];
+        txt.append(word.getBytes(), 0, word.getLength());
+        txt.append(SPACE, 0, SPACE.length);
       }
-      return new Text(sentence.toString());
+      return txt.getLength();
     }
   }
   
@@ -245,10 +257,12 @@
     System.exit(res);
   }
 
+  private static final byte[] SPACE = " ".getBytes();
+
   /**
    * A random list of 100 words from /usr/share/dict/words
    */
-  private static String[] words = {
+  private final static Text[] words = buildText(new String[] {
                                    "diurnalness", "Homoiousian",
                                    "spiranthic", "tetragynian",
                                    "silverhead", "ungreat",
@@ -749,5 +763,14 @@
                                    "sterilely", "unrealize",
                                    "unpatched", "hypochondriacism",
                                    "critically", "cheesecutter",
-                                  };
+                                  });
+
+  private static Text[] buildText(String[] str) {
+    Text[] ret = new Text[str.length];
+    for (int i = 0; i < str.length; ++i) {
+      ret[i] = new Text(str[i]);
+    }
+    return ret;
+  }
+
 }