You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by el...@apache.org on 2014/03/25 22:38:52 UTC

[05/14] git commit: ACCUMULO-2539 Expand javadoc for Combiner to be more specific about combination implementation

ACCUMULO-2539 Expand javadoc for Combiner to be more specific about combination implementation

Explicitly note that, most of the time, fetchColumnFamily or fetchColumn is desired and that columns
are only combined within a row. Only multiple versions of otherwise equal Keys are combined.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/88761e05
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/88761e05
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/88761e05

Branch: refs/heads/1.5.2-SNAPSHOT
Commit: 88761e05bce7e71c89b1a6033ba74edfcd8db9a6
Parents: 2ab011c
Author: Josh Elser <el...@apache.org>
Authored: Mon Mar 24 19:10:31 2014 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Tue Mar 25 13:34:48 2014 -0700

----------------------------------------------------------------------
 .../accumulo/core/iterators/Combiner.java       | 94 +++++++++++---------
 1 file changed, 52 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/88761e05/src/core/src/main/java/org/apache/accumulo/core/iterators/Combiner.java
----------------------------------------------------------------------
diff --git a/src/core/src/main/java/org/apache/accumulo/core/iterators/Combiner.java b/src/core/src/main/java/org/apache/accumulo/core/iterators/Combiner.java
index 584eb14..083e93e 100644
--- a/src/core/src/main/java/org/apache/accumulo/core/iterators/Combiner.java
+++ b/src/core/src/main/java/org/apache/accumulo/core/iterators/Combiner.java
@@ -26,27 +26,35 @@ import java.util.NoSuchElementException;
 
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.IteratorSetting.Column;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.conf.ColumnSet;
+import org.apache.hadoop.io.Text;
 import org.apache.log4j.Logger;
 
 /**
- * A SortedKeyValueIterator that combines the Values for different versions of a Key into a single Value. Combiner will replace one or more versions of a Key
- * and their Values with the most recent Key and a Value which is the result of the reduce method.
+ * A SortedKeyValueIterator that combines the Values for different versions (timestamps) of a Key within a row into a single Value. Combiner will replace one or
+ * more versions of a Key and their Values with the most recent Key and a Value which is the result of the reduce method. An {@link IteratorSetting.Column}
+ * which only specifies a column family will combine all Keys in that column family individually. Similarly, a {@link IteratorSetting.Column} which specifies a
+ * column family and column qualifier will combine all Keys in that column family and qualifier individually. Combination is only ever performed on multiple versions
+ * and not across column qualifiers or column visibilities.
  * 
- * Subclasses must implement a reduce method: {@code public Value reduce(Key key, Iterator<Value> iter)}.
+ * Implementations must provide a reduce method: {@code public Value reduce(Key key, Iterator<Value> iter)}.
  * 
  * This reduce method will be passed the most recent Key and an iterator over the Values for all non-deleted versions of that Key.
+ * 
+ * This class and its implementations do not automatically filter out unwanted columns from those being combined, thus it is generally recommended to use a
+ * {@link Combiner} implementation with the {@link ScannerBase#fetchColumnFamily(Text)} or {@link ScannerBase#fetchColumn(Text, Text)} methods.
  */
 public abstract class Combiner extends WrappingIterator implements OptionDescriber {
   static final Logger log = Logger.getLogger(Combiner.class);
   protected static final String COLUMNS_OPTION = "columns";
   protected static final String ALL_OPTION = "all";
-  
+
   /**
    * A Java Iterator that iterates over the Values for a given Key from a source SortedKeyValueIterator.
    */
@@ -54,7 +62,7 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     Key topKey;
     SortedKeyValueIterator<Key,Value> source;
     boolean hasNext;
-    
+
     /**
      * Constructs an iterator over Values whose Keys are versions of the current topKey of the source SortedKeyValueIterator.
      * 
@@ -66,11 +74,11 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
       topKey = new Key(source.getTopKey());
       hasNext = _hasNext();
     }
-    
+
     private boolean _hasNext() {
       return source.hasTop() && !source.getTopKey().isDeleted() && topKey.equals(source.getTopKey(), PartialKey.ROW_COLFAM_COLQUAL_COLVIS);
     }
-    
+
     /**
      * @return <tt>true</tt> if there is another Value
      * 
@@ -80,7 +88,7 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     public boolean hasNext() {
       return hasNext;
     }
-    
+
     /**
      * @return the next Value
      * 
@@ -99,7 +107,7 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
       }
       return topValue;
     }
-    
+
     /**
      * unsupported
      * 
@@ -110,29 +118,29 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
       throw new UnsupportedOperationException();
     }
   }
-  
+
   Key topKey;
   Value topValue;
-  
+
   @Override
   public Key getTopKey() {
     if (topKey == null)
       return super.getTopKey();
     return topKey;
   }
-  
+
   @Override
   public Value getTopValue() {
     if (topKey == null)
       return super.getTopValue();
     return topValue;
   }
-  
+
   @Override
   public boolean hasTop() {
     return topKey != null || super.hasTop();
   }
-  
+
   @Override
   public void next() throws IOException {
     if (topKey != null) {
@@ -141,12 +149,12 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     } else {
       super.next();
     }
-    
+
     findTop();
   }
-  
+
   private Key workKey = new Key();
-  
+
   /**
    * Sets the topKey and topValue based on the top key of the source. If the column of the source top key is in the set of combiners, topKey will be the top key
    * of the source and topValue will be the result of the reduce method. Otherwise, topKey and topValue will be unchanged. (They are always set to null before
@@ -167,16 +175,16 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
       }
     }
   }
-  
+
   @Override
   public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
     // do not want to seek to the middle of a value that should be combined...
-    
+
     Range seekRange = IteratorUtil.maximizeStartKeyTimeStamp(range);
-    
+
     super.seek(seekRange, columnFamilies, inclusive);
     findTop();
-    
+
     if (range.getStartKey() != null) {
       while (hasTop() && getTopKey().equals(range.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL_COLVIS)
           && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) {
@@ -184,13 +192,13 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
         // log.debug("skipping "+getTopKey());
         next();
       }
-      
+
       while (hasTop() && range.beforeStartKey(getTopKey())) {
         next();
       }
     }
   }
-  
+
   /**
    * Reduces a list of Values into a single Value.
    * 
@@ -203,14 +211,14 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
    * @return The combined Value.
    */
   public abstract Value reduce(Key key, Iterator<Value> iter);
-  
+
   private ColumnSet combiners;
   private boolean combineAllColumns;
-  
+
   @Override
   public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
     super.init(source, options, env);
-    
+
     combineAllColumns = false;
     if (options.containsKey(ALL_OPTION)) {
       combineAllColumns = Boolean.parseBoolean(options.get(ALL_OPTION));
@@ -219,14 +227,14 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     }
     if (!options.containsKey(COLUMNS_OPTION))
       throw new IllegalArgumentException("Must specify " + COLUMNS_OPTION + " option");
-    
+
     String encodedColumns = options.get(COLUMNS_OPTION);
     if (encodedColumns.length() == 0)
       throw new IllegalArgumentException("The " + COLUMNS_OPTION + " must not be empty");
-    
+
     combiners = new ColumnSet(Arrays.asList(encodedColumns.split(",")));
   }
-  
+
   @Override
   public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
     Combiner newInstance;
@@ -240,16 +248,16 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     newInstance.combineAllColumns = combineAllColumns;
     return newInstance;
   }
-  
+
   @Override
   public IteratorOptions describeOptions() {
-    IteratorOptions io = new IteratorOptions("comb", "Combiners apply reduce functions to values with identical keys", null, null);
+    IteratorOptions io = new IteratorOptions("comb", "Combiners apply reduce functions to multiple versions of values with otherwise equal keys", null, null);
     io.addNamedOption(ALL_OPTION, "set to true to apply Combiner to every column, otherwise leave blank. if true, " + COLUMNS_OPTION
         + " option will be ignored.");
     io.addNamedOption(COLUMNS_OPTION, "<col fam>[:<col qual>]{,<col fam>[:<col qual>]} escape non-alphanum chars using %<hex>.");
     return io;
   }
-  
+
   @Override
   public boolean validateOptions(Map<String,String> options) {
     if (options.containsKey(ALL_OPTION)) {
@@ -259,43 +267,45 @@ public abstract class Combiner extends WrappingIterator implements OptionDescrib
     }
     if (!options.containsKey(COLUMNS_OPTION))
       return false;
-    
+
     String encodedColumns = options.get(COLUMNS_OPTION);
     if (encodedColumns.length() == 0)
       return false;
-    
+
     for (String columns : encodedColumns.split(",")) {
       if (!ColumnSet.isValidEncoding(columns))
         return false;
     }
-    
+
     return true;
   }
-  
+
   /**
-   * A convenience method to set which columns a combiner should be applied to.
+   * A convenience method to set which columns a combiner should be applied to. For each column specified, all versions of a Key which match that {@link
+   * IteratorSetting.Column} will be combined individually in each row. This method is likely to be used in conjunction with
+   * {@link ScannerBase#fetchColumnFamily(Text)} or {@link ScannerBase#fetchColumn(Text,Text)}.
    * 
    * @param is
    *          iterator settings object to configure
    * @param columns
    *          a list of columns to encode as the value for the combiner column configuration
    */
-  
+
   public static void setColumns(IteratorSetting is, List<IteratorSetting.Column> columns) {
     String sep = "";
     StringBuilder sb = new StringBuilder();
-    
+
     for (Column col : columns) {
       sb.append(sep);
       sep = ",";
       sb.append(ColumnSet.encodeColumns(col.getFirst(), col.getSecond()));
     }
-    
+
     is.addOption(COLUMNS_OPTION, sb.toString());
   }
-  
+
   /**
-   * A convenience method to set the "all columns" option on a Combiner.
+   * A convenience method to set the "all columns" option on a Combiner. This will combine all columns individually within each row.
    * 
    * @param is
    *          iterator settings object to configure