You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bi...@apache.org on 2012/01/31 17:53:40 UTC

svn commit: r1238696 - in /incubator/accumulo/branches/1.4/src: core/src/main/java/org/apache/accumulo/core/iterators/ examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ examples/wikisearch/ingest/src/test/java/org...

Author: billie
Date: Tue Jan 31 16:53:40 2012
New Revision: 1238696

URL: http://svn.apache.org/viewvc?rev=1238696&view=rev
Log:
ACCUMULO-354 Use a boolean flag instead of a null check to detect the presence of the next value

Modified:
    incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java

Modified: incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java Tue Jan 31 16:53:40 2012
@@ -40,7 +40,7 @@ public abstract class TypedValueCombiner
   private boolean lossy = false;
   
   protected static final String LOSSY = "lossy";
-
+  
   /**
    * A Java Iterator that translates an Iterator<Value> to an Iterator<V> using the decode method of an Encoder.
    */
@@ -68,19 +68,19 @@ public abstract class TypedValueCombiner
     }
     
     V next = null;
+    boolean hasNext = false;
+    
     @Override
     public boolean hasNext() {
-      if (next != null)
+      if (hasNext)
         return true;
-
-      while (true)
-      {
+      
+      while (true) {
         if (!source.hasNext())
           return false;
-        try
-        {
+        try {
           next = encoder.decode(source.next().get());
-          return true;
+          return hasNext = true;
         } catch (ValueFormatException vfe) {
           if (!lossy)
             throw vfe;
@@ -90,10 +90,11 @@ public abstract class TypedValueCombiner
     
     @Override
     public V next() {
-      if (!hasNext())
+      if (!hasNext && !hasNext())
         throw new NoSuchElementException();
       V toRet = next;
       next = null;
+      hasNext = false;
       return toRet;
     }
     
@@ -192,7 +193,7 @@ public abstract class TypedValueCombiner
     super.init(source, options, env);
     setLossyness(options);
   }
-
+  
   private void setLossyness(Map<String,String> options) {
     String loss = options.get(LOSSY);
     if (loss == null)
@@ -214,7 +215,7 @@ public abstract class TypedValueCombiner
     setLossyness(options);
     return true;
   }
-
+  
   /**
    * A convenience method to set the "lossy" option on a TypedValueCombiner. If true, the combiner will ignore any values which fail to decode. Otherwise, the
    * combiner will throw an error which will interrupt the action (and prevent potential data loss). False is the default behavior.

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java Tue Jan 31 16:53:40 2012
@@ -88,6 +88,7 @@ public class WikipediaIngester extends C
           columns.add(new Column("fi\0" + family));
         }
         TextIndexCombiner.setColumns(setting, columns);
+        TextIndexCombiner.setLossyness(setting, true);
         
         tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
       }
@@ -102,6 +103,7 @@ public class WikipediaIngester extends C
       // Add the UID combiner
       IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
       GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      GlobalIndexUidCombiner.setLossyness(setting, true);
       tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     
@@ -110,6 +112,7 @@ public class WikipediaIngester extends C
       // Add the UID combiner
       IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
       GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      GlobalIndexUidCombiner.setLossyness(setting, true);
       tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java Tue Jan 31 16:53:40 2012
@@ -139,4 +139,47 @@ public class TextIndexTest {
     Assert.assertTrue(offsets.get(4) == 15);
     Assert.assertTrue(offsets.get(5) == 19);
   }
+  
+  @Test
+  public void testEmptyValue() throws InvalidProtocolBufferException {
+    Builder builder = createBuilder();
+    builder.addWordOffset(13);
+    builder.addWordOffset(15);
+    builder.addWordOffset(19);
+    builder.setNormalizedTermFrequency(0.12f);
+    
+    values.add(new Value("".getBytes()));
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    builder = createBuilder();
+    builder.addWordOffset(1);
+    builder.addWordOffset(5);
+    builder.setNormalizedTermFrequency(0.1f);
+    
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    builder = createBuilder();
+    builder.addWordOffset(3);
+    builder.setNormalizedTermFrequency(0.05f);
+    
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    Value result = combiner.reduce(new Key(), values.iterator());
+    
+    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
+    
+    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
+    
+    List<Integer> offsets = info.getWordOffsetList();
+    Assert.assertTrue(offsets.size() == 6);
+    Assert.assertTrue(offsets.get(0) == 1);
+    Assert.assertTrue(offsets.get(1) == 3);
+    Assert.assertTrue(offsets.get(2) == 5);
+    Assert.assertTrue(offsets.get(3) == 13);
+    Assert.assertTrue(offsets.get(4) == 15);
+    Assert.assertTrue(offsets.get(5) == 19);
+  }
 }