You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bi...@apache.org on 2012/01/31 17:53:40 UTC
svn commit: r1238696 - in /incubator/accumulo/branches/1.4/src:
core/src/main/java/org/apache/accumulo/core/iterators/
examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/
examples/wikisearch/ingest/src/test/java/org...
Author: billie
Date: Tue Jan 31 16:53:40 2012
New Revision: 1238696
URL: http://svn.apache.org/viewvc?rev=1238696&view=rev
Log:
ACCUMULO-354 use a boolean flag instead of a null check to detect the presence of a next value
Modified:
incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
Modified: incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java (original)
+++ incubator/accumulo/branches/1.4/src/core/src/main/java/org/apache/accumulo/core/iterators/TypedValueCombiner.java Tue Jan 31 16:53:40 2012
@@ -40,7 +40,7 @@ public abstract class TypedValueCombiner
private boolean lossy = false;
protected static final String LOSSY = "lossy";
-
+
/**
* A Java Iterator that translates an Iterator<Value> to an Iterator<V> using the decode method of an Encoder.
*/
@@ -68,19 +68,19 @@ public abstract class TypedValueCombiner
}
V next = null;
+ boolean hasNext = false;
+
@Override
public boolean hasNext() {
- if (next != null)
+ if (hasNext)
return true;
-
- while (true)
- {
+
+ while (true) {
if (!source.hasNext())
return false;
- try
- {
+ try {
next = encoder.decode(source.next().get());
- return true;
+ return hasNext = true;
} catch (ValueFormatException vfe) {
if (!lossy)
throw vfe;
@@ -90,10 +90,11 @@ public abstract class TypedValueCombiner
@Override
public V next() {
- if (!hasNext())
+ if (!hasNext && !hasNext())
throw new NoSuchElementException();
V toRet = next;
next = null;
+ hasNext = false;
return toRet;
}
@@ -192,7 +193,7 @@ public abstract class TypedValueCombiner
super.init(source, options, env);
setLossyness(options);
}
-
+
private void setLossyness(Map<String,String> options) {
String loss = options.get(LOSSY);
if (loss == null)
@@ -214,7 +215,7 @@ public abstract class TypedValueCombiner
setLossyness(options);
return true;
}
-
+
/**
* A convenience method to set the "lossy" option on a TypedValueCombiner. If true, the combiner will ignore any values which fail to decode. Otherwise, the
* combiner will throw an error which will interrupt the action (and prevent potential data loss). False is the default behavior.
Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java Tue Jan 31 16:53:40 2012
@@ -88,6 +88,7 @@ public class WikipediaIngester extends C
columns.add(new Column("fi\0" + family));
}
TextIndexCombiner.setColumns(setting, columns);
+ TextIndexCombiner.setLossyness(setting, true);
tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
}
@@ -102,6 +103,7 @@ public class WikipediaIngester extends C
// Add the UID combiner
IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+ GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
@@ -110,6 +112,7 @@ public class WikipediaIngester extends C
// Add the UID combiner
IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+ GlobalIndexUidCombiner.setLossyness(setting, true);
tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java?rev=1238696&r1=1238695&r2=1238696&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java Tue Jan 31 16:53:40 2012
@@ -139,4 +139,47 @@ public class TextIndexTest {
Assert.assertTrue(offsets.get(4) == 15);
Assert.assertTrue(offsets.get(5) == 19);
}
+
+ @Test
+ public void testEmptyValue() throws InvalidProtocolBufferException {
+ Builder builder = createBuilder();
+ builder.addWordOffset(13);
+ builder.addWordOffset(15);
+ builder.addWordOffset(19);
+ builder.setNormalizedTermFrequency(0.12f);
+
+ values.add(new Value("".getBytes()));
+ values.add(new Value(builder.build().toByteArray()));
+ values.add(new Value("".getBytes()));
+
+ builder = createBuilder();
+ builder.addWordOffset(1);
+ builder.addWordOffset(5);
+ builder.setNormalizedTermFrequency(0.1f);
+
+ values.add(new Value(builder.build().toByteArray()));
+ values.add(new Value("".getBytes()));
+
+ builder = createBuilder();
+ builder.addWordOffset(3);
+ builder.setNormalizedTermFrequency(0.05f);
+
+ values.add(new Value(builder.build().toByteArray()));
+ values.add(new Value("".getBytes()));
+
+ Value result = combiner.reduce(new Key(), values.iterator());
+
+ TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
+
+ Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
+
+ List<Integer> offsets = info.getWordOffsetList();
+ Assert.assertTrue(offsets.size() == 6);
+ Assert.assertTrue(offsets.get(0) == 1);
+ Assert.assertTrue(offsets.get(1) == 3);
+ Assert.assertTrue(offsets.get(2) == 5);
+ Assert.assertTrue(offsets.get(3) == 13);
+ Assert.assertTrue(offsets.get(4) == 15);
+ Assert.assertTrue(offsets.get(5) == 19);
+ }
}