You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bi...@apache.org on 2012/01/30 19:09:05 UTC
svn commit: r1237873 - in /incubator/accumulo/branches/1.4: lib/ext/
src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/aggregator/
src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/i...
Author: billie
Date: Mon Jan 30 18:09:04 2012
New Revision: 1237873
URL: http://svn.apache.org/viewvc?rev=1237873&view=rev
Log:
ACCUMULO-354 replaced example aggregators with combiners
Added:
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java (with props)
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java (with props)
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
- copied, changed from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
- copied, changed from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java
Removed:
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/aggregator/
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TotalAggregatingIterator.java
Modified:
incubator/accumulo/branches/1.4/lib/ext/ (props changed)
incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp
incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/resources/META-INF/ (props changed)
Propchange: incubator/accumulo/branches/1.4/lib/ext/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Jan 30 18:09:04 2012
@@ -0,0 +1 @@
+*.jar
Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java Mon Jan 30 18:09:04 2012
@@ -19,6 +19,7 @@ package org.apache.accumulo.examples.wik
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
@@ -29,15 +30,17 @@ import java.util.regex.Pattern;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.IteratorSetting.Column;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-import org.apache.accumulo.core.iterators.aggregation.NumSummation;
-import org.apache.accumulo.core.iterators.aggregation.conf.AggregatorConfiguration;
+import org.apache.accumulo.core.iterators.user.SummingCombiner;
+import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
+import org.apache.accumulo.examples.wikisearch.iterator.TextIndexCombiner;
import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
@@ -52,8 +55,6 @@ import org.apache.hadoop.mapreduce.lib.i
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
-
-@SuppressWarnings("deprecation")
public class WikipediaIngester extends Configured implements Tool {
public final static String INGEST_LANGUAGE = "wikipedia.ingest_language";
@@ -74,23 +75,21 @@ public class WikipediaIngester extends C
// create the shard table
if (!tops.exists(tableName)) {
- // Set a text index aggregator on the given field names. No aggregator is set if the option is not supplied
+ // Set a text index combiner on the given field names. No combiner is set if the option is not supplied
String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME;
+ tops.create(tableName);
if (textIndexFamilies.length() > 0) {
- System.out.println("Adding content aggregator on the fields: " + textIndexFamilies);
-
- // Create and set the aggregators in one shot
- List<AggregatorConfiguration> aggregators = new ArrayList<AggregatorConfiguration>();
+ System.out.println("Adding content combiner on the fields: " + textIndexFamilies);
+ IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class);
+ List<Column> columns = new ArrayList<Column>();
for (String family : StringUtils.split(textIndexFamilies, ',')) {
- aggregators.add(new AggregatorConfiguration(new Text("fi\0" + family), org.apache.accumulo.examples.wikisearch.aggregator.TextIndexAggregator.class.getName()));
+ columns.add(new Column("fi\0" + family));
}
+ TextIndexCombiner.setColumns(setting, columns);
- tops.create(tableName);
- tops.addAggregators(tableName, aggregators);
- } else {
- tops.create(tableName);
+ tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
}
// Set the locality group for the full content column family
@@ -100,34 +99,27 @@ public class WikipediaIngester extends C
if (!tops.exists(indexTableName)) {
tops.create(indexTableName);
- // Add the UID aggregator
- for (IteratorScope scope : IteratorScope.values()) {
- String stem = String.format("%s%s.%s", Property.TABLE_ITERATOR_PREFIX, scope.name(), "UIDAggregator");
- tops.setProperty(indexTableName, stem, "19,org.apache.accumulo.examples.wikisearch.iterator.TotalAggregatingIterator");
- stem += ".opt.";
- tops.setProperty(indexTableName, stem + "*", "org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator");
-
- }
+ // Add the UID combiner
+ IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+ tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
if (!tops.exists(reverseIndexTableName)) {
tops.create(reverseIndexTableName);
- // Add the UID aggregator
- for (IteratorScope scope : IteratorScope.values()) {
- String stem = String.format("%s%s.%s", Property.TABLE_ITERATOR_PREFIX, scope.name(), "UIDAggregator");
- tops.setProperty(reverseIndexTableName, stem, "19,org.apache.accumulo.examples.wikisearch.iterator.TotalAggregatingIterator");
- stem += ".opt.";
- tops.setProperty(reverseIndexTableName, stem + "*", "org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator");
-
- }
+ // Add the UID combiner
+ IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+ GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+ tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
}
if (!tops.exists(metadataTableName)) {
- // Add the NumSummation aggregator for the frequency column
- List<AggregatorConfiguration> aggregators = new ArrayList<AggregatorConfiguration>();
- aggregators.add(new AggregatorConfiguration(new Text("f"), NumSummation.class.getName()));
+ // Add the SummingCombiner with VARLEN encoding for the frequency column
tops.create(metadataTableName);
- tops.addAggregators(metadataTableName, aggregators);
+ IteratorSetting setting = new IteratorSetting(10, SummingCombiner.class);
+ SummingCombiner.setColumns(setting, Collections.singletonList(new Column("f")));
+ SummingCombiner.setEncodingType(setting, SummingCombiner.Type.VARLEN);
+ tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class));
}
}
@@ -136,7 +128,7 @@ public class WikipediaIngester extends C
Job job = new Job(getConf(), "Ingest Wikipedia");
Configuration conf = job.getConfiguration();
conf.set("mapred.map.tasks.speculative.execution", "false");
-
+
String tablename = WikipediaConfiguration.getTableName(conf);
String zookeepers = WikipediaConfiguration.getZookeepers(conf);
@@ -171,8 +163,8 @@ public class WikipediaIngester extends C
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Mutation.class);
job.setOutputFormatClass(AccumuloOutputFormat.class);
- AccumuloOutputFormat.setOutputInfo(job, user, password, true, tablename);
- AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
+ AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), user, password, true, tablename);
+ AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), instanceName, zookeepers);
return job.waitForCompletion(true) ? 0 : 1;
}
Added: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java?rev=1237873&view=auto
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java (added)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java Mon Jan 30 18:09:04 2012
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.TypedValueCombiner;
+import org.apache.accumulo.core.iterators.ValueFormatException;
+import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
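+ * Combines the Uid.List values of a key: sums COUNT, unions the UIDs, and sets IGNORE (dropping the UIDs) when more than MAX distinct UIDs are seen or any input has IGNORE set.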
+ */
+public class GlobalIndexUidCombiner extends TypedValueCombiner<Uid.List> {
+ public static final Encoder<Uid.List> UID_LIST_ENCODER = new UidListEncoder();
+ public static final int MAX = 20;
+
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+ super.init(source, options, env);
+ setEncoder(UID_LIST_ENCODER);
+ }
+
+ @Override
+ public Uid.List typedReduce(Key key, Iterator<Uid.List> iter) {
+ Uid.List.Builder builder = Uid.List.newBuilder();
+ HashSet<String> uids = new HashSet<String>();
+ boolean seenIgnore = false;
+ long count = 0;
+ while (iter.hasNext()) {
+ Uid.List v = iter.next();
+ if (null == v)
+ continue;
+ count = count + v.getCOUNT();
+ if (v.getIGNORE()) {
+ seenIgnore = true;
+ }
+ uids.addAll(v.getUIDList());
+ }
+ // Special case logic
+ // If we have aggregated more than MAX UIDs, or any input already had IGNORE set, then clear the UID list and set IGNORE to true
+ // However, always maintain the count
+ builder.setCOUNT(count);
+ if (uids.size() > MAX || seenIgnore) {
+ builder.setIGNORE(true);
+ builder.clearUID();
+ } else {
+ builder.setIGNORE(false);
+ builder.addAllUID(uids);
+ }
+ return builder.build();
+ }
+
+ public static class UidListEncoder implements Encoder<Uid.List> {
+ @Override
+ public byte[] encode(Uid.List v) {
+ return v.toByteArray();
+ }
+
+ @Override
+ public Uid.List decode(byte[] b) {
+ if (b.length == 0)
+ return null;
+ try {
+ return Uid.List.parseFrom(b);
+ } catch (InvalidProtocolBufferException e) {
+ throw new ValueFormatException("Value passed to aggregator was not of type Uid.List");
+ }
+ }
+ }
+}
Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java?rev=1237873&view=auto
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java (added)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java Mon Jan 30 18:09:04 2012
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.TypedValueCombiner;
+import org.apache.accumulo.core.iterators.ValueFormatException;
+import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
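+ * Combines the TermWeight.Info values of a key: merges all word offsets into one ascending list (duplicates kept) and sums the positive normalized term frequencies.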
+ */
+public class TextIndexCombiner extends TypedValueCombiner<TermWeight.Info> {
+ public static final Encoder<TermWeight.Info> TERMWEIGHT_INFO_ENCODER = new TermWeightInfoEncoder();
+
+ @Override
+ public TermWeight.Info typedReduce(Key key, Iterator<TermWeight.Info> iter) {
+ TermWeight.Info.Builder builder = TermWeight.Info.newBuilder();
+ List<Integer> offsets = new ArrayList<Integer>();
+ float normalizedTermFrequency = 0f;
+
+ while (iter.hasNext()) {
+ TermWeight.Info info = iter.next();
+ if (null == info)
+ continue;
+
+ // Add each offset into the list maintaining sorted order
+ for (int offset : info.getWordOffsetList()) {
+ int pos = Collections.binarySearch(offsets, offset);
+
+ if (pos < 0) {
+ // Undo the transform on the insertion point
+ offsets.add((-1 * pos) - 1, offset);
+ } else {
+ offsets.add(pos, offset);
+ }
+ }
+
+ if (info.getNormalizedTermFrequency() > 0) {
+ normalizedTermFrequency += info.getNormalizedTermFrequency();
+ }
+ }
+
+ // Keep the sorted order we tried to maintain
+ for (int i = 0; i < offsets.size(); ++i) {
+ builder.addWordOffset(offsets.get(i));
+ }
+
+ builder.setNormalizedTermFrequency(normalizedTermFrequency);
+ return builder.build();
+ }
+
+ @Override
+ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+ super.init(source, options, env);
+ setEncoder(TERMWEIGHT_INFO_ENCODER);
+ }
+
+ public static class TermWeightInfoEncoder implements Encoder<TermWeight.Info> {
+ @Override
+ public byte[] encode(TermWeight.Info v) {
+ return v.toByteArray();
+ }
+
+ @Override
+ public TermWeight.Info decode(byte[] b) {
+ if (b.length == 0)
+ return null;
+ try {
+ return TermWeight.Info.parseFrom(b);
+ } catch (InvalidProtocolBufferException e) {
+ throw new ValueFormatException("Value passed to aggregator was not of type TermWeight.Info");
+ }
+ }
+ }
+}
Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java (from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java)
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java?p2=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java&p1=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java&r1=1230153&r2=1237873&rev=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java Mon Jan 30 18:09:04 2012
@@ -14,49 +14,58 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.accumulo.examples.wikisearch.aggregator;
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.UUID;
-import junit.framework.TestCase;
-
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.aggregation.Aggregator;
-import org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator;
+import org.apache.accumulo.core.iterators.Combiner;
import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
-
-@SuppressWarnings("deprecation")
-public class GlobalIndexUidAggregatorTest extends TestCase {
-
- Aggregator agg = new GlobalIndexUidAggregator();
+public class GlobalIndexUidTest {
+ private GlobalIndexUidCombiner combiner;
+ private List<Value> values;
+
+ @Before
+ public void setup() throws Exception {
+ combiner = new GlobalIndexUidCombiner();
+ combiner.init(null, Collections.singletonMap("all", "true"), null);
+ values = new ArrayList<Value>();
+ }
private Uid.List.Builder createNewUidList() {
return Uid.List.newBuilder();
}
+ @Test
public void testSingleUid() {
- agg.reset();
Builder b = createNewUidList();
b.setCOUNT(1);
b.setIGNORE(false);
b.addUID(UUID.randomUUID().toString());
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
- Value result = agg.aggregate();
+ values.add(val);
+ Value result = combiner.reduce(new Key(), values.iterator());
assertTrue(val.compareTo(result.get()) == 0);
}
+ @Test
public void testLessThanMax() throws Exception {
- agg.reset();
List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidAggregator.MAX - 1; i++) {
+ for (int i = 0; i < GlobalIndexUidCombiner.MAX - 1; i++) {
Builder b = createNewUidList();
b.setIGNORE(false);
String uuid = UUID.randomUUID().toString();
@@ -65,21 +74,21 @@ public class GlobalIndexUidAggregatorTes
b.addUID(uuid);
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
+ values.add(val);
}
- Value result = agg.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == (GlobalIndexUidAggregator.MAX - 1));
+ assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX - 1));
List<String> resultListUUIDs = resultList.getUIDList();
for (String s : savedUUIDs)
assertTrue(resultListUUIDs.contains(s));
}
+ @Test
public void testEqualsMax() throws Exception {
- agg.reset();
List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidAggregator.MAX; i++) {
+ for (int i = 0; i < GlobalIndexUidCombiner.MAX; i++) {
Builder b = createNewUidList();
b.setIGNORE(false);
String uuid = UUID.randomUUID().toString();
@@ -88,21 +97,21 @@ public class GlobalIndexUidAggregatorTes
b.addUID(uuid);
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
+ values.add(val);
}
- Value result = agg.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == false);
- assertTrue(resultList.getUIDCount() == (GlobalIndexUidAggregator.MAX));
+ assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX));
List<String> resultListUUIDs = resultList.getUIDList();
for (String s : savedUUIDs)
assertTrue(resultListUUIDs.contains(s));
}
+ @Test
public void testMoreThanMax() throws Exception {
- agg.reset();
List<String> savedUUIDs = new ArrayList<String>();
- for (int i = 0; i < GlobalIndexUidAggregator.MAX + 10; i++) {
+ for (int i = 0; i < GlobalIndexUidCombiner.MAX + 10; i++) {
Builder b = createNewUidList();
b.setIGNORE(false);
String uuid = UUID.randomUUID().toString();
@@ -111,51 +120,56 @@ public class GlobalIndexUidAggregatorTes
b.addUID(uuid);
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
+ values.add(val);
}
- Value result = agg.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == true);
assertTrue(resultList.getUIDCount() == 0);
- assertTrue(resultList.getCOUNT() == (GlobalIndexUidAggregator.MAX + 10));
+ assertTrue(resultList.getCOUNT() == (GlobalIndexUidCombiner.MAX + 10));
}
+ @Test
public void testSeenIgnore() throws Exception {
- agg.reset();
Builder b = createNewUidList();
b.setIGNORE(true);
b.setCOUNT(0);
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
+ values.add(val);
b = createNewUidList();
b.setIGNORE(false);
b.setCOUNT(1);
b.addUID(UUID.randomUUID().toString());
uidList = b.build();
val = new Value(uidList.toByteArray());
- agg.collect(val);
- Value result = agg.aggregate();
+ values.add(val);
+ Value result = combiner.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == true);
assertTrue(resultList.getUIDCount() == 0);
assertTrue(resultList.getCOUNT() == 1);
}
+ @Test
public void testInvalidValueType() throws Exception {
- Logger.getLogger(GlobalIndexUidAggregator.class).setLevel(Level.OFF);
- agg.reset();
+ Combiner comb = new GlobalIndexUidCombiner();
+ IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
+ GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+ GlobalIndexUidCombiner.setLossyness(setting, true);
+ comb.init(null, setting.getProperties(), null);
+ Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
Value val = new Value(UUID.randomUUID().toString().getBytes());
- agg.collect(val);
- Value result = agg.aggregate();
+ values.add(val);
+ Value result = comb.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == false);
assertTrue(resultList.getUIDCount() == 0);
assertTrue(resultList.getCOUNT() == 0);
}
+ @Test
public void testCount() throws Exception {
- agg.reset();
UUID uuid = UUID.randomUUID();
// Collect the same UUID five times.
for (int i = 0; i < 5; i++) {
@@ -165,9 +179,9 @@ public class GlobalIndexUidAggregatorTes
b.addUID(uuid.toString());
Uid.List uidList = b.build();
Value val = new Value(uidList.toByteArray());
- agg.collect(val);
+ values.add(val);
}
- Value result = agg.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
Uid.List resultList = Uid.List.parseFrom(result.get());
assertTrue(resultList.getIGNORE() == false);
assertTrue(resultList.getUIDCount() == 1);
Copied: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java (from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java)
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java?p2=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java&p1=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java&r1=1230153&r2=1237873&rev=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java Mon Jan 30 18:09:04 2012
@@ -14,29 +14,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.accumulo.examples.wikisearch.aggregator;
+package org.apache.accumulo.examples.wikisearch.iterator;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import junit.framework.Assert;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
+import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.examples.wikisearch.aggregator.TextIndexAggregator;
import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
import com.google.protobuf.InvalidProtocolBufferException;
-public class TextIndexAggregatorTest {
- private TextIndexAggregator aggregator;
+public class TextIndexTest {
+ private TextIndexCombiner combiner;
+ private List<Value> values;
@Before
public void setup() throws Exception {
- aggregator = new TextIndexAggregator();
+ combiner = new TextIndexCombiner();
+ combiner.init(null, Collections.singletonMap("all", "true"), null);
+ values = new ArrayList<Value>();
}
@After
@@ -50,15 +54,14 @@ public class TextIndexAggregatorTest {
@Test
public void testSingleValue() throws InvalidProtocolBufferException {
- aggregator = new TextIndexAggregator();
Builder builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
- Value result = aggregator.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
@@ -72,21 +75,20 @@ public class TextIndexAggregatorTest {
@Test
public void testAggregateTwoValues() throws InvalidProtocolBufferException {
- aggregator = new TextIndexAggregator();
Builder builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
builder = createBuilder();
builder.addWordOffset(3);
builder.setNormalizedTermFrequency(0.05f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
- Value result = aggregator.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
@@ -101,30 +103,28 @@ public class TextIndexAggregatorTest {
@Test
public void testAggregateManyValues() throws InvalidProtocolBufferException {
- aggregator = new TextIndexAggregator();
-
Builder builder = createBuilder();
builder.addWordOffset(13);
builder.addWordOffset(15);
builder.addWordOffset(19);
builder.setNormalizedTermFrequency(0.12f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
builder = createBuilder();
builder.addWordOffset(1);
builder.addWordOffset(5);
builder.setNormalizedTermFrequency(0.1f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
builder = createBuilder();
builder.addWordOffset(3);
builder.setNormalizedTermFrequency(0.05f);
- aggregator.collect(new Value(builder.build().toByteArray()));
+ values.add(new Value(builder.build().toByteArray()));
- Value result = aggregator.aggregate();
+ Value result = combiner.reduce(new Key(), values.iterator());
TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp Mon Jan 30 18:09:04 2012
@@ -62,6 +62,8 @@
<p>The search syntax is boolean logic, for example: TEXT == 'boy' and TITLE =~ 'Autism'. The supported operators are:
==, !=, <, >, ≤, ≥, =~, and !~. Likewise grouping can be performed using parentheses and predicates can be
joined using and, or, and not.
+ <p>To highlight the cell-level access control of Apache Accumulo, the "authorization" required for a particular cell is the language
+ of the associated Wikipedia article.
</div>
<div id="d">
<form id="FORM" name="queryForm" method="get" target="results" onsubmit="return setAction()">
@@ -73,6 +75,8 @@
</div>
<br />
<div class="center_input">
+ <label>Authorizations: </label>
+ <br />
<label>All</label><input type="checkbox" name="auths" value="all" />
</div>
<div class="center_input">
Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java Mon Jan 30 18:09:04 2012
@@ -40,7 +40,6 @@ import javax.xml.transform.TransformerFa
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
-
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.ZooKeeperInstance;
@@ -49,7 +48,6 @@ import org.apache.accumulo.examples.wiki
import org.apache.accumulo.examples.wikisearch.sample.Results;
import org.apache.log4j.Logger;
-
@Stateless
@Local(IQuery.class)
public class Query implements IQuery {
@@ -192,12 +190,9 @@ public class Query implements IQuery {
// Create list of auths
List<String> authorizations = new ArrayList<String>();
- if (auths == null || "".equals(auths)) {
- authorizations.add("all");
- } else {
+ if (auths != null && auths.length() > 0)
for (String a : auths.split(","))
authorizations.add(a);
- }
ContentLogic table = new ContentLogic();
table.setTableName(tableName);
return table.runQuery(connector, query, authorizations);
@@ -229,8 +224,9 @@ public class Query implements IQuery {
// Create list of auths
List<String> authorizations = new ArrayList<String>();
- for (String a : auths.split(","))
- authorizations.add(a);
+ if (auths != null && auths.length() > 0)
+ for (String a : auths.split(","))
+ authorizations.add(a);
QueryLogic table = new QueryLogic();
table.setTableName(tableName);
Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/resources/META-INF/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Jan 30 18:09:04 2012
@@ -0,0 +1 @@
+ejb-jar.xml