You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by kt...@apache.org on 2017/11/14 21:59:36 UTC
[accumulo] branch master updated: ACCUMULO-4730 Added
EntryLengthSummarizer
This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/master by this push:
new 9cd4be0 ACCUMULO-4730 Added EntryLengthSummarizer
9cd4be0 is described below
commit 9cd4be0432c7b0297d86b19ddeac64ed0feaea87
Author: jkrdev <jk...@gmail.com>
AuthorDate: Wed Nov 1 18:31:12 2017 +0000
ACCUMULO-4730 Added EntryLengthSummarizer
---
.../summary/summarizers/EntryLengthSummarizer.java | 147 +++
.../summarizers/EntryLengthSummarizersTest.java | 1149 ++++++++++++++++++++
2 files changed, 1296 insertions(+)
diff --git a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java
new file mode 100644
index 0000000..10fb9d9
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.summary.summarizers;
+
+import java.math.RoundingMode;
+import java.util.Map;
+import java.util.function.BiFunction;
+
+import org.apache.accumulo.core.client.summary.Summarizer;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+
+import com.google.common.math.IntMath;
+
+/**
+ * Summarizer that computes summary information about field lengths. Specifically key length, row length, family length, qualifier length, visibility length,
+ * and value length. Incrementally computes minimum, maximum, count, sum, and log2 histogram of the lengths.
+ *
+ * @since 2.0.0
+ */
+public class EntryLengthSummarizer implements Summarizer {
+  /* Number of log2 histogram buckets. IntMath.log2 of an int is at most 31, so 32 buckets cover every length. */
+  private static final int NUM_BUCKETS = 32;
+
+  /* Helper class that accumulates the minimum, maximum, sum and log2 histogram of the lengths seen for one field. */
+  private static class LengthStats {
+    private long min = Long.MAX_VALUE;
+    private long max = Long.MIN_VALUE;
+    private long sum = 0;
+    private final long[] counts = new long[NUM_BUCKETS];
+
+    /* Records one observed length. */
+    private void accept(int length) {
+      min = Math.min(min, length);
+      max = Math.max(max, length);
+      sum += length;
+      // IntMath.log2 only accepts positive arguments, so a zero length goes straight into bucket 0.
+      int idx = length == 0 ? 0 : IntMath.log2(length, RoundingMode.HALF_UP);
+      counts[idx]++;
+    }
+
+    /* Emits prefix.min, prefix.max, prefix.sum and one prefix.logHist.N entry per non-empty histogram bucket. */
+    void summarize(String prefix, StatisticConsumer sc) {
+      // min and max still holding their initial sentinels means nothing was accepted; 0 is reported instead.
+      sc.accept(prefix + ".min", (min != Long.MAX_VALUE ? min : 0));
+      sc.accept(prefix + ".max", (max != Long.MIN_VALUE ? max : 0));
+      sc.accept(prefix + ".sum", sum);
+      for (int i = 0; i < counts.length; i++) {
+        if (counts[i] > 0) {
+          sc.accept(prefix + ".logHist." + i, counts[i]);
+        }
+      }
+    }
+  }
+
+  /* Folds the value stored under key in stats2, when there is one, into stats1 using mergeFunc. */
+  private static void merge(String key, BiFunction<Long,Long,Long> mergeFunc, Map<String,Long> stats1, Map<String,Long> stats2) {
+    Long mergeVal = stats2.get(key);
+    if (mergeVal != null) {
+      stats1.merge(key, mergeVal, mergeFunc);
+    }
+  }
+
+  /* Merges the min, max, sum and histogram statistics stored under one field prefix from stats2 into stats1. */
+  private static void merge(String prefix, Map<String,Long> stats1, Map<String,Long> stats2) {
+    merge(prefix + ".min", Long::min, stats1, stats2);
+    merge(prefix + ".max", Long::max, stats1, stats2);
+    merge(prefix + ".sum", Long::sum, stats1, stats2);
+    for (int i = 0; i < NUM_BUCKETS; i++) {
+      merge(prefix + ".logHist." + i, Long::sum, stats1, stats2);
+    }
+  }
+
+  /** Creates a collector that tracks length statistics for the whole key, each key field and the value, plus an entry count. */
+  @Override
+  public Collector collector(SummarizerConfiguration sc) {
+    return new Collector() {
+      private LengthStats keyStats = new LengthStats();
+      private LengthStats rowStats = new LengthStats();
+      private LengthStats familyStats = new LengthStats();
+      private LengthStats qualifierStats = new LengthStats();
+      private LengthStats visibilityStats = new LengthStats();
+      private LengthStats valueStats = new LengthStats();
+      private long total = 0;
+
+      @Override
+      public void accept(Key k, Value v) {
+        keyStats.accept(k.getLength());
+        rowStats.accept(k.getRowData().length());
+        familyStats.accept(k.getColumnFamilyData().length());
+        qualifierStats.accept(k.getColumnQualifierData().length());
+        visibilityStats.accept(k.getColumnVisibilityData().length());
+        valueStats.accept(v.getSize());
+        total++;
+      }
+
+      @Override
+      public void summarize(StatisticConsumer sc) {
+        keyStats.summarize("key", sc);
+        rowStats.summarize("row", sc);
+        familyStats.summarize("family", sc);
+        qualifierStats.summarize("qualifier", sc);
+        visibilityStats.summarize("visibility", sc);
+        valueStats.summarize("value", sc);
+        sc.accept("total", total);
+      }
+    };
+  }
+
+  /** Creates a combiner that folds stats2 into stats1. A summary whose total is 0 only holds placeholder zeros and must not drag a real minimum down. */
+  @Override
+  public Combiner combiner(SummarizerConfiguration sc) {
+    return (stats1, stats2) -> {
+      Long total1 = stats1.get("total");
+      Long total2 = stats2.get("total");
+      if (total2 != null && total2 == 0) {
+        return; // stats2 describes no entries, so stats1 is already the merged result
+      }
+      if (total1 != null && total1 == 0) {
+        stats1.putAll(stats2); // stats1 describes no entries, so stats2 is the merged result
+        return;
+      }
+      merge("key", stats1, stats2);
+      merge("row", stats1, stats2);
+      merge("family", stats1, stats2);
+      merge("qualifier", stats1, stats2);
+      merge("visibility", stats1, stats2);
+      merge("value", stats1, stats2);
+      merge("total", Long::sum, stats1, stats2); // null-tolerant helper: Map.merge throws NullPointerException for a null value
+    };
+  }
+}
diff --git a/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java
new file mode 100644
index 0000000..ff50d87
--- /dev/null
+++ b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java
@@ -0,0 +1,1149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.summary.summarizers;
+
+import java.util.HashMap;
+
+import org.apache.accumulo.core.client.summary.Summarizer.Collector;
+import org.apache.accumulo.core.client.summary.Summarizer.Combiner;
+import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
+import org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class EntryLengthSummarizersTest {
+
+ /* COLLECTOR TEST */
+ /* Basic Test: Each test adds to the next, all are simple lengths. */
+
+ @Test
+ public void testEmpty() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Nothing is accepted before summarizing.
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   // Every field reports zero min, max and sum, and no histogram bucket is emitted.
+   HashMap<String,Long> expected = new HashMap<>();
+   for (String field : new String[] {"key", "row", "family", "qualifier", "visibility", "value"}) {
+     for (String stat : new String[] {".min", ".max", ".sum"}) {
+       expected.put(field + stat, 0L);
+     }
+   }
+   expected.put("total", 0L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testBasicRow() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Three entries whose only non-empty field is a row of length 2.
+   for (String row : new String[] {"r1", "r2", "r3"}) {
+     collector.accept(new Key(row), new Value(""));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // The key is made of the row alone, so both see length 2 three times (log2 bucket 1).
+   for (String field : new String[] {"key", "row"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 2L);
+     expected.put(field + ".sum", 6L);
+     expected.put(field + ".logHist.1", 3L);
+   }
+   // Every remaining field is empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"family", "qualifier", "visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testBasicFamily() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Three entries with a row and a family of length 2 each.
+   for (int i = 1; i <= 3; i++) {
+     collector.accept(new Key("r" + i, "f" + i), new Value(""));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key length is 4 for every entry (log2 bucket 2).
+   expected.put("key.min", 4L);
+   expected.put("key.max", 4L);
+   expected.put("key.sum", 12L);
+   expected.put("key.logHist.2", 3L);
+   // Fields of length 2 (log2 bucket 1).
+   for (String field : new String[] {"row", "family"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 2L);
+     expected.put(field + ".sum", 6L);
+     expected.put(field + ".logHist.1", 3L);
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"qualifier", "visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testBasicQualifier() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Three entries with a row, family and qualifier of length 2 each.
+   for (int i = 1; i <= 3; i++) {
+     collector.accept(new Key("r" + i, "f" + i, "q" + i), new Value(""));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key length is 6 for every entry (log2 bucket 3).
+   expected.put("key.min", 6L);
+   expected.put("key.max", 6L);
+   expected.put("key.sum", 18L);
+   expected.put("key.logHist.3", 3L);
+   // Fields of length 2 (log2 bucket 1).
+   for (String field : new String[] {"row", "family", "qualifier"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 2L);
+     expected.put(field + ".sum", 6L);
+     expected.put(field + ".logHist.1", 3L);
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testBasicVisibility() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Three entries whose four key fields all have length 2; the values are empty.
+   for (int i = 1; i <= 3; i++) {
+     collector.accept(new Key("r" + i, "f" + i, "q" + i, "v" + i), new Value(""));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key length is 8 for every entry (log2 bucket 3).
+   expected.put("key.min", 8L);
+   expected.put("key.max", 8L);
+   expected.put("key.sum", 24L);
+   expected.put("key.logHist.3", 3L);
+   // Fields of length 2 (log2 bucket 1).
+   for (String field : new String[] {"row", "family", "qualifier", "visibility"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 2L);
+     expected.put(field + ".sum", 6L);
+     expected.put(field + ".logHist.1", 3L);
+   }
+   // The value is empty in all three entries (log2 bucket 0).
+   expected.put("value.min", 0L);
+   expected.put("value.max", 0L);
+   expected.put("value.sum", 0L);
+   expected.put("value.logHist.0", 3L);
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testBasicValue() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Three entries whose four key fields and value all have length 2.
+   for (int i = 1; i <= 3; i++) {
+     collector.accept(new Key("r" + i, "f" + i, "q" + i, "v" + i), new Value("v" + i));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key length is 8 for every entry (log2 bucket 3).
+   expected.put("key.min", 8L);
+   expected.put("key.max", 8L);
+   expected.put("key.sum", 24L);
+   expected.put("key.logHist.3", 3L);
+   // Every individual field has length 2 (log2 bucket 1).
+   for (String field : new String[] {"row", "family", "qualifier", "visibility", "value"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 2L);
+     expected.put(field + ".sum", 6L);
+     expected.put(field + ".logHist.1", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ /* Complex Test: Each test adds to the next, all are mixed lengths. */
+
+ @Test
+ public void testComplexRow() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Rows of length 2, 4 and 10; every other field is empty.
+   for (String row : new String[] {"r1", "row2", "columnRow3"}) {
+     collector.accept(new Key(row), new Value(""));
+   }
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // The key is made of the row alone, so both report the same statistics.
+   for (String field : new String[] {"key", "row"}) {
+     expected.put(field + ".min", 2L);
+     expected.put(field + ".max", 10L);
+     expected.put(field + ".sum", 16L);
+     // One length falls into each of the log2 buckets 1, 2 and 3.
+     for (int bucket = 1; bucket <= 3; bucket++) {
+       expected.put(field + ".logHist." + bucket, 1L);
+     }
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"family", "qualifier", "visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testComplexFamily() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Rows and families of mixed lengths; qualifier, visibility and value stay empty.
+   collector.accept(new Key("r1", "family1"), new Value(""));
+   collector.accept(new Key("row2", "columnFamily2"), new Value(""));
+   collector.accept(new Key("columnRow3", "f3"), new Value(""));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key lengths are 9, 17 and 12.
+   expected.put("key.min", 9L);
+   expected.put("key.max", 17L);
+   expected.put("key.sum", 38L);
+   expected.put("key.logHist.3", 1L);
+   expected.put("key.logHist.4", 2L);
+   // Row lengths are 2, 4 and 10: one per log2 bucket 1, 2 and 3.
+   expected.put("row.min", 2L);
+   expected.put("row.max", 10L);
+   expected.put("row.sum", 16L);
+   for (int bucket = 1; bucket <= 3; bucket++) {
+     expected.put("row.logHist." + bucket, 1L);
+   }
+   // Family lengths are 7, 13 and 2: one per log2 bucket 1, 3 and 4.
+   expected.put("family.min", 2L);
+   expected.put("family.max", 13L);
+   expected.put("family.sum", 22L);
+   for (int bucket : new int[] {1, 3, 4}) {
+     expected.put("family.logHist." + bucket, 1L);
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"qualifier", "visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testComplexQualifier() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Rows, families and qualifiers of mixed lengths; visibility and value stay empty.
+   collector.accept(new Key("r1", "family1", "columnQualifier1"), new Value(""));
+   collector.accept(new Key("row2", "columnFamily2", "q2"), new Value(""));
+   collector.accept(new Key("columnRow3", "f3", "qualifier3"), new Value(""));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key lengths are 25, 19 and 22.
+   expected.put("key.min", 19L);
+   expected.put("key.max", 25L);
+   expected.put("key.sum", 66L);
+   expected.put("key.logHist.4", 2L);
+   expected.put("key.logHist.5", 1L);
+   // Row lengths are 2, 4 and 10: one per log2 bucket 1, 2 and 3.
+   expected.put("row.min", 2L);
+   expected.put("row.max", 10L);
+   expected.put("row.sum", 16L);
+   for (int bucket = 1; bucket <= 3; bucket++) {
+     expected.put("row.logHist." + bucket, 1L);
+   }
+   // Family lengths are 7, 13 and 2; qualifier lengths are 16, 2 and 10.
+   expected.put("family.min", 2L);
+   expected.put("family.max", 13L);
+   expected.put("family.sum", 22L);
+   expected.put("qualifier.min", 2L);
+   expected.put("qualifier.max", 16L);
+   expected.put("qualifier.sum", 28L);
+   // Both fields place one length in each of the log2 buckets 1, 3 and 4.
+   for (String field : new String[] {"family", "qualifier"}) {
+     for (int bucket : new int[] {1, 3, 4}) {
+       expected.put(field + ".logHist." + bucket, 1L);
+     }
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"visibility", "value"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testComplexVisibility() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // All four key fields have mixed lengths; the values stay empty.
+   collector.accept(new Key("r1", "family1", "columnQualifier1", "v1"), new Value(""));
+   collector.accept(new Key("row2", "columnFamily2", "q2", "visibility2"), new Value(""));
+   collector.accept(new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"), new Value(""));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key lengths are 27, 30 and 39, all in log2 bucket 5.
+   expected.put("key.min", 27L);
+   expected.put("key.max", 39L);
+   expected.put("key.sum", 96L);
+   expected.put("key.logHist.5", 3L);
+   // Row lengths are 2, 4 and 10: one per log2 bucket 1, 2 and 3.
+   expected.put("row.min", 2L);
+   expected.put("row.max", 10L);
+   expected.put("row.sum", 16L);
+   for (int bucket = 1; bucket <= 3; bucket++) {
+     expected.put("row.logHist." + bucket, 1L);
+   }
+   // Family lengths are 7, 13, 2; qualifier lengths 16, 2, 10; visibility lengths 2, 11, 17.
+   expected.put("family.min", 2L);
+   expected.put("family.max", 13L);
+   expected.put("family.sum", 22L);
+   expected.put("qualifier.min", 2L);
+   expected.put("qualifier.max", 16L);
+   expected.put("qualifier.sum", 28L);
+   expected.put("visibility.min", 2L);
+   expected.put("visibility.max", 17L);
+   expected.put("visibility.sum", 30L);
+   // Each of these fields places one length in each of the log2 buckets 1, 3 and 4.
+   for (String field : new String[] {"family", "qualifier", "visibility"}) {
+     for (int bucket : new int[] {1, 3, 4}) {
+       expected.put(field + ".logHist." + bucket, 1L);
+     }
+   }
+   // The value is empty in all three entries (log2 bucket 0).
+   expected.put("value.min", 0L);
+   expected.put("value.max", 0L);
+   expected.put("value.sum", 0L);
+   expected.put("value.logHist.0", 3L);
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testComplexValue() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // All four key fields and the value have mixed lengths.
+   collector.accept(new Key("r1", "family1", "columnQualifier1", "v1"), new Value("v1"));
+   collector.accept(new Key("row2", "columnFamily2", "q2", "visibility2"), new Value("value2"));
+   collector.accept(new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"), new Value("keyValue3"));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key lengths are 27, 30 and 39, all in log2 bucket 5.
+   expected.put("key.min", 27L);
+   expected.put("key.max", 39L);
+   expected.put("key.sum", 96L);
+   expected.put("key.logHist.5", 3L);
+   // Row lengths are 2, 4 and 10: one per log2 bucket 1, 2 and 3.
+   expected.put("row.min", 2L);
+   expected.put("row.max", 10L);
+   expected.put("row.sum", 16L);
+   for (int bucket = 1; bucket <= 3; bucket++) {
+     expected.put("row.logHist." + bucket, 1L);
+   }
+   // Family lengths are 7, 13, 2; qualifier lengths 16, 2, 10; visibility lengths 2, 11, 17.
+   expected.put("family.min", 2L);
+   expected.put("family.max", 13L);
+   expected.put("family.sum", 22L);
+   expected.put("qualifier.min", 2L);
+   expected.put("qualifier.max", 16L);
+   expected.put("qualifier.sum", 28L);
+   expected.put("visibility.min", 2L);
+   expected.put("visibility.max", 17L);
+   expected.put("visibility.sum", 30L);
+   // Each of these fields places one length in each of the log2 buckets 1, 3 and 4.
+   for (String field : new String[] {"family", "qualifier", "visibility"}) {
+     for (int bucket : new int[] {1, 3, 4}) {
+       expected.put(field + ".logHist." + bucket, 1L);
+     }
+   }
+   // Value lengths are 2, 6 and 9.
+   expected.put("value.min", 2L);
+   expected.put("value.max", 9L);
+   expected.put("value.sum", 17L);
+   expected.put("value.logHist.1", 1L);
+   expected.put("value.logHist.3", 2L);
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ /* Miscellaneous Test */
+
+ @Test
+ public void testAll() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Six entries mixing short and long rows, families, qualifiers, visibilities and values.
+   collector.accept(new Key("maximumnoqualifier", "f1", "q", "vis1"), new Value("v1"));
+   collector.accept(new Key("minKey", "fam2", "q2", "visibility2"), new Value("value2"));
+   collector.accept(new Key("row3", "f3", "qualifier3", "v3"), new Value("val3"));
+   collector.accept(new Key("r4", "family4", "qual4", "vis4"), new Value("fourthvalue"));
+   collector.accept(new Key("fifthrow", "thirdfamily", "q5", "v5"), new Value(""));
+   collector.accept(new Key("r6", "sixthfamily", "qual6", "visibi6"), new Value("value6"));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // Key lengths: 25, 23, 18, 18, 23, 25.
+   expected.put("key.min", 18L);
+   expected.put("key.max", 25L);
+   expected.put("key.sum", 132L);
+   expected.put("key.logHist.4", 2L);
+   expected.put("key.logHist.5", 4L);
+   // Row lengths: 18, 6, 4, 2, 8, 2.
+   expected.put("row.min", 2L);
+   expected.put("row.max", 18L);
+   expected.put("row.sum", 40L);
+   expected.put("row.logHist.1", 2L);
+   expected.put("row.logHist.2", 1L);
+   expected.put("row.logHist.3", 2L);
+   expected.put("row.logHist.4", 1L);
+   // Family lengths: 2, 4, 2, 7, 11, 11.
+   expected.put("family.min", 2L);
+   expected.put("family.max", 11L);
+   expected.put("family.sum", 37L);
+   expected.put("family.logHist.1", 2L);
+   expected.put("family.logHist.2", 1L);
+   expected.put("family.logHist.3", 3L);
+   // Qualifier lengths: 1, 2, 10, 5, 2, 5.
+   expected.put("qualifier.min", 1L);
+   expected.put("qualifier.max", 10L);
+   expected.put("qualifier.sum", 25L);
+   expected.put("qualifier.logHist.0", 1L);
+   expected.put("qualifier.logHist.1", 2L);
+   expected.put("qualifier.logHist.2", 2L);
+   expected.put("qualifier.logHist.3", 1L);
+   // Visibility lengths: 4, 11, 2, 4, 2, 7.
+   expected.put("visibility.min", 2L);
+   expected.put("visibility.max", 11L);
+   expected.put("visibility.sum", 30L);
+   expected.put("visibility.logHist.1", 2L);
+   expected.put("visibility.logHist.2", 2L);
+   expected.put("visibility.logHist.3", 2L);
+   // Value lengths: 2, 6, 4, 11, 0, 6.
+   expected.put("value.min", 0L);
+   expected.put("value.max", 11L);
+   expected.put("value.sum", 29L);
+   expected.put("value.logHist.0", 1L);
+   expected.put("value.logHist.1", 1L);
+   expected.put("value.logHist.2", 1L);
+   expected.put("value.logHist.3", 3L);
+   expected.put("total", 6L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testLog2Histogram() {
+   SummarizerConfiguration config = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+   Collector collector = new EntryLengthSummarizer().collector(config);
+
+   // Rows all have length 4; the values have lengths 2, 9 and 9.
+   collector.accept(new Key("row1"), new Value("01"));
+   collector.accept(new Key("row2"), new Value("012345678"));
+   collector.accept(new Key("row3"), new Value("012345679"));
+
+   HashMap<String,Long> actual = new HashMap<>();
+   collector.summarize(actual::put);
+
+   HashMap<String,Long> expected = new HashMap<>();
+   // The key is made of the row alone, so both see length 4 three times (log2 bucket 2).
+   for (String field : new String[] {"key", "row"}) {
+     expected.put(field + ".min", 4L);
+     expected.put(field + ".max", 4L);
+     expected.put(field + ".sum", 12L);
+     expected.put(field + ".logHist.2", 3L);
+   }
+   // Fields that are empty in all three entries (log2 bucket 0).
+   for (String field : new String[] {"family", "qualifier", "visibility"}) {
+     expected.put(field + ".min", 0L);
+     expected.put(field + ".max", 0L);
+     expected.put(field + ".sum", 0L);
+     expected.put(field + ".logHist.0", 3L);
+   }
+   // Value length 2 lands in bucket 1, both values of length 9 land in bucket 3.
+   expected.put("value.min", 2L);
+   expected.put("value.max", 9L);
+   expected.put("value.sum", 20L);
+   expected.put("value.logHist.1", 1L);
+   expected.put("value.logHist.3", 2L);
+   expected.put("total", 3L);
+
+   Assert.assertEquals(expected, actual);
+ }
+
+ /* COMBINER TESTS */
+
+ @Test
+ public void testCombine() { // merges the summaries of two three-entry collectors and checks the combined statistics
+ SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+ EntryLengthSummarizer entrySum = new EntryLengthSummarizer();
+ // First batch: family and qualifier are 2 characters each; row lengths are 1, 4 and 8.
+ Collector collector1 = entrySum.collector(sc);
+ collector1.accept(new Key("1","f1","q1"), new Value("v1")); // field lengths 1 + 2 + 2 = 5; value 2
+ collector1.accept(new Key("1234","f1","q1"), new Value("v111")); // field lengths 4 + 2 + 2 = 8; value 4
+ collector1.accept(new Key("12345678","f1","q1"), new Value("v111111")); // field lengths 8 + 2 + 2 = 12; value 7
+
+ HashMap<String, Long> stats1 = new HashMap<>();
+ collector1.summarize(stats1::put);
+ // Second batch: family is 3 characters; qualifier lengths are 3, 5 and 9.
+ Collector collector2 = entrySum.collector(sc);
+ collector2.accept(new Key("5432","f11","q12"), new Value("2")); // field lengths 4 + 3 + 3 = 10; value 1
+ collector2.accept(new Key("12","f11","q1234"), new Value("12")); // field lengths 2 + 3 + 5 = 10; value 2
+ collector2.accept(new Key("12","f11","q11234567"), new Value("4444")); // field lengths 2 + 3 + 9 = 14; value 4
+
+ HashMap<String, Long> stats2 = new HashMap<>();
+ collector2.summarize(stats2::put);
+ // The assertion at the end reads the merged statistics from stats1, the first argument to merge.
+ Combiner combiner = entrySum.combiner(sc);
+ combiner.merge(stats1, stats2);
+ // NOTE(review): bucket N looks like log2(length) rounded to nearest, with length 0 counted in bucket 0 - confirm in EntryLengthSummarizer.
+ HashMap<String,Long> expected = new HashMap<>();
+ expected.put("key.min", 5L);
+ expected.put("key.max", 14L);
+ expected.put("key.sum", 59L); // 5 + 8 + 12 + 10 + 10 + 14
+
+ // Log2 Histogram for Key: length 5 in bucket 2; 8, 10, 10 in bucket 3; 12, 14 in bucket 4
+ expected.put("key.logHist.2", 1L);
+ expected.put("key.logHist.3", 3L);
+ expected.put("key.logHist.4", 2L);
+
+ expected.put("row.min", 1L);
+ expected.put("row.max", 8L);
+ expected.put("row.sum", 21L); // 1 + 4 + 8 + 4 + 2 + 2
+
+ // Log2 Histogram for Row: length 1 in bucket 0; 2, 2 in bucket 1; 4, 4 in bucket 2; 8 in bucket 3
+ expected.put("row.logHist.0", 1L);
+ expected.put("row.logHist.1", 2L);
+ expected.put("row.logHist.2", 2L);
+ expected.put("row.logHist.3", 1L);
+
+ expected.put("family.min", 2L);
+ expected.put("family.max", 3L);
+ expected.put("family.sum", 15L); // 2 + 2 + 2 + 3 + 3 + 3
+
+ // Log2 Histogram for Family: the three lengths of 2 in bucket 1; the three lengths of 3 in bucket 2
+ expected.put("family.logHist.1", 3L);
+ expected.put("family.logHist.2", 3L);
+
+ expected.put("qualifier.min", 2L);
+ expected.put("qualifier.max", 9L);
+ expected.put("qualifier.sum", 23L); // 2 + 2 + 2 + 3 + 5 + 9
+
+ // Log2 Histogram for Qualifier: 2, 2, 2 in bucket 1; 3, 5 in bucket 2; 9 in bucket 3
+ expected.put("qualifier.logHist.1", 3L);
+ expected.put("qualifier.logHist.2", 2L);
+ expected.put("qualifier.logHist.3", 1L);
+
+ expected.put("visibility.min", 0L);
+ expected.put("visibility.max", 0L);
+ expected.put("visibility.sum", 0L);
+
+ // Log2 Histogram for Visibility: no key supplies a visibility, so all six lengths are 0 (bucket 0)
+ expected.put("visibility.logHist.0", 6L);
+
+ expected.put("value.min", 1L);
+ expected.put("value.max", 7L);
+ expected.put("value.sum", 20L); // 2 + 4 + 7 + 1 + 2 + 4
+
+ // Log2 Histogram for Value: length 1 in bucket 0; 2, 2 in bucket 1; 4, 4 in bucket 2; 7 in bucket 3
+ expected.put("value.logHist.0", 1L);
+ expected.put("value.logHist.1", 2L);
+ expected.put("value.logHist.2", 2L);
+ expected.put("value.logHist.3", 1L);
+
+ expected.put("total", 6L); // three entries from each collector
+
+ Assert.assertEquals(expected, stats1);
+ }
+
+ @Test
+ public void testCombine2() { // merges a one-entry summary (long fields) with a three-entry summary (short fields)
+ SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+ EntryLengthSummarizer entrySum = new EntryLengthSummarizer();
+ // First batch: a single entry with row 20, family 6, qualifier 7 (20 + 6 + 7 = 33) and a 15-character value.
+ Collector collector1 = entrySum.collector(sc);
+ collector1.accept(new Key("12345678901234567890","f12345","q123456"), new Value("value1234567890"));
+
+ HashMap<String, Long> stats1 = new HashMap<>();
+ collector1.summarize(stats1::put);
+ // Second batch: family is 3 characters; qualifier lengths are 3, 5 and 9.
+ Collector collector2 = entrySum.collector(sc);
+ collector2.accept(new Key("5432","f11","q12"), new Value("2")); // field lengths 4 + 3 + 3 = 10; value 1
+ collector2.accept(new Key("12","f11","q1234"), new Value("12")); // field lengths 2 + 3 + 5 = 10; value 2
+ collector2.accept(new Key("12","f11","q11234567"), new Value("4444")); // field lengths 2 + 3 + 9 = 14; value 4
+
+ HashMap<String, Long> stats2 = new HashMap<>();
+ collector2.summarize(stats2::put);
+ // The assertion at the end reads the merged statistics from stats1, the first argument to merge.
+ Combiner combiner = entrySum.combiner(sc);
+ combiner.merge(stats1, stats2);
+ // NOTE(review): bucket N looks like log2(length) rounded to nearest, with length 0 counted in bucket 0 - confirm in EntryLengthSummarizer.
+ HashMap<String,Long> expected = new HashMap<>();
+ expected.put("key.min", 10L);
+ expected.put("key.max", 33L);
+ expected.put("key.sum", 67L); // 33 + 10 + 10 + 14
+
+ // Log2 Histogram for Key: 10, 10 in bucket 3; 14 in bucket 4; 33 in bucket 5
+ expected.put("key.logHist.3", 2L);
+ expected.put("key.logHist.4", 1L);
+ expected.put("key.logHist.5", 1L);
+
+ expected.put("row.min", 2L);
+ expected.put("row.max", 20L);
+ expected.put("row.sum", 28L); // 20 + 4 + 2 + 2
+
+ // Log2 Histogram for Row: 2, 2 in bucket 1; 4 in bucket 2; 20 in bucket 4
+ expected.put("row.logHist.1", 2L);
+ expected.put("row.logHist.2", 1L);
+ expected.put("row.logHist.4", 1L);
+
+ expected.put("family.min", 3L);
+ expected.put("family.max", 6L);
+ expected.put("family.sum", 15L); // 6 + 3 + 3 + 3
+
+ // Log2 Histogram for Family: 3, 3, 3 in bucket 2; 6 in bucket 3
+ expected.put("family.logHist.2", 3L);
+ expected.put("family.logHist.3", 1L);
+
+ expected.put("qualifier.min", 3L);
+ expected.put("qualifier.max", 9L);
+ expected.put("qualifier.sum", 24L); // 7 + 3 + 5 + 9
+
+ // Log2 Histogram for Qualifier: 3, 5 in bucket 2; 7, 9 in bucket 3
+ expected.put("qualifier.logHist.2", 2L);
+ expected.put("qualifier.logHist.3", 2L);
+
+ expected.put("visibility.min", 0L);
+ expected.put("visibility.max", 0L);
+ expected.put("visibility.sum", 0L);
+
+ // Log2 Histogram for Visibility: no key supplies a visibility, so all four lengths are 0 (bucket 0)
+ expected.put("visibility.logHist.0", 4L);
+
+ expected.put("value.min", 1L);
+ expected.put("value.max", 15L);
+ expected.put("value.sum", 22L); // 15 + 1 + 2 + 4
+
+ // Log2 Histogram for Value: 1 in bucket 0; 2 in bucket 1; 4 in bucket 2; 15 in bucket 4
+ expected.put("value.logHist.0", 1L);
+ expected.put("value.logHist.1", 1L);
+ expected.put("value.logHist.2", 1L);
+ expected.put("value.logHist.4", 1L);
+
+ expected.put("total", 4L); // one entry from collector1 plus three from collector2
+
+ Assert.assertEquals(expected, stats1);
+ }
+
+ @Test
+ public void testCombine3() { // merges two one-entry summaries; one key has no qualifier, one value is empty
+ SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build();
+ EntryLengthSummarizer entrySum = new EntryLengthSummarizer();
+ // First entry: row 2 and family 2 with no qualifier supplied (2 + 2 = 4); value length 2.
+ Collector collector1 = entrySum.collector(sc);
+ collector1.accept(new Key("r1","f1"), new Value("v1"));
+
+ HashMap<String, Long> stats1 = new HashMap<>();
+ collector1.summarize(stats1::put);
+ // Second entry: row 4, family 7, qualifier 2 (4 + 7 + 2 = 13); the value is the empty string, length 0.
+ Collector collector2 = entrySum.collector(sc);
+ collector2.accept(new Key("row1","family1","q1"), new Value(""));
+
+ HashMap<String, Long> stats2 = new HashMap<>();
+ collector2.summarize(stats2::put);
+ // The assertion at the end reads the merged statistics from stats1, the first argument to merge.
+ Combiner combiner = entrySum.combiner(sc);
+ combiner.merge(stats1, stats2);
+ // NOTE(review): bucket N looks like log2(length) rounded to nearest, with length 0 counted in bucket 0 - confirm in EntryLengthSummarizer.
+ HashMap<String,Long> expected = new HashMap<>();
+ expected.put("key.min", 4L);
+ expected.put("key.max", 13L);
+ expected.put("key.sum", 17L); // 4 + 13
+
+ // Log2 Histogram for Key: 4 in bucket 2; 13 in bucket 4
+ expected.put("key.logHist.2", 1L);
+ expected.put("key.logHist.4", 1L);
+
+ expected.put("row.min", 2L);
+ expected.put("row.max", 4L);
+ expected.put("row.sum", 6L); // 2 + 4
+
+ // Log2 Histogram for Row: 2 in bucket 1; 4 in bucket 2
+ expected.put("row.logHist.1", 1L);
+ expected.put("row.logHist.2", 1L);
+
+ expected.put("family.min", 2L);
+ expected.put("family.max", 7L);
+ expected.put("family.sum", 9L); // 2 + 7
+
+ // Log2 Histogram for Family: 2 in bucket 1; 7 in bucket 3
+ expected.put("family.logHist.1", 1L);
+ expected.put("family.logHist.3", 1L);
+
+ expected.put("qualifier.min", 0L);
+ expected.put("qualifier.max", 2L);
+ expected.put("qualifier.sum", 2L); // 0 + 2
+
+ // Log2 Histogram for Qualifier: the missing qualifier (length 0) in bucket 0; 2 in bucket 1
+ expected.put("qualifier.logHist.0", 1L);
+ expected.put("qualifier.logHist.1", 1L);
+
+ expected.put("visibility.min", 0L);
+ expected.put("visibility.max", 0L);
+ expected.put("visibility.sum", 0L);
+
+ // Log2 Histogram for Visibility: neither key supplies a visibility, so both lengths are 0 (bucket 0)
+ expected.put("visibility.logHist.0", 2L);
+
+ expected.put("value.min", 0L);
+ expected.put("value.max", 2L);
+ expected.put("value.sum", 2L); // 2 + 0
+
+ // Log2 Histogram for Value: the empty value (length 0) in bucket 0; 2 in bucket 1
+ expected.put("value.logHist.0", 1L);
+ expected.put("value.logHist.1", 1L);
+
+ expected.put("total", 2L); // one entry from each collector
+
+ Assert.assertEquals(expected, stats1);
+ }
+}
--
To stop receiving notification emails like this one, please contact
['"commits@accumulo.apache.org" <co...@accumulo.apache.org>'].