You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by kt...@apache.org on 2017/12/01 02:12:52 UTC
[accumulo] 01/01: Merge branch '1.8'
This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 26e83f05d1448631f0b8a0da1b8671abe9beb922
Merge: 345070d 06cb5ed
Author: Keith Turner <kt...@apache.org>
AuthorDate: Thu Nov 30 21:08:53 2017 -0500
Merge branch '1.8'
.../org/apache/accumulo/core/file/rfile/RFile.java | 9 +-
.../accumulo/core/file/rfile/RollingStats.java | 114 ++++++++++++
.../core/file/rfile/RolllingStatsTest.java | 204 +++++++++++++++++++++
3 files changed, 323 insertions(+), 4 deletions(-)
diff --cc core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
index c399a22,cda246a..f2e2463
--- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java
@@@ -403,8 -402,9 +402,9 @@@ public class RFile
private SampleLocalityGroupWriter sample;
- private SummaryStatistics keyLenStats = new SummaryStatistics();
+ // Use windowed stats to fix ACCUMULO-4669
+ private RollingStats keyLenStats = new RollingStats(2017);
- private double avergageKeySize = 0;
+ private double averageKeySize = 0;
LocalityGroupWriter(BlockFileWriter fileWriter, long blockSize, long maxBlockSize, LocalityGroupMetadata currentLocalityGroup,
SampleLocalityGroupWriter sample) {
diff --cc core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
index 0000000,4f8fcd1..19f9c5c
mode 000000,100644..100644
--- a/core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/RolllingStatsTest.java
@@@ -1,0 -1,205 +1,204 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ package org.apache.accumulo.core.file.rfile;
+
+ import java.util.Random;
++import java.util.function.IntSupplier;
+
+ import org.apache.commons.math3.distribution.NormalDistribution;
+ import org.apache.commons.math3.distribution.ZipfDistribution;
+ import org.apache.commons.math3.random.Well19937c;
+ import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+ import org.junit.Assert;
+ import org.junit.Test;
+
+ import com.google.common.math.DoubleMath;
+
+ public class RolllingStatsTest {
+
+ // Largest absolute difference accepted between an expected and an actual statistic.
+ private static final double TOLERANCE = 1.0 / 1000;
+
+ /**
+  * Fails the calling test unless {@code actual} is within {@link #TOLERANCE} of
+  * {@code expected}. The failure message reports both values and their absolute difference.
+  */
+ private static void assertFuzzyEquals(double expected, double actual) {
+   double diff = Math.abs(expected - actual);
+   String message = String.format("expected: %f, actual: %f diff: %f", expected, actual, diff);
+   boolean closeEnough = DoubleMath.fuzzyEquals(expected, actual, TOLERANCE);
+   Assert.assertTrue(message, closeEnough);
+ }
+
+ private static void checkAgreement(DescriptiveStatistics ds, RollingStats rs) {
+ // getting stats from ds is expensive, so do it once... otherwise unit test takes 11 sec
+ // instead of 5 secs
+ double expMean = ds.getMean();
+ double expVar = ds.getVariance();
+ double expStdDev = Math.sqrt(expVar);
+
+ assertFuzzyEquals(expMean, rs.getMean());
+ assertFuzzyEquals(expVar, rs.getVariance());
+ assertFuzzyEquals(expStdDev, rs.getStandardDeviation());
+
+ Assert.assertTrue(expMean >= 0);
+ Assert.assertTrue(rs.getMean() >= 0);
+ Assert.assertTrue(expVar >= 0);
+ Assert.assertTrue(rs.getVariance() >= 0);
+ Assert.assertTrue(expStdDev >= 0);
+ Assert.assertTrue(rs.getStandardDeviation() >= 0);
+ }
+
+ /**
+  * Test harness that feeds every value to a windowed {@link DescriptiveStatistics} (the
+  * reference) and to two {@link RollingStats} instances built with the same window size.
+  * The first RollingStats is compared with the reference after every value; the second is
+  * compared only occasionally while values are added, and again in {@link #check()}.
+  */
+ private static class StatTester {
+
+   // Chance that the sparsely-checked instance is compared after any single value.
+   private static final double SPARSE_CHECK_PROBABILITY = 0.001;
+
+   // Fixed seed, so the sparse comparisons happen at the same positions on every run.
+   private final Random rand = new Random(42);
+   private final DescriptiveStatistics ds;
+   // Compared with the reference after every added value.
+   private final RollingStats rs;
+   // Compared with the reference only sporadically.
+   // NOTE(review): presumably this exercises RollingStats when many values are added between
+   // reads of its statistics - confirm against the RollingStats implementation.
+   private final RollingStats rsp;
+
+   /**
+    * @param windowSize
+    *          window size applied to the reference statistics and to both RollingStats
+    */
+   StatTester(int windowSize) {
+     ds = new DescriptiveStatistics();
+     ds.setWindowSize(windowSize);
+
+     rs = new RollingStats(windowSize);
+     rsp = new RollingStats(windowSize);
+   }
+
+   /** Adds {@code v} to all three accumulators and asserts agreement as described above. */
+   void addValue(long v) {
+     ds.addValue(v);
+     rs.addValue(v);
+     rsp.addValue(v);
+     checkAgreement(ds, rs);
+
+     if (rand.nextDouble() < SPARSE_CHECK_PROBABILITY) {
+       checkAgreement(ds, rsp);
+     }
+   }
+
+   /** Asserts that the sparsely-checked instance agrees with the reference. */
+   void check() {
+     checkAgreement(ds, rsp);
+   }
+ }
+
+ /**
+  * Checks agreement when 10000 values are picked at random (fixed seed) from four distinct
+  * sizes, one of which is much larger than the other three.
+  */
+ @Test
+ public void testFewSizes() {
+   final int[] sizes = {103, 113, 123, 2345};
+   final StatTester tester = new StatTester(1019);
+   final Random picker = new Random(42);
+
+   int added = 0;
+   while (added < 10000) {
+     int index = picker.nextInt(sizes.length);
+     tester.addValue(sizes[index]);
+     added++;
+   }
+
+   tester.check();
+ }
+
+ @Test
+ public void testConstant() {
+
+ StatTester st = new StatTester(1019);
+
+ for (int i = 0; i < 10000; i++) {
+ st.addValue(111);
+ }
+
+ st.check();
+ }
+
+ /**
+  * Checks agreement for several window sizes, adding 1000 values per window size that are
+  * drawn uniformly from 200 (inclusive) to 250 (exclusive). Despite the method name, the
+  * values are random and not increasing.
+  */
+ @Test
+ public void testUniformIncreasing() {
+
+   for (int windowSize : new int[] {10, 13, 20, 100, 500}) {
+
+     StatTester st = new StatTester(windowSize);
+
+     // Seeded, like the other randomized tests in this class, so that a failure can be
+     // reproduced; an unseeded Random yields a different sequence on every run.
+     Random rand = new Random(42);
+
+     for (int i = 0; i < 1000; i++) {
+       int v = 200 + rand.nextInt(50);
+
+       st.addValue(v);
+     }
+
+     st.check();
+   }
+ }
+
+ /**
+  * Checks agreement when the value stays constant for long runs and then steps up: 50 runs
+  * of 3000 identical values, starting at 100 and multiplied by 1.1 (truncated to an int)
+  * after each run.
+  */
+ @Test
+ public void testSlowIncreases() {
+   final StatTester tester = new StatTester(1019);
+
+   // key length shared by every key within one run
+   int keyLen = 100;
+
+   int run = 0;
+   while (run < 50) {
+     // 3000 keys are written with the current length before it grows
+     for (int k = 0; k < 3000; k++) {
+       tester.addValue(keyLen);
+     }
+
+     keyLen = (int) (keyLen * 1.1);
+     run++;
+   }
+
+   tester.check();
+ }
+
- @Test
- public void testZipf() {
- ZipfDistribution zd = new ZipfDistribution(new Well19937c(42), 1000, 2);
++ // Shared body of the distribution tests: adds 7000 samples taken from d to a StatTester
++ // with a window of 2017 and then runs its final check.
++ private void testDistribution(IntSupplier d) {
+ StatTester st = new StatTester(2017);
+
+ for (int i = 0; i < 7000; i++) {
- st.addValue(zd.sample() * 100);
++ st.addValue(d.getAsInt());
+ }
+
+ st.check();
+ }
+
+ @Test
++ public void testZipf() {
++ // Zipf samples (1000 elements, exponent 2, fixed-seed generator) scaled by 100
++ ZipfDistribution zd = new ZipfDistribution(new Well19937c(42), 1000, 2);
++ testDistribution(() -> zd.sample() * 100);
++ }
++
++ @Test
+ public void testNormal() {
+ // Normal samples (mean 200, standard deviation 20, fixed-seed generator) truncated to int
+ NormalDistribution nd = new NormalDistribution(new Well19937c(42), 200, 20);
- StatTester st = new StatTester(2017);
-
- for (int i = 0; i < 7000; i++) {
- st.addValue((int) nd.sample());
- }
-
- st.check();
++ testDistribution(() -> (int) nd.sample());
+ }
+
+ /**
+  * Checks agreement when long runs of small values are interrupted by a very large one.
+  * Each of the 13 rounds adds between 1000 and 1999 small values (50 to 149) followed by one
+  * large value (500000 to 1499999); every value is added three times in a row.
+  */
+ @Test
+ public void testSpikes() {
+
+   // Seeded, like the other randomized tests in this class, so that a failure can be
+   // reproduced; an unseeded Random yields a different sequence on every run.
+   Random rand = new Random(42);
+
+   StatTester st = new StatTester(3017);
+
+   for (int i = 0; i < 13; i++) {
+
+     // write small keys
+     int numSmall = 1000 + rand.nextInt(1000);
+     for (int s = 0; s < numSmall; s++) {
+       int sks = 50 + rand.nextInt(100);
+       // simulate row with multiple cols
+       for (int c = 0; c < 3; c++) {
+         st.addValue(sks);
+       }
+     }
+
+     // Write exactly one large key per round. The count is stated directly because
+     // Random.nextInt(1) can only return 0, so "1 + rand.nextInt(1)" never produced
+     // anything other than 1. The loop is kept so the count is easy to raise.
+     int numLarge = 1;
+     for (int l = 0; l < numLarge; l++) {
+       int lks = 500000 + rand.nextInt(1000000);
+       for (int c = 0; c < 3; c++) {
+         st.addValue(lks);
+       }
+     }
+   }
+
+   st.check();
+ }
+ }
--
To stop receiving notification emails like this one, please contact
"commits@accumulo.apache.org" <co...@accumulo.apache.org>.