You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2018/12/31 12:45:48 UTC
[12/47] hbase git commit: HBASE-21520 TestMultiColumnScanner cost long time when using ROWCOL bloom type
HBASE-21520 TestMultiColumnScanner cost long time when using ROWCOL bloom type
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ac0b3bb5
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ac0b3bb5
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ac0b3bb5
Branch: refs/heads/HBASE-21512
Commit: ac0b3bb5477612cb8844c4ef10fa2be0f1d1a025
Parents: 4911534
Author: huzheng <op...@gmail.com>
Authored: Thu Dec 13 15:04:12 2018 +0800
Committer: huzheng <op...@gmail.com>
Committed: Sat Dec 15 21:08:52 2018 +0800
----------------------------------------------------------------------
.../regionserver/TestMultiColumnScanner.java | 94 ++++++--------------
...olumnScannerWithAlgoGZAndNoDataEncoding.java | 48 ++++++++++
...lumnScannerWithAlgoGZAndUseDataEncoding.java | 48 ++++++++++
...iColumnScannerWithNoneAndNoDataEncoding.java | 48 ++++++++++
...ColumnScannerWithNoneAndUseDataEncoding.java | 48 ++++++++++
5 files changed, 219 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/ac0b3bb5/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
index 2ff0d8c..bb97c9c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
@@ -32,11 +32,9 @@ import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
-import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
@@ -47,29 +45,27 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.ClassRule;
import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
+import org.junit.runners.Parameterized.Parameter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Tests optimized scanning of multiple columns.
+ * Tests optimized scanning of multiple columns. <br>
+ * We separated the big test into several sub-class UTs because, with the ROWCOL bloom type, we
+ * test the row-col bloom filter frequently to save HDFS seeks whenever we switch from one column
+ * to another in our UT. That is CPU-time consuming (~45s for each case), so we moved the ROWCOL
+ * case into separate LargeTests to avoid timeout failures. <br>
+ * <br>
+ * To be clear: in TestMultiColumnScanner we flush 10 (NUM_FLUSHES=10) HFiles, and ~1000 cells are
+ * put into the table (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000). Each full table scan
+ * checks the ROWCOL bloom filter 20 (rows) * 8 (columns) * 10 (hfiles) = 1600 times; besides, the
+ * full table is scanned 6*2^8=1536 times, so in the end there are 1600*1536=2457600 bloom filter
+ * tests. (See HBASE-21520)
*/
-@RunWith(Parameterized.class)
-@Category({RegionServerTests.class, MediumTests.class})
-public class TestMultiColumnScanner {
-
- @ClassRule
- public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestMultiColumnScanner.class);
+public abstract class TestMultiColumnScanner {
private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class);
@@ -104,20 +100,19 @@ public class TestMultiColumnScanner {
/** The probability that a column is skipped in a store file. */
private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
- /** The probability of skipping a column in a single row */
- private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
-
- /** The probability of skipping a column everywhere */
- private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
-
/** The probability to delete a row/column pair */
private static final double DELETE_PROBABILITY = 0.02;
private final static HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();
- private final Compression.Algorithm comprAlgo;
- private final BloomType bloomType;
- private final DataBlockEncoding dataBlockEncoding;
+ @Parameter(0)
+ public Compression.Algorithm comprAlgo;
+
+ @Parameter(1)
+ public BloomType bloomType;
+
+ @Parameter(2)
+ public DataBlockEncoding dataBlockEncoding;
// Some static sanity-checking.
static {
@@ -128,27 +123,17 @@ public class TestMultiColumnScanner {
assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
}
- @Parameters
- public static final Collection<Object[]> parameters() {
+ public static Collection<Object[]> generateParams(Compression.Algorithm algo,
+ boolean useDataBlockEncoding) {
List<Object[]> parameters = new ArrayList<>();
- for (Object[] bloomAndCompressionParams :
- HBaseTestingUtility.BLOOM_AND_COMPRESSION_COMBINATIONS) {
- for (boolean useDataBlockEncoding : new boolean[]{false, true}) {
- parameters.add(ArrayUtils.add(bloomAndCompressionParams,
- useDataBlockEncoding));
- }
+ for (BloomType bloomType : BloomType.values()) {
+ DataBlockEncoding dataBlockEncoding =
+ useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE;
+ parameters.add(new Object[] { algo, bloomType, dataBlockEncoding });
}
return parameters;
}
- public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
- BloomType bloomType, boolean useDataBlockEncoding) {
- this.comprAlgo = comprAlgo;
- this.bloomType = bloomType;
- this.dataBlockEncoding = useDataBlockEncoding ? DataBlockEncoding.PREFIX :
- DataBlockEncoding.NONE;
- }
-
@Test
public void testMultiColumnScanner() throws IOException {
TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
@@ -170,24 +155,6 @@ public class TestMultiColumnScanner {
Map<String, Long> lastDelTimeMap = new HashMap<>();
Random rand = new Random(29372937L);
- Set<String> rowQualSkip = new HashSet<>();
-
- // Skip some columns in some rows. We need to test scanning over a set
- // of columns when some of the columns are not there.
- for (String row : rows)
- for (String qual : qualifiers)
- if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
- LOG.info("Skipping " + qual + " in row " + row);
- rowQualSkip.add(rowQualKey(row, qual));
- }
-
- // Also skip some columns in all rows.
- for (String qual : qualifiers)
- if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
- LOG.info("Skipping " + qual + " in all rows");
- for (String row : rows)
- rowQualSkip.add(rowQualKey(row, qual));
- }
for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
for (String qual : qualifiers) {
@@ -316,10 +283,6 @@ public class TestMultiColumnScanner {
kv.getQualifierLength());
}
- private static String rowQualKey(String row, String qual) {
- return row + "_" + qual;
- }
-
static String createValue(String row, String qual, long ts) {
return "value_for_" + row + "_" + qual + "_" + ts;
}
@@ -339,10 +302,7 @@ public class TestMultiColumnScanner {
lst.add(sb.toString());
}
-
return lst;
}
-
-
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/ac0b3bb5/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
new file mode 100644
index 0000000..cc68c11
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndNoDataEncoding extends TestMultiColumnScanner {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndNoDataEncoding.class);
+
+ @Parameters
+ public static Collection<Object[]> parameters() {
+ return TestMultiColumnScanner.generateParams(Algorithm.GZ, false);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/ac0b3bb5/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
new file mode 100644
index 0000000..c817da2
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.GZ and use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithAlgoGZAndUseDataEncoding extends TestMultiColumnScanner {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestMultiColumnScannerWithAlgoGZAndUseDataEncoding.class);
+
+ @Parameters
+ public static Collection<Object[]> parameters() {
+ return TestMultiColumnScanner.generateParams(Algorithm.GZ, true);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/ac0b3bb5/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
new file mode 100644
index 0000000..4f6aa90
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndNoDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and no use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndNoDataEncoding extends TestMultiColumnScanner {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndNoDataEncoding.class);
+
+ @Parameters
+ public static Collection<Object[]> parameters() {
+ return TestMultiColumnScanner.generateParams(Algorithm.NONE, false);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/ac0b3bb5/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
new file mode 100644
index 0000000..f1fd30d
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScannerWithNoneAndUseDataEncoding.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Collection;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.junit.ClassRule;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test case for Compression.Algorithm.NONE and use data block encoding.
+ * @see org.apache.hadoop.hbase.regionserver.TestMultiColumnScanner
+ */
+@RunWith(Parameterized.class)
+@Category({ RegionServerTests.class, LargeTests.class })
+public class TestMultiColumnScannerWithNoneAndUseDataEncoding extends TestMultiColumnScanner {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestMultiColumnScannerWithNoneAndUseDataEncoding.class);
+
+ @Parameters
+ public static Collection<Object[]> parameters() {
+ return TestMultiColumnScanner.generateParams(Algorithm.NONE, true);
+ }
+}