You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by da...@apache.org on 2018/11/13 21:14:21 UTC
[incubator-druid] branch 0.13.0-incubating updated: use a sha512
hash of bloom filter for cache key instead of filter bytes (#6488) (#6618)
This is an automated email from the ASF dual-hosted git repository.
davidlim pushed a commit to branch 0.13.0-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git
The following commit(s) were added to refs/heads/0.13.0-incubating by this push:
new f288d4a use a sha512 hash of bloom filter for cache key instead of filter bytes (#6488) (#6618)
f288d4a is described below
commit f288d4a04070d35cd9af0028f101f0c0e2f2246b
Author: Clint Wylie <cj...@gmail.com>
AuthorDate: Tue Nov 13 13:14:14 2018 -0800
use a sha512 hash of bloom filter for cache key instead of filter bytes (#6488) (#6618)
* use a sha512 hash of bloom filter for cache key instead of filter bytes
* make serde private, BloomDimFilter.toString and BloomDimFilter.equals use hash instead of bloomKFilter which has no tostring or equals of its own
* keep and use HashCode object instead of converting to bytes up front
* uneeded imports oops
* tweaks from review
* refactor dupe code
* refactor
---
.../druid/guice/BloomFilterSerializersModule.java | 48 +++++++++---
.../apache/druid/query/filter/BloomDimFilter.java | 30 +++-----
.../druid/query/filter/BloomKFilterHolder.java | 85 ++++++++++++++++++++++
.../druid/query/filter/BloomDimFilterTest.java | 65 ++++++++++++-----
4 files changed, 178 insertions(+), 50 deletions(-)
diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java
index 21af16d..011e091 100644
--- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java
+++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/guice/BloomFilterSerializersModule.java
@@ -20,7 +20,6 @@ package org.apache.druid.guice;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
@@ -28,6 +27,7 @@ import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
import org.apache.druid.query.filter.BloomDimFilter;
+import org.apache.druid.query.filter.BloomKFilterHolder;
import org.apache.hive.common.util.BloomKFilter;
import java.io.ByteArrayInputStream;
@@ -45,12 +45,13 @@ public class BloomFilterSerializersModule extends SimpleModule
);
addSerializer(BloomKFilter.class, new BloomKFilterSerializer());
addDeserializer(BloomKFilter.class, new BloomKFilterDeserializer());
+ addDeserializer(BloomKFilterHolder.class, new BloomKFilterHolderDeserializer());
}
- public static class BloomKFilterSerializer extends StdSerializer<BloomKFilter>
+ private static class BloomKFilterSerializer extends StdSerializer<BloomKFilter>
{
- public BloomKFilterSerializer()
+ BloomKFilterSerializer()
{
super(BloomKFilter.class);
}
@@ -60,17 +61,14 @@ public class BloomFilterSerializersModule extends SimpleModule
BloomKFilter bloomKFilter, JsonGenerator jsonGenerator, SerializerProvider serializerProvider
) throws IOException
{
- ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
- BloomKFilter.serialize(byteArrayOutputStream, bloomKFilter);
- byte[] bytes = byteArrayOutputStream.toByteArray();
- jsonGenerator.writeBinary(bytes);
+ jsonGenerator.writeBinary(bloomKFilterToBytes(bloomKFilter));
}
}
- public static class BloomKFilterDeserializer extends StdDeserializer<BloomKFilter>
+ private static class BloomKFilterDeserializer extends StdDeserializer<BloomKFilter>
{
- protected BloomKFilterDeserializer()
+ BloomKFilterDeserializer()
{
super(BloomKFilter.class);
}
@@ -78,11 +76,37 @@ public class BloomFilterSerializersModule extends SimpleModule
@Override
public BloomKFilter deserialize(
JsonParser jsonParser, DeserializationContext deserializationContext
- ) throws IOException, JsonProcessingException
+ ) throws IOException
{
- byte[] bytes = jsonParser.getBinaryValue();
- return BloomKFilter.deserialize(new ByteArrayInputStream(bytes));
+ return bloomKFilterFromBytes(jsonParser.getBinaryValue());
+ }
+ }
+ private static class BloomKFilterHolderDeserializer extends StdDeserializer<BloomKFilterHolder>
+ {
+ BloomKFilterHolderDeserializer()
+ {
+ super(BloomKFilterHolder.class);
}
+
+ @Override
+ public BloomKFilterHolder deserialize(
+ JsonParser jsonParser, DeserializationContext deserializationContext
+ ) throws IOException
+ {
+ return BloomKFilterHolder.fromBytes(jsonParser.getBinaryValue());
+ }
+ }
+
+ public static byte[] bloomKFilterToBytes(BloomKFilter bloomKFilter) throws IOException
+ {
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ BloomKFilter.serialize(byteArrayOutputStream, bloomKFilter);
+ return byteArrayOutputStream.toByteArray();
+ }
+
+ public static BloomKFilter bloomKFilterFromBytes(byte[] bytes) throws IOException
+ {
+ return BloomKFilter.deserialize(new ByteArrayInputStream(bytes));
}
}
diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java
index bfaa37c..383488c 100644
--- a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java
+++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomDimFilter.java
@@ -25,15 +25,13 @@ import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
-import org.apache.druid.java.util.common.ISE;
+import com.google.common.hash.HashCode;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.segment.filter.DimensionPredicateFilter;
import org.apache.hive.common.util.BloomKFilter;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
import java.util.HashSet;
/**
@@ -43,39 +41,33 @@ public class BloomDimFilter implements DimFilter
private final String dimension;
private final BloomKFilter bloomKFilter;
+ private final HashCode hash;
private final ExtractionFn extractionFn;
@JsonCreator
public BloomDimFilter(
@JsonProperty("dimension") String dimension,
- @JsonProperty("bloomKFilter") BloomKFilter bloomKFilter,
+ @JsonProperty("bloomKFilter") BloomKFilterHolder bloomKFilterHolder,
@JsonProperty("extractionFn") ExtractionFn extractionFn
)
{
Preconditions.checkArgument(dimension != null, "dimension must not be null");
- Preconditions.checkNotNull(bloomKFilter);
+ Preconditions.checkNotNull(bloomKFilterHolder);
this.dimension = dimension;
- this.bloomKFilter = bloomKFilter;
+ this.bloomKFilter = bloomKFilterHolder.getFilter();
+ this.hash = bloomKFilterHolder.getFilterHash();
this.extractionFn = extractionFn;
}
@Override
public byte[] getCacheKey()
{
- ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
- try {
- BloomKFilter.serialize(byteArrayOutputStream, bloomKFilter);
- }
- catch (IOException e) {
- throw new ISE(e, "Exception when generating cache key for [%s]", this);
- }
- byte[] bloomFilterBytes = byteArrayOutputStream.toByteArray();
return new CacheKeyBuilder(DimFilterUtils.BLOOM_DIM_FILTER_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
.appendByte(DimFilterUtils.STRING_SEPARATOR)
- .appendByteArray(bloomFilterBytes)
+ .appendByteArray(hash.asBytes())
.build();
}
@@ -187,9 +179,9 @@ public class BloomDimFilter implements DimFilter
public String toString()
{
if (extractionFn != null) {
- return StringUtils.format("%s(%s) = %s", extractionFn, dimension, bloomKFilter);
+ return StringUtils.format("%s(%s) = %s", extractionFn, dimension, hash.toString());
} else {
- return StringUtils.format("%s = %s", dimension, bloomKFilter);
+ return StringUtils.format("%s = %s", dimension, hash.toString());
}
}
@@ -208,7 +200,7 @@ public class BloomDimFilter implements DimFilter
if (!dimension.equals(that.dimension)) {
return false;
}
- if (bloomKFilter != null ? !bloomKFilter.equals(that.bloomKFilter) : that.bloomKFilter != null) {
+ if (hash != null ? !hash.equals(that.hash) : that.hash != null) {
return false;
}
return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null;
@@ -230,7 +222,7 @@ public class BloomDimFilter implements DimFilter
public int hashCode()
{
int result = dimension.hashCode();
- result = 31 * result + (bloomKFilter != null ? bloomKFilter.hashCode() : 0);
+ result = 31 * result + (hash != null ? hash.hashCode() : 0);
result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0);
return result;
}
diff --git a/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java
new file mode 100644
index 0000000..e06632f
--- /dev/null
+++ b/extensions-core/druid-bloom-filter/src/main/java/org/apache/druid/query/filter/BloomKFilterHolder.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.filter;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.Hashing;
+import org.apache.druid.guice.BloomFilterSerializersModule;
+import org.apache.hive.common.util.BloomKFilter;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class BloomKFilterHolder
+{
+ private final BloomKFilter filter;
+ private final HashCode hash;
+
+ public BloomKFilterHolder(BloomKFilter filter, HashCode hash)
+ {
+ this.filter = filter;
+ this.hash = hash;
+ }
+
+ BloomKFilter getFilter()
+ {
+ return filter;
+ }
+
+ HashCode getFilterHash()
+ {
+ return hash;
+ }
+
+ public static BloomKFilterHolder fromBloomKFilter(BloomKFilter filter) throws IOException
+ {
+ byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
+
+ return new BloomKFilterHolder(filter, Hashing.sha512().hashBytes(bytes));
+ }
+
+ public static BloomKFilterHolder fromBytes(byte[] bytes) throws IOException
+ {
+ return new BloomKFilterHolder(
+ BloomFilterSerializersModule.bloomKFilterFromBytes(bytes),
+ Hashing.sha512().hashBytes(bytes)
+ );
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ BloomKFilterHolder that = (BloomKFilterHolder) o;
+ return Objects.equals(this.hash, that.hash);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(filter, hash);
+ }
+}
diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
index 181235a..81a272f 100644
--- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
+++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/BloomDimFilterTest.java
@@ -90,6 +90,8 @@ public class BloomDimFilterTest extends BaseFilterTest
PARSER.parseBatch(ImmutableMap.of("dim0", "5", "dim1", "abc")).get(0)
);
+ private static DefaultObjectMapper mapper = new DefaultObjectMapper();
+
public BloomDimFilterTest(
String testName,
IndexBuilder indexBuilder,
@@ -111,8 +113,6 @@ public class BloomDimFilterTest extends BaseFilterTest
);
}
- private static DefaultObjectMapper mapper = new DefaultObjectMapper();
-
@BeforeClass
public static void beforeClass()
{
@@ -130,9 +130,10 @@ public class BloomDimFilterTest extends BaseFilterTest
{
BloomKFilter bloomFilter = new BloomKFilter(1500);
bloomFilter.addString("myTestString");
+ BloomKFilterHolder holder = new BloomKFilterHolder(bloomFilter, null);
BloomDimFilter bloomDimFilter = new BloomDimFilter(
"abc",
- bloomFilter,
+ holder,
new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)
);
DimFilter filter = mapper.readValue(mapper.writeValueAsBytes(bloomDimFilter), DimFilter.class);
@@ -145,7 +146,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testWithTimeExtractionFnNull()
+ public void testWithTimeExtractionFnNull() throws IOException
{
assertFilterMatches(new BloomDimFilter(
"dim0",
@@ -170,7 +171,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testSingleValueStringColumnWithoutNulls()
+ public void testSingleValueStringColumnWithoutNulls() throws IOException
{
assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), null), ImmutableList.of());
assertFilterMatches(new BloomDimFilter("dim0", bloomKFilter(1000, ""), null), ImmutableList.of());
@@ -179,7 +180,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testSingleValueStringColumnWithNulls()
+ public void testSingleValueStringColumnWithNulls() throws IOException
{
if (NullHandling.replaceWithDefault()) {
assertFilterMatches(new BloomDimFilter("dim1", bloomKFilter(1000, (String) null), null), ImmutableList.of("0"));
@@ -196,7 +197,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testMultiValueStringColumn()
+ public void testMultiValueStringColumn() throws IOException
{
if (NullHandling.replaceWithDefault()) {
assertFilterMatches(
@@ -217,7 +218,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testMissingColumnSpecifiedInDimensionList()
+ public void testMissingColumnSpecifiedInDimensionList() throws IOException
{
assertFilterMatches(
new BloomDimFilter("dim3", bloomKFilter(1000, (String) null), null),
@@ -230,7 +231,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testMissingColumnNotSpecifiedInDimensionList()
+ public void testMissingColumnNotSpecifiedInDimensionList() throws IOException
{
assertFilterMatches(
new BloomDimFilter("dim4", bloomKFilter(1000, (String) null), null),
@@ -243,7 +244,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testExpressionVirtualColumn()
+ public void testExpressionVirtualColumn() throws IOException
{
assertFilterMatches(
new BloomDimFilter("expr", bloomKFilter(1000, 1.1F), null),
@@ -263,7 +264,7 @@ public class BloomDimFilterTest extends BaseFilterTest
}
@Test
- public void testSelectorWithLookupExtractionFn()
+ public void testSelectorWithLookupExtractionFn() throws IOException
{
final Map<String, String> stringMap = ImmutableMap.of(
"1", "HELLO",
@@ -334,7 +335,32 @@ public class BloomDimFilterTest extends BaseFilterTest
}
}
- private static BloomKFilter bloomKFilter(int expectedEntries, String... values)
+ @Test
+ public void testCacheKeyIsNotGiantIfFilterIsGiant() throws IOException
+ {
+ BloomKFilter bloomFilter = new BloomKFilter(10_000_000);
+ // FILL IT UP!
+ bloomFilter.addString("myTestString");
+
+ BloomKFilterHolder holder = BloomKFilterHolder.fromBloomKFilter(bloomFilter);
+
+ BloomDimFilter bloomDimFilter = new BloomDimFilter(
+ "abc",
+ holder,
+ new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)
+ );
+
+ byte[] bloomFilterBytes = BloomFilterSerializersModule.bloomKFilterToBytes(bloomFilter);
+
+ // serialized filter can be quite large for high capacity bloom filters...
+ Assert.assertTrue(bloomFilterBytes.length > 7794000);
+
+ // actual size is 86 bytes instead of 7794075 bytes of old key format
+ final int actualSize = bloomDimFilter.getCacheKey().length;
+ Assert.assertTrue(actualSize < 100);
+ }
+
+ private static BloomKFilterHolder bloomKFilter(int expectedEntries, String... values) throws IOException
{
BloomKFilter filter = new BloomKFilter(expectedEntries);
for (String value : values) {
@@ -344,10 +370,11 @@ public class BloomDimFilterTest extends BaseFilterTest
filter.addString(value);
}
}
- return filter;
+
+ return BloomKFilterHolder.fromBloomKFilter(filter);
}
- private static BloomKFilter bloomKFilter(int expectedEntries, Float... values)
+ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Float... values) throws IOException
{
BloomKFilter filter = new BloomKFilter(expectedEntries);
for (Float value : values) {
@@ -357,10 +384,10 @@ public class BloomDimFilterTest extends BaseFilterTest
filter.addFloat(value);
}
}
- return filter;
+ return BloomKFilterHolder.fromBloomKFilter(filter);
}
- private static BloomKFilter bloomKFilter(int expectedEntries, Double... values)
+ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Double... values) throws IOException
{
BloomKFilter filter = new BloomKFilter(expectedEntries);
for (Double value : values) {
@@ -370,10 +397,10 @@ public class BloomDimFilterTest extends BaseFilterTest
filter.addDouble(value);
}
}
- return filter;
+ return BloomKFilterHolder.fromBloomKFilter(filter);
}
- private static BloomKFilter bloomKFilter(int expectedEntries, Long... values)
+ private static BloomKFilterHolder bloomKFilter(int expectedEntries, Long... values) throws IOException
{
BloomKFilter filter = new BloomKFilter(expectedEntries);
for (Long value : values) {
@@ -383,6 +410,6 @@ public class BloomDimFilterTest extends BaseFilterTest
filter.addLong(value);
}
}
- return filter;
+ return BloomKFilterHolder.fromBloomKFilter(filter);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org