You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2015/07/16 12:32:07 UTC
svn commit: r1691350 [3/3] - in /lucene/dev/branches/solr7787: ./ lucene/
solr/ solr/core/ solr/core/src/java/org/apache/solr/handler/component/
solr/core/src/java/org/apache/solr/search/facet/
solr/core/src/java/org/apache/solr/util/hll/ solr/core/src...
Added: lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java?rev=1691350&view=auto
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java (added)
+++ lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SchemaVersionOne.java Thu Jul 16 10:32:07 2015
@@ -0,0 +1,156 @@
+package org.apache.solr.util.hll;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link ISchemaVersion} implementation for schema version one of the
+ * serialized HLL format.
+ *
+ * <p>Every HLL serialized with this version starts with a three byte header:
+ * a version byte (schema version and type ordinal), a parameters byte
+ * (register width and log2 of the register count) and a cutoff byte
+ * (explicit cutoff and the 'sparse-enabled' flag). The bit-level packing of
+ * each byte is delegated to {@link SerializationUtil}.
+ *
+ * @author timon
+ */
+class SchemaVersionOne implements ISchemaVersion {
+  /**
+   * The schema version number for this instance.
+   */
+  public static final int SCHEMA_VERSION = 1;
+
+  // ------------------------------------------------------------------------
+  // Version-specific ordinals: the array index of a type is the ordinal that
+  // is written into the version byte for that type.
+  private static final HLLType[] TYPE_ORDINALS = new HLLType[] {
+    HLLType.EMPTY,
+    HLLType.EXPLICIT,
+    HLLType.SPARSE,
+    HLLType.FULL
+  };
+
+  // ------------------------------------------------------------------------
+  // number of header bytes for all HLL types
+  private static final int HEADER_BYTE_COUNT = 3;
+
+  // sentinel values from the spec for explicit off and auto
+  private static final int EXPLICIT_OFF = 0;
+  private static final int EXPLICIT_AUTO = 63;
+
+  // ************************************************************************
+  /**
+   * Returns the number of header bytes that precede the serialized words.
+   * In this schema version the header has the same size for every type, so
+   * the argument is not consulted.
+   *
+   * @param type the type of the HLL being (de)serialized
+   * @return the header length in bytes
+   */
+  @Override
+  public int paddingBytes(final HLLType type) {
+    return HEADER_BYTE_COUNT;
+  }
+
+  /**
+   * Writes the three header bytes described by <code>metadata</code> into
+   * positions 0, 1 and 2 of <code>bytes</code>.
+   *
+   * @param bytes the array that receives the header; it needs room for at
+   *        least three bytes
+   * @param metadata the metadata to encode
+   */
+  @Override
+  public void writeMetadata(final byte[] bytes, final IHLLMetadata metadata) {
+    final int typeOrdinal = getOrdinal(metadata.HLLType());
+    final int encodedCutoff = encodeExplicitCutoff(metadata);
+
+    bytes[0] = SerializationUtil.packVersionByte(SCHEMA_VERSION, typeOrdinal);
+    bytes[1] = SerializationUtil.packParametersByte(metadata.registerWidth(), metadata.registerCountLog2());
+    bytes[2] = SerializationUtil.packCutoffByte(encodedCutoff, metadata.sparseEnabled());
+  }
+
+  /**
+   * Reads the three header bytes at positions 0, 1 and 2 of
+   * <code>bytes</code> and decodes them into an {@link IHLLMetadata}.
+   *
+   * @param bytes the serialized HLL, starting with its header
+   * @return the decoded metadata; its log2 explicit cutoff is <code>-1</code>
+   *         when the header encodes 'explicit off' or 'explicit auto'
+   * @throws IllegalArgumentException if the version byte carries a type
+   *         ordinal that this schema version does not define
+   */
+  @Override
+  public IHLLMetadata readMetadata(final byte[] bytes) {
+    final byte versionByte = bytes[0];
+    final byte parametersByte = bytes[1];
+    final byte cutoffByte = bytes[2];
+
+    final int encodedCutoff = SerializationUtil.explicitCutoff(cutoffByte);
+    final boolean explicitOff = (EXPLICIT_OFF == encodedCutoff);
+    final boolean explicitAuto = (EXPLICIT_AUTO == encodedCutoff);
+
+    final int log2ExplicitCutoff;
+    if (explicitOff || explicitAuto) {
+      log2ExplicitCutoff = -1; // sentinel: no concrete cutoff is stored
+    } else {
+      log2ExplicitCutoff = encodedCutoff - 1; // per spec, stored as log2 + 1
+    }
+
+    final HLLType type = getType(SerializationUtil.typeOrdinal(versionByte));
+    final int registerCountLog2 = SerializationUtil.registerCountLog2(parametersByte);
+    final int registerWidth = SerializationUtil.registerWidth(parametersByte);
+    final boolean sparseEnabled = SerializationUtil.sparseEnabled(cutoffByte);
+
+    return new HLLMetadata(SCHEMA_VERSION,
+                           type,
+                           registerCountLog2,
+                           registerWidth,
+                           log2ExplicitCutoff,
+                           explicitOff,
+                           explicitAuto,
+                           sparseEnabled);
+  }
+
+  /**
+   * Creates a {@link BigEndianAscendingWordSerializer} configured with
+   * {@link #paddingBytes(HLLType)} bytes of padding for the header.
+   *
+   * @param type the type of the HLL being serialized
+   * @param wordLength passed through to the serializer
+   * @param wordCount passed through to the serializer
+   * @return a new serializer
+   */
+  @Override
+  public IWordSerializer getSerializer(HLLType type, int wordLength, int wordCount) {
+    final int headerBytes = paddingBytes(type);
+    return new BigEndianAscendingWordSerializer(wordLength, wordCount, headerBytes);
+  }
+
+  /**
+   * Creates a {@link BigEndianAscendingWordDeserializer} over
+   * <code>bytes</code>, configured with {@link #paddingBytes(HLLType)} bytes
+   * of padding for the header.
+   *
+   * @param type the type of the HLL being deserialized
+   * @param wordLength passed through to the deserializer
+   * @param bytes the serialized HLL
+   * @return a new deserializer
+   */
+  @Override
+  public IWordDeserializer getDeserializer(HLLType type, int wordLength, byte[] bytes) {
+    final int headerBytes = paddingBytes(type);
+    return new BigEndianAscendingWordDeserializer(wordLength, headerBytes, bytes);
+  }
+
+  /**
+   * @return {@link #SCHEMA_VERSION}
+   */
+  @Override
+  public int schemaVersionNumber() {
+    return SCHEMA_VERSION;
+  }
+
+  // ========================================================================
+  // Explicit cutoff encoding
+  /**
+   * Computes the value stored in the low bits of the cutoff byte.
+   *
+   * @param metadata the metadata being written
+   * @return {@link #EXPLICIT_OFF} or {@link #EXPLICIT_AUTO} when the metadata
+   *         reports the corresponding mode, otherwise the log2 of the
+   *         explicit cutoff plus one
+   */
+  private static int encodeExplicitCutoff(final IHLLMetadata metadata) {
+    if (metadata.explicitOff()) {
+      return EXPLICIT_OFF;
+    }
+    if (metadata.explicitAuto()) {
+      return EXPLICIT_AUTO;
+    }
+    return metadata.log2ExplicitCutoff() + 1/*per spec*/;
+  }
+
+  // ========================================================================
+  // Type/Ordinal lookups
+  /**
+   * Looks up the position of <code>type</code> in the ordinal table.
+   *
+   * @param type the type whose ordinal is desired
+   * @return the ordinal to be used in the version byte. This will always be
+   *         non-negative.
+   * @throws RuntimeException if the type has no entry in the table
+   */
+  private static int getOrdinal(final HLLType type) {
+    int ordinal = 0;
+    for (final HLLType candidate : TYPE_ORDINALS) {
+      if (candidate.equals(type)) {
+        return ordinal;
+      }
+      ordinal++;
+    }
+    throw new RuntimeException("Unknown HLL type " + type);
+  }
+
+  /**
+   * Looks up the {@link HLLType} stored at <code>ordinal</code> in the
+   * ordinal table.
+   *
+   * @param ordinal the ordinal whose type is desired
+   * @return the type for the specified ordinal. This will never be
+   *         <code>null</code>.
+   * @throws IllegalArgumentException if the ordinal lies outside the table
+   */
+  private static HLLType getType(final int ordinal) {
+    final boolean inRange = (0 <= ordinal) && (ordinal < TYPE_ORDINALS.length);
+    if (inRange) {
+      return TYPE_ORDINALS[ordinal];
+    }
+    throw new IllegalArgumentException("Invalid type ordinal '" + ordinal + "'. Only 0-" + (TYPE_ORDINALS.length - 1) + " inclusive allowed.");
+  }
+}
Added: lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java?rev=1691350&view=auto
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java (added)
+++ lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/SerializationUtil.java Thu Jul 16 10:32:07 2015
@@ -0,0 +1,277 @@
+package org.apache.solr.util.hll;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A collection of constants and utilities for serializing and deserializing
+ * HLLs.
+ *
+ * NOTE: 'package' visibility is used for many methods that only need to be
+ * used by the {@link ISchemaVersion} implementations. The structure of
+ * a serialized HLL's metadata should be opaque to the rest of the
+ * library.
+ */
+class SerializationUtil {
+ /**
+ * The number of bits (of the parameters byte) dedicated to encoding the
+ * width of the registers.
+ */
+ /*package*/ static int REGISTER_WIDTH_BITS = 3;
+
+ /**
+ * A mask to cap the maximum value of the register width.
+ */
+ /*package*/ static int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;
+
+ /**
+ * The number of bits (of the parameters byte) dedicated to encoding
+ * <code>log2(registerCount)</code>.
+ */
+ /*package*/ static int LOG2_REGISTER_COUNT_BITS = 5;
+
+ /**
+ * A mask to cap the maximum value of <code>log2(registerCount)</code>.
+ */
+ /*package*/ static int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;
+
+ /**
+ * The number of bits (of the cutoff byte) dedicated to encoding the
+ * log-base-2 of the explicit cutoff or sentinel values for
+ * 'explicit-disabled' or 'auto'.
+ */
+ /*package*/ static int EXPLICIT_CUTOFF_BITS = 6;
+
+ /**
+ * A mask to cap the maximum value of the explicit cutoff choice.
+ */
+ /*package*/ static int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;
+
+ /**
+ * Number of bits in a nibble.
+ */
+ private static int NIBBLE_BITS = 4;
+
+ /**
+ * A mask to cap the maximum value of a nibble.
+ */
+ private static int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;
+
+ // ************************************************************************
+ // Serialization utilities
+
+ /**
+ * Schema version one (v1).
+ */
+ public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();
+
+ /**
+ * The default schema version for serializing HLLs.
+ */
+ public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;
+
+ /**
+ * List of registered schema versions, indexed by their version numbers. If
+ * an entry is <code>null</code>, then no such schema version is registered.
+ * Similarly, registering a new schema version simply entails assigning an
+ * {@link ISchemaVersion} instance to the appropriate index of this array.<p/>
+ *
+ * By default, only {@link SchemaVersionOne} is registered. Note that version
+ * zero will always be reserved for internal (e.g. proprietary, legacy) schema
+ * specifications/implementations and will never be assigned to in by this
+ * library.
+ */
+ public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
+
+ static {
+ REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
+ }
+
+ /**
+ * @param schemaVersionNumber the version number of the {@link ISchemaVersion}
+ * desired. This must be a registered schema version number.
+ * @return The {@link ISchemaVersion} for the given number. This will never
+ * be <code>null</code>.
+ */
+ public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
+ if(schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length || schemaVersionNumber < 0) {
+ throw new RuntimeException("Invalid schema version number " + schemaVersionNumber);
+ }
+ final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
+ if(schemaVersion == null) {
+ throw new RuntimeException("Unknown schema version number " + schemaVersionNumber);
+ }
+ return schemaVersion;
+ }
+
+ /**
+ * Get the appropriate {@link ISchemaVersion schema version} for the specified
+ * serialized HLL.
+ *
+ * @param bytes the serialized HLL whose schema version is desired.
+ * @return the schema version for the specified HLL. This will never
+ * be <code>null</code>.
+ */
+ public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
+ final byte versionByte = bytes[0];
+ final int schemaVersionNumber = schemaVersion(versionByte);
+
+ return getSchemaVersion(schemaVersionNumber);
+ }
+
+ // ************************************************************************
+ // Package-specific shared helpers
+
+ /**
+ * Generates a byte that encodes the schema version and the type ordinal
+ * of the HLL.
+ *
+ * The top nibble is the schema version and the bottom nibble is the type
+ * ordinal.
+ *
+ * @param schemaVersion the schema version to encode.
+ * @param typeOrdinal the type ordinal of the HLL to encode.
+ * @return the packed version byte
+ */
+ public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
+ return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
+ }
+ /**
+ * Generates a byte that encodes the log-base-2 of the explicit cutoff
+ * or sentinel values for 'explicit-disabled' or 'auto', as well as the
+ * boolean indicating whether to use {@link HLLType#SPARSE}
+ * in the promotion hierarchy.
+ *
+ * The top bit is always padding, the second highest bit indicates the
+ * 'sparse-enabled' boolean, and the lowest six bits encode the explicit
+ * cutoff value.
+ *
+ * @param explicitCutoff the explicit cutoff value to encode.
+ * <ul>
+ * <li>
+ * If 'explicit-disabled' is chosen, this value should be <code>0</code>.
+ * </li>
+ * <li>
+ * If 'auto' is chosen, this value should be <code>63</code>.
+ * </li>
+ * <li>
+ * If a cutoff of 2<sup>n</sup> is desired, for <code>0 <= n < 31</code>,
+ * this value should be <code>n + 1</code>.
+ * </li>
+ * </ul>
+ * @param sparseEnabled whether {@link HLLType#SPARSE}
+ * should be used in the promotion hierarchy to improve HLL
+ * storage.
+ *
+ * @return the packed cutoff byte
+ */
+ public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
+ final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
+ return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
+ }
+
+ /**
+ * Generates a byte that encodes the parameters of a
+ * {@link HLLType#FULL} or {@link HLLType#SPARSE}
+ * HLL.<p/>
+ *
+ * The top 3 bits are used to encode <code>registerWidth - 1</code>
+ * (range of <code>registerWidth</code> is thus 1-9) and the bottom 5
+ * bits are used to encode <code>registerCountLog2</code>
+ * (range of <code>registerCountLog2</code> is thus 0-31).
+ *
+ * @param registerWidth the register width (must be at least 1 and at
+ * most 9)
+ * @param registerCountLog2 the log-base-2 of the register count (must
+ * be at least 0 and at most 31)
+ * @return the packed parameters byte
+ */
+ public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
+ final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
+ final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
+ return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
+ }
+
+ /**
+ * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
+ * HLL.
+ *
+ * @param cutoffByte the cutoff byte of the serialized HLL
+ * @return the 'sparse-enabled' boolean
+ */
+ public static boolean sparseEnabled(final byte cutoffByte) {
+ return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
+ }
+
+ /**
+ * Extracts the explicit cutoff value from the cutoff byte of a serialized
+ * HLL.
+ *
+ * @param cutoffByte the cutoff byte of the serialized HLL
+ * @return the explicit cutoff value
+ */
+ public static int explicitCutoff(final byte cutoffByte) {
+ return (cutoffByte & EXPLICIT_CUTOFF_MASK);
+ }
+
+ /**
+ * Extracts the schema version from the version byte of a serialized
+ * HLL.
+ *
+ * @param versionByte the version byte of the serialized HLL
+ * @return the schema version of the serialized HLL
+ */
+ public static int schemaVersion(final byte versionByte) {
+ return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
+ }
+
+ /**
+ * Extracts the type ordinal from the version byte of a serialized HLL.
+ *
+ * @param versionByte the version byte of the serialized HLL
+ * @return the type ordinal of the serialized HLL
+ */
+ public static int typeOrdinal(final byte versionByte) {
+ return (versionByte & NIBBLE_MASK);
+ }
+
+ /**
+ * Extracts the register width from the parameters byte of a serialized
+ * {@link HLLType#FULL} HLL.
+ *
+ * @param parametersByte the parameters byte of the serialized HLL
+ * @return the register width of the serialized HLL
+ *
+ * @see #packParametersByte(int, int)
+ */
+ public static int registerWidth(final byte parametersByte) {
+ return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
+ }
+
+ /**
+ * Extracts the log2(registerCount) from the parameters byte of a
+ * serialized {@link HLLType#FULL} HLL.
+ *
+ * @param parametersByte the parameters byte of the serialized HLL
+ * @return log2(registerCount) of the serialized HLL
+ *
+ * @see #packParametersByte(int, int)
+ */
+ public static int registerCountLog2(final byte parametersByte) {
+ return (parametersByte & LOG2_REGISTER_COUNT_MASK);
+ }
+}
Added: lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/package-info.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/package-info.java?rev=1691350&view=auto
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/package-info.java (added)
+++ lucene/dev/branches/solr7787/solr/core/src/java/org/apache/solr/util/hll/package-info.java Thu Jul 16 10:32:07 2015
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A fork of <a href="https://github.com/aggregateknowledge/java-hll/">Java-HyperLogLog</a> package tweaked
+ * not to depend on fastutil and with cleanups to make it lean and clean.
+ */
+package org.apache.solr.util.hll;
+
+
Modified: lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1691350&r1=1691343&r2=1691350&view=diff
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java Thu Jul 16 10:32:07 2015
@@ -55,9 +55,9 @@ import org.apache.solr.util.AbstractSolr
import org.apache.commons.math3.util.Combinations;
import com.tdunning.math.stats.AVLTreeDigest;
-import net.agkn.hll.HLL;
import com.google.common.hash.Hashing;
-import com.google.common.hash.HashFunction;
+import com.google.common.hash.HashFunction;
+import org.apache.solr.util.hll.HLL;
import org.junit.BeforeClass;
Modified: lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java?rev=1691350&r1=1691343&r2=1691350&view=diff
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java (original)
+++ lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/handler/component/TestDistributedStatsComponentCardinality.java Thu Jul 16 10:32:07 2015
@@ -31,7 +31,7 @@ import org.apache.solr.client.solrj.resp
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
-import net.agkn.hll.HLL;
+import org.apache.solr.util.hll.HLL;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
Modified: lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1691350&r1=1691343&r2=1691350&view=diff
==============================================================================
--- lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/solr7787/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Thu Jul 16 10:32:07 2015
@@ -28,7 +28,7 @@ import java.util.Map;
import java.util.Random;
import com.tdunning.math.stats.AVLTreeDigest;
-import net.agkn.hll.HLL;
+import org.apache.solr.util.hll.HLL;
import org.apache.lucene.queryparser.flexible.standard.processors.NumericQueryNodeProcessor;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.packed.GrowableWriter;