You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/11/23 03:32:16 UTC

[GitHub] [hudi] vinothchandar commented on a change in pull request #3952: [HUDI-2102]support hilbert curve for hudi.

vinothchandar commented on a change in pull request #3952:
URL: https://github.com/apache/hudi/pull/3952#discussion_r754782757



##########
File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/optimize/HilbertCurve.java
##########
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.optimize;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+/**
+ * Converts between Hilbert index ({@code BigInteger}) and N-dimensional points.
+ *
+ * <p>
+ * Note: see
+ * <a href="https://github.com/davidmoten/hilbert-curve/blob/master/src/main/java/org/davidmoten/hilbert/HilbertCurve.java">GitHub</a>.
+ * The license of the above link is also http://www.apache.org/licenses/LICENSE-2.0
+ */
+public final class HilbertCurve {
+
+  private final int bits;
+  private final int dimensions;
+  // cached calculations
+  private final int length;
+
+  private HilbertCurve(int bits, int dimensions) {
+    this.bits = bits;
+    this.dimensions = dimensions;
+    // cache a calculated value for a small perf improvement
+    this.length = bits * dimensions;
+  }
+
+  /**
+   * Returns a builder for an object that performs transformations for a Hilbert
+   * curve with the given number of bits.
+   *
+   * @param bits
+   *            depth of the Hilbert curve. If bits is one, this is the top-level
+   *            Hilbert curve
+   * @return builder for object to do transformations with the Hilbert Curve
+   */
+  public static Builder bits(int bits) {
+    return new Builder(bits);
+  }
+
+  /**
+   * Builds a {@link HilbertCurve} instance.
+   */
+  public static final class Builder {
+    final int bits;
+
+    private Builder(int bits) {
+      if (bits <= 0  || bits >= 64) {
+        throw new IllegalArgumentException(String.format("bits must be greater than zero and less than 64, now found bits value: %s", bits));
+      }
+      this.bits = bits;
+    }
+
+    public HilbertCurve dimensions(int dimensions) {
+      if (dimensions < 2) {
+        throw new IllegalArgumentException(String.format("dimensions must be at least 2, now found dimensions value: %s", dimensions));
+      }
+      return new HilbertCurve(bits, dimensions);
+    }
+  }
+
+  /**
+   * Converts a point to its Hilbert curve index.
+   *
+   * @param point
+   *            an array of {@code long}. Each ordinate can be between 0 and
+   *            2<sup>bits</sup>-1.
+   * @return index (nonnegative {@link BigInteger})
+   * @throws IllegalArgumentException
+   *             if length of point array is not equal to the number of
+   *             dimensions.
+   */
+  public BigInteger index(long... point) {
+    if (point.length != dimensions) {
+      throw new IllegalArgumentException(String.format("length of point array must equal to the number of dimensions"));
+    }
+    return toIndex(transposedIndex(bits, point));
+  }
+
+  public byte[] indexBytes(long... point) {
+    if (point.length != dimensions) {
+      throw new IllegalArgumentException(String.format("length of point array must equal to the number of dimensions"));
+    }
+    return toIndexBytes(transposedIndex(bits, point));
+  }
+
+  /**
+   * Converts a {@link BigInteger} index (distance along the Hilbert Curve from 0)
+   * to a point of dimensions defined in the constructor of {@code this}.
+   *
+   * @param index
+   *            index along the Hilbert Curve from 0. Maximum value 2 <sup>bits *
+   *            dimensions</sup>-1.
+   * @return array of longs being the point
+   * @throws NullPointerException
+   *             if index is null
+   * @throws IllegalArgumentException
+   *             if index is negative
+   */
+  public long[] point(BigInteger index) {
+    if (index == null) {
+      throw new NullPointerException("index must not be null");
+    }
+    if (index.signum() == -1) {
+      throw new IllegalArgumentException("index cannot be negative");
+    }
+    return transposedIndexToPoint(bits, transpose(index));
+  }
+
+  public void point(BigInteger index, long[] x) {
+    if (index == null) {
+      throw new NullPointerException("index must not be null");
+    }
+    if (index.signum() == -1) {
+      throw new IllegalArgumentException("index cannot be negative");
+    }
+    Arrays.fill(x, 0);
+    transpose(index, x);
+    transposedIndexToPoint(bits, x);
+  }
+
+  public void point(long i, long[] x) {
+    point(BigInteger.valueOf(i), x);
+  }
+
+  /**
+   * Converts a {@code long} index (distance along the Hilbert Curve from 0) to a
+   * point of dimensions defined in the constructor of {@code this}.
+   *
+   * @param index
+   *            index along the Hilbert Curve from 0. Maximum value 2
+   *            <sup>bits * dimensions</sup>-1.
+   * @return array of longs being the point
+   * @throws IllegalArgumentException
+   *             if index is negative
+   */
+  public long[] point(long index) {
+    return point(BigInteger.valueOf(index));
+  }
+
+  /**
+   * Returns the transposed representation of the Hilbert curve index.
+   *
+   * <p>
+   * The Hilbert index is expressed internally as an array of transposed bits.
+   *
+   * <pre>
+   Example: 5 bits for each of n=3 coordinates.
+   15-bit Hilbert integer = A B C D E F G H I J K L M N O is stored
+   as its Transpose                        ^
+   X[0] = A D G J M                    X[2]|  7
+   X[1] = B E H K N        &lt;-------&gt;       | /X[1]

Review comment:
       @alexeykudinkin can you please expand, so @xiarixiaoyao can address?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org