You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/08/25 18:44:20 UTC

[incubator-datasketches-java] 01/05: Interim

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch RelativeErrorQuantiles
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 4167017c0fce4870b6da29bbeb0e1b645f61c127
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Aug 11 13:16:49 2020 -0700

    Interim
---
 .../apache/datasketches/kll/KllReFloatsSketch.java | 161 +++++++++++++++++++++
 1 file changed, 161 insertions(+)

diff --git a/src/main/java/org/apache/datasketches/kll/KllReFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllReFloatsSketch.java
new file mode 100644
index 0000000..760d061
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/kll/KllReFloatsSketch.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.kll;
+
+import static java.lang.Math.ceil;
+import static java.lang.Math.sqrt;
+
+import java.util.Random;
+
+/**
+ * Proof-of-concept code for paper "Relative Error Streaming Quantiles",
+ * https://arxiv.org/abs/2004.01668.
+ *
+ * <p>This implementation differs from the algorithm described in the paper in the following:</p>
+ * <ul><li>The algorithm requires no upper bound on the stream length (input size).
+ * Instead, each relative-compactor (i.e. buffer) counts the number of compaction operations performed
+ * so far (variable numCompactions). Initially, the relative-compactor starts with 2 buffer sections
+ * and each time the numCompactions exceeds 2^{# of sections}, we double the number of sections
+ * (variable numSections).</li>
+ * <li>The size of each buffer section (variable sectionSize in the code and parameter k in the paper)
+ * is initialized with a value set by the user via variable sectionSize (parameter -sec)
+ * or via setting epsilon (parameter -eps). Setting the failure probability delta is not implememnted.
+ * When the number of sections doubles, we decrease sectionSize by a factor of sqrt(2)
+ * (for which we use a float variable sectionSizeF). As in item 1), this is applied
+ * at each level separately.
+ * Thus, when we double the number of section, the buffer size increases by a factor of sqrt(2)
+ * (up to +-1 after rounding). For experimental purposes, the buffer consists of three parts:
+ * <ul><li>a part that is never compacted (its size can be set by variable never),</li>
+ * <li>numSections many sections of size sectionSize, and</li>
+ * <li>a part that is always involved in a compaction (its size can be set by variable always).</li>
+ * </ul></li>
+ * <li>The merge operation here does not perform "special compactions", which are used in the paper
+ * to allow for a tight analysis of the sketch.</li>
+ * </ul>
+ *
+ * @author Edo Liberty
+ * @author Pavel Vesely
+ * @author Lee Rhodes
+ */
+@SuppressWarnings("unused")
+public class KllReFloatsSketch {
+
+  static {
+    final double x = sqrt(2);
+    final double y = ceil(x);
+    final Random rand =  new Random();
+  }
+
+  // Constants
+
+  private final static double SECTION_SIZE_SCALAR = 0.5;
+  private final static double NEVER_SIZE_SCALAR = 0.5;
+  private final static int INIT_NUMBER_OF_SECTIONS = 2;
+  private final static int SMALLEST_MEANINGFUL_SECTION_SIZE = 4;
+  private final static double DEFAULT_EPS = 0.01;
+  private final static double EPS_UPPER_BOUND = 0.1; //the sketch gives rather bad results for eps > 0.1
+
+  /**
+   * Schedule
+   * @author Lee Rhodes
+   */
+  @SuppressWarnings("javadoc")
+  public enum Schedule { DETERMINISTIC, RANDOMIZED }
+
+  //class variables
+  private double eps = DEFAULT_EPS;
+  private Schedule sch = Schedule.DETERMINISTIC;
+  private int always = -1;
+  private int never = -1;
+  private int sectionSize = -1;
+  private int initNumSections = INIT_NUMBER_OF_SECTIONS;
+  private boolean lazy = true;
+  private boolean alternate = true;
+  private boolean neverGrows = false;
+  private RelativeCompactor compactor = new RelativeCompactor();
+  private RelativeCompactor[] compactors = new RelativeCompactor[0];
+  private int H = 0;
+  private int size = 0;
+
+
+  /**
+   * Constructor.
+   * @param eps blah
+   * @param sch blah
+   * @param always blah
+   * @param never blah
+   * @param sectionSize blah
+   * @param initNumSections blah
+   * @param lazy blah
+   * @param alternate blah
+   */
+  public KllReFloatsSketch(
+      final double eps,
+      final Schedule sch,
+      final int always,
+      final int never,
+      final int sectionSize,
+      final int initNumSections,
+      final boolean lazy,
+      final boolean alternate) {
+    this.eps = eps;
+    this.sch = sch;
+    this.always = always;
+    this.never = never;
+    this.sectionSize = sectionSize;
+    //an initial upper bound on log_2 of the number of compactions
+    this.initNumSections = initNumSections;
+    this.lazy = lazy;
+    this.alternate = alternate;
+
+    // default setting of sectionSize, always, and never according to eps
+    if (this.sectionSize == -1) {
+      // ensured to be even and positive (thus >= 2)
+      this.sectionSize = (2 * (int)(SECTION_SIZE_SCALAR / eps)) + 1;
+    }
+    if (this.always == -1) { this.always = sectionSize; }
+
+    neverGrows = false; //if never is set by the user, then we do not let it grow
+    if (this.never == -1) {
+      this.never = (int)(NEVER_SIZE_SCALAR * this.sectionSize * this.initNumSections);
+      neverGrows = true;
+    }
+    compactors = new RelativeCompactor[0];
+    H = 0;
+    size = 0;
+    grow();
+  }
+
+  void grow() {
+
+  }
+
+
+  class RelativeCompactor {
+    int numCompactions = 0;
+
+    public RelativeCompactor() {
+      numCompactions = 0; // number of compaction operations performed
+    }
+  }
+
+}
+
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org