You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/06/12 20:44:43 UTC

[incubator-datasketches-characterization] branch master updated: Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git


The following commit(s) were added to refs/heads/master by this push:
     new 008ef26  Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker
008ef26 is described below

commit 008ef26de05a3db4bded2270b5d358ae45bec42e
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Jun 12 13:43:27 2019 -0700

    Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker
---
 pom.xml                                            |   6 +-
 .../java/com/yahoo/sketches/DirectoryWalker.java   | 114 ++++++++++++---------
 .../characterization/ZipfDistribution.java         |  28 +++--
 .../frequencies/BaseFrequenciesSpeedProfile.java   |   9 +-
 .../frequencies/LongsSketchSpeedProfile.java       |   9 +-
 streamA.txt.zip                                    |   3 -
 6 files changed, 95 insertions(+), 74 deletions(-)

diff --git a/pom.xml b/pom.xml
index 956c5ae..98546dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,10 +6,10 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
     <modelVersion>4.0.0</modelVersion>
 
-    <groupId>com.yahoo.datasketches</groupId>
-    <artifactId>characterization</artifactId>
+    <groupId>org.apache.datasketches</groupId>
+    <artifactId>datasketches-characterization</artifactId>
     <packaging>jar</packaging> <!-- jar is the default -->
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.1.0-incubating-SNAPSHOT</version>
 
     <name>${project.groupId}:${project.artifactId}</name>
     <description>Characterization code for the DataSketches Library.</description>
diff --git a/src/main/java/com/yahoo/sketches/DirectoryWalker.java b/src/main/java/com/yahoo/sketches/DirectoryWalker.java
index 32e9a33..c312e6f 100644
--- a/src/main/java/com/yahoo/sketches/DirectoryWalker.java
+++ b/src/main/java/com/yahoo/sketches/DirectoryWalker.java
@@ -10,6 +10,8 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Pattern;
 
+import org.testng.annotations.Test;
+
 /**
  * Recursive directory search.
  *
@@ -22,97 +24,92 @@ public class DirectoryWalker {
    * A Node is a directory in a tree of directories.
    */
   private static class Node {
-    private String nodePath__ = null;
+    private String nodePath = null;
     // List of sub-directory Nodes found by this Node only
-    private ArrayList<Node> nodeDirList__ = null;
-    private boolean recurseFlag__ = false;
-    private Pattern filePattern__ = null;
-    private ArrayList<String> nodeFileList__ = null; // cumulative file list
+    private ArrayList<Node> nodeDirList = null;
+    private boolean recurseFlag = false;
+    private Pattern filePattern = null;
+    private ArrayList<String> nodeFileList = null; // cumulative file list
 
     /**
      * Construct a new directory Node.
      *
-     * @param nodePath
-     *          the path to this directory including the file separator "/"
-     * @param filePattern
-     *          the Pattern used to select files to be included in the fileList
-     * @param recurseFlag
-     *          if true, the sub-directories in this directory will be searched
-     *          as well.
-     * @param fileList
-     *          the cumulative file list that is added to by each node searched.
+     * @param nodePath the path to this directory including the file separator "/"
+     * @param filePattern the Pattern used to select files to be included in the fileList
+     * @param recurseFlag if true, the sub-directories in this directory will be searched as well.
+     * @param fileList the cumulative file list that is added to by each node searched.
      */
     Node(final String nodePath, final Pattern filePattern, final boolean recurseFlag,
         final ArrayList<String> fileList) {
-      nodePath__ = nodePath;
-      filePattern__ = filePattern;
-      recurseFlag__ = recurseFlag;
-      nodeFileList__ = fileList;
+      this.nodePath = nodePath;
+      this.filePattern = filePattern;
+      this.recurseFlag = recurseFlag;
+      nodeFileList = fileList;
     }
 
     void buildLists() {
-      File file = new File(nodePath__);
+      File file = new File(nodePath);
       final String[] strFileDirArr = file.list(); // get array of file/dir names in my directory
       if (strFileDirArr == null) {
         throw new IllegalArgumentException("File is not a valid dir.");
       }
       final int numFileDirs = strFileDirArr.length;
       for (int i = 0; i < numFileDirs; i++) { // scan all file/dirs at this node
-        final String fileName = nodePath__ + strFileDirArr[i];
+        final String fileName = nodePath + strFileDirArr[i];
         file = new File(fileName);
         if (file.isDirectory()) {
-          if (recurseFlag__) {
-            if (nodeDirList__ == null) {
-              nodeDirList__ = new ArrayList<>();
+          if (recurseFlag) {
+            if (nodeDirList == null) {
+              nodeDirList = new ArrayList<>();
             }
-            final Node node = new Node(fileName + FS, filePattern__, recurseFlag__, nodeFileList__);
-            nodeDirList__.add(node);
+            final Node node = new Node(fileName + FS, filePattern, recurseFlag, nodeFileList);
+            nodeDirList.add(node);
           }
         } else { // it is a file
-          if (filePattern__ != null) {
-            if (filePattern__.matcher(fileName).matches()) {
-              nodeFileList__.add(fileName); // add it if it matches
+          if (filePattern != null) {
+            if (filePattern.matcher(fileName).matches()) {
+              nodeFileList.add(fileName); // add it if it matches
             }
           }
           else {
-            nodeFileList__.add(fileName); // just add it
+            nodeFileList.add(fileName); // just add it
           }
         }
       }
     }
   } // End of class Node
 
-  // Recursive routine
-  private static void buildDirTree(final Node current, final boolean recursive) {
-    current.buildLists(); // build the list for my node
-    final ArrayList<Node> al = current.nodeDirList__;
-    if ((al == null) || al.isEmpty() || !recursive) {
+  /**
+   * Recursive routine that builds the fileList for each node.
+   * @param curDirNode the current directory node
+   * @param recursive if true, recurse.
+   */
+  private static void buildDirTree(final Node curDirNode, final boolean recursive) {
+    curDirNode.buildLists(); // build the list for my current node
+    final ArrayList<Node> dirList = curDirNode.nodeDirList;
+    if ((dirList == null) || dirList.isEmpty() || !recursive) {
       return; // return if leaf node
     }
-    final int numDirs = al.size(); // otherwise, go deeper
+    final int numDirs = dirList.size(); // otherwise, go deeper
     for (int i = 0; i < numDirs; i++) {
-      buildDirTree(al.get(i), recursive);
+      buildDirTree(dirList.get(i), recursive);
     }
   }
 
   /**
-   * Creates a new List&lt;String&gt; of fileNames starting with the root directory
-   * path.
+   * Creates a new List&lt;String&gt; of fileNames starting with the root directory path.
    *
-   * @param rootPath
-   *          absolute or relative path and must end with a file separator.
-   * @param regExSelector
-   *          A RegEx matching pattern for putting a filename into the list. It
-   *          may be null.
-   * @param recursive
-   *          If true, examine all subdirectories
+   * @param rootPath absolute or relative path and must end with a file separator.
+   * @param fileSelector A RegEx string that the full path + fileName must match to be put into
+   * the list. It may be null or empty, which selects all files.
+   * @param recursive If true, examine all subdirectories
    * @return an ArrayListFile of the list of paths + fileNames.
    */
-  public static List<String> appendFileList(final String rootPath, final String regExSelector,
+  public static List<String> appendFileList(final String rootPath, final String fileSelector,
       final boolean recursive) {
     Pattern filePattern = null;
-    if ((regExSelector != null) && (regExSelector.length() > 0)) {
-      filePattern = Pattern.compile(regExSelector);
+    if ((fileSelector != null) && (fileSelector.length() > 0)) {
+      filePattern = Pattern.compile(fileSelector);
     }
     final ArrayList<String> fileList = new ArrayList<>();
     final Node root = new Node(rootPath, filePattern, recursive, fileList);
@@ -120,13 +117,30 @@ public class DirectoryWalker {
     return fileList;
   }
 
+  @Test
+  public static void printFiles() {
+    final String rootPath = "/Users/lrhodes/dev/git/Apache/datasketches-memory/src/";
+    final String regExSelector = ".+[.]java";
+    final boolean recursive = true;
+
+    final List<String> fileList = appendFileList(rootPath, regExSelector, recursive);
+    final int size = fileList.size();
+
+    for (int i = 0; i < size; i++) {
+      println(fileList.get(i));
+    }
+    println("Files: " + size);
+  }
+
+  static void println(final String s) { System.out.println(s); }
+
   /**
    * blah
    * @param args blah
    */
   public static void main(final String[] args) {
-    final String rootPath = "/Users/lrhodes/dev/git/DataSketches.github.io/_site/docs/";
-    final String regExSelector = ".+[.]html";
+    final String rootPath = "/Users/lrhodes/dev/git/Apache/datasketches-memory/src";
+    final String regExSelector = ".+[.]java";
     final boolean recursive = true;
 
     final List<String> fileList = appendFileList(rootPath, regExSelector, recursive);
diff --git a/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java b/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
index 81965a1..b9acaf2 100644
--- a/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
+++ b/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
@@ -27,19 +27,29 @@ public class ZipfDistribution {
   private final double hIntegralNumberOfElements;
   private final double s;
 
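+  /**
+   * Constructs a sampler for a Zipf distribution.
+   * @param numberOfElements the number of elements of the distribution
+   * @param exponent the exponent of the distribution
+   */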
   public ZipfDistribution(final int numberOfElements, final double exponent) {
     this.numberOfElements = numberOfElements;
     this.exponent = exponent;
-    this.hIntegralX1 = hIntegral(1.5) - 1.0;
-    this.hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2);
-    this.s = 2 - hIntegralInverse(hIntegral(2.5) - h(2.0));
+    hIntegralX1 = hIntegral(1.5) - 1.0;
+    hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2);
+    s = 2 - hIntegralInverse(hIntegral(2.5) - h(2.0));
   }
 
+  /**
+   * Draws one random sample from this distribution.
+   * @return the sampled integer value
+   */
   public int sample() {
-    while(true) {
-      final double u = hIntegralNumberOfElements + rng.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements);
+    while (true) {
+      final double u = hIntegralNumberOfElements
+          + (rng.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements));
 
-      double x = hIntegralInverse(u);
+      final double x = hIntegralInverse(u);
       int k = (int) (x + F_1_2);
 
       if (k < 1) {
@@ -48,7 +58,7 @@ public class ZipfDistribution {
         k = numberOfElements;
       }
 
-      if (k - x <= s || u >= hIntegral(k + F_1_2) - h(k)) {
+      if (((k - x) <= s) || (u >= (hIntegral(k + F_1_2) - h(k)))) {
           return k;
       }
     }
@@ -77,14 +87,14 @@ public class ZipfDistribution {
     if (Math.abs(x) > TAYLOR_THRESHOLD) {
       return Math.log1p(x) / x;
     }
-    return 1 - x * (F_1_2 - x * (F_1_3 - F_1_4 * x));
+    return 1 - (x * (F_1_2 - (x * (F_1_3 - (F_1_4 * x)))));
   }
 
   private static double helper2(final double x) {
     if (Math.abs(x) > TAYLOR_THRESHOLD) {
       return Math.expm1(x) / x;
     }
-    return 1 + x * F_1_2 * (1 + x * F_1_3 * (1 + F_1_4 * x));
+    return 1 + (x * F_1_2 * (1 + (x * F_1_3 * (1 + (F_1_4 * x)))));
   }
 
 }
diff --git a/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java b/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
index e8f22b3..7e1ca8b 100644
--- a/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
+++ b/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
@@ -2,9 +2,9 @@ package com.yahoo.sketches.characterization.frequencies;
 
 import static com.yahoo.sketches.Util.pwr2LawNext;
 
-import com.yahoo.sketches.characterization.Job;
-import com.yahoo.sketches.characterization.JobProfile;
-import com.yahoo.sketches.characterization.Properties;
+import com.yahoo.sketches.Job;
+import com.yahoo.sketches.JobProfile;
+import com.yahoo.sketches.Properties;
 
 public abstract class BaseFrequenciesSpeedProfile implements JobProfile {
 
@@ -43,7 +43,8 @@ public abstract class BaseFrequenciesSpeedProfile implements JobProfile {
 
     int streamLength = minStreamLen;
     while (streamLength <= maxStreamLen) {
-      final int numTrials = getNumTrials(streamLength, lgMinStreamLen, lgMaxStreamLen, lgMinTrials, lgMaxTrials);
+      final int numTrials = getNumTrials(streamLength, lgMinStreamLen, lgMaxStreamLen, lgMinTrials,
+          lgMaxTrials);
       resetStats();
       for (int i = 0; i < numTrials; i++) {
         prepareTrial(streamLength);
diff --git a/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java b/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
index 025f219..1746540 100644
--- a/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
+++ b/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
@@ -1,10 +1,9 @@
 package com.yahoo.sketches.characterization.frequencies;
 
 import com.yahoo.memory.Memory;
-import com.yahoo.sketches.characterization.Properties;
-import com.yahoo.sketches.frequencies.LongsSketch;
-
+import com.yahoo.sketches.Properties;
 import com.yahoo.sketches.characterization.ZipfDistribution;
+import com.yahoo.sketches.frequencies.LongsSketch;
 
 public class LongsSketchSpeedProfile extends BaseFrequenciesSpeedProfile {
 
@@ -21,10 +20,10 @@ public class LongsSketchSpeedProfile extends BaseFrequenciesSpeedProfile {
 
   @Override
   void configure(final Properties properties) {
-    this.k = Integer.parseInt(properties.mustGet("k"));
+    k = Integer.parseInt(properties.mustGet("k"));
     final int range = Integer.parseInt(properties.mustGet("zipfRange"));
     final double exponent = Double.parseDouble(properties.mustGet("zipfExponent"));
-    this.zipf = new ZipfDistribution(range, exponent);
+    zipf = new ZipfDistribution(range, exponent);
   }
 
   @Override
diff --git a/streamA.txt.zip b/streamA.txt.zip
deleted file mode 100644
index f6a0155..0000000
--- a/streamA.txt.zip
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fffcff47170d405d78a622dc154f79d49a1d39a19595b455e2139a233bc10365
-size 489195747


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org