You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/06/12 20:44:43 UTC
[incubator-datasketches-characterization] branch master updated:
Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
The following commit(s) were added to refs/heads/master by this push:
new 008ef26 Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker
008ef26 is described below
commit 008ef26de05a3db4bded2270b5d358ae45bec42e
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Jun 12 13:43:27 2019 -0700
Remove streamA.txt.zip, minor styles fixes, update DirectoryWalker
---
pom.xml | 6 +-
.../java/com/yahoo/sketches/DirectoryWalker.java | 114 ++++++++++++---------
.../characterization/ZipfDistribution.java | 28 +++--
.../frequencies/BaseFrequenciesSpeedProfile.java | 9 +-
.../frequencies/LongsSketchSpeedProfile.java | 9 +-
streamA.txt.zip | 3 -
6 files changed, 95 insertions(+), 74 deletions(-)
diff --git a/pom.xml b/pom.xml
index 956c5ae..98546dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,10 +6,10 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>com.yahoo.datasketches</groupId>
- <artifactId>characterization</artifactId>
+ <groupId>org.apache.datasketches</groupId>
+ <artifactId>datasketches-characterization</artifactId>
<packaging>jar</packaging> <!-- jar is the default -->
- <version>0.1.0-SNAPSHOT</version>
+ <version>0.1.0-incubating-SNAPSHOT</version>
<name>${project.groupId}:${project.artifactId}</name>
<description>Characterization code for the DataSketches Library.</description>
diff --git a/src/main/java/com/yahoo/sketches/DirectoryWalker.java b/src/main/java/com/yahoo/sketches/DirectoryWalker.java
index 32e9a33..c312e6f 100644
--- a/src/main/java/com/yahoo/sketches/DirectoryWalker.java
+++ b/src/main/java/com/yahoo/sketches/DirectoryWalker.java
@@ -10,6 +10,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
+import org.testng.annotations.Test;
+
/**
* Recursive directory search.
*
@@ -22,97 +24,92 @@ public class DirectoryWalker {
* A Node is a directory in a tree of directories.
*/
private static class Node {
- private String nodePath__ = null;
+ private String nodePath = null;
// List of sub-directory Nodes found by this Node only
- private ArrayList<Node> nodeDirList__ = null;
- private boolean recurseFlag__ = false;
- private Pattern filePattern__ = null;
- private ArrayList<String> nodeFileList__ = null; // cumulative file list
+ private ArrayList<Node> nodeDirList = null;
+ private boolean recurseFlag = false;
+ private Pattern filePattern = null;
+ private ArrayList<String> nodeFileList = null; // cumulative file list
/**
* Construct a new directory Node.
*
- * @param nodePath
- * the path to this directory including the file separator "/"
- * @param filePattern
- * the Pattern used to select files to be included in the fileList
- * @param recurseFlag
- * if true, the sub-directories in this directory will be searched
- * as well.
- * @param fileList
- * the cumulative file list that is added to by each node searched.
+ * @param nodePath the path to this directory including the file separator "/"
+ * @param filePattern the Pattern used to select files to be included in the fileList
+ * @param recurseFlag if true, the sub-directories in this directory will be searched as well.
+ * @param fileList the cumulative file list that is added to by each node searched.
*/
Node(final String nodePath, final Pattern filePattern, final boolean recurseFlag,
final ArrayList<String> fileList) {
- nodePath__ = nodePath;
- filePattern__ = filePattern;
- recurseFlag__ = recurseFlag;
- nodeFileList__ = fileList;
+ this.nodePath = nodePath;
+ this.filePattern = filePattern;
+ this.recurseFlag = recurseFlag;
+ nodeFileList = fileList;
}
void buildLists() {
- File file = new File(nodePath__);
+ File file = new File(nodePath);
final String[] strFileDirArr = file.list(); // get array of file/dir names in my directory
if (strFileDirArr == null) {
throw new IllegalArgumentException("File is not a valid dir.");
}
final int numFileDirs = strFileDirArr.length;
for (int i = 0; i < numFileDirs; i++) { // scan all file/dirs at this node
- final String fileName = nodePath__ + strFileDirArr[i];
+ final String fileName = nodePath + strFileDirArr[i];
file = new File(fileName);
if (file.isDirectory()) {
- if (recurseFlag__) {
- if (nodeDirList__ == null) {
- nodeDirList__ = new ArrayList<>();
+ if (recurseFlag) {
+ if (nodeDirList == null) {
+ nodeDirList = new ArrayList<>();
}
- final Node node = new Node(fileName + FS, filePattern__, recurseFlag__, nodeFileList__);
- nodeDirList__.add(node);
+ final Node node = new Node(fileName + FS, filePattern, recurseFlag, nodeFileList);
+ nodeDirList.add(node);
}
} else { // it is a file
- if (filePattern__ != null) {
- if (filePattern__.matcher(fileName).matches()) {
- nodeFileList__.add(fileName); // add it if it matches
+ if (filePattern != null) {
+ if (filePattern.matcher(fileName).matches()) {
+ nodeFileList.add(fileName); // add it if it matches
}
}
else {
- nodeFileList__.add(fileName); // just add it
+ nodeFileList.add(fileName); // just add it
}
}
}
}
} // End of class Node
- // Recursive routine
- private static void buildDirTree(final Node current, final boolean recursive) {
- current.buildLists(); // build the list for my node
- final ArrayList<Node> al = current.nodeDirList__;
- if ((al == null) || al.isEmpty() || !recursive) {
+ /**
+ * Recursive routine that builds the fileList for each node.
+ * @param curDirNode the current directory node
+ * @param recursive if true, recurse.
+ */
+ private static void buildDirTree(final Node curDirNode, final boolean recursive) {
+ curDirNode.buildLists(); // build the list for my current node
+ final ArrayList<Node> dirList = curDirNode.nodeDirList;
+ if ((dirList == null) || dirList.isEmpty() || !recursive) {
return; // return if leaf node
}
- final int numDirs = al.size(); // otherwise, go deeper
+ final int numDirs = dirList.size(); // otherwise, go deeper
for (int i = 0; i < numDirs; i++) {
- buildDirTree(al.get(i), recursive);
+ buildDirTree(dirList.get(i), recursive);
}
}
/**
- * Creates a new List<String> of fileNames starting with the root directory
- * path.
+ * Creates a new List<String> of fileNames starting with the root directory path.
*
- * @param rootPath
- * absolute or relative path and must end with a file separator.
- * @param regExSelector
- * A RegEx matching pattern for putting a filename into the list. It
- * may be null.
- * @param recursive
- * If true, examine all subdirectories
+ * @param rootPath absolute or relative path and must end with a file separator.
+ * @param fileSelector A RegEx matching string for putting a filename into the list.
+ * It may be null, which selects all files.
+ * @param recursive If true, examine all subdirectories
* @return an ArrayListFile of the list of paths + fileNames.
*/
- public static List<String> appendFileList(final String rootPath, final String regExSelector,
+ public static List<String> appendFileList(final String rootPath, final String fileSelector,
final boolean recursive) {
Pattern filePattern = null;
- if ((regExSelector != null) && (regExSelector.length() > 0)) {
- filePattern = Pattern.compile(regExSelector);
+ if ((fileSelector != null) && (fileSelector.length() > 0)) {
+ filePattern = Pattern.compile(fileSelector);
}
final ArrayList<String> fileList = new ArrayList<>();
final Node root = new Node(rootPath, filePattern, recursive, fileList);
@@ -120,13 +117,30 @@ public class DirectoryWalker {
return fileList;
}
+ @Test
+ public static void printFiles() {
+ final String rootPath = "/Users/lrhodes/dev/git/Apache/datasketches-memory/src/";
+ final String regExSelector = ".+[.]java";
+ final boolean recursive = true;
+
+ final List<String> fileList = appendFileList(rootPath, regExSelector, recursive);
+ final int size = fileList.size();
+
+ for (int i = 0; i < size; i++) {
+ println(fileList.get(i));
+ }
+ println("Files: " + size);
+ }
+
+ static void println(final String s) { System.out.println(s); }
+
/**
* blah
* @param args blah
*/
public static void main(final String[] args) {
- final String rootPath = "/Users/lrhodes/dev/git/DataSketches.github.io/_site/docs/";
- final String regExSelector = ".+[.]html";
+ final String rootPath = "/Users/lrhodes/dev/git/Apache/datasketches-memory/src";
+ final String regExSelector = ".+[.]java";
final boolean recursive = true;
final List<String> fileList = appendFileList(rootPath, regExSelector, recursive);
diff --git a/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java b/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
index 81965a1..b9acaf2 100644
--- a/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
+++ b/src/main/java/com/yahoo/sketches/characterization/ZipfDistribution.java
@@ -27,19 +27,29 @@ public class ZipfDistribution {
private final double hIntegralNumberOfElements;
private final double s;
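+ /**
+ * Constructs a Zipf distribution sampler.
+ * @param numberOfElements the number of elements of the distribution
+ * @param exponent the exponent of the distribution
+ */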
public ZipfDistribution(final int numberOfElements, final double exponent) {
this.numberOfElements = numberOfElements;
this.exponent = exponent;
- this.hIntegralX1 = hIntegral(1.5) - 1.0;
- this.hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2);
- this.s = 2 - hIntegralInverse(hIntegral(2.5) - h(2.0));
+ hIntegralX1 = hIntegral(1.5) - 1.0;
+ hIntegralNumberOfElements = hIntegral(numberOfElements + F_1_2);
+ s = 2 - hIntegralInverse(hIntegral(2.5) - h(2.0));
}
+ /**
+ * Draws one random sample from this distribution.
+ * @return the sampled value k
+ */
public int sample() {
- while(true) {
- final double u = hIntegralNumberOfElements + rng.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements);
+ while (true) {
+ final double u = hIntegralNumberOfElements
+ + (rng.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements));
- double x = hIntegralInverse(u);
+ final double x = hIntegralInverse(u);
int k = (int) (x + F_1_2);
if (k < 1) {
@@ -48,7 +58,7 @@ public class ZipfDistribution {
k = numberOfElements;
}
- if (k - x <= s || u >= hIntegral(k + F_1_2) - h(k)) {
+ if (((k - x) <= s) || (u >= (hIntegral(k + F_1_2) - h(k)))) {
return k;
}
}
@@ -77,14 +87,14 @@ public class ZipfDistribution {
if (Math.abs(x) > TAYLOR_THRESHOLD) {
return Math.log1p(x) / x;
}
- return 1 - x * (F_1_2 - x * (F_1_3 - F_1_4 * x));
+ return 1 - (x * (F_1_2 - (x * (F_1_3 - (F_1_4 * x)))));
}
private static double helper2(final double x) {
if (Math.abs(x) > TAYLOR_THRESHOLD) {
return Math.expm1(x) / x;
}
- return 1 + x * F_1_2 * (1 + x * F_1_3 * (1 + F_1_4 * x));
+ return 1 + (x * F_1_2 * (1 + (x * F_1_3 * (1 + (F_1_4 * x)))));
}
}
diff --git a/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java b/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
index e8f22b3..7e1ca8b 100644
--- a/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
+++ b/src/main/java/com/yahoo/sketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
@@ -2,9 +2,9 @@ package com.yahoo.sketches.characterization.frequencies;
import static com.yahoo.sketches.Util.pwr2LawNext;
-import com.yahoo.sketches.characterization.Job;
-import com.yahoo.sketches.characterization.JobProfile;
-import com.yahoo.sketches.characterization.Properties;
+import com.yahoo.sketches.Job;
+import com.yahoo.sketches.JobProfile;
+import com.yahoo.sketches.Properties;
public abstract class BaseFrequenciesSpeedProfile implements JobProfile {
@@ -43,7 +43,8 @@ public abstract class BaseFrequenciesSpeedProfile implements JobProfile {
int streamLength = minStreamLen;
while (streamLength <= maxStreamLen) {
- final int numTrials = getNumTrials(streamLength, lgMinStreamLen, lgMaxStreamLen, lgMinTrials, lgMaxTrials);
+ final int numTrials = getNumTrials(streamLength, lgMinStreamLen, lgMaxStreamLen, lgMinTrials,
+ lgMaxTrials);
resetStats();
for (int i = 0; i < numTrials; i++) {
prepareTrial(streamLength);
diff --git a/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java b/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
index 025f219..1746540 100644
--- a/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
+++ b/src/main/java/com/yahoo/sketches/characterization/frequencies/LongsSketchSpeedProfile.java
@@ -1,10 +1,9 @@
package com.yahoo.sketches.characterization.frequencies;
import com.yahoo.memory.Memory;
-import com.yahoo.sketches.characterization.Properties;
-import com.yahoo.sketches.frequencies.LongsSketch;
-
+import com.yahoo.sketches.Properties;
import com.yahoo.sketches.characterization.ZipfDistribution;
+import com.yahoo.sketches.frequencies.LongsSketch;
public class LongsSketchSpeedProfile extends BaseFrequenciesSpeedProfile {
@@ -21,10 +20,10 @@ public class LongsSketchSpeedProfile extends BaseFrequenciesSpeedProfile {
@Override
void configure(final Properties properties) {
- this.k = Integer.parseInt(properties.mustGet("k"));
+ k = Integer.parseInt(properties.mustGet("k"));
final int range = Integer.parseInt(properties.mustGet("zipfRange"));
final double exponent = Double.parseDouble(properties.mustGet("zipfExponent"));
- this.zipf = new ZipfDistribution(range, exponent);
+ zipf = new ZipfDistribution(range, exponent);
}
@Override
diff --git a/streamA.txt.zip b/streamA.txt.zip
deleted file mode 100644
index f6a0155..0000000
--- a/streamA.txt.zip
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fffcff47170d405d78a622dc154f79d49a1d39a19595b455e2139a233bc10365
-size 489195747
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org