You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@datafu.apache.org by ey...@apache.org on 2020/02/06 13:33:21 UTC
[datafu] branch master updated: Enable logging in Pig tests
This is an automated email from the ASF dual-hosted git repository.
eyal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/datafu.git
The following commit(s) were added to refs/heads/master by this push:
new 90f4264 Enable logging in Pig tests
90f4264 is described below
commit 90f42640f55e744c65706f7d995d36912ae0288c
Author: Matthew Hayes <mh...@apache.org>
AuthorDate: Wed Feb 5 15:37:37 2020 -0800
Enable logging in Pig tests
Signed-off-by: Eyal Allweil <ey...@apache.org>
---
.../multilinestring/EcjMultilineProcessor.java | 4 +-
.../multilinestring/JavacMultilineProcessor.java | 4 +-
.../multilinestring/MultilineProcessor.java | 6 +-
datafu-pig/build.gradle | 4 +
.../src/test/java/datafu/test/pig/PigTests.java | 83 ++++++++-------
.../java/datafu/test/pig/hash/lsh/LSHPigTest.java | 116 ++++++++++-----------
6 files changed, 108 insertions(+), 109 deletions(-)
diff --git a/build-plugin/src/main/java/org/adrianwalker/multilinestring/EcjMultilineProcessor.java b/build-plugin/src/main/java/org/adrianwalker/multilinestring/EcjMultilineProcessor.java
index 7dbfe9a..cafab7f 100644
--- a/build-plugin/src/main/java/org/adrianwalker/multilinestring/EcjMultilineProcessor.java
+++ b/build-plugin/src/main/java/org/adrianwalker/multilinestring/EcjMultilineProcessor.java
@@ -23,11 +23,11 @@ import org.eclipse.jdt.internal.compiler.lookup.FieldBinding;
import java.lang.reflect.Constructor;
@SupportedAnnotationTypes({"org.adrianwalker.multilinestring.Multiline"})
-@SupportedSourceVersion(SourceVersion.RELEASE_6)
+@SupportedSourceVersion(SourceVersion.RELEASE_8)
public final class EcjMultilineProcessor extends AbstractProcessor {
private Elements elementUtils;
-
+
@Override
public void init(final ProcessingEnvironment procEnv) {
super.init(procEnv);
diff --git a/build-plugin/src/main/java/org/adrianwalker/multilinestring/JavacMultilineProcessor.java b/build-plugin/src/main/java/org/adrianwalker/multilinestring/JavacMultilineProcessor.java
index 39aa24e..f6221d1 100644
--- a/build-plugin/src/main/java/org/adrianwalker/multilinestring/JavacMultilineProcessor.java
+++ b/build-plugin/src/main/java/org/adrianwalker/multilinestring/JavacMultilineProcessor.java
@@ -20,12 +20,12 @@ import com.sun.tools.javac.tree.JCTree.JCVariableDecl;
import com.sun.tools.javac.tree.TreeMaker;
@SupportedAnnotationTypes({"org.adrianwalker.multilinestring.Multiline"})
-@SupportedSourceVersion(SourceVersion.RELEASE_6)
+@SupportedSourceVersion(SourceVersion.RELEASE_8)
public final class JavacMultilineProcessor extends AbstractProcessor {
private JavacElements elementUtils;
private TreeMaker maker;
-
+
@Override
public void init(final ProcessingEnvironment procEnv) {
super.init(procEnv);
diff --git a/build-plugin/src/main/java/org/adrianwalker/multilinestring/MultilineProcessor.java b/build-plugin/src/main/java/org/adrianwalker/multilinestring/MultilineProcessor.java
index 5fe247c..592b41c 100644
--- a/build-plugin/src/main/java/org/adrianwalker/multilinestring/MultilineProcessor.java
+++ b/build-plugin/src/main/java/org/adrianwalker/multilinestring/MultilineProcessor.java
@@ -16,12 +16,10 @@ import javax.lang.model.element.TypeElement;
@SupportedAnnotationTypes({"org.adrianwalker.multilinestring.Multiline"})
-// This generates a warning with Java 8 - however, if we switch to Java 8 and use SourceVersion.RELEASE_8, it
-// prevents compilation with Java 7. So we'll keep it and ignore the warning
-@SupportedSourceVersion(SourceVersion.RELEASE_7)
+@SupportedSourceVersion(SourceVersion.RELEASE_8)
public final class MultilineProcessor extends AbstractProcessor {
private Processor delegator = null;
-
+
@Override
public void init(final ProcessingEnvironment procEnv) {
super.init(procEnv);
diff --git a/datafu-pig/build.gradle b/datafu-pig/build.gradle
index 579a475..017b7c2 100644
--- a/datafu-pig/build.gradle
+++ b/datafu-pig/build.gradle
@@ -234,6 +234,10 @@ test {
// enable TestNG support (default is JUnit)
useTestNG()
+ testLogging {
+ showStandardStreams true
+ }
+
systemProperty 'datafu.jar.dir', file('build/libs')
systemProperty 'datafu.data.dir', file('data')
diff --git a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
index d83ff4f..327ed2f 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/PigTests.java
@@ -37,47 +37,50 @@ import org.apache.commons.io.IOUtils;
import org.apache.hadoop.metrics.jvm.JvmMetrics;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+import org.apache.log4j.ConsoleAppender;
import org.apache.pig.data.Tuple;
import org.apache.pig.pigunit.PigTest;
import org.apache.pig.tools.parameters.ParseException;
public abstract class PigTests
-{
+{
+
private String testFileDir;
private String savedUserDir;
-
+
+ private static final Logger logger = Logger.getLogger(PigTests.class);
+
@org.testng.annotations.BeforeClass
public void beforeClass()
{
- // TODO make it configurable whether this happens, for travis-ci we can't spam the logs so much,
- // however otherwise it is useful to see the errors
Logger.getRootLogger().removeAllAppenders();
+ Logger.getRootLogger().addAppender(new ConsoleAppender(new PatternLayout(PatternLayout.TTCC_CONVERSION_PATTERN)));
+ Logger.getRootLogger().setLevel(Level.INFO);
Logger.getLogger(JvmMetrics.class).setLevel(Level.OFF);
-
+
System.setProperty("pig.import.search.path", System.getProperty("user.dir") + File.separator + "src" + File.separator + "main" + File.separator + "resources");
// Test files will be created in the following sub-directory
- new File(System.getProperty("user.dir") + File.separator + "build", "test-files").mkdir();
+ new File(System.getProperty("user.dir") + File.separator + "build", "test-files").mkdir();
}
-
+
@org.testng.annotations.BeforeMethod
public void beforeMethod(Method method)
- {
+ {
// working directory needs to be changed to the location of the test files for the PigTests to work properly
this.savedUserDir = System.getProperty("user.dir");
this.testFileDir = System.getProperty("user.dir") + File.separator + "build" + File.separator + "test-files";
System.setProperty("user.dir", this.testFileDir);
-
- System.out.println("\n*** Running " + method.getName() + " ***");
}
- @org.testng.annotations.AfterMethod
+ @org.testng.annotations.AfterMethod
public void afterMethod(Method method)
{
- // restore the change made in the location of the working directory in beforeMethod
+ // restore the change made in the location of the working directory in beforeMethod
System.setProperty("user.dir", this.savedUserDir);
}
-
+
protected String[] getDefaultArgs()
{
String[] args = {
@@ -85,7 +88,7 @@ public abstract class PigTests
};
return args;
}
-
+
protected List<String> getDefaultArgsAsList()
{
String[] args = getDefaultArgs();
@@ -96,12 +99,12 @@ public abstract class PigTests
}
return argsList;
}
-
+
protected PigTest createPigTestFromString(String str, String... args) throws IOException
{
return createPigTest(str.split("\n"),args);
}
-
+
protected PigTest createPigTest(String[] lines, String... args) throws IOException
{
// append args to list of default args
@@ -110,7 +113,7 @@ public abstract class PigTests
{
theArgs.add(arg);
}
-
+
for (String arg : theArgs)
{
String[] parts = arg.split("=",2);
@@ -122,20 +125,20 @@ public abstract class PigTests
}
}
}
-
+
return new PigTest(lines);
}
-
+
protected PigTest createPigTest(String scriptPath, String... args) throws IOException
{
return createPigTest(getLinesFromFile(scriptPath), args);
}
-
+
protected String getDataDirParam()
{
return "DATA_DIR=" + getDataPath();
}
-
+
protected String getDataPath()
{
if (System.getProperty("datafu.data.dir") != null)
@@ -145,13 +148,13 @@ public abstract class PigTests
else
{
return new File(System.getProperty("user.dir"), "data").getAbsolutePath();
- }
+ }
}
-
+
protected String getJarPath()
- {
+ {
String jarDir = null;
-
+
if (System.getProperty("datafu.jar.dir") != null)
{
jarDir = System.getProperty("datafu.jar.dir");
@@ -159,10 +162,10 @@ public abstract class PigTests
else
{
jarDir = new File(System.getProperty("user.dir"), "build/libs").getAbsolutePath();
- }
-
+ }
+
File userDir = new File(jarDir);
-
+
String[] files = userDir.list(new FilenameFilter() {
@Override
@@ -170,9 +173,9 @@ public abstract class PigTests
{
return name.endsWith(".jar") && !name.contains("sources") && !name.contains("javadoc");
}
-
+
});
-
+
if (files == null || files.length == 0)
{
throw new RuntimeException("Could not find JAR file");
@@ -187,41 +190,41 @@ public abstract class PigTests
}
throw new RuntimeException("Found more JAR files than expected: " + sb.substring(0, sb.length()-1));
}
-
+
return userDir.getAbsolutePath() + "/" + files[0];
}
-
+
protected List<Tuple> getLinesForAlias(PigTest test, String alias) throws IOException, ParseException
{
return getLinesForAlias(test,alias,true);
}
-
+
protected List<Tuple> getLinesForAlias(PigTest test, String alias, boolean logValues) throws IOException, ParseException
{
Iterator<Tuple> tuplesIterator = test.getAlias(alias);
List<Tuple> tuples = new ArrayList<Tuple>();
if (logValues)
{
- System.out.println(String.format("Values for %s: ", alias));
+ logger.info(String.format("Values for %s: ", alias));
}
while (tuplesIterator.hasNext())
{
Tuple tuple = tuplesIterator.next();
if (logValues)
{
- System.out.println(tuple.toString());
+ logger.info(tuple.toString());
}
tuples.add(tuple);
}
return tuples;
}
-
+
protected void writeLinesToFile(String fileName, String... lines) throws IOException
{
File inputFile = deleteIfExists(getFile(fileName));
writeLinesToFile(inputFile, lines);
}
-
+
protected void writeLinesToFile(File file, String[] lines) throws IOException
{
FileWriter writer = new FileWriter(file);
@@ -252,15 +255,15 @@ public abstract class PigTests
}
return file;
}
-
+
protected File getFile(String fileName)
{
return new File(System.getProperty("user.dir"), fileName).getAbsoluteFile();
}
-
+
/**
* Gets the lines from a given file.
- *
+ *
* @param relativeFilePath The path relative to the datafu-tests project.
* @return The lines from the file
* @throws IOException
diff --git a/datafu-pig/src/test/java/datafu/test/pig/hash/lsh/LSHPigTest.java b/datafu-pig/src/test/java/datafu/test/pig/hash/lsh/LSHPigTest.java
index f652101..d95953f 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/hash/lsh/LSHPigTest.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/hash/lsh/LSHPigTest.java
@@ -53,26 +53,20 @@ import datafu.test.pig.PigTests;
public class LSHPigTest extends PigTests
{
-
- private static void setDebuggingLogging()
- {
- Logger.getRootLogger().setLevel(Level.INFO);
- Logger.getRootLogger().addAppender(new ConsoleAppender(new PatternLayout(PatternLayout.TTCC_CONVERSION_PATTERN)));
- }
-
+
private static void setMemorySettings()
{
System.getProperties().setProperty("mapred.map.child.java.opts", "-Xmx1G");
System.getProperties().setProperty("mapred.reduce.child.java.opts","-Xmx1G");
System.getProperties().setProperty("io.sort.mb","10");
}
-
+
/**
* PTS = LOAD 'input' AS (b:bag{t:tuple(idx:int, val:double)});
* STORE PTS INTO 'output';
*/
@Multiline private String sparseVectorTest;
-
+
@Test
public void testSparseVectors() throws IOException, ParseException
{
@@ -100,21 +94,21 @@ public class LSHPigTest extends PigTests
double interpretedField = interpreted.getEntry(i);
Assert.assertTrue(Math.abs(originalField - interpretedField) < 1e-5);
}
-
+
idx++;
}
}
-
+
/**
-
+
define LSH datafu.pig.hash.lsh.L1PStableHash('3', '150', '1', '5');
define METRIC datafu.pig.hash.lsh.metric.L1('3');
-
+
PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
-
- store PTS_HASHED INTO 'lsh_pts';
+
+ store PTS_HASHED INTO 'lsh_pts';
*/
@Multiline private String randomSeedTest;
@Test
@@ -163,17 +157,17 @@ public class LSHPigTest extends PigTests
System.out.println(1.0*numDiff / numHashes);
Assert.assertTrue(1.0*numDiff/numHashes > .8);
}
-
+
/**
-
+
define LSH datafu.pig.hash.lsh.L1PStableHash('3', '150', '1', '5', '0');
define METRIC datafu.pig.hash.lsh.metric.L1('3');
-
+
PTS = LOAD 'input' AS (pt:bag{t:tuple(idx:int, val:double)});
PTS_HASHED = foreach PTS generate pt as pt
, FLATTEN(LSH(pt));
PARTITIONS = group PTS_HASHED by (lsh_id, hash);
-
+
QUERIES = LOAD 'queries' as (pt:bag{t:tuple(idx:int, val:double)});
QUERIES_HASHED = foreach QUERIES generate pt as query_pt
, FLATTEN(LSH(pt))
@@ -182,32 +176,32 @@ public class LSHPigTest extends PigTests
NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
, METRIC(query_pt, 1000, PTS_HASHED) as neighbor
;
-
+
describe NEAR_NEIGHBORS;
NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
-
+
generate TOTUPLE(query_pt) as query_pt, neighbor.pt as matching_pts;
};
describe NEIGHBORS_PROJ;
NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
NEIGHBORS_GRP = group NOT_NULL by query_pt;
describe NEIGHBORS_GRP;
-
+
NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
MATCHING_PTS = foreach NOT_NULL generate matching_pts;
DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
};
STORE NEIGHBOR_CNT INTO 'neighbors';
-
-
+
+
*/
@Multiline private String l1SparseTest;
-
+
@Test
public void testL1UDFSparse() throws Exception
{
-
+
setMemorySettings();
RandomGenerator rg = new JDKRandomGenerator();
rg.setSeed(0);
@@ -232,21 +226,21 @@ public class LSHPigTest extends PigTests
public double distance(RealVector v1, RealVector v2) {
return L1.distance(v1, v2);
}
-
+
};
verifyPoints(neighbors, d, 1000);
}
-
+
/**
-
+
define LSH datafu.pig.hash.lsh.L1PStableHash('3', '150', '1', '5', '0');
define METRIC datafu.pig.hash.lsh.metric.L1('3');
-
+
PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
PARTITIONS = group PTS_HASHED by (lsh_id, hash);
-
+
QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
@@ -255,28 +249,28 @@ public class LSHPigTest extends PigTests
NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
, METRIC(query_pt, 1000, PTS_HASHED) as neighbor
;
-
+
describe NEAR_NEIGHBORS;
NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
-
+
generate query_pt as query_pt, neighbor.pt as matching_pts;
};
describe NEIGHBORS_PROJ;
NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
NEIGHBORS_GRP = group NOT_NULL by query_pt;
describe NEIGHBORS_GRP;
-
+
NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
};
STORE NEIGHBOR_CNT INTO 'neighbors';
-
-
+
+
*/
@Multiline private String l1Test;
-
+
@Test
public void testL1UDF() throws Exception
{
@@ -304,21 +298,21 @@ public class LSHPigTest extends PigTests
public double distance(RealVector v1, RealVector v2) {
return L1.distance(v1, v2);
}
-
+
};
verifyPoints(neighbors, d, 1000);
}
-
+
/**
-
+
define LSH datafu.pig.hash.lsh.L2PStableHash('3', '200', '1', '5', '0');
define METRIC datafu.pig.hash.lsh.metric.L2('3');
-
+
PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
PARTITIONS = group PTS_HASHED by (lsh_id, hash);
-
+
QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
@@ -329,25 +323,25 @@ public class LSHPigTest extends PigTests
;
describe NEAR_NEIGHBORS;
NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
-
+
generate query_pt as query_pt, neighbor.pt as matching_pts;
};
describe NEIGHBORS_PROJ;
NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
NEIGHBORS_GRP = group NOT_NULL by query_pt;
describe NEIGHBORS_GRP;
-
+
NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
};
STORE NEIGHBOR_CNT INTO 'neighbors';
-
-
+
+
*/
@Multiline private String l2Test;
-
+
@Test
public void testL2UDF() throws Exception
{
@@ -375,21 +369,21 @@ public class LSHPigTest extends PigTests
public double distance(RealVector v1, RealVector v2) {
return L2.distance(v1, v2);
}
-
+
};
verifyPoints(neighbors, d, 1000);
}
-
+
/**
-
+
define LSH datafu.pig.hash.lsh.CosineDistanceHash('3', '1500', '5', '0');
define METRIC datafu.pig.hash.lsh.metric.Cosine('3');
-
+
PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
PARTITIONS = group PTS_HASHED by (lsh_id, hash);
-
+
QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
, FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
@@ -401,14 +395,14 @@ public class LSHPigTest extends PigTests
;
describe NEAR_NEIGHBORS;
NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
-
+
generate query_pt as query_pt, neighbor.pt as matching_pts;
};
describe NEIGHBORS_PROJ;
NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
NEIGHBORS_GRP = group NOT_NULL by query_pt;
describe NEIGHBORS_GRP;
-
+
NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
@@ -416,11 +410,11 @@ public class LSHPigTest extends PigTests
};
describe NEIGHBOR_CNT;
STORE NEIGHBOR_CNT INTO 'neighbors';
-
-
+
+
*/
@Multiline private String cosTest;
-
+
@Test
public void testCosineUDF() throws Exception
{
@@ -448,7 +442,7 @@ public class LSHPigTest extends PigTests
public double distance(RealVector v1, RealVector v2) {
return Cosine.distance(v1, v2);
}
-
+
};
verifyPoints(neighbors, d, .001);
}
@@ -456,7 +450,7 @@ public class LSHPigTest extends PigTests
{
public double distance(RealVector v1, RealVector v2);
}
-
+
private void verifyPoints(List<Tuple> neighbors, Distance d, double threshold) throws PigException
{
for(Tuple t : neighbors)
@@ -471,7 +465,7 @@ public class LSHPigTest extends PigTests
}
}
}
-
+
private Iterable<Long> getCounts(List<Tuple> neighbors)
{
return Iterables.transform(neighbors, new Function<Tuple, Long>()
@@ -496,7 +490,7 @@ public class LSHPigTest extends PigTests
}
return input;
}
-
+
private String[] getLines(List<RealVector> vectors)
{
String[] input = new String[vectors.size()];