Posted to commits@mahout.apache.org by sm...@apache.org on 2015/04/06 04:22:13 UTC
[1/5] mahout git commit: MAHOUT-1652: Java 7 upgrade, this closes apache/mahout#112
Repository: mahout
Updated Branches:
refs/heads/master 53e5adac2 -> 85f9ece66
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassifier.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassifier.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassifier.java
index d5f8d64..dcd4062 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassifier.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassifier.java
@@ -18,9 +18,11 @@ package org.apache.mahout.clustering.classify;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
+import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -41,19 +43,16 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterab
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/**
* This classifier works with any ClusteringPolicy and its associated Clusters.
* It is initialized with a policy and a list of compatible clusters and
* thereafter it can classify any new Vector into one or more of the clusters
* based upon the pdf() function which each cluster supports.
- *
+ * <p/>
* In addition, it is an OnlineLearner and can be trained. Training amounts to
* asking the actual model to observe the vector and closing the classifier
* causes all the models to computeParameters.
- *
+ * <p/>
* Because a ClusterClassifier implements Writable, it can be written-to and
* read-from a sequence file as a single entity. For sequential and MapReduce
* clustering in conjunction with a ClusterIterator; however, it utilizes an
@@ -63,42 +62,41 @@ import com.google.common.io.Closeables;
* produce them.
*/
public class ClusterClassifier extends AbstractVectorClassifier implements OnlineLearner, Writable {
-
+
private static final String POLICY_FILE_NAME = "_policy";
-
+
private List<Cluster> models;
-
+
private String modelClass;
-
+
private ClusteringPolicy policy;
-
+
/**
* The public constructor accepts a list of clusters to become the models
- *
- * @param models
- * a List<Cluster>
- * @param policy
- * a ClusteringPolicy
+ *
+ * @param models a List<Cluster>
+ * @param policy a ClusteringPolicy
*/
public ClusterClassifier(List<Cluster> models, ClusteringPolicy policy) {
this.models = models;
modelClass = models.get(0).getClass().getName();
this.policy = policy;
}
-
+
// needed for serialization/De-serialization
- public ClusterClassifier() {}
-
+ public ClusterClassifier() {
+ }
+
// only used by MR ClusterIterator
protected ClusterClassifier(ClusteringPolicy policy) {
this.policy = policy;
}
-
+
@Override
public Vector classify(Vector instance) {
return policy.classify(instance, this);
}
-
+
@Override
public double classifyScalar(Vector instance) {
if (models.size() == 2) {
@@ -108,12 +106,12 @@ public class ClusterClassifier extends AbstractVectorClassifier implements Onlin
}
throw new IllegalStateException();
}
-
+
@Override
public int numCategories() {
return models.size();
}
-
+
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(models.size());
@@ -123,12 +121,12 @@ public class ClusterClassifier extends AbstractVectorClassifier implements Onlin
cluster.write(out);
}
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
int size = in.readInt();
modelClass = in.readUTF();
- models = Lists.newArrayList();
+ models = new ArrayList<>();
ClusteringPolicyWritable clusteringPolicyWritable = new ClusteringPolicyWritable();
clusteringPolicyWritable.readFields(in);
policy = clusteringPolicyWritable.getValue();
@@ -138,73 +136,66 @@ public class ClusterClassifier extends AbstractVectorClassifier implements Onlin
models.add(element);
}
}
-
+
@Override
public void train(int actual, Vector instance) {
models.get(actual).observe(new VectorWritable(instance));
}
-
+
/**
* Train the models given an additional weight. Unique to ClusterClassifier
- *
- * @param actual
- * the int index of a model
- * @param data
- * a data Vector
- * @param weight
- * a double weighting factor
+ *
+ * @param actual the int index of a model
+ * @param data a data Vector
+ * @param weight a double weighting factor
*/
public void train(int actual, Vector data, double weight) {
models.get(actual).observe(new VectorWritable(data), weight);
}
-
+
@Override
public void train(long trackingKey, String groupKey, int actual, Vector instance) {
models.get(actual).observe(new VectorWritable(instance));
}
-
+
@Override
public void train(long trackingKey, int actual, Vector instance) {
models.get(actual).observe(new VectorWritable(instance));
}
-
+
@Override
public void close() {
policy.close(this);
}
-
+
public List<Cluster> getModels() {
return models;
}
-
+
public ClusteringPolicy getPolicy() {
return policy;
}
-
+
public void writeToSeqFiles(Path path) throws IOException {
writePolicy(policy, path);
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(path.toUri(), config);
- SequenceFile.Writer writer = null;
ClusterWritable cw = new ClusterWritable();
for (int i = 0; i < models.size(); i++) {
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, config,
+ new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
+ ClusterWritable.class)) {
Cluster cluster = models.get(i);
cw.setValue(cluster);
- writer = new SequenceFile.Writer(fs, config,
- new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
- ClusterWritable.class);
Writable key = new IntWritable(i);
writer.append(key, cw);
- } finally {
- Closeables.close(writer, false);
}
}
}
-
+
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
Configuration config = new Configuration();
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST,
PathFilters.logsCRCFilter(), config)) {
Cluster cluster = cw.getValue();
@@ -215,7 +206,7 @@ public class ClusterClassifier extends AbstractVectorClassifier implements Onlin
modelClass = models.get(0).getClass().getName();
this.policy = readPolicy(path);
}
-
+
public static ClusteringPolicy readPolicy(Path path) throws IOException {
Path policyPath = new Path(path, POLICY_FILE_NAME);
Configuration config = new Configuration();
@@ -227,7 +218,7 @@ public class ClusterClassifier extends AbstractVectorClassifier implements Onlin
Closeables.close(reader, true);
return cpw.getValue();
}
-
+
public static void writePolicy(ClusteringPolicy policy, Path path) throws IOException {
Path policyPath = new Path(path, POLICY_FILE_NAME);
Configuration config = new Configuration();
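
As context for the Javadoc above: a minimal usage sketch of the ClusterClassifier API, not part of the commit. It assumes Kluster's (center, id, measure) constructor and KMeansClusteringPolicy from the same codebase; treat the exact signatures as assumptions.

import java.util.ArrayList;
import java.util.List;

import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.classify.ClusterClassifier;
import org.apache.mahout.clustering.iterator.KMeansClusteringPolicy;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class ClusterClassifierSketch {
  public static void main(String[] args) {
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    List<Cluster> models = new ArrayList<>();
    models.add(new Kluster(new DenseVector(new double[] {0, 0}), 0, measure));
    models.add(new Kluster(new DenseVector(new double[] {5, 5}), 1, measure));

    ClusterClassifier classifier = new ClusterClassifier(models, new KMeansClusteringPolicy());

    Vector point = new DenseVector(new double[] {4.5, 5.2});
    Vector pdfs = classifier.classify(point);      // one pdf-based weight per model
    classifier.train(pdfs.maxValueIndex(), point); // let the winning model observe the point
    classifier.close();                            // triggers computeParameters() on all models
  }
}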
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java b/mr/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
index c6c8427..98eb944 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
@@ -18,9 +18,9 @@
package org.apache.mahout.clustering.fuzzykmeans;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
@@ -265,7 +265,7 @@ public class FuzzyKMeansDriver extends AbstractJob {
boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
FuzzyKMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
if (conf == null) {
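
The Lists.newArrayList() to new ArrayList<>() substitution above recurs throughout this patch. Guava's collection factories existed largely to infer type arguments before Java 7; the diamond operator now does the same inference in the JDK. A minimal illustration (hypothetical class, not part of the patch):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class DiamondSketch {
  public static void main(String[] args) {
    // Java 6 had to repeat the type arguments on the right-hand side...
    List<Map<String, Integer>> verbose = new ArrayList<Map<String, Integer>>();
    // ...which Guava's factory methods avoided by inferring them from the target:
    //   List<Map<String, Integer>> inferred = Lists.newArrayList();
    // Java 7's diamond operator gives the same inference without the dependency:
    List<Map<String, Integer>> concise = new ArrayList<>();
    System.out.println(verbose.equals(concise)); // true: both are empty ArrayLists
  }
}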
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java b/mr/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
index bf42eb1..ca63b0f 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
@@ -18,10 +18,10 @@
package org.apache.mahout.clustering.iterator;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -43,7 +43,7 @@ public class CIReducer extends Reducer<IntWritable,ClusterWritable,IntWritable,C
Cluster cluster = iter.next().getValue();
first.observe(cluster);
}
- List<Cluster> models = Lists.newArrayList();
+ List<Cluster> models = new ArrayList<>();
models.add(first);
classifier = new ClusterClassifier(models, policy);
classifier.close();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java b/mr/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
index bc91f24..b4e41b6 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering.iterator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@@ -28,8 +29,6 @@ import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansClusterer;
import org.apache.mahout.clustering.fuzzykmeans.SoftCluster;
import org.apache.mahout.math.Vector;
-import com.google.common.collect.Lists;
-
/**
* This is a probability-weighted clustering policy, suitable for fuzzy k-means
* clustering
@@ -55,8 +54,8 @@ public class FuzzyKMeansClusteringPolicy extends AbstractClusteringPolicy {
@Override
public Vector classify(Vector data, ClusterClassifier prior) {
- Collection<SoftCluster> clusters = Lists.newArrayList();
- List<Double> distances = Lists.newArrayList();
+ Collection<SoftCluster> clusters = new ArrayList<>();
+ List<Double> distances = new ArrayList<>();
for (Cluster model : prior.getModels()) {
SoftCluster sc = (SoftCluster) model;
clusters.add(sc);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java b/mr/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
index 13f6b46..3b9094e 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
@@ -17,9 +17,9 @@
package org.apache.mahout.clustering.kmeans;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
@@ -206,7 +206,7 @@ public class KMeansDriver extends AbstractJob {
InterruptedException, ClassNotFoundException {
double convergenceDelta = Double.parseDouble(delta);
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
KMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters);
if (clusters.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java b/mr/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
index cc9e4cd..fbbabc5 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/kmeans/RandomSeedGenerator.java
@@ -18,12 +18,11 @@
package org.apache.mahout.clustering.kmeans;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -85,12 +84,11 @@ public final class RandomSeedGenerator {
}
FileStatus[] inputFiles = fs.globStatus(inputPathPattern, PathFilters.logsCRCFilter());
- SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, ClusterWritable.class);
Random random = (seed != null) ? RandomUtils.getRandom(seed) : RandomUtils.getRandom();
- List<Text> chosenTexts = Lists.newArrayListWithCapacity(k);
- List<ClusterWritable> chosenClusters = Lists.newArrayListWithCapacity(k);
+ List<Text> chosenTexts = new ArrayList<>(k);
+ List<ClusterWritable> chosenClusters = new ArrayList<>(k);
int nextClusterId = 0;
int index = 0;
@@ -123,13 +121,12 @@ public final class RandomSeedGenerator {
}
}
- try {
+ try (SequenceFile.Writer writer =
+ SequenceFile.createWriter(fs, conf, outFile, Text.class, ClusterWritable.class)){
for (int i = 0; i < chosenTexts.size(); i++) {
writer.append(chosenTexts.get(i), chosenClusters.get(i));
}
log.info("Wrote {} Klusters to {}", k, outFile);
- } finally {
- Closeables.close(writer, false);
}
}
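
The hunk above shows the other recurring change in this patch: try-with-resources replacing the try/finally plus Closeables.close() pattern. A standalone before/after sketch using plain java.io types rather than Hadoop's (illustrative only):

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;

public class TryWithResourcesSketch {
  // Pre-Java-7 shape: explicit finally block and null check (or Guava's Closeables.close).
  static void oldStyle(String path) throws IOException {
    Writer writer = null;
    try {
      writer = new FileWriter(path);
      writer.write("hello");
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }

  // Java 7 shape: the compiler generates the close() call. Any AutoCloseable qualifies,
  // which is why SequenceFile.Writer and the Closeable iterators in this patch work directly.
  static void newStyle(String path) throws IOException {
    try (Writer writer = new FileWriter(path)) {
      writer.write("hello");
    }
  }

  public static void main(String[] args) throws IOException {
    String path = File.createTempFile("sketch", ".txt").getPath();
    oldStyle(path);
    newStyle(path);
  }
}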
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
index 3eee446..31c0d60 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
@@ -16,9 +16,13 @@
*/
package org.apache.mahout.clustering.lda.cvb;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
+
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
@@ -45,10 +49,6 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.net.URI;
-import java.util.List;
-
/**
* See {@link CachingCVB0Mapper} for more details on scalability and room for improvement.
* To try out this LDA implementation without using Hadoop, check out
@@ -274,7 +274,7 @@ public class CVB0Driver extends AbstractJob {
conf.set(MODEL_WEIGHT, "1"); // TODO
conf.set(TEST_SET_FRACTION, String.valueOf(testFraction));
- List<Double> perplexities = Lists.newArrayList();
+ List<Double> perplexities = new ArrayList<>();
for (int i = 1; i <= iterationNumber; i++) {
// form path to model
Path modelPath = modelPath(topicModelStateTempPath, i);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
index 07ae100..d7d09c5 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
@@ -16,8 +16,12 @@
*/
package org.apache.mahout.clustering.lda.cvb;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -44,17 +48,13 @@ import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.DistributedRowMatrixWriter;
import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.SparseRowMatrix;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.NamedVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
/**
* Runs the same algorithm as {@link CVB0Driver}, but sequentially, in memory. Memory requirements
* are currently: the entire corpus is read into RAM, two copies of the model (each of size
@@ -111,7 +111,7 @@ public class InMemoryCollapsedVariationalBayes0 extends AbstractJob {
this.terms = terms;
this.initialModelCorpusFraction = modelCorpusFraction;
numTerms = terms != null ? terms.length : corpus.numCols();
- Map<String, Integer> termIdMap = Maps.newHashMap();
+ Map<String, Integer> termIdMap = new HashMap<>();
if (terms != null) {
for (int t = 0; t < terms.length; t++) {
termIdMap.put(terms[t], t);
@@ -414,35 +414,12 @@ public class InMemoryCollapsedVariationalBayes0 extends AbstractJob {
return 0;
}
- /*
- private static Map<Integer, Map<String, Integer>> loadCorpus(String path) throws IOException {
- List<String> lines = Resources.readLines(Resources.getResource(path), Charsets.UTF_8);
- Map<Integer, Map<String, Integer>> corpus = Maps.newHashMap();
- for (int i=0; i<lines.size(); i++) {
- String line = lines.get(i);
- Map<String, Integer> doc = Maps.newHashMap();
- for (String s : line.split(" ")) {
- s = s.replaceAll("\\W", "").toLowerCase().trim();
- if (s.length() == 0) {
- continue;
- }
- if (!doc.containsKey(s)) {
- doc.put(s, 0);
- }
- doc.put(s, doc.get(s) + 1);
- }
- corpus.put(i, doc);
- }
- return corpus;
- }
- */
-
private static String[] loadDictionary(String dictionaryPath, Configuration conf) {
if (dictionaryPath == null) {
return null;
}
Path dictionaryFile = new Path(dictionaryPath);
- List<Pair<Integer, String>> termList = Lists.newArrayList();
+ List<Pair<Integer, String>> termList = new ArrayList<>();
int maxTermId = 0;
// key is word value is id
for (Pair<Writable, IntWritable> record
@@ -467,7 +444,7 @@ public class InMemoryCollapsedVariationalBayes0 extends AbstractJob {
throws IOException {
Path vectorPath = new Path(vectorPathString);
FileSystem fs = vectorPath.getFileSystem(conf);
- List<Path> subPaths = Lists.newArrayList();
+ List<Path> subPaths = new ArrayList<>();
if (fs.isFile(vectorPath)) {
subPaths.add(vectorPath);
} else {
@@ -475,7 +452,7 @@ public class InMemoryCollapsedVariationalBayes0 extends AbstractJob {
subPaths.add(fileStatus.getPath());
}
}
- List<Pair<Integer, Vector>> rowList = Lists.newArrayList();
+ List<Pair<Integer, Vector>> rowList = new ArrayList<>();
int numRows = Integer.MIN_VALUE;
int numCols = -1;
boolean sequentialAccess = false;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
index 912b6d5..c3f2bc0 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
@@ -16,19 +16,10 @@
*/
package org.apache.mahout.clustering.lda.cvb;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import org.apache.hadoop.fs.Path;
-import org.apache.mahout.math.Matrix;
-import org.apache.mahout.math.MatrixSlice;
-import org.apache.mahout.math.SparseRowMatrix;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorIterable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -38,6 +29,15 @@ import java.util.concurrent.Callable;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.MatrixSlice;
+import org.apache.mahout.math.SparseRowMatrix;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorIterable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Multithreaded LDA model trainer class, which primarily operates by running a "map/reduce"
* operation, all in memory locally (ie not a hadoop job!) : the "map" operation is to take
@@ -141,7 +141,7 @@ public class ModelTrainer {
long startTime = System.nanoTime();
int i = 0;
double[] times = new double[100];
- Map<Vector, Vector> batch = Maps.newHashMap();
+ Map<Vector, Vector> batch = new HashMap<>();
int numTokensInBatch = 0;
long batchStart = System.nanoTime();
while (docIterator.hasNext() && docTopicIterator.hasNext()) {
@@ -185,7 +185,7 @@ public class ModelTrainer {
public void batchTrain(Map<Vector, Vector> batch, boolean update, int numDocTopicsIters) {
while (true) {
try {
- List<TrainerRunnable> runnables = Lists.newArrayList();
+ List<TrainerRunnable> runnables = new ArrayList<>();
for (Map.Entry<Vector, Vector> entry : batch.entrySet()) {
runnables.add(new TrainerRunnable(readModel, null, entry.getKey(),
entry.getValue(), new SparseRowMatrix(numTopics, numTerms, true),
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
index 7b7816c..9ba77c1 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
@@ -16,7 +16,18 @@
*/
package org.apache.mahout.clustering.lda.cvb;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -39,17 +50,6 @@ import org.apache.mahout.math.stats.Sampler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
/**
* Thin wrapper around a {@link Matrix} of counts of occurrences of (topic, term) pairs. Dividing
* {code topicTermCount.viewRow(topic).get(term)} by the sum over the values for all terms in that
@@ -205,7 +205,7 @@ public class TopicModel implements Configurable, Iterable<MatrixSlice> {
throws IOException {
int numTopics = -1;
int numTerms = -1;
- List<Pair<Integer, Vector>> rows = Lists.newArrayList();
+ List<Pair<Integer, Vector>> rows = new ArrayList<>();
for (Path modelPath : modelPaths) {
for (Pair<IntWritable, VectorWritable> row
: new SequenceFileIterable<IntWritable, VectorWritable>(modelPath, true, conf)) {
@@ -414,7 +414,7 @@ public class TopicModel implements Configurable, Iterable<MatrixSlice> {
}
public static String vectorToSortedString(Vector vector, String[] dictionary) {
- List<Pair<String,Double>> vectorValues = Lists.newArrayListWithCapacity(vector.getNumNondefaultElements());
+ List<Pair<String,Double>> vectorValues = new ArrayList<>(vector.getNumNondefaultElements());
for (Element e : vector.nonZeroes()) {
vectorValues.add(Pair.of(dictionary != null ? dictionary[e.index()] : String.valueOf(e.index()),
e.get()));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/spectral/VectorCache.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/spectral/VectorCache.java b/mr/src/main/java/org/apache/mahout/clustering/spectral/VectorCache.java
index 60e0a2e..4ec8149 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/spectral/VectorCache.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/spectral/VectorCache.java
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
@@ -70,12 +69,9 @@ public final class VectorCache {
DistributedCache.setCacheFiles(new URI[]{output.toUri()}, conf);
// set up the writer
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output,
- IntWritable.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output,
+ IntWritable.class, VectorWritable.class)){
writer.append(key, new VectorWritable(vector));
- } finally {
- Closeables.close(writer, false);
}
if (deleteOnExit) {
@@ -112,12 +108,9 @@ public final class VectorCache {
*/
public static Vector load(Configuration conf, Path input) throws IOException {
log.info("Loading vector from: {}", input);
- SequenceFileValueIterator<VectorWritable> iterator =
- new SequenceFileValueIterator<>(input, true, conf);
- try {
+ try (SequenceFileValueIterator<VectorWritable> iterator =
+ new SequenceFileValueIterator<>(input, true, conf)){
return iterator.next().get();
- } finally {
- Closeables.close(iterator, true);
}
}
}
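
A detail worth noting in the load() conversion above: returning from inside a try-with-resources block is safe, since the return expression is evaluated first and close() runs before control leaves the method. A self-contained sketch with a toy resource standing in for SequenceFileValueIterator:

import java.util.Iterator;

public class ReturnFromTrySketch {
  // A toy AutoCloseable standing in for SequenceFileValueIterator.
  static class TracingIterator implements AutoCloseable, Iterator<String> {
    private boolean served = false;
    @Override public boolean hasNext() { return !served; }
    @Override public String next() { served = true; return "value"; }
    @Override public void remove() { throw new UnsupportedOperationException(); }
    @Override public void close() { System.out.println("closed"); }
  }

  static String load() {
    try (TracingIterator it = new TracingIterator()) {
      return it.next(); // evaluated first; close() runs before the caller sees the result
    }
  }

  public static void main(String[] args) {
    System.out.println(load()); // prints "closed", then "value"
  }
}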
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/spectral/kmeans/EigenSeedGenerator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/spectral/kmeans/EigenSeedGenerator.java b/mr/src/main/java/org/apache/mahout/clustering/spectral/kmeans/EigenSeedGenerator.java
index 5f9c1a6..3ce94dc 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/spectral/kmeans/EigenSeedGenerator.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/spectral/kmeans/EigenSeedGenerator.java
@@ -18,6 +18,7 @@
package org.apache.mahout.clustering.spectral.kmeans;
import java.io.IOException;
+import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
@@ -39,9 +40,6 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
/**
* Given an Input Path containing a {@link org.apache.hadoop.io.SequenceFile}, select k vectors and write them to the
* output file as a {@link org.apache.mahout.clustering.kmeans.Kluster} representing the initial centroid to use. The
@@ -72,15 +70,14 @@ public final class EigenSeedGenerator {
}
FileStatus[] inputFiles = fs.globStatus(inputPathPattern, PathFilters.logsCRCFilter());
- SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, ClusterWritable.class);
- Map<Integer,Double> maxEigens = Maps.newHashMapWithExpectedSize(k); // store
+ Map<Integer,Double> maxEigens = new HashMap<>(k); // store
// max
// value
// of
// each
// column
- Map<Integer,Text> chosenTexts = Maps.newHashMapWithExpectedSize(k);
- Map<Integer,ClusterWritable> chosenClusters = Maps.newHashMapWithExpectedSize(k);
+ Map<Integer,Text> chosenTexts = new HashMap<>(k);
+ Map<Integer,ClusterWritable> chosenClusters = new HashMap<>(k);
for (FileStatus fileStatus : inputFiles) {
if (!fileStatus.isDir()) {
@@ -108,13 +105,12 @@ public final class EigenSeedGenerator {
}
}
- try {
+ try (SequenceFile.Writer writer =
+ SequenceFile.createWriter(fs, conf, outFile, Text.class, ClusterWritable.class)){
for (Integer key : maxEigens.keySet()) {
writer.append(chosenTexts.get(key), chosenClusters.get(key));
}
log.info("EigenSeedGenerator:: Wrote {} Klusters to {}", chosenTexts.size(), outFile);
- } finally {
- Closeables.close(writer, false);
}
}
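
One subtlety in this hunk: Guava's Maps.newHashMapWithExpectedSize(k) sizes the map so that k insertions never rehash, while new HashMap<>(k) sets the raw initial capacity, so the table can still resize once while filling (HashMap grows when size exceeds capacity times its 0.75 load factor). Harmless here, but a faithful translation would pad the capacity; a sketch of roughly what Guava computes:

import java.util.HashMap;
import java.util.Map;

public class ExpectedSizeSketch {
  // Roughly what Guava's newHashMapWithExpectedSize does internally: request
  // enough capacity that expectedSize entries stay under the 0.75 load factor.
  static <K, V> Map<K, V> mapWithExpectedSize(int expectedSize) {
    return new HashMap<>((int) (expectedSize / 0.75f) + 1);
  }

  public static void main(String[] args) {
    Map<Integer, Double> maxEigens = mapWithExpectedSize(100);
    for (int i = 0; i < 100; i++) {
      maxEigens.put(i, (double) i); // all 100 insertions fit without a rehash
    }
    System.out.println(maxEigens.size());
  }
}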
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
index 25a4022..25806fe 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
@@ -17,6 +17,7 @@
package org.apache.mahout.clustering.streaming.cluster;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -26,7 +27,6 @@ import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
-import com.google.common.collect.Lists;
import org.apache.mahout.clustering.ClusteringUtils;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.RandomUtils;
@@ -169,7 +169,7 @@ public class BallKMeans implements Iterable<Centroid> {
// If there will be no points assigned to the test set, return now.
if (testProbability == 0) {
return new Pair<List<? extends WeightedVector>, List<? extends WeightedVector>>(datapoints,
- Lists.<WeightedVector>newArrayList());
+ new ArrayList<WeightedVector>());
}
int numTest = (int) (testProbability * datapoints.size());
@@ -190,7 +190,7 @@ public class BallKMeans implements Iterable<Centroid> {
*/
public UpdatableSearcher cluster(List<? extends WeightedVector> datapoints) {
Pair<List<? extends WeightedVector>, List<? extends WeightedVector>> trainTestSplit = splitTrainTest(datapoints);
- List<Vector> bestCentroids = Lists.newArrayList();
+ List<Vector> bestCentroids = new ArrayList<>();
double cost = Double.POSITIVE_INFINITY;
double bestCost = Double.POSITIVE_INFINITY;
for (int i = 0; i < numRuns; ++i) {
@@ -377,11 +377,11 @@ public class BallKMeans implements Iterable<Centroid> {
DistanceMeasure distanceMeasure = centroids.getDistanceMeasure();
// closestClusterDistances.get(i) is the distance from the i'th cluster to its closest
// neighboring cluster.
- List<Double> closestClusterDistances = Lists.newArrayListWithExpectedSize(numClusters);
+ List<Double> closestClusterDistances = new ArrayList<>(numClusters);
// clusterAssignments[i] == j means that the i'th point is assigned to the j'th cluster. When
// these don't change, we are done.
// Each point is assigned to the invalid "-1" cluster initially.
- List<Integer> clusterAssignments = Lists.newArrayList(Collections.nCopies(datapoints.size(), -1));
+ List<Integer> clusterAssignments = new ArrayList<>(Collections.nCopies(datapoints.size(), -1));
boolean changed = true;
for (int i = 0; changed && i < maxNumIterations; i++) {
@@ -398,7 +398,7 @@ public class BallKMeans implements Iterable<Centroid> {
// Copies the current cluster centroids to newClusters and sets their weights to 0. This is
// so we calculate the new centroids as we go through the datapoints.
- List<Centroid> newCentroids = Lists.newArrayList();
+ List<Centroid> newCentroids = new ArrayList<>();
for (Vector centroid : centroids) {
// need a deep copy because we will mutate these values
Centroid newCentroid = (Centroid)centroid.clone();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
index 0e3f068..604bc9d 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
@@ -17,6 +17,7 @@
package org.apache.mahout.clustering.streaming.cluster;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -25,7 +26,6 @@ import java.util.Random;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
-import com.google.common.collect.Lists;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Centroid;
@@ -323,7 +323,7 @@ public class StreamingKMeans implements Iterable<Centroid> {
if (!collapseClusters && centroids.size() > clusterOvershoot * numClusters) {
numClusters = (int) Math.max(numClusters, clusterLogFactor * Math.log(numProcessedDatapoints));
- List<Centroid> shuffled = Lists.newArrayList();
+ List<Centroid> shuffled = new ArrayList<>();
for (Vector vector : centroids) {
shuffled.add((Centroid) vector);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
index 73776b9..0f6f7f2 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansDriver.java
@@ -18,6 +18,7 @@
package org.apache.mahout.clustering.streaming.mapreduce;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@@ -26,7 +27,6 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -425,13 +425,13 @@ public final class StreamingKMeansDriver extends AbstractJob {
long start = System.currentTimeMillis();
// Run StreamingKMeans step in parallel by spawning 1 thread per input path to process.
ExecutorService pool = Executors.newCachedThreadPool();
- List<Future<Iterable<Centroid>>> intermediateCentroidFutures = Lists.newArrayList();
+ List<Future<Iterable<Centroid>>> intermediateCentroidFutures = new ArrayList<>();
for (FileStatus status : HadoopUtil.listStatus(FileSystem.get(conf), input, PathFilters.logsCRCFilter())) {
intermediateCentroidFutures.add(pool.submit(new StreamingKMeansThread(status.getPath(), conf)));
}
log.info("Finished running Mappers");
// Merge the resulting "mapper" centroids.
- List<Centroid> intermediateCentroids = Lists.newArrayList();
+ List<Centroid> intermediateCentroids = new ArrayList<>();
for (Future<Iterable<Centroid>> futureIterable : intermediateCentroidFutures) {
for (Centroid centroid : futureIterable.get()) {
intermediateCentroids.add(centroid);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansMapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansMapper.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansMapper.java
index ced11ea..f12a876 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansMapper.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansMapper.java
@@ -18,9 +18,9 @@
package org.apache.mahout.clustering.streaming.mapreduce;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
@@ -59,7 +59,7 @@ public class StreamingKMeansMapper extends Mapper<Writable, VectorWritable, IntW
StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF);
if (estimatedDistanceCutoff == StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF) {
estimateDistanceCutoff = true;
- estimatePoints = Lists.newArrayList();
+ estimatePoints = new ArrayList<>();
}
// There is no way of estimating the distance cutoff unless we have some data.
clusterer = new StreamingKMeans(searcher, numClusters, estimatedDistanceCutoff);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
index acb2b56..24cc1db 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
@@ -17,11 +17,11 @@
package org.apache.mahout.clustering.streaming.mapreduce;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.ClusteringUtils;
@@ -61,7 +61,7 @@ public class StreamingKMeansThread implements Callable<Iterable<Centroid>> {
Iterator<Centroid> dataPointsIterator = dataPoints.iterator();
if (estimateDistanceCutoff == StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF) {
- List<Centroid> estimatePoints = Lists.newArrayListWithExpectedSize(NUM_ESTIMATE_POINTS);
+ List<Centroid> estimatePoints = new ArrayList<>(NUM_ESTIMATE_POINTS);
while (dataPointsIterator.hasNext() && estimatePoints.size() < NUM_ESTIMATE_POINTS) {
Centroid centroid = dataPointsIterator.next();
estimatePoints.add(centroid);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
index 4bffb2b..f00cf56 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
@@ -22,7 +22,6 @@ import java.io.IOException;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -133,31 +132,23 @@ public final class StreamingKMeansUtilsMR {
*/
public static void writeCentroidsToSequenceFile(Iterable<Centroid> centroids, Path path, Configuration conf)
throws IOException {
- SequenceFile.Writer writer = null;
- try {
- writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
- path, IntWritable.class, CentroidWritable.class);
+ try (SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
+ path, IntWritable.class, CentroidWritable.class)) {
int i = 0;
for (Centroid centroid : centroids) {
writer.append(new IntWritable(i++), new CentroidWritable(centroid));
}
- } finally {
- Closeables.close(writer, true);
}
}
public static void writeVectorsToSequenceFile(Iterable<? extends Vector> datapoints, Path path, Configuration conf)
throws IOException {
- SequenceFile.Writer writer = null;
- try {
- writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
- path, IntWritable.class, VectorWritable.class);
+ try (SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(conf), conf,
+ path, IntWritable.class, VectorWritable.class)){
int i = 0;
for (Vector vector : datapoints) {
writer.append(new IntWritable(i++), new VectorWritable(vector));
}
- } finally {
- Closeables.close(writer, true);
}
}
}
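
A behavioral footnote on the two conversions above: the old Closeables.close(writer, true) logged and swallowed any IOException from close(), whereas try-with-resources propagates it, and if the body also threw, the close() failure is attached via addSuppressed rather than lost. A toy demonstration:

import java.io.IOException;

public class SuppressedSketch {
  static class FailingResource implements AutoCloseable {
    @Override public void close() throws IOException {
      throw new IOException("close failed");
    }
  }

  public static void main(String[] args) {
    try {
      try (FailingResource r = new FailingResource()) {
        throw new IllegalStateException("body failed");
      }
    } catch (Exception e) {
      // The body's exception wins; close()'s exception is suppressed, not lost.
      System.out.println(e.getMessage());                    // body failed
      System.out.println(e.getSuppressed()[0].getMessage()); // close failed
    }
  }
}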
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java b/mr/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
index 55b7848..d7ca554 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
@@ -22,7 +22,6 @@ import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Iterator;
-import com.google.common.base.Charsets;
import com.google.common.collect.Iterables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
@@ -32,6 +31,7 @@ import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.util.HelpFormatter;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -71,7 +71,7 @@ public class ResplitSequenceFiles {
private void run(PrintWriter printWriter) throws IOException {
conf = new Configuration();
SequenceFileDirIterable<Writable, Writable> inputIterable = new
- SequenceFileDirIterable<Writable, Writable>(new Path(inputFile), PathType.LIST, conf);
+ SequenceFileDirIterable<>(new Path(inputFile), PathType.LIST, conf);
fs = FileSystem.get(conf);
int numEntries = Iterables.size(inputIterable);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java b/mr/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
index 44a944d..ded76ad 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
@@ -17,7 +17,10 @@
package org.apache.mahout.clustering.topdown.postprocessor;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -34,9 +37,6 @@ import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.math.VectorWritable;
-import java.io.IOException;
-import java.util.Map;
-
/**
* This class reads the output of any clustering algorithm, and, creates separate directories for different
* clusters. Each cluster directory's name is its clusterId. Each and every point is written in the cluster
@@ -53,7 +53,7 @@ public final class ClusterOutputPostProcessor {
private final FileSystem fileSystem;
private final Configuration conf;
private final Path clusterPostProcessorOutput;
- private final Map<String, Path> postProcessedClusterDirectories = Maps.newHashMap();
+ private final Map<String, Path> postProcessedClusterDirectories = new HashMap<>();
private long uniqueVectorId = 0L;
private final Map<String, SequenceFile.Writer> writersForClusters;
@@ -63,7 +63,7 @@ public final class ClusterOutputPostProcessor {
this.clusterPostProcessorOutput = output;
this.clusteredPoints = PathDirectory.getClusterOutputClusteredPoints(clusterOutputToBeProcessed);
this.conf = hadoopConfiguration;
- this.writersForClusters = Maps.newHashMap();
+ this.writersForClusters = new HashMap<>();
fileSystem = clusteredPoints.getFileSystem(conf);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/AbstractJob.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/AbstractJob.java b/mr/src/main/java/org/apache/mahout/common/AbstractJob.java
index ec77749..b732078 100644
--- a/mr/src/main/java/org/apache/mahout/common/AbstractJob.java
+++ b/mr/src/main/java/org/apache/mahout/common/AbstractJob.java
@@ -19,13 +19,14 @@ package org.apache.mahout.common;
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
+import com.google.common.base.Preconditions;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -50,15 +51,13 @@ import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.lucene.AnalyzerUtils;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-
/**
* <p>Superclass of many Mahout Hadoop "jobs". A job drives configuration and launch of one or
* more maps and reduces in order to accomplish some task.</p>
@@ -113,7 +112,7 @@ public abstract class AbstractJob extends Configured implements Tool {
private Group group;
protected AbstractJob() {
- options = Lists.newLinkedList();
+ options = new LinkedList<>();
}
/** Returns the input path established by a call to {@link #parseArguments(String[])}.
@@ -451,24 +450,15 @@ public abstract class AbstractJob extends Configured implements Tool {
* @return the cardinality of the vector
*/
public int getDimensions(Path matrix) throws IOException {
-
- SequenceFile.Reader reader = null;
- try {
- reader = new SequenceFile.Reader(FileSystem.get(getConf()), matrix, getConf());
-
+ try (SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(getConf()), matrix, getConf())){
Writable row = ClassUtils.instantiateAs(reader.getKeyClass().asSubclass(Writable.class), Writable.class);
-
Preconditions.checkArgument(reader.getValueClass().equals(VectorWritable.class),
"value type of sequencefile must be a VectorWritable");
VectorWritable vectorWritable = new VectorWritable();
boolean hasAtLeastOneRow = reader.next(row, vectorWritable);
Preconditions.checkState(hasAtLeastOneRow, "matrix must have at least one row");
-
return vectorWritable.get().size();
-
- } finally {
- Closeables.close(reader, true);
}
}
@@ -523,7 +513,7 @@ public abstract class AbstractJob extends Configured implements Tool {
// nulls are ok, for cases where options are simple flags.
List<?> vo = cmdLine.getValues(o);
if (vo != null && !vo.isEmpty()) {
- List<String> vals = Lists.newArrayList();
+ List<String> vals = new ArrayList<>();
for (Object o1 : vo) {
vals.add(o1.toString());
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/CommandLineUtil.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/CommandLineUtil.java b/mr/src/main/java/org/apache/mahout/common/CommandLineUtil.java
index 0cc93ba..ac4ab88 100644
--- a/mr/src/main/java/org/apache/mahout/common/CommandLineUtil.java
+++ b/mr/src/main/java/org/apache/mahout/common/CommandLineUtil.java
@@ -21,10 +21,10 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
-import com.google.common.base.Charsets;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.util.HelpFormatter;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;
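
The Charsets swap above (Guava's com.google.common.base.Charsets for Commons IO's org.apache.commons.io.Charsets) drops the Guava dependency but keeps a third-party one; since Java 7 the JDK itself provides java.nio.charset.StandardCharsets with the same constants. For illustration only; the patch uses Commons IO:

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

public class StandardCharsetsSketch {
  public static void main(String[] args) {
    // Same shape as CommandLineUtil's writer, but with the JDK-provided constant.
    PrintWriter writer =
        new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8), true);
    writer.println("help text goes here");
  }
}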
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/HadoopUtil.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/HadoopUtil.java b/mr/src/main/java/org/apache/mahout/common/HadoopUtil.java
index f693821..27e5686 100644
--- a/mr/src/main/java/org/apache/mahout/common/HadoopUtil.java
+++ b/mr/src/main/java/org/apache/mahout/common/HadoopUtil.java
@@ -21,6 +21,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
@@ -28,8 +29,6 @@ import java.util.List;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -326,21 +325,15 @@ public final class HadoopUtil {
public static void writeInt(int value, Path path, Configuration configuration) throws IOException {
FileSystem fs = FileSystem.get(path.toUri(), configuration);
- FSDataOutputStream out = fs.create(path);
- try {
+ try (FSDataOutputStream out = fs.create(path)) {
out.writeInt(value);
- } finally {
- Closeables.close(out, false);
}
}
public static int readInt(Path path, Configuration configuration) throws IOException {
FileSystem fs = FileSystem.get(path.toUri(), configuration);
- FSDataInputStream in = fs.open(path);
- try {
+ try (FSDataInputStream in = fs.open(path)) {
return in.readInt();
- } finally {
- Closeables.close(in, true);
}
}
@@ -353,7 +346,7 @@ public final class HadoopUtil {
*/
public static String buildDirList(FileSystem fs, FileStatus fileStatus) throws IOException {
boolean containsFiles = false;
- List<String> directoriesList = Lists.newArrayList();
+ List<String> directoriesList = new ArrayList<>();
for (FileStatus childFileStatus : fs.listStatus(fileStatus.getPath())) {
if (childFileStatus.isDir()) {
String subDirectoryList = buildDirList(fs, childFileStatus);
@@ -379,7 +372,7 @@ public final class HadoopUtil {
*/
public static String buildDirList(FileSystem fs, FileStatus fileStatus, PathFilter pathFilter) throws IOException {
boolean containsFiles = false;
- List<String> directoriesList = Lists.newArrayList();
+ List<String> directoriesList = new ArrayList<>();
for (FileStatus childFileStatus : fs.listStatus(fileStatus.getPath(), pathFilter)) {
if (childFileStatus.isDir()) {
String subDirectoryList = buildDirList(fs, childFileStatus);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java b/mr/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
index a8fa091..17ee714 100644
--- a/mr/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
+++ b/mr/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
@@ -17,9 +17,14 @@
package org.apache.mahout.common.distance;
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -37,12 +42,6 @@ import org.apache.mahout.math.SingularValueDecomposition;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import java.io.DataInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-
//See http://en.wikipedia.org/wiki/Mahalanobis_distance for details
public class MahalanobisDistanceMeasure implements DistanceMeasure {
@@ -77,11 +76,8 @@ public class MahalanobisDistanceMeasure implements DistanceMeasure {
if (!fs.exists(inverseCovarianceFile.get())) {
throw new FileNotFoundException(inverseCovarianceFile.get().toString());
}
- DataInputStream in = fs.open(inverseCovarianceFile.get());
- try {
+ try (DataInputStream in = fs.open(inverseCovarianceFile.get())){
inverseCovarianceMatrix.readFields(in);
- } finally {
- Closeables.close(in, true);
}
this.inverseCovarianceMatrix = inverseCovarianceMatrix.get();
Preconditions.checkArgument(this.inverseCovarianceMatrix != null, "inverseCovarianceMatrix not initialized");
@@ -94,11 +90,8 @@ public class MahalanobisDistanceMeasure implements DistanceMeasure {
if (!fs.exists(meanVectorFile.get())) {
throw new FileNotFoundException(meanVectorFile.get().toString());
}
- DataInputStream in = fs.open(meanVectorFile.get());
- try {
+ try (DataInputStream in = fs.open(meanVectorFile.get())){
meanVector.readFields(in);
- } finally {
- Closeables.close(in, true);
}
this.meanVector = meanVector.get();
Preconditions.checkArgument(this.meanVector != null, "meanVector not initialized");
@@ -116,7 +109,7 @@ public class MahalanobisDistanceMeasure implements DistanceMeasure {
@Override
public void createParameters(String prefix, Configuration jobConf) {
- parameters = Lists.newArrayList();
+ parameters = new ArrayList<>();
inverseCovarianceFile = new PathParameter(prefix, "inverseCovarianceFile", jobConf, null,
"Path on DFS to a file containing the inverse covariance matrix.");
parameters.add(inverseCovarianceFile);
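
The hunks above show the first of the two idioms this upgrade applies throughout: a manual try/finally around Guava's Closeables.close(in, true) becomes a Java 7 try-with-resources statement. One nuance worth knowing: Closeables.close(in, true) logged and swallowed an IOException thrown by close(), whereas try-with-resources propagates it (or attaches it as a suppressed exception if the body already threw). A minimal sketch of the new pattern follows; the class and method names are illustrative, not code from this commit.

import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.math.MatrixWritable;

public class ReadMatrixSketch {
  // Loads a MatrixWritable from a (hypothetical) HDFS path. The stream is
  // closed automatically when the block exits, whether or not readFields()
  // throws; a close-time IOException now propagates instead of being logged.
  public static MatrixWritable read(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    MatrixWritable matrix = new MatrixWritable();
    try (DataInputStream in = fs.open(path)) {
      matrix.readFields(in);
    }
    return matrix;
  }
}
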
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java b/mr/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
index 3a57f2f..c3a48cb 100644
--- a/mr/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
+++ b/mr/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
@@ -17,10 +17,10 @@
package org.apache.mahout.common.distance;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.common.parameters.DoubleParameter;
import org.apache.mahout.common.parameters.Parameter;
@@ -50,7 +50,7 @@ public class MinkowskiDistanceMeasure implements DistanceMeasure {
@Override
public void createParameters(String prefix, Configuration conf) {
- parameters = Lists.newArrayList();
+ parameters = new ArrayList<>();
Parameter<?> param =
new DoubleParameter(prefix, "exponent", conf, EXPONENT, "Exponent for Fractional Lagrange distance");
parameters.add(param);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java b/mr/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
index 0c1d2cd..1acbe86 100644
--- a/mr/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
+++ b/mr/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
@@ -20,11 +20,10 @@ package org.apache.mahout.common.distance;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -46,7 +45,7 @@ public abstract class WeightedDistanceMeasure implements DistanceMeasure {
@Override
public void createParameters(String prefix, Configuration jobConf) {
- parameters = Lists.newArrayList();
+ parameters = new ArrayList<>();
weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null,
"Path on DFS to a file containing the weights.");
parameters.add(weightsFile);
@@ -73,11 +72,8 @@ public abstract class WeightedDistanceMeasure implements DistanceMeasure {
if (!fs.exists(weightsFile.get())) {
throw new FileNotFoundException(weightsFile.get().toString());
}
- DataInputStream in = fs.open(weightsFile.get());
- try {
+ try (DataInputStream in = fs.open(weightsFile.get())) {
weights.readFields(in);
- } finally {
- Closeables.close(in, true);
}
this.weights = weights.get();
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java b/mr/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
index c6889e2..4c78d9f 100644
--- a/mr/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
+++ b/mr/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
@@ -17,7 +17,6 @@
package org.apache.mahout.common.distance;
-
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/driver/MahoutDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/driver/MahoutDriver.java b/mr/src/main/java/org/apache/mahout/driver/MahoutDriver.java
index 1fd5506..5c5b8a4 100644
--- a/mr/src/main/java/org/apache/mahout/driver/MahoutDriver.java
+++ b/mr/src/main/java/org/apache/mahout/driver/MahoutDriver.java
@@ -19,13 +19,13 @@ package org.apache.mahout.driver;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import org.apache.hadoop.util.ProgramDriver;
import org.slf4j.Logger;
@@ -135,10 +135,10 @@ public final class MahoutDriver {
mainProps = new Properties();
}
- Map<String,String[]> argMap = Maps.newHashMap();
+ Map<String,String[]> argMap = new HashMap<>();
int i = 0;
while (i < args.length && args[i] != null) {
- List<String> argValues = Lists.newArrayList();
+ List<String> argValues = new ArrayList<>();
String arg = args[i];
i++;
if (arg.startsWith("-D")) { // '-Dkey=value' or '-Dkey=value1,value2,etc' case
@@ -170,7 +170,7 @@ public final class MahoutDriver {
}
// Now add command-line args
- List<String> argsList = Lists.newArrayList();
+ List<String> argsList = new ArrayList<>();
argsList.add(progName);
for (Map.Entry<String,String[]> entry : argMap.entrySet()) {
String arg = entry.getKey();
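
MahoutDriver shows the second idiom: the Java 7 diamond operator lets the compiler infer type arguments from the target type, which is precisely the inference Guava's Lists.newArrayList() and Maps.newHashMap() factories supplied on Java 6. A standalone illustration, with hypothetical values, not taken from the commit:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DiamondSketch {
  public static void main(String[] args) {
    // Java 6 + Guava: Map<String,String[]> argMap = Maps.newHashMap();
    // Java 7: the diamond infers <String,String[]> from the declaration.
    Map<String, String[]> argMap = new HashMap<>();

    // Inference also works for nested generics and for constructors
    // taking arguments, such as the copy constructor:
    List<String> argValues = new ArrayList<>();
    argValues.add("value1");
    List<String> copy = new ArrayList<>(argValues);
    argMap.put("--input", copy.toArray(new String[copy.size()]));
    System.out.println(argMap.keySet());
  }
}
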
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java b/mr/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
index b744287..4b2eea1 100644
--- a/mr/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
+++ b/mr/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
@@ -17,14 +17,11 @@
package org.apache.mahout.ep;
-import com.google.common.collect.Lists;
-import org.apache.hadoop.io.Writable;
-import org.apache.mahout.classifier.sgd.PolymorphicWritable;
-
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -35,6 +32,10 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import com.google.common.collect.Lists;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.classifier.sgd.PolymorphicWritable;
+
/**
* Allows evolutionary optimization where the state function can't be easily
* packaged for the optimizer to execute. A good example of this is with
@@ -82,7 +83,7 @@ public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, C
private int populationSize;
public EvolutionaryProcess() {
- population = Lists.newArrayList();
+ population = new ArrayList<>();
}
/**
@@ -119,7 +120,7 @@ public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, C
Collections.sort(population);
// we copy here to avoid concurrent modification
- List<State<T, U>> parents = Lists.newArrayList(population.subList(0, survivors));
+ List<State<T, U>> parents = new ArrayList<>(population.subList(0, survivors));
population.subList(survivors, population.size()).clear();
// fill out the population with offspring from the survivors
@@ -140,7 +141,7 @@ public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, C
* and rethrown nested in an ExecutionException.
*/
public State<T, U> parallelDo(final Function<Payload<U>> fn) throws InterruptedException, ExecutionException {
- Collection<Callable<State<T, U>>> tasks = Lists.newArrayList();
+ Collection<Callable<State<T, U>>> tasks = new ArrayList<>();
for (final State<T, U> state : population) {
tasks.add(new Callable<State<T, U>>() {
@Override
@@ -219,7 +220,7 @@ public class EvolutionaryProcess<T extends Payload<U>, U> implements Writable, C
public void readFields(DataInput input) throws IOException {
setThreadCount(input.readInt());
int n = input.readInt();
- population = Lists.newArrayList();
+ population = new ArrayList<>();
for (int i = 0; i < n; i++) {
State<T, U> state = (State<T, U>) PolymorphicWritable.read(input, State.class);
population.add(state);
[5/5] mahout git commit: MAHOUT-1652: Java 7 upgrade,
this closes apache/mahout#112
Posted by sm...@apache.org.
MAHOUT-1652: Java 7 upgrade, this closes apache/mahout#112
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/85f9ece6
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/85f9ece6
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/85f9ece6
Branch: refs/heads/master
Commit: 85f9ece660407fe68ad61560ebd723a57ec761e6
Parents: 53e5ada
Author: Suneel Marthi <su...@gmail.com>
Authored: Sun Apr 5 22:21:29 2015 -0400
Committer: Suneel Marthi <su...@gmail.com>
Committed: Sun Apr 5 22:24:06 2015 -0400
----------------------------------------------------------------------
.../benchmark/ClosestCentroidBenchmark.java | 2 -
.../benchmark/SerializationBenchmark.java | 31 ++----
.../mahout/benchmark/VectorBenchmarks.java | 45 ++++----
.../taste/impl/model/hbase/HBaseDataModel.java | 42 ++++---
.../impl/model/jdbc/AbstractJDBCDataModel.java | 29 +++--
.../impl/model/mongodb/MongoDBDataModel.java | 54 +++++----
.../jdbc/MySQLJDBCItemSimilarity.java | 1 -
.../classifier/ConfusionMatrixDumper.java | 21 ++--
.../mahout/clustering/cdbw/CDbwEvaluator.java | 15 ++-
.../clustering/conversion/InputMapper.java | 16 +--
.../clustering/evaluation/ClusterEvaluator.java | 11 +-
.../evaluation/RepresentativePointsDriver.java | 28 ++---
.../evaluation/RepresentativePointsMapper.java | 10 +-
.../mahout/clustering/lda/LDAPrintTopics.java | 15 ++-
.../apache/mahout/text/LuceneIndexHelper.java | 3 +-
.../mahout/text/LuceneSegmentInputFormat.java | 10 +-
.../mahout/text/LuceneSegmentRecordReader.java | 1 -
.../mahout/text/LuceneStorageConfiguration.java | 25 +++--
.../text/MailArchivesClusteringAnalyzer.java | 1 +
.../mahout/text/PrefixAdditionFilter.java | 8 +-
.../mahout/text/SequenceFilesFromDirectory.java | 10 +-
.../SequenceFilesFromLuceneStorageDriver.java | 15 ++-
.../SequenceFilesFromLuceneStorageMapper.java | 9 +-
.../text/SequenceFilesFromMailArchives.java | 18 ++-
.../mahout/text/WholeFileRecordReader.java | 20 ++--
.../mahout/text/WikipediaToSequenceFile.java | 4 +-
.../text/wikipedia/WikipediaAnalyzer.java | 1 +
.../WikipediaDatasetCreatorDriver.java | 4 +-
.../WikipediaDatasetCreatorMapper.java | 25 ++---
.../mahout/text/wikipedia/WikipediaMapper.java | 6 +-
.../text/wikipedia/WikipediaXmlSplitter.java | 8 +-
.../mahout/text/wikipedia/XmlInputFormat.java | 6 +-
.../mahout/utils/ConcatenateVectorsJob.java | 8 +-
.../org/apache/mahout/utils/MatrixDumper.java | 2 +-
.../apache/mahout/utils/SequenceFileDumper.java | 11 +-
.../org/apache/mahout/utils/SplitInput.java | 55 ++++-----
.../org/apache/mahout/utils/SplitInputJob.java | 1 +
.../mahout/utils/clustering/ClusterDumper.java | 20 ++--
.../nlp/collocations/llr/BloomTokenFilter.java | 2 +-
.../mahout/utils/regex/AnalyzerTransformer.java | 17 +--
.../apache/mahout/utils/regex/RegexMapper.java | 14 +--
.../apache/mahout/utils/vectors/RowIdJob.java | 34 ++----
.../mahout/utils/vectors/VectorDumper.java | 6 +-
.../mahout/utils/vectors/VectorHelper.java | 15 ++-
.../mahout/utils/vectors/arff/ARFFIterator.java | 6 +-
.../mahout/utils/vectors/arff/ARFFModel.java | 1 -
.../utils/vectors/arff/ARFFVectorIterable.java | 8 +-
.../mahout/utils/vectors/arff/Driver.java | 111 +++++++++----------
.../utils/vectors/arff/MapBackedARFFModel.java | 12 +-
.../utils/vectors/lucene/CachedTermInfo.java | 12 +-
.../utils/vectors/lucene/ClusterLabels.java | 21 ++--
.../mahout/utils/vectors/lucene/Driver.java | 39 +++----
.../utils/vectors/lucene/LuceneIterator.java | 10 +-
.../mahout/clustering/TestClusterDumper.java | 23 ++--
.../clustering/cdbw/TestCDbwEvaluator.java | 25 ++---
.../mahout/text/AbstractLuceneStorageTest.java | 15 ++-
.../text/LuceneSegmentRecordReaderTest.java | 25 +++--
.../text/LuceneStorageConfigurationTest.java | 11 +-
...equenceFilesFromLuceneStorageDriverTest.java | 32 ++----
...SequenceFilesFromLuceneStorageMRJobTest.java | 15 +--
.../SequenceFilesFromLuceneStorageTest.java | 70 ++++++------
.../text/SequenceFilesFromMailArchivesTest.java | 19 +---
.../text/TestSequenceFilesFromDirectory.java | 58 +++-------
.../mahout/text/doc/NumericFieldDocument.java | 1 -
.../org/apache/mahout/utils/SplitInputTest.java | 29 ++---
.../mahout/utils/TestConcatenateVectorsJob.java | 4 +-
.../mahout/utils/email/MailProcessorTest.java | 8 +-
.../collocations/llr/BloomTokenFilterTest.java | 3 +-
.../mahout/utils/regex/RegexMapperTest.java | 4 +-
.../mahout/utils/regex/RegexUtilsTest.java | 10 +-
.../mahout/utils/vectors/VectorHelperTest.java | 19 +---
.../vectors/arff/ARFFVectorIterableTest.java | 3 +-
.../mahout/utils/vectors/arff/DriverTest.java | 3 +-
.../vectors/arff/MapBackedARFFModelTest.java | 1 -
.../vectors/csv/CSVVectorIteratorTest.java | 16 +--
.../utils/vectors/io/VectorWriterTest.java | 16 +--
.../org/apache/mahout/math/QRDecomposition.java | 1 -
.../mahout/math/SingularValueDecomposition.java | 2 +-
.../apache/mahout/math/SparseColumnMatrix.java | 4 +-
.../math/decomposer/hebbian/HebbianSolver.java | 10 +-
.../math/decomposer/hebbian/TrainingState.java | 4 +-
.../apache/mahout/math/stats/LogLikelihood.java | 8 +-
.../apache/mahout/common/RandomUtilsTest.java | 2 +-
.../taste/hadoop/RecommendedItemsWritable.java | 4 +-
.../mahout/cf/taste/hadoop/TopItemsQueue.java | 10 +-
.../apache/mahout/cf/taste/hadoop/als/ALS.java | 20 ++--
.../hadoop/als/FactorizationEvaluator.java | 14 +--
.../hadoop/als/ParallelALSFactorizationJob.java | 11 +-
.../mahout/cf/taste/hadoop/item/IDReader.java | 16 +--
.../item/ItemFilterAsVectorAndPrefsReducer.java | 12 +-
.../hadoop/item/ToVectorAndPrefReducer.java | 6 +-
.../hadoop/item/VectorAndPrefsWritable.java | 6 +-
.../similarity/item/TopSimilarItemsQueue.java | 10 +-
.../cf/taste/impl/common/RefreshHelper.java | 8 +-
.../AbstractDifferenceRecommenderEvaluator.java | 13 +--
.../eval/GenericRelevantItemsDataSplitter.java | 4 +-
.../cf/taste/impl/eval/LoadEvaluator.java | 4 +-
.../cf/taste/impl/model/AbstractIDMigrator.java | 3 +-
.../cf/taste/impl/model/file/FileDataModel.java | 13 +--
.../GenericItemBasedRecommender.java | 2 +-
.../impl/recommender/RandomRecommender.java | 4 +-
.../cf/taste/impl/recommender/TopItems.java | 13 +--
.../impl/recommender/svd/ALSWRFactorizer.java | 25 ++---
.../svd/FilePersistenceStrategy.java | 17 +--
.../recommender/svd/SVDPlusPlusFactorizer.java | 12 +-
.../precompute/FileSimilarItemsWriter.java | 2 +-
.../mahout/classifier/ConfusionMatrix.java | 10 +-
.../classifier/RegressionResultAnalyzer.java | 4 +-
.../apache/mahout/classifier/df/DFUtils.java | 54 ++++-----
.../mahout/classifier/df/DecisionForest.java | 10 +-
.../df/builder/DecisionTreeBuilder.java | 4 +-
.../apache/mahout/classifier/df/data/Data.java | 20 ++--
.../mahout/classifier/df/data/DataLoader.java | 11 +-
.../mahout/classifier/df/data/DataUtils.java | 4 +-
.../mahout/classifier/df/data/Dataset.java | 12 +-
.../classifier/df/data/DescriptorUtils.java | 4 +-
.../classifier/df/mapreduce/Classifier.java | 12 +-
.../df/mapreduce/inmem/InMemBuilder.java | 16 +--
.../df/mapreduce/inmem/InMemInputFormat.java | 18 +--
.../df/mapreduce/partial/Step1Mapper.java | 4 +-
.../classifier/df/ref/SequentialBuilder.java | 4 +-
.../mahout/classifier/df/tools/Describe.java | 12 +-
.../mahout/classifier/mlp/NeuralNetwork.java | 50 ++++-----
.../classifier/mlp/RunMultilayerPerceptron.java | 24 ++--
.../mlp/TrainMultilayerPerceptron.java | 59 ++++------
.../classifier/naivebayes/BayesUtils.java | 21 +---
.../classifier/naivebayes/NaiveBayesModel.java | 16 +--
.../naivebayes/test/TestNaiveBayesDriver.java | 11 +-
.../sequencelearning/hmm/BaumWelchTrainer.java | 10 +-
.../sequencelearning/hmm/HmmUtils.java | 7 +-
.../hmm/RandomSequenceGenerator.java | 14 +--
.../sequencelearning/hmm/ViterbiEvaluator.java | 19 ++--
.../sgd/AdaptiveLogisticRegression.java | 16 +--
.../mahout/classifier/sgd/CrossFoldLearner.java | 4 +-
.../mahout/classifier/sgd/CsvRecordFactory.java | 14 ++-
.../mahout/classifier/sgd/GradientMachine.java | 4 +-
.../mahout/classifier/sgd/ModelDissector.java | 14 +--
.../mahout/classifier/sgd/ModelSerializer.java | 15 +--
.../mahout/classifier/sgd/RankingGradient.java | 4 +-
.../mahout/clustering/AbstractCluster.java | 9 +-
.../mahout/clustering/ClusteringUtils.java | 3 +-
.../classify/ClusterClassificationDriver.java | 10 +-
.../classify/ClusterClassificationMapper.java | 10 +-
.../clustering/classify/ClusterClassifier.java | 91 +++++++--------
.../fuzzykmeans/FuzzyKMeansDriver.java | 4 +-
.../mahout/clustering/iterator/CIReducer.java | 4 +-
.../iterator/FuzzyKMeansClusteringPolicy.java | 7 +-
.../mahout/clustering/kmeans/KMeansDriver.java | 4 +-
.../clustering/kmeans/RandomSeedGenerator.java | 13 +--
.../mahout/clustering/lda/cvb/CVB0Driver.java | 12 +-
.../cvb/InMemoryCollapsedVariationalBayes0.java | 45 ++------
.../mahout/clustering/lda/cvb/ModelTrainer.java | 26 ++---
.../mahout/clustering/lda/cvb/TopicModel.java | 28 ++---
.../mahout/clustering/spectral/VectorCache.java | 15 +--
.../spectral/kmeans/EigenSeedGenerator.java | 16 +--
.../streaming/cluster/BallKMeans.java | 12 +-
.../streaming/cluster/StreamingKMeans.java | 4 +-
.../mapreduce/StreamingKMeansDriver.java | 6 +-
.../mapreduce/StreamingKMeansMapper.java | 4 +-
.../mapreduce/StreamingKMeansThread.java | 4 +-
.../mapreduce/StreamingKMeansUtilsMR.java | 17 +--
.../streaming/tools/ResplitSequenceFiles.java | 4 +-
.../ClusterOutputPostProcessor.java | 12 +-
.../org/apache/mahout/common/AbstractJob.java | 24 ++--
.../apache/mahout/common/CommandLineUtil.java | 2 +-
.../org/apache/mahout/common/HadoopUtil.java | 17 +--
.../distance/MahalanobisDistanceMeasure.java | 27 ++---
.../distance/MinkowskiDistanceMeasure.java | 4 +-
.../distance/WeightedDistanceMeasure.java | 10 +-
.../WeightedEuclideanDistanceMeasure.java | 1 -
.../org/apache/mahout/driver/MahoutDriver.java | 10 +-
.../apache/mahout/ep/EvolutionaryProcess.java | 17 +--
172 files changed, 1055 insertions(+), 1487 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
index d28ee5a..b1c2ded 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
@@ -27,8 +27,6 @@ import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;
public class ClosestCentroidBenchmark {
- public static final String SERIALIZE = "Serialize";
- public static final String DESERIALIZE = "Deserialize";
private final VectorBenchmarks mark;
public ClosestCentroidBenchmark(VectorBenchmarks mark) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
index 10fcd11..cd403c2 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
@@ -17,9 +17,6 @@
package org.apache.mahout.benchmark;
-import java.io.IOException;
-
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -30,6 +27,8 @@ import org.apache.mahout.common.TimingStatistics;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
+import java.io.IOException;
+
import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
@@ -51,14 +50,14 @@ public class SerializationBenchmark {
public void serializeBenchmark() throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"), IntWritable.class,
- VectorWritable.class);
Writable one = new IntWritable(0);
VectorWritable vec = new VectorWritable();
TimingStatistics stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"),
+ IntWritable.class, VectorWritable.class)) {
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[0][mark.vIndex(i)]);
@@ -67,15 +66,13 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, DENSE_VECTOR);
- writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/randsparse-vector"), IntWritable.class,
- VectorWritable.class);
stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class)){
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[1][mark.vIndex(i)]);
@@ -84,15 +81,13 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, RAND_SPARSE_VECTOR);
- writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/seqsparse-vector"), IntWritable.class,
- VectorWritable.class);
stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class)) {
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[2][mark.vIndex(i)]);
@@ -101,8 +96,6 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, SEQ_SPARSE_VECTOR);
@@ -117,7 +110,7 @@ public class SerializationBenchmark {
private void doDeserializeBenchmark(String name, String pathString) throws IOException {
TimingStatistics stats = new TimingStatistics();
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
- SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<Writable>(new Path(pathString), true,
+ SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<>(new Path(pathString), true,
new Configuration());
while (iterator.hasNext()) {
iterator.next();
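
In SerializationBenchmark the rewrite does slightly more than swap syntax: one reassigned writer variable becomes three writers, each scoped to its own try-with-resources block. Because the old code closed with Closeables.close(writer, false), which rethrows close-time exceptions, behavior is unchanged and only the boilerplate disappears. A condensed sketch of the resulting shape; the path and vector contents are illustrative:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;

public class WriterScopeSketch {
  public static void writeOne(FileSystem fs, Configuration conf, Path path) throws IOException {
    // Each output file gets its own writer, alive only inside this block;
    // close() is implicit on exit, replacing Closeables.close(writer, false).
    try (SequenceFile.Writer writer =
             new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
      writer.append(new IntWritable(0), new VectorWritable(new DenseVector(new double[] {1.0})));
    }
    // A second file would get a second block with a fresh writer variable,
    // instead of reassigning one shared writer as the old code did.
  }
}
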
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
index c29760a..a076322 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
@@ -17,17 +17,6 @@
package org.apache.mahout.benchmark;
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Pattern;
-
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -56,8 +45,18 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
public class VectorBenchmarks {
private static final int MAX_TIME_MS = 5000;
@@ -93,11 +92,11 @@ public class VectorBenchmarks {
final long maxTimeUsec;
final long leadTimeUsec;
- private final List<Vector> randomVectors = Lists.newArrayList();
- private final List<int[]> randomVectorIndices = Lists.newArrayList();
- private final List<double[]> randomVectorValues = Lists.newArrayList();
- private final Map<String, Integer> implType = Maps.newHashMap();
- private final Map<String, List<String[]>> statsMap = Maps.newHashMap();
+ private final List<Vector> randomVectors = new ArrayList<>();
+ private final List<int[]> randomVectorIndices = new ArrayList<>();
+ private final List<double[]> randomVectorValues = new ArrayList<>();
+ private final Map<String, Integer> implType = new HashMap<>();
+ private final Map<String, List<String[]>> statsMap = new HashMap<>();
private final BenchmarkRunner runner;
private final Random r = RandomUtils.getRandom();
@@ -162,7 +161,7 @@ public class VectorBenchmarks {
}
int implId = implType.get(implName);
if (!statsMap.containsKey(benchmarkName)) {
- statsMap.put(benchmarkName, Lists.<String[]>newArrayList());
+ statsMap.put(benchmarkName, new ArrayList<String[]>());
}
List<String[]> implStats = statsMap.get(benchmarkName);
while (implStats.size() < implId + 1) {
@@ -224,7 +223,7 @@ public class VectorBenchmarks {
private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
int[] indexes = randomVectorIndices.get(randomIndex);
double[] values = randomVectorValues.get(randomIndex);
- List<Integer> randomOrder = Lists.newArrayList();
+ List<Integer> randomOrder = new ArrayList<>();
for (int i = 0; i < indexes.length; i++) {
randomOrder.add(i);
}
@@ -421,9 +420,9 @@ public class VectorBenchmarks {
}
private String asCsvString() {
- List<String> keys = Lists.newArrayList(statsMap.keySet());
+ List<String> keys = new ArrayList<>(statsMap.keySet());
Collections.sort(keys);
- Map<Integer,String> implMap = Maps.newHashMap();
+ Map<Integer,String> implMap = new HashMap<>();
for (Entry<String,Integer> e : implType.entrySet()) {
implMap.put(e.getValue(), e.getKey());
}
@@ -459,7 +458,7 @@ public class VectorBenchmarks {
}
}
sb.append('\n');
- List<String> keys = Lists.newArrayList(statsMap.keySet());
+ List<String> keys = new ArrayList<>(statsMap.keySet());
Collections.sort(keys);
for (String benchmarkName : keys) {
List<String[]> implTokenizedStats = statsMap.get(benchmarkName);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
index 9378b11..9735ffe 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
@@ -17,15 +17,6 @@
package org.apache.mahout.cf.taste.impl.model.hbase;
-import java.io.Closeable;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -42,13 +33,9 @@ import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
@@ -59,6 +46,18 @@ import org.apache.mahout.cf.taste.impl.model.GenericItemPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
/**
* <p>Naive approach of storing one preference as one value in the table.
@@ -134,15 +133,12 @@ public final class HBaseDataModel implements DataModel, Closeable {
* Create the table if it doesn't exist
*/
private void bootstrap(Configuration conf) throws IOException {
- HBaseAdmin admin = new HBaseAdmin(conf);
HTableDescriptor tDesc = new HTableDescriptor(Bytes.toBytes(tableName));
tDesc.addFamily(new HColumnDescriptor(USERS_CF));
tDesc.addFamily(new HColumnDescriptor(ITEMS_CF));
- try {
+ try (HBaseAdmin admin = new HBaseAdmin(conf)) {
admin.createTable(tDesc);
log.info("Created table {}", tableName);
- } finally {
- admin.close();
}
}
@@ -339,7 +335,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Result[] results;
try {
HTableInterface table = pool.getTable(tableName);
- List<Get> gets = Lists.newArrayListWithCapacity(2);
+ List<Get> gets = new ArrayList<>(2);
gets.add(new Get(itemToBytes(itemID1)));
gets.add(new Get(itemToBytes(itemID2)));
gets.get(0).addFamily(USERS_CF);
@@ -380,7 +376,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
public void setPreference(long userID, long itemID, float value) throws TasteException {
try {
HTableInterface table = pool.getTable(tableName);
- List<Put> puts = Lists.newArrayListWithCapacity(2);
+ List<Put> puts = new ArrayList<>(2);
puts.add(new Put(userToBytes(userID)));
puts.add(new Put(itemToBytes(itemID)));
puts.get(0).add(ITEMS_CF, Bytes.toBytes(itemID), Bytes.toBytes(value));
@@ -396,7 +392,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
public void removePreference(long userID, long itemID) throws TasteException {
try {
HTableInterface table = pool.getTable(tableName);
- List<Delete> deletes = Lists.newArrayListWithCapacity(2);
+ List<Delete> deletes = new ArrayList<>(2);
deletes.add(new Delete(userToBytes(userID)));
deletes.add(new Delete(itemToBytes(itemID)));
deletes.get(0).deleteColumns(ITEMS_CF, Bytes.toBytes(itemID));
@@ -457,7 +453,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Scan scan = new Scan(new byte[]{0x69}, new byte[]{0x70});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = Lists.newLinkedList();
+ Collection<Long> ids = new LinkedList<>();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
@@ -482,7 +478,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Scan scan = new Scan(new byte[]{0x75}, new byte[]{0x76});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = Lists.newLinkedList();
+ Collection<Long> ids = new LinkedList<>();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
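
Note that the HBaseDataModel replacements preserve the Guava factories' sizing hints: Lists.newArrayListWithCapacity(2) becomes new ArrayList<>(2), and Lists.newLinkedList() becomes new LinkedList<>(), so allocation behavior is untouched. A small self-contained sketch with example values:

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

public class CapacitySketch {
  public static void main(String[] args) {
    // Initial capacity of 2: exactly two row operations are batched.
    List<String> gets = new ArrayList<>(2);
    gets.add("user-row");
    gets.add("item-row");

    // LinkedList where the final size is unknown and only iteration is needed.
    Collection<Long> ids = new LinkedList<>();
    ids.add(42L);
    System.out.println(gets + " " + ids);
  }
}
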
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
index d9317c0..66f0a77 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
@@ -17,17 +17,7 @@
package org.apache.mahout.cf.taste.impl.model.jdbc;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Collection;
-import java.util.List;
-
-import javax.sql.DataSource;
-
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
@@ -49,7 +39,15 @@ import org.apache.mahout.common.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import javax.sql.DataSource;
/**
* <p>
@@ -96,7 +94,6 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
private final String getUsersSQL;
private final String getItemsSQL;
private final String getPrefsForItemSQL;
- //private final String getNumPreferenceForItemSQL;
private final String getNumPreferenceForItemsSQL;
private final String getMaxPreferenceSQL;
private final String getMinPreferenceSQL;
@@ -283,7 +280,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
log.debug("Executing SQL query: {}", getUserSQL);
rs = stmt.executeQuery();
- List<Preference> prefs = Lists.newArrayList();
+ List<Preference> prefs = new ArrayList<>();
while (rs.next()) {
prefs.add(buildPreference(rs));
}
@@ -323,7 +320,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
rs = stmt.executeQuery(getAllUsersSQL);
Long currentUserID = null;
- List<Preference> currentPrefs = Lists.newArrayList();
+ List<Preference> currentPrefs = new ArrayList<>();
while (rs.next()) {
long nextUserID = getLongColumn(rs, 1);
if (currentUserID != null && !currentUserID.equals(nextUserID) && !currentPrefs.isEmpty()) {
@@ -533,7 +530,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
log.debug("Executing SQL query: {}", getPrefsForItemSQL);
rs = stmt.executeQuery();
- List<Preference> prefs = Lists.newArrayList();
+ List<Preference> prefs = new ArrayList<>();
while (rs.next()) {
prefs.add(buildPreference(rs));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
index fe6b843..92a4019 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
@@ -17,20 +17,15 @@
package org.apache.mahout.cf.taste.impl.model.mongodb;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.util.Collection;
-import java.util.Date;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-import java.net.UnknownHostException;
-import java.text.SimpleDateFormat;
-import java.util.regex.Pattern;
-
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.DBCursor;
+import com.mongodb.DBObject;
+import com.mongodb.Mongo;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -42,19 +37,22 @@ import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.mongodb.BasicDBObject;
-import com.mongodb.DBObject;
-import com.mongodb.Mongo;
-import com.mongodb.DB;
-import com.mongodb.DBCollection;
-import com.mongodb.DBCursor;
+import java.net.UnknownHostException;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
/**
* <p>A {@link DataModel} backed by a MongoDB database. This class expects a
@@ -408,8 +406,8 @@ public final class MongoDBDataModel implements DataModel {
while (cursor.hasNext()) {
Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
String userID = getID(user.get(mongoUserID), true);
- Collection<List<String>> items = Lists.newArrayList();
- List<String> item = Lists.newArrayList();
+ Collection<List<String>> items = new ArrayList<>();
+ List<String> item = new ArrayList<>();
item.add(getID(user.get(mongoItemID), false));
item.add(Float.toString(getPreference(user.get(mongoPreference))));
items.add(item);
@@ -431,8 +429,8 @@ public final class MongoDBDataModel implements DataModel {
Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
if (!user.containsKey("deleted_at")) {
String userID = getID(user.get(mongoUserID), true);
- Collection<List<String>> items = Lists.newArrayList();
- List<String> item = Lists.newArrayList();
+ Collection<List<String>> items = new ArrayList<>();
+ List<String> item = new ArrayList<>();
item.add(getID(user.get(mongoItemID), false));
item.add(Float.toString(getPreference(user.get(mongoPreference))));
items.add(item);
@@ -552,7 +550,7 @@ public final class MongoDBDataModel implements DataModel {
Mongo mongoDDBB = new Mongo(mongoHost, mongoPort);
DB db = mongoDDBB.getDB(mongoDB);
mongoTimestamp = new Date(0);
- FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
+ FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<>();
if (!mongoAuth || db.authenticate(mongoUsername, mongoPassword.toCharArray())) {
collection = db.getCollection(mongoCollection);
collectionMap = db.getCollection(mongoMapCollection);
@@ -572,7 +570,7 @@ public final class MongoDBDataModel implements DataModel {
float ratingValue = getPreference(user.get(mongoPreference));
Collection<Preference> userPrefs = userIDPrefMap.get(userID);
if (userPrefs == null) {
- userPrefs = Lists.newArrayListWithCapacity(2);
+ userPrefs = new ArrayList<>(2);
userIDPrefMap.put(userID, userPrefs);
}
userPrefs.add(new GenericPreference(userID, itemID, ratingValue));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
index 20d1384..af0742e 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
@@ -70,7 +70,6 @@ import org.apache.mahout.cf.taste.common.TasteException;
* Note that for each row, item_id_a should be less than item_id_b. It is redundant to store it both ways,
* so the pair is always stored as a pair with the lesser one first.
*
- * @see org.apache.mahout.cf.taste.impl.recommender.slopeone.jdbc.MySQLJDBCDiffStorage
* @see org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel
*/
public class MySQLJDBCItemSimilarity extends SQL92JDBCItemSimilarity {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
index 45f300a..03a3000 100644
--- a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
+++ b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
@@ -17,16 +17,8 @@
package org.apache.mahout.classifier;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -38,7 +30,14 @@ import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixWritable;
-import com.google.common.collect.Lists;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
/**
* Export a ConfusionMatrix in various text formats: ToString version Grayscale HTML table Summary HTML table
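
The import swap in ConfusionMatrixDumper, org.apache.commons.io.Charsets in place of com.google.common.base.Charsets, is part of the same Guava trimming. On Java 7 the JDK's own java.nio.charset.StandardCharsets provides equivalent constants; the snippet below illustrates that alternative and is not code from the commit:

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

public class CharsetsSketch {
  public static void main(String[] args) throws UnsupportedEncodingException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // StandardCharsets.UTF_8 ships with the JDK since Java 7; no Guava or
    // commons-io dependency is needed just for the charset constant.
    PrintStream ps = new PrintStream(out, true, StandardCharsets.UTF_8.name());
    ps.println("confusion matrix");
    System.out.println(out.toString(StandardCharsets.UTF_8.name()).trim());
  }
}
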
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
index 972dec1..545c1ff 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
@@ -17,10 +17,6 @@
package org.apache.mahout.clustering.cdbw;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
@@ -41,8 +37,11 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
/**
* This class calculates the CDbw metric as defined in
@@ -53,7 +52,7 @@ public final class CDbwEvaluator {
private static final Logger log = LoggerFactory.getLogger(CDbwEvaluator.class);
private final Map<Integer,List<VectorWritable>> representativePoints;
- private final Map<Integer,Double> stDevs = Maps.newHashMap();
+ private final Map<Integer,Double> stDevs = new HashMap<>();
private final List<Cluster> clusters;
private final DistanceMeasure measure;
private Double interClusterDensity = null;
@@ -110,7 +109,7 @@ public final class CDbwEvaluator {
* @return a List<Cluster> of the clusters
*/
private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
Cluster cluster = clusterWritable.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
index 9dbce5a..e4c72c6 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
@@ -17,13 +17,6 @@
package org.apache.mahout.clustering.conversion;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Collection;
-import java.util.regex.Pattern;
-
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -31,6 +24,13 @@ import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable> {
private static final Pattern SPACE = Pattern.compile(" ");
@@ -42,7 +42,7 @@ public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable
String[] numbers = SPACE.split(values.toString());
// sometimes there are multiple separator spaces
- Collection<Double> doubles = Lists.newArrayList();
+ Collection<Double> doubles = new ArrayList<>();
for (String value : numbers) {
if (!value.isEmpty()) {
doubles.add(Double.valueOf(value));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
index 3cd06eb..757f38c 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
@@ -17,10 +17,6 @@
package org.apache.mahout.clustering.evaluation;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
@@ -37,7 +33,10 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
public class ClusterEvaluator {
@@ -89,7 +88,7 @@ public class ClusterEvaluator {
* @return a List<Cluster> of the clusters
*/
private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
Cluster cluster = clusterWritable.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
index f18b584..2fe37ef 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
@@ -18,12 +18,10 @@
package org.apache.mahout.clustering.evaluation;
import java.io.IOException;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -98,8 +96,6 @@ public final class RepresentativePointsDriver extends AbstractJob {
* the Path to the directory containing representativePoints-i folders
* @param numIterations
* the int number of iterations to print
- * @throws IOException
- * if errors occur
*/
public static void printRepresentativePoints(Path output, int numIterations) {
for (int i = 0; i <= numIterations; i++) {
@@ -141,8 +137,8 @@ public final class RepresentativePointsDriver extends AbstractJob {
for (FileStatus part : fs.listStatus(inPath, PathFilters.logsCRCFilter())) {
Path inPart = part.getPath();
Path path = new Path(output, inPart.getName());
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
Cluster cluster = clusterWritable.getValue();
if (log.isDebugEnabled()) {
@@ -150,8 +146,6 @@ public final class RepresentativePointsDriver extends AbstractJob {
}
writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
}
- } finally {
- Closeables.close(writer, false);
}
}
}
@@ -184,7 +178,7 @@ public final class RepresentativePointsDriver extends AbstractJob {
DistanceMeasure measure) throws IOException {
Map<Integer,List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn);
- Map<Integer,WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+ Map<Integer,WeightedVectorWritable> mostDistantPoints = new HashMap<>();
FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf);
for (Pair<IntWritable,WeightedVectorWritable> record
: new SequenceFileDirIterable<IntWritable,WeightedVectorWritable>(clusteredPointsIn, PathType.LIST,
@@ -192,25 +186,19 @@ public final class RepresentativePointsDriver extends AbstractJob {
RepresentativePointsMapper.mapPoint(record.getFirst(), record.getSecond(), measure, repPoints, mostDistantPoints);
}
int part = 0;
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
- IntWritable.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+ IntWritable.class, VectorWritable.class)) {
for (Entry<Integer,List<VectorWritable>> entry : repPoints.entrySet()) {
for (VectorWritable vw : entry.getValue()) {
writer.append(new IntWritable(entry.getKey()), vw);
}
}
- } finally {
- Closeables.close(writer, false);
}
- writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++), IntWritable.class,
- VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+ IntWritable.class, VectorWritable.class)) {
for (Map.Entry<Integer,WeightedVectorWritable> entry : mostDistantPoints.entrySet()) {
writer.append(new IntWritable(entry.getKey()), new VectorWritable(entry.getValue().getVector()));
}
- } finally {
- Closeables.close(writer, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
index 255e4a3..0ae79ad 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
@@ -18,11 +18,11 @@
package org.apache.mahout.clustering.evaluation;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -41,7 +41,7 @@ public class RepresentativePointsMapper
extends Mapper<IntWritable, WeightedVectorWritable, IntWritable, WeightedVectorWritable> {
private Map<Integer, List<VectorWritable>> representativePoints;
- private final Map<Integer, WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+ private final Map<Integer, WeightedVectorWritable> mostDistantPoints = new HashMap<>();
private DistanceMeasure measure = new EuclideanDistanceMeasure();
@Override
@@ -98,7 +98,7 @@ public class RepresentativePointsMapper
}
public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf, Path statePath) {
- Map<Integer, List<VectorWritable>> representativePoints = Maps.newHashMap();
+ Map<Integer, List<VectorWritable>> representativePoints = new HashMap<>();
for (Pair<IntWritable,VectorWritable> record
: new SequenceFileDirIterable<IntWritable,VectorWritable>(statePath,
PathType.LIST,
@@ -107,7 +107,7 @@ public class RepresentativePointsMapper
int keyValue = record.getFirst().get();
List<VectorWritable> repPoints = representativePoints.get(keyValue);
if (repPoints == null) {
- repPoints = Lists.newArrayList();
+ repPoints = new ArrayList<>();
representativePoints.put(keyValue, repPoints);
}
repPoints.add(record.getSecond());
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
index cb8d935..392909e 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
@@ -17,24 +17,22 @@
package org.apache.mahout.clustering.lda;
+import com.google.common.io.Closeables;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -43,6 +41,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
@@ -168,7 +167,7 @@ public final class LDAPrintTopics {
out.write("===========");
out.write('\n');
}
- List<Pair<String,Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
+ List<Pair<String,Double>> topKasList = new ArrayList<>(topK.size());
for (Pair<String,Double> wordWithScore : topK) {
topKasList.add(wordWithScore);
}
@@ -197,8 +196,8 @@ public final class LDAPrintTopics {
Configuration job,
List<String> wordList,
int numWordsToPrint) {
- List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
- Map<Integer,Double> expSums = Maps.newHashMap();
+ List<Queue<Pair<String,Double>>> queues = new ArrayList<>();
+ Map<Integer,Double> expSums = new HashMap<>();
for (Pair<IntPairWritable,DoubleWritable> record
: new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
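Here com.google.common.base.Charsets is swapped for org.apache.commons.io.Charsets, which exposes the same constants. Java 7 itself also ships java.nio.charset.StandardCharsets, so a charset-safe writer needs no third-party class at all; a sketch under that assumption, with a ByteArrayOutputStream standing in for the real output file:

    import java.io.ByteArrayOutputStream;
    import java.io.OutputStreamWriter;
    import java.io.Writer;
    import java.nio.charset.StandardCharsets;

    public class CharsetDemo {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        // StandardCharsets.UTF_8 is a Charset constant, so no checked
        // UnsupportedEncodingException and no chance of a misspelled name.
        try (Writer out = new OutputStreamWriter(bytes, StandardCharsets.UTF_8)) {
          out.write("Topic 0\n===========\n");
        }
        System.out.println(bytes.toString("UTF-8"));
      }
    }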
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
index 438beb9..465e51b 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
@@ -33,7 +33,8 @@ public class LuceneIndexHelper {
public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
IndexableField field = reader.document(0).getField(fieldName);
if (field == null || !field.fieldType().stored()) {
- throw new IllegalArgumentException("Field '" + fieldName + "' is possibly not stored since first document in index does not contain this field.");
+ throw new IllegalArgumentException("Field '" + fieldName +
+ "' is possibly not stored since first document in index does not contain this field.");
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
index fabca54..1c4f8de 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
@@ -16,7 +16,10 @@ package org.apache.mahout.text;
* limitations under the License.
*/
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -32,9 +35,6 @@ import org.apache.lucene.index.SegmentInfos;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.List;
-
/**
* {@link InputFormat} implementation which splits a Lucene index at the segment level.
*/
@@ -48,7 +48,7 @@ public class LuceneSegmentInputFormat extends InputFormat {
LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
- List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
+ List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();
List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
for (Path indexPath : indexPaths) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
index a0aa6b0..485e856 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
@@ -98,6 +98,5 @@ public class LuceneSegmentRecordReader extends RecordReader<Text, NullWritable>
@Override
public void close() throws IOException {
segmentReader.close();
- //searcher.close();
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
index 88f86c5..b36f3e9 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
@@ -16,9 +16,18 @@ package org.apache.mahout.text;
* limitations under the License.
*/
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -36,14 +45,6 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
import static org.apache.lucene.util.Version.LUCENE_46;
/**
@@ -186,7 +187,7 @@ public class LuceneStorageConfiguration implements Writable {
}
public DocumentStoredFieldVisitor getStoredFieldVisitor() {
- Set<String> fieldSet = Sets.newHashSet(idField);
+ Set<String> fieldSet = new HashSet<>(Collections.singleton(idField));
fieldSet.addAll(fields);
return new DocumentStoredFieldVisitor(fieldSet);
}
@@ -205,7 +206,7 @@ public class LuceneStorageConfiguration implements Writable {
public void readFields(DataInput in) throws IOException {
try {
sequenceFilesOutputPath = new Path(in.readUTF());
- indexPaths = Lists.newArrayList();
+ indexPaths = new ArrayList<>();
String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
for (String indexPath : indexPaths) {
this.indexPaths.add(new Path(indexPath));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
index a7503e1..8776c5f 100644
--- a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
@@ -21,6 +21,7 @@ import java.io.Reader;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java b/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
index a13341b..37ebc44 100644
--- a/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
+++ b/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
@@ -17,7 +17,6 @@
package org.apache.mahout.text;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -53,10 +52,7 @@ public final class PrefixAdditionFilter extends SequenceFilesFromDirectoryFilter
fs.listStatus(fst.getPath(),
new PrefixAdditionFilter(getConf(), dirPath, getOptions(), writer, getCharset(), fs));
} else {
- InputStream in = null;
- try {
- in = fs.open(fst.getPath());
-
+ try (InputStream in = fs.open(fst.getPath())) {
StringBuilder file = new StringBuilder();
for (String aFit : new FileLineIterable(in, getCharset(), false)) {
file.append(aFit).append('\n');
@@ -65,8 +61,6 @@ public final class PrefixAdditionFilter extends SequenceFilesFromDirectoryFilter
? current.getName()
: current.getName() + Path.SEPARATOR + fst.getPath().getName();
writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
- } finally {
- Closeables.close(in, false);
}
}
}
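The rewritten block no longer needs a null-initialized stream variable or an explicit Closeables.close call. A runnable sketch of the same read-whole-file shape using only JDK classes (the path is hypothetical, and BufferedReader stands in for Mahout's FileLineIterable):

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;

    public class ReadWholeFileDemo {
      public static void main(String[] args) throws Exception {
        StringBuilder file = new StringBuilder();
        // The stream is declared in the try header, so it is closed on every
        // exit path, including when a read throws.
        try (InputStream in = new FileInputStream("/tmp/input.txt")) {
          BufferedReader reader =
              new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
          String line;
          while ((line = reader.readLine()) != null) {
            file.append(line).append('\n');
          }
        }
        System.out.print(file);
      }
    }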
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
index 720078c..311ab8d 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
@@ -19,10 +19,9 @@ package org.apache.mahout.text;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.HashMap;
import java.util.Map;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -100,9 +99,8 @@ public class SequenceFilesFromDirectory extends AbstractJob {
Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
FileSystem fs = FileSystem.get(input.toUri(), conf);
- ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);
- try {
+ try (ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output)) {
SequenceFilesFromDirectoryFilter pathFilter;
String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
@@ -113,8 +111,6 @@ public class SequenceFilesFromDirectory extends AbstractJob {
new Object[] {conf, keyPrefix, options, writer, charset, fs});
}
fs.listStatus(input, pathFilter);
- } finally {
- Closeables.close(writer, false);
}
return 0;
}
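One semantic nuance of these conversions is worth noting: Closeables.close(writer, false) rethrows a close() failure, which can mask an exception thrown inside the try body, whereas try-with-resources keeps the body's exception primary and attaches the close() failure as a suppressed exception. A toy demonstration, not Mahout code:

    public class SuppressedDemo {
      static class FailingResource implements AutoCloseable {
        @Override
        public void close() {
          throw new IllegalStateException("close failed");
        }
      }

      public static void main(String[] args) {
        try {
          try (FailingResource r = new FailingResource()) {
            throw new RuntimeException("body failed");
          }
        } catch (RuntimeException e) {
          System.out.println("primary: " + e.getMessage());      // body failed
          for (Throwable s : e.getSuppressed()) {                // Java 7 API
            System.out.println("suppressed: " + s.getMessage()); // close failed
          }
        }
      }
    }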
@@ -209,7 +205,7 @@ public class SequenceFilesFromDirectory extends AbstractJob {
* @return Map of options
*/
protected Map<String, String> parseOptions() {
- Map<String, String> options = Maps.newHashMap();
+ Map<String, String> options = new HashMap<>();
options.put(CHUNK_SIZE_OPTION[0], getOption(CHUNK_SIZE_OPTION[0]));
options.put(FILE_FILTER_CLASS_OPTION[0], getOption(FILE_FILTER_CLASS_OPTION[0]));
options.put(CHARSET_OPTION[0], getOption(CHARSET_OPTION[0]));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
index d3903dd..1bd3f3e 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
@@ -17,7 +17,11 @@ package org.apache.mahout.text;
*/
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
@@ -30,11 +34,6 @@ import org.apache.lucene.util.Version;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import static java.util.Arrays.asList;
-
/**
* Driver class for the lucene2seq program. Converts text contents of stored fields of a lucene index into a Hadoop
* SequenceFile. The key of the sequence file is the document ID and the value is the concatenated text of the specified
@@ -77,7 +76,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
Configuration configuration = getConf();
String[] paths = getInputPath().toString().split(",");
- List<Path> indexPaths = Lists.newArrayList();
+ List<Path> indexPaths = new ArrayList<>();
for (String path : paths) {
indexPaths.add(new Path(path));
}
@@ -91,7 +90,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
indexPaths,
sequenceFilesOutputPath,
idField,
- asList(fields.split(SEPARATOR_FIELDS)));
+ Arrays.asList(fields.split(SEPARATOR_FIELDS)));
Query query = DEFAULT_QUERY;
if (hasOption(OPTION_QUERY)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
index d87dadc..f31d055 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
@@ -18,6 +18,7 @@
package org.apache.mahout.text;
import com.google.common.base.Strings;
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -31,8 +32,6 @@ import org.apache.lucene.store.IOContext;
import java.io.IOException;
import java.util.List;
-import static org.apache.commons.lang.StringUtils.isBlank;
-
/**
* Maps document IDs to key value pairs with ID field as the key and the concatenated stored field(s)
* as value.
@@ -64,13 +63,13 @@ public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, NullWrita
Text theValue = new Text();
LuceneSeqFileHelper.populateValues(document, theValue, fields);
//if they are both empty, don't write
- if (isBlank(theKey.toString()) && isBlank(theValue.toString())) {
+ if (StringUtils.isBlank(theKey.toString()) && StringUtils.isBlank(theValue.toString())) {
context.getCounter(DataStatus.EMPTY_BOTH).increment(1);
return;
}
- if (isBlank(theKey.toString())) {
+ if (StringUtils.isBlank(theKey.toString())) {
context.getCounter(DataStatus.EMPTY_KEY).increment(1);
- } else if (isBlank(theValue.toString())) {
+ } else if (StringUtils.isBlank(theValue.toString())) {
context.getCounter(DataStatus.EMPTY_VALUE).increment(1);
}
context.write(theKey, theValue);
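The static import of isBlank from commons-lang 2 is replaced here by qualified calls into commons-lang3's StringUtils. A short sketch of the semantics the mapper relies on, namely that null, empty, and whitespace-only strings all count as blank:

    import org.apache.commons.lang3.StringUtils;

    public class BlankCheckDemo {
      public static void main(String[] args) {
        System.out.println(StringUtils.isBlank(null));    // true
        System.out.println(StringUtils.isBlank(""));      // true
        System.out.println(StringUtils.isBlank("   "));   // true
        System.out.println(StringUtils.isBlank("doc1"));  // false
      }
    }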
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
index 30c2a47..c17cc12 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.text;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
import org.apache.commons.io.DirectoryWalker;
import org.apache.commons.io.comparator.CompositeFileComparator;
import org.apache.commons.io.comparator.DirectoryFileComparator;
@@ -46,10 +42,12 @@ import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@@ -81,9 +79,9 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;
public void createSequenceFiles(MailOptions options) throws IOException {
- ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
- MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
- try {
+ try (ChunkedWriter writer =
+ new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()))) {
+ MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
if (options.getInput().isDirectory()) {
PrefixAdditionDirectoryWalker walker = new PrefixAdditionDirectoryWalker(processor, writer);
walker.walk(options.getInput());
@@ -94,8 +92,6 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
long finish = System.currentTimeMillis();
log.info("Parsed {} messages from {} in time: {}", cnt, options.getInput().getAbsolutePath(), finish - start);
}
- } finally {
- Closeables.close(writer, false);
}
}
@@ -226,11 +222,11 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
options.setChunkSize(chunkSize);
options.setCharset(charset);
- List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
+ List<Pattern> patterns = new ArrayList<>(5);
// patternOrder is used downstream so that we can know what order the text
// is in instead of encoding it in the string, which
// would require more processing later to remove it prior to feature selection.
- Map<String, Integer> patternOrder = Maps.newHashMap();
+ Map<String, Integer> patternOrder = new HashMap<>();
int order = 0;
if (hasOption(FROM_OPTION[0])) {
patterns.add(MailProcessor.FROM_PREFIX);
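Lists.newArrayListWithCapacity(5) maps directly onto the ArrayList(int) constructor. The argument is only an allocation hint for the backing array, not an initial size, as this sketch shows:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Pattern;

    public class CapacityDemo {
      public static void main(String[] args) {
        List<Pattern> patterns = new ArrayList<>(5);  // pre-sized backing array
        System.out.println(patterns.size());          // 0 - the list starts empty
        patterns.add(Pattern.compile("^From "));      // illustrative pattern
        System.out.println(patterns.size());          // 1
      }
    }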
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
index fdb3654..b8441b7 100644
--- a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,7 +19,6 @@ package org.apache.mahout.text;
import java.io.IOException;
-import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -55,7 +54,7 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
throws IOException {
this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx),
- fileSplit.getLength(idx), fileSplit.getLocations());
+ fileSplit.getLength(idx), fileSplit.getLocations());
this.configuration = taskAttemptContext.getConfiguration();
this.index = new IntWritable(idx);
this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
@@ -78,8 +77,9 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
- throws IOException, InterruptedException {
- if (!StringUtils.isBlank(fileFilterClassName) && !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
+ throws IOException, InterruptedException {
+ if (!StringUtils.isBlank(fileFilterClassName) &&
+ !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
try {
pathFilter = (PathFilter) Class.forName(fileFilterClassName).newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
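The catch clause above uses another Java 7 feature this upgrade leans on: multi-catch. A self-contained sketch of reflective instantiation with one handler for three unrelated checked exceptions:

    public class MultiCatchDemo {
      public static void main(String[] args) {
        try {
          // Reflective instantiation can fail in three distinct checked ways.
          Object filter = Class.forName("java.util.ArrayList").newInstance();
          System.out.println(filter.getClass().getName());
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
          // One handler covers all three types; e is implicitly final.
          throw new IllegalStateException(e);
        }
      }
    }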
@@ -106,15 +106,11 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
fileStatuses = fs.listStatus(file);
}
- FSDataInputStream in = null;
if (fileStatuses.length == 1) {
- try {
- in = fs.open(fileStatuses[0].getPath());
+ try (FSDataInputStream in = fs.open(fileStatuses[0].getPath())) {
IOUtils.readFully(in, contents, 0, contents.length);
value.setCapacity(contents.length);
value.set(contents, 0, contents.length);
- } finally {
- Closeables.close(in, false);
}
processed = true;
return true;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
index 1cde4cd..bed4640 100644
--- a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
+++ b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
@@ -19,10 +19,10 @@ package org.apache.mahout.text;
import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
-import com.google.common.collect.Sets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -168,7 +168,7 @@ public final class WikipediaToSequenceFile {
"org.apache.hadoop.io.serializer.JavaSerialization,"
+ "org.apache.hadoop.io.serializer.WritableSerialization");
- Set<String> categories = Sets.newHashSet();
+ Set<String> categories = new HashSet<>();
if (!catFile.isEmpty()) {
for (String line : new FileLineIterable(new File(catFile))) {
categories.add(line.trim().toLowerCase(Locale.ENGLISH));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
index d9df97f..ad55ba7 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
@@ -18,6 +18,7 @@
package org.apache.mahout.text.wikipedia;
import java.io.Reader;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
index 6632ad2..7113629 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
@@ -19,10 +19,10 @@ package org.apache.mahout.text.wikipedia;
import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
-import com.google.common.collect.Sets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -154,7 +154,7 @@ public final class WikipediaDatasetCreatorDriver {
// Don't ever forget this. People should keep track of how hadoop conf
// parameters can make or break a piece of code
- Set<String> categories = Sets.newHashSet();
+ Set<String> categories = new HashSet<>();
for (String line : new FileLineIterable(new File(catFile))) {
categories.add(line.trim().toLowerCase(Locale.ENGLISH));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
index 54a1df3..50e5f37 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
@@ -17,14 +17,6 @@
package org.apache.mahout.text.wikipedia;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.List;
-import java.util.Locale;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
@@ -40,7 +32,14 @@ import org.apache.mahout.common.ClassUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.regex.Pattern;
/**
* Maps over the Wikipedia XML format and outputs all documents having the category listed in the input category
@@ -89,13 +88,13 @@ public class WikipediaDatasetCreatorMapper extends Mapper<LongWritable, Text, Te
Configuration conf = context.getConfiguration();
if (inputCategories == null) {
- Set<String> newCategories = Sets.newHashSet();
+ Set<String> newCategories = new HashSet<>();
DefaultStringifier<Set<String>> setStringifier =
- new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories));
+ new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories));
String categoriesStr = conf.get("wikipedia.categories", setStringifier.toString(newCategories));
Set<String> inputCategoriesSet = setStringifier.fromString(categoriesStr);
- inputCategories = Lists.newArrayList(inputCategoriesSet);
- inputCategoryPatterns = Lists.newArrayListWithCapacity(inputCategories.size());
+ inputCategories = new ArrayList<>(inputCategoriesSet);
+ inputCategoryPatterns = new ArrayList<>(inputCategories.size());
for (String inputCategory : inputCategories) {
inputCategoryPatterns.add(Pattern.compile(".*\\b" + inputCategory + "\\b.*"));
}
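The DefaultStringifier<>(conf, ...) change shows that the diamond operator works for any generic constructor, not only the JDK collections. A sketch with a stand-in generic holder, since constructing a real Hadoop DefaultStringifier requires a Configuration with serializers registered:

    import java.util.HashSet;
    import java.util.Set;

    public class DiamondDemo {
      // A small generic holder, standing in for DefaultStringifier<T>.
      static class Holder<T> {
        private final T value;
        Holder(T value) { this.value = value; }
        T get() { return value; }
      }

      public static void main(String[] args) {
        Set<String> categories = new HashSet<>();               // diamond on a JDK type
        categories.add("science");
        Holder<Set<String>> holder = new Holder<>(categories);  // diamond on a user type
        System.out.println(holder.get());                       // [science]
      }
    }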
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
index d880760..abd3a04 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
@@ -18,12 +18,12 @@
package org.apache.mahout.text.wikipedia;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
@@ -106,9 +106,9 @@ public class WikipediaMapper extends Mapper<LongWritable, Text, Text, Text> {
super.setup(context);
Configuration conf = context.getConfiguration();
- Set<String> newCategories = Sets.newHashSet();
+ Set<String> newCategories = new HashSet<>();
DefaultStringifier<Set<String>> setStringifier =
- new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories));
+ new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories));
String categoriesStr = conf.get("wikipedia.categories");
inputCategories = setStringifier.fromString(categoriesStr);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
index c9a54e9..fc065fe 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
@@ -26,7 +26,6 @@ import java.net.URI;
import java.text.DecimalFormat;
import java.text.NumberFormat;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -219,12 +218,9 @@ public final class WikipediaXmlSplitter {
content.append("</mediawiki>");
fileNumber++;
String filename = outputDirPath + "/chunk-" + decimalFormatter.format(fileNumber) + ".xml";
- BufferedWriter chunkWriter =
- new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"));
- try {
+ try (BufferedWriter chunkWriter =
+ new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"))) {
chunkWriter.write(content.toString(), 0, content.length());
- } finally {
- Closeables.close(chunkWriter, false);
}
if (fileNumber >= numChunks) {
break;
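The converted writer still selects the encoding with the string "UTF-8". A possible further cleanup, not part of this commit, is Java 7's java.nio.charset.StandardCharsets, whose constants avoid both typos and the checked UnsupportedEncodingException; a sketch with a ByteArrayOutputStream standing in for fs.create(new Path(filename)):

    import java.io.BufferedWriter;
    import java.io.ByteArrayOutputStream;
    import java.io.OutputStreamWriter;
    import java.nio.charset.StandardCharsets;

    public class ChunkWriterDemo {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        StringBuilder content = new StringBuilder("<mediawiki>...</mediawiki>");
        try (BufferedWriter chunkWriter = new BufferedWriter(
                 new OutputStreamWriter(sink, StandardCharsets.UTF_8))) {
          chunkWriter.write(content.toString(), 0, content.length());
        }
        System.out.println(sink.size() + " bytes written");
      }
    }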
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
index 7f16f31..afd350f 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
@@ -17,10 +17,8 @@
package org.apache.mahout.text.wikipedia;
-import java.io.IOException;
-
-import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -36,6 +34,8 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
/**
* Reads records that are delimited by a specific begin/end tag.
*/
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
index 1814bd5..33d09a0 100644
--- a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
+++ b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
@@ -17,11 +17,9 @@
package org.apache.mahout.utils;
-
import java.io.IOException;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -113,12 +111,8 @@ public class ConcatenateVectorsJob extends AbstractJob {
Preconditions.checkArgument(paths.length > 0, path.getName() + " is a file, should be a directory");
Path file = paths[0].getPath();
- SequenceFile.Reader reader = null;
- try {
- reader = new SequenceFile.Reader(fs, file, fs.getConf());
+ try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, fs.getConf())) {
return reader.getKeyClass().asSubclass(Writable.class);
- } finally {
- Closeables.close(reader, true);
}
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
index 8ab57be..f63de83 100644
--- a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
@@ -25,7 +25,7 @@ import java.io.PrintStream;
import java.util.List;
import java.util.Map;
-import com.google.common.base.Charsets;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
[2/5] mahout git commit: MAHOUT-1652: Java 7 upgrade,
this closes apache/mahout#112
Posted by sm...@apache.org.
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
index 137b174..86f99b6 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,10 +22,9 @@ import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -40,8 +39,9 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters;
*/
public final class DFUtils {
- private DFUtils() {}
-
+ private DFUtils() {
+ }
+
/**
* Writes an Node[] into a DataOutput
* @throws java.io.IOException
@@ -52,7 +52,7 @@ public final class DFUtils {
w.write(out);
}
}
-
+
/**
* Reads a Node[] from a DataInput
* @throws java.io.IOException
@@ -63,10 +63,10 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
nodes[index] = Node.read(in);
}
-
+
return nodes;
}
-
+
/**
* Writes a double[] into a DataOutput
* @throws java.io.IOException
@@ -77,7 +77,7 @@ public final class DFUtils {
out.writeDouble(value);
}
}
-
+
/**
* Reads a double[] from a DataInput
* @throws java.io.IOException
@@ -88,10 +88,10 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
array[index] = in.readDouble();
}
-
+
return array;
}
-
+
/**
* Writes an int[] into a DataOutput
* @throws java.io.IOException
@@ -102,7 +102,7 @@ public final class DFUtils {
out.writeInt(value);
}
}
-
+
/**
* Reads an int[] from a DataInput
* @throws java.io.IOException
@@ -113,16 +113,16 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
array[index] = in.readInt();
}
-
+
return array;
}
-
+
/**
* Return a list of all files in the output directory
* @throws IOException if no file is found
*/
public static Path[] listOutputFiles(FileSystem fs, Path outputPath) throws IOException {
- List<Path> outputFiles = Lists.newArrayList();
+ List<Path> outputFiles = new ArrayList<>();
for (FileStatus s : fs.listStatus(outputPath, PathFilters.logsCRCFilter())) {
if (!s.isDir() && !s.getPath().getName().startsWith("_")) {
outputFiles.add(s.getPath());
@@ -140,27 +140,24 @@ public final class DFUtils {
public static String elapsedTime(long milli) {
long seconds = milli / 1000;
milli %= 1000;
-
+
long minutes = seconds / 60;
seconds %= 60;
-
+
long hours = minutes / 60;
minutes %= 60;
-
+
return hours + "h " + minutes + "m " + seconds + "s " + milli;
}
public static void storeWritable(Configuration conf, Path path, Writable writable) throws IOException {
FileSystem fs = path.getFileSystem(conf);
- FSDataOutputStream out = fs.create(path);
- try {
+ try (FSDataOutputStream out = fs.create(path)) {
writable.write(out);
- } finally {
- Closeables.close(out, false);
}
}
-
+
/**
* Write a string to a path.
* @param conf From which the file system will be picked
@@ -169,13 +166,8 @@ public final class DFUtils {
* @throws IOException if things go poorly
*/
public static void storeString(Configuration conf, Path path, String string) throws IOException {
- DataOutputStream out = null;
- try {
- out = path.getFileSystem(conf).create(path);
+ try (DataOutputStream out = path.getFileSystem(conf).create(path)) {
out.write(string.getBytes(Charset.defaultCharset()));
- } finally {
- Closeables.close(out, false);
}
}
-
}
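storeWritable and storeString now share the same compact shape. A local-file sketch of storeString's pattern, with FileOutputStream standing in for the HDFS stream returned by path.getFileSystem(conf).create(path):

    import java.io.DataOutputStream;
    import java.io.FileOutputStream;
    import java.nio.charset.Charset;

    public class StoreStringDemo {
      public static void main(String[] args) throws Exception {
        String string = "forest metadata";  // illustrative payload
        try (DataOutputStream out =
                 new DataOutputStream(new FileOutputStream("/tmp/df-demo.txt"))) {
          out.write(string.getBytes(Charset.defaultCharset()));
        }  // closed and flushed here, even if write throws
      }
    }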
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
index 1b47ec7..bb4153e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
@@ -18,8 +18,6 @@
package org.apache.mahout.classifier.df;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +32,7 @@ import org.apache.mahout.classifier.df.node.Node;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -45,7 +44,7 @@ public class DecisionForest implements Writable {
private final List<Node> trees;
private DecisionForest() {
- trees = Lists.newArrayList();
+ trees = new ArrayList<>();
}
public DecisionForest(List<Node> trees) {
@@ -225,15 +224,12 @@ public class DecisionForest implements Writable {
DecisionForest forest = null;
for (Path path : files) {
- FSDataInputStream dataInput = new FSDataInputStream(fs.open(path));
- try {
+ try (FSDataInputStream dataInput = new FSDataInputStream(fs.open(path))) {
if (forest == null) {
forest = read(dataInput);
} else {
forest.readFields(dataInput);
}
- } finally {
- Closeables.close(dataInput, true);
}
}
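Note a small behavior change in this hunk: the old Closeables.close(dataInput, true) swallowed IOExceptions thrown by close(), while the try-with-resources version propagates them (or records them as suppressed). The per-file open/read/merge loop keeps its shape, as in this toy sketch where byte arrays stand in for the part files:

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.util.ArrayList;
    import java.util.List;

    public class MergeReadDemo {
      public static void main(String[] args) throws Exception {
        byte[][] parts = { {1}, {2}, {3} };  // stand-ins for the part files
        List<Integer> merged = null;
        for (byte[] part : parts) {
          // Each iteration opens and reliably closes one input; the
          // accumulator survives across files, as the forest does above.
          try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(part))) {
            if (merged == null) {
              merged = new ArrayList<>();
            }
            merged.add((int) in.readByte());
          }
        }
        System.out.println(merged);  // [1, 2, 3]
      }
    }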
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
index 895188b..8a7d945 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.df.builder;
-import com.google.common.collect.Sets;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.Instance;
@@ -34,6 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
+import java.util.HashSet;
import java.util.Random;
/**
@@ -263,7 +263,7 @@ public class DecisionTreeBuilder implements TreeBuilder {
// tree is complemented
Collection<Double> subsetValues = null;
if (complemented) {
- subsetValues = Sets.newHashSet();
+ subsetValues = new HashSet<>();
for (double value : values) {
subsetValues.add(value);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
index c1bddd9..c68ce52 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
@@ -17,11 +17,11 @@
package org.apache.mahout.classifier.df.data;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.mahout.classifier.df.data.conditions.Condition;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
import java.util.Random;
@@ -38,12 +38,12 @@ public class Data implements Cloneable {
public Data(Dataset dataset) {
this.dataset = dataset;
- this.instances = Lists.newArrayList();
+ this.instances = new ArrayList<>();
}
public Data(Dataset dataset, List<Instance> instances) {
this.dataset = dataset;
- this.instances = Lists.newArrayList(instances);
+ this.instances = new ArrayList<>(instances);
}
/**
@@ -86,7 +86,7 @@ public class Data implements Cloneable {
* @return the subset from this data that matches the given condition
*/
public Data subset(Condition condition) {
- List<Instance> subset = Lists.newArrayList();
+ List<Instance> subset = new ArrayList<>();
for (Instance instance : instances) {
if (condition.isTrueFor(instance)) {
@@ -102,7 +102,7 @@ public class Data implements Cloneable {
*/
public Data bagging(Random rng) {
int datasize = size();
- List<Instance> bag = Lists.newArrayListWithCapacity(datasize);
+ List<Instance> bag = new ArrayList<>(datasize);
for (int i = 0; i < datasize; i++) {
bag.add(instances.get(rng.nextInt(datasize)));
@@ -121,7 +121,7 @@ public class Data implements Cloneable {
*/
public Data bagging(Random rng, boolean[] sampled) {
int datasize = size();
- List<Instance> bag = Lists.newArrayListWithCapacity(datasize);
+ List<Instance> bag = new ArrayList<>(datasize);
for (int i = 0; i < datasize; i++) {
int index = rng.nextInt(datasize);
@@ -136,7 +136,7 @@ public class Data implements Cloneable {
* Splits the data in two, returns one part, and this gets the rest of the data. <b>VERY SLOW!</b>
*/
public Data rsplit(Random rng, int subsize) {
- List<Instance> subset = Lists.newArrayListWithCapacity(subsize);
+ List<Instance> subset = new ArrayList<>(subsize);
for (int i = 0; i < subsize; i++) {
subset.add(instances.remove(rng.nextInt(instances.size())));
@@ -190,7 +190,7 @@ public class Data implements Cloneable {
* finds all distinct values of a given attribute
*/
public double[] values(int attr) {
- Collection<Double> result = Sets.newHashSet();
+ Collection<Double> result = new HashSet<>();
for (Instance instance : instances) {
result.add(instance.get(attr));
@@ -208,7 +208,7 @@ public class Data implements Cloneable {
@Override
public Data clone() {
- return new Data(dataset, Lists.newArrayList(instances));
+ return new Data(dataset, new ArrayList<>(instances));
}
@Override
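Lists.newArrayList(instances) and new ArrayList<>(instances) are equivalent: both take a defensive copy, so the clone stays independent of later mutations of the source list. A short demonstration:

    import java.util.ArrayList;
    import java.util.List;

    public class DefensiveCopyDemo {
      public static void main(String[] args) {
        List<String> instances = new ArrayList<>();
        instances.add("a");
        List<String> copy = new ArrayList<>(instances);  // copy constructor = defensive copy
        instances.add("b");                              // mutate the original...
        System.out.println(copy);                        // [a] - the copy is unaffected
      }
    }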
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
index 8eed6cf..c8d9dcd 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
@@ -19,7 +19,6 @@ package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -28,6 +27,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
@@ -80,7 +81,7 @@ public final class DataLoader {
if (attrs[attr].isCategorical() || (!regression && attrs[attr].isLabel())) {
// update values
if (values[attr] == null) {
- values[attr] = Sets.newHashSet();
+ values[attr] = new HashSet<>();
}
values[attr].add(token);
} else {
@@ -111,7 +112,7 @@ public final class DataLoader {
FSDataInputStream input = fs.open(fpath);
Scanner scanner = new Scanner(input, "UTF-8");
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
DataConverter converter = new DataConverter(dataset);
@@ -137,7 +138,7 @@ public final class DataLoader {
/** Loads the data from multiple paths specified by pathes */
public static Data loadData(Dataset dataset, FileSystem fs, Path[] pathes) throws IOException {
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
for (Path path : pathes) {
Data loadedData = loadData(dataset, fs, path);
@@ -150,7 +151,7 @@ public final class DataLoader {
/** Loads the data from a String array */
public static Data loadData(Dataset dataset, String[] data) {
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
DataConverter converter = new DataConverter(dataset);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
index 856d452..3eb126c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
@@ -18,8 +18,8 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -71,7 +71,7 @@ public final class DataUtils {
*/
public static int maxindex(Random rng, int[] values) {
int max = 0;
- List<Integer> maxindices = Lists.newArrayList();
+ List<Integer> maxindices = new ArrayList<>();
for (int index = 0; index < values.length; index++) {
if (values[index] > max) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
index d2bec37..413389f 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
@@ -18,8 +18,6 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
@@ -32,6 +30,8 @@ import org.codehaus.jackson.type.TypeReference;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -336,7 +336,7 @@ public class Dataset {
* @return some JSON
*/
public String toJSON() {
- List<Map<String, Object>> toWrite = Lists.newLinkedList();
+ List<Map<String, Object>> toWrite = new LinkedList<>();
// attributes does not include ignored columns and it does include the class label
int ignoredCount = 0;
for (int i = 0; i < attributes.length + ignored.length; i++) {
@@ -374,8 +374,8 @@ public class Dataset {
} catch (Exception ex) {
throw new RuntimeException(ex);
}
- List<Attribute> attributes = Lists.newLinkedList();
- List<Integer> ignored = Lists.newLinkedList();
+ List<Attribute> attributes = new LinkedList<>();
+ List<Integer> ignored = new LinkedList<>();
String[][] nominalValues = new String[fromJSON.size()][];
Dataset dataset = new Dataset();
for (int i = 0; i < fromJSON.size(); i++) {
@@ -412,7 +412,7 @@ public class Dataset {
* @return map of (AttributeTypes, Values)
*/
private Map<String, Object> getMap(Attribute type, String[] values, boolean isLabel) {
- Map<String, Object> attribute = Maps.newHashMap();
+ Map<String, Object> attribute = new HashMap<>();
attribute.put(TYPE, type.toString().toLowerCase(Locale.getDefault()));
attribute.put(VALUES, values);
attribute.put(LABEL, isLabel);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
index a2198b1..f2e0ce4 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
@@ -18,9 +18,9 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.df.data.Dataset.Attribute;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@@ -40,7 +40,7 @@ public final class DescriptorUtils {
* if a bad token is encountered
*/
public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException {
- List<Attribute> attributes = Lists.newArrayList();
+ List<Attribute> attributes = new ArrayList<>();
for (String token : SPACE.split(descriptor)) {
token = token.toUpperCase(Locale.ENGLISH);
if ("I".equals(token)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
index b8e5c2d..bdbaf2b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
@@ -17,11 +17,6 @@
package org.apache.mahout.classifier.df.mapreduce;
-import java.io.IOException;
-import java.util.List;
-import java.util.Random;
-
-import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -51,6 +46,11 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
/**
* MapReduce implementation that classifies the input data using a previously built decision forest
*/
@@ -144,7 +144,7 @@ public class Classifier {
Path[] outfiles = DFUtils.listOutputFiles(fs, mappersOutputPath);
// read all the output
- List<double[]> resList = Lists.newArrayList();
+ List<double[]> resList = new ArrayList<>();
for (Path path : outfiles) {
FSDataOutputStream ofile = null;
try {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
index 573a1e0..4c33e73 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
@@ -17,8 +17,12 @@
package org.apache.mahout.classifier.df.mapreduce.inmem;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
@@ -36,10 +40,6 @@ import org.apache.mahout.classifier.df.node.Node;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
/**
* MapReduce implementation where each mapper loads a full copy of the data in memory. The forest trees are
* split across all the mappers
@@ -80,7 +80,7 @@ public class InMemBuilder extends Builder {
protected DecisionForest parseOutput(Job job) throws IOException {
Configuration conf = job.getConfiguration();
- Map<Integer,MapredOutput> output = Maps.newHashMap();
+ Map<Integer,MapredOutput> output = new HashMap<>();
Path outputPath = getOutputPath(conf);
FileSystem fs = outputPath.getFileSystem(conf);
@@ -101,7 +101,7 @@ public class InMemBuilder extends Builder {
* Process the output, extracting the trees
*/
private static DecisionForest processOutput(Map<Integer,MapredOutput> output) {
- List<Node> trees = Lists.newArrayList();
+ List<Node> trees = new ArrayList<>();
for (Map.Entry<Integer,MapredOutput> entry : output.entrySet()) {
MapredOutput value = entry.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
index a39218e..51e5a3e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
@@ -17,8 +17,15 @@
package org.apache.mahout.classifier.df.mapreduce.inmem;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
@@ -33,13 +40,6 @@ import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Random;
-
/**
* Custom InputFormat that generates InputSplits given the desired number of trees.<br>
 * Each input split contains a subset of the trees.<br>
@@ -94,7 +94,7 @@ public class InMemInputFormat extends InputFormat<IntWritable,NullWritable> {
int id = 0;
- List<InputSplit> splits = Lists.newArrayListWithCapacity(numSplits);
+ List<InputSplit> splits = new ArrayList<>(numSplits);
for (int index = 0; index < numSplits - 1; index++) {
splits.add(new InMemInputSplit(id, splitSize, nextSeed()));
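
Where the Guava call presized the list, the ArrayList(int) constructor carries the capacity hint across unchanged. A short sketch; numSplits is an illustrative value:

    import java.util.ArrayList;
    import java.util.List;

    public class CapacitySketch {
      public static void main(String[] args) {
        int numSplits = 8;
        // Equivalent to Lists.newArrayListWithCapacity(numSplits):
        // presizes the backing array, but the list still starts empty.
        List<String> splits = new ArrayList<>(numSplits);
        for (int i = 0; i < numSplits; i++) {
          splits.add("split-" + i);
        }
        System.out.println(splits.size()); // 8
      }
    }
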
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
index eaf0b15..648472c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
@@ -18,7 +18,6 @@
package org.apache.mahout.classifier.df.mapreduce.partial;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -35,6 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -61,7 +61,7 @@ public class Step1Mapper extends MapredMapper<LongWritable,Text,TreeID,MapredOut
private int partition;
  /** will contain all instances of this mapper's split */
- private final List<Instance> instances = Lists.newArrayList();
+ private final List<Instance> instances = new ArrayList<>();
public int getFirstTreeId() {
return firstTreeId;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
index 292b591..d7f023b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.df.ref;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.df.Bagging;
import org.apache.mahout.classifier.df.DecisionForest;
import org.apache.mahout.classifier.df.builder.TreeBuilder;
@@ -26,6 +25,7 @@ import org.apache.mahout.classifier.df.node.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -56,7 +56,7 @@ public class SequentialBuilder {
}
public DecisionForest build(int nbTrees) {
- List<Node> trees = Lists.newArrayList();
+ List<Node> trees = new ArrayList<>();
for (int treeId = 0; treeId < nbTrees; treeId++) {
trees.add(bagging.build(rng));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
index 58814a8..226d3db 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
@@ -17,7 +17,11 @@
package org.apache.mahout.classifier.df.tools;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -38,10 +42,6 @@ import org.apache.mahout.common.CommandLineUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-
/**
* Generates a file descriptor for a given dataset
*/
@@ -138,7 +138,7 @@ public final class Describe {
}
private static List<String> convert(Collection<?> values) {
- List<String> list = Lists.newArrayListWithCapacity(values.size());
+ List<String> list = new ArrayList<>(values.size());
for (Object value : values) {
list.add(value.toString());
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
index 056bd48..f4e765c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
@@ -19,10 +19,12 @@ package org.apache.mahout.classifier.mlp;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -41,10 +43,6 @@ import org.apache.mahout.math.function.DoubleFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/**
 * NeuralNetwork defines the general operations for a neural network
* based model. Typically, all derivative models such as Multilayer Perceptron
@@ -63,7 +61,7 @@ public abstract class NeuralNetwork {
/* The default momentum weight */
public static final double DEFAULT_MOMENTUM_WEIGHT = 0.1;
- public static enum TrainingMethod { GRADIENT_DESCENT }
+ public enum TrainingMethod { GRADIENT_DESCENT }
/* The name of the model */
protected String modelType;
@@ -113,11 +111,11 @@ public abstract class NeuralNetwork {
costFunctionName = "Minus_Squared";
modelType = getClass().getSimpleName();
- layerSizeList = Lists.newArrayList();
- layerSizeList = Lists.newArrayList();
- weightMatrixList = Lists.newArrayList();
- prevWeightUpdatesList = Lists.newArrayList();
- squashingFunctionList = Lists.newArrayList();
+ layerSizeList = new ArrayList<>();
+ weightMatrixList = new ArrayList<>();
+ prevWeightUpdatesList = new ArrayList<>();
+ squashingFunctionList = new ArrayList<>();
}
/**
@@ -350,7 +348,7 @@ public abstract class NeuralNetwork {
* existing matrices.
*/
public void setWeightMatrices(Matrix[] matrices) {
- weightMatrixList = Lists.newArrayList();
+ weightMatrixList = new ArrayList<>();
Collections.addAll(weightMatrixList, matrices);
}
@@ -411,7 +409,7 @@ public abstract class NeuralNetwork {
* @return Cached output of each layer.
*/
protected List<Vector> getOutputInternal(Vector instance) {
- List<Vector> outputCache = Lists.newArrayList();
+ List<Vector> outputCache = new ArrayList<>();
// fill with instance
Vector intermediateOutput = instance;
outputCache.add(intermediateOutput);
@@ -592,14 +590,10 @@ public abstract class NeuralNetwork {
protected void readFromModel() throws IOException {
log.info("Load model from {}", modelPath);
Preconditions.checkArgument(modelPath != null, "Model path has not been set.");
- FSDataInputStream is = null;
- try {
- Path path = new Path(modelPath);
- FileSystem fs = path.getFileSystem(new Configuration());
- is = new FSDataInputStream(fs.open(path));
+ Path path = new Path(modelPath);
+ FileSystem fs = path.getFileSystem(new Configuration());
+ try (FSDataInputStream is = new FSDataInputStream(fs.open(path))) {
readFields(is);
- } finally {
- Closeables.close(is, true);
}
}
@@ -611,14 +605,10 @@ public abstract class NeuralNetwork {
public void writeModelToFile() throws IOException {
log.info("Write model to {}.", modelPath);
Preconditions.checkArgument(modelPath != null, "Model path has not been set.");
- FSDataOutputStream stream = null;
- try {
- Path path = new Path(modelPath);
- FileSystem fs = path.getFileSystem(new Configuration());
- stream = fs.create(path, true);
+ Path path = new Path(modelPath);
+ FileSystem fs = path.getFileSystem(new Configuration());
+ try (FSDataOutputStream stream = fs.create(path, true)) {
write(stream);
- } finally {
- Closeables.close(stream, false);
}
}
@@ -717,7 +707,7 @@ public abstract class NeuralNetwork {
// Read layer size list
int numLayers = input.readInt();
- layerSizeList = Lists.newArrayList();
+ layerSizeList = new ArrayList<>();
for (int i = 0; i < numLayers; i++) {
layerSizeList.add(input.readInt());
}
@@ -726,15 +716,15 @@ public abstract class NeuralNetwork {
// Read squash functions
int squashingFunctionSize = input.readInt();
- squashingFunctionList = Lists.newArrayList();
+ squashingFunctionList = new ArrayList<>();
for (int i = 0; i < squashingFunctionSize; i++) {
squashingFunctionList.add(WritableUtils.readString(input));
}
// Read weights and construct matrices of previous updates
int numOfMatrices = input.readInt();
- weightMatrixList = Lists.newArrayList();
- prevWeightUpdatesList = Lists.newArrayList();
+ weightMatrixList = new ArrayList<>();
+ prevWeightUpdatesList = new ArrayList<>();
for (int i = 0; i < numOfMatrices; i++) {
Matrix matrix = MatrixWritable.readMatrix(input);
weightMatrixList.add(matrix);
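
The readFromModel and writeModelToFile hunks above show the patch's second recurring idiom: Java 7 try-with-resources replaces try/finally plus Closeables.close(). Anything declared in the resource clause implements AutoCloseable and is closed on every exit path, normal or exceptional. A minimal sketch against a local file; the path is illustrative:

    import java.io.DataOutputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;

    public class TryWithResourcesSketch {
      public static void main(String[] args) throws IOException {
        // Before: out = ...; try { out.writeFloat(...); } finally { Closeables.close(out, false); }
        // After: the resource clause guarantees out.close() even if the body throws.
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream("model.bin"))) {
          out.writeFloat(0.5f);
        }
      }
    }
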
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
index 6130530..270ea43 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
@@ -22,6 +22,7 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -41,9 +42,6 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/** Run {@link MultilayerPerceptron} classification.
 * @deprecated as of 0.10.0.
* */
@@ -61,11 +59,11 @@ public class RunMultilayerPerceptron {
int columnEnd;
boolean skipHeader;
}
-
+
public static void main(String[] args) throws Exception {
-
+
Parameters parameters = new Parameters();
-
+
if (parseArgs(args, parameters)) {
log.info("Load model from {}.", parameters.modelFilePathStr);
MultilayerPerceptron mlp = new MultilayerPerceptron(parameters.modelFilePathStr);
@@ -98,15 +96,10 @@ public class RunMultilayerPerceptron {
log.info("Read from column {} to column {}.", parameters.columnStart, parameters.columnEnd);
- BufferedWriter writer = null;
- BufferedReader reader = null;
- try {
- writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath)));
- reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)));
-
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath)));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)))) {
String line;
-
if (parameters.skipHeader) {
reader.readLine();
}
@@ -125,9 +118,6 @@ public class RunMultilayerPerceptron {
}
mlp.close();
log.info("Labeling finished.");
- } finally {
- Closeables.close(reader, true);
- Closeables.close(writer, true);
}
}
}
@@ -154,7 +144,7 @@ public class RunMultilayerPerceptron {
      .withDescription("type of input file, currently supports 'csv'")
.create();
- List<Integer> columnRangeDefault = Lists.newArrayList();
+ List<Integer> columnRangeDefault = new ArrayList<>();
columnRangeDefault.add(0);
columnRangeDefault.add(Integer.MAX_VALUE);
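
Note the two-resource clause in the hunk above: resources are closed in reverse declaration order, so the reader, declared second, is closed before the writer. A sketch with illustrative file names; in.txt is assumed to exist:

    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;

    public class TwoResourceSketch {
      public static void main(String[] args) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(
                 new OutputStreamWriter(new FileOutputStream("out.txt")));
             BufferedReader reader = new BufferedReader(
                 new InputStreamReader(new FileInputStream("in.txt")))) {
          String line;
          while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
          }
        } // on exit: reader closes first, then writer (reverse declaration order)
      }
    }
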
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
index a194c4c..d634aa5 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
@@ -19,9 +19,12 @@ package org.apache.mahout.classifier.mlp;
import java.io.BufferedReader;
import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import com.google.common.base.Preconditions;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -38,11 +41,6 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
/** Train a {@link MultilayerPerceptron}.
 * @deprecated as of 0.10.0.
* */
@@ -50,7 +48,7 @@ import com.google.common.io.Closeables;
public final class TrainMultilayerPerceptron {
private static final Logger log = LoggerFactory.getLogger(TrainMultilayerPerceptron.class);
-
+
/** The parameters used by MLP. */
static class Parameters {
double learningRate;
@@ -59,31 +57,17 @@ public final class TrainMultilayerPerceptron {
String inputFilePath;
boolean skipHeader;
- Map<String, Integer> labelsIndex = Maps.newHashMap();
+ Map<String, Integer> labelsIndex = new HashMap<>();
String modelFilePath;
boolean updateModel;
- List<Integer> layerSizeList = Lists.newArrayList();
+ List<Integer> layerSizeList = new ArrayList<>();
String squashingFunctionName;
}
- /*
- private double learningRate;
- private double momemtumWeight;
- private double regularizationWeight;
-
- private String inputFilePath;
- private boolean skipHeader;
- private Map<String, Integer> labelsIndex = Maps.newHashMap();
-
- private String modelFilePath;
- private boolean updateModel;
- private List<Integer> layerSizeList = Lists.newArrayList();
- private String squashingFunctionName;*/
-
public static void main(String[] args) throws Exception {
Parameters parameters = new Parameters();
-
+
if (parseArgs(args, parameters)) {
log.info("Validate model...");
// check whether the model already exists
@@ -109,31 +93,28 @@ public final class TrainMultilayerPerceptron {
}
mlp.setCostFunction("Minus_Squared");
mlp.setLearningRate(parameters.learningRate)
- .setMomentumWeight(parameters.momemtumWeight)
- .setRegularizationWeight(parameters.regularizationWeight);
+ .setMomentumWeight(parameters.momemtumWeight)
+ .setRegularizationWeight(parameters.regularizationWeight);
}
mlp.setModelPath(parameters.modelFilePath);
}
// set the parameters
mlp.setLearningRate(parameters.learningRate)
- .setMomentumWeight(parameters.momemtumWeight)
- .setRegularizationWeight(parameters.regularizationWeight);
+ .setMomentumWeight(parameters.momemtumWeight)
+ .setRegularizationWeight(parameters.regularizationWeight);
// train by the training data
Path trainingDataPath = new Path(parameters.inputFilePath);
FileSystem dataFs = trainingDataPath.getFileSystem(new Configuration());
Preconditions.checkArgument(dataFs.exists(trainingDataPath), "Training dataset %s cannot be found!",
- parameters.inputFilePath);
+ parameters.inputFilePath);
log.info("Read data and train model...");
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath)));
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath)))) {
String line;
-
// read training data line by line
if (parameters.skipHeader) {
reader.readLine();
@@ -163,15 +144,13 @@ public final class TrainMultilayerPerceptron {
log.info("Write trained model to {}", parameters.modelFilePath);
mlp.writeModelToFile();
mlp.close();
- } finally {
- Closeables.close(reader, true);
}
}
}
/**
* Parse the input arguments.
- *
+ *
* @param args The input arguments
* @param parameters The parameters parsed.
* @return Whether the input arguments are valid.
@@ -196,7 +175,7 @@ public final class TrainMultilayerPerceptron {
.withRequired(true)
.withChildren(skipHeaderGroup)
.withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1)
- .create()).withDescription("the file path of training dataset")
+ .create()).withDescription("the file path of training dataset")
.create();
Option labelsOption = optionBuilder
@@ -295,9 +274,9 @@ public final class TrainMultilayerPerceptron {
parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption);
System.out.printf("Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f," +
- " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath,
- parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()),
- parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight,
+      " Momentum weight: %f, Regularization weight: %f\n", parameters.inputFilePath, parameters.modelFilePath,
+ parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()),
+ parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight,
parameters.regularizationWeight);
return true;
@@ -321,7 +300,7 @@ public final class TrainMultilayerPerceptron {
static List<Integer> getIntegerList(CommandLine commandLine, Option option) {
List<String> list = commandLine.getValues(option);
- List<Integer> valList = Lists.newArrayList();
+ List<Integer> valList = new ArrayList<>();
for (String str : list) {
valList.add(Integer.parseInt(str));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
index 1e5171c..c09dd83 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
@@ -20,9 +20,11 @@ package org.apache.mahout.classifier.naivebayes;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
import java.util.regex.Pattern;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -44,11 +46,6 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-
public final class BayesUtils {
private static final Pattern SLASH = Pattern.compile("/");
@@ -104,14 +101,11 @@ public final class BayesUtils {
public static int writeLabelIndex(Configuration conf, Iterable<String> labels, Path indexPath)
throws IOException {
FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class);
int i = 0;
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)) {
for (String label : labels) {
writer.append(new Text(label), new IntWritable(i++));
}
- } finally {
- Closeables.close(writer, false);
}
return i;
}
@@ -119,10 +113,9 @@ public final class BayesUtils {
public static int writeLabelIndex(Configuration conf, Path indexPath,
Iterable<Pair<Text,IntWritable>> labels) throws IOException {
FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class);
- Collection<String> seen = Sets.newHashSet();
+ Collection<String> seen = new HashSet<>();
int i = 0;
- try {
+    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)) {
for (Object label : labels) {
String theLabel = SLASH.split(((Pair<?, ?>) label).getFirst().toString())[1];
if (!seen.contains(theLabel)) {
@@ -130,8 +123,6 @@ public final class BayesUtils {
seen.add(theLabel);
}
}
- } finally {
- Closeables.close(writer, false);
}
return i;
}
@@ -154,7 +145,7 @@ public final class BayesUtils {
}
public static Map<String,Vector> readScoresFromCache(Configuration conf) throws IOException {
- Map<String,Vector> sumVectors = Maps.newHashMap();
+ Map<String,Vector> sumVectors = new HashMap<>();
for (Pair<Text,VectorWritable> entry
: new SequenceFileDirIterable<Text,VectorWritable>(HadoopUtil.getSingleCachedFile(conf),
PathType.LIST, PathFilters.partFilter(), conf)) {
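
One small simplification that writeLabelIndex leaves on the table: Set.add() already reports whether the element was new, so the contains()-then-add() pair could collapse into a single call. A sketch with illustrative labels, not the Mahout code path:

    import java.util.Arrays;
    import java.util.Collection;
    import java.util.HashSet;
    import java.util.List;

    public class DedupSketch {
      public static void main(String[] args) {
        List<String> labels = Arrays.asList("spam", "ham", "spam");
        Collection<String> seen = new HashSet<>();
        int i = 0;
        for (String label : labels) {
          // add() returns false for duplicates, replacing the contains() check
          if (seen.add(label)) {
            System.out.println(label + " -> " + i++);
          }
        }
      }
    }
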
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
index f180e8b..9f85aab 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
@@ -31,7 +31,6 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
/** NaiveBayesModel holds the weight matrix, the feature and label sums and the weight normalizer vectors.*/
public class NaiveBayesModel {
@@ -102,15 +101,14 @@ public class NaiveBayesModel {
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
FileSystem fs = output.getFileSystem(conf);
- Vector weightsPerLabel = null;
+ Vector weightsPerLabel;
Vector perLabelThetaNormalizer = null;
- Vector weightsPerFeature = null;
+ Vector weightsPerFeature;
Matrix weightsPerLabelAndFeature;
float alphaI;
boolean isComplementary;
- FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
- try {
+ try (FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"))) {
alphaI = in.readFloat();
isComplementary = in.readBoolean();
weightsPerFeature = VectorWritable.readVector(in);
@@ -122,9 +120,8 @@ public class NaiveBayesModel {
for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
}
- } finally {
- Closeables.close(in, true);
}
+
NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
perLabelThetaNormalizer, alphaI, isComplementary);
model.validate();
@@ -133,8 +130,7 @@ public class NaiveBayesModel {
public void serialize(Path output, Configuration conf) throws IOException {
FileSystem fs = output.getFileSystem(conf);
- FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"));
- try {
+ try (FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"))) {
out.writeFloat(alphaI);
out.writeBoolean(isComplementary);
VectorWritable.writeVector(out, weightsPerFeature);
@@ -145,8 +141,6 @@ public class NaiveBayesModel {
for (int row = 0; row < weightsPerLabelAndFeature.numRows(); row++) {
VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(row));
}
- } finally {
- Closeables.close(out, false);
}
}
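
One behavioral nuance of these conversions is worth keeping in mind: Closeables.close(in, true) swallowed any IOException thrown by close(), whereas try-with-resources never silently drops it. If the body throws, the close() exception is attached to the body's exception as a suppressed exception; if only close() throws, it propagates normally. A self-contained sketch of the suppression mechanics:

    public class SuppressedSketch {
      static class Noisy implements AutoCloseable {
        @Override
        public void close() {
          throw new IllegalStateException("close failed");
        }
      }

      public static void main(String[] args) {
        try {
          try (Noisy n = new Noisy()) {
            throw new RuntimeException("body failed");
          }
        } catch (RuntimeException e) {
          // The body's exception wins; close()'s exception is recorded, not lost.
          System.out.println(e.getMessage());                    // body failed
          System.out.println(e.getSuppressed()[0].getMessage()); // close failed
        }
      }
    }
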
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
index 8fd422f..d9eedcf 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
@@ -17,13 +17,12 @@
package org.apache.mahout.classifier.naivebayes.test;
-import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
-import com.google.common.io.Closeables;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -127,10 +126,10 @@ public class TestNaiveBayesDriver extends AbstractJob {
} else {
classifier = new StandardNaiveBayesClassifier(model);
}
- SequenceFile.Writer writer = SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"),
- Text.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"),
+ Text.class, VectorWritable.class)) {
SequenceFileDirIterable<Text, VectorWritable> dirIterable =
new SequenceFileDirIterable<>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
// loop through the part-r-* files in getInputPath() and get classification scores for all entries
@@ -138,8 +137,6 @@ public class TestNaiveBayesDriver extends AbstractJob {
writer.append(new Text(SLASH.split(pair.getFirst().toString())[1]),
new VectorWritable(classifier.classifyFull(pair.getSecond().get())));
}
- } finally {
- Closeables.close(writer, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
index 942a101..6d4e2b0 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
@@ -21,12 +21,11 @@ import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -95,7 +94,7 @@ public final class BaumWelchTrainer {
//constructing random-generated HMM
HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime());
- List<Integer> observations = Lists.newArrayList();
+ List<Integer> observations = new ArrayList<>();
//reading observations
try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) {
@@ -114,11 +113,8 @@ public final class BaumWelchTrainer {
observationsArray, epsilon, maxIterations, true);
//serializing trained model
- DataOutputStream stream = new DataOutputStream(new FileOutputStream(output));
- try {
+    try (DataOutputStream stream = new DataOutputStream(new FileOutputStream(output))) {
LossyHmmSerializer.serialize(trainedModel, stream);
- } finally {
- Closeables.close(stream, false);
}
    //printing trained model
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
index 521be09..e710816 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
@@ -17,11 +17,12 @@
package org.apache.mahout.classifier.sequencelearning.hmm;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
@@ -29,8 +30,6 @@ import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;
-import com.google.common.base.Preconditions;
-
/**
* A collection of utilities for handling HMMModel objects.
*/
@@ -257,7 +256,7 @@ public final class HmmUtils {
int[] sequence,
boolean observed,
String defaultValue) {
- List<String> decoded = Lists.newArrayListWithCapacity(sequence.length);
+ List<String> decoded = new ArrayList<>(sequence.length);
for (int position : sequence) {
String nextState;
if (observed) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
index cd2ced1..02baef1 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
@@ -25,8 +25,6 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
-import com.google.common.base.Charsets;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -35,6 +33,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.CommandLineUtil;
/**
@@ -80,26 +79,21 @@ public final class RandomSequenceGenerator {
int length = Integer.parseInt((String) commandLine.getValue(lengthOption));
//reading serialized HMM
- DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
HmmModel model;
- try {
+    try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))) {
model = LossyHmmSerializer.deserialize(modelStream);
- } finally {
- Closeables.close(modelStream, true);
}
//generating observations
int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis());
//writing output
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
- try {
+ try (PrintWriter writer =
+        new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)) {
for (int observation : observations) {
writer.print(observation);
writer.print(' ');
}
- } finally {
- Closeables.close(writer, false);
}
} catch (OptionException e) {
CommandLineUtil.printHelp(optionGroup);
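
Alongside the collection cleanups, this file swaps com.google.common.base.Charsets for org.apache.commons.io.Charsets. JDK 7 itself ships java.nio.charset.StandardCharsets, which would remove the third-party import entirely; a sketch of the JDK-only form, with an illustrative output path:

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.io.PrintWriter;
    import java.nio.charset.StandardCharsets;

    public class CharsetSketch {
      public static void main(String[] args) throws IOException {
        try (PrintWriter writer = new PrintWriter(
                 new OutputStreamWriter(new FileOutputStream("observations.txt"),
                                        StandardCharsets.UTF_8), true)) {
          writer.print(42);
          writer.print(' ');
        }
      }
    }
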
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
index fb64385..317237d 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
@@ -23,12 +23,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -37,6 +35,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -82,16 +81,14 @@ public final class ViterbiEvaluator {
boolean computeLikelihood = commandLine.hasOption(likelihoodOption);
//reading serialized HMM
- DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
HmmModel model;
- try {
+ try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))) {
model = LossyHmmSerializer.deserialize(modelStream);
- } finally {
- Closeables.close(modelStream, true);
}
//reading observations
- List<Integer> observations = Lists.newArrayList();
+ List<Integer> observations = new ArrayList<>();
try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) {
while (scanner.hasNextInt()) {
observations.add(scanner.nextInt());
@@ -107,14 +104,12 @@ public final class ViterbiEvaluator {
int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true);
//writing output
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
- try {
+ try (PrintWriter writer =
+ new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)) {
for (int hiddenState : hiddenStates) {
writer.print(hiddenState);
writer.print(' ');
}
- } finally {
- Closeables.close(writer, false);
}
if (computeLikelihood) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
index d00b021..24e5798 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.OnlineLearner;
import org.apache.mahout.ep.EvolutionaryProcess;
@@ -33,6 +32,7 @@ import org.slf4j.LoggerFactory;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
@@ -79,7 +79,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
private int currentStep = 1000;
private int bufferSize = 1000;
- private List<TrainingExample> buffer = Lists.newArrayList();
+ private List<TrainingExample> buffer = new ArrayList<>();
private EvolutionaryProcess<Wrapper, CrossFoldLearner> ep;
private State<Wrapper, CrossFoldLearner> best;
private int threadCount = DEFAULT_THREAD_COUNT;
@@ -118,7 +118,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
this.numFeatures = numFeatures;
this.threadCount = threadCount;
this.poolSize = poolSize;
- seed = new State<Wrapper, CrossFoldLearner>(new double[2], 10);
+ seed = new State<>(new double[2], 10);
Wrapper w = new Wrapper(numCategories, numFeatures, prior);
seed.setPayload(w);
@@ -284,7 +284,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
}
private void setupOptimizer(int poolSize) {
- ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>(threadCount, poolSize, seed);
+ ep = new EvolutionaryProcess<>(threadCount, poolSize, seed);
}
/**
@@ -561,22 +561,22 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
bufferSize = in.readInt();
int n = in.readInt();
- buffer = Lists.newArrayList();
+ buffer = new ArrayList<>();
for (int i = 0; i < n; i++) {
TrainingExample example = new TrainingExample();
example.readFields(in);
buffer.add(example);
}
- ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>();
+ ep = new EvolutionaryProcess<>();
ep.readFields(in);
- best = new State<Wrapper, CrossFoldLearner>();
+ best = new State<>();
best.readFields(in);
threadCount = in.readInt();
poolSize = in.readInt();
- seed = new State<Wrapper, CrossFoldLearner>();
+ seed = new State<>();
seed.readFields(in);
numFeatures = in.readInt();
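
The diamond pays off most with multi-parameter generics such as State<Wrapper, CrossFoldLearner>, where Java 6 forced the full argument list on both sides of the assignment. A sketch with illustrative types:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class NestedDiamondSketch {
      public static void main(String[] args) {
        // Java 6: Map<String, List<double[]>> byLabel = new HashMap<String, List<double[]>>();
        // Java 7: both type arguments are inferred from the declaration.
        Map<String, List<double[]>> byLabel = new HashMap<>();
        List<double[]> scores = new ArrayList<>();
        scores.add(new double[] {0.9, 0.1});
        byLabel.put("spam", scores);
        System.out.println(byLabel.keySet());
      }
    }
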
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
index 36bcae0..f56814b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.classifier.OnlineLearner;
@@ -31,6 +30,7 @@ import org.apache.mahout.math.stats.OnlineAuc;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -47,7 +47,7 @@ public class CrossFoldLearner extends AbstractVectorClassifier implements Online
private static final double MIN_SCORE = 1.0e-50;
private OnlineAuc auc = new GlobalOnlineAuc();
private double logLikelihood;
- private final List<OnlineLogisticRegression> models = Lists.newArrayList();
+ private final List<OnlineLogisticRegression> models = new ArrayList<>();
// lambda, learningRate, perTermOffset, perTermExponent
private double[] parameters = new double[4];
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
index b21860f..dbf3198 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
@@ -22,7 +22,6 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.commons.csv.CSVUtils;
import org.apache.mahout.math.Vector;
@@ -36,11 +35,14 @@ import org.apache.mahout.vectorizer.encoders.TextValueEncoder;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
/**
* Converts CSV data lines to vectors.
@@ -81,7 +83,7 @@ public class CsvRecordFactory implements RecordFactory {
.put("t", TextValueEncoder.class)
.build();
- private final Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
+ private final Map<String, Set<Integer>> traceDictionary = new TreeMap<>();
private int target;
private final Dictionary targetDictionary;
@@ -113,7 +115,7 @@ public class CsvRecordFactory implements RecordFactory {
return Arrays.asList(CSVUtils.parseLine(line));
}
catch (IOException e) {
- List<String> list = Lists.newArrayList();
+ List<String> list = new ArrayList<>();
list.add(line);
return list;
}
@@ -186,7 +188,7 @@ public class CsvRecordFactory implements RecordFactory {
@Override
public void firstLine(String line) {
// read variable names, build map of name -> column
- final Map<String, Integer> vars = Maps.newHashMap();
+ final Map<String, Integer> vars = new HashMap<>();
variableNames = parseCsvLine(line);
int column = 0;
for (String var : variableNames) {
@@ -202,7 +204,7 @@ public class CsvRecordFactory implements RecordFactory {
}
// create list of predictor column numbers
- predictors = Lists.newArrayList(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() {
+ predictors = new ArrayList<>(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() {
@Override
public Integer apply(String from) {
Integer r = vars.get(from);
@@ -217,7 +219,7 @@ public class CsvRecordFactory implements RecordFactory {
Collections.sort(predictors);
// and map from column number to type encoder for each column that is a predictor
- predictorEncoders = Maps.newHashMap();
+ predictorEncoders = new HashMap<>();
for (Integer predictor : predictors) {
String name;
Class<? extends FeatureVectorEncoder> c;
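
A subtlety in the predictors hunk above: Lists.newArrayList(...) accepts any Iterable, while the ArrayList copy constructor only takes a Collection. The replacement is safe here because Collections2.transform returns a Collection; an Iterable-only source would need an explicit loop. A sketch of both cases with illustrative data:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collection;
    import java.util.List;

    public class CopySketch {
      public static void main(String[] args) {
        Collection<String> source = Arrays.asList("age", "income", "label");

        // Collection source: the copy constructor is a drop-in replacement.
        List<String> copied = new ArrayList<>(source);

        // Iterable-only source: no matching constructor, so copy by hand.
        Iterable<String> iterable = source;
        List<String> fromIterable = new ArrayList<>();
        for (String s : iterable) {
          fromIterable.add(s);
        }
        System.out.println(copied.equals(fromIterable)); // true
      }
    }
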
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
index d158f4d..90ef7a8 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Sets;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.classifier.OnlineLearner;
@@ -31,6 +30,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
+import java.util.HashSet;
import java.util.Random;
/**
@@ -387,7 +387,7 @@ public class GradientMachine extends AbstractVectorClassifier implements OnlineL
public void train(long trackingKey, String groupKey, int actual, Vector instance) {
Vector hiddenActivation = inputToHidden(instance);
hiddenToOutput(hiddenActivation);
- Collection<Integer> goodLabels = Sets.newHashSet();
+ Collection<Integer> goodLabels = new HashSet<>();
goodLabels.add(actual);
updateRanking(hiddenActivation, goodLabels, 2, rnd);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
index ebb0614..bcd2ebc 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
@@ -17,14 +17,14 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.Vector;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
@@ -55,7 +55,7 @@ public class ModelDissector {
private final Map<String,Vector> weightMap;
public ModelDissector() {
- weightMap = Maps.newHashMap();
+ weightMap = new HashMap<>();
}
/**
@@ -105,14 +105,14 @@ public class ModelDissector {
* @return A list of the top variables.
*/
public List<Weight> summary(int n) {
- Queue<Weight> pq = new PriorityQueue<Weight>();
+ Queue<Weight> pq = new PriorityQueue<>();
for (Map.Entry<String, Vector> entry : weightMap.entrySet()) {
pq.add(new Weight(entry.getKey(), entry.getValue()));
while (pq.size() > n) {
pq.poll();
}
}
- List<Weight> r = Lists.newArrayList(pq);
+ List<Weight> r = new ArrayList<>(pq);
Collections.sort(r, Ordering.natural().reverse());
return r;
}
@@ -170,14 +170,14 @@ public class ModelDissector {
public Weight(String feature, Vector weights, int n) {
this.feature = feature;
// pick out the weight with the largest abs value, but don't forget the sign
- Queue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural());
+ Queue<Category> biggest = new PriorityQueue<>(n + 1, Ordering.natural());
for (Vector.Element element : weights.all()) {
biggest.add(new Category(element.index(), element.get()));
while (biggest.size() > n) {
biggest.poll();
}
}
- categories = Lists.newArrayList(biggest);
+ categories = new ArrayList<>(biggest);
Collections.sort(categories, Ordering.natural().reverse());
value = categories.get(0).weight;
maxIndex = categories.get(0).index;
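
The summary() hunk preserves a detail that is easy to miss: copying a PriorityQueue into a list copies the heap's internal array order, which is only partially sorted, so the explicit Collections.sort that follows is still required. A small sketch:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.PriorityQueue;
    import java.util.Queue;

    public class HeapCopySketch {
      public static void main(String[] args) {
        Queue<Integer> pq = new PriorityQueue<>();
        Collections.addAll(pq, 5, 1, 4, 2, 3);
        // The copy reflects heap layout, not sorted order...
        List<Integer> r = new ArrayList<>(pq);
        // ...so sort explicitly, largest first, as the patched code does.
        Collections.sort(r, Collections.reverseOrder());
        System.out.println(r); // [5, 4, 3, 2, 1]
      }
    }
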
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
index f0150e9..f89b245 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
@@ -37,29 +37,20 @@ public final class ModelSerializer {
}
public static void writeBinary(String path, CrossFoldLearner model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+ try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
public static void writeBinary(String path, OnlineLogisticRegression model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+ try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
public static void writeBinary(String path, AdaptiveLogisticRegression model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
index b52cb8c..a04fc8b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
@@ -17,12 +17,12 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
@@ -40,7 +40,7 @@ public class RankingGradient implements Gradient {
private int window = 10;
- private final List<Deque<Vector>> history = Lists.newArrayList();
+ private final List<Deque<Vector>> history = new ArrayList<>();
public RankingGradient(int window) {
this.window = window;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
index cc05beb..86fa011 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
@@ -22,12 +22,11 @@ import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.HashMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.common.parameters.Parameter;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -359,7 +358,7 @@ public abstract class AbstractCluster implements Cluster {
// we assume sequential access in the output
Vector provider = v.isSequentialAccess() ? v : new SequentialAccessSparseVector(v);
- List<Object> terms = Lists.newLinkedList();
+ List<Object> terms = new LinkedList<>();
String term = "";
for (Element elem : provider.nonZeroes()) {
@@ -370,7 +369,7 @@ public abstract class AbstractCluster implements Cluster {
term = String.valueOf(elem.index());
}
- Map<String, Object> term_entry = Maps.newHashMap();
+ Map<String, Object> term_entry = new HashMap<>();
double roundedWeight = (double) Math.round(elem.get() * 1000) / 1000;
if (hasBindings || isSparse) {
term_entry.put(term, roundedWeight);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
index 421ffcf..ad0f8ec 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
@@ -17,6 +17,7 @@
package org.apache.mahout.clustering;
+import java.util.ArrayList;
import java.util.List;
import com.google.common.base.Preconditions;
@@ -52,7 +53,7 @@ public final class ClusteringUtils {
DistanceMeasure distanceMeasure) {
UpdatableSearcher searcher = new ProjectionSearch(distanceMeasure, 3, 1);
searcher.addAll(centroids);
- List<OnlineSummarizer> summarizers = Lists.newArrayList();
+ List<OnlineSummarizer> summarizers = new ArrayList<>();
if (searcher.size() == 0) {
return summarizers;
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
index 6e2c3cf..384e294 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
@@ -18,12 +18,12 @@
package org.apache.mahout.clustering.classify;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -159,7 +159,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
* @throws IOException
*/
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
- List<Cluster> clusterModels = Lists.newArrayList();
+ List<Cluster> clusterModels = new ArrayList<>();
Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
PathFilters.partFilter(), null, false, conf);
@@ -225,7 +225,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold,
boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
if (emitMostLikely) {
int maxValueIndex = pdfPerCluster.maxValueIndex();
WeightedPropertyVectorWritable weightedPropertyVectorWritable =
@@ -238,7 +238,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
private static void writeAllAboveThreshold(List<Cluster> clusterModels, Double clusterClassificationThreshold,
SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
for (Element pdf : pdfPerCluster.nonZeroes()) {
if (pdf.get() >= clusterClassificationThreshold) {
WeightedPropertyVectorWritable wvw = new WeightedPropertyVectorWritable(pdf.get(), vw.get(), props);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
index 9edbd8e..dfddab0 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
@@ -18,12 +18,12 @@
package org.apache.mahout.clustering.classify;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -67,7 +67,7 @@ public class ClusterClassificationMapper extends
threshold = conf.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f);
emitMostLikely = conf.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false);
- clusterModels = Lists.newArrayList();
+ clusterModels = new ArrayList<>();
if (clustersIn != null && !clustersIn.isEmpty()) {
Path clustersInPath = new Path(clustersIn);
@@ -128,13 +128,13 @@ public class ClusterClassificationMapper extends
DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure();
double distance = distanceMeasure.distance(cluster.getCenter(), vw.get());
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
props.put(new Text("distance"), new Text(Double.toString(distance)));
context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props));
}
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
[4/5] mahout git commit: MAHOUT-1652: Java 7 upgrade,
this closes apache/mahout#112
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java b/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
index 2dcc8b0..e01868a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
@@ -17,14 +17,15 @@
package org.apache.mahout.utils;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-import com.google.common.io.Files;
import java.io.File;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.ArrayList;
import java.util.List;
+
+import com.google.common.io.Closeables;
+import com.google.common.io.Files;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
@@ -136,7 +137,7 @@ public final class SequenceFileDumper extends AbstractJob {
}
}
if (facets != null) {
- List<String> keyList = Lists.newArrayListWithCapacity(facets.size());
+ List<String> keyList = new ArrayList<>(facets.size());
IntArrayList valueList = new IntArrayList(facets.size());
facets.pairsSortedByKey(keyList, valueList);
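
SequenceFileDumper also swaps com.google.common.base.Charsets for org.apache.commons.io.Charsets; both expose the same UTF_8 constant, so call sites compile unchanged. A sketch of the equivalent spellings (assuming Guava and commons-io 2.3+ on the classpath; on Java 7 the JDK's StandardCharsets would serve equally well):

    import java.nio.charset.Charset;
    import java.nio.charset.StandardCharsets;

    public class CharsetsExample {
      public static void main(String[] args) {
        Charset guava = com.google.common.base.Charsets.UTF_8;    // before this commit
        Charset commonsIo = org.apache.commons.io.Charsets.UTF_8; // after this commit
        Charset jdk = StandardCharsets.UTF_8;                     // built into Java 7
        // All three resolve to the same charset.
        System.out.println(guava.equals(commonsIo) && commonsIo.equals(jdk));
      }
    }
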
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/SplitInput.java b/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
index 562e7df..6178f80 100644
--- a/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
+++ b/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
@@ -17,10 +17,17 @@
package org.apache.mahout.utils;
-import com.google.common.base.Charsets;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.BitSet;
+
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.OptionException;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -40,14 +47,6 @@ import org.apache.mahout.math.jet.random.sampling.RandomSampler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.util.BitSet;
-
/**
* A utility for splitting files in the input format used by the Bayes
* classifiers or anything else that has one item per line or SequenceFiles (key/value)
@@ -379,12 +378,9 @@ public class SplitInput extends AbstractJob {
int trainCount = 0;
int testCount = 0;
if (!useSequence) {
- BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
- Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
- Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset);
-
-
- try {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
+ Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
+ Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset)){
String line;
int pos = 0;
@@ -412,19 +408,14 @@ public class SplitInput extends AbstractJob {
writer.write('\n');
}
- } finally {
- Closeables.close(reader, true);
- Closeables.close(trainingWriter, false);
- Closeables.close(testWriter, false);
}
} else {
- SequenceFileIterator<Writable, Writable> iterator =
- new SequenceFileIterator<>(inputFile, false, fs.getConf());
- SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile,
- iterator.getKeyClass(), iterator.getValueClass());
- SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile,
- iterator.getKeyClass(), iterator.getValueClass());
- try {
+ try (SequenceFileIterator<Writable, Writable> iterator =
+ new SequenceFileIterator<>(inputFile, false, fs.getConf());
+ SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile,
+ iterator.getKeyClass(), iterator.getValueClass());
+ SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile,
+ iterator.getKeyClass(), iterator.getValueClass())) {
int pos = 0;
while (iterator.hasNext()) {
@@ -450,10 +441,6 @@ public class SplitInput extends AbstractJob {
writer.append(pair.getFirst(), pair.getSecond());
}
- } finally {
- Closeables.close(iterator, true);
- Closeables.close(trainingWriter, false);
- Closeables.close(testWriter, false);
}
}
log.info("file: {}, input: {} train: {}, test: {} starting at {}",
@@ -668,15 +655,11 @@ public class SplitInput extends AbstractJob {
*/
public static int countLines(FileSystem fs, Path inputFile, Charset charset) throws IOException {
int lineCount = 0;
- BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
- try {
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset))){
while (reader.readLine() != null) {
lineCount++;
}
- } finally {
- Closeables.close(reader, true);
}
-
return lineCount;
}
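
SplitInput is the clearest illustration of the other recurring change: resources declared in a try-with-resources header are closed automatically, in reverse declaration order, whether the body completes or throws, which replaces the manual Closeables.close() calls in finally blocks. A self-contained sketch of the pattern (file names are placeholders):

    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class TryWithResourcesExample {
      public static void main(String[] args) throws IOException {
        // Both resources close automatically: writer first, then reader.
        try (BufferedReader reader = Files.newBufferedReader(Paths.get("in.txt"), StandardCharsets.UTF_8);
             BufferedWriter writer = Files.newBufferedWriter(Paths.get("out.txt"), StandardCharsets.UTF_8)) {
          String line;
          while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
          }
        }
      }
    }
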
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java b/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java
index 825f7a5..4a1ff86 100644
--- a/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java
+++ b/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java
@@ -63,6 +63,7 @@ public final class SplitInputJob {
* training sets respectively
*
* @param initialConf
+ * Initial configuration
* @param inputPath
* path to input data SequenceFile
* @param outputPath
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java b/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
index 1856888..75b5ded 100644
--- a/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
@@ -21,11 +21,15 @@ import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
-import com.google.common.collect.Maps;
+import com.google.common.io.Closeables;
+import com.google.common.io.Files;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -49,11 +53,6 @@ import org.apache.mahout.utils.vectors.VectorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-import com.google.common.io.Files;
-
public final class ClusterDumper extends AbstractJob {
public static final String SAMPLE_POINTS = "samplePoints";
@@ -304,9 +303,10 @@ public final class ClusterDumper extends AbstractJob {
this.maxPointsPerCluster = maxPointsPerCluster;
}
- public static Map<Integer, List<WeightedPropertyVectorWritable>> readPoints(Path pointsPathDir, long maxPointsPerCluster,
- Configuration conf) {
- Map<Integer, List<WeightedPropertyVectorWritable>> result = Maps.newTreeMap();
+ public static Map<Integer, List<WeightedPropertyVectorWritable>> readPoints(Path pointsPathDir,
+ long maxPointsPerCluster,
+ Configuration conf) {
+ Map<Integer, List<WeightedPropertyVectorWritable>> result = new TreeMap<>();
for (Pair<IntWritable, WeightedPropertyVectorWritable> record
: new SequenceFileDirIterable<IntWritable, WeightedPropertyVectorWritable>(pointsPathDir, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
@@ -316,7 +316,7 @@ public final class ClusterDumper extends AbstractJob {
int keyValue = record.getFirst().get();
List<WeightedPropertyVectorWritable> pointList = result.get(keyValue);
if (pointList == null) {
- pointList = Lists.newArrayList();
+ pointList = new ArrayList<>();
result.put(keyValue, pointList);
}
if (pointList.size() < maxPointsPerCluster) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java b/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
index f51e768..964c8cc 100644
--- a/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
+++ b/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
@@ -25,7 +25,7 @@ import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
-import com.google.common.base.Charsets;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.lucene.analysis.TokenFilter;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
index a7f0e67..36b166a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
+++ b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
@@ -17,7 +17,9 @@
package org.apache.mahout.utils.regex;
-import com.google.common.io.Closeables;
+import java.io.IOException;
+import java.io.StringReader;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -27,9 +29,6 @@ import org.apache.mahout.common.lucene.TokenStreamIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.io.StringReader;
-
public class AnalyzerTransformer implements RegexTransformer {
private Analyzer analyzer;
@@ -53,9 +52,7 @@ public class AnalyzerTransformer implements RegexTransformer {
@Override
public String transformMatch(String match) {
StringBuilder result = new StringBuilder();
- TokenStream ts = null;
- try {
- ts = analyzer.tokenStream(fieldName, new StringReader(match));
+ try (TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(match))) {
ts.addAttribute(CharTermAttribute.class);
ts.reset();
TokenStreamIterator iter = new TokenStreamIterator(ts);
@@ -65,12 +62,6 @@ public class AnalyzerTransformer implements RegexTransformer {
ts.end();
} catch (IOException e) {
throw new IllegalStateException(e);
- } finally {
- try {
- Closeables.close(ts, true);
- } catch (IOException e) {
- log.error(e.getMessage(), e);
- }
}
return result.toString();
}
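
One behavioral nuance of the AnalyzerTransformer conversion: Closeables.close(ts, true) logged and swallowed an IOException from close(), while try-with-resources attaches a close()-time failure to the in-flight exception as a suppressed exception instead of losing it. A minimal sketch (the Flaky resource is invented for illustration):

    public class SuppressedExample {
      // An AutoCloseable whose close() always fails, to show suppression.
      static class Flaky implements AutoCloseable {
        @Override
        public void close() {
          throw new IllegalStateException("close failed");
        }
      }

      public static void main(String[] args) {
        try (Flaky flaky = new Flaky()) {
          throw new RuntimeException("body failed");
        } catch (RuntimeException e) {
          System.out.println("primary: " + e.getMessage());
          for (Throwable t : e.getSuppressed()) {
            System.out.println("suppressed: " + t.getMessage()); // prints "close failed"
          }
        }
      }
    }
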
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java b/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
index a744928..04cacaa 100644
--- a/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java
@@ -17,7 +17,11 @@
package org.apache.mahout.utils.regex;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -25,10 +29,6 @@ import org.apache.hadoop.mapreduce.Mapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.ClassUtils;
-import java.io.IOException;
-import java.util.List;
-import java.util.regex.Pattern;
-
public class RegexMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
public static final String REGEX = "regex";
@@ -45,7 +45,7 @@ public class RegexMapper extends Mapper<LongWritable, Text, LongWritable, Text>
@Override
protected void setup(Context context) throws IOException, InterruptedException {
- groupsToKeep = Lists.newArrayList();
+ groupsToKeep = new ArrayList<>();
Configuration config = context.getConfiguration();
String regexStr = config.get(REGEX);
regex = Pattern.compile(regexStr);
@@ -72,7 +72,7 @@ public class RegexMapper extends Mapper<LongWritable, Text, LongWritable, Text>
@Override
protected void map(LongWritable key, Text text, Context context) throws IOException, InterruptedException {
String result = RegexUtils.extract(text.toString(), regex, groupsToKeep, " ", transformer);
- if (result != null && !result.isEmpty()) {
+ if (!result.isEmpty()) {
String format = formatter.format(result);
context.write(key, new Text(format));
}
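
The dropped null guard in RegexMapper implies the extraction helper's contract is to return an empty string rather than null when nothing matches, so only the isEmpty() test carries information. An illustrative stand-in for such a helper (not Mahout's actual RegexUtils):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class ExtractExample {
      // Never returns null; no match yields "", making a null check redundant.
      static String extract(String text, Pattern regex) {
        Matcher m = regex.matcher(text);
        return m.find() ? m.group() : "";
      }

      public static void main(String[] args) {
        String result = extract("no digits here", Pattern.compile("\\d+"));
        if (!result.isEmpty()) { // the only check still needed
          System.out.println(result);
        }
      }
    }
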
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java b/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
index 0304306..13d61b8 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/RowIdJob.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -20,7 +20,6 @@ package org.apache.mahout.utils.vectors;
import java.util.List;
import java.util.Map;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -58,7 +57,7 @@ public class RowIdJob extends AbstractJob {
addInputOption();
addOutputOption();
- Map<String,List<String>> parsedArgs = parseArguments(args);
+ Map<String, List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
return -1;
}
@@ -69,23 +68,17 @@ public class RowIdJob extends AbstractJob {
Path outputPath = getOutputPath();
Path indexPath = new Path(outputPath, "docIndex");
Path matrixPath = new Path(outputPath, "matrix");
- SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs,
- conf,
- indexPath,
- IntWritable.class,
- Text.class);
- SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs,
- conf,
- matrixPath,
- IntWritable.class,
- VectorWritable.class);
- try {
+
+ try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath,
+ IntWritable.class, Text.class);
+ SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class,
+ VectorWritable.class)) {
IntWritable docId = new IntWritable();
int i = 0;
int numCols = 0;
- for (Pair<Text,VectorWritable> record
- : new SequenceFileDirIterable<Text,VectorWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(),
- null, true, conf)) {
+ for (Pair<Text, VectorWritable> record
+ : new SequenceFileDirIterable<Text, VectorWritable>(getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(),
+ null, true, conf)) {
VectorWritable value = record.getSecond();
docId.set(i);
indexWriter.append(docId, record.getFirst());
@@ -96,9 +89,6 @@ public class RowIdJob extends AbstractJob {
log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath);
return 0;
- } finally {
- Closeables.close(indexWriter, false);
- Closeables.close(matrixWriter, false);
}
}
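
RowIdJob applies the same conversion to a pair of SequenceFile.Writer instances, which works because SequenceFile.Writer implements java.io.Closeable. A hedged sketch of the shape, using the same createWriter(fs, conf, ...) overload as the code above and a throwaway local path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SeqFileWriterExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/rowid-example.seq");
        // Closed automatically on exit from the try block; no finally needed.
        try (SequenceFile.Writer writer =
                 SequenceFile.createWriter(fs, conf, path, IntWritable.class, Text.class)) {
          writer.append(new IntWritable(0), new Text("first row"));
          writer.append(new IntWritable(1), new Text("second row"));
        }
      }
    }
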
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
index 9214434..93ad0d5 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
@@ -21,13 +21,13 @@ import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -132,7 +132,7 @@ public final class VectorDumper extends AbstractJob {
Set<String> filters;
if (hasOption("filter")) {
- filters = Sets.newHashSet(getOptions("filter"));
+ filters = new HashSet<>(getOptions("filter"));
} else {
filters = null;
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
index 29b02aa..66c3fb6 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
@@ -19,8 +19,6 @@ package org.apache.mahout.utils.vectors;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -39,11 +37,12 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
-import java.util.Comparator;
import java.util.regex.Pattern;
/** Static utility methods related to vectors. */
@@ -82,7 +81,7 @@ public final class VectorHelper {
public static List<Pair<Integer, Double>> topEntries(Vector vector, int maxEntries) {
// Get the size of nonZero elements in the input vector
- int sizeOfNonZeroElementsInVector = Iterables.size(vector.nonZeroes());
+ int sizeOfNonZeroElementsInVector = vector.getNumNonZeroElements();
// If the sizeOfNonZeroElementsInVector < maxEntries then set maxEntries = sizeOfNonZeroElementsInVector
// otherwise the call to queue.pop() returns a Pair(null, null) and the subsequent call
@@ -95,7 +94,7 @@ public final class VectorHelper {
for (Element e : vector.nonZeroes()) {
queue.insertWithOverflow(Pair.of(e.index(), e.get()));
}
- List<Pair<Integer, Double>> entries = Lists.newArrayList();
+ List<Pair<Integer, Double>> entries = new ArrayList<>();
Pair<Integer, Double> pair;
while ((pair = queue.pop()) != null) {
if (pair.getFirst() > -1) {
@@ -112,7 +111,7 @@ public final class VectorHelper {
}
public static List<Pair<Integer, Double>> firstEntries(Vector vector, int maxEntries) {
- List<Pair<Integer, Double>> entries = Lists.newArrayList();
+ List<Pair<Integer, Double>> entries = new ArrayList<>();
Iterator<Vector.Element> it = vector.nonZeroes().iterator();
int i = 0;
while (it.hasNext() && i++ < maxEntries) {
@@ -125,7 +124,7 @@ public final class VectorHelper {
public static List<Pair<String, Double>> toWeightedTerms(Collection<Pair<Integer, Double>> entries,
final String[] dictionary) {
if (dictionary != null) {
- return Lists.newArrayList(Collections2.transform(entries,
+ return new ArrayList<>(Collections2.transform(entries,
new Function<Pair<Integer, Double>, Pair<String, Double>>() {
@Override
public Pair<String, Double> apply(Pair<Integer, Double> p) {
@@ -133,7 +132,7 @@ public final class VectorHelper {
}
}));
} else {
- return Lists.newArrayList(Collections2.transform(entries,
+ return new ArrayList<>(Collections2.transform(entries,
new Function<Pair<Integer, Double>, Pair<String, Double>>() {
@Override
public Pair<String, Double> apply(Pair<Integer, Double> p) {
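
Besides the import cleanup, VectorHelper replaces Iterables.size(vector.nonZeroes()) with the vector's own getNumNonZeroElements(), removing one more Guava call site. A small sketch (assuming mahout-math on the classpath):

    import org.apache.mahout.math.RandomAccessSparseVector;
    import org.apache.mahout.math.Vector;

    public class NonZeroCountExample {
      public static void main(String[] args) {
        Vector v = new RandomAccessSparseVector(100);
        v.set(3, 1.5);
        v.set(42, -2.0);
        // Counts entries whose value is non-zero, without Iterables.size().
        System.out.println(v.getNumNonZeroElements()); // 2
      }
    }
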
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
index bf5b58b..f2632a4 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
@@ -19,12 +19,12 @@ package org.apache.mahout.utils.vectors.arff;
import java.io.BufferedReader;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.List;
import com.google.common.collect.AbstractIterator;
-import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -103,7 +103,7 @@ final class ARFFIterator extends AbstractIterator<Vector> {
*/
public static String[] splitCSV(String line) {
StringBuilder sb = new StringBuilder(128);
- List<String> tokens = Lists.newArrayList();
+ List<String> tokens = new ArrayList<>();
char escapeChar = '\0';
for (int i = 0; i < line.length(); i++) {
char c = line.charAt(i);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
index c005005..fc86997 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFModel.java
@@ -20,7 +20,6 @@ package org.apache.mahout.utils.vectors.arff;
import java.text.DateFormat;
import java.util.Map;
-
/**
* An interface for representing an ARFFModel. Implementations can decide on the best approach
* for storing the model, as some approaches will be fine for smaller files, while larger
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
index 72b840f..180a1e1 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
@@ -17,10 +17,6 @@
package org.apache.mahout.utils.vectors.arff;
-import com.google.common.base.Charsets;
-import com.google.common.io.Files;
-import org.apache.mahout.math.Vector;
-
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
@@ -32,6 +28,10 @@ import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.Locale;
+import com.google.common.io.Files;
+import org.apache.commons.io.Charsets;
+import org.apache.mahout.math.Vector;
+
/**
* Read in ARFF (http://www.cs.waikato.ac.nz/~ml/weka/arff.html) and create {@link Vector}s
* <p/>
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
index 63a9f0d..ccecbb1 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -21,17 +21,16 @@ import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.Writer;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
@@ -41,6 +40,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -62,42 +62,43 @@ public final class Driver {
/** used for JSON serialization/deserialization */
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
- private Driver() { }
-
+ private Driver() {
+ }
+
public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
-
+
Option inputOpt = obuilder
.withLongName("input")
.withRequired(true)
.withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
.withDescription(
- "The file or directory containing the ARFF files. If it is a directory, all .arff files will be converted")
+ "The file or directory containing the ARFF files. If it is a directory, all .arff files will be converted")
.withShortName("d").create();
-
+
Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
- abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
- "The output directory. Files will have the same name as the input, but with the extension .mvc")
+ abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The output directory. Files will have the same name as the input, but with the extension .mvc")
.withShortName("o").create();
-
+
Option maxOpt = obuilder.withLongName("max").withRequired(false).withArgument(
- abuilder.withName("max").withMinimum(1).withMaximum(1).create()).withDescription(
- "The maximum number of vectors to output. If not specified, then it will loop over all docs")
+ abuilder.withName("max").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The maximum number of vectors to output. If not specified, then it will loop over all docs")
.withShortName("m").create();
-
+
Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true).withArgument(
- abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()).withDescription(
- "The file to output the label bindings").withShortName("t").create();
-
+ abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The file to output the label bindings").withShortName("t").create();
+
Option jsonDictonaryOpt = obuilder.withLongName("json-dictonary").withRequired(false)
- .withDescription("Write dictonary in JSON format").withShortName("j").create();
-
+ .withDescription("Write dictonary in JSON format").withShortName("j").create();
+
Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false).withArgument(
- abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create()).withDescription(
- "The delimiter for outputing the dictionary").withShortName("l").create();
-
+ abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create()).withDescription(
+ "The delimiter for outputing the dictionary").withShortName("l").create();
+
Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
.create();
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt)
@@ -108,9 +109,9 @@ public final class Driver {
Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(args);
-
+
if (cmdLine.hasOption(helpOpt)) {
-
+
CommandLineUtil.printHelp(group);
return;
}
@@ -137,7 +138,7 @@ public final class Driver {
return name.endsWith(".arff");
}
});
-
+
for (File file : files) {
writeFile(outDir, file, maxDocs, model, dictOut, delimiter, jsonDictonary);
}
@@ -145,31 +146,28 @@ public final class Driver {
writeFile(outDir, input, maxDocs, model, dictOut, delimiter, jsonDictonary);
}
}
-
+
} catch (OptionException e) {
log.error("Exception", e);
CommandLineUtil.printHelp(group);
}
}
-
+
protected static void writeLabelBindings(File dictOut, ARFFModel arffModel, String delimiter, boolean jsonDictonary)
- throws IOException {
- Writer writer = Files.newWriterSupplier(dictOut, Charsets.UTF_8, true).getOutput();
- try {
+ throws IOException {
+ try (Writer writer = Files.newWriterSupplier(dictOut, Charsets.UTF_8, true).getOutput()) {
if (jsonDictonary) {
writeLabelBindingsJSON(writer, arffModel);
} else {
writeLabelBindings(writer, arffModel, delimiter);
}
- } finally {
- Closeables.close(writer, false);
}
}
- protected static void writeLabelBindingsJSON(Writer writer, ARFFModel arffModel) throws IOException {
+ protected static void writeLabelBindingsJSON(Writer writer, ARFFModel arffModel) throws IOException {
// Turn the map of labels into a list order by order of appearance
- List<Entry<String, Integer>> attributes = Lists.newArrayList();
+ List<Entry<String, Integer>> attributes = new ArrayList<>();
attributes.addAll(arffModel.getLabelBindings().entrySet());
Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>() {
@Override
@@ -177,13 +175,13 @@ public final class Driver {
return t.getValue().compareTo(t1.getValue());
}
});
-
+
// write a map for each object
- List<Map<String, Object>> jsonObjects = Lists.newLinkedList();
- for (int i = 0; i < attributes.size(); i++) {
-
+ List<Map<String, Object>> jsonObjects = new LinkedList<>();
+ for (int i = 0; i < attributes.size(); i++) {
+
Entry<String, Integer> modelRepresentation = attributes.get(i);
- Map<String, Object> jsonRepresentation = Maps.newHashMap();
+ Map<String, Object> jsonRepresentation = new HashMap<>();
jsonObjects.add(jsonRepresentation);
// the last one is the class label
jsonRepresentation.put("label", i < (attributes.size() - 1) ? String.valueOf(false) : String.valueOf(true));
@@ -232,37 +230,34 @@ public final class Driver {
}
}
}
-
+
protected static void writeFile(String outDir,
- File file,
- long maxDocs,
- ARFFModel arffModel,
- File dictOut,
- String delimiter,
- boolean jsonDictonary) throws IOException {
+ File file,
+ long maxDocs,
+ ARFFModel arffModel,
+ File dictOut,
+ String delimiter,
+ boolean jsonDictonary) throws IOException {
log.info("Converting File: {}", file);
ARFFModel model = new MapBackedARFFModel(arffModel.getWords(), arffModel.getWordCount() + 1, arffModel
.getNominalMap());
Iterable<Vector> iteratable = new ARFFVectorIterable(file, model);
String outFile = outDir + '/' + file.getName() + ".mvc";
-
- VectorWriter vectorWriter = getSeqFileWriter(outFile);
- try {
+
+ try (VectorWriter vectorWriter = getSeqFileWriter(outFile)) {
long numDocs = vectorWriter.write(iteratable, maxDocs);
writeLabelBindings(dictOut, model, delimiter, jsonDictonary);
log.info("Wrote: {} vectors", numDocs);
- } finally {
- Closeables.close(vectorWriter, false);
}
}
-
+
private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
Path path = new Path(outFile);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
- VectorWritable.class);
+ VectorWritable.class);
return new SequenceFileVectorWriter(seqWriter);
}
-
+
}
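
Aside from the try-with-resources and indentation changes, the JSON path in this Driver keeps the idiomatic pre-Java-8 way of ordering map entries: copy the entry set into a list and sort it with an anonymous Comparator. A compact sketch of that pattern (the attribute names are made up):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class SortEntriesExample {
      public static void main(String[] args) {
        Map<String, Integer> bindings = new LinkedHashMap<>();
        bindings.put("petalwidth", 2);
        bindings.put("sepallength", 0);
        bindings.put("class", 3);

        List<Map.Entry<String, Integer>> attributes = new ArrayList<>(bindings.entrySet());
        Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>() {
          @Override
          public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
            return a.getValue().compareTo(b.getValue());
          }
        });
        // Ordered by index value: sepallength, petalwidth, class
        System.out.println(attributes);
      }
    }
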
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
index a272053..e911b1a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
@@ -17,8 +17,6 @@
package org.apache.mahout.utils.vectors.arff;
-import com.google.common.collect.Maps;
-
import java.text.DateFormat;
import java.text.NumberFormat;
import java.text.ParseException;
@@ -56,10 +54,10 @@ public class MapBackedARFFModel implements ARFFModel {
public MapBackedARFFModel(Map<String,Long> words, long wordCount, Map<String,Map<String,Integer>> nominalMap) {
this.words = words;
this.wordCount = wordCount;
- labelBindings = Maps.newHashMap();
- idxLabel = Maps.newHashMap();
- typeMap = Maps.newHashMap();
- dateMap = Maps.newHashMap();
+ labelBindings = new HashMap<>();
+ idxLabel = new HashMap<>();
+ typeMap = new HashMap<>();
+ dateMap = new HashMap<>();
this.nominalMap = nominalMap;
}
@@ -230,7 +228,7 @@ public class MapBackedARFFModel implements ARFFModel {
public void addNominal(String label, String nominal, int idx) {
Map<String,Integer> noms = nominalMap.get(label);
if (noms == null) {
- noms = Maps.newHashMap();
+ noms = new HashMap<>();
nominalMap.put(label, noms);
}
noms.put(nominal, idx);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
index ffe7baa..718704a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfo.java
@@ -17,7 +17,11 @@
package org.apache.mahout.utils.vectors.lucene;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
@@ -26,10 +30,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.mahout.utils.vectors.TermEntry;
import org.apache.mahout.utils.vectors.TermInfo;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-
/**
* Caches TermEntries from a single field. Materializes all values in the TermEnum to memory (much like FieldCache)
@@ -47,7 +47,7 @@ public class CachedTermInfo implements TermInfo {
int numDocs = reader.numDocs();
double percent = numDocs * maxDfPercent / 100.0;
//Should we use a linked hash map so that we know terms are in order?
- termEntries = Maps.newLinkedHashMap();
+ termEntries = new LinkedHashMap<>();
int count = 0;
BytesRef text;
while ((text = te.next()) != null) {
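
Maps.newLinkedHashMap() becomes new LinkedHashMap<>() here, and the code comment in the hunk explains why the linked variant is the right one: it iterates terms in insertion order. A quick illustration:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class LinkedHashMapExample {
      public static void main(String[] args) {
        Map<String, Integer> termEntries = new LinkedHashMap<>();
        termEntries.put("zebra", 0);
        termEntries.put("apple", 1);
        termEntries.put("mango", 2);
        // Iteration follows insertion order: [zebra, apple, mango].
        // A plain HashMap would make no ordering guarantee.
        System.out.println(termEntries.keySet());
      }
    }
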
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
index 06bec60..6ef7fba 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
@@ -23,15 +23,14 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashSet;
import java.util.LinkedHashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.commons.cli2.CommandLine;
@@ -42,6 +41,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
@@ -107,13 +107,8 @@ public class ClusterLabels {
public void getLabels() throws IOException {
- Writer writer;
- if (this.output == null) {
- writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
- } else {
- writer = Files.newWriter(new File(this.output), Charsets.UTF_8);
- }
- try {
+ try (Writer writer = (this.output == null) ?
+ new OutputStreamWriter(System.out, Charsets.UTF_8) : Files.newWriter(new File(this.output), Charsets.UTF_8)){
for (Map.Entry<Integer, List<WeightedPropertyVectorWritable>> integerListEntry : clusterIdToPoints.entrySet()) {
List<WeightedPropertyVectorWritable> wpvws = integerListEntry.getValue();
List<TermInfoClusterInOut> termInfos = getClusterLabels(integerListEntry.getKey(), wpvws);
@@ -139,8 +134,6 @@ public class ClusterLabels {
}
}
}
- } finally {
- Closeables.close(writer, false);
}
}
@@ -162,7 +155,7 @@ public class ClusterLabels {
log.info("# of documents in the index {}", reader.numDocs());
- Collection<String> idSet = Sets.newHashSet();
+ Collection<String> idSet = new HashSet<>();
for (WeightedPropertyVectorWritable wpvw : wpvws) {
Vector vector = wpvw.getVector();
if (vector instanceof NamedVector) {
@@ -214,7 +207,7 @@ public class ClusterLabels {
}
- List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
+ List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<>();
int clusterSize = wpvws.size();
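
The getLabels() rewrite shows that any expression of a Closeable type can sit in a try-with-resources header, so the stdout-or-file decision folds into a conditional there. A minimal sketch of the shape (the output-selection logic is illustrative), with one caveat worth noting: closing a Writer wrapped around System.out closes System.out as well.

    import java.io.BufferedWriter;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.io.Writer;

    public class ConditionalResourceExample {
      public static void main(String[] args) throws IOException {
        String output = args.length > 0 ? args[0] : null;
        try (Writer writer = (output == null)
            ? new OutputStreamWriter(System.out)
            : new BufferedWriter(new FileWriter(output))) {
          writer.write("labels go here\n");
        } // writer.close() runs here, even if write() threw
      }
    }
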
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
index bdc5652..2eeebd9 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,9 +22,7 @@ import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
-import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
@@ -34,6 +32,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -80,15 +79,15 @@ public final class Driver {
File file = new File(luceneDir);
Preconditions.checkArgument(file.isDirectory(),
- "Lucene directory: " + file.getAbsolutePath()
- + " does not exist or is not a directory");
+ "Lucene directory: " + file.getAbsolutePath()
+ + " does not exist or is not a directory");
Preconditions.checkArgument(maxDocs >= 0, "maxDocs must be >= 0");
Preconditions.checkArgument(minDf >= 1, "minDf must be >= 1");
Preconditions.checkArgument(maxDFPercent <= 99, "maxDFPercent must be <= 99");
Directory dir = FSDirectory.open(file);
IndexReader reader = DirectoryReader.open(dir);
-
+
Weight weight;
if ("tf".equalsIgnoreCase(weightType)) {
@@ -100,7 +99,7 @@ public final class Driver {
}
TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);
-
+
LuceneIterable iterable;
if (norm == LuceneIterable.NO_NORMALIZING) {
iterable = new LuceneIterable(reader, idField, field, termInfo, weight, LuceneIterable.NO_NORMALIZING,
@@ -111,22 +110,16 @@ public final class Driver {
log.info("Output File: {}", outFile);
- VectorWriter vectorWriter = getSeqFileWriter(outFile);
- try {
+ try (VectorWriter vectorWriter = getSeqFileWriter(outFile)) {
long numDocs = vectorWriter.write(iterable, maxDocs);
log.info("Wrote: {} vectors", numDocs);
- } finally {
- Closeables.close(vectorWriter, false);
}
File dictOutFile = new File(dictOut);
log.info("Dictionary Output file: {}", dictOutFile);
Writer writer = Files.newWriter(dictOutFile, Charsets.UTF_8);
- DelimitedTermInfoWriter tiWriter = new DelimitedTermInfoWriter(writer, delimiter, field);
- try {
+ try (DelimitedTermInfoWriter tiWriter = new DelimitedTermInfoWriter(writer, delimiter, field)) {
tiWriter.write(termInfo);
- } finally {
- Closeables.close(tiWriter, false);
}
if (!"".equals(seqDictOut)) {
@@ -135,12 +128,9 @@ public final class Driver {
Path path = new Path(seqDictOut);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
- SequenceFile.Writer seqWriter = null;
- try {
- seqWriter = SequenceFile.createWriter(fs, conf, path, Text.class, IntWritable.class);
+ try (SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, Text.class, IntWritable.class)) {
Text term = new Text();
IntWritable termIndex = new IntWritable();
-
Iterator<TermEntry> termEntries = termInfo.getAllEntries();
while (termEntries.hasNext()) {
TermEntry termEntry = termEntries.next();
@@ -148,10 +138,7 @@ public final class Driver {
termIndex.set(termEntry.getTermIdx());
seqWriter.append(term, termIndex);
}
- } finally {
- Closeables.close(seqWriter, false);
}
-
}
}
@@ -215,7 +202,7 @@ public final class Driver {
Option maxPercentErrorDocsOpt = obuilder.withLongName("maxPercentErrorDocs").withRequired(false).withArgument(
abuilder.withName("maxPercentErrorDocs").withMinimum(1).withMaximum(1).create()).withDescription(
- "The max percentage of docs that can have a null term vector. These are noise document and can occur if the "
+ "The max percentage of docs that can have a null term vector. These are noise document and can occur if the "
+ "analyzer used strips out all terms in the target field. This percentage is expressed as a value "
+ "between 0 and 1. The default is 0.").withShortName("err").create();
@@ -302,7 +289,7 @@ public final class Driver {
// TODO: Make this parameter driven
SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
- VectorWritable.class);
+ VectorWritable.class);
return new SequenceFileVectorWriter(seqWriter);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
index 70394ac..6a8c659 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterator.java
@@ -17,15 +17,15 @@
package org.apache.mahout.utils.vectors.lucene;
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
import org.apache.lucene.index.IndexReader;
import org.apache.mahout.utils.vectors.TermInfo;
import org.apache.mahout.vectorizer.Weight;
-import java.io.IOException;
-import java.util.Set;
-
/**
* An {@link java.util.Iterator} over {@link org.apache.mahout.math.Vector}s that uses a Lucene index as the source
* for creating the {@link org.apache.mahout.math.Vector}s. The field used to create the vectors currently must have
@@ -77,7 +77,7 @@ public class LuceneIterator extends AbstractLuceneIterator {
"Must be: 0.0 <= maxPercentErrorDocs <= 1.0");
this.idField = idField;
if (idField != null) {
- idFieldSelector = Sets.newTreeSet();
+ idFieldSelector = new TreeSet<>();
idFieldSelector.add(idField);
} else {
/*The field in the index containing the index. If null, then the Lucene internal doc id is used
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java b/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
index 4bbab65..a1d2bbb 100644
--- a/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
+++ b/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
@@ -17,8 +17,11 @@
package org.apache.mahout.clustering;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -52,10 +55,6 @@ import org.apache.mahout.vectorizer.Weight;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
public final class TestClusterDumper extends MahoutTestCase {
private static final String[] DOCS = {
@@ -92,13 +91,10 @@ public final class TestClusterDumper extends MahoutTestCase {
}
private void getSampleData(String[] docs2) throws IOException {
- sampleData = Lists.newArrayList();
+ sampleData = new ArrayList<>();
RAMDirectory directory = new RAMDirectory();
-
- IndexWriter writer = new IndexWriter(directory,
- new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
-
- try {
+ try (IndexWriter writer = new IndexWriter(directory,
+ new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)))){
for (int i = 0; i < docs2.length; i++) {
Document doc = new Document();
Field id = new StringField("id", "doc_" + i, Field.Store.YES);
@@ -116,13 +112,10 @@ public final class TestClusterDumper extends MahoutTestCase {
doc.add(text);
writer.addDocument(doc);
}
- } finally {
- Closeables.close(writer, false);
}
IndexReader reader = DirectoryReader.open(directory);
-
Weight weight = new TFIDF();
TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java b/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
index 78367cc..597ed01 100644
--- a/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
+++ b/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
@@ -18,7 +18,9 @@
package org.apache.mahout.clustering.cdbw;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -28,9 +30,9 @@ import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.clustering.TestClusterEvaluator;
+import org.apache.mahout.clustering.UncommonDistributions;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.clustering.UncommonDistributions;
import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
@@ -46,9 +48,6 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
public final class TestCDbwEvaluator extends MahoutTestCase {
private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2}, {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
@@ -63,9 +62,9 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
private FileSystem fs;
- private final Collection<VectorWritable> sampleData = Lists.newArrayList();
+ private final Collection<VectorWritable> sampleData = new ArrayList<>();
- private List<VectorWritable> referenceData = Lists.newArrayList();
+ private List<VectorWritable> referenceData = new ArrayList<>();
private Path testdata;
@@ -96,14 +95,14 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
* the DistanceMeasure
*/
private void initData(double dC, double dP, DistanceMeasure measure) {
- clusters = Lists.newArrayList();
+ clusters = new ArrayList<>();
clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1, measure));
clusters.add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
clusters.add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
clusters.add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
- representativePoints = Maps.newHashMap();
+ representativePoints = new HashMap<>();
for (Cluster cluster : clusters) {
- List<VectorWritable> points = Lists.newArrayList();
+ List<VectorWritable> points = new ArrayList<>();
representativePoints.put(cluster.getId(), points);
points.add(new VectorWritable(cluster.getCenter().clone()));
points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, dP}))));
@@ -182,7 +181,7 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
initData(1, 0.25, measure);
Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19, measure);
clusters.add(cluster);
- List<VectorWritable> points = Lists.newArrayList();
+ List<VectorWritable> points = new ArrayList<>();
representativePoints.put(cluster.getId(), points);
CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
System.out.println("CDbw = " + evaluator.getCDbw());
@@ -198,7 +197,7 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
initData(1, 0.25, measure);
Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
- List<VectorWritable> points = Lists.newArrayList();
+ List<VectorWritable> points = new ArrayList<>();
points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {1, 1}))));
representativePoints.put(cluster.getId(), points);
CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
@@ -221,7 +220,7 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
initData(1, 0.25, measure);
Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
- List<VectorWritable> points = Lists.newArrayList();
+ List<VectorWritable> points = new ArrayList<>();
points.add(new VectorWritable(cluster.getCenter()));
points.add(new VectorWritable(cluster.getCenter()));
points.add(new VectorWritable(cluster.getCenter()));
@@ -246,7 +245,7 @@ public final class TestCDbwEvaluator extends MahoutTestCase {
initData(1, 0.25, measure);
Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
clusters.add(cluster);
- List<VectorWritable> points = Lists.newArrayList();
+ List<VectorWritable> points = new ArrayList<>();
Vector delta = new DenseVector(new double[] {0, Double.MIN_NORMAL});
points.add(new VectorWritable(delta.clone()));
points.add(new VectorWritable(delta.clone()));
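The hunks above trade Guava's collection factories (Lists.newArrayList(), Maps.newHashMap()) for plain constructors with Java 7's diamond operator, which infers the type arguments from the declaration. A minimal sketch of the pattern outside the patch (class and variable names are illustrative, not from this code):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class DiamondExample {
      public static void main(String[] args) {
        // Pre-Java 7 (with Guava): List<String> names = Lists.newArrayList();
        // Java 7+: the compiler infers <String> from the left-hand side.
        List<String> names = new ArrayList<>();
        names.add("cluster-1");

        // Works for nested type arguments as well.
        Map<Integer, List<String>> pointsById = new HashMap<>();
        pointsById.put(1, names);
        System.out.println(pointsById);
      }
    }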
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java b/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
index baa7ac5..c7486c5 100644
--- a/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
@@ -16,7 +16,11 @@
*/
package org.apache.mahout.text;
-import com.google.common.collect.Lists;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -33,11 +37,6 @@ import org.apache.mahout.text.doc.NumericFieldDocument;
import org.apache.mahout.text.doc.SingleFieldDocument;
import org.apache.mahout.text.doc.TestDocument;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-
/**
* Abstract test for working with Lucene storage.
*/
@@ -45,8 +44,8 @@ public abstract class AbstractLuceneStorageTest extends MahoutTestCase {
protected Path indexPath1;
protected Path indexPath2;
- protected List<TestDocument> docs = Lists.newArrayList();
- protected List<TestDocument> misshapenDocs = Lists.newArrayList();
+ protected List<TestDocument> docs = new ArrayList<>();
+ protected List<TestDocument> misshapenDocs = new ArrayList<>();
@Override
public void setUp() throws Exception {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
index 28f2ac8..c64dbda 100644
--- a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
@@ -16,23 +16,24 @@
*/
package org.apache.mahout.text;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.Collections;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
import org.apache.mahout.common.HadoopUtil;
-
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-
-import static java.util.Arrays.asList;
-import static org.apache.mahout.text.doc.SingleFieldDocument.*;
+import static org.apache.mahout.text.doc.SingleFieldDocument.FIELD;
+import static org.apache.mahout.text.doc.SingleFieldDocument.ID_FIELD;
public class LuceneSegmentRecordReaderTest extends AbstractLuceneStorageTest {
private Configuration configuration;
@@ -44,7 +45,8 @@ public class LuceneSegmentRecordReaderTest extends AbstractLuceneStorageTest {
@Before
public void before() throws IOException, InterruptedException {
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(getConfiguration(),
- asList(getIndexPath1()), new Path("output"), ID_FIELD, asList(FIELD));
+ Collections.singletonList(getIndexPath1()), new Path("output"), ID_FIELD,
+ Collections.singletonList(FIELD));
configuration = lucene2SeqConf.serialize();
recordReader = new LuceneSegmentRecordReader();
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
@@ -82,7 +84,8 @@ public class LuceneSegmentRecordReaderTest extends AbstractLuceneStorageTest {
@Test(expected = IllegalArgumentException.class)
public void testNonExistingIdField() throws Exception {
configuration = new LuceneStorageConfiguration(getConfiguration(),
- asList(getIndexPath1()), new Path("output"), "nonExistingId", asList(FIELD)).serialize();
+ Collections.singletonList(getIndexPath1()), new Path("output"), "nonExistingId",
+ Collections.singletonList(FIELD)).serialize();
SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(getIndexPath1(),
segmentInfo.info.name, segmentInfo.sizeInBytes());
@@ -92,8 +95,8 @@ public class LuceneSegmentRecordReaderTest extends AbstractLuceneStorageTest {
@Test(expected = IllegalArgumentException.class)
public void testNonExistingField() throws Exception {
- configuration = new LuceneStorageConfiguration(getConfiguration(), asList(getIndexPath1()),
- new Path("output"), ID_FIELD, asList("nonExistingField")).serialize();
+ configuration = new LuceneStorageConfiguration(getConfiguration(), Collections.singletonList(getIndexPath1()),
+ new Path("output"), ID_FIELD, Collections.singletonList("nonExistingField")).serialize();
SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(getIndexPath1(),
segmentInfo.info.name, segmentInfo.sizeInBytes());
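These hunks also tighten single-element lists: Collections.singletonList(x) replaces asList(x). The singleton list is immutable and skips the varargs array that Arrays.asList allocates, while Arrays.asList remains the idiom for two or more elements. A short sketch under those assumptions (the element values are made up):

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SingletonListExample {
      public static void main(String[] args) {
        // Exactly one element: immutable, no backing array, documents the arity.
        List<String> one = Collections.singletonList("id");

        // Two or more elements: Arrays.asList is still the usual choice.
        List<String> two = Arrays.asList("field1", "field2");

        System.out.println(one + " " + two);
      }
    }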
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
index d1e65c1..f58224c 100644
--- a/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
@@ -16,16 +16,14 @@
*/
package org.apache.mahout.text;
+import java.io.IOException;
+import java.util.Collections;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.MahoutTestCase;
import org.junit.Test;
-import java.io.IOException;
-
-import static java.util.Arrays.asList;
-import static org.junit.Assert.assertEquals;
-
public class LuceneStorageConfigurationTest extends MahoutTestCase {
@Test
@@ -34,7 +32,8 @@ public class LuceneStorageConfigurationTest extends MahoutTestCase {
Path indexPath = new Path("indexPath");
Path outputPath = new Path("outputPath");
LuceneStorageConfiguration luceneStorageConfiguration =
- new LuceneStorageConfiguration(configuration, asList(indexPath), outputPath, "id", asList("field"));
+ new LuceneStorageConfiguration(configuration, Collections.singletonList(indexPath), outputPath,
+ "id", Collections.singletonList("field"));
Configuration serializedConfiguration = luceneStorageConfiguration.serialize();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
index 7cebc60..03aed0f 100644
--- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
@@ -17,28 +17,23 @@
package org.apache.mahout.text;
-import com.google.common.collect.Iterators;
-import org.apache.commons.lang.StringUtils;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
import org.apache.lucene.search.TermQuery;
import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterator;
import org.apache.mahout.text.doc.MultipleFieldsDocument;
import org.apache.mahout.text.doc.SingleFieldDocument;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.List;
-
-import static java.util.Arrays.asList;
-
public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStorageTest {
private SequenceFilesFromLuceneStorageDriver driver;
@@ -56,7 +51,7 @@ public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStor
seqFilesOutputPath = new Path(getTestTempDirPath(), "seqfiles");
idField = SingleFieldDocument.ID_FIELD;
- fields = asList("field");
+ fields = Collections.singletonList("field");
driver = new SequenceFilesFromLuceneStorageDriver() {
@Override
@@ -76,13 +71,13 @@ public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStor
@Test
public void testNewLucene2SeqConfiguration() {
lucene2SeqConf = driver.newLucene2SeqConfiguration(conf,
- asList(new Path(getIndexPath1().toString())),
+ Collections.singletonList(new Path(getIndexPath1().toString())),
seqFilesOutputPath,
idField,
fields);
assertEquals(conf, lucene2SeqConf.getConfiguration());
- assertEquals(asList(getIndexPath1()), lucene2SeqConf.getIndexPaths());
+ assertEquals(Collections.singletonList(getIndexPath1()), lucene2SeqConf.getIndexPaths());
assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath());
assertEquals(idField, lucene2SeqConf.getIdField());
assertEquals(fields, lucene2SeqConf.getFields());
@@ -90,7 +85,8 @@ public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStor
@Test
public void testRun() throws Exception {
- List<MultipleFieldsDocument> docs = asList(new MultipleFieldsDocument("123", "test 1", "test 2", "test 3"));
+ List<MultipleFieldsDocument> docs =
+ Collections.singletonList(new MultipleFieldsDocument("123", "test 1", "test 2", "test 3"));
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.get(0));
String queryField = "queryfield";
@@ -115,7 +111,7 @@ public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStor
assertEquals(getIndexPath1().toUri().getPath(), lucene2SeqConf.getIndexPaths().get(0).toUri().getPath());
assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath());
assertEquals(idField, lucene2SeqConf.getIdField());
- assertEquals(asList(field1, field2), lucene2SeqConf.getFields());
+ assertEquals(Arrays.asList(field1, field2), lucene2SeqConf.getFields());
assertTrue(lucene2SeqConf.getQuery() instanceof TermQuery);
assertEquals(queryField, ((TermQuery) lucene2SeqConf.getQuery()).getTerm().field());
@@ -167,10 +163,6 @@ public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStor
driver.run(args);
assertTrue(FileSystem.get(conf).exists(seqFilesOutputPath));
//shouldn't be any real files in the seq files out path
- SequenceFileDirIterator<Writable, Writable> iter =
- new SequenceFileDirIterator<Writable, Writable>(seqFilesOutputPath, PathType.LIST, PathFilters.logsCRCFilter(), null, false, conf);
- assertFalse(Iterators.size(iter) > 0);
-
}
@Test
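Note the import change above from org.apache.commons.lang.StringUtils to its commons-lang3 successor; the call shape used by the test is unchanged. An illustrative sketch of the shared API surface (the join call is an example, not taken from this test):

    import org.apache.commons.lang3.StringUtils;

    public class StringUtilsExample {
      public static void main(String[] args) {
        // Same call as the old org.apache.commons.lang.StringUtils.join.
        System.out.println(StringUtils.join(new String[] {"a", "b", "c"}, ','));
      }
    }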
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java
index 111bc85..fc03e49 100644
--- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java
@@ -16,7 +16,12 @@
*/
package org.apache.mahout.text;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -28,10 +33,6 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-
import static java.util.Arrays.asList;
public class SequenceFilesFromLuceneStorageMRJobTest extends AbstractLuceneStorageTest {
@@ -45,7 +46,7 @@ public class SequenceFilesFromLuceneStorageMRJobTest extends AbstractLuceneStora
Configuration configuration = getConfiguration();
Path seqOutputPath = new Path(getTestTempDirPath(), "seqOutputPath");//don't make the output directory
lucene2SeqConf = new LuceneStorageConfiguration(configuration, asList(getIndexPath1(), getIndexPath2()),
- seqOutputPath, SingleFieldDocument.ID_FIELD, asList(SingleFieldDocument.FIELD));
+ seqOutputPath, SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD));
}
@After
@@ -66,7 +67,7 @@ public class SequenceFilesFromLuceneStorageMRJobTest extends AbstractLuceneStora
lucene2seq.run(lucene2SeqConf);
Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator();
- Map<String, Text> map = Maps.newHashMap();
+ Map<String, Text> map = new HashMap<>();
while (iterator.hasNext()) {
Pair<Text, Text> next = iterator.next();
map.put(next.getFirst().toString(), next.getSecond());
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
index ccff1d6..3cd87f2 100644
--- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
@@ -16,7 +16,13 @@
*/
package org.apache.mahout.text;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -37,12 +43,6 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
-
-import static java.util.Arrays.asList;
-
public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTest {
private SequenceFilesFromLuceneStorage lucene2Seq;
@@ -57,11 +57,8 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
lucene2Seq = new SequenceFilesFromLuceneStorage();
lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1(), getIndexPath2()),
- seqFilesOutputPath,
- SingleFieldDocument.ID_FIELD,
- asList(SingleFieldDocument.FIELD));
-
+ Arrays.asList(getIndexPath1(), getIndexPath2()), seqFilesOutputPath,
+ SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD));
}
@After
@@ -83,7 +80,7 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
lucene2Seq.run(lucene2SeqConf);
Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator();
- Map<String, Text> map = Maps.newHashMap();
+ Map<String, Text> map = new HashMap<>();
while (iterator.hasNext()) {
Pair<Text, Text> next = iterator.next();
map.put(next.getFirst().toString(), next.getSecond());
@@ -106,10 +103,8 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
commitDocuments(getDirectory(getIndexPath1AsFile()), new UnstoredFieldsDocument("5", "This is test document 5"));
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
- seqFilesOutputPath,
- SingleFieldDocument.ID_FIELD,
- asList(UnstoredFieldsDocument.FIELD, UnstoredFieldsDocument.UNSTORED_FIELD));
+ Collections.singletonList(getIndexPath1()), seqFilesOutputPath,
+ SingleFieldDocument.ID_FIELD, Arrays.asList(UnstoredFieldsDocument.FIELD, UnstoredFieldsDocument.UNSTORED_FIELD));
lucene2Seq.run(lucene2SeqConf);
}
@@ -139,10 +134,8 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
public void testRunQuery() throws IOException {
commitDocuments(getDirectory(getIndexPath1AsFile()), docs);
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
- seqFilesOutputPath,
- SingleFieldDocument.ID_FIELD,
- asList(SingleFieldDocument.FIELD));
+ Collections.singletonList(getIndexPath1()), seqFilesOutputPath,
+ SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD));
Query query = new TermQuery(new Term(lucene2SeqConf.getFields().get(0), "599"));
@@ -159,15 +152,18 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
@Test
public void testRunMultipleFields() throws IOException {
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
- seqFilesOutputPath,
+ Collections.singletonList(getIndexPath1()), seqFilesOutputPath,
SingleFieldDocument.ID_FIELD,
- asList(MultipleFieldsDocument.FIELD, MultipleFieldsDocument.FIELD1, MultipleFieldsDocument.FIELD2));
+ Arrays.asList(MultipleFieldsDocument.FIELD, MultipleFieldsDocument.FIELD1, MultipleFieldsDocument.FIELD2));
- MultipleFieldsDocument multipleFieldsDocument1 = new MultipleFieldsDocument("1", "This is field 1-1", "This is field 1-2", "This is field 1-3");
- MultipleFieldsDocument multipleFieldsDocument2 = new MultipleFieldsDocument("2", "This is field 2-1", "This is field 2-2", "This is field 2-3");
- MultipleFieldsDocument multipleFieldsDocument3 = new MultipleFieldsDocument("3", "This is field 3-1", "This is field 3-2", "This is field 3-3");
- commitDocuments(getDirectory(getIndexPath1AsFile()), multipleFieldsDocument1, multipleFieldsDocument2, multipleFieldsDocument3);
+ MultipleFieldsDocument multipleFieldsDocument1 =
+ new MultipleFieldsDocument("1", "This is field 1-1", "This is field 1-2", "This is field 1-3");
+ MultipleFieldsDocument multipleFieldsDocument2 =
+ new MultipleFieldsDocument("2", "This is field 2-1", "This is field 2-2", "This is field 2-3");
+ MultipleFieldsDocument multipleFieldsDocument3 =
+ new MultipleFieldsDocument("3", "This is field 3-1", "This is field 3-2", "This is field 3-3");
+ commitDocuments(getDirectory(getIndexPath1AsFile()), multipleFieldsDocument1,
+ multipleFieldsDocument2, multipleFieldsDocument3);
lucene2Seq.run(lucene2SeqConf);
@@ -181,10 +177,8 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
@Test
public void testRunNumericField() throws IOException {
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
- seqFilesOutputPath,
- SingleFieldDocument.ID_FIELD,
- asList(NumericFieldDocument.FIELD, NumericFieldDocument.NUMERIC_FIELD));
+ Collections.singletonList(getIndexPath1()), seqFilesOutputPath,
+ SingleFieldDocument.ID_FIELD, Arrays.asList(NumericFieldDocument.FIELD, NumericFieldDocument.NUMERIC_FIELD));
NumericFieldDocument doc1 = new NumericFieldDocument("1", "This is field 1", 100);
NumericFieldDocument doc2 = new NumericFieldDocument("2", "This is field 2", 200);
@@ -206,10 +200,10 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
+ Collections.singletonList(getIndexPath1()),
seqFilesOutputPath,
"nonExistingField",
- asList(SingleFieldDocument.FIELD));
+ Collections.singletonList(SingleFieldDocument.FIELD));
lucene2Seq.run(lucene2SeqConf);
}
@@ -219,10 +213,10 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
+ Collections.singletonList(getIndexPath1()),
seqFilesOutputPath,
SingleFieldDocument.ID_FIELD,
- asList(SingleFieldDocument.FIELD, "nonExistingField"));
+ Arrays.asList(SingleFieldDocument.FIELD, "nonExistingField"));
lucene2Seq.run(lucene2SeqConf);
}
@@ -240,10 +234,10 @@ public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTes
commitDocuments(getDirectory(getIndexPath1AsFile()), document);
lucene2SeqConf = new LuceneStorageConfiguration(configuration,
- asList(getIndexPath1()),
+ Collections.singletonList(getIndexPath1()),
seqFilesOutputPath,
SingleFieldDocument.ID_FIELD,
- asList(SingleFieldDocument.FIELD, "indexed"));
+ Arrays.asList(SingleFieldDocument.FIELD, "indexed"));
lucene2Seq.run(lucene2SeqConf);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
index 12c1451..ef2b8a6 100644
--- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
+++ b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
@@ -20,8 +20,6 @@ import java.io.File;
import java.io.FileOutputStream;
import java.util.zip.GZIPOutputStream;
-import com.google.common.io.Closeables;
-
import org.apache.commons.lang3.SystemUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -56,25 +54,18 @@ public final class SequenceFilesFromMailArchivesTest extends MahoutTestCase {
File subDir = new File(inputDir, "subdir");
subDir.mkdir();
File gzFile = new File(subDir, "mail-messages.gz");
- GZIPOutputStream gzOut = null;
- try {
- gzOut = new GZIPOutputStream(new FileOutputStream(gzFile));
+ try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(gzFile))) {
gzOut.write(testMailMessages.getBytes("UTF-8"));
gzOut.finish();
- } finally {
- Closeables.close(gzOut, false);
}
File subDir2 = new File(subDir, "subsubdir");
subDir2.mkdir();
File gzFile2 = new File(subDir2, "mail-messages-2.gz");
- try {
- gzOut = new GZIPOutputStream(new FileOutputStream(gzFile2));
+ try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(gzFile2))) {
gzOut.write(testMailMessages.getBytes("UTF-8"));
gzOut.finish();
- } finally {
- Closeables.close(gzOut, false);
- }
+ }
}
@Test
@@ -100,7 +91,7 @@ public final class SequenceFilesFromMailArchivesTest extends MahoutTestCase {
Assert.assertTrue("Expected chunk file " + expectedChunkPath + " not found!", expectedChunkFile.isFile());
Configuration conf = getConfiguration();
- SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(new Path(expectedChunkPath), true, conf);
+ SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(new Path(expectedChunkPath), true, conf);
Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
Pair<Text, Text> record = iterator.next();
@@ -155,7 +146,7 @@ public final class SequenceFilesFromMailArchivesTest extends MahoutTestCase {
assertEquals(1, fileStatuses.length); // only one
assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
SequenceFileIterator<Text, Text> iterator =
- new SequenceFileIterator<Text, Text>(mrOutputDir.suffix("/part-m-00000"), true, configuration);
+ new SequenceFileIterator<>(mrOutputDir.suffix("/part-m-00000"), true, configuration);
Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
Pair<Text, Text> record = iterator.next();
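The gzip hunks above show the recurring shape of this patch: a null-initialized stream plus Closeables.close(..., false) in a finally block becomes a try-with-resources statement, which closes the stream automatically even when write() throws. A self-contained sketch of the same pattern (file name and payload are invented for illustration):

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.zip.GZIPOutputStream;

    public class GzipWriteExample {
      public static void main(String[] args) throws IOException {
        File gzFile = new File("example.gz");
        // The stream is closed on exit from the block, normal or exceptional.
        try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(gzFile))) {
          gzOut.write("hello".getBytes("UTF-8"));
          gzOut.finish();
        }
      }
    }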
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java b/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
index aec5e39..040c8e4 100644
--- a/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
+++ b/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
@@ -20,12 +20,10 @@ package org.apache.mahout.text;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
+import java.util.HashMap;
import java.util.Map;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -162,13 +160,10 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
private static void createFilesFromArrays(Configuration conf, Path inputDir, String[][] data) throws IOException {
FileSystem fs = FileSystem.get(conf);
- OutputStreamWriter writer;
for (String[] aData : data) {
- writer = new OutputStreamWriter(fs.create(new Path(inputDir, aData[0])), Charsets.UTF_8);
- try {
+ try (OutputStreamWriter writer =
+ new OutputStreamWriter(fs.create(new Path(inputDir, aData[0])), Charsets.UTF_8)){
writer.write(aData[1]);
- } finally {
- Closeables.close(writer, false);
}
}
}
@@ -182,21 +177,15 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
String currentRecursiveDir = inputDir.toString();
for (String[] aData : data) {
- OutputStreamWriter writer;
-
currentRecursiveDir += "/" + aData[0];
File subDir = new File(currentRecursiveDir);
subDir.mkdir();
curPath = new Path(subDir.toString(), "file.txt");
- writer = new OutputStreamWriter(fs.create(curPath), Charsets.UTF_8);
-
logger.info("Created file: {}", curPath.toString());
- try {
+ try (OutputStreamWriter writer = new OutputStreamWriter(fs.create(curPath), Charsets.UTF_8)){
writer.write(aData[1]);
- } finally {
- Closeables.close(writer, false);
}
}
}
@@ -212,23 +201,20 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
assertEquals(1, fileStatuses.length); // only one
assertEquals("chunk-0", fileStatuses[0].getPath().getName());
- Map<String, String> fileToData = Maps.newHashMap();
+ Map<String, String> fileToData = new HashMap<>();
for (String[] aData : data) {
fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
}
// read a chunk to check content
- SequenceFileIterator<Text, Text> iterator =
- new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration);
- try {
+ try (SequenceFileIterator<Text, Text> iterator =
+ new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration)){
while (iterator.hasNext()) {
Pair<Text, Text> record = iterator.next();
String retrievedData = fileToData.get(record.getFirst().toString().trim());
assertNotNull(retrievedData);
assertEquals(retrievedData, record.getSecond().toString().trim());
}
- } finally {
- Closeables.close(iterator, true);
}
}
@@ -246,7 +232,7 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
assertEquals("chunk-0", fileStatuses[0].getPath().getName());
- Map<String, String> fileToData = Maps.newHashMap();
+ Map<String, String> fileToData = new HashMap<>();
String currentPath = prefix;
for (String[] aData : data) {
currentPath += Path.SEPARATOR + aData[0];
@@ -254,9 +240,8 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
}
// read a chunk to check content
- SequenceFileIterator<Text, Text> iterator =
- new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration);
- try {
+ try (SequenceFileIterator<Text, Text> iterator =
+ new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration)) {
while (iterator.hasNext()) {
Pair<Text, Text> record = iterator.next();
String retrievedData = fileToData.get(record.getFirst().toString().trim());
@@ -266,8 +251,6 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
assertEquals(retrievedData, record.getSecond().toString().trim());
System.out.printf(">>> k: %s, v: %s\n", record.getFirst().toString(), record.getSecond().toString());
}
- } finally {
- Closeables.close(iterator, true);
}
}
@@ -279,16 +262,15 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
assertEquals(1, fileStatuses.length); // only one
assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
- Map<String, String> fileToData = Maps.newHashMap();
+ Map<String, String> fileToData = new HashMap<>();
for (String[] aData : data) {
System.out.printf("map.put: %s %s\n", prefix + Path.SEPARATOR + aData[0], aData[1]);
fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
}
// read a chunk to check content
- SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
- fileStatuses[0].getPath(), true, conf);
- try {
+ try (SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
+ fileStatuses[0].getPath(), true, conf)) {
while (iterator.hasNext()) {
Pair<Text, Text> record = iterator.next();
String retrievedData = fileToData.get(record.getFirst().toString().trim());
@@ -297,8 +279,6 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
assertNotNull(retrievedData);
assertEquals(retrievedData, record.getSecond().toString().trim());
}
- } finally {
- Closeables.close(iterator, true);
}
}
@@ -310,7 +290,7 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
assertEquals(1, fileStatuses.length); // only one
assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
- Map<String, String> fileToData = Maps.newHashMap();
+ Map<String, String> fileToData = new HashMap<>();
String currentPath = prefix;
for (String[] aData : data) {
@@ -319,9 +299,8 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
}
// read a chunk to check content
- SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
- fileStatuses[0].getPath(), true, configuration);
- try {
+ try (SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
+ fileStatuses[0].getPath(), true, configuration)){
while (iterator.hasNext()) {
Pair<Text, Text> record = iterator.next();
System.out.printf("MR-Recur > Trying to check: %s\n", record.getFirst().toString().trim());
@@ -329,9 +308,6 @@ public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
assertNotNull(retrievedData);
assertEquals(retrievedData, record.getSecond().toString().trim());
}
- } finally {
- Closeables.close(iterator, true);
}
}
}
-
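The chunk-reading hunks above rely on SequenceFileIterator being Closeable, so the old Closeables.close(iterator, true) finally blocks collapse into the resource header. A minimal sketch mirroring those hunks (the path argument and helper method are hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.mahout.common.Pair;
    import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;

    public class ChunkReadExample {
      public static void readChunk(Path chunkPath, Configuration conf) throws Exception {
        // The iterator is the resource; close() runs even if next() throws.
        try (SequenceFileIterator<Text, Text> iterator =
                 new SequenceFileIterator<>(chunkPath, true, conf)) {
          while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            System.out.println(record.getFirst() + "\t" + record.getSecond());
          }
        }
      }
    }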
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
index 5a4f9fa..55d6e0f 100644
--- a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
+++ b/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
@@ -22,7 +22,6 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-
/**
* Document with numeric field.
*/
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java b/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
index 72d60ec..7ffa690 100644
--- a/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
@@ -23,17 +23,14 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
-import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
-
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.classifier.ClassifierData;
import org.apache.mahout.common.MahoutTestCase;
@@ -68,7 +65,7 @@ public final class SplitInputTest extends MahoutTestCase {
super.setUp();
- countMap = new OpenObjectIntHashMap<String>();
+ countMap = new OpenObjectIntHashMap<>();
charset = Charsets.UTF_8;
tempSequenceDirectory = getTestTempFilePath("tmpsequence");
@@ -192,15 +189,13 @@ public final class SplitInputTest extends MahoutTestCase {
* @param path path for test SequenceFile
* @param testPoints number of records in test SequenceFile
*/
- private void writeVectorSequenceFile(Path path, int testPoints)
- throws IOException {
+ private void writeVectorSequenceFile(Path path, int testPoints) throws IOException {
Path tempSequenceFile = new Path(path, "part-00000");
Configuration conf = getConfiguration();
IntWritable key = new IntWritable();
VectorWritable value = new VectorWritable();
- SequenceFile.Writer writer = null;
- try {
- writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class);
+ try (SequenceFile.Writer writer =
+ SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class)) {
for (int i = 0; i < testPoints; i++) {
key.set(i);
Vector v = new SequentialAccessSparseVector(4);
@@ -208,8 +203,6 @@ public final class SplitInputTest extends MahoutTestCase {
value.set(v);
writer.append(key, value);
}
- } finally {
- IOUtils.closeStream(writer);
}
}
@@ -223,18 +216,12 @@ public final class SplitInputTest extends MahoutTestCase {
Configuration conf = getConfiguration();
Text key = new Text();
Text value = new Text();
- SequenceFile.Writer writer = null;
- try {
- writer =
- SequenceFile.createWriter(fs, conf, tempSequenceFile,
- Text.class, Text.class);
+ try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, Text.class, Text.class)){
for (int i = 0; i < testPoints; i++) {
key.set(Integer.toString(i));
value.set("Line " + i);
writer.append(key, value);
}
- } finally {
- IOUtils.closeStream(writer);
}
}
@@ -243,7 +230,7 @@ public final class SplitInputTest extends MahoutTestCase {
* @param sequenceFilePath path to SequenceFile
*/
private void displaySequenceFile(Path sequenceFilePath) throws IOException {
- for (Pair<?,?> record : new SequenceFileIterable<Writable,Writable>(sequenceFilePath, true, getConfiguration())) {
+ for (Pair<?,?> record : new SequenceFileIterable<>(sequenceFilePath, true, getConfiguration())) {
System.out.println(record.getFirst() + "\t" + record.getSecond());
}
}
@@ -255,7 +242,7 @@ public final class SplitInputTest extends MahoutTestCase {
*/
private int getNumberRecords(Path sequenceFilePath) throws IOException {
int numberRecords = 0;
- for (Object value : new SequenceFileValueIterable<Writable>(sequenceFilePath, true, getConfiguration())) {
+ for (Object value : new SequenceFileValueIterable<>(sequenceFilePath, true, getConfiguration())) {
numberRecords++;
}
return numberRecords;
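The writer-side changes follow the same recipe: SequenceFile.Writer is Closeable, so IOUtils.closeStream(writer) gives way to a resource declaration. A sketch with invented key/value content, mirroring the hunks above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileWriteExample {
      public static void writePairs(Path file, int count) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Text key = new Text();
        Text value = new Text();
        // writer.close() runs automatically; no explicit closeStream needed.
        try (SequenceFile.Writer writer =
                 SequenceFile.createWriter(fs, conf, file, Text.class, Text.class)) {
          for (int i = 0; i < count; i++) {
            key.set(Integer.toString(i));
            value.set("Line " + i);
            writer.append(key, value);
          }
        }
      }
    }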
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/TestConcatenateVectorsJob.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/TestConcatenateVectorsJob.java b/integration/src/test/java/org/apache/mahout/utils/TestConcatenateVectorsJob.java
index 329b590..98f5ca8 100644
--- a/integration/src/test/java/org/apache/mahout/utils/TestConcatenateVectorsJob.java
+++ b/integration/src/test/java/org/apache/mahout/utils/TestConcatenateVectorsJob.java
@@ -17,9 +17,9 @@
package org.apache.mahout.utils;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
@@ -66,7 +66,7 @@ public class TestConcatenateVectorsJob extends MahoutTestCase {
for(int i = 0; i < 3; i++) {
double[] values = DATA_A[i];
- List<VectorWritable> vwList = Lists.newArrayList();
+ List<VectorWritable> vwList = new ArrayList<>();
if (values.length > 0) {
Vector v = new DenseVector(values);
VectorWritable vw = new VectorWritable();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java b/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
index 77307bb..c519f85 100644
--- a/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
@@ -17,15 +17,15 @@
package org.apache.mahout.utils.email;
-import com.google.common.base.Charsets;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
import java.io.File;
import java.io.StringWriter;
import java.net.URL;
import java.util.regex.Pattern;
+import org.apache.commons.io.Charsets;
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Test;
+
public final class MailProcessorTest extends MahoutTestCase {
@Test
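This test, like SplitInputTest and the ARFF and Bloom-filter tests later in the patch, swaps com.google.common.base.Charsets for org.apache.commons.io.Charsets; both expose the same UTF_8 constant. Java 7 itself adds java.nio.charset.StandardCharsets.UTF_8, though this patch standardizes on the commons-io constant. A one-line sketch of the usage (the string is illustrative):

    import org.apache.commons.io.Charsets;

    public class CharsetsExample {
      public static void main(String[] args) {
        // UTF-8 encodes the accented character as two bytes, so length is 6.
        byte[] bytes = "héllo".getBytes(Charsets.UTF_8);
        System.out.println(bytes.length);
      }
    }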
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java b/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
index 9df4930..37efc01 100644
--- a/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
@@ -26,8 +26,7 @@ import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
-import com.google.common.base.Charsets;
-
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java b/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
index 728a78c..d569acf 100644
--- a/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
@@ -60,7 +60,7 @@ public final class RegexMapperTest extends MahoutTestCase {
conf.set(RegexMapper.REGEX, "(\\d+)\\.(\\d+)\\.(\\d+)");
conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName());
conf.setStrings(RegexMapper.GROUP_MATCHERS, "1", "3");
- DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<LongWritable, Text>();
+ DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<>();
Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter
.build(mapper, conf, mapWriter);
@@ -83,7 +83,7 @@ public final class RegexMapperTest extends MahoutTestCase {
conf.set(RegexMapper.REGEX, "(?<=(\\?|&)q=).*?(?=&|$)");
conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName());
conf.set(RegexMapper.FORMATTER_CLASS, FPGFormatter.class.getName());
- DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<LongWritable, Text>();
+ DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<>();
Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter
.build(mapper, conf, mapWriter);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java b/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
index 0599028..8ae10a5 100644
--- a/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
@@ -17,14 +17,14 @@
package org.apache.mahout.utils.regex;
-import com.google.common.collect.Lists;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.regex.Pattern;
+import org.apache.mahout.common.MahoutTestCase;
+import org.junit.Test;
+
public final class RegexUtilsTest extends MahoutTestCase {
static final String[] TEST_STRS = {
@@ -53,7 +53,7 @@ public final class RegexUtilsTest extends MahoutTestCase {
assertEquals(res, "import statement 1", res);
pattern = Pattern.compile("(start=1) HTTP");
- Collection<Integer> groupsToKeep = Lists.newArrayList();
+ Collection<Integer> groupsToKeep = new ArrayList<>();
groupsToKeep.add(1);
res = RegexUtils.extract(line, pattern, groupsToKeep, " ", RegexUtils.IDENTITY_TRANSFORMER);
assertEquals(res, "start=1", res);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
index 3fcfbf8..c55fd8d 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
@@ -17,9 +17,8 @@
package org.apache.mahout.utils.vectors;
-import com.google.common.collect.Iterables;
+import java.util.Random;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -33,8 +32,6 @@ import org.apache.mahout.math.Vector;
import org.junit.Before;
import org.junit.Test;
-import java.util.Random;
-
public final class VectorHelperTest extends MahoutTestCase {
private static final int NUM_DOCS = 100;
@@ -52,28 +49,24 @@ public final class VectorHelperTest extends MahoutTestCase {
inputPathOne = getTestTempFilePath("documents/docs-one.file");
FileSystem fs = FileSystem.get(inputPathOne.toUri(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, inputPathOne, Text.class, IntWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, inputPathOne, Text.class, IntWritable.class)) {
Random rd = RandomUtils.getRandom();
for (int i = 0; i < NUM_DOCS; i++) {
// Make all indices higher than dictionary size
writer.append(new Text("Document::ID::" + i), new IntWritable(NUM_DOCS + rd.nextInt(NUM_DOCS)));
}
- } finally {
- Closeables.close(writer, false);
}
inputPathTwo = getTestTempFilePath("documents/docs-two.file");
fs = FileSystem.get(inputPathTwo.toUri(), conf);
- writer = new SequenceFile.Writer(fs, conf, inputPathTwo, Text.class, IntWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, inputPathTwo, Text.class, IntWritable.class)) {
Random rd = RandomUtils.getRandom();
for (int i = 0; i < NUM_DOCS; i++) {
// Keep indices within number of documents
writer.append(new Text("Document::ID::" + i), new IntWritable(rd.nextInt(NUM_DOCS)));
}
- } finally {
- Closeables.close(writer, false);
}
}
@@ -120,7 +113,7 @@ public final class VectorHelperTest extends MahoutTestCase {
// check if sizeOfNonZeroElementsInVector < maxEntries
assertTrue(VectorHelper.topEntries(v, 9).size() < 9);
// check if sizeOfNonZeroElementsInVector > maxEntries
- assertTrue(VectorHelper.topEntries(v, 5).size() < Iterables.size(v.nonZeroes()));
+ assertTrue(VectorHelper.topEntries(v, 5).size() < v.getNumNonZeroElements());
}
@Test
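The last hunk above drops a Guava traversal: Iterables.size(v.nonZeroes()) becomes the vector's own getNumNonZeroElements(), asking the vector directly instead of counting through an external iterator. A sketch with illustrative values:

    import org.apache.mahout.math.DenseVector;
    import org.apache.mahout.math.Vector;

    public class NonZeroCountExample {
      public static void main(String[] args) {
        Vector v = new DenseVector(new double[] {0.0, 1.5, 0.0, 2.5});
        // Two entries are nonzero, so this prints 2.
        System.out.println(v.getNumNonZeroElements());
      }
    }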
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
index bf7415f..4c7f17a 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
@@ -25,9 +25,8 @@ import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
-import com.google.common.base.Charsets;
import com.google.common.io.Resources;
-
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
index 693a8db..7e7623e 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
@@ -18,9 +18,8 @@ package org.apache.mahout.utils.vectors.arff;
import java.io.IOException;
import java.io.StringWriter;
-import com.google.common.base.Charsets;
import com.google.common.io.Resources;
-
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.MahoutTestCase;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
index 1e3b8f8..2867640 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
@@ -21,7 +21,6 @@ import org.junit.Test;
import java.util.Map;
-
public class MapBackedARFFModelTest extends MahoutTestCase {
@Test
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
index ca3781b..e76cf70 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,8 +22,6 @@ import java.io.StringReader;
import java.io.StringWriter;
import java.util.Iterator;
-import com.google.common.io.Closeables;
-
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.RandomVectorIterable;
@@ -37,19 +35,17 @@ public class CSVVectorIteratorTest extends MahoutTestCase {
public void testCount() throws Exception {
StringWriter sWriter = new StringWriter();
- TextualVectorWriter writer = new TextualVectorWriter(sWriter) {
+ try (TextualVectorWriter writer = new TextualVectorWriter(sWriter) {
@Override
public void write(Vector vector) throws IOException {
String vecStr = VectorHelper.vectorToCSVString(vector, false);
getWriter().write(vecStr);
}
- };
- try {
+ }) {
Iterable<Vector> iter = new RandomVectorIterable(50);
writer.write(iter);
- } finally {
- Closeables.close(writer, false);
}
+
Iterator<Vector> csvIter = new CSVVectorIterator(new StringReader(sWriter.getBuffer().toString()));
int count = 0;
while (csvIter.hasNext()) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
index 96b2704..e2f7032 100644
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
+++ b/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
@@ -18,11 +18,9 @@
package org.apache.mahout.utils.vectors.io;
import java.io.StringWriter;
+import java.util.ArrayList;
import java.util.Collection;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -44,11 +42,8 @@ public final class VectorWriterTest extends MahoutTestCase {
Configuration conf = getConfiguration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);
- SequenceFileVectorWriter writer = new SequenceFileVectorWriter(seqWriter);
- try {
+ try (SequenceFileVectorWriter writer = new SequenceFileVectorWriter(seqWriter)) {
writer.write(new RandomVectorIterable(50));
- } finally {
- Closeables.close(writer, false);
}
long count = HadoopUtil.countRecords(path, conf);
@@ -58,14 +53,11 @@ public final class VectorWriterTest extends MahoutTestCase {
@Test
public void testTextOutputSize() throws Exception {
StringWriter strWriter = new StringWriter();
- VectorWriter writer = new TextualVectorWriter(strWriter);
- try {
- Collection<Vector> vectors = Lists.newArrayList();
+ try (VectorWriter writer = new TextualVectorWriter(strWriter)) {
+ Collection<Vector> vectors = new ArrayList<>();
vectors.add(new DenseVector(new double[]{0.3, 1.5, 4.5}));
vectors.add(new DenseVector(new double[]{1.3, 1.5, 3.5}));
writer.write(vectors);
- } finally {
- Closeables.close(writer, false);
}
String buffer = strWriter.toString();
assertNotNull(buffer);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/QRDecomposition.java b/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
index 732529a..5a3d918 100644
--- a/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
+++ b/math/src/main/java/org/apache/mahout/math/QRDecomposition.java
@@ -27,7 +27,6 @@ import org.apache.mahout.math.function.Functions;
import java.util.Locale;
-
/**
For an <tt>m x n</tt> matrix <tt>A</tt> with <tt>m >= n</tt>, the QR decomposition is an <tt>m x n</tt>
orthogonal matrix <tt>Q</tt> and an <tt>n x n</tt> upper triangular matrix <tt>R</tt> so that
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java b/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
index ecb34c6..2abff10 100644
--- a/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
+++ b/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
@@ -589,7 +589,7 @@ public class SingularValueDecomposition implements java.io.Serializable {
}
/**
- * @parameter minSingularValue
+ * @param minSingularValue
* minSingularValue - value below which singular values are ignored (a 0 or negative
* value implies all singular value will be used)
* @return Returns the n × n covariance matrix.
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java b/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
index 6a75716..c678b35 100644
--- a/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
+++ b/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
@@ -42,8 +42,8 @@ public class SparseColumnMatrix extends AbstractMatrix {
/**
* Construct a matrix of the given cardinality
*
- * @param rows
- * @param columns
+ * @param rows # of rows
+ * @param columns # of columns
*/
public SparseColumnMatrix(int rows, int columns) {
super(rows, columns);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java b/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
index fe3d11c..5b5cc9b 100644
--- a/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
+++ b/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/HebbianSolver.java
@@ -17,21 +17,21 @@
package org.apache.mahout.math.decomposer.hebbian;
+import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
-import com.google.common.collect.Lists;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.math.decomposer.AsyncEigenVerifier;
import org.apache.mahout.math.decomposer.EigenStatus;
import org.apache.mahout.math.decomposer.SingularVectorVerifier;
-import org.apache.mahout.math.function.TimesFunction;
-import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.PlusMult;
+import org.apache.mahout.math.function.TimesFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -171,7 +171,7 @@ public class HebbianSolver {
int desiredRank) {
int cols = corpus.numCols();
Matrix eigens = new DenseMatrix(desiredRank, cols);
- List<Double> eigenValues = Lists.newArrayList();
+ List<Double> eigenValues = new ArrayList<>();
log.info("Finding {} singular vectors of matrix with {} rows, via Hebbian", desiredRank, corpus.numRows());
/*
* The corpusProjections matrix is a running cache of the residual projection of each corpus vector against all
@@ -278,7 +278,7 @@ public class HebbianSolver {
currentPseudoEigen.assign(previousEigen, new PlusMult(-state.getHelperVector().get(i)));
state.getHelperVector().set(i, 0);
}
- if (DEBUG && currentPseudoEigen.norm(2) > 0) {
+ if (currentPseudoEigen.norm(2) > 0) {
for (int i = 0; i < state.getNumEigensProcessed(); i++) {
Vector previousEigen = previousEigens.viewRow(i);
log.info("dot with previous: {}", previousEigen.dot(currentPseudoEigen) / currentPseudoEigen.norm(2));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/TrainingState.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/TrainingState.java b/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/TrainingState.java
index d1fcb6a..af6c2ef 100644
--- a/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/TrainingState.java
+++ b/math/src/main/java/org/apache/mahout/math/decomposer/hebbian/TrainingState.java
@@ -17,9 +17,9 @@
package org.apache.mahout.math.decomposer.hebbian;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.Vector;
@@ -44,7 +44,7 @@ public class TrainingState {
trainingIndex = 0;
helperVector = new DenseVector(eigens.numRows());
firstPass = true;
- statusProgress = Lists.newArrayList();
+ statusProgress = new ArrayList<>();
activationNumerator = 0;
activationDenominatorSquared = 0;
numEigensProcessed = 0;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
----------------------------------------------------------------------
diff --git a/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java b/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
index 336449c..d2c8434 100644
--- a/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
+++ b/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
@@ -18,10 +18,10 @@
package org.apache.mahout.math.stats;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import com.google.common.collect.Ordering;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.PriorityQueue;
@@ -159,7 +159,7 @@ public final class LogLikelihood {
return Double.compare(tScoredItem.score, tScoredItem1.score);
}
};
- Queue<ScoredItem<T>> best = new PriorityQueue<ScoredItem<T>>(maxReturn + 1, byScoreAscending);
+ Queue<ScoredItem<T>> best = new PriorityQueue<>(maxReturn + 1, byScoreAscending);
for (T t : a.elementSet()) {
compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t);
@@ -175,7 +175,7 @@ public final class LogLikelihood {
}
}
- List<ScoredItem<T>> r = Lists.newArrayList(best);
+ List<ScoredItem<T>> r = new ArrayList<>(best);
Collections.sort(r, byScoreAscending.reverse());
return r;
}
@@ -192,7 +192,7 @@ public final class LogLikelihood {
int kB = b.count(t);
double score = rootLogLikelihoodRatio(kA, totalA - kA, kB, totalB - kB);
if (score >= threshold) {
- ScoredItem<T> x = new ScoredItem<T>(t, score);
+ ScoredItem<T> x = new ScoredItem<>(t, score);
best.add(x);
while (best.size() > maxReturn) {
best.poll();
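
Diamond inference also applies when the constructor takes arguments, as in the PriorityQueue and ScoredItem lines above; the one Java 7 restriction worth remembering is that anonymous classes, like the comparator here, still need explicit type arguments. A self-contained sketch with a hypothetical Scored class:

    import java.util.Comparator;
    import java.util.PriorityQueue;
    import java.util.Queue;

    // Scored: hypothetical, modeled loosely on LogLikelihood.ScoredItem
    class Scored<T> {
      final T item;
      final double score;
      Scored(T item, double score) { this.item = item; this.score = score; }
    }

    class QueueDemo {
      static void demo() {
        // anonymous classes cannot use <> in Java 7:
        Comparator<Scored<String>> byScore = new Comparator<Scored<String>>() {
          @Override
          public int compare(Scored<String> a, Scored<String> b) {
            return Double.compare(a.score, b.score);
          }
        };
        // but both of these infer their type arguments:
        Queue<Scored<String>> best = new PriorityQueue<>(11, byScore);
        best.add(new Scored<>("token", 2.97));
      }
    }
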
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/math/src/test/java/org/apache/mahout/common/RandomUtilsTest.java
----------------------------------------------------------------------
diff --git a/math/src/test/java/org/apache/mahout/common/RandomUtilsTest.java b/math/src/test/java/org/apache/mahout/common/RandomUtilsTest.java
index 29ef6f8..973cee5 100644
--- a/math/src/test/java/org/apache/mahout/common/RandomUtilsTest.java
+++ b/math/src/test/java/org/apache/mahout/common/RandomUtilsTest.java
@@ -23,7 +23,7 @@ import org.junit.Test;
import java.util.Random;
/**
- * Tests {@linkRandomUtils}.
+ * Tests {@link RandomUtils}.
*/
public final class RandomUtilsTest extends MahoutTestCase {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
index 947204d..bc832aa 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
@@ -20,9 +20,9 @@ package org.apache.mahout.cf.taste.hadoop;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -65,7 +65,7 @@ public final class RecommendedItemsWritable implements Writable {
@Override
public void readFields(DataInput in) throws IOException {
int size = in.readInt();
- recommended = Lists.newArrayListWithCapacity(size);
+ recommended = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
long itemID = Varint.readSignedVarLong(in);
float value = in.readFloat();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
index 0f9ea75..8f563b0 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
@@ -17,13 +17,13 @@
package org.apache.mahout.cf.taste.hadoop;
-import com.google.common.collect.Lists;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
public class TopItemsQueue extends PriorityQueue<MutableRecommendedItem> {
private static final long SENTINEL_ID = Long.MIN_VALUE;
@@ -36,7 +36,7 @@ public class TopItemsQueue extends PriorityQueue<MutableRecommendedItem> {
}
public List<RecommendedItem> getTopItems() {
- List<RecommendedItem> recommendedItems = Lists.newArrayListWithCapacity(maxSize);
+ List<RecommendedItem> recommendedItems = new ArrayList<>(maxSize);
while (size() > 0) {
MutableRecommendedItem topItem = pop();
// filter out "sentinel" objects necessary for maintaining an efficient priority queue
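
Lists.newArrayListWithCapacity(n) is a thin wrapper over the ArrayList(int) constructor, so this replacement is one-to-one; in both forms the argument is an initial backing-array size (a growth hint), not a bound. A tiny sketch:

    import java.util.ArrayList;
    import java.util.List;

    class CapacityDemo {
      // pre-sized to skip incremental array growth; still growable past 10
      static final List<String> items = new ArrayList<>(10);
    }
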
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
index c5ccf38..1c07b67 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
@@ -17,9 +17,12 @@
package org.apache.mahout.cf.taste.hadoop.als;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
@@ -37,10 +40,6 @@ import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.als.AlternatingLeastSquaresSolver;
import org.apache.mahout.math.map.OpenIntObjectHashMap;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-
final class ALS {
private ALS() {}
@@ -65,15 +64,10 @@ final class ALS {
LocalFileSystem localFs = FileSystem.getLocal(conf);
for (Path cachedFile : cachedFiles) {
-
- SequenceFile.Reader reader = null;
- try {
- reader = new SequenceFile.Reader(localFs, cachedFile, conf);
+ try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cachedFile, conf)) {
while (reader.next(rowIndex, row)) {
featureMatrix.put(rowIndex.get(), row.get());
}
- } finally {
- Closeables.close(reader, true);
}
}
@@ -96,7 +90,7 @@ final class ALS {
double lambda, int numFeatures) {
Vector ratings = ratingsWritable.get();
- List<Vector> featureVectors = Lists.newArrayListWithCapacity(ratings.getNumNondefaultElements());
+ List<Vector> featureVectors = new ArrayList<>(ratings.getNumNondefaultElements());
for (Vector.Element e : ratings.nonZeroes()) {
int index = e.index();
featureVectors.add(uOrM.get(index));
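
One behavioral nuance in this conversion: Closeables.close(reader, true) logged and swallowed an IOException from close(), whereas try-with-resources never silently drops it; a close() failure propagates if the body completed, and is attached as a suppressed exception if the body already threw. A sketch with a hypothetical, deliberately failing resource:

    import java.io.IOException;

    // Flaky: hypothetical resource whose read() and close() both fail
    class Flaky implements AutoCloseable {
      void read() throws IOException { throw new IOException("read failed"); }
      @Override public void close() throws IOException { throw new IOException("close failed"); }
    }

    class SuppressDemo {
      public static void main(String[] args) {
        try (Flaky r = new Flaky()) {
          r.read();
        } catch (IOException e) {
          System.err.println(e.getMessage());        // "read failed"
          for (Throwable s : e.getSuppressed()) {
            System.err.println("suppressed: " + s);  // the close() failure
          }
        }
      }
    }
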
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
index 3048b77..e69053c 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
@@ -23,8 +23,7 @@ import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
-import com.google.common.base.Charsets;
-import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -104,17 +103,12 @@ public class FactorizationEvaluator extends AbstractJob {
return -1;
}
- BufferedWriter writer = null;
- try {
- FileSystem fs = FileSystem.get(getOutputPath().toUri(), getConf());
- FSDataOutputStream outputStream = fs.create(getOutputPath("rmse.txt"));
+ FileSystem fs = FileSystem.get(getOutputPath().toUri(), getConf());
+ FSDataOutputStream outputStream = fs.create(getOutputPath("rmse.txt"));
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8))) {
double rmse = computeRmse(errors);
- writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));
writer.write(String.valueOf(rmse));
- } finally {
- Closeables.close(writer, false);
}
-
return 0;
}
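
The Charsets import swap (Guava's com.google.common.base.Charsets to commons-io's org.apache.commons.io.Charsets) is source-compatible; both expose UTF_8 as a java.nio.charset.Charset constant. Since this is a Java 7 upgrade, the JDK's own java.nio.charset.StandardCharsets would also serve and drops the third-party dependency. A sketch:

    import java.io.BufferedWriter;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.nio.charset.StandardCharsets;

    class CharsetDemo {
      static void demo() throws IOException {
        // ByteArrayOutputStream used here only to keep the sketch self-contained
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (BufferedWriter writer =
                 new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
          writer.write(String.valueOf(1.234));
        }
      }
    }
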
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
index 624a8c4..2ce9b61 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
@@ -23,7 +23,6 @@ import java.util.Map;
import java.util.Random;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
@@ -218,11 +217,9 @@ public class ParallelALSFactorizationJob extends AbstractJob {
Random random = RandomUtils.getRandom();
FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
- SequenceFile.Writer writer = null;
- try {
- writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"), IntWritable.class,
- VectorWritable.class);
-
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"),
+ IntWritable.class, VectorWritable.class)) {
IntWritable index = new IntWritable();
VectorWritable featureVector = new VectorWritable();
@@ -236,8 +233,6 @@ public class ParallelALSFactorizationJob extends AbstractJob {
featureVector.set(row);
writer.append(index, featureVector);
}
- } finally {
- Closeables.close(writer, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IDReader.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IDReader.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IDReader.java
index b8cf6bb..7797fe9 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IDReader.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IDReader.java
@@ -17,12 +17,12 @@
package org.apache.mahout.cf.taste.hadoop.item;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import java.io.IOException;
import java.io.InputStream;
+import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
@@ -142,10 +142,9 @@ public class IDReader {
}
private Map<Long, FastIDSet> readUserItemFilter(String pathString) throws IOException {
- Map<Long, FastIDSet> result = Maps.newHashMap();
- InputStream in = openFile(pathString);
+ Map<Long, FastIDSet> result = new HashMap<>();
- try {
+ try (InputStream in = openFile(pathString)) {
for (String line : new FileLineIterable(in)) {
try {
String[] tokens = SEPARATOR.split(line);
@@ -157,8 +156,6 @@ public class IDReader {
log.warn("userItemFile line ignored: {}", line);
}
}
- } finally {
- Closeables.close(in, true);
}
return result;
@@ -202,9 +199,8 @@ public class IDReader {
if (pathString != null) {
result = new FastIDSet();
- InputStream in = openFile(pathString);
- try {
+ try (InputStream in = openFile(pathString)) {
for (String line : new FileLineIterable(in)) {
try {
result.add(Long.parseLong(line));
@@ -212,8 +208,6 @@ public class IDReader {
log.warn("line ignored: {}", line);
}
}
- } finally {
- Closeables.close(in, true);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
index d9a7d25..4415a55 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
@@ -17,7 +17,10 @@
package org.apache.mahout.cf.taste.hadoop.item;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -25,9 +28,6 @@ import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
-import java.io.IOException;
-import java.util.List;
-
/**
* we use a neat little trick to explicitly filter items for some users: we inject a NaN summand into the preference
* estimation for those items, which makes {@link org.apache.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer}
@@ -48,8 +48,8 @@ public class ItemFilterAsVectorAndPrefsReducer
/* artificial NaN summand to exclude this item from the recommendations for all users specified in userIDs */
vector.set(itemIDIndex, Double.NaN);
- List<Long> userIDs = Lists.newArrayList();
- List<Float> prefValues = Lists.newArrayList();
+ List<Long> userIDs = new ArrayList<>();
+ List<Float> prefValues = new ArrayList<>();
for (VarLongWritable userID : values) {
userIDs.add(userID.get());
prefValues.add(1.0f);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
index 74d30cb..9167437 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
@@ -18,9 +18,9 @@
package org.apache.mahout.cf.taste.hadoop.item;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.Vector;
@@ -35,8 +35,8 @@ public final class ToVectorAndPrefReducer extends
Iterable<VectorOrPrefWritable> values,
Context context) throws IOException, InterruptedException {
- List<Long> userIDs = Lists.newArrayList();
- List<Float> prefValues = Lists.newArrayList();
+ List<Long> userIDs = new ArrayList<>();
+ List<Float> prefValues = new ArrayList<>();
Vector similarityMatrixColumn = null;
for (VectorOrPrefWritable value : values) {
if (value.getVector() == null) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
index 495a920..11d496f 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
@@ -20,9 +20,9 @@ package org.apache.mahout.cf.taste.hadoop.item;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.math.Varint;
import org.apache.mahout.math.Vector;
@@ -77,8 +77,8 @@ public final class VectorAndPrefsWritable implements Writable {
writable.readFields(in);
vector = writable.get();
int size = Varint.readUnsignedVarInt(in);
- userIDs = Lists.newArrayListWithCapacity(size);
- values = Lists.newArrayListWithCapacity(size);
+ userIDs = new ArrayList<>(size);
+ values = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
userIDs.add(Varint.readSignedVarLong(in));
values.add(in.readFloat());
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/TopSimilarItemsQueue.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/TopSimilarItemsQueue.java b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/TopSimilarItemsQueue.java
index b0ba24d..acb6392 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/TopSimilarItemsQueue.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/TopSimilarItemsQueue.java
@@ -17,13 +17,13 @@
package org.apache.mahout.cf.taste.hadoop.similarity.item;
-import com.google.common.collect.Lists;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
-
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
+
public class TopSimilarItemsQueue extends PriorityQueue<SimilarItem> {
private static final long SENTINEL_ID = Long.MIN_VALUE;
@@ -36,7 +36,7 @@ public class TopSimilarItemsQueue extends PriorityQueue<SimilarItem> {
}
public List<SimilarItem> getTopItems() {
- List<SimilarItem> items = Lists.newArrayListWithCapacity(maxSize);
+ List<SimilarItem> items = new ArrayList<>(maxSize);
while (size() > 0) {
SimilarItem topItem = pop();
// filter out "sentinel" objects necessary for maintaining an efficient priority queue
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
index cc91560..3e03108 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
@@ -17,13 +17,13 @@
package org.apache.mahout.cf.taste.impl.common;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.locks.ReentrantLock;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,7 +47,7 @@ public final class RefreshHelper implements Refreshable {
* encapsulates the containing object's own refresh logic
*/
public RefreshHelper(Callable<?> refreshRunnable) {
- this.dependencies = Lists.newArrayListWithCapacity(3);
+ this.dependencies = new ArrayList<>(3);
this.refreshLock = new ReentrantLock();
this.refreshRunnable = refreshRunnable;
}
@@ -99,7 +99,7 @@ public final class RefreshHelper implements Refreshable {
* param.
*/
public static Collection<Refreshable> buildRefreshed(Collection<Refreshable> currentAlreadyRefreshed) {
- return currentAlreadyRefreshed == null ? Sets.<Refreshable>newHashSetWithExpectedSize(3) : currentAlreadyRefreshed;
+ return currentAlreadyRefreshed == null ? new HashSet<Refreshable>(3) : currentAlreadyRefreshed;
}
/**
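
This is the one substitution in the commit that is not exactly equivalent: Sets.newHashSetWithExpectedSize(n) picks a table capacity large enough that n elements never force a rehash at the default load factor, while the HashSet(int) constructor treats its argument as the raw initial capacity. For n = 3 the difference is noise, but a large set built via new HashSet<>(n) can rehash while filling. A sketch of the distinction, relying on Guava's documented behavior:

    import java.util.HashSet;
    import java.util.Set;
    import com.google.common.collect.Sets;

    class SizingDemo {
      static void demo() {
        Set<String> a = Sets.newHashSetWithExpectedSize(100); // no rehash up to 100 adds
        Set<String> b = new HashSet<>(100);                   // raw capacity; rehashes while filling
        Set<String> c = new HashSet<>((int) (100 / 0.75f) + 1); // manual equivalent of the Guava call
      }
    }
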
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
index f6598f3..f926f18 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/AbstractDifferenceRecommenderEvaluator.java
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.impl.eval;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@@ -29,7 +30,7 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.TasteException;
@@ -51,8 +52,6 @@ import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-
/**
* Abstract superclass of a couple implementations, providing shared functionality.
*/
@@ -142,12 +141,12 @@ public abstract class AbstractDifferenceRecommenderEvaluator implements Recommen
Preference newPref = new GenericPreference(userID, prefs.getItemID(i), prefs.getValue(i));
if (random.nextDouble() < trainingPercentage) {
if (oneUserTrainingPrefs == null) {
- oneUserTrainingPrefs = Lists.newArrayListWithCapacity(3);
+ oneUserTrainingPrefs = new ArrayList<>(3);
}
oneUserTrainingPrefs.add(newPref);
} else {
if (oneUserTestPrefs == null) {
- oneUserTestPrefs = Lists.newArrayListWithCapacity(3);
+ oneUserTestPrefs = new ArrayList<>(3);
}
oneUserTestPrefs.add(newPref);
}
@@ -173,7 +172,7 @@ public abstract class AbstractDifferenceRecommenderEvaluator implements Recommen
private double getEvaluation(FastByIDMap<PreferenceArray> testPrefs, Recommender recommender)
throws TasteException {
reset();
- Collection<Callable<Void>> estimateCallables = Lists.newArrayList();
+ Collection<Callable<Void>> estimateCallables = new ArrayList<>();
AtomicInteger noEstimateCounter = new AtomicInteger();
for (Map.Entry<Long,PreferenceArray> entry : testPrefs.entrySet()) {
estimateCallables.add(
@@ -217,7 +216,7 @@ public abstract class AbstractDifferenceRecommenderEvaluator implements Recommen
private static Collection<Callable<Void>> wrapWithStatsCallables(Iterable<Callable<Void>> callables,
AtomicInteger noEstimateCounter,
RunningAverageAndStdDev timing) {
- Collection<Callable<Void>> wrapped = Lists.newArrayList();
+ Collection<Callable<Void>> wrapped = new ArrayList<>();
int count = 0;
for (Callable<Void> callable : callables) {
boolean logStats = count++ % 1000 == 0; // log every 1000 or so iterations
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
index b0ef18c..f4e4522 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
@@ -17,7 +17,6 @@
package org.apache.mahout.cf.taste.impl.eval;
-import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RelevantItemsDataSplitter;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -27,6 +26,7 @@ import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -62,7 +62,7 @@ public final class GenericRelevantItemsDataSplitter implements RelevantItemsData
// If we're dealing with the very user that we're evaluating for precision/recall,
if (userID == otherUserID) {
// then must remove all the test IDs, the "relevant" item IDs
- List<Preference> prefs2 = Lists.newArrayListWithCapacity(prefs2Array.length());
+ List<Preference> prefs2 = new ArrayList<>(prefs2Array.length());
for (Preference pref : prefs2Array) {
prefs2.add(pref);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
index abb5ed8..2d27a37 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluator.java
@@ -17,11 +17,11 @@
package org.apache.mahout.cf.taste.impl.eval;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
-import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
@@ -48,7 +48,7 @@ public final class LoadEvaluator {
LongPrimitiveIterator userSampler =
SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), sampleRate);
recommender.recommend(userSampler.next(), howMany); // Warm up
- Collection<Callable<Void>> callables = Lists.newArrayList();
+ Collection<Callable<Void>> callables = new ArrayList<>();
while (userSampler.hasNext()) {
callables.add(new LoadCallable(recommender, userSampler.next()));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
index 94f2d0b..6efa6fa 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
@@ -19,10 +19,9 @@ package org.apache.mahout.cf.taste.impl.model;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
-
import java.util.Collection;
-import com.google.common.base.Charsets;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.model.IDMigrator;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
index da6845e..aa8e8d2 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
@@ -28,6 +29,9 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.io.Closeables;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -45,11 +49,6 @@ import org.apache.mahout.common.iterator.FileLineIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/**
* <p>
* A {@link DataModel} backed by a delimited file. This class expects a file where each line
@@ -207,7 +206,7 @@ public class FileDataModel extends AbstractDataModel {
throw new IllegalArgumentException("Did not find a delimiter(pattern) in first line");
}
}
- List<String> firstLineSplit = Lists.newArrayList();
+ List<String> firstLineSplit = new ArrayList<>();
for (String token : delimiterPattern.split(firstLine)) {
firstLineSplit.add(token);
}
@@ -526,7 +525,7 @@ public class FileDataModel extends AbstractDataModel {
if (!exists) {
if (prefs == null) {
- prefs = Lists.newArrayListWithCapacity(2);
+ prefs = new ArrayList<>(2);
((FastByIDMap<Collection<Preference>>) data).put(userID, prefs);
}
prefs.add(new GenericPreference(userID, itemID, preferenceValue));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
index 413db4b..6dc8aa5 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
@@ -17,7 +17,6 @@
package org.apache.mahout.cf.taste.impl.recommender;
-import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -31,6 +30,7 @@ import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
index ef11f0d..08aa5ae 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java
@@ -17,11 +17,11 @@
package org.apache.mahout.cf.taste.impl.recommender;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
-import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
@@ -68,7 +68,7 @@ public final class RandomRecommender extends AbstractRecommender {
throws TasteException {
DataModel dataModel = getDataModel();
int numItems = dataModel.getNumItems();
- List<RecommendedItem> result = Lists.newArrayListWithCapacity(howMany);
+ List<RecommendedItem> result = new ArrayList<>(howMany);
while (result.size() < howMany) {
LongPrimitiveIterator it = dataModel.getItemIDs();
it.skip(random.nextInt(numItems));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
index 3c27145..f7b4385 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
@@ -17,13 +17,14 @@
package org.apache.mahout.cf.taste.impl.recommender;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.TasteException;
@@ -33,8 +34,6 @@ import org.apache.mahout.cf.taste.impl.similarity.GenericUserSimilarity;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import com.google.common.base.Preconditions;
-
/**
* <p>
* A simple class that refactors the "find top N things" logic that is used in several places.
@@ -83,7 +82,7 @@ public final class TopItems {
if (size == 0) {
return Collections.emptyList();
}
- List<RecommendedItem> result = Lists.newArrayListWithCapacity(size);
+ List<RecommendedItem> result = new ArrayList<>(size);
result.addAll(topItems);
Collections.sort(result, ByValueRecommendedItemComparator.getInstance());
return result;
@@ -123,7 +122,7 @@ public final class TopItems {
if (size == 0) {
return NO_IDS;
}
- List<SimilarUser> sorted = Lists.newArrayListWithCapacity(size);
+ List<SimilarUser> sorted = new ArrayList<>(size);
sorted.addAll(topUsers);
Collections.sort(sorted);
long[] result = new long[size];
@@ -168,7 +167,7 @@ public final class TopItems {
if (size == 0) {
return Collections.emptyList();
}
- List<GenericItemSimilarity.ItemItemSimilarity> result = Lists.newArrayListWithCapacity(size);
+ List<GenericItemSimilarity.ItemItemSimilarity> result = new ArrayList<>(size);
result.addAll(topSimilarities);
Collections.sort(result);
return result;
@@ -199,7 +198,7 @@ public final class TopItems {
if (size == 0) {
return Collections.emptyList();
}
- List<GenericUserSimilarity.UserUserSimilarity> result = Lists.newArrayListWithCapacity(size);
+ List<GenericUserSimilarity.UserUserSimilarity> result = new ArrayList<>(size);
result.addAll(topSimilarities);
Collections.sort(result);
return result;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
index 33be59d..0ba5139 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
@@ -17,7 +17,13 @@
package org.apache.mahout.cf.taste.impl.recommender.svd;
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
@@ -31,17 +37,10 @@ import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.als.AlternatingLeastSquaresSolver;
import org.apache.mahout.math.als.ImplicitFeedbackAlternatingLeastSquaresSolver;
+import org.apache.mahout.math.map.OpenIntObjectHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.mahout.math.map.OpenIntObjectHashMap;
-
-import java.util.List;
-import java.util.Random;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
/**
* factorizes the rating matrix using "Alternating-Least-Squares with Weighted-λ-Regularization" as described in
* <a href="http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf">
@@ -191,7 +190,7 @@ public class ALSWRFactorizer extends AbstractFactorizer {
queue.execute(new Runnable() {
@Override
public void run() {
- List<Vector> featureVectors = Lists.newArrayList();
+ List<Vector> featureVectors = new ArrayList<>();
while (itemIDsFromUser.hasNext()) {
long itemID = itemIDsFromUser.nextLong();
featureVectors.add(features.getItemFeatureColumn(itemIndex(itemID)));
@@ -229,7 +228,7 @@ public class ALSWRFactorizer extends AbstractFactorizer {
queue.execute(new Runnable() {
@Override
public void run() {
- List<Vector> featureVectors = Lists.newArrayList();
+ List<Vector> featureVectors = new ArrayList<>();
for (Preference pref : itemPrefs) {
long userID = pref.getUserID();
featureVectors.add(features.getUserFeatureColumn(userIndex(userID)));
@@ -272,7 +271,7 @@ public class ALSWRFactorizer extends AbstractFactorizer {
//TODO find a way to get rid of the object overhead here
protected OpenIntObjectHashMap<Vector> itemFeaturesMapping(LongPrimitiveIterator itemIDs, int numItems,
double[][] featureMatrix) {
- OpenIntObjectHashMap<Vector> mapping = new OpenIntObjectHashMap<Vector>(numItems);
+ OpenIntObjectHashMap<Vector> mapping = new OpenIntObjectHashMap<>(numItems);
while (itemIDs.hasNext()) {
long itemID = itemIDs.next();
int itemIndex = itemIndex(itemID);
@@ -284,7 +283,7 @@ public class ALSWRFactorizer extends AbstractFactorizer {
protected OpenIntObjectHashMap<Vector> userFeaturesMapping(LongPrimitiveIterator userIDs, int numUsers,
double[][] featureMatrix) {
- OpenIntObjectHashMap<Vector> mapping = new OpenIntObjectHashMap<Vector>(numUsers);
+ OpenIntObjectHashMap<Vector> mapping = new OpenIntObjectHashMap<>(numUsers);
while (userIDs.hasNext()) {
long userID = userIDs.next();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
index d1d23a5..08c038a 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
@@ -30,7 +30,6 @@ import java.io.IOException;
import java.util.Map;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -57,25 +56,17 @@ public class FilePersistenceStrategy implements PersistenceStrategy {
log.info("{} does not yet exist, no factorization found", file.getAbsolutePath());
return null;
}
- DataInputStream in = null;
- try {
+ try (DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)))) {
log.info("Reading factorization from {}...", file.getAbsolutePath());
- in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
return readBinary(in);
- } finally {
- Closeables.close(in, true);
}
}
@Override
public void maybePersist(Factorization factorization) throws IOException {
- DataOutputStream out = null;
- try {
+ try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)))) {
log.info("Writing factorization to {}...", file.getAbsolutePath());
- out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
writeBinary(factorization, out);
- } finally {
- Closeables.close(out, false);
}
}
@@ -118,7 +109,7 @@ public class FilePersistenceStrategy implements PersistenceStrategy {
int numUsers = in.readInt();
int numItems = in.readInt();
- FastByIDMap<Integer> userIDMapping = new FastByIDMap<Integer>(numUsers);
+ FastByIDMap<Integer> userIDMapping = new FastByIDMap<>(numUsers);
double[][] userFeatures = new double[numUsers][numFeatures];
for (int n = 0; n < numUsers; n++) {
@@ -130,7 +121,7 @@ public class FilePersistenceStrategy implements PersistenceStrategy {
}
}
- FastByIDMap<Integer> itemIDMapping = new FastByIDMap<Integer>(numItems);
+ FastByIDMap<Integer> itemIDMapping = new FastByIDMap<>(numItems);
double[][] itemFeatures = new double[numItems][numFeatures];
for (int n = 0; n < numItems; n++) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDPlusPlusFactorizer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDPlusPlusFactorizer.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDPlusPlusFactorizer.java
index 8967134..20446f8 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDPlusPlusFactorizer.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDPlusPlusFactorizer.java
@@ -17,14 +17,14 @@
package org.apache.mahout.cf.taste.impl.recommender.svd;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.model.DataModel;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -80,13 +80,13 @@ public final class SVDPlusPlusFactorizer extends RatingSGDFactorizer {
}
/* get internal item IDs which we will need several times */
- itemsByUser = Maps.newHashMap();
+ itemsByUser = new HashMap<>();
LongPrimitiveIterator userIDs = dataModel.getUserIDs();
while (userIDs.hasNext()) {
long userId = userIDs.nextLong();
int userIndex = userIndex(userId);
FastIDSet itemIDsFromUser = dataModel.getItemIDsFromUser(userId);
- List<Integer> itemIndexes = Lists.newArrayListWithCapacity(itemIDsFromUser.size());
+ List<Integer> itemIndexes = new ArrayList<>(itemIDsFromUser.size());
itemsByUser.put(userIndex, itemIndexes);
for (long itemID2 : itemIDsFromUser) {
int i2 = itemIndex(itemID2);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
index ca0e0b2..631ec9b 100644
--- a/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
@@ -23,8 +23,8 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
-import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java b/mr/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
index 0baa4bf..73ba521 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
@@ -19,17 +19,17 @@ package org.apache.mahout.classifier;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
+import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory;
*/
public class ConfusionMatrix {
private static final Logger LOG = LoggerFactory.getLogger(ConfusionMatrix.class);
- private final Map<String,Integer> labelMap = Maps.newLinkedHashMap();
+ private final Map<String,Integer> labelMap = new LinkedHashMap<>();
private final int[][] confusionMatrix;
private int samples = 0;
private String defaultLabel = "unknown";
@@ -333,7 +333,7 @@ public class ConfusionMatrix {
m.set(r, c, confusionMatrix[r][c]);
}
}
- Map<String,Integer> labels = Maps.newHashMap();
+ Map<String,Integer> labels = new HashMap<>();
for (Map.Entry<String, Integer> entry : labelMap.entrySet()) {
labels.put(entry.getKey(), entry.getValue());
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/RegressionResultAnalyzer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/RegressionResultAnalyzer.java b/mr/src/main/java/org/apache/mahout/classifier/RegressionResultAnalyzer.java
index 5d8b9ed..35c11ee 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/RegressionResultAnalyzer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/RegressionResultAnalyzer.java
@@ -19,10 +19,10 @@ package org.apache.mahout.classifier;
import java.text.DecimalFormat;
import java.text.NumberFormat;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
-import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
/**
@@ -56,7 +56,7 @@ public class RegressionResultAnalyzer {
*/
public void addInstance(double actual, double result) {
if (results == null) {
- results = Lists.newArrayList();
+ results = new ArrayList<>();
}
results.add(new Result(actual, result));
}