Posted to commits@mahout.apache.org by sm...@apache.org on 2015/04/06 04:22:17 UTC
[5/5] mahout git commit: MAHOUT-1652: Java 7 upgrade, this closes apache/mahout#112
MAHOUT-1652: Java 7 upgrade, this closes apache/mahout#112
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/85f9ece6
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/85f9ece6
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/85f9ece6
Branch: refs/heads/master
Commit: 85f9ece660407fe68ad61560ebd723a57ec761e6
Parents: 53e5ada
Author: Suneel Marthi <su...@gmail.com>
Authored: Sun Apr 5 22:21:29 2015 -0400
Committer: Suneel Marthi <su...@gmail.com>
Committed: Sun Apr 5 22:24:06 2015 -0400
----------------------------------------------------------------------
.../benchmark/ClosestCentroidBenchmark.java | 2 -
.../benchmark/SerializationBenchmark.java | 31 ++----
.../mahout/benchmark/VectorBenchmarks.java | 45 ++++----
.../taste/impl/model/hbase/HBaseDataModel.java | 42 ++++---
.../impl/model/jdbc/AbstractJDBCDataModel.java | 29 +++--
.../impl/model/mongodb/MongoDBDataModel.java | 54 +++++----
.../jdbc/MySQLJDBCItemSimilarity.java | 1 -
.../classifier/ConfusionMatrixDumper.java | 21 ++--
.../mahout/clustering/cdbw/CDbwEvaluator.java | 15 ++-
.../clustering/conversion/InputMapper.java | 16 +--
.../clustering/evaluation/ClusterEvaluator.java | 11 +-
.../evaluation/RepresentativePointsDriver.java | 28 ++---
.../evaluation/RepresentativePointsMapper.java | 10 +-
.../mahout/clustering/lda/LDAPrintTopics.java | 15 ++-
.../apache/mahout/text/LuceneIndexHelper.java | 3 +-
.../mahout/text/LuceneSegmentInputFormat.java | 10 +-
.../mahout/text/LuceneSegmentRecordReader.java | 1 -
.../mahout/text/LuceneStorageConfiguration.java | 25 +++--
.../text/MailArchivesClusteringAnalyzer.java | 1 +
.../mahout/text/PrefixAdditionFilter.java | 8 +-
.../mahout/text/SequenceFilesFromDirectory.java | 10 +-
.../SequenceFilesFromLuceneStorageDriver.java | 15 ++-
.../SequenceFilesFromLuceneStorageMapper.java | 9 +-
.../text/SequenceFilesFromMailArchives.java | 18 ++-
.../mahout/text/WholeFileRecordReader.java | 20 ++--
.../mahout/text/WikipediaToSequenceFile.java | 4 +-
.../text/wikipedia/WikipediaAnalyzer.java | 1 +
.../WikipediaDatasetCreatorDriver.java | 4 +-
.../WikipediaDatasetCreatorMapper.java | 25 ++---
.../mahout/text/wikipedia/WikipediaMapper.java | 6 +-
.../text/wikipedia/WikipediaXmlSplitter.java | 8 +-
.../mahout/text/wikipedia/XmlInputFormat.java | 6 +-
.../mahout/utils/ConcatenateVectorsJob.java | 8 +-
.../org/apache/mahout/utils/MatrixDumper.java | 2 +-
.../apache/mahout/utils/SequenceFileDumper.java | 11 +-
.../org/apache/mahout/utils/SplitInput.java | 55 ++++-----
.../org/apache/mahout/utils/SplitInputJob.java | 1 +
.../mahout/utils/clustering/ClusterDumper.java | 20 ++--
.../nlp/collocations/llr/BloomTokenFilter.java | 2 +-
.../mahout/utils/regex/AnalyzerTransformer.java | 17 +--
.../apache/mahout/utils/regex/RegexMapper.java | 14 +--
.../apache/mahout/utils/vectors/RowIdJob.java | 34 ++----
.../mahout/utils/vectors/VectorDumper.java | 6 +-
.../mahout/utils/vectors/VectorHelper.java | 15 ++-
.../mahout/utils/vectors/arff/ARFFIterator.java | 6 +-
.../mahout/utils/vectors/arff/ARFFModel.java | 1 -
.../utils/vectors/arff/ARFFVectorIterable.java | 8 +-
.../mahout/utils/vectors/arff/Driver.java | 111 +++++++++----------
.../utils/vectors/arff/MapBackedARFFModel.java | 12 +-
.../utils/vectors/lucene/CachedTermInfo.java | 12 +-
.../utils/vectors/lucene/ClusterLabels.java | 21 ++--
.../mahout/utils/vectors/lucene/Driver.java | 39 +++----
.../utils/vectors/lucene/LuceneIterator.java | 10 +-
.../mahout/clustering/TestClusterDumper.java | 23 ++--
.../clustering/cdbw/TestCDbwEvaluator.java | 25 ++---
.../mahout/text/AbstractLuceneStorageTest.java | 15 ++-
.../text/LuceneSegmentRecordReaderTest.java | 25 +++--
.../text/LuceneStorageConfigurationTest.java | 11 +-
...equenceFilesFromLuceneStorageDriverTest.java | 32 ++----
...SequenceFilesFromLuceneStorageMRJobTest.java | 15 +--
.../SequenceFilesFromLuceneStorageTest.java | 70 ++++++------
.../text/SequenceFilesFromMailArchivesTest.java | 19 +---
.../text/TestSequenceFilesFromDirectory.java | 58 +++-------
.../mahout/text/doc/NumericFieldDocument.java | 1 -
.../org/apache/mahout/utils/SplitInputTest.java | 29 ++---
.../mahout/utils/TestConcatenateVectorsJob.java | 4 +-
.../mahout/utils/email/MailProcessorTest.java | 8 +-
.../collocations/llr/BloomTokenFilterTest.java | 3 +-
.../mahout/utils/regex/RegexMapperTest.java | 4 +-
.../mahout/utils/regex/RegexUtilsTest.java | 10 +-
.../mahout/utils/vectors/VectorHelperTest.java | 19 +---
.../vectors/arff/ARFFVectorIterableTest.java | 3 +-
.../mahout/utils/vectors/arff/DriverTest.java | 3 +-
.../vectors/arff/MapBackedARFFModelTest.java | 1 -
.../vectors/csv/CSVVectorIteratorTest.java | 16 +--
.../utils/vectors/io/VectorWriterTest.java | 16 +--
.../org/apache/mahout/math/QRDecomposition.java | 1 -
.../mahout/math/SingularValueDecomposition.java | 2 +-
.../apache/mahout/math/SparseColumnMatrix.java | 4 +-
.../math/decomposer/hebbian/HebbianSolver.java | 10 +-
.../math/decomposer/hebbian/TrainingState.java | 4 +-
.../apache/mahout/math/stats/LogLikelihood.java | 8 +-
.../apache/mahout/common/RandomUtilsTest.java | 2 +-
.../taste/hadoop/RecommendedItemsWritable.java | 4 +-
.../mahout/cf/taste/hadoop/TopItemsQueue.java | 10 +-
.../apache/mahout/cf/taste/hadoop/als/ALS.java | 20 ++--
.../hadoop/als/FactorizationEvaluator.java | 14 +--
.../hadoop/als/ParallelALSFactorizationJob.java | 11 +-
.../mahout/cf/taste/hadoop/item/IDReader.java | 16 +--
.../item/ItemFilterAsVectorAndPrefsReducer.java | 12 +-
.../hadoop/item/ToVectorAndPrefReducer.java | 6 +-
.../hadoop/item/VectorAndPrefsWritable.java | 6 +-
.../similarity/item/TopSimilarItemsQueue.java | 10 +-
.../cf/taste/impl/common/RefreshHelper.java | 8 +-
.../AbstractDifferenceRecommenderEvaluator.java | 13 +--
.../eval/GenericRelevantItemsDataSplitter.java | 4 +-
.../cf/taste/impl/eval/LoadEvaluator.java | 4 +-
.../cf/taste/impl/model/AbstractIDMigrator.java | 3 +-
.../cf/taste/impl/model/file/FileDataModel.java | 13 +--
.../GenericItemBasedRecommender.java | 2 +-
.../impl/recommender/RandomRecommender.java | 4 +-
.../cf/taste/impl/recommender/TopItems.java | 13 +--
.../impl/recommender/svd/ALSWRFactorizer.java | 25 ++---
.../svd/FilePersistenceStrategy.java | 17 +--
.../recommender/svd/SVDPlusPlusFactorizer.java | 12 +-
.../precompute/FileSimilarItemsWriter.java | 2 +-
.../mahout/classifier/ConfusionMatrix.java | 10 +-
.../classifier/RegressionResultAnalyzer.java | 4 +-
.../apache/mahout/classifier/df/DFUtils.java | 54 ++++-----
.../mahout/classifier/df/DecisionForest.java | 10 +-
.../df/builder/DecisionTreeBuilder.java | 4 +-
.../apache/mahout/classifier/df/data/Data.java | 20 ++--
.../mahout/classifier/df/data/DataLoader.java | 11 +-
.../mahout/classifier/df/data/DataUtils.java | 4 +-
.../mahout/classifier/df/data/Dataset.java | 12 +-
.../classifier/df/data/DescriptorUtils.java | 4 +-
.../classifier/df/mapreduce/Classifier.java | 12 +-
.../df/mapreduce/inmem/InMemBuilder.java | 16 +--
.../df/mapreduce/inmem/InMemInputFormat.java | 18 +--
.../df/mapreduce/partial/Step1Mapper.java | 4 +-
.../classifier/df/ref/SequentialBuilder.java | 4 +-
.../mahout/classifier/df/tools/Describe.java | 12 +-
.../mahout/classifier/mlp/NeuralNetwork.java | 50 ++++-----
.../classifier/mlp/RunMultilayerPerceptron.java | 24 ++--
.../mlp/TrainMultilayerPerceptron.java | 59 ++++------
.../classifier/naivebayes/BayesUtils.java | 21 +---
.../classifier/naivebayes/NaiveBayesModel.java | 16 +--
.../naivebayes/test/TestNaiveBayesDriver.java | 11 +-
.../sequencelearning/hmm/BaumWelchTrainer.java | 10 +-
.../sequencelearning/hmm/HmmUtils.java | 7 +-
.../hmm/RandomSequenceGenerator.java | 14 +--
.../sequencelearning/hmm/ViterbiEvaluator.java | 19 ++--
.../sgd/AdaptiveLogisticRegression.java | 16 +--
.../mahout/classifier/sgd/CrossFoldLearner.java | 4 +-
.../mahout/classifier/sgd/CsvRecordFactory.java | 14 ++-
.../mahout/classifier/sgd/GradientMachine.java | 4 +-
.../mahout/classifier/sgd/ModelDissector.java | 14 +--
.../mahout/classifier/sgd/ModelSerializer.java | 15 +--
.../mahout/classifier/sgd/RankingGradient.java | 4 +-
.../mahout/clustering/AbstractCluster.java | 9 +-
.../mahout/clustering/ClusteringUtils.java | 3 +-
.../classify/ClusterClassificationDriver.java | 10 +-
.../classify/ClusterClassificationMapper.java | 10 +-
.../clustering/classify/ClusterClassifier.java | 91 +++++++--------
.../fuzzykmeans/FuzzyKMeansDriver.java | 4 +-
.../mahout/clustering/iterator/CIReducer.java | 4 +-
.../iterator/FuzzyKMeansClusteringPolicy.java | 7 +-
.../mahout/clustering/kmeans/KMeansDriver.java | 4 +-
.../clustering/kmeans/RandomSeedGenerator.java | 13 +--
.../mahout/clustering/lda/cvb/CVB0Driver.java | 12 +-
.../cvb/InMemoryCollapsedVariationalBayes0.java | 45 ++------
.../mahout/clustering/lda/cvb/ModelTrainer.java | 26 ++---
.../mahout/clustering/lda/cvb/TopicModel.java | 28 ++---
.../mahout/clustering/spectral/VectorCache.java | 15 +--
.../spectral/kmeans/EigenSeedGenerator.java | 16 +--
.../streaming/cluster/BallKMeans.java | 12 +-
.../streaming/cluster/StreamingKMeans.java | 4 +-
.../mapreduce/StreamingKMeansDriver.java | 6 +-
.../mapreduce/StreamingKMeansMapper.java | 4 +-
.../mapreduce/StreamingKMeansThread.java | 4 +-
.../mapreduce/StreamingKMeansUtilsMR.java | 17 +--
.../streaming/tools/ResplitSequenceFiles.java | 4 +-
.../ClusterOutputPostProcessor.java | 12 +-
.../org/apache/mahout/common/AbstractJob.java | 24 ++--
.../apache/mahout/common/CommandLineUtil.java | 2 +-
.../org/apache/mahout/common/HadoopUtil.java | 17 +--
.../distance/MahalanobisDistanceMeasure.java | 27 ++---
.../distance/MinkowskiDistanceMeasure.java | 4 +-
.../distance/WeightedDistanceMeasure.java | 10 +-
.../WeightedEuclideanDistanceMeasure.java | 1 -
.../org/apache/mahout/driver/MahoutDriver.java | 10 +-
.../apache/mahout/ep/EvolutionaryProcess.java | 17 +--
172 files changed, 1055 insertions(+), 1487 deletions(-)
----------------------------------------------------------------------
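For readers skimming the diff: nearly every hunk below applies one of two Java 7 idioms. The diamond
operator replaces Guava's Lists.newArrayList()/Maps.newHashMap() factories, and try-with-resources
replaces try/finally blocks built around Closeables.close(). A minimal sketch of both, not taken from
the commit (the class name and path are illustrative), mirroring the SerializationBenchmark changes:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.mahout.math.DenseVector;
    import org.apache.mahout.math.VectorWritable;

    public class Java7IdiomsSketch {
      public static void write(Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        // Before (Java 6 + Guava): List<VectorWritable> vectors = Lists.newArrayList();
        List<VectorWritable> vectors = new ArrayList<>();   // Java 7 diamond operator
        VectorWritable vec = new VectorWritable();
        vec.set(new DenseVector(new double[] {1.0, 2.0}));
        vectors.add(vec);
        // Before: try { ... } finally { Closeables.close(writer, false); }
        try (SequenceFile.Writer writer = new SequenceFile.Writer(
            fs, conf, new Path("/tmp/java7-sketch"), IntWritable.class, VectorWritable.class)) {
          writer.append(new IntWritable(0), vec);           // close() now runs automatically,
        }                                                   // even if append() throws
      }
    }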
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
index d28ee5a..b1c2ded 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
@@ -27,8 +27,6 @@ import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;
public class ClosestCentroidBenchmark {
- public static final String SERIALIZE = "Serialize";
- public static final String DESERIALIZE = "Deserialize";
private final VectorBenchmarks mark;
public ClosestCentroidBenchmark(VectorBenchmarks mark) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
index 10fcd11..cd403c2 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
@@ -17,9 +17,6 @@
package org.apache.mahout.benchmark;
-import java.io.IOException;
-
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -30,6 +27,8 @@ import org.apache.mahout.common.TimingStatistics;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.VectorWritable;
+import java.io.IOException;
+
import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
@@ -51,14 +50,14 @@ public class SerializationBenchmark {
public void serializeBenchmark() throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"), IntWritable.class,
- VectorWritable.class);
Writable one = new IntWritable(0);
VectorWritable vec = new VectorWritable();
TimingStatistics stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"),
+ IntWritable.class, VectorWritable.class)){
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[0][mark.vIndex(i)]);
@@ -67,15 +66,13 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, DENSE_VECTOR);
- writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/randsparse-vector"), IntWritable.class,
- VectorWritable.class);
stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class)){
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[1][mark.vIndex(i)]);
@@ -84,15 +81,13 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, RAND_SPARSE_VECTOR);
- writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/seqsparse-vector"), IntWritable.class,
- VectorWritable.class);
stats = new TimingStatistics();
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class)) {
for (int i = 0; i < mark.loop; i++) {
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
vec.set(mark.vectors[2][mark.vIndex(i)]);
@@ -101,8 +96,6 @@ public class SerializationBenchmark {
break;
}
}
- } finally {
- Closeables.close(writer, false);
}
mark.printStats(stats, SERIALIZE, SEQ_SPARSE_VECTOR);
@@ -117,7 +110,7 @@ public class SerializationBenchmark {
private void doDeserializeBenchmark(String name, String pathString) throws IOException {
TimingStatistics stats = new TimingStatistics();
TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
- SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<Writable>(new Path(pathString), true,
+ SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<>(new Path(pathString), true,
new Configuration());
while (iterator.hasNext()) {
iterator.next();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
index c29760a..a076322 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
@@ -17,17 +17,6 @@
package org.apache.mahout.benchmark;
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Pattern;
-
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -56,8 +45,18 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
public class VectorBenchmarks {
private static final int MAX_TIME_MS = 5000;
@@ -93,11 +92,11 @@ public class VectorBenchmarks {
final long maxTimeUsec;
final long leadTimeUsec;
- private final List<Vector> randomVectors = Lists.newArrayList();
- private final List<int[]> randomVectorIndices = Lists.newArrayList();
- private final List<double[]> randomVectorValues = Lists.newArrayList();
- private final Map<String, Integer> implType = Maps.newHashMap();
- private final Map<String, List<String[]>> statsMap = Maps.newHashMap();
+ private final List<Vector> randomVectors = new ArrayList<>();
+ private final List<int[]> randomVectorIndices = new ArrayList<>();
+ private final List<double[]> randomVectorValues = new ArrayList<>();
+ private final Map<String, Integer> implType = new HashMap<>();
+ private final Map<String, List<String[]>> statsMap = new HashMap<>();
private final BenchmarkRunner runner;
private final Random r = RandomUtils.getRandom();
@@ -162,7 +161,7 @@ public class VectorBenchmarks {
}
int implId = implType.get(implName);
if (!statsMap.containsKey(benchmarkName)) {
- statsMap.put(benchmarkName, Lists.<String[]>newArrayList());
+ statsMap.put(benchmarkName, new ArrayList<String[]>());
}
List<String[]> implStats = statsMap.get(benchmarkName);
while (implStats.size() < implId + 1) {
@@ -224,7 +223,7 @@ public class VectorBenchmarks {
private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
int[] indexes = randomVectorIndices.get(randomIndex);
double[] values = randomVectorValues.get(randomIndex);
- List<Integer> randomOrder = Lists.newArrayList();
+ List<Integer> randomOrder = new ArrayList<>();
for (int i = 0; i < indexes.length; i++) {
randomOrder.add(i);
}
@@ -421,9 +420,9 @@ public class VectorBenchmarks {
}
private String asCsvString() {
- List<String> keys = Lists.newArrayList(statsMap.keySet());
+ List<String> keys = new ArrayList<>(statsMap.keySet());
Collections.sort(keys);
- Map<Integer,String> implMap = Maps.newHashMap();
+ Map<Integer,String> implMap = new HashMap<>();
for (Entry<String,Integer> e : implType.entrySet()) {
implMap.put(e.getValue(), e.getKey());
}
@@ -459,7 +458,7 @@ public class VectorBenchmarks {
}
}
sb.append('\n');
- List<String> keys = Lists.newArrayList(statsMap.keySet());
+ List<String> keys = new ArrayList<>(statsMap.keySet());
Collections.sort(keys);
for (String benchmarkName : keys) {
List<String[]> implTokenizedStats = statsMap.get(benchmarkName);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
index 9378b11..9735ffe 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
@@ -17,15 +17,6 @@
package org.apache.mahout.cf.taste.impl.model.hbase;
-import java.io.Closeable;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -42,13 +33,9 @@ import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
@@ -59,6 +46,18 @@ import org.apache.mahout.cf.taste.impl.model.GenericItemPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
/**
* <p>Naive approach of storing one preference as one value in the table.
@@ -134,15 +133,12 @@ public final class HBaseDataModel implements DataModel, Closeable {
* Create the table if it doesn't exist
*/
private void bootstrap(Configuration conf) throws IOException {
- HBaseAdmin admin = new HBaseAdmin(conf);
HTableDescriptor tDesc = new HTableDescriptor(Bytes.toBytes(tableName));
tDesc.addFamily(new HColumnDescriptor(USERS_CF));
tDesc.addFamily(new HColumnDescriptor(ITEMS_CF));
- try {
+ try (HBaseAdmin admin = new HBaseAdmin(conf)) {
admin.createTable(tDesc);
log.info("Created table {}", tableName);
- } finally {
- admin.close();
}
}
@@ -339,7 +335,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Result[] results;
try {
HTableInterface table = pool.getTable(tableName);
- List<Get> gets = Lists.newArrayListWithCapacity(2);
+ List<Get> gets = new ArrayList<>(2);
gets.add(new Get(itemToBytes(itemID1)));
gets.add(new Get(itemToBytes(itemID2)));
gets.get(0).addFamily(USERS_CF);
@@ -380,7 +376,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
public void setPreference(long userID, long itemID, float value) throws TasteException {
try {
HTableInterface table = pool.getTable(tableName);
- List<Put> puts = Lists.newArrayListWithCapacity(2);
+ List<Put> puts = new ArrayList<>(2);
puts.add(new Put(userToBytes(userID)));
puts.add(new Put(itemToBytes(itemID)));
puts.get(0).add(ITEMS_CF, Bytes.toBytes(itemID), Bytes.toBytes(value));
@@ -396,7 +392,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
public void removePreference(long userID, long itemID) throws TasteException {
try {
HTableInterface table = pool.getTable(tableName);
- List<Delete> deletes = Lists.newArrayListWithCapacity(2);
+ List<Delete> deletes = new ArrayList<>(2);
deletes.add(new Delete(userToBytes(userID)));
deletes.add(new Delete(itemToBytes(itemID)));
deletes.get(0).deleteColumns(ITEMS_CF, Bytes.toBytes(itemID));
@@ -457,7 +453,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Scan scan = new Scan(new byte[]{0x69}, new byte[]{0x70});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = Lists.newLinkedList();
+ Collection<Long> ids = new LinkedList<>();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
@@ -482,7 +478,7 @@ public final class HBaseDataModel implements DataModel, Closeable {
Scan scan = new Scan(new byte[]{0x75}, new byte[]{0x76});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = Lists.newLinkedList();
+ Collection<Long> ids = new LinkedList<>();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
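A hedged reading of the two Scan hunks above: 0x69 is ASCII 'i' and 0x75 is ASCII 'u', which suggests
item and user row keys carry those single-byte prefixes, so each half-open byte-range scan enumerates
one family of IDs without touching the other (the KeyOnlyFilter/FirstKeyOnlyFilter pair then skips the
cell data, since only the row keys are needed). A fragment restating that assumed layout:

    // Assumed key layout: 'i' + itemID bytes and 'u' + userID bytes, so
    // [0x69, 0x70) selects only item rows and [0x75, 0x76) only user rows.
    Scan itemScan = new Scan(new byte[] {0x69}, new byte[] {0x70});
    itemScan.setFilter(new FirstKeyOnlyFilter()); // row keys only; values not fetched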
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
index d9317c0..66f0a77 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
@@ -17,17 +17,7 @@
package org.apache.mahout.cf.taste.impl.model.jdbc;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Collection;
-import java.util.List;
-
-import javax.sql.DataSource;
-
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
@@ -49,7 +39,15 @@ import org.apache.mahout.common.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import javax.sql.DataSource;
/**
* <p>
@@ -96,7 +94,6 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
private final String getUsersSQL;
private final String getItemsSQL;
private final String getPrefsForItemSQL;
- //private final String getNumPreferenceForItemSQL;
private final String getNumPreferenceForItemsSQL;
private final String getMaxPreferenceSQL;
private final String getMinPreferenceSQL;
@@ -283,7 +280,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
log.debug("Executing SQL query: {}", getUserSQL);
rs = stmt.executeQuery();
- List<Preference> prefs = Lists.newArrayList();
+ List<Preference> prefs = new ArrayList<>();
while (rs.next()) {
prefs.add(buildPreference(rs));
}
@@ -323,7 +320,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
rs = stmt.executeQuery(getAllUsersSQL);
Long currentUserID = null;
- List<Preference> currentPrefs = Lists.newArrayList();
+ List<Preference> currentPrefs = new ArrayList<>();
while (rs.next()) {
long nextUserID = getLongColumn(rs, 1);
if (currentUserID != null && !currentUserID.equals(nextUserID) && !currentPrefs.isEmpty()) {
@@ -533,7 +530,7 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
log.debug("Executing SQL query: {}", getPrefsForItemSQL);
rs = stmt.executeQuery();
- List<Preference> prefs = Lists.newArrayList();
+ List<Preference> prefs = new ArrayList<>();
while (rs.next()) {
prefs.add(buildPreference(rs));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
index fe6b843..92a4019 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
@@ -17,20 +17,15 @@
package org.apache.mahout.cf.taste.impl.model.mongodb;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.util.Collection;
-import java.util.Date;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-import java.net.UnknownHostException;
-import java.text.SimpleDateFormat;
-import java.util.regex.Pattern;
-
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.DBCursor;
+import com.mongodb.DBObject;
+import com.mongodb.Mongo;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -42,19 +37,22 @@ import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.mongodb.BasicDBObject;
-import com.mongodb.DBObject;
-import com.mongodb.Mongo;
-import com.mongodb.DB;
-import com.mongodb.DBCollection;
-import com.mongodb.DBCursor;
+import java.net.UnknownHostException;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
/**
* <p>A {@link DataModel} backed by a MongoDB database. This class expects a
@@ -408,8 +406,8 @@ public final class MongoDBDataModel implements DataModel {
while (cursor.hasNext()) {
Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
String userID = getID(user.get(mongoUserID), true);
- Collection<List<String>> items = Lists.newArrayList();
- List<String> item = Lists.newArrayList();
+ Collection<List<String>> items = new ArrayList<>();
+ List<String> item = new ArrayList<>();
item.add(getID(user.get(mongoItemID), false));
item.add(Float.toString(getPreference(user.get(mongoPreference))));
items.add(item);
@@ -431,8 +429,8 @@ public final class MongoDBDataModel implements DataModel {
Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
if (!user.containsKey("deleted_at")) {
String userID = getID(user.get(mongoUserID), true);
- Collection<List<String>> items = Lists.newArrayList();
- List<String> item = Lists.newArrayList();
+ Collection<List<String>> items = new ArrayList<>();
+ List<String> item = new ArrayList<>();
item.add(getID(user.get(mongoItemID), false));
item.add(Float.toString(getPreference(user.get(mongoPreference))));
items.add(item);
@@ -552,7 +550,7 @@ public final class MongoDBDataModel implements DataModel {
Mongo mongoDDBB = new Mongo(mongoHost, mongoPort);
DB db = mongoDDBB.getDB(mongoDB);
mongoTimestamp = new Date(0);
- FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
+ FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<>();
if (!mongoAuth || db.authenticate(mongoUsername, mongoPassword.toCharArray())) {
collection = db.getCollection(mongoCollection);
collectionMap = db.getCollection(mongoMapCollection);
@@ -572,7 +570,7 @@ public final class MongoDBDataModel implements DataModel {
float ratingValue = getPreference(user.get(mongoPreference));
Collection<Preference> userPrefs = userIDPrefMap.get(userID);
if (userPrefs == null) {
- userPrefs = Lists.newArrayListWithCapacity(2);
+ userPrefs = new ArrayList<>(2);
userIDPrefMap.put(userID, userPrefs);
}
userPrefs.add(new GenericPreference(userID, itemID, ratingValue));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
index 20d1384..af0742e 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
@@ -70,7 +70,6 @@ import org.apache.mahout.cf.taste.common.TasteException;
* Note that for each row, item_id_a should be less than item_id_b. It is redundant to store it both ways,
* so the pair is always stored as a pair with the lesser one first.
*
- * @see org.apache.mahout.cf.taste.impl.recommender.slopeone.jdbc.MySQLJDBCDiffStorage
* @see org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel
*/
public class MySQLJDBCItemSimilarity extends SQL92JDBCItemSimilarity {
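The javadoc note above (item_id_a is always the lesser ID) implies callers must canonicalize a pair
before building a query or insert against this table. A hypothetical helper, not part of the class,
showing the invariant:

    // Hypothetical: order an (itemA, itemB) pair so the similarity table's
    // "lesser ID first" layout holds before the SQL statement is built.
    static long[] canonicalPair(long itemA, long itemB) {
      return itemA <= itemB ? new long[] {itemA, itemB} : new long[] {itemB, itemA};
    }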
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
index 45f300a..03a3000 100644
--- a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
+++ b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
@@ -17,16 +17,8 @@
package org.apache.mahout.classifier;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -38,7 +30,14 @@ import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixWritable;
-import com.google.common.collect.Lists;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
/**
* Export a ConfusionMatrix in various text formats: ToString version Grayscale HTML table Summary HTML table
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
index 972dec1..545c1ff 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
@@ -17,10 +17,6 @@
package org.apache.mahout.clustering.cdbw;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
@@ -41,8 +37,11 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
/**
* This class calculates the CDbw metric as defined in
@@ -53,7 +52,7 @@ public final class CDbwEvaluator {
private static final Logger log = LoggerFactory.getLogger(CDbwEvaluator.class);
private final Map<Integer,List<VectorWritable>> representativePoints;
- private final Map<Integer,Double> stDevs = Maps.newHashMap();
+ private final Map<Integer,Double> stDevs = new HashMap<>();
private final List<Cluster> clusters;
private final DistanceMeasure measure;
private Double interClusterDensity = null;
@@ -110,7 +109,7 @@ public final class CDbwEvaluator {
* @return a List<Cluster> of the clusters
*/
private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
Cluster cluster = clusterWritable.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
index 9dbce5a..e4c72c6 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
@@ -17,13 +17,6 @@
package org.apache.mahout.clustering.conversion;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Collection;
-import java.util.regex.Pattern;
-
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -31,6 +24,13 @@ import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable> {
private static final Pattern SPACE = Pattern.compile(" ");
@@ -42,7 +42,7 @@ public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable
String[] numbers = SPACE.split(values.toString());
// sometimes there are multiple separator spaces
- Collection<Double> doubles = Lists.newArrayList();
+ Collection<Double> doubles = new ArrayList<>();
for (String value : numbers) {
if (!value.isEmpty()) {
doubles.add(Double.valueOf(value));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
index 3cd06eb..757f38c 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
@@ -17,10 +17,6 @@
package org.apache.mahout.clustering.evaluation;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;
@@ -37,7 +33,10 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
public class ClusterEvaluator {
@@ -89,7 +88,7 @@ public class ClusterEvaluator {
* @return a List<Cluster> of the clusters
*/
private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn, PathType.LIST,
PathFilters.logsCRCFilter(), conf)) {
Cluster cluster = clusterWritable.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
index f18b584..2fe37ef 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
@@ -18,12 +18,10 @@
package org.apache.mahout.clustering.evaluation;
import java.io.IOException;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -98,8 +96,6 @@ public final class RepresentativePointsDriver extends AbstractJob {
* the Path to the directory containing representativePoints-i folders
* @param numIterations
* the int number of iterations to print
- * @throws IOException
- * if errors occur
*/
public static void printRepresentativePoints(Path output, int numIterations) {
for (int i = 0; i <= numIterations; i++) {
@@ -141,8 +137,8 @@ public final class RepresentativePointsDriver extends AbstractJob {
for (FileStatus part : fs.listStatus(inPath, PathFilters.logsCRCFilter())) {
Path inPart = part.getPath();
Path path = new Path(output, inPart.getName());
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)){
for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
Cluster cluster = clusterWritable.getValue();
if (log.isDebugEnabled()) {
@@ -150,8 +146,6 @@ public final class RepresentativePointsDriver extends AbstractJob {
}
writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
}
- } finally {
- Closeables.close(writer, false);
}
}
}
@@ -184,7 +178,7 @@ public final class RepresentativePointsDriver extends AbstractJob {
DistanceMeasure measure) throws IOException {
Map<Integer,List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn);
- Map<Integer,WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+ Map<Integer,WeightedVectorWritable> mostDistantPoints = new HashMap<>();
FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf);
for (Pair<IntWritable,WeightedVectorWritable> record
: new SequenceFileDirIterable<IntWritable,WeightedVectorWritable>(clusteredPointsIn, PathType.LIST,
@@ -192,25 +186,19 @@ public final class RepresentativePointsDriver extends AbstractJob {
RepresentativePointsMapper.mapPoint(record.getFirst(), record.getSecond(), measure, repPoints, mostDistantPoints);
}
int part = 0;
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
- IntWritable.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+ IntWritable.class, VectorWritable.class)){
for (Entry<Integer,List<VectorWritable>> entry : repPoints.entrySet()) {
for (VectorWritable vw : entry.getValue()) {
writer.append(new IntWritable(entry.getKey()), vw);
}
}
- } finally {
- Closeables.close(writer, false);
}
- writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++), IntWritable.class,
- VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+ IntWritable.class, VectorWritable.class)){
for (Map.Entry<Integer,WeightedVectorWritable> entry : mostDistantPoints.entrySet()) {
writer.append(new IntWritable(entry.getKey()), new VectorWritable(entry.getValue().getVector()));
}
- } finally {
- Closeables.close(writer, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
index 255e4a3..0ae79ad 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
@@ -18,11 +18,11 @@
package org.apache.mahout.clustering.evaluation;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -41,7 +41,7 @@ public class RepresentativePointsMapper
extends Mapper<IntWritable, WeightedVectorWritable, IntWritable, WeightedVectorWritable> {
private Map<Integer, List<VectorWritable>> representativePoints;
- private final Map<Integer, WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+ private final Map<Integer, WeightedVectorWritable> mostDistantPoints = new HashMap<>();
private DistanceMeasure measure = new EuclideanDistanceMeasure();
@Override
@@ -98,7 +98,7 @@ public class RepresentativePointsMapper
}
public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf, Path statePath) {
- Map<Integer, List<VectorWritable>> representativePoints = Maps.newHashMap();
+ Map<Integer, List<VectorWritable>> representativePoints = new HashMap<>();
for (Pair<IntWritable,VectorWritable> record
: new SequenceFileDirIterable<IntWritable,VectorWritable>(statePath,
PathType.LIST,
@@ -107,7 +107,7 @@ public class RepresentativePointsMapper
int keyValue = record.getFirst().get();
List<VectorWritable> repPoints = representativePoints.get(keyValue);
if (repPoints == null) {
- repPoints = Lists.newArrayList();
+ repPoints = new ArrayList<>();
representativePoints.put(keyValue, repPoints);
}
repPoints.add(record.getSecond());
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
index cb8d935..392909e 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
@@ -17,24 +17,22 @@
package org.apache.mahout.clustering.lda;
+import com.google.common.io.Closeables;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -43,6 +41,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
@@ -168,7 +167,7 @@ public final class LDAPrintTopics {
out.write("===========");
out.write('\n');
}
- List<Pair<String,Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
+ List<Pair<String,Double>> topKasList = new ArrayList<>(topK.size());
for (Pair<String,Double> wordWithScore : topK) {
topKasList.add(wordWithScore);
}
@@ -197,8 +196,8 @@ public final class LDAPrintTopics {
Configuration job,
List<String> wordList,
int numWordsToPrint) {
- List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
- Map<Integer,Double> expSums = Maps.newHashMap();
+ List<Queue<Pair<String,Double>>> queues = new ArrayList<>();
+ Map<Integer,Double> expSums = new HashMap<>();
for (Pair<IntPairWritable,DoubleWritable> record
: new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
index 438beb9..465e51b 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
@@ -33,7 +33,8 @@ public class LuceneIndexHelper {
public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
IndexableField field = reader.document(0).getField(fieldName);
if (field == null || !field.fieldType().stored()) {
- throw new IllegalArgumentException("Field '" + fieldName + "' is possibly not stored since first document in index does not contain this field.");
+ throw new IllegalArgumentException("Field '" + fieldName +
+ "' is possibly not stored since first document in index does not contain this field.");
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
index fabca54..1c4f8de 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
@@ -16,7 +16,10 @@ package org.apache.mahout.text;
* limitations under the License.
*/
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -32,9 +35,6 @@ import org.apache.lucene.index.SegmentInfos;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.List;
-
/**
* {@link InputFormat} implementation which splits a Lucene index at the segment level.
*/
@@ -48,7 +48,7 @@ public class LuceneSegmentInputFormat extends InputFormat {
LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
- List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
+ List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();
List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
for (Path indexPath : indexPaths) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
index a0aa6b0..485e856 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
@@ -98,6 +98,5 @@ public class LuceneSegmentRecordReader extends RecordReader<Text, NullWritable>
@Override
public void close() throws IOException {
segmentReader.close();
- //searcher.close();
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
index 88f86c5..b36f3e9 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
@@ -16,9 +16,18 @@ package org.apache.mahout.text;
* limitations under the License.
*/
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -36,14 +45,6 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
import static org.apache.lucene.util.Version.LUCENE_46;
/**
@@ -186,7 +187,7 @@ public class LuceneStorageConfiguration implements Writable {
}
public DocumentStoredFieldVisitor getStoredFieldVisitor() {
- Set<String> fieldSet = Sets.newHashSet(idField);
+ Set<String> fieldSet = new HashSet<>(Collections.singleton(idField));
fieldSet.addAll(fields);
return new DocumentStoredFieldVisitor(fieldSet);
}
@@ -205,7 +206,7 @@ public class LuceneStorageConfiguration implements Writable {
public void readFields(DataInput in) throws IOException {
try {
sequenceFilesOutputPath = new Path(in.readUTF());
- indexPaths = Lists.newArrayList();
+ indexPaths = new ArrayList<>();
String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
for (String indexPath : indexPaths) {
this.indexPaths.add(new Path(indexPath));
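One nuance in the getStoredFieldVisitor() hunk above: Guava's Sets.newHashSet(idField) built a mutable
set from a single element, and HashSet has no single-element constructor, so the replacement seeds the
copy through Collections.singleton. The copy matters because the set is mutated on the next line:

    Set<String> fieldSet = new HashSet<>(Collections.singleton(idField)); // mutable copy
    fieldSet.addAll(fields); // a bare Collections.singleton(idField) is immutable and
                             // would throw UnsupportedOperationException here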
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
index a7503e1..8776c5f 100644
--- a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
@@ -21,6 +21,7 @@ import java.io.Reader;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java b/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
index a13341b..37ebc44 100644
--- a/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
+++ b/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
@@ -17,7 +17,6 @@
package org.apache.mahout.text;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -53,10 +52,7 @@ public final class PrefixAdditionFilter extends SequenceFilesFromDirectoryFilter
fs.listStatus(fst.getPath(),
new PrefixAdditionFilter(getConf(), dirPath, getOptions(), writer, getCharset(), fs));
} else {
- InputStream in = null;
- try {
- in = fs.open(fst.getPath());
-
+ try (InputStream in = fs.open(fst.getPath())){
StringBuilder file = new StringBuilder();
for (String aFit : new FileLineIterable(in, getCharset(), false)) {
file.append(aFit).append('\n');
@@ -65,8 +61,6 @@ public final class PrefixAdditionFilter extends SequenceFilesFromDirectoryFilter
? current.getName()
: current.getName() + Path.SEPARATOR + fst.getPath().getName();
writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
- } finally {
- Closeables.close(in, false);
}
}
}
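
The hunk above shows the second recurring idiom of this upgrade: a manual try/finally with Guava's Closeables.close becomes a Java 7 try-with-resources statement, which closes the stream automatically whether the body completes or throws. A minimal sketch under the assumption of a local text file (the file name is hypothetical):

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    public class TryWithResources {
      public static void main(String[] args) throws IOException {
        // in.close() runs automatically when the block exits, normally or
        // exceptionally; no finally block or Closeables.close(in, false) needed.
        try (BufferedReader in = new BufferedReader(new FileReader("input.txt"))) {
          StringBuilder file = new StringBuilder();
          String line;
          while ((line = in.readLine()) != null) {
            file.append(line).append('\n');
          }
          System.out.print(file);
        }
      }
    }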
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
index 720078c..311ab8d 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java
@@ -19,10 +19,9 @@ package org.apache.mahout.text;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.HashMap;
import java.util.Map;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -100,9 +99,8 @@ public class SequenceFilesFromDirectory extends AbstractJob {
Charset charset = Charset.forName(getOption(CHARSET_OPTION[0]));
String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
FileSystem fs = FileSystem.get(input.toUri(), conf);
- ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);
- try {
+ try (ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output)) {
SequenceFilesFromDirectoryFilter pathFilter;
String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
@@ -113,8 +111,6 @@ public class SequenceFilesFromDirectory extends AbstractJob {
new Object[] {conf, keyPrefix, options, writer, charset, fs});
}
fs.listStatus(input, pathFilter);
- } finally {
- Closeables.close(writer, false);
}
return 0;
}
@@ -209,7 +205,7 @@ public class SequenceFilesFromDirectory extends AbstractJob {
* @return Map of options
*/
protected Map<String, String> parseOptions() {
- Map<String, String> options = Maps.newHashMap();
+ Map<String, String> options = new HashMap<>();
options.put(CHUNK_SIZE_OPTION[0], getOption(CHUNK_SIZE_OPTION[0]));
options.put(FILE_FILTER_CLASS_OPTION[0], getOption(FILE_FILTER_CLASS_OPTION[0]));
options.put(CHARSET_OPTION[0], getOption(CHARSET_OPTION[0]));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
index d3903dd..1bd3f3e 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
@@ -17,7 +17,11 @@ package org.apache.mahout.text;
*/
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
@@ -30,11 +34,6 @@ import org.apache.lucene.util.Version;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import static java.util.Arrays.asList;
-
/**
* Driver class for the lucene2seq program. Converts text contents of stored fields of a lucene index into a Hadoop
* SequenceFile. The key of the sequence file is the document ID and the value is the concatenated text of the specified
@@ -77,7 +76,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
Configuration configuration = getConf();
String[] paths = getInputPath().toString().split(",");
- List<Path> indexPaths = Lists.newArrayList();
+ List<Path> indexPaths = new ArrayList<>();
for (String path : paths) {
indexPaths.add(new Path(path));
}
@@ -91,7 +90,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
indexPaths,
sequenceFilesOutputPath,
idField,
- asList(fields.split(SEPARATOR_FIELDS)));
+ Arrays.asList(fields.split(SEPARATOR_FIELDS)));
Query query = DEFAULT_QUERY;
if (hasOption(OPTION_QUERY)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
index d87dadc..f31d055 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
@@ -18,6 +18,7 @@
package org.apache.mahout.text;
import com.google.common.base.Strings;
+import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -31,8 +32,6 @@ import org.apache.lucene.store.IOContext;
import java.io.IOException;
import java.util.List;
-import static org.apache.commons.lang.StringUtils.isBlank;
-
/**
* Maps document IDs to key value pairs with ID field as the key and the concatenated stored field(s)
* as value.
@@ -64,13 +63,13 @@ public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, NullWrita
Text theValue = new Text();
LuceneSeqFileHelper.populateValues(document, theValue, fields);
//if they are both empty, don't write
- if (isBlank(theKey.toString()) && isBlank(theValue.toString())) {
+ if (StringUtils.isBlank(theKey.toString()) && StringUtils.isBlank(theValue.toString())) {
context.getCounter(DataStatus.EMPTY_BOTH).increment(1);
return;
}
- if (isBlank(theKey.toString())) {
+ if (StringUtils.isBlank(theKey.toString())) {
context.getCounter(DataStatus.EMPTY_KEY).increment(1);
- } else if (isBlank(theValue.toString())) {
+ } else if (StringUtils.isBlank(theValue.toString())) {
context.getCounter(DataStatus.EMPTY_VALUE).increment(1);
}
context.write(theKey, theValue);
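
This hunk also retires a static import from the older commons-lang in favor of qualified calls on commons-lang3's StringUtils; the semantics of isBlank are the same in both libraries. A small sketch of what it accepts (requires commons-lang3 on the classpath):

    import org.apache.commons.lang3.StringUtils;

    public class IsBlankDemo {
      public static void main(String[] args) {
        // isBlank is true for null, the empty string, and whitespace-only input.
        System.out.println(StringUtils.isBlank(null));   // true
        System.out.println(StringUtils.isBlank(""));     // true
        System.out.println(StringUtils.isBlank("   "));  // true
        System.out.println(StringUtils.isBlank("key"));  // false
      }
    }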
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
index 30c2a47..c17cc12 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
@@ -16,10 +16,6 @@
*/
package org.apache.mahout.text;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
import org.apache.commons.io.DirectoryWalker;
import org.apache.commons.io.comparator.CompositeFileComparator;
import org.apache.commons.io.comparator.DirectoryFileComparator;
@@ -46,10 +42,12 @@ import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@@ -81,9 +79,9 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000;
public void createSequenceFiles(MailOptions options) throws IOException {
- ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()));
- MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
- try {
+ try (ChunkedWriter writer =
+ new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()))){
+ MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer);
if (options.getInput().isDirectory()) {
PrefixAdditionDirectoryWalker walker = new PrefixAdditionDirectoryWalker(processor, writer);
walker.walk(options.getInput());
@@ -94,8 +92,6 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
long finish = System.currentTimeMillis();
log.info("Parsed {} messages from {} in time: {}", cnt, options.getInput().getAbsolutePath(), finish - start);
}
- } finally {
- Closeables.close(writer, false);
}
}
@@ -226,11 +222,11 @@ public final class SequenceFilesFromMailArchives extends AbstractJob {
options.setChunkSize(chunkSize);
options.setCharset(charset);
- List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
+ List<Pattern> patterns = new ArrayList<>(5);
// patternOrder is used downstream so that we can know what order the text
// is in instead of encoding it in the string, which
// would require more processing later to remove it pre feature selection.
- Map<String, Integer> patternOrder = Maps.newHashMap();
+ Map<String, Integer> patternOrder = new HashMap<>();
int order = 0;
if (hasOption(FROM_OPTION[0])) {
patterns.add(MailProcessor.FROM_PREFIX);
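
In the createSequenceFiles() hunk above, the resource in the new try-with-resources header is Mahout's own ChunkedWriter, not a JDK stream: the construct works with any type implementing AutoCloseable (java.io.Closeable extends it as of Java 7). A sketch with a stand-in class, since ChunkedWriter itself needs a Hadoop configuration; the stand-in is purely hypothetical:

    public class CustomResource {
      // Any AutoCloseable can be managed by try-with-resources.
      static final class ChunkLikeWriter implements AutoCloseable {
        void write(String key, String value) {
          System.out.println(key + " -> " + value);
        }
        @Override
        public void close() {
          System.out.println("flushed and closed");
        }
      }

      public static void main(String[] args) {
        try (ChunkLikeWriter writer = new ChunkLikeWriter()) {
          writer.write("prefix/msg-1", "hello");
        } // close() is invoked here automatically
      }
    }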
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
index fdb3654..b8441b7 100644
--- a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,7 +19,6 @@ package org.apache.mahout.text;
import java.io.IOException;
-import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -55,7 +54,7 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
throws IOException {
this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx),
- fileSplit.getLength(idx), fileSplit.getLocations());
+ fileSplit.getLength(idx), fileSplit.getLocations());
this.configuration = taskAttemptContext.getConfiguration();
this.index = new IntWritable(idx);
this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
@@ -78,8 +77,9 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
- throws IOException, InterruptedException {
- if (!StringUtils.isBlank(fileFilterClassName) && !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
+ throws IOException, InterruptedException {
+ if (!StringUtils.isBlank(fileFilterClassName) &&
+ !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
try {
pathFilter = (PathFilter) Class.forName(fileFilterClassName).newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
@@ -106,15 +106,11 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab
fileStatuses = fs.listStatus(file);
}
- FSDataInputStream in = null;
if (fileStatuses.length == 1) {
- try {
- in = fs.open(fileStatuses[0].getPath());
+ try (FSDataInputStream in = fs.open(fileStatuses[0].getPath())) {
IOUtils.readFully(in, contents, 0, contents.length);
value.setCapacity(contents.length);
value.set(contents, 0, contents.length);
- } finally {
- Closeables.close(in, false);
}
processed = true;
return true;
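
The initialize() hunk sits next to another Java 7 feature, visible in its unchanged context line: multi-catch, where a single catch clause handles several alternative exception types. A standalone sketch of the same reflective-instantiation idiom (the class name passed in is illustrative):

    public class MultiCatchDemo {
      public static void main(String[] args) {
        String className = args.length > 0 ? args[0] : "java.util.ArrayList";
        try {
          Object filter = Class.forName(className).newInstance();
          System.out.println("Instantiated " + filter.getClass().getName());
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
          // One handler for three unrelated reflective failures; before Java 7
          // this took three catch blocks or a broad common supertype.
          throw new IllegalStateException(className + " is not instantiable", e);
        }
      }
    }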
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
index 1cde4cd..bed4640 100644
--- a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
+++ b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
@@ -19,10 +19,10 @@ package org.apache.mahout.text;
import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
-import com.google.common.collect.Sets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -168,7 +168,7 @@ public final class WikipediaToSequenceFile {
"org.apache.hadoop.io.serializer.JavaSerialization,"
+ "org.apache.hadoop.io.serializer.WritableSerialization");
- Set<String> categories = Sets.newHashSet();
+ Set<String> categories = new HashSet<>();
if (!catFile.isEmpty()) {
for (String line : new FileLineIterable(new File(catFile))) {
categories.add(line.trim().toLowerCase(Locale.ENGLISH));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
index d9df97f..ad55ba7 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
@@ -18,6 +18,7 @@
package org.apache.mahout.text.wikipedia;
import java.io.Reader;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
index 6632ad2..7113629 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
@@ -19,10 +19,10 @@ package org.apache.mahout.text.wikipedia;
import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
-import com.google.common.collect.Sets;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -154,7 +154,7 @@ public final class WikipediaDatasetCreatorDriver {
// Dont ever forget this. People should keep track of how hadoop conf
// parameters can make or break a piece of code
- Set<String> categories = Sets.newHashSet();
+ Set<String> categories = new HashSet<>();
for (String line : new FileLineIterable(new File(catFile))) {
categories.add(line.trim().toLowerCase(Locale.ENGLISH));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
index 54a1df3..50e5f37 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
@@ -17,14 +17,6 @@
package org.apache.mahout.text.wikipedia;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.List;
-import java.util.Locale;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
@@ -40,7 +32,14 @@ import org.apache.mahout.common.ClassUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.regex.Pattern;
/**
* Maps over Wikipedia xml format and output all document having the category listed in the input category
@@ -89,13 +88,13 @@ public class WikipediaDatasetCreatorMapper extends Mapper<LongWritable, Text, Te
Configuration conf = context.getConfiguration();
if (inputCategories == null) {
- Set<String> newCategories = Sets.newHashSet();
+ Set<String> newCategories = new HashSet<>();
DefaultStringifier<Set<String>> setStringifier =
- new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories));
+ new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories));
String categoriesStr = conf.get("wikipedia.categories", setStringifier.toString(newCategories));
Set<String> inputCategoriesSet = setStringifier.fromString(categoriesStr);
- inputCategories = Lists.newArrayList(inputCategoriesSet);
- inputCategoryPatterns = Lists.newArrayListWithCapacity(inputCategories.size());
+ inputCategories = new ArrayList<>(inputCategoriesSet);
+ inputCategoryPatterns = new ArrayList<>(inputCategories.size());
for (String inputCategory : inputCategories) {
inputCategoryPatterns.add(Pattern.compile(".*\\b" + inputCategory + "\\b.*"));
}
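
Beyond the collection factories, the diamond operator also tightens constructor calls on arbitrary generic classes, as in the DefaultStringifier change above, where new DefaultStringifier<Set<String>>(...) becomes new DefaultStringifier<>(...). A minimal sketch with plain JDK generics (the contents are illustrative):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class NestedDiamond {
      public static void main(String[] args) {
        // Before Java 7 the nested type arguments had to be repeated:
        //   Map<String, Set<String>> byTopic = new HashMap<String, Set<String>>();
        Map<String, Set<String>> byTopic = new HashMap<>();
        Set<String> categories = new HashSet<>(Arrays.asList("history", "science"));
        byTopic.put("wikipedia", categories);
        System.out.println(byTopic);
      }
    }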
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
index d880760..abd3a04 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java
@@ -18,12 +18,12 @@
package org.apache.mahout.text.wikipedia;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
@@ -106,9 +106,9 @@ public class WikipediaMapper extends Mapper<LongWritable, Text, Text, Text> {
super.setup(context);
Configuration conf = context.getConfiguration();
- Set<String> newCategories = Sets.newHashSet();
+ Set<String> newCategories = new HashSet<>();
DefaultStringifier<Set<String>> setStringifier =
- new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories));
+ new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories));
String categoriesStr = conf.get("wikipedia.categories");
inputCategories = setStringifier.fromString(categoriesStr);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
index c9a54e9..fc065fe 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
@@ -26,7 +26,6 @@ import java.net.URI;
import java.text.DecimalFormat;
import java.text.NumberFormat;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -219,12 +218,9 @@ public final class WikipediaXmlSplitter {
content.append("</mediawiki>");
fileNumber++;
String filename = outputDirPath + "/chunk-" + decimalFormatter.format(fileNumber) + ".xml";
- BufferedWriter chunkWriter =
- new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"));
- try {
+ try (BufferedWriter chunkWriter =
+ new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"))) {
chunkWriter.write(content.toString(), 0, content.length());
- } finally {
- Closeables.close(chunkWriter, false);
}
if (fileNumber >= numChunks) {
break;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
index 7f16f31..afd350f 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java
@@ -17,10 +17,8 @@
package org.apache.mahout.text.wikipedia;
-import java.io.IOException;
-
-import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -36,6 +34,8 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
/**
* Reads records that are delimited by a specific begin/end tag.
*/
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
index 1814bd5..33d09a0 100644
--- a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
+++ b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
@@ -17,11 +17,9 @@
package org.apache.mahout.utils;
-
import java.io.IOException;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -113,12 +111,8 @@ public class ConcatenateVectorsJob extends AbstractJob {
Preconditions.checkArgument(paths.length > 0, path.getName() + " is a file, should be a directory");
Path file = paths[0].getPath();
- SequenceFile.Reader reader = null;
- try {
- reader = new SequenceFile.Reader(fs, file, fs.getConf());
+ try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, fs.getConf())){
return reader.getKeyClass().asSubclass(Writable.class);
- } finally {
- Closeables.close(reader, true);
}
}
}
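
One behavioral nuance in this hunk: the resource is consumed inside the return expression, and try-with-resources still closes the reader before control leaves the method. It also changes error handling slightly, since Closeables.close(reader, true) swallowed IOExceptions thrown by close(), while try-with-resources propagates them (or attaches them as suppressed exceptions when the body failed first). A sketch of the shape, with a hypothetical file name:

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    public class CloseBeforeReturn {
      static String firstLine(String path) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
          // The return value is computed first, then close() runs, and only
          // then does the caller receive the value.
          return reader.readLine();
        }
      }

      public static void main(String[] args) throws IOException {
        System.out.println(firstLine("data.txt"));
      }
    }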
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
index 8ab57be..f63de83 100644
--- a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
@@ -25,7 +25,7 @@ import java.io.PrintStream;
import java.util.List;
import java.util.Map;
-import com.google.common.base.Charsets;
+import org.apache.commons.io.Charsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
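
This final hunk, like the XmlInputFormat change above, swaps Guava's com.google.common.base.Charsets for org.apache.commons.io.Charsets. Java 7 itself also introduced java.nio.charset.StandardCharsets with the same standard constants and no third-party dependency; a minimal sketch:

    import java.nio.charset.StandardCharsets;

    public class CharsetConstants {
      public static void main(String[] args) {
        // StandardCharsets.UTF_8 plays the same role as Charsets.UTF_8 from
        // Guava or commons-io: a shared constant instead of Charset.forName("UTF-8").
        byte[] bytes = "mahout".getBytes(StandardCharsets.UTF_8);
        System.out.println(new String(bytes, StandardCharsets.UTF_8));
      }
    }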