Posted to commits@mahout.apache.org by sm...@apache.org on 2015/04/06 04:22:14 UTC
[2/5] mahout git commit: MAHOUT-1652: Java 7 upgrade,
this closes apache/mahout#112
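
The bulk of this patch mechanically replaces Guava's collection factory methods (Lists.newArrayList, Maps.newHashMap, Sets.newHashSet) with Java 7 diamond-operator constructors from java.util. A minimal before/after sketch of that pattern follows; the class and variable names here are illustrative and not taken from the patch:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DiamondExample {
  public static void main(String[] args) {
    // Pre-Java 7 style, as used throughout Mahout before this commit:
    //   List<String> names = Lists.newArrayList();
    //   Map<String, Integer> counts = Maps.newHashMap();
    // The Java 7 diamond operator infers the type arguments, so the Guava
    // factories (and their imports) can be dropped:
    List<String> names = new ArrayList<>();
    Map<String, Integer> counts = new HashMap<>();
    names.add("alpha");
    counts.put("alpha", 1);
    System.out.println(names + " " + counts);
  }
}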
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
index 137b174..86f99b6 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,10 +22,9 @@ import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -40,8 +39,9 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters;
*/
public final class DFUtils {
- private DFUtils() {}
-
+ private DFUtils() {
+ }
+
/**
* Writes an Node[] into a DataOutput
* @throws java.io.IOException
@@ -52,7 +52,7 @@ public final class DFUtils {
w.write(out);
}
}
-
+
/**
* Reads a Node[] from a DataInput
* @throws java.io.IOException
@@ -63,10 +63,10 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
nodes[index] = Node.read(in);
}
-
+
return nodes;
}
-
+
/**
* Writes a double[] into a DataOutput
* @throws java.io.IOException
@@ -77,7 +77,7 @@ public final class DFUtils {
out.writeDouble(value);
}
}
-
+
/**
* Reads a double[] from a DataInput
* @throws java.io.IOException
@@ -88,10 +88,10 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
array[index] = in.readDouble();
}
-
+
return array;
}
-
+
/**
* Writes an int[] into a DataOutput
* @throws java.io.IOException
@@ -102,7 +102,7 @@ public final class DFUtils {
out.writeInt(value);
}
}
-
+
/**
* Reads an int[] from a DataInput
* @throws java.io.IOException
@@ -113,16 +113,16 @@ public final class DFUtils {
for (int index = 0; index < length; index++) {
array[index] = in.readInt();
}
-
+
return array;
}
-
+
/**
* Return a list of all files in the output directory
* @throws IOException if no file is found
*/
public static Path[] listOutputFiles(FileSystem fs, Path outputPath) throws IOException {
- List<Path> outputFiles = Lists.newArrayList();
+ List<Path> outputFiles = new ArrayList<>();
for (FileStatus s : fs.listStatus(outputPath, PathFilters.logsCRCFilter())) {
if (!s.isDir() && !s.getPath().getName().startsWith("_")) {
outputFiles.add(s.getPath());
@@ -140,27 +140,24 @@ public final class DFUtils {
public static String elapsedTime(long milli) {
long seconds = milli / 1000;
milli %= 1000;
-
+
long minutes = seconds / 60;
seconds %= 60;
-
+
long hours = minutes / 60;
minutes %= 60;
-
+
return hours + "h " + minutes + "m " + seconds + "s " + milli;
}
public static void storeWritable(Configuration conf, Path path, Writable writable) throws IOException {
FileSystem fs = path.getFileSystem(conf);
- FSDataOutputStream out = fs.create(path);
- try {
+ try (FSDataOutputStream out = fs.create(path)) {
writable.write(out);
- } finally {
- Closeables.close(out, false);
}
}
-
+
/**
* Write a string to a path.
* @param conf From which the file system will be picked
@@ -169,13 +166,8 @@ public final class DFUtils {
* @throws IOException if things go poorly
*/
public static void storeString(Configuration conf, Path path, String string) throws IOException {
- DataOutputStream out = null;
- try {
- out = path.getFileSystem(conf).create(path);
+ try (DataOutputStream out = path.getFileSystem(conf).create(path)) {
out.write(string.getBytes(Charset.defaultCharset()));
- } finally {
- Closeables.close(out, false);
}
}
-
}
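
The other recurring change, visible above in storeWritable and storeString, replaces manual stream handling guarded by Guava's Closeables.close in a finally block with Java 7 try-with-resources. A hedged sketch of that pattern using plain java.io (the file name and payload are illustrative, not from the patch):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;

public class TryWithResourcesExample {
  public static void main(String[] args) throws IOException {
    // Before: the stream was declared outside the try block and closed in
    // finally via Closeables.close(out, false).
    // After: the stream is declared in the try header and closed
    // automatically, even when an exception is thrown.
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream("example.bin"))) {
      out.write("hello".getBytes(Charset.defaultCharset()));
    }
  }
}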
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
index 1b47ec7..bb4153e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java
@@ -18,8 +18,6 @@
package org.apache.mahout.classifier.df;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +32,7 @@ import org.apache.mahout.classifier.df.node.Node;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -45,7 +44,7 @@ public class DecisionForest implements Writable {
private final List<Node> trees;
private DecisionForest() {
- trees = Lists.newArrayList();
+ trees = new ArrayList<>();
}
public DecisionForest(List<Node> trees) {
@@ -225,15 +224,12 @@ public class DecisionForest implements Writable {
DecisionForest forest = null;
for (Path path : files) {
- FSDataInputStream dataInput = new FSDataInputStream(fs.open(path));
- try {
+ try (FSDataInputStream dataInput = new FSDataInputStream(fs.open(path))) {
if (forest == null) {
forest = read(dataInput);
} else {
forest.readFields(dataInput);
}
- } finally {
- Closeables.close(dataInput, true);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
index 895188b..8a7d945 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.df.builder;
-import com.google.common.collect.Sets;
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.data.Instance;
@@ -34,6 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
+import java.util.HashSet;
import java.util.Random;
/**
@@ -263,7 +263,7 @@ public class DecisionTreeBuilder implements TreeBuilder {
// tree is complemented
Collection<Double> subsetValues = null;
if (complemented) {
- subsetValues = Sets.newHashSet();
+ subsetValues = new HashSet<>();
for (double value : values) {
subsetValues.add(value);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
index c1bddd9..c68ce52 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java
@@ -17,11 +17,11 @@
package org.apache.mahout.classifier.df.data;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.mahout.classifier.df.data.conditions.Condition;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
import java.util.Random;
@@ -38,12 +38,12 @@ public class Data implements Cloneable {
public Data(Dataset dataset) {
this.dataset = dataset;
- this.instances = Lists.newArrayList();
+ this.instances = new ArrayList<>();
}
public Data(Dataset dataset, List<Instance> instances) {
this.dataset = dataset;
- this.instances = Lists.newArrayList(instances);
+ this.instances = new ArrayList<>(instances);
}
/**
@@ -86,7 +86,7 @@ public class Data implements Cloneable {
* @return the subset from this data that matches the given condition
*/
public Data subset(Condition condition) {
- List<Instance> subset = Lists.newArrayList();
+ List<Instance> subset = new ArrayList<>();
for (Instance instance : instances) {
if (condition.isTrueFor(instance)) {
@@ -102,7 +102,7 @@ public class Data implements Cloneable {
*/
public Data bagging(Random rng) {
int datasize = size();
- List<Instance> bag = Lists.newArrayListWithCapacity(datasize);
+ List<Instance> bag = new ArrayList<>(datasize);
for (int i = 0; i < datasize; i++) {
bag.add(instances.get(rng.nextInt(datasize)));
@@ -121,7 +121,7 @@ public class Data implements Cloneable {
*/
public Data bagging(Random rng, boolean[] sampled) {
int datasize = size();
- List<Instance> bag = Lists.newArrayListWithCapacity(datasize);
+ List<Instance> bag = new ArrayList<>(datasize);
for (int i = 0; i < datasize; i++) {
int index = rng.nextInt(datasize);
@@ -136,7 +136,7 @@ public class Data implements Cloneable {
* Splits the data in two, returns one part, and this gets the rest of the data. <b>VERY SLOW!</b>
*/
public Data rsplit(Random rng, int subsize) {
- List<Instance> subset = Lists.newArrayListWithCapacity(subsize);
+ List<Instance> subset = new ArrayList<>(subsize);
for (int i = 0; i < subsize; i++) {
subset.add(instances.remove(rng.nextInt(instances.size())));
@@ -190,7 +190,7 @@ public class Data implements Cloneable {
* finds all distinct values of a given attribute
*/
public double[] values(int attr) {
- Collection<Double> result = Sets.newHashSet();
+ Collection<Double> result = new HashSet<>();
for (Instance instance : instances) {
result.add(instance.get(attr));
@@ -208,7 +208,7 @@ public class Data implements Cloneable {
@Override
public Data clone() {
- return new Data(dataset, Lists.newArrayList(instances));
+ return new Data(dataset, new ArrayList<>(instances));
}
@Override
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
index 8eed6cf..c8d9dcd 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
@@ -19,7 +19,6 @@ package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -28,6 +27,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
@@ -80,7 +81,7 @@ public final class DataLoader {
if (attrs[attr].isCategorical() || (!regression && attrs[attr].isLabel())) {
// update values
if (values[attr] == null) {
- values[attr] = Sets.newHashSet();
+ values[attr] = new HashSet<>();
}
values[attr].add(token);
} else {
@@ -111,7 +112,7 @@ public final class DataLoader {
FSDataInputStream input = fs.open(fpath);
Scanner scanner = new Scanner(input, "UTF-8");
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
DataConverter converter = new DataConverter(dataset);
@@ -137,7 +138,7 @@ public final class DataLoader {
/** Loads the data from multiple paths specified by pathes */
public static Data loadData(Dataset dataset, FileSystem fs, Path[] pathes) throws IOException {
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
for (Path path : pathes) {
Data loadedData = loadData(dataset, fs, path);
@@ -150,7 +151,7 @@ public final class DataLoader {
/** Loads the data from a String array */
public static Data loadData(Dataset dataset, String[] data) {
- List<Instance> instances = Lists.newArrayList();
+ List<Instance> instances = new ArrayList<>();
DataConverter converter = new DataConverter(dataset);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
index 856d452..3eb126c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java
@@ -18,8 +18,8 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -71,7 +71,7 @@ public final class DataUtils {
*/
public static int maxindex(Random rng, int[] values) {
int max = 0;
- List<Integer> maxindices = Lists.newArrayList();
+ List<Integer> maxindices = new ArrayList<>();
for (int index = 0; index < values.length; index++) {
if (values[index] > max) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
index d2bec37..413389f 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
@@ -18,8 +18,6 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
@@ -32,6 +30,8 @@ import org.codehaus.jackson.type.TypeReference;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -336,7 +336,7 @@ public class Dataset {
* @return some JSON
*/
public String toJSON() {
- List<Map<String, Object>> toWrite = Lists.newLinkedList();
+ List<Map<String, Object>> toWrite = new LinkedList<>();
// attributes does not include ignored columns and it does include the class label
int ignoredCount = 0;
for (int i = 0; i < attributes.length + ignored.length; i++) {
@@ -374,8 +374,8 @@ public class Dataset {
} catch (Exception ex) {
throw new RuntimeException(ex);
}
- List<Attribute> attributes = Lists.newLinkedList();
- List<Integer> ignored = Lists.newLinkedList();
+ List<Attribute> attributes = new LinkedList<>();
+ List<Integer> ignored = new LinkedList<>();
String[][] nominalValues = new String[fromJSON.size()][];
Dataset dataset = new Dataset();
for (int i = 0; i < fromJSON.size(); i++) {
@@ -412,7 +412,7 @@ public class Dataset {
* @return map of (AttributeTypes, Values)
*/
private Map<String, Object> getMap(Attribute type, String[] values, boolean isLabel) {
- Map<String, Object> attribute = Maps.newHashMap();
+ Map<String, Object> attribute = new HashMap<>();
attribute.put(TYPE, type.toString().toLowerCase(Locale.getDefault()));
attribute.put(VALUES, values);
attribute.put(LABEL, isLabel);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
index a2198b1..f2e0ce4 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java
@@ -18,9 +18,9 @@
package org.apache.mahout.classifier.df.data;
import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.df.data.Dataset.Attribute;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@@ -40,7 +40,7 @@ public final class DescriptorUtils {
* if a bad token is encountered
*/
public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException {
- List<Attribute> attributes = Lists.newArrayList();
+ List<Attribute> attributes = new ArrayList<>();
for (String token : SPACE.split(descriptor)) {
token = token.toUpperCase(Locale.ENGLISH);
if ("I".equals(token)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
index b8e5c2d..bdbaf2b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java
@@ -17,11 +17,6 @@
package org.apache.mahout.classifier.df.mapreduce;
-import java.io.IOException;
-import java.util.List;
-import java.util.Random;
-
-import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -51,6 +46,11 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
/**
* Mapreduce implementation that classifies the Input data using a previousely built decision forest
*/
@@ -144,7 +144,7 @@ public class Classifier {
Path[] outfiles = DFUtils.listOutputFiles(fs, mappersOutputPath);
// read all the output
- List<double[]> resList = Lists.newArrayList();
+ List<double[]> resList = new ArrayList<>();
for (Path path : outfiles) {
FSDataOutputStream ofile = null;
try {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
index 573a1e0..4c33e73 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java
@@ -17,8 +17,12 @@
package org.apache.mahout.classifier.df.mapreduce.inmem;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
@@ -36,10 +40,6 @@ import org.apache.mahout.classifier.df.node.Node;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
/**
* MapReduce implementation where each mapper loads a full copy of the data in-memory. The forest trees are
* splitted across all the mappers
@@ -80,7 +80,7 @@ public class InMemBuilder extends Builder {
protected DecisionForest parseOutput(Job job) throws IOException {
Configuration conf = job.getConfiguration();
- Map<Integer,MapredOutput> output = Maps.newHashMap();
+ Map<Integer,MapredOutput> output = new HashMap<>();
Path outputPath = getOutputPath(conf);
FileSystem fs = outputPath.getFileSystem(conf);
@@ -101,7 +101,7 @@ public class InMemBuilder extends Builder {
* Process the output, extracting the trees
*/
private static DecisionForest processOutput(Map<Integer,MapredOutput> output) {
- List<Node> trees = Lists.newArrayList();
+ List<Node> trees = new ArrayList<>();
for (Map.Entry<Integer,MapredOutput> entry : output.entrySet()) {
MapredOutput value = entry.getValue();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
index a39218e..51e5a3e 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
@@ -17,8 +17,15 @@
package org.apache.mahout.classifier.df.mapreduce.inmem;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
@@ -33,13 +40,6 @@ import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-import java.util.Locale;
-import java.util.Random;
-
/**
* Custom InputFormat that generates InputSplits given the desired number of trees.<br>
* each input split contains a subset of the trees.<br>
@@ -94,7 +94,7 @@ public class InMemInputFormat extends InputFormat<IntWritable,NullWritable> {
int id = 0;
- List<InputSplit> splits = Lists.newArrayListWithCapacity(numSplits);
+ List<InputSplit> splits = new ArrayList<>(numSplits);
for (int index = 0; index < numSplits - 1; index++) {
splits.add(new InMemInputSplit(id, splitSize, nextSeed()));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
index eaf0b15..648472c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java
@@ -18,7 +18,6 @@
package org.apache.mahout.classifier.df.mapreduce.partial;
import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -35,6 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -61,7 +61,7 @@ public class Step1Mapper extends MapredMapper<LongWritable,Text,TreeID,MapredOut
private int partition;
/** will contain all instances if this mapper's split */
- private final List<Instance> instances = Lists.newArrayList();
+ private final List<Instance> instances = new ArrayList<>();
public int getFirstTreeId() {
return firstTreeId;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
index 292b591..d7f023b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.df.ref;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.df.Bagging;
import org.apache.mahout.classifier.df.DecisionForest;
import org.apache.mahout.classifier.df.builder.TreeBuilder;
@@ -26,6 +25,7 @@ import org.apache.mahout.classifier.df.node.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@@ -56,7 +56,7 @@ public class SequentialBuilder {
}
public DecisionForest build(int nbTrees) {
- List<Node> trees = Lists.newArrayList();
+ List<Node> trees = new ArrayList<>();
for (int treeId = 0; treeId < nbTrees; treeId++) {
trees.add(bagging.build(rng));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
index 58814a8..226d3db 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java
@@ -17,7 +17,11 @@
package org.apache.mahout.classifier.df.tools;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -38,10 +42,6 @@ import org.apache.mahout.common.CommandLineUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-
/**
* Generates a file descriptor for a given dataset
*/
@@ -138,7 +138,7 @@ public final class Describe {
}
private static List<String> convert(Collection<?> values) {
- List<String> list = Lists.newArrayListWithCapacity(values.size());
+ List<String> list = new ArrayList<>(values.size());
for (Object value : values) {
list.add(value.toString());
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
index 056bd48..f4e765c 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java
@@ -19,10 +19,12 @@ package org.apache.mahout.classifier.mlp;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -41,10 +43,6 @@ import org.apache.mahout.math.function.DoubleFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/**
* AbstractNeuralNetwork defines the general operations for a neural network
* based model. Typically, all derivative models such as Multilayer Perceptron
@@ -63,7 +61,7 @@ public abstract class NeuralNetwork {
/* The default momentum weight */
public static final double DEFAULT_MOMENTUM_WEIGHT = 0.1;
- public static enum TrainingMethod { GRADIENT_DESCENT }
+ public enum TrainingMethod { GRADIENT_DESCENT }
/* The name of the model */
protected String modelType;
@@ -113,11 +111,11 @@ public abstract class NeuralNetwork {
costFunctionName = "Minus_Squared";
modelType = getClass().getSimpleName();
- layerSizeList = Lists.newArrayList();
- layerSizeList = Lists.newArrayList();
- weightMatrixList = Lists.newArrayList();
- prevWeightUpdatesList = Lists.newArrayList();
- squashingFunctionList = Lists.newArrayList();
+ layerSizeList = new ArrayList<>();
+ layerSizeList = new ArrayList<>();
+ weightMatrixList = new ArrayList<>();
+ prevWeightUpdatesList = new ArrayList<>();
+ squashingFunctionList = new ArrayList<>();
}
/**
@@ -350,7 +348,7 @@ public abstract class NeuralNetwork {
* existing matrices.
*/
public void setWeightMatrices(Matrix[] matrices) {
- weightMatrixList = Lists.newArrayList();
+ weightMatrixList = new ArrayList<>();
Collections.addAll(weightMatrixList, matrices);
}
@@ -411,7 +409,7 @@ public abstract class NeuralNetwork {
* @return Cached output of each layer.
*/
protected List<Vector> getOutputInternal(Vector instance) {
- List<Vector> outputCache = Lists.newArrayList();
+ List<Vector> outputCache = new ArrayList<>();
// fill with instance
Vector intermediateOutput = instance;
outputCache.add(intermediateOutput);
@@ -592,14 +590,10 @@ public abstract class NeuralNetwork {
protected void readFromModel() throws IOException {
log.info("Load model from {}", modelPath);
Preconditions.checkArgument(modelPath != null, "Model path has not been set.");
- FSDataInputStream is = null;
- try {
- Path path = new Path(modelPath);
- FileSystem fs = path.getFileSystem(new Configuration());
- is = new FSDataInputStream(fs.open(path));
+ Path path = new Path(modelPath);
+ FileSystem fs = path.getFileSystem(new Configuration());
+ try (FSDataInputStream is = new FSDataInputStream(fs.open(path))) {
readFields(is);
- } finally {
- Closeables.close(is, true);
}
}
@@ -611,14 +605,10 @@ public abstract class NeuralNetwork {
public void writeModelToFile() throws IOException {
log.info("Write model to {}.", modelPath);
Preconditions.checkArgument(modelPath != null, "Model path has not been set.");
- FSDataOutputStream stream = null;
- try {
- Path path = new Path(modelPath);
- FileSystem fs = path.getFileSystem(new Configuration());
- stream = fs.create(path, true);
+ Path path = new Path(modelPath);
+ FileSystem fs = path.getFileSystem(new Configuration());
+ try (FSDataOutputStream stream = fs.create(path, true)) {
write(stream);
- } finally {
- Closeables.close(stream, false);
}
}
@@ -717,7 +707,7 @@ public abstract class NeuralNetwork {
// Read layer size list
int numLayers = input.readInt();
- layerSizeList = Lists.newArrayList();
+ layerSizeList = new ArrayList<>();
for (int i = 0; i < numLayers; i++) {
layerSizeList.add(input.readInt());
}
@@ -726,15 +716,15 @@ public abstract class NeuralNetwork {
// Read squash functions
int squashingFunctionSize = input.readInt();
- squashingFunctionList = Lists.newArrayList();
+ squashingFunctionList = new ArrayList<>();
for (int i = 0; i < squashingFunctionSize; i++) {
squashingFunctionList.add(WritableUtils.readString(input));
}
// Read weights and construct matrices of previous updates
int numOfMatrices = input.readInt();
- weightMatrixList = Lists.newArrayList();
- prevWeightUpdatesList = Lists.newArrayList();
+ weightMatrixList = new ArrayList<>();
+ prevWeightUpdatesList = new ArrayList<>();
for (int i = 0; i < numOfMatrices; i++) {
Matrix matrix = MatrixWritable.readMatrix(input);
weightMatrixList.add(matrix);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
index 6130530..270ea43 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java
@@ -22,6 +22,7 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -41,9 +42,6 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
/** Run {@link MultilayerPerceptron} classification.
* @deprecated as of as of 0.10.0.
* */
@@ -61,11 +59,11 @@ public class RunMultilayerPerceptron {
int columnEnd;
boolean skipHeader;
}
-
+
public static void main(String[] args) throws Exception {
-
+
Parameters parameters = new Parameters();
-
+
if (parseArgs(args, parameters)) {
log.info("Load model from {}.", parameters.modelFilePathStr);
MultilayerPerceptron mlp = new MultilayerPerceptron(parameters.modelFilePathStr);
@@ -98,15 +96,10 @@ public class RunMultilayerPerceptron {
log.info("Read from column {} to column {}.", parameters.columnStart, parameters.columnEnd);
- BufferedWriter writer = null;
- BufferedReader reader = null;
- try {
- writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath)));
- reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)));
-
+ try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath)));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)))) {
String line;
-
if (parameters.skipHeader) {
reader.readLine();
}
@@ -125,9 +118,6 @@ public class RunMultilayerPerceptron {
}
mlp.close();
log.info("Labeling finished.");
- } finally {
- Closeables.close(reader, true);
- Closeables.close(writer, true);
}
}
}
@@ -154,7 +144,7 @@ public class RunMultilayerPerceptron {
.withDescription("type of input file, currently support 'csv'")
.create();
- List<Integer> columnRangeDefault = Lists.newArrayList();
+ List<Integer> columnRangeDefault = new ArrayList<>();
columnRangeDefault.add(0);
columnRangeDefault.add(Integer.MAX_VALUE);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
index a194c4c..d634aa5 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java
@@ -19,9 +19,12 @@ package org.apache.mahout.classifier.mlp;
import java.io.BufferedReader;
import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import com.google.common.base.Preconditions;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -38,11 +41,6 @@ import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-
/** Train a {@link MultilayerPerceptron}.
* @deprecated as of as of 0.10.0.
* */
@@ -50,7 +48,7 @@ import com.google.common.io.Closeables;
public final class TrainMultilayerPerceptron {
private static final Logger log = LoggerFactory.getLogger(TrainMultilayerPerceptron.class);
-
+
/** The parameters used by MLP. */
static class Parameters {
double learningRate;
@@ -59,31 +57,17 @@ public final class TrainMultilayerPerceptron {
String inputFilePath;
boolean skipHeader;
- Map<String, Integer> labelsIndex = Maps.newHashMap();
+ Map<String, Integer> labelsIndex = new HashMap<>();
String modelFilePath;
boolean updateModel;
- List<Integer> layerSizeList = Lists.newArrayList();
+ List<Integer> layerSizeList = new ArrayList<>();
String squashingFunctionName;
}
- /*
- private double learningRate;
- private double momemtumWeight;
- private double regularizationWeight;
-
- private String inputFilePath;
- private boolean skipHeader;
- private Map<String, Integer> labelsIndex = Maps.newHashMap();
-
- private String modelFilePath;
- private boolean updateModel;
- private List<Integer> layerSizeList = Lists.newArrayList();
- private String squashingFunctionName;*/
-
public static void main(String[] args) throws Exception {
Parameters parameters = new Parameters();
-
+
if (parseArgs(args, parameters)) {
log.info("Validate model...");
// check whether the model already exists
@@ -109,31 +93,28 @@ public final class TrainMultilayerPerceptron {
}
mlp.setCostFunction("Minus_Squared");
mlp.setLearningRate(parameters.learningRate)
- .setMomentumWeight(parameters.momemtumWeight)
- .setRegularizationWeight(parameters.regularizationWeight);
+ .setMomentumWeight(parameters.momemtumWeight)
+ .setRegularizationWeight(parameters.regularizationWeight);
}
mlp.setModelPath(parameters.modelFilePath);
}
// set the parameters
mlp.setLearningRate(parameters.learningRate)
- .setMomentumWeight(parameters.momemtumWeight)
- .setRegularizationWeight(parameters.regularizationWeight);
+ .setMomentumWeight(parameters.momemtumWeight)
+ .setRegularizationWeight(parameters.regularizationWeight);
// train by the training data
Path trainingDataPath = new Path(parameters.inputFilePath);
FileSystem dataFs = trainingDataPath.getFileSystem(new Configuration());
Preconditions.checkArgument(dataFs.exists(trainingDataPath), "Training dataset %s cannot be found!",
- parameters.inputFilePath);
+ parameters.inputFilePath);
log.info("Read data and train model...");
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath)));
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath)))) {
String line;
-
// read training data line by line
if (parameters.skipHeader) {
reader.readLine();
@@ -163,15 +144,13 @@ public final class TrainMultilayerPerceptron {
log.info("Write trained model to {}", parameters.modelFilePath);
mlp.writeModelToFile();
mlp.close();
- } finally {
- Closeables.close(reader, true);
}
}
}
/**
* Parse the input arguments.
- *
+ *
* @param args The input arguments
* @param parameters The parameters parsed.
* @return Whether the input arguments are valid.
@@ -196,7 +175,7 @@ public final class TrainMultilayerPerceptron {
.withRequired(true)
.withChildren(skipHeaderGroup)
.withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1)
- .create()).withDescription("the file path of training dataset")
+ .create()).withDescription("the file path of training dataset")
.create();
Option labelsOption = optionBuilder
@@ -295,9 +274,9 @@ public final class TrainMultilayerPerceptron {
parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption);
System.out.printf("Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f," +
- " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath,
- parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()),
- parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight,
+ " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath,
+ parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()),
+ parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight,
parameters.regularizationWeight);
return true;
@@ -321,7 +300,7 @@ public final class TrainMultilayerPerceptron {
static List<Integer> getIntegerList(CommandLine commandLine, Option option) {
List<String> list = commandLine.getValues(option);
- List<Integer> valList = Lists.newArrayList();
+ List<Integer> valList = new ArrayList<>();
for (String str : list) {
valList.add(Integer.parseInt(str));
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
index 1e5171c..c09dd83 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
@@ -20,9 +20,11 @@ package org.apache.mahout.classifier.naivebayes;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
import java.util.regex.Pattern;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -44,11 +46,6 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenObjectIntHashMap;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-
public final class BayesUtils {
private static final Pattern SLASH = Pattern.compile("/");
@@ -104,14 +101,11 @@ public final class BayesUtils {
public static int writeLabelIndex(Configuration conf, Iterable<String> labels, Path indexPath)
throws IOException {
FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class);
int i = 0;
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)) {
for (String label : labels) {
writer.append(new Text(label), new IntWritable(i++));
}
- } finally {
- Closeables.close(writer, false);
}
return i;
}
@@ -119,10 +113,9 @@ public final class BayesUtils {
public static int writeLabelIndex(Configuration conf, Path indexPath,
Iterable<Pair<Text,IntWritable>> labels) throws IOException {
FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class);
- Collection<String> seen = Sets.newHashSet();
+ Collection<String> seen = new HashSet<>();
int i = 0;
- try {
+ try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)){
for (Object label : labels) {
String theLabel = SLASH.split(((Pair<?, ?>) label).getFirst().toString())[1];
if (!seen.contains(theLabel)) {
@@ -130,8 +123,6 @@ public final class BayesUtils {
seen.add(theLabel);
}
}
- } finally {
- Closeables.close(writer, false);
}
return i;
}
@@ -154,7 +145,7 @@ public final class BayesUtils {
}
public static Map<String,Vector> readScoresFromCache(Configuration conf) throws IOException {
- Map<String,Vector> sumVectors = Maps.newHashMap();
+ Map<String,Vector> sumVectors = new HashMap<>();
for (Pair<Text,VectorWritable> entry
: new SequenceFileDirIterable<Text,VectorWritable>(HadoopUtil.getSingleCachedFile(conf),
PathType.LIST, PathFilters.partFilter(), conf)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
index f180e8b..9f85aab 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
@@ -31,7 +31,6 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import com.google.common.base.Preconditions;
-import com.google.common.io.Closeables;
/** NaiveBayesModel holds the weight matrix, the feature and label sums and the weight normalizer vectors.*/
public class NaiveBayesModel {
@@ -102,15 +101,14 @@ public class NaiveBayesModel {
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
FileSystem fs = output.getFileSystem(conf);
- Vector weightsPerLabel = null;
+ Vector weightsPerLabel;
Vector perLabelThetaNormalizer = null;
- Vector weightsPerFeature = null;
+ Vector weightsPerFeature;
Matrix weightsPerLabelAndFeature;
float alphaI;
boolean isComplementary;
- FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
- try {
+ try (FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"))) {
alphaI = in.readFloat();
isComplementary = in.readBoolean();
weightsPerFeature = VectorWritable.readVector(in);
@@ -122,9 +120,8 @@ public class NaiveBayesModel {
for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
}
- } finally {
- Closeables.close(in, true);
}
+
NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
perLabelThetaNormalizer, alphaI, isComplementary);
model.validate();
@@ -133,8 +130,7 @@ public class NaiveBayesModel {
public void serialize(Path output, Configuration conf) throws IOException {
FileSystem fs = output.getFileSystem(conf);
- FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"));
- try {
+ try (FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"))) {
out.writeFloat(alphaI);
out.writeBoolean(isComplementary);
VectorWritable.writeVector(out, weightsPerFeature);
@@ -145,8 +141,6 @@ public class NaiveBayesModel {
for (int row = 0; row < weightsPerLabelAndFeature.numRows(); row++) {
VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(row));
}
- } finally {
- Closeables.close(out, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
index 8fd422f..d9eedcf 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
@@ -17,13 +17,12 @@
package org.apache.mahout.classifier.naivebayes.test;
-import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
-import com.google.common.io.Closeables;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -127,10 +126,10 @@ public class TestNaiveBayesDriver extends AbstractJob {
} else {
classifier = new StandardNaiveBayesClassifier(model);
}
- SequenceFile.Writer writer = SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"),
- Text.class, VectorWritable.class);
- try {
+ try (SequenceFile.Writer writer =
+ SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"),
+ Text.class, VectorWritable.class)) {
SequenceFileDirIterable<Text, VectorWritable> dirIterable =
new SequenceFileDirIterable<>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
// loop through the part-r-* files in getInputPath() and get classification scores for all entries
@@ -138,8 +137,6 @@ public class TestNaiveBayesDriver extends AbstractJob {
writer.append(new Text(SLASH.split(pair.getFirst().toString())[1]),
new VectorWritable(classifier.classifyFull(pair.getSecond().get())));
}
- } finally {
- Closeables.close(writer, false);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
index 942a101..6d4e2b0 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
@@ -21,12 +21,11 @@ import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -95,7 +94,7 @@ public final class BaumWelchTrainer {
//constructing random-generated HMM
HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime());
- List<Integer> observations = Lists.newArrayList();
+ List<Integer> observations = new ArrayList<>();
//reading observations
try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) {
@@ -114,11 +113,8 @@ public final class BaumWelchTrainer {
observationsArray, epsilon, maxIterations, true);
//serializing trained model
- DataOutputStream stream = new DataOutputStream(new FileOutputStream(output));
- try {
+ try (DataOutputStream stream = new DataOutputStream(new FileOutputStream(output))){
LossyHmmSerializer.serialize(trainedModel, stream);
- } finally {
- Closeables.close(stream, false);
}
//printing tranied model
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
index 521be09..e710816 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
@@ -17,11 +17,12 @@
package org.apache.mahout.classifier.sequencelearning.hmm;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Matrix;
@@ -29,8 +30,6 @@ import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.Vector;
-import com.google.common.base.Preconditions;
-
/**
* A collection of utilities for handling HMMModel objects.
*/
@@ -257,7 +256,7 @@ public final class HmmUtils {
int[] sequence,
boolean observed,
String defaultValue) {
- List<String> decoded = Lists.newArrayListWithCapacity(sequence.length);
+ List<String> decoded = new ArrayList<>(sequence.length);
for (int position : sequence) {
String nextState;
if (observed) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
index cd2ced1..02baef1 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
@@ -25,8 +25,6 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
-import com.google.common.base.Charsets;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -35,6 +33,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.CommandLineUtil;
/**
@@ -80,26 +79,21 @@ public final class RandomSequenceGenerator {
int length = Integer.parseInt((String) commandLine.getValue(lengthOption));
//reading serialized HMM
- DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
HmmModel model;
- try {
+ try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))){
model = LossyHmmSerializer.deserialize(modelStream);
- } finally {
- Closeables.close(modelStream, true);
}
//generating observations
int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis());
//writing output
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
- try {
+ try (PrintWriter writer =
+ new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)){
for (int observation : observations) {
writer.print(observation);
writer.print(' ');
}
- } finally {
- Closeables.close(writer, false);
}
} catch (OptionException e) {
CommandLineUtil.printHelp(optionGroup);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
index fb64385..317237d 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
@@ -23,12 +23,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -37,6 +35,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -82,16 +81,14 @@ public final class ViterbiEvaluator {
boolean computeLikelihood = commandLine.hasOption(likelihoodOption);
//reading serialized HMM
- DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath));
HmmModel model;
- try {
+ try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))) {
model = LossyHmmSerializer.deserialize(modelStream);
- } finally {
- Closeables.close(modelStream, true);
}
//reading observations
- List<Integer> observations = Lists.newArrayList();
+ List<Integer> observations = new ArrayList<>();
try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) {
while (scanner.hasNextInt()) {
observations.add(scanner.nextInt());
@@ -107,14 +104,12 @@ public final class ViterbiEvaluator {
int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true);
//writing output
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true);
- try {
+ try (PrintWriter writer =
+ new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)) {
for (int hiddenState : hiddenStates) {
writer.print(hiddenState);
writer.print(' ');
}
- } finally {
- Closeables.close(writer, false);
}
if (computeLikelihood) {
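The other non-mechanical piece of these two HMM tool hunks is the charset import: com.google.common.base.Charsets is replaced by org.apache.commons.io.Charsets. Both classes expose a java.nio.charset.Charset constant named UTF_8, so the call sites stay identical. A tiny sketch, assuming commons-io on the classpath (the class name and output are illustrative only):

import java.io.OutputStreamWriter;
import java.io.PrintWriter;

import org.apache.commons.io.Charsets;

public class CharsetsSwapSketch {
  public static void main(String[] args) {
    // Same writer construction shape as in the hunk above,
    // but writing to stdout instead of an output file.
    try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true)) {
      writer.print(1);
      writer.print(' ');
      writer.print(2);
      writer.println();
    }
  }
}

On Java 7 the JDK's own java.nio.charset.StandardCharsets.UTF_8 would be another option; keeping a Charsets class means only the import line changes.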
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
index d00b021..24e5798 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.OnlineLearner;
import org.apache.mahout.ep.EvolutionaryProcess;
@@ -33,6 +32,7 @@ import org.slf4j.LoggerFactory;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
@@ -79,7 +79,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
private int currentStep = 1000;
private int bufferSize = 1000;
- private List<TrainingExample> buffer = Lists.newArrayList();
+ private List<TrainingExample> buffer = new ArrayList<>();
private EvolutionaryProcess<Wrapper, CrossFoldLearner> ep;
private State<Wrapper, CrossFoldLearner> best;
private int threadCount = DEFAULT_THREAD_COUNT;
@@ -118,7 +118,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
this.numFeatures = numFeatures;
this.threadCount = threadCount;
this.poolSize = poolSize;
- seed = new State<Wrapper, CrossFoldLearner>(new double[2], 10);
+ seed = new State<>(new double[2], 10);
Wrapper w = new Wrapper(numCategories, numFeatures, prior);
seed.setPayload(w);
@@ -284,7 +284,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
}
private void setupOptimizer(int poolSize) {
- ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>(threadCount, poolSize, seed);
+ ep = new EvolutionaryProcess<>(threadCount, poolSize, seed);
}
/**
@@ -561,22 +561,22 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable {
bufferSize = in.readInt();
int n = in.readInt();
- buffer = Lists.newArrayList();
+ buffer = new ArrayList<>();
for (int i = 0; i < n; i++) {
TrainingExample example = new TrainingExample();
example.readFields(in);
buffer.add(example);
}
- ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>();
+ ep = new EvolutionaryProcess<>();
ep.readFields(in);
- best = new State<Wrapper, CrossFoldLearner>();
+ best = new State<>();
best.readFields(in);
threadCount = in.readInt();
poolSize = in.readInt();
- seed = new State<Wrapper, CrossFoldLearner>();
+ seed = new State<>();
seed.readFields(in);
numFeatures = in.readInt();
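The other Java 7 feature used throughout the patch is the diamond operator: the type arguments after new are inferred from the declared type, which is why new State<Wrapper, CrossFoldLearner>() becomes new State<>() above. A JDK-only sketch of the inference (the map contents are made up for illustration):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DiamondOperatorSketch {
  public static void main(String[] args) {
    // Before Java 7: the type arguments had to be spelled out twice.
    Map<String, List<Integer>> verbose = new HashMap<String, List<Integer>>();

    // Java 7 diamond operator: the compiler infers <String, List<Integer>>
    // from the declared type, exactly as in the hunks above.
    Map<String, List<Integer>> concise = new HashMap<>();

    List<Integer> counts = new ArrayList<>();
    counts.add(42);
    concise.put("answers", counts);
    verbose.putAll(concise);
    System.out.println(verbose);
  }
}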
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
index 36bcae0..f56814b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.classifier.OnlineLearner;
@@ -31,6 +30,7 @@ import org.apache.mahout.math.stats.OnlineAuc;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -47,7 +47,7 @@ public class CrossFoldLearner extends AbstractVectorClassifier implements Online
private static final double MIN_SCORE = 1.0e-50;
private OnlineAuc auc = new GlobalOnlineAuc();
private double logLikelihood;
- private final List<OnlineLogisticRegression> models = Lists.newArrayList();
+ private final List<OnlineLogisticRegression> models = new ArrayList<>();
// lambda, learningRate, perTermOffset, perTermExponent
private double[] parameters = new double[4];
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
index b21860f..dbf3198 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
@@ -22,7 +22,6 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.commons.csv.CSVUtils;
import org.apache.mahout.math.Vector;
@@ -36,11 +35,14 @@ import org.apache.mahout.vectorizer.encoders.TextValueEncoder;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.TreeMap;
/**
* Converts CSV data lines to vectors.
@@ -81,7 +83,7 @@ public class CsvRecordFactory implements RecordFactory {
.put("t", TextValueEncoder.class)
.build();
- private final Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
+ private final Map<String, Set<Integer>> traceDictionary = new TreeMap<>();
private int target;
private final Dictionary targetDictionary;
@@ -113,7 +115,7 @@ public class CsvRecordFactory implements RecordFactory {
return Arrays.asList(CSVUtils.parseLine(line));
}
catch (IOException e) {
- List<String> list = Lists.newArrayList();
+ List<String> list = new ArrayList<>();
list.add(line);
return list;
}
@@ -186,7 +188,7 @@ public class CsvRecordFactory implements RecordFactory {
@Override
public void firstLine(String line) {
// read variable names, build map of name -> column
- final Map<String, Integer> vars = Maps.newHashMap();
+ final Map<String, Integer> vars = new HashMap<>();
variableNames = parseCsvLine(line);
int column = 0;
for (String var : variableNames) {
@@ -202,7 +204,7 @@ public class CsvRecordFactory implements RecordFactory {
}
// create list of predictor column numbers
- predictors = Lists.newArrayList(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() {
+ predictors = new ArrayList<>(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() {
@Override
public Integer apply(String from) {
Integer r = vars.get(from);
@@ -217,7 +219,7 @@ public class CsvRecordFactory implements RecordFactory {
Collections.sort(predictors);
// and map from column number to type encoder for each column that is a predictor
- predictorEncoders = Maps.newHashMap();
+ predictorEncoders = new HashMap<>();
for (Integer predictor : predictors) {
String name;
Class<? extends FeatureVectorEncoder> c;
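With the diamond operator available, Guava's collection factory methods (Lists.newArrayList(), Maps.newHashMap(), Maps.newTreeMap()) no longer save any typing, which is why the hunks above replace them with plain constructors. A JDK-only sketch, with variable names chosen only for illustration:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class CollectionFactorySketch {
  public static void main(String[] args) {
    List<String> variableNames = new ArrayList<>();   // was Lists.newArrayList()
    Map<String, Integer> columns = new HashMap<>();   // was Maps.newHashMap()
    Map<String, Integer> ordered = new TreeMap<>();   // was Maps.newTreeMap()

    variableNames.add("target");
    columns.put("target", 0);
    ordered.putAll(columns);

    // The copy constructor accepts any Collection, which is why
    // Lists.newArrayList(someCollection) can become new ArrayList<>(someCollection),
    // as in the predictors list above.
    List<String> copy = new ArrayList<>(variableNames);
    System.out.println(copy + " " + ordered);
  }
}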
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
index d158f4d..90ef7a8 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
@@ -17,7 +17,6 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Sets;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.classifier.OnlineLearner;
@@ -31,6 +30,7 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
+import java.util.HashSet;
import java.util.Random;
/**
@@ -387,7 +387,7 @@ public class GradientMachine extends AbstractVectorClassifier implements OnlineL
public void train(long trackingKey, String groupKey, int actual, Vector instance) {
Vector hiddenActivation = inputToHidden(instance);
hiddenToOutput(hiddenActivation);
- Collection<Integer> goodLabels = Sets.newHashSet();
+ Collection<Integer> goodLabels = new HashSet<>();
goodLabels.add(actual);
updateRanking(hiddenActivation, goodLabels, 2, rnd);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
index ebb0614..bcd2ebc 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
@@ -17,14 +17,14 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.Vector;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
@@ -55,7 +55,7 @@ public class ModelDissector {
private final Map<String,Vector> weightMap;
public ModelDissector() {
- weightMap = Maps.newHashMap();
+ weightMap = new HashMap<>();
}
/**
@@ -105,14 +105,14 @@ public class ModelDissector {
* @return A list of the top variables.
*/
public List<Weight> summary(int n) {
- Queue<Weight> pq = new PriorityQueue<Weight>();
+ Queue<Weight> pq = new PriorityQueue<>();
for (Map.Entry<String, Vector> entry : weightMap.entrySet()) {
pq.add(new Weight(entry.getKey(), entry.getValue()));
while (pq.size() > n) {
pq.poll();
}
}
- List<Weight> r = Lists.newArrayList(pq);
+ List<Weight> r = new ArrayList<>(pq);
Collections.sort(r, Ordering.natural().reverse());
return r;
}
@@ -170,14 +170,14 @@ public class ModelDissector {
public Weight(String feature, Vector weights, int n) {
this.feature = feature;
// pick out the weight with the largest abs value, but don't forget the sign
- Queue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural());
+ Queue<Category> biggest = new PriorityQueue<>(n + 1, Ordering.natural());
for (Vector.Element element : weights.all()) {
biggest.add(new Category(element.index(), element.get()));
while (biggest.size() > n) {
biggest.poll();
}
}
- categories = Lists.newArrayList(biggest);
+ categories = new ArrayList<>(biggest);
Collections.sort(categories, Ordering.natural().reverse());
value = categories.get(0).weight;
maxIndex = categories.get(0).index;
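Aside from the container swaps, summary() and Weight above use a small bounded-priority-queue idiom to keep only the n largest entries. A JDK-only sketch of that idiom with plain integers; Collections.reverseOrder() stands in for the Guava Ordering purely to keep the example self-contained:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;

public class TopNSketch {
  // Keep the n largest values: add everything to a natural-order (min-heap)
  // PriorityQueue and evict the head whenever it grows past n.
  static List<Integer> topN(Iterable<Integer> values, int n) {
    Queue<Integer> pq = new PriorityQueue<>();
    for (int value : values) {
      pq.add(value);
      while (pq.size() > n) {
        pq.poll(); // drops the current smallest element
      }
    }
    List<Integer> result = new ArrayList<>(pq);
    Collections.sort(result, Collections.reverseOrder());
    return result;
  }

  public static void main(String[] args) {
    System.out.println(topN(Arrays.asList(5, 1, 9, 3, 7, 2), 3)); // prints [9, 7, 5]
  }
}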
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
index f0150e9..f89b245 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
@@ -37,29 +37,20 @@ public final class ModelSerializer {
}
public static void writeBinary(String path, CrossFoldLearner model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+ try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
public static void writeBinary(String path, OnlineLogisticRegression model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+ try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
public static void writeBinary(String path, AdaptiveLogisticRegression model) throws IOException {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
- try {
+ try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) {
PolymorphicWritable.write(out, model);
- } finally {
- Closeables.close(out, false);
}
}
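One behavioral nuance of the conversions above is worth noting: Closeables.close(out, false) rethrew any IOException raised by close(), while Closeables.close(stream, true) swallowed it; try-with-resources always lets a close() failure propagate, attaching it as a suppressed exception if the try body already threw. A toy sketch of that difference (the FlakyResource class is invented for illustration and has no counterpart in the Mahout sources):

import java.io.IOException;

public class SuppressedCloseSketch {
  // A resource whose close() always fails, to show how try-with-resources
  // surfaces close() problems instead of silently dropping them.
  static class FlakyResource implements AutoCloseable {
    @Override
    public void close() throws IOException {
      throw new IOException("close() failed");
    }
  }

  public static void main(String[] args) {
    try (FlakyResource resource = new FlakyResource()) {
      System.out.println("using " + resource);
    } catch (IOException e) {
      // With Closeables.close(resource, true) this failure would have been
      // logged and swallowed; here it reaches the caller.
      System.out.println("caught: " + e.getMessage());
    }
  }
}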
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
index b52cb8c..a04fc8b 100644
--- a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
+++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
@@ -17,12 +17,12 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
import org.apache.mahout.classifier.AbstractVectorClassifier;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.function.Functions;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
@@ -40,7 +40,7 @@ public class RankingGradient implements Gradient {
private int window = 10;
- private final List<Deque<Vector>> history = Lists.newArrayList();
+ private final List<Deque<Vector>> history = new ArrayList<>();
public RankingGradient(int window) {
this.window = window;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
index cc05beb..86fa011 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
@@ -22,12 +22,11 @@ import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.HashMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.common.parameters.Parameter;
import org.apache.mahout.math.RandomAccessSparseVector;
@@ -359,7 +358,7 @@ public abstract class AbstractCluster implements Cluster {
// we assume sequential access in the output
Vector provider = v.isSequentialAccess() ? v : new SequentialAccessSparseVector(v);
- List<Object> terms = Lists.newLinkedList();
+ List<Object> terms = new LinkedList<>();
String term = "";
for (Element elem : provider.nonZeroes()) {
@@ -370,7 +369,7 @@ public abstract class AbstractCluster implements Cluster {
term = String.valueOf(elem.index());
}
- Map<String, Object> term_entry = Maps.newHashMap();
+ Map<String, Object> term_entry = new HashMap<>();
double roundedWeight = (double) Math.round(elem.get() * 1000) / 1000;
if (hasBindings || isSparse) {
term_entry.put(term, roundedWeight);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
index 421ffcf..ad0f8ec 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
@@ -17,6 +17,7 @@
package org.apache.mahout.clustering;
+import java.util.ArrayList;
import java.util.List;
import com.google.common.base.Preconditions;
@@ -52,7 +53,7 @@ public final class ClusteringUtils {
DistanceMeasure distanceMeasure) {
UpdatableSearcher searcher = new ProjectionSearch(distanceMeasure, 3, 1);
searcher.addAll(centroids);
- List<OnlineSummarizer> summarizers = Lists.newArrayList();
+ List<OnlineSummarizer> summarizers = new ArrayList<>();
if (searcher.size() == 0) {
return summarizers;
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
index 6e2c3cf..384e294 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
@@ -18,12 +18,12 @@
package org.apache.mahout.clustering.classify;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -159,7 +159,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
* @throws IOException
*/
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
- List<Cluster> clusterModels = Lists.newArrayList();
+ List<Cluster> clusterModels = new ArrayList<>();
Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
PathFilters.partFilter(), null, false, conf);
@@ -225,7 +225,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold,
boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
if (emitMostLikely) {
int maxValueIndex = pdfPerCluster.maxValueIndex();
WeightedPropertyVectorWritable weightedPropertyVectorWritable =
@@ -238,7 +238,7 @@ public final class ClusterClassificationDriver extends AbstractJob {
private static void writeAllAboveThreshold(List<Cluster> clusterModels, Double clusterClassificationThreshold,
SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
for (Element pdf : pdfPerCluster.nonZeroes()) {
if (pdf.get() >= clusterClassificationThreshold) {
WeightedPropertyVectorWritable wvw = new WeightedPropertyVectorWritable(pdf.get(), vw.get(), props);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
index 9edbd8e..dfddab0 100644
--- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
+++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
@@ -18,12 +18,12 @@
package org.apache.mahout.clustering.classify;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -67,7 +67,7 @@ public class ClusterClassificationMapper extends
threshold = conf.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f);
emitMostLikely = conf.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false);
- clusterModels = Lists.newArrayList();
+ clusterModels = new ArrayList<>();
if (clustersIn != null && !clustersIn.isEmpty()) {
Path clustersInPath = new Path(clustersIn);
@@ -128,13 +128,13 @@ public class ClusterClassificationMapper extends
DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure();
double distance = distanceMeasure.distance(cluster.getCenter(), vw.get());
- Map<Text, Text> props = Maps.newHashMap();
+ Map<Text, Text> props = new HashMap<>();
props.put(new Text("distance"), new Text(Double.toString(distance)));
context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props));
}
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
- List<Cluster> clusters = Lists.newArrayList();
+ List<Cluster> clusters = new ArrayList<>();
FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
Iterator<?> it = new SequenceFileDirValueIterator<Writable>(