You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samoa.apache.org by gd...@apache.org on 2016/04/19 10:35:00 UTC

[01/19] incubator-samoa git commit: SAMOA-58: Incorrect Samza artifact version. Samza no longer produces separate serializers artifact. isFinite since 1.8 - changed to work with what's available since 1.7

Repository: incubator-samoa
Updated Branches:
  refs/heads/master 0640831c5 -> bc92205bc


SAMOA-58: Incorrect Samza artifact version.
Samza no longer produces a separate serializers artifact.
Double.isFinite is only available since Java 1.8 - replaced it with Double.isNaN/Double.isInfinite checks, which are available in Java 1.7


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/7ecdc4b6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/7ecdc4b6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/7ecdc4b6

Branch: refs/heads/master
Commit: 7ecdc4b6f07ccafd119a5d95c2d30453367c47ef
Parents: a342340
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Mar 14 14:49:12 2016 -0400
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 pom.xml                                                        | 2 +-
 .../src/main/java/org/apache/samoa/instances/AvroLoader.java   | 4 +++-
 samoa-samza/pom.xml                                            | 6 ------
 3 files changed, 4 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/7ecdc4b6/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 191499f..edc49bd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -125,7 +125,7 @@
         <kryo.version>2.21</kryo.version>
         <metrics-core.version>2.2.0</metrics-core.version>
         <miniball.version>1.0.3</miniball.version>
-        <samza.version>0.10</samza.version>
+        <samza.version>0.10.0</samza.version>
         <flink.version>0.10.1</flink.version>
         <slf4j-log4j12.version>1.7.2</slf4j-log4j12.version>
         <slf4j-simple.version>1.7.5</slf4j-simple.version>

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/7ecdc4b6/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
----------------------------------------------------------------------
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
index 8c49537..2b36744 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
@@ -167,7 +167,9 @@ public abstract class AvroLoader implements Loader {
       {
         if (value instanceof Double) {
           Double v = (double) value;
-          if (Double.isFinite(v)) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
+          //if (Double.isFinite(v))
+          if (!Double.isNaN(v) && !Double.isInfinite(v))
+            this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
         }
         else if (value instanceof Long)
           this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value);

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/7ecdc4b6/samoa-samza/pom.xml
----------------------------------------------------------------------
diff --git a/samoa-samza/pom.xml b/samoa-samza/pom.xml
index 945ae39..2ea0d98 100644
--- a/samoa-samza/pom.xml
+++ b/samoa-samza/pom.xml
@@ -62,12 +62,6 @@
       <version>${samza.version}</version>
     </dependency>
 
-    <dependency>
-      <groupId>org.apache.samza</groupId>
-      <artifactId>samza-serializers_2.10</artifactId>
-      <version>${samza.version}</version>
-    </dependency>
-
     <!--<dependency> <groupId>org.apache.samza</groupId> <artifactId>samza-shell</artifactId> <classifier>dist</classifier> 
       <type>tgz</type> <version>${samza.version}</version> </dependency> -->
 


[12/19] incubator-samoa git commit: SAMOA-58: was not initializing arrays prior to use

Posted by gd...@apache.org.
SAMOA-58: was not initializing arrays prior to use


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/c743b7e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/c743b7e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/c743b7e0

Branch: refs/heads/master
Commit: c743b7e08d3174d247e534fa7fba2794093828af
Parents: 397de94
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 15:56:51 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../samoa/evaluation/F1ClassificationPerformanceEvaluator.java    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/c743b7e0/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
index b59480a..3046360 100644
--- a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
+++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
@@ -33,7 +33,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
         ClassificationPerformanceEvaluator {
 
     private static final long serialVersionUID = 1L;
-    protected int numClasses;
+    protected int numClasses = -1;
 
     protected long[] support;
     protected long[] truePos;
@@ -63,6 +63,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
 
     @Override
     public void addResult(Instance inst, double[] classVotes) {
+        if (numClasses==-1) reset(inst.numClasses());
         int trueClass = (int) inst.classValue();
         this.support[trueClass] += 1;
         int predictedClass = Utils.maxIndex(classVotes);


[17/19] incubator-samoa git commit: SAMOA-58: adding all measurements

Posted by gd...@apache.org.
SAMOA-58: adding all measurements


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/5d131e28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/5d131e28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/5d131e28

Branch: refs/heads/master
Commit: 5d131e283fc717059f0067c1fef0a40da1a73448
Parents: e5ca1b2
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 16:48:35 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../evaluation/F1ClassificationPerformanceEvaluator.java | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/5d131e28/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
index 7ad4292..89e74be 100644
--- a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
+++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
@@ -26,6 +26,10 @@ import org.apache.samoa.instances.Utils;
 import org.apache.samoa.moa.AbstractMOAObject;
 import org.apache.samoa.moa.core.Measurement;
 
+import java.util.Collections;
+import java.util.List;
+import java.util.Vector;
+
 /**
  * Created by Edi Bice (edi.bice gmail com) on 2/22/2016.
  */
@@ -84,7 +88,12 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
 
     @Override
     public Measurement[] getPerformanceMeasurements() {
-        return getF1Measurements();
+        List<Measurement> measurements = new Vector<>();
+        Collections.addAll(measurements, getSupportMeasurements());
+        Collections.addAll(measurements, getPrecisionMeasurements());
+        Collections.addAll(measurements, getRecallMeasurements());
+        Collections.addAll(measurements, getF1Measurements());
+        return measurements.toArray(new Measurement[measurements.size()]);
     }
 
     private Measurement[] getSupportMeasurements() {


[16/19] incubator-samoa git commit: SAMOA-58: Just because it fools compiler ...

Posted by gd...@apache.org.
SAMOA-58: Just because it fools compiler ...


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/a1e0376e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/a1e0376e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/a1e0376e

Branch: refs/heads/master
Commit: a1e0376e99771d485709319800f62e8046573fcd
Parents: 9e174aa
Author: edi_bice <ed...@yahoo.com>
Authored: Tue Mar 1 14:52:47 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/samoa/streams/FileStream.java      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a1e0376e/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
index d9a7554..e9595b1 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
@@ -31,6 +31,7 @@ import org.apache.samoa.streams.fs.FileStreamSource;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Arrays;
 
 /**
  * InstanceStream for files (Abstract class: subclass this class for different file formats)
@@ -58,7 +59,7 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
   //protected transient Reader fileReader;
   protected transient InputStream inputStream;
   protected Instances instances;
-  protected FloatOption[] classWeights;
+  protected FloatOption[] classWeights; // = new FloatOption[0];
 
   protected boolean hitEndOfStream;
   private boolean hasStarted;
@@ -169,7 +170,7 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
   @Override
   public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
     this.fileSource = sourceTypeOption.getValue();
-    this.classWeights = (FloatOption[]) classWeightsOption.getList();
+    this.classWeights = Arrays.copyOf(classWeightsOption.getList(), classWeightsOption.getList().length, FloatOption[].class);
     this.hasStarted = false;
   }
 


[14/19] incubator-samoa git commit: SAMOA-58: need more decimal digits

Posted by gd...@apache.org.
SAMOA-58: need more decimal digits


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/e5ca1b27
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/e5ca1b27
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/e5ca1b27

Branch: refs/heads/master
Commit: e5ca1b27a142fefa6997b89356e4c16afab8ea4c
Parents: 1c8778e
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 16:42:20 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../evaluation/F1ClassificationPerformanceEvaluator.java    | 6 +++---
 .../main/java/org/apache/samoa/moa/core/Measurement.java    | 9 +++++++--
 2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/e5ca1b27/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
index 726f487..7ad4292 100644
--- a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
+++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
@@ -100,7 +100,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
         Measurement[] measurements = new Measurement[this.numClasses];
         for (int i = 0; i < this.numClasses; i++) {
             String ml = String.format("class %s precision", i);
-            measurements[i] = new Measurement(ml, getPrecision(i));
+            measurements[i] = new Measurement(ml, getPrecision(i), 10);
         }
         return measurements;
     }
@@ -109,7 +109,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
         Measurement[] measurements = new Measurement[this.numClasses];
         for (int i = 0; i < this.numClasses; i++) {
             String ml = String.format("class %s recall", i);
-            measurements[i] = new Measurement(ml, getRecall(i));
+            measurements[i] = new Measurement(ml, getRecall(i), 10);
         }
         return measurements;
     }
@@ -118,7 +118,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
         Measurement[] measurements = new Measurement[this.numClasses];
         for (int i = 0; i < this.numClasses; i++) {
             String ml = String.format("class %s f1-score", i);
-            measurements[i] = new Measurement(ml, getF1Score(i));
+            measurements[i] = new Measurement(ml, getF1Score(i), 10);
         }
         return measurements;
     }

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/e5ca1b27/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
index 8fc80c7..8a3d598 100644
--- a/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
@@ -36,14 +36,19 @@ public class Measurement extends AbstractMOAObject {
   private static final long serialVersionUID = 1L;
 
   protected String name;
-
   protected double value;
+  protected int fractionDigits;
 
   public Measurement(String name, double value) {
     this.name = name;
     this.value = value;
   }
 
+  public Measurement(String name, double value, int fractionDigits) {
+    this(name, value);
+    this.fractionDigits = fractionDigits;
+  }
+
   public String getName() {
     return this.name;
   }
@@ -110,6 +115,6 @@ public class Measurement extends AbstractMOAObject {
   public void getDescription(StringBuilder sb, int indent) {
     sb.append(getName());
     sb.append(" = ");
-    sb.append(StringUtils.doubleToString(getValue(), 3));
+    sb.append(StringUtils.doubleToString(getValue(), this.fractionDigits));
   }
 }


[05/19] incubator-samoa git commit: SAMOA-58: Only add fields of supported types (double, float, long, int and enum) rather than adding and defaulting all non-enum to numeric and failing at value parse time

Posted by gd...@apache.org.
SAMOA-58: Only add fields of supported types (double, float, long, int and enum) rather than adding and defaulting all non-enum to numeric and failing at value parse time


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/381e6a91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/381e6a91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/381e6a91

Branch: refs/heads/master
Commit: 381e6a91e3b5ca4c1de96264537ea670ccb7af6f
Parents: 0640831
Author: edi_bice <ed...@yahoo.com>
Authored: Wed Feb 17 10:45:07 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/samoa/instances/AvroLoader.java    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/381e6a91/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
----------------------------------------------------------------------
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
index 0547a5c..e7f18b5 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
@@ -254,7 +254,10 @@ public abstract class AvroLoader implements Loader {
         List<String> attributeLabels = attributeSchema.getEnumSymbols();
         attributes.add(new Attribute(field.name(), attributeLabels));
       }
-      else
+      else if (attributeSchema.getType() == Schema.Type.DOUBLE
+              || attributeSchema.getType() == Schema.Type.FLOAT
+              || attributeSchema.getType() == Schema.Type.LONG
+              || attributeSchema.getType() == Schema.Type.INT)
         attributes.add(new Attribute(field.name()));
     }
     return new InstanceInformation(relation, attributes);


[06/19] incubator-samoa git commit: SAMOA-58: until we change samza to produce files with .avro extension

Posted by gd...@apache.org.
SAMOA-58: until we change samza to produce files with .avro extension


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/ebb35163
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/ebb35163
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/ebb35163

Branch: refs/heads/master
Commit: ebb35163f36434a632d64d95706166fc9a6eaee7
Parents: 381e6a9
Author: edi_bice <ed...@yahoo.com>
Authored: Wed Feb 17 16:53:02 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/samoa/streams/AvroFileStream.java    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/ebb35163/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
index 15229a4..5b4e755 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
@@ -139,7 +139,8 @@ public class AvroFileStream extends FileStream {
   public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
     super.prepareForUseImpl(monitor, repository);
     String filePath = this.avroFileOption.getFile().getAbsolutePath();
-    this.fileSource.init(filePath, AvroFileStream.AVRO_FILE_EXTENSION);
+    //this.fileSource.init(filePath, AvroFileStream.AVRO_FILE_EXTENSION);
+    this.fileSource.init(filePath, null);
     this.lastInstanceRead = null;
   }
 


[10/19] incubator-samoa git commit: SAMOA-58: Not sure why these still implement Learner instead of ClassificationLearner - technically yes one could have an ensemble of regressors or classifiers but these are not generic as currently implemented.

Posted by gd...@apache.org.
SAMOA-58: Not sure why these still implement Learner instead of ClassificationLearner - technically yes one could have an ensemble of regressors or classifiers but these are not generic as currently implemented.

And yes this is not a pedantic change. It is needed for the compatibility test between ensemble learners and evaluators to work correctly.


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/397de94a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/397de94a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/397de94a

Branch: refs/heads/master
Commit: 397de94aee1e4a945003efef9755effc0673627a
Parents: a6b6e2e
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 15:44:55 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../org/apache/samoa/learners/classifiers/SingleClassifier.java   | 3 ++-
 .../samoa/learners/classifiers/ensemble/AdaptiveBagging.java      | 3 ++-
 .../org/apache/samoa/learners/classifiers/ensemble/Bagging.java   | 3 ++-
 .../org/apache/samoa/learners/classifiers/ensemble/Boosting.java  | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/397de94a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java
index 5c989f3..b2a09da 100644
--- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java
+++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/SingleClassifier.java
@@ -31,6 +31,7 @@ import java.util.Set;
 import org.apache.samoa.core.Processor;
 import org.apache.samoa.instances.Instances;
 import org.apache.samoa.learners.AdaptiveLearner;
+import org.apache.samoa.learners.ClassificationLearner;
 import org.apache.samoa.learners.Learner;
 import org.apache.samoa.moa.classifiers.core.driftdetection.ChangeDetector;
 import org.apache.samoa.topology.Stream;
@@ -44,7 +45,7 @@ import com.github.javacliparser.Configurable;
  * Classifier that contain a single classifier.
  * 
  */
-public final class SingleClassifier implements Learner, AdaptiveLearner, Configurable {
+public final class SingleClassifier implements ClassificationLearner, AdaptiveLearner, Configurable {
 
   private static final long serialVersionUID = 684111382631697031L;
 

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/397de94a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java
index 4b2c531..6680862 100644
--- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java
+++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/AdaptiveBagging.java
@@ -31,6 +31,7 @@ import java.util.Set;
 import org.apache.samoa.core.Processor;
 import org.apache.samoa.instances.Instances;
 import org.apache.samoa.learners.AdaptiveLearner;
+import org.apache.samoa.learners.ClassificationLearner;
 import org.apache.samoa.learners.Learner;
 import org.apache.samoa.learners.classifiers.trees.VerticalHoeffdingTree;
 import org.apache.samoa.moa.classifiers.core.driftdetection.ADWINChangeDetector;
@@ -47,7 +48,7 @@ import com.github.javacliparser.IntOption;
 /**
  * An adaptive version of the Bagging Classifier by Oza and Russell.
  */
-public class AdaptiveBagging implements Learner, Configurable {
+public class AdaptiveBagging implements ClassificationLearner, Configurable {
 
   private static final long serialVersionUID = 8217274236558839040L;
   private static final Logger logger = LoggerFactory.getLogger(AdaptiveBagging.class);

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/397de94a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java
index 5d7bbfc..7178738 100644
--- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java
+++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Bagging.java
@@ -28,6 +28,7 @@ import java.util.Set;
 
 import org.apache.samoa.core.Processor;
 import org.apache.samoa.instances.Instances;
+import org.apache.samoa.learners.ClassificationLearner;
 import org.apache.samoa.learners.Learner;
 import org.apache.samoa.learners.classifiers.trees.VerticalHoeffdingTree;
 import org.apache.samoa.topology.Stream;
@@ -43,7 +44,7 @@ import com.google.common.collect.ImmutableSet;
 /**
  * The Bagging Classifier by Oza and Russell.
  */
-public class Bagging implements Learner, Configurable {
+public class Bagging implements ClassificationLearner, Configurable {
 
   /** The Constant serialVersionUID. */
   private static final long serialVersionUID = -2971850264864952099L;

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/397de94a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java
index 4971fba..6512028 100644
--- a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java
+++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/Boosting.java
@@ -30,6 +30,7 @@ import java.util.Set;
 
 import org.apache.samoa.core.Processor;
 import org.apache.samoa.instances.Instances;
+import org.apache.samoa.learners.ClassificationLearner;
 import org.apache.samoa.learners.Learner;
 import org.apache.samoa.learners.classifiers.SingleClassifier;
 import org.apache.samoa.topology.Stream;
@@ -42,7 +43,7 @@ import com.github.javacliparser.IntOption;
 /**
  * The Bagging Classifier by Oza and Russell.
  */
-public class Boosting implements Learner, Configurable {
+public class Boosting implements ClassificationLearner, Configurable {
 
   /** The Constant serialVersionUID. */
   private static final long serialVersionUID = -2971850264864952099L;


[04/19] incubator-samoa git commit: SAMOA-58: configure don't code

Posted by gd...@apache.org.
SAMOA-58: configure don't code


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/127eaab9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/127eaab9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/127eaab9

Branch: refs/heads/master
Commit: 127eaab905c46304744361f1d1f9e18ab8c78717
Parents: 6e81a62
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Jan 25 12:02:22 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../META-INF/services/org.apache.hadoop.fs.FileSystem |  2 ++
 .../java/org/apache/samoa/utils/SystemsUtils.java     | 14 ++++++++++++++
 2 files changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/127eaab9/samoa-api/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/samoa-api/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
new file mode 100644
index 0000000..bcd64ed
--- /dev/null
+++ b/samoa-api/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
@@ -0,0 +1,2 @@
+org.apache.hadoop.fs.LocalFileSystem
+org.apache.hadoop.hdfs.DistributedFileSystem
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/127eaab9/samoa-samza/src/main/java/org/apache/samoa/utils/SystemsUtils.java
----------------------------------------------------------------------
diff --git a/samoa-samza/src/main/java/org/apache/samoa/utils/SystemsUtils.java b/samoa-samza/src/main/java/org/apache/samoa/utils/SystemsUtils.java
index 6c369ae..ad2b383 100644
--- a/samoa-samza/src/main/java/org/apache/samoa/utils/SystemsUtils.java
+++ b/samoa-samza/src/main/java/org/apache/samoa/utils/SystemsUtils.java
@@ -96,6 +96,20 @@ public class SystemsUtils {
     private static String hdfsConfPath;
     private static String configHomePath;
     private static String samoaDir = null;
+    
+    private static Configuration getConfig() {
+      Configuration config = new Configuration();
+      config.addResource(new Path(coreConfPath));
+      config.addResource(new Path(hdfsConfPath));
+      /* will do same differently - see http://www.lucidelectricdreams.com/2013/11/no-filesystem-for-scheme-hdfs.html
+      config.set("fs.hdfs.impl",
+              org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()
+      );
+      config.set("fs.file.impl",
+              org.apache.hadoop.fs.LocalFileSystem.class.getName()
+      ); */
+      return config;
+    }
 
     static void setHadoopConfigHome(String hadoopConfPath) {
       logger.info("Hadoop config home:{}", hadoopConfPath);


[19/19] incubator-samoa git commit: SAMOA-58: Reverting back to Samza 0.7.0 in order to keep this pull request from exploding into a full Samza upgrade set of changes (did that in a separate pull request which I closed for similar scope creep and pollution)

Posted by gd...@apache.org.
SAMOA-58: Reverting back to Samza 0.7.0 in order to keep this pull request from exploding into a full Samza upgrade set of changes (did that in a separate pull request which I closed for similar scope creep and pollution)
Fix #48


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/bc92205b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/bc92205b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/bc92205b

Branch: refs/heads/master
Commit: bc92205bc97e54e863035a9787196852bc45a9ed
Parents: 7ecdc4b
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Mar 14 16:26:42 2016 -0400
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:34:04 2016 +0300

----------------------------------------------------------------------
 pom.xml             | 2 +-
 samoa-samza/pom.xml | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/bc92205b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index edc49bd..4f55800 100644
--- a/pom.xml
+++ b/pom.xml
@@ -125,7 +125,7 @@
         <kryo.version>2.21</kryo.version>
         <metrics-core.version>2.2.0</metrics-core.version>
         <miniball.version>1.0.3</miniball.version>
-        <samza.version>0.10.0</samza.version>
+        <samza.version>0.7.0</samza.version>
         <flink.version>0.10.1</flink.version>
         <slf4j-log4j12.version>1.7.2</slf4j-log4j12.version>
         <slf4j-simple.version>1.7.5</slf4j-simple.version>

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/bc92205b/samoa-samza/pom.xml
----------------------------------------------------------------------
diff --git a/samoa-samza/pom.xml b/samoa-samza/pom.xml
index 2ea0d98..945ae39 100644
--- a/samoa-samza/pom.xml
+++ b/samoa-samza/pom.xml
@@ -62,6 +62,12 @@
       <version>${samza.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.samza</groupId>
+      <artifactId>samza-serializers_2.10</artifactId>
+      <version>${samza.version}</version>
+    </dependency>
+
     <!--<dependency> <groupId>org.apache.samza</groupId> <artifactId>samza-shell</artifactId> <classifier>dist</classifier> 
       <type>tgz</type> <version>${samza.version}</version> </dependency> -->
 


[07/19] incubator-samoa git commit: SAMOA-58: A tentative solution to issue described in:

Posted by gd...@apache.org.
SAMOA-58: A tentative solution to issue described in:

https://issues.apache.org/jira/browse/SAMOA-58


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/16046cce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/16046cce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/16046cce

Branch: refs/heads/master
Commit: 16046cce5609fc1d505da72901c8fecdf708d844
Parents: ebb3516
Author: edi_bice <ed...@yahoo.com>
Authored: Thu Feb 18 17:06:12 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../main/java/org/apache/samoa/streams/FileStream.java   | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/16046cce/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
index eece541..2998b22 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
@@ -80,7 +80,16 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
 
   @Override
   public boolean hasMoreInstances() {
-    return !this.hitEndOfStream;
+    if (this.hitEndOfStream) {
+      if (getNextFileReader()) {
+        this.hitEndOfStream = false;
+        return hasMoreInstances();
+      } else {
+        return false;
+      }
+    } else {
+      return true;
+    }
   }
 
   @Override


[08/19] incubator-samoa git commit: SAMOA-58: cherry-picked from faf branch - changes needed to be able to read from HDFS on a YARN 2.7.1 cluster

Posted by gd...@apache.org.
SAMOA-58: cherry-picked from faf branch - changes needed to be able to read from HDFS on a YARN 2.7.1 cluster


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/2c60852a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/2c60852a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/2c60852a

Branch: refs/heads/master
Commit: 2c60852a9a7559788c46c56d8680058e344fb763
Parents: 127eaab
Author: Edi Bice <ed...@yahoo.com>
Authored: Mon Feb 22 14:25:43 2016 +0000
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 bin/samoa |  9 ++++++---
 pom.xml   | 10 +++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/2c60852a/bin/samoa
----------------------------------------------------------------------
diff --git a/bin/samoa b/bin/samoa
index 1a26caa..5f41ce6 100755
--- a/bin/samoa
+++ b/bin/samoa
@@ -47,7 +47,7 @@ JAR_PATH=$2
 JAR_FILE=$(basename $JAR_PATH)
 JAR_DIR=$(dirname $JAR_PATH)
 OPTIONS=$3
-
+#JAVA_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005"
 
 if [ $PLATFORM = 'S4' ]; then
 
@@ -274,8 +274,11 @@ elif [ $PLATFORM = 'SAMZA' ]; then
         done
 
         DEPLOYABLE=$JAR_PATH
-        java -Dsamza.log.dir=$BASE_DIR/logs -Dsamza.container.name=client -cp $DEPLOYABLE org.apache.samoa.SamzaDoTask $COMPLETE_ARG --mode=$MODE_ARG \
-               --yarn_home=$YARN_HOME/conf --zookeeper=$ZOOKEEPER_HOST:$ZOOKEEPER_PORT --kafka=$KAFKA_BROKER_LIST \
+        #HADOOP_CLASSPATH=`hadoop classpath`
+        YARN_CLASSPATH=`yarn classpath`
+        java -Dsamza.log.dir=$BASE_DIR/logs -Dsamza.container.name=client \
+               -cp $YARN_CLASSPATH:$DEPLOYABLE org.apache.samoa.SamzaDoTask $COMPLETE_ARG \
+               --mode=$MODE_ARG --yarn_home=$YARN_HOME/conf --zookeeper=$ZOOKEEPER_HOST:$ZOOKEEPER_PORT --kafka=$KAFKA_BROKER_LIST \
                --jar_package=$JAR_PATH --yarn_am_mem=$YARN_AM_MEMORY --yarn_container_mem=$YARN_CONTAINER_MEMORY \
                --kafka_replication_factor=$KAFKA_REPLICATION_FACTOR --checkpoint_frequency=$CHECKPOINT_FREQUENCY \
                --kryo_register=$BASE_DIR/$KRYO_REGISTER_FILE --pi_per_container=$PI_PER_CONTAINER \

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/2c60852a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3304937..e02a1d6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -116,7 +116,7 @@
 
         <commons-lang3.version>3.1</commons-lang3.version>
         <guava.version>17.0</guava.version>
-        <hadoop.version>2.2.0</hadoop.version>
+        <hadoop.version>2.6.0</hadoop.version>
         <javacliparser.version>0.5.0</javacliparser.version>
         <jcip-annotations.version>1.0</jcip-annotations.version>
         <jmockit.version>1.13</jmockit.version>
@@ -125,7 +125,7 @@
         <kryo.version>2.21</kryo.version>
         <metrics-core.version>2.2.0</metrics-core.version>
         <miniball.version>1.0.3</miniball.version>
-        <samza.version>0.7.0</samza.version>
+        <samza.version>0.10</samza.version>
         <flink.version>0.10.1</flink.version>
         <slf4j-log4j12.version>1.7.2</slf4j-log4j12.version>
         <slf4j-simple.version>1.7.5</slf4j-simple.version>
@@ -156,10 +156,10 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.1</version>
+                <version>3.5</version>
                 <configuration>
-                    <source>1.7</source>
-                    <target>1.7</target>
+                    <source>1.8</source>
+                    <target>1.8</target>
                 </configuration>
             </plugin>
             <plugin>


[11/19] incubator-samoa git commit: SAMOA-58: Implementation of the standard precision, recall, and f1-score measures for each class in a multi-class setting

Posted by gd...@apache.org.
SAMOA-58: Implementation of the standard precision, recall, and f1-score measures for each class in a multi-class setting


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/a6b6e2e5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/a6b6e2e5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/a6b6e2e5

Branch: refs/heads/master
Commit: a6b6e2e52afd7e8eb7a57a55cfc9f337220d7588
Parents: 2c60852
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 14:19:00 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../F1ClassificationPerformanceEvaluator.java   | 146 +++++++++++++++++++
 1 file changed, 146 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a6b6e2e5/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
new file mode 100644
index 0000000..b59480a
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
@@ -0,0 +1,146 @@
+package org.apache.samoa.evaluation;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2016 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.instances.Utils;
+import org.apache.samoa.moa.AbstractMOAObject;
+import org.apache.samoa.moa.core.Measurement;
+
+/**
+ * Created by Edi Bice (edi.bice gmail com) on 2/22/2016.
+ */
+public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject implements
+        ClassificationPerformanceEvaluator {
+
+    private static final long serialVersionUID = 1L;
+    protected int numClasses;
+
+    protected long[] support;
+    protected long[] truePos;
+    protected long[] falsePos;
+    protected long[] trueNeg;
+    protected long[] falseNeg;
+
+    @Override
+    public void reset() {
+        reset(this.numClasses);
+    }
+
+    /** Resizes the evaluator to numClasses and zeroes every per-class counter. */
+    public void reset(int numClasses) {
+        this.numClasses = numClasses;
+        // Every counter array is allocated here, including support, which
+        // addResult and getSupportMeasurements index into; leaving it
+        // unallocated would make them throw NullPointerException.
+        // Java zero-fills freshly created long[] arrays, so no explicit
+        // clearing loop is required after allocation.
+        this.support = new long[numClasses];
+        this.truePos = new long[numClasses];
+        this.falsePos = new long[numClasses];
+        this.trueNeg = new long[numClasses];
+        this.falseNeg = new long[numClasses];
+    }
+
+    @Override
+    public void addResult(Instance inst, double[] classVotes) {
+        int trueClass = (int) inst.classValue();
+        this.support[trueClass] += 1;
+        int predictedClass = Utils.maxIndex(classVotes);
+        if (predictedClass == trueClass) {
+            this.truePos[trueClass] += 1;
+            for (int i = 0; i < this.numClasses; i++) {
+                if (i!=predictedClass) this.trueNeg[i] += 1;
+            }
+        } else {
+            this.falsePos[predictedClass] += 1;
+            this.falseNeg[trueClass] += 1;
+            for (int i = 0; i < this.numClasses; i++) {
+                if (!(i==predictedClass || i==trueClass)) this.trueNeg[i] += 1;
+            }
+        }
+    }
+
+    @Override
+    public Measurement[] getPerformanceMeasurements() {
+        return getF1Measurements();
+    }
+
+    private Measurement[] getSupportMeasurements() {
+        Measurement[] measurements = new Measurement[this.numClasses];
+        for (int i = 0; i < this.numClasses; i++) {
+            String ml = String.format("class %s support", i);
+            measurements[i] = new Measurement(ml, this.support[i]);
+        }
+        return measurements;
+    }
+
+    private Measurement[] getPrecisionMeasurements() {
+        Measurement[] measurements = new Measurement[this.numClasses];
+        for (int i = 0; i < this.numClasses; i++) {
+            String ml = String.format("class %s precision", i);
+            measurements[i] = new Measurement(ml, getPrecision(i));
+        }
+        return measurements;
+    }
+
+    private Measurement[] getRecallMeasurements() {
+        Measurement[] measurements = new Measurement[this.numClasses];
+        for (int i = 0; i < this.numClasses; i++) {
+            String ml = String.format("class %s recall", i);
+            measurements[i] = new Measurement(ml, getRecall(i));
+        }
+        return measurements;
+    }
+
+    private Measurement[] getF1Measurements() {
+        Measurement[] measurements = new Measurement[this.numClasses];
+        for (int i = 0; i < this.numClasses; i++) {
+            String ml = String.format("class %s f1-score", i);
+            measurements[i] = new Measurement(ml, getF1Score(i));
+        }
+        return measurements;
+    }
+
+    @Override
+    public void getDescription(StringBuilder sb, int indent) {
+        Measurement.getMeasurementsDescription(getSupportMeasurements(), sb, indent);
+        Measurement.getMeasurementsDescription(getPrecisionMeasurements(), sb, indent);
+        Measurement.getMeasurementsDescription(getRecallMeasurements(), sb, indent);
+        Measurement.getMeasurementsDescription(getF1Measurements(), sb, indent);
+    }
+
+    private double getPrecision(int classIndex) {
+        return (double) this.truePos[classIndex] / (this.truePos[classIndex] + this.falsePos[classIndex]);
+    }
+
+    private double getRecall(int classIndex) {
+        return (double) this.truePos[classIndex] / (this.truePos[classIndex] + this.falseNeg[classIndex]);
+    }
+
+    private double getF1Score(int classIndex) {
+        double precision = getPrecision(classIndex);
+        double recall = getRecall(classIndex);
+        return 2 * (precision * recall) / (precision + recall);
+    }
+
+}


[03/19] incubator-samoa git commit: SAMOA-58: Needed to loop over all Union types to determine if Union-typed field is numeric or not.

Posted by gd...@apache.org.
SAMOA-58: Needed to loop over all Union types to determine if Union-typed field is numeric or not.


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/78ae8fce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/78ae8fce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/78ae8fce

Branch: refs/heads/master
Commit: 78ae8fce13927afd4ec04d1f83e7a500009520e2
Parents: 5d131e2
Author: edi_bice <ed...@yahoo.com>
Authored: Tue Feb 23 10:49:47 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../org/apache/samoa/instances/AvroLoader.java  | 23 ++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/78ae8fce/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
----------------------------------------------------------------------
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
index e7f18b5..9f0664c 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
@@ -254,15 +254,30 @@ public abstract class AvroLoader implements Loader {
         List<String> attributeLabels = attributeSchema.getEnumSymbols();
         attributes.add(new Attribute(field.name(), attributeLabels));
       }
-      else if (attributeSchema.getType() == Schema.Type.DOUBLE
-              || attributeSchema.getType() == Schema.Type.FLOAT
-              || attributeSchema.getType() == Schema.Type.LONG
-              || attributeSchema.getType() == Schema.Type.INT)
+      else if (isNumeric(field))
         attributes.add(new Attribute(field.name()));
     }
     return new InstanceInformation(relation, attributes);
   }
 
+  private boolean isNumeric(Field field) {
+    if (field.schema().getType() == Schema.Type.DOUBLE
+            || field.schema().getType() == Schema.Type.FLOAT
+            || field.schema().getType() == Schema.Type.LONG
+            || field.schema().getType() == Schema.Type.INT)
+      return true;
+    if (field.schema().getType() == Schema.Type.UNION) {
+      for (Schema schema: field.schema().getTypes()) {
+        if (schema.getType() == Schema.Type.DOUBLE
+                || schema.getType() == Schema.Type.FLOAT
+                || schema.getType() == Schema.Type.LONG
+                || schema.getType() == Schema.Type.INT)
+          return true;
+      }
+    }
+    return false;
+  }
+
   /**
    * Identifies if the dataset is is Sparse or Dense
    * 


[02/19] incubator-samoa git commit: SAMOA-58: changes @gdfm requested

Posted by gd...@apache.org.
SAMOA-58: changes @gdfm requested


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/a3423402
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/a3423402
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/a3423402

Branch: refs/heads/master
Commit: a34234026571ab3710e08cbcdf2f6260c8f2b7ca
Parents: a1e0376
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Mar 14 14:01:59 2016 -0400
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 pom.xml                                                       | 4 ++--
 .../src/main/java/org/apache/samoa/moa/core/Measurement.java  | 6 +++---
 .../src/main/java/org/apache/samoa/streams/FileStream.java    | 7 +++++--
 3 files changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a3423402/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e02a1d6..191499f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -158,8 +158,8 @@
                 <artifactId>maven-compiler-plugin</artifactId>
                 <version>3.5</version>
                 <configuration>
-                    <source>1.8</source>
-                    <target>1.8</target>
+                    <source>1.7</source>
+                    <target>1.7</target>
                 </configuration>
             </plugin>
             <plugin>

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a3423402/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
index 8a3d598..cfda55f 100644
--- a/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/Measurement.java
@@ -40,12 +40,12 @@ public class Measurement extends AbstractMOAObject {
   protected int fractionDigits;
 
   public Measurement(String name, double value) {
-    this.name = name;
-    this.value = value;
+    this(name, value, 3);
   }
 
   public Measurement(String name, double value, int fractionDigits) {
-    this(name, value);
+    this.name = name;
+    this.value = value;
     this.fractionDigits = fractionDigits;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a3423402/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
index e9595b1..05dcb1a 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
@@ -20,7 +20,10 @@ package org.apache.samoa.streams;
  * #L%
  */
 
-import com.github.javacliparser.*;
+import com.github.javacliparser.ClassOption;
+import com.github.javacliparser.FloatOption;
+import com.github.javacliparser.IntOption;
+import com.github.javacliparser.ListOption;
 import org.apache.samoa.instances.Instances;
 import org.apache.samoa.instances.InstancesHeader;
 import org.apache.samoa.moa.core.InstanceExample;
@@ -107,7 +110,7 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
     if (classWeights != null && classWeights.length > 0) {
       int i = (int) prevInstance.instance.classValue();
       double w = 1.0;
-      if (i>=0 && i<classWeights.length)
+      if (i >= 0 && i < classWeights.length)
         w = classWeights[i].getValue();
       prevInstance.setWeight(w);
     }


[18/19] incubator-samoa git commit: SAMOA-58: Refactored class index command line parameter in Avro- and ArffFileStream up to common parent FileStream.

Posted by gd...@apache.org.
SAMOA-58: Refactored class index command line parameter in Avro- and ArffFileStream up to common parent FileStream.

Added class weights command line parameter to FileStream.


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/9e174aa4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/9e174aa4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/9e174aa4

Branch: refs/heads/master
Commit: 9e174aa4cbc308ab66038f9059b79e69610e2d72
Parents: 15df252
Author: edi_bice <ed...@yahoo.com>
Authored: Tue Mar 1 14:26:27 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../apache/samoa/streams/ArffFileStream.java    |  4 +--
 .../apache/samoa/streams/AvroFileStream.java    |  4 +--
 .../org/apache/samoa/streams/FileStream.java    | 27 +++++++++++++++-----
 3 files changed, 24 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9e174aa4/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
index 417eb2e..070021e 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
@@ -41,9 +41,9 @@ public class ArffFileStream extends FileStream {
   public FileOption arffFileOption = new FileOption("arffFile", 'f',
       "ARFF File(s) to load.", null, null, false);
 
-  public IntOption classIndexOption = new IntOption("classIndex", 'c',
+  /*public IntOption classIndexOption = new IntOption("classIndex", 'c',
       "Class index of data. 0 for none or -1 for last attribute in file.",
-      -1, -1, Integer.MAX_VALUE);
+      -1, -1, Integer.MAX_VALUE);*/
 
   protected InstanceExample lastInstanceRead;
   private BufferedReader fileReader;

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9e174aa4/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
index 59bf22b..7c575d0 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
@@ -45,8 +45,8 @@ public class AvroFileStream extends FileStream {
   private static final Logger logger = LoggerFactory.getLogger(AvroFileStream.class);
 
   public FileOption avroFileOption = new FileOption("avroFile", 'f', "Avro File(s) to load.", null, null, false);
-  public IntOption classIndexOption = new IntOption("classIndex", 'c',
-      "Class index of data. 0 for none or -1 for last attribute in file.", -1, -1, Integer.MAX_VALUE);
+  /*public IntOption classIndexOption = new IntOption("classIndex", 'c',
+      "Class index of data. 0 for none or -1 for last attribute in file.", -1, -1, Integer.MAX_VALUE);*/
   public StringOption encodingFormatOption = new StringOption("encodingFormatOption", 'e',
       "Encoding format for Avro Files. Can be JSON/AVRO", "BINARY");
 

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9e174aa4/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
index cfa8de5..d9a7554 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
@@ -20,12 +20,7 @@ package org.apache.samoa.streams;
  * #L%
  */
 
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-
+import com.github.javacliparser.*;
 import org.apache.samoa.instances.Instances;
 import org.apache.samoa.instances.InstancesHeader;
 import org.apache.samoa.moa.core.InstanceExample;
@@ -34,7 +29,8 @@ import org.apache.samoa.moa.options.AbstractOptionHandler;
 import org.apache.samoa.moa.tasks.TaskMonitor;
 import org.apache.samoa.streams.fs.FileStreamSource;
 
-import com.github.javacliparser.ClassOption;
+import java.io.IOException;
+import java.io.InputStream;
 
 /**
  * InstanceStream for files (Abstract class: subclass this class for different file formats)
@@ -51,10 +47,18 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
       's', "Source Type (HDFS, local FS)", FileStreamSource.class,
       "LocalFileStreamSource");
 
+  public IntOption classIndexOption = new IntOption("classIndex", 'c',
+          "Class index of data. 0 for none or -1 for last attribute in file.", -1, -1, Integer.MAX_VALUE);
+
+  private FloatOption floatOption = new FloatOption("classWeight", 'w', "", 1.0);
+  public ListOption classWeightsOption = new ListOption("classWeights", 'w',
+          "Class weights in order of class index.", floatOption, new FloatOption[0], ':');
+
   protected transient FileStreamSource fileSource;
   //protected transient Reader fileReader;
   protected transient InputStream inputStream;
   protected Instances instances;
+  protected FloatOption[] classWeights;
 
   protected boolean hitEndOfStream;
   private boolean hasStarted;
@@ -99,6 +103,13 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
       readNextInstanceFromStream();
     }
     InstanceExample prevInstance = this.getLastInstanceRead();
+    if (classWeights != null && classWeights.length > 0) {
+      int i = (int) prevInstance.instance.classValue();
+      double w = 1.0;
+      if (i>=0 && i<classWeights.length)
+        w = classWeights[i].getValue();
+      prevInstance.setWeight(w);
+    }
     readNextInstanceFromStream();
     return prevInstance;
   }
@@ -158,6 +169,8 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
   @Override
   public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
     this.fileSource = sourceTypeOption.getValue();
+    this.classWeights = (FloatOption[]) classWeightsOption.getList();
     this.hasStarted = false;
   }
+
 }


[09/19] incubator-samoa git commit: SAMOA-58: Filter out NaN and Inf values

Posted by gd...@apache.org.
SAMOA-58: Filter out NaN and Inf values


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/15df2523
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/15df2523
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/15df2523

Branch: refs/heads/master
Commit: 15df2523193a2130d077f0afc1b53e7c596231e1
Parents: 78ae8fc
Author: edi_bice <ed...@yahoo.com>
Authored: Tue Feb 23 13:59:33 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../src/main/java/org/apache/samoa/instances/AvroLoader.java  | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/15df2523/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
----------------------------------------------------------------------
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
index 9f0664c..8c49537 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
@@ -26,6 +26,7 @@ import java.util.List;
 
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
+import org.apache.avro.SchemaBuilder;
 import org.apache.avro.generic.GenericData.EnumSymbol;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
@@ -164,8 +165,10 @@ public abstract class AvroLoader implements Loader {
 
       if (isNumeric)
       {
-        if (value instanceof Double)
-          this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
+        if (value instanceof Double) {
+          Double v = (double) value;
+          if (Double.isFinite(v)) this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (double) value);
+        }
         else if (value instanceof Long)
           this.setSparseValue(instance, indexValues, attributeValues, numAttribute, (long) value);
         else if (value instanceof Integer)


[15/19] incubator-samoa git commit: SAMOA-58: Issue described in https://issues.apache.org/jira/browse/SAMOA-58 was apparently more complicated than what was expected in previous commit. While we did succeed in replacing the first exhausted file stream w

Posted by gd...@apache.org.
SAMOA-58: The issue described in https://issues.apache.org/jira/browse/SAMOA-58 was apparently more complicated than was expected in the previous commit. While we did succeed in replacing the first exhausted file stream with a new one, the loader was not changed and would return null. This rework of AvroFileStream, FileStream and ArffFileStream hopefully cleans things up a bit and allows multi-file streams of either type (Avro or Arff).


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/6e81a62d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/6e81a62d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/6e81a62d

Branch: refs/heads/master
Commit: 6e81a62d776545500cf269457032a69f81f1a987
Parents: 16046cc
Author: edi_bice <ed...@yahoo.com>
Authored: Fri Feb 19 11:55:03 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../apache/samoa/streams/ArffFileStream.java    | 36 +++++++++++++-------
 .../apache/samoa/streams/AvroFileStream.java    |  6 ++--
 .../org/apache/samoa/streams/FileStream.java    | 31 ++++-------------
 3 files changed, 32 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/6e81a62d/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
index 9f8a322..417eb2e 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/ArffFileStream.java
@@ -20,7 +20,9 @@ package org.apache.samoa.streams;
  * #L%
  */
 
+import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 
 import org.apache.samoa.instances.Instances;
 import org.apache.samoa.moa.core.InstanceExample;
@@ -44,6 +46,7 @@ public class ArffFileStream extends FileStream {
       -1, -1, Integer.MAX_VALUE);
 
   protected InstanceExample lastInstanceRead;
+  private BufferedReader fileReader;
 
   @Override
   public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
@@ -56,32 +59,39 @@ public class ArffFileStream extends FileStream {
   @Override
   protected void reset() {
     try {
-      if (this.fileReader != null)
-        this.fileReader.close();
-
       fileSource.reset();
     } catch (IOException ioe) {
       throw new RuntimeException("FileStream restart failed.", ioe);
     }
 
-    if (!getNextFileReader()) {
+    if (!getNextFileStream()) {
       hitEndOfStream = true;
       throw new RuntimeException("FileStream is empty.");
     }
   }
 
   @Override
-  protected boolean getNextFileReader() {
-    boolean ret = super.getNextFileReader();
-    if (ret) {
-      this.instances = new Instances(this.fileReader, 1, -1);
-      if (this.classIndexOption.getValue() < 0) {
-        this.instances.setClassIndex(this.instances.numAttributes() - 1);
-      } else if (this.classIndexOption.getValue() > 0) {
-        this.instances.setClassIndex(this.classIndexOption.getValue() - 1);
+  protected boolean getNextFileStream() {
+    if (this.fileReader != null)
+      try {
+        this.fileReader.close();
+      } catch (IOException ioe) {
+        ioe.printStackTrace();
       }
+
+    this.inputStream = this.fileSource.getNextInputStream();
+    if (inputStream == null)
+      return false;
+
+    this.fileReader = new BufferedReader(new InputStreamReader(this.inputStream));
+    this.instances = new Instances(this.fileReader, 1, -1);
+    if (this.classIndexOption.getValue() < 0) {
+      this.instances.setClassIndex(this.instances.numAttributes() - 1);
+    } else if (this.classIndexOption.getValue() > 0) {
+      this.instances.setClassIndex(this.classIndexOption.getValue() - 1);
     }
-    return ret;
+
+    return true;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/6e81a62d/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
index 5b4e755..59bf22b 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/AvroFileStream.java
@@ -54,7 +54,7 @@ public class AvroFileStream extends FileStream {
   protected InstanceExample lastInstanceRead;
 
   /** Represents the binary input stream of avro data **/
-  protected transient InputStream inputStream = null;
+  //protected transient InputStream inputStream = null;
 
   /** The extension to be considered for the files **/
   private static final String AVRO_FILE_EXTENSION = "avro";
@@ -87,6 +87,7 @@ public class AvroFileStream extends FileStream {
    * 
    * @return
    */
+  @Override
   protected boolean getNextFileStream() {
     if (this.inputStream != null)
       try {
@@ -97,8 +98,7 @@ public class AvroFileStream extends FileStream {
       }
 
     this.inputStream = this.fileSource.getNextInputStream();
-
-    if (this.inputStream == null)
+    if (inputStream == null)
       return false;
 
     this.instances = new Instances(this.inputStream, classIndexOption.getValue(), encodingFormatOption.getValue());

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/6e81a62d/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
index 2998b22..cfa8de5 100644
--- a/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/FileStream.java
@@ -52,7 +52,8 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
       "LocalFileStreamSource");
 
   protected transient FileStreamSource fileSource;
-  protected transient Reader fileReader;
+  //protected transient Reader fileReader;
+  protected transient InputStream inputStream;
   protected Instances instances;
 
   protected boolean hitEndOfStream;
@@ -81,7 +82,7 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
   @Override
   public boolean hasMoreInstances() {
     if (this.hitEndOfStream) {
-      if (getNextFileReader()) {
+      if (getNextFileStream()) {
         this.hitEndOfStream = false;
         return hasMoreInstances();
       } else {
@@ -115,38 +116,18 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
 
   protected void reset() {
     try {
-      if (this.fileReader != null)
-        this.fileReader.close();
-
       fileSource.reset();
     } catch (IOException ioe) {
       throw new RuntimeException("FileStream restart failed.", ioe);
     }
 
-    if (!getNextFileReader()) {
+    if (!getNextFileStream()) {
       hitEndOfStream = true;
       throw new RuntimeException("FileStream is empty.");
     }
-
-    this.instances = new Instances(this.fileReader, 1, -1);
-    this.instances.setClassIndex(this.instances.numAttributes() - 1);
   }
 
-  protected boolean getNextFileReader() {
-    if (this.fileReader != null)
-      try {
-        this.fileReader.close();
-      } catch (IOException ioe) {
-        ioe.printStackTrace();
-      }
-
-    InputStream inputStream = this.fileSource.getNextInputStream();
-    if (inputStream == null)
-      return false;
-
-    this.fileReader = new BufferedReader(new InputStreamReader(inputStream));
-    return true;
-  }
+  protected abstract boolean getNextFileStream();
 
   protected boolean readNextInstanceFromStream() {
     if (!hasStarted) {
@@ -158,7 +139,7 @@ public abstract class FileStream extends AbstractOptionHandler implements Instan
       if (readNextInstanceFromFile())
         return true;
 
-      if (!getNextFileReader()) {
+      if (!getNextFileStream()) {
         this.hitEndOfStream = true;
         return false;
       }


[13/19] incubator-samoa git commit: SAMOA-58: forgot to initialize support array

Posted by gd...@apache.org.
SAMOA-58: forgot to initialize support array


Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/1c8778e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/1c8778e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/1c8778e0

Branch: refs/heads/master
Commit: 1c8778e04f0070c65ddf1717666e16656091c10f
Parents: c743b7e
Author: edi_bice <ed...@yahoo.com>
Authored: Mon Feb 22 16:03:09 2016 -0500
Committer: Gianmarco De Francisci Morales <gd...@apache.org>
Committed: Tue Apr 19 11:33:42 2016 +0300

----------------------------------------------------------------------
 .../samoa/evaluation/F1ClassificationPerformanceEvaluator.java      | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/1c8778e0/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
index 3046360..726f487 100644
--- a/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
+++ b/samoa-api/src/main/java/org/apache/samoa/evaluation/F1ClassificationPerformanceEvaluator.java
@@ -48,6 +48,7 @@ public class F1ClassificationPerformanceEvaluator extends AbstractMOAObject impl
 
     public void reset(int numClasses) {
         this.numClasses = numClasses;
+        this.support = new long[numClasses];
         this.truePos = new long[numClasses];
         this.falsePos = new long[numClasses];
         this.trueNeg = new long[numClasses];