You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/27 14:51:53 UTC

[25/51] [partial] mahout git commit: MAHOUT-2042 and MAHOUT-2045 Delete directories which were moved/no longer in use

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
deleted file mode 100644
index e06e8d6..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-
-/**
- * Document with numeric field.
- */
-@Deprecated
-public class NumericFieldDocument extends SingleFieldDocument {
-
-  public static final String NUMERIC_FIELD = "numeric";
-
-  private int numericField;
-
-  public NumericFieldDocument(String id, String field, int numericField) {
-    super(id, field);
-    this.numericField = numericField;
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = new Document();
-
-    document.add(new StringField(ID_FIELD, getId(), Field.Store.YES));
-    document.add(new TextField(FIELD, getField(), Field.Store.YES));
-    document.add(new IntField(NUMERIC_FIELD, numericField, Field.Store.YES));
-
-    return document;
-  }
-
-  public int getNumericField() {
-    return numericField;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
deleted file mode 100644
index 4636a51..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-
-/**
- * Used for testing lucene2seq
- */
-@Deprecated
-public class SingleFieldDocument implements TestDocument {
-
-  public static final String ID_FIELD = "idField";
-  public static final String FIELD = "field";
-
-  private String id;
-  private String field;
-
-  public SingleFieldDocument(String id, String field) {
-    this.id = id;
-    this.field = field;
-  }
-
-  @Override
-  public String getId() {
-    return id;
-  }
-
-  @Override
-  public String getField() {
-    return field;
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = new Document();
-
-    Field idField = new StringField(ID_FIELD, getId(), Field.Store.YES);
-    Field field = new TextField(FIELD, getField(), Field.Store.YES);
-
-    document.add(idField);
-    document.add(field);
-
-    return document;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
deleted file mode 100644
index 7243c71..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-@Deprecated
-public interface TestDocument {
-
-  String getId();
-
-  String getField();
-
-  Document asLuceneDocument();
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
deleted file mode 100644
index 6eb43f6..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
-
-/**
- * Used for testing lucene2seq
- */
-@Deprecated
-public class UnstoredFieldsDocument extends SingleFieldDocument {
-
-  public static final String UNSTORED_FIELD = "unstored";
-
-  public UnstoredFieldsDocument(String id, String field) {
-    super(id, field);
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = super.asLuceneDocument();
-
-    document.add(new StringField(UNSTORED_FIELD, "", Field.Store.NO));
-
-    return document;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java b/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
deleted file mode 100644
index 65b308f..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils;
-
-import com.google.common.collect.Lists;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-import java.util.Iterator;
-
-public class Bump125Test extends MahoutTestCase {
-  @Test
-  public void testIncrement() throws Exception {
-    Iterator<Integer> ref = Lists.newArrayList(1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60,
-            70, 80, 100, 120, 140, 160, 180, 200, 250, 300, 350,
-            400, 500, 600, 700, 800, 1000, 1200, 1400, 1600, 1800,
-            2000, 2500, 3000, 3500, 4000, 5000, 6000, 7000)
-            .iterator();
-    Bump125 b = new Bump125();
-    for (int i = 0; i < 50; i++) {
-      long x = b.increment();
-      assertEquals(ref.next().longValue(), x);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java b/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
deleted file mode 100644
index 7ffa690..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/SplitInputTest.java
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils;
-
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.nio.charset.Charset;
-
-import com.google.common.io.Closeables;
-import org.apache.commons.io.Charsets;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.mahout.classifier.ClassifierData;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.math.map.OpenObjectIntHashMap;
-import org.junit.Before;
-import org.junit.Test;
-
-public final class SplitInputTest extends MahoutTestCase {
-
-  private OpenObjectIntHashMap<String> countMap;
-  private Charset charset;
-  private FileSystem fs;
-  private Path tempInputFile;
-  private Path tempTrainingDirectory;
-  private Path tempTestDirectory;
-  private Path tempMapRedOutputDirectory;
-  private Path tempInputDirectory;
-  private Path tempSequenceDirectory;
-  private SplitInput si;
-
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    Configuration conf = getConfiguration();
-    fs = FileSystem.get(conf);
-
-    super.setUp();
-
-    countMap = new OpenObjectIntHashMap<>();
-
-    charset = Charsets.UTF_8;
-    tempSequenceDirectory = getTestTempFilePath("tmpsequence");
-    tempInputFile = getTestTempFilePath("bayesinputfile");
-    tempTrainingDirectory = getTestTempDirPath("bayestrain");
-    tempTestDirectory = getTestTempDirPath("bayestest");
-    tempMapRedOutputDirectory = new Path(getTestTempDirPath(), "mapRedOutput");
-    tempInputDirectory = getTestTempDirPath("bayesinputdir");
-
-    si = new SplitInput();
-    si.setTrainingOutputDirectory(tempTrainingDirectory);
-    si.setTestOutputDirectory(tempTestDirectory);
-    si.setInputDirectory(tempInputDirectory);
-  }
-
-  private void writeMultipleInputFiles() throws IOException {
-    Writer writer = null;
-    String currentLabel = null;
-    try {
-     for (String[] entry : ClassifierData.DATA) {
-      if (!entry[0].equals(currentLabel)) {
-        currentLabel = entry[0];
-        Closeables.close(writer, false);
-
-        writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(tempInputDirectory, currentLabel)),
-            Charsets.UTF_8));
-      }
-      countMap.adjustOrPutValue(currentLabel, 1, 1);
-      writer.write(currentLabel + '\t' + entry[1] + '\n');
-     }
-    }finally {
-     Closeables.close(writer, false);
-    }
-  }
-
-  private void writeSingleInputFile() throws IOException {
-    Writer writer = new BufferedWriter(new OutputStreamWriter(fs.create(tempInputFile), Charsets.UTF_8));
-    try {
-      for (String[] entry : ClassifierData.DATA) {
-        writer.write(entry[0] + '\t' + entry[1] + '\n');
-      }
-    } finally {
-      Closeables.close(writer, true);
-    }
-  }
-
-  @Test
-  public void testSplitDirectory() throws Exception {
-
-    writeMultipleInputFiles();
-
-    final int testSplitSize = 1;
-    si.setTestSplitSize(testSplitSize);
-    si.setCallback(new SplitInput.SplitCallback() {
-          @Override
-          public void splitComplete(Path inputFile, int lineCount, int trainCount, int testCount, int testSplitStart) {
-            int trainingLines = countMap.get(inputFile.getName()) - testSplitSize;
-            assertSplit(fs, inputFile, charset, testSplitSize, trainingLines, tempTrainingDirectory, tempTestDirectory);
-          }
-    });
-
-    si.splitDirectory(tempInputDirectory);
-  }
-
-  @Test
-  public void testSplitFile() throws Exception {
-    writeSingleInputFile();
-    si.setTestSplitSize(2);
-    si.setCallback(new TestCallback(2, 10));
-    si.splitFile(tempInputFile);
-  }
-
-  @Test
-  public void testSplitFileLocation() throws Exception {
-    writeSingleInputFile();
-    si.setTestSplitSize(2);
-    si.setSplitLocation(50);
-    si.setCallback(new TestCallback(2, 10));
-    si.splitFile(tempInputFile);
-  }
-
-  @Test
-  public void testSplitFilePct() throws Exception {
-    writeSingleInputFile();
-    si.setTestSplitPct(25);
-
-    si.setCallback(new TestCallback(3, 9));
-    si.splitFile(tempInputFile);
-  }
-
-  @Test
-  public void testSplitFilePctLocation() throws Exception {
-    writeSingleInputFile();
-    si.setTestSplitPct(25);
-    si.setSplitLocation(50);
-    si.setCallback(new TestCallback(3, 9));
-    si.splitFile(tempInputFile);
-  }
-
-  @Test
-  public void testSplitFileRandomSelectionSize() throws Exception {
-    writeSingleInputFile();
-    si.setTestRandomSelectionSize(5);
-
-    si.setCallback(new TestCallback(5, 7));
-    si.splitFile(tempInputFile);
-  }
-
-  @Test
-  public void testSplitFileRandomSelectionPct() throws Exception {
-    writeSingleInputFile();
-    si.setTestRandomSelectionPct(25);
-
-    si.setCallback(new TestCallback(3, 9));
-    si.splitFile(tempInputFile);
-  }
-
-  /**
-   * Create a Sequencefile for testing consisting of IntWritable
-   * keys and VectorWritable values
-   * @param path path for test SequenceFile
-   * @param testPoints number of records in test SequenceFile
-   */
-  private void writeVectorSequenceFile(Path path, int testPoints) throws IOException {
-    Path tempSequenceFile = new Path(path, "part-00000");
-    Configuration conf = getConfiguration();
-    IntWritable key = new IntWritable();
-    VectorWritable value = new VectorWritable();
-    try (SequenceFile.Writer writer =
-             SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class)) {
-      for (int i = 0; i < testPoints; i++) {
-        key.set(i);
-        Vector v = new SequentialAccessSparseVector(4);
-        v.assign(i);
-        value.set(v);
-        writer.append(key, value);
-      }
-    }
-  }
-
-  /**
-   * Create a Sequencefile for testing consisting of IntWritable keys and Text values
-   * @param path path for test SequenceFile
-   * @param testPoints number of records in test SequenceFile
-   */
-  private void writeTextSequenceFile(Path path, int testPoints) throws IOException {
-    Path tempSequenceFile = new Path(path, "part-00000");
-    Configuration conf = getConfiguration();
-    Text key = new Text();
-    Text value = new Text();
-    try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, Text.class, Text.class)){
-      for (int i = 0; i < testPoints; i++) {
-        key.set(Integer.toString(i));
-        value.set("Line " + i);
-        writer.append(key, value);
-      }
-    }
-  }
-
-  /**
-   * Display contents of a SequenceFile
-   * @param sequenceFilePath path to SequenceFile
-   */
-  private void displaySequenceFile(Path sequenceFilePath) throws IOException {
-    for (Pair<?,?> record : new SequenceFileIterable<>(sequenceFilePath, true, getConfiguration())) {
-      System.out.println(record.getFirst() + "\t" + record.getSecond());
-    }
-  }
-
-  /**
-   * Determine number of records in a SequenceFile
-   * @param sequenceFilePath path to SequenceFile
-   * @return number of records
-   */
-  private int getNumberRecords(Path sequenceFilePath) throws IOException {
-    int numberRecords = 0;
-    for (Object value : new SequenceFileValueIterable<>(sequenceFilePath, true, getConfiguration())) {
-      numberRecords++;
-    }
-    return numberRecords;
-  }
-
-  /**
-   * Test map reduce version of split input with Text, Text key value
-   * pairs in input
-   */
-  @Test
-  public void testSplitInputMapReduceText() throws Exception {
-    writeTextSequenceFile(tempSequenceDirectory, 1000);
-    testSplitInputMapReduce(1000);
-  }
-
-  /** Test map reduce version of split input with Text, Text key value pairs in input called from command line */
-  @Test
-  public void testSplitInputMapReduceTextCli() throws Exception {
-    writeTextSequenceFile(tempSequenceDirectory, 1000);
-    testSplitInputMapReduceCli(1000);
-  }
-
-  /**
-   * Test map reduce version of split input with IntWritable, Vector key value
-   * pairs in input
-   */
-  @Test
-  public void testSplitInputMapReduceVector() throws Exception {
-    writeVectorSequenceFile(tempSequenceDirectory, 1000);
-    testSplitInputMapReduce(1000);
-  }
-
-  /**
-   * Test map reduce version of split input with IntWritable, Vector key value
-   * pairs in input called from command line
-   */
-  @Test
-  public void testSplitInputMapReduceVectorCli() throws Exception {
-    writeVectorSequenceFile(tempSequenceDirectory, 1000);
-    testSplitInputMapReduceCli(1000);
-  }
-
-  /**
-   * Test map reduce version of split input through CLI
-   */
-  private void testSplitInputMapReduceCli(int numPoints) throws Exception {
-    int randomSelectionPct = 25;
-    int keepPct = 10;
-    String[] args =
-        { "--method", "mapreduce", "--input", tempSequenceDirectory.toString(),
-            "--mapRedOutputDir", tempMapRedOutputDirectory.toString(),
-            "--randomSelectionPct", Integer.toString(randomSelectionPct),
-            "--keepPct", Integer.toString(keepPct), "-ow" };
-    ToolRunner.run(getConfiguration(), new SplitInput(), args);
-    validateSplitInputMapReduce(numPoints, randomSelectionPct, keepPct);
-  }
-
-  /**
-   * Test map reduce version of split input through method call
-   */
-  private void testSplitInputMapReduce(int numPoints) throws Exception {
-    int randomSelectionPct = 25;
-    si.setTestRandomSelectionPct(randomSelectionPct);
-    int keepPct = 10;
-    si.setKeepPct(keepPct);
-    si.setMapRedOutputDirectory(tempMapRedOutputDirectory);
-    si.setUseMapRed(true);
-    si.splitDirectory(getConfiguration(), tempSequenceDirectory);
-
-    validateSplitInputMapReduce(numPoints, randomSelectionPct, keepPct);
-  }
-
-  /**
-   * Validate that number of test records and number of training records
-   * are consistant with keepPct and randomSelectionPct
-   */
-  private void validateSplitInputMapReduce(int numPoints, int randomSelectionPct, int keepPct) throws IOException {
-    Path testPath = new Path(tempMapRedOutputDirectory, "test-r-00000");
-    Path trainingPath = new Path(tempMapRedOutputDirectory, "training-r-00000");
-    int numberTestRecords = getNumberRecords(testPath);
-    int numberTrainingRecords = getNumberRecords(trainingPath);
-    System.out.printf("Test data: %d records\n", numberTestRecords);
-    displaySequenceFile(testPath);
-    System.out.printf("Training data: %d records\n", numberTrainingRecords);
-    displaySequenceFile(trainingPath);
-    assertEquals((randomSelectionPct / 100.0) * (keepPct / 100.0) * numPoints,
-        numberTestRecords, 2);
-    assertEquals(
-        (1 - randomSelectionPct / 100.0) * (keepPct / 100.0) * numPoints,
-        numberTrainingRecords, 2);
-  }
-
-  @Test
-  public void testValidate() throws Exception {
-    SplitInput st = new SplitInput();
-    assertValidateException(st);
-
-    st.setTestSplitSize(100);
-    assertValidateException(st);
-
-    st.setTestOutputDirectory(tempTestDirectory);
-    assertValidateException(st);
-
-    st.setTrainingOutputDirectory(tempTrainingDirectory);
-    st.validate();
-
-    st.setTestSplitPct(50);
-    assertValidateException(st);
-
-    st = new SplitInput();
-    st.setTestRandomSelectionPct(50);
-    st.setTestOutputDirectory(tempTestDirectory);
-    st.setTrainingOutputDirectory(tempTrainingDirectory);
-    st.validate();
-
-    st.setTestSplitPct(50);
-    assertValidateException(st);
-
-    st = new SplitInput();
-    st.setTestRandomSelectionPct(50);
-    st.setTestOutputDirectory(tempTestDirectory);
-    st.setTrainingOutputDirectory(tempTrainingDirectory);
-    st.validate();
-
-    st.setTestSplitSize(100);
-    assertValidateException(st);
-  }
-
-  private class TestCallback implements SplitInput.SplitCallback {
-    private final int testSplitSize;
-    private final int trainingLines;
-
-    private TestCallback(int testSplitSize, int trainingLines) {
-      this.testSplitSize = testSplitSize;
-      this.trainingLines = trainingLines;
-    }
-
-    @Override
-    public void splitComplete(Path inputFile, int lineCount, int trainCount, int testCount, int testSplitStart) {
-      assertSplit(fs, tempInputFile, charset, testSplitSize, trainingLines, tempTrainingDirectory, tempTestDirectory);
-    }
-  }
-
-  private static void assertValidateException(SplitInput st) throws IOException {
-    try {
-      st.validate();
-      fail("Expected IllegalArgumentException");
-    } catch (IllegalArgumentException iae) {
-      // good
-    }
-  }
-
-  private static void assertSplit(FileSystem fs,
-                                  Path tempInputFile,
-                                  Charset charset,
-                                  int testSplitSize,
-                                  int trainingLines,
-                                  Path tempTrainingDirectory,
-                                  Path tempTestDirectory) {
-
-    try {
-      Path testFile = new Path(tempTestDirectory, tempInputFile.getName());
-      //assertTrue("test file exists", testFile.isFile());
-      assertEquals("test line count", testSplitSize, SplitInput.countLines(fs, testFile, charset));
-
-      Path trainingFile = new Path(tempTrainingDirectory, tempInputFile.getName());
-      //assertTrue("training file exists", trainingFile.isFile());
-      assertEquals("training line count", trainingLines, SplitInput.countLines(fs, trainingFile, charset));
-    } catch (IOException ioe) {
-      fail(ioe.toString());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java b/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
deleted file mode 100644
index c519f85..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/email/MailProcessorTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.email;
-
-import java.io.File;
-import java.io.StringWriter;
-import java.net.URL;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.Charsets;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-public final class MailProcessorTest extends MahoutTestCase {
-
-  @Test
-  public void testLabel() throws Exception {
-    StringWriter writer = new StringWriter();
-    MailOptions options = new MailOptions();
-    options.setSeparator(":::");
-    options.setCharset(Charsets.UTF_8);
-        options.setPatternsToMatch(new Pattern[]{
-        MailProcessor.FROM_PREFIX, MailProcessor.SUBJECT_PREFIX, MailProcessor.TO_PREFIX});
-    options.setInput(new File(System.getProperty("user.dir")));
-    MailProcessor proc = new MailProcessor(options, "", writer);
-    URL url = MailProcessorTest.class.getClassLoader().getResource("test.mbox");
-    File file = new File(url.toURI());
-    long count = proc.parseMboxLineByLine(file);
-    assertEquals(7, count);
-  }
-
-  @Test
-  public void testStripQuoted() throws Exception {
-    StringWriter writer = new StringWriter();
-    MailOptions options = new MailOptions();
-    options.setSeparator(":::");
-    options.setCharset(Charsets.UTF_8);
-        options.setPatternsToMatch(new Pattern[]{
-        MailProcessor.SUBJECT_PREFIX});
-    options.setInput(new File(System.getProperty("user.dir")));
-    options.setIncludeBody(true);
-    MailProcessor proc = new MailProcessor(options, "", writer);
-    URL url = MailProcessorTest.class.getClassLoader().getResource("test.mbox");
-    File file = new File(url.toURI());
-    long count = proc.parseMboxLineByLine(file);
-    assertEquals(7, count);
-    assertTrue(writer.getBuffer().toString().contains("> Cocoon Cron Block Configurable Clustering"));
-    writer = new StringWriter();
-    proc = new MailProcessor(options, "", writer);
-    options.setStripQuotedText(true);
-    count = proc.parseMboxLineByLine(file);
-    assertEquals(7, count);
-    assertFalse(writer.getBuffer().toString().contains("> Cocoon Cron Block Configurable Clustering"));
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java b/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
deleted file mode 100644
index 4fdbbbc..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.mahout.utils.nlp.collocations.llr;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
-import java.nio.charset.CharsetEncoder;
-
-import org.apache.commons.io.Charsets;
-import org.apache.hadoop.util.bloom.BloomFilter;
-import org.apache.hadoop.util.bloom.Filter;
-import org.apache.hadoop.util.bloom.Key;
-import org.apache.hadoop.util.hash.Hash;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.shingle.ShingleFilter;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-public final class BloomTokenFilterTest extends MahoutTestCase {
-  
-  private static final CharsetEncoder encoder = Charsets.UTF_8.newEncoder();
-
-  private static final String input = "The best of times the worst of times";
-  private static final String[] allTokens = {
-      "The", "best", "of", "times", "the", "worst", "of", "times"
-  };
-  private static final String[] expectedNonKeepTokens = { "best", "times", "the", "worst", "times" };
-  private static final String[] expectedKeepTokens = { "The", "of", "of" };
-  private static final String[] filterTokens    = { "The", "of" };
-  private static final String[] notFilterTokens = { "best", "worst", "the", "times"};
-  private static final String[] shingleKeepTokens = {
-      "The best", "best of times", "the worst", "worst of times", "of times"
-  };
-  private static final String[] expectedShingleTokens = {
-      "The best", "best of times", "of times", "the worst", "worst of times", "of times"
-  };
-  
-  /** test standalone filter without tokenfilter wrapping */
-  @Test
-  public void testFilter() throws IOException {
-    Filter filter = getFilter(filterTokens);
-    Key k = new Key();
-    for (String s: filterTokens) {
-      setKey(k,s);
-      assertTrue("Key for string " + s + " should be filter member", filter.membershipTest(k));
-    }
-    
-    for (String s: notFilterTokens)  {
-      setKey(k,s);
-      assertFalse("Key for string " + s + " should not be filter member", filter.membershipTest(k));
-    }
-  }
-  
-  /** normal case, unfiltered analyzer */
-  @Test
-  public void testAnalyzer() throws IOException {
-    Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer();
-    TokenStream ts = analyzer.tokenStream(null, reader);
-    ts.reset();
-    validateTokens(allTokens, ts);
-    ts.end();
-    ts.close();
-  }
-  
-  /** filtered analyzer */
-  @Test
-  public void testNonKeepdAnalyzer() throws IOException {
-    Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer();
-    TokenStream ts = analyzer.tokenStream(null, reader);
-    ts.reset();
-    TokenStream f = new BloomTokenFilter(getFilter(filterTokens), false /* toss matching tokens */, ts);
-    validateTokens(expectedNonKeepTokens, f);
-    ts.end();
-    ts.close();
-  }
-
-  /** keep analyzer */
-  @Test
-  public void testKeepAnalyzer() throws IOException {
-    Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer();
-    TokenStream ts = analyzer.tokenStream(null, reader);
-    ts.reset();
-    TokenStream f = new BloomTokenFilter(getFilter(filterTokens), true /* keep matching tokens */, ts);
-    validateTokens(expectedKeepTokens, f);
-    ts.end();
-    ts.close();
-  }
-  
-  /** shingles, keep those matching whitelist */
-  @Test
-  public void testShingleFilteredAnalyzer() throws IOException {
-    Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer();
-    TokenStream ts = analyzer.tokenStream(null, reader);
-    ts.reset();
-    ShingleFilter sf = new ShingleFilter(ts, 3);
-    TokenStream f = new BloomTokenFilter(getFilter(shingleKeepTokens),  true, sf);
-    validateTokens(expectedShingleTokens, f);
-    ts.end();
-    ts.close();
-  }
-  
-  private static void setKey(Key k, String s) throws IOException {
-    ByteBuffer buffer = encoder.encode(CharBuffer.wrap(s.toCharArray()));
-    k.set(buffer.array(), 1.0);
-  }
-  
-  private static void validateTokens(String[] expected, TokenStream ts) throws IOException {
-    int pos = 0;
-    while (ts.incrementToken()) {
-      assertTrue("Analyzer produced too many tokens", pos <= expected.length);
-      CharTermAttribute termAttr = ts.getAttribute(CharTermAttribute.class);
-      assertEquals("Unexpected term", expected[pos++], termAttr.toString());
-    }
-    assertEquals("Analyzer produced too few terms", expected.length, pos);
-  }
-
-  private static Filter getFilter(String[] tokens) throws IOException {
-    Filter filter = new BloomFilter(100,50, Hash.JENKINS_HASH);
-    Key k = new Key();
-    for (String s: tokens) {
-      setKey(k,s);
-      filter.add(k);
-    }
-    return filter;
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java b/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
deleted file mode 100644
index 8ab643b..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.regex;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.common.DummyRecordWriter;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-import java.util.List;
-
-public final class RegexMapperTest extends MahoutTestCase {
-
-  @Test
-  public void testRegex() throws Exception {
-    RegexMapper mapper = new RegexMapper();
-    Configuration conf = getConfiguration();
-    conf.set(RegexMapper.REGEX, "(?<=(\\?|&)q=).*?(?=&|$)");
-    conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName());
-    DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<>();
-    Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter
-            .build(mapper, conf, mapWriter);
-
-    mapper.setup(mapContext);
-    for (int i = 0; i < RegexUtilsTest.TEST_STRS.length; i++) {
-      String testStr = RegexUtilsTest.TEST_STRS[i];
-
-      LongWritable key = new LongWritable(i);
-      mapper.map(key, new Text(testStr), mapContext);
-      List<Text> value = mapWriter.getValue(key);
-      if (!RegexUtilsTest.GOLD[i].isEmpty()) {
-        assertEquals(1, value.size());
-        assertEquals(RegexUtilsTest.GOLD[i], value.get(0).toString());
-      }
-    }
-  }
-
-  @Test
-  public void testGroups() throws Exception {
-    RegexMapper mapper = new RegexMapper();
-    Configuration conf = getConfiguration();
-    conf.set(RegexMapper.REGEX, "(\\d+)\\.(\\d+)\\.(\\d+)");
-    conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName());
-    conf.setStrings(RegexMapper.GROUP_MATCHERS, "1", "3");
-    DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<>();
-    Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter
-            .build(mapper, conf, mapWriter);
-
-    mapper.setup(mapContext);
-    for (int i = 0; i < RegexUtilsTest.TEST_STRS.length; i++) {
-      String testStr = RegexUtilsTest.TEST_STRS[i];
-
-      LongWritable key = new LongWritable(i);
-      mapper.map(key, new Text(testStr), mapContext);
-      List<Text> value = mapWriter.getValue(key);
-      assertEquals(1, value.size());
-      assertEquals("127 0", value.get(0).toString());
-    }
-  }
-
-  @Test
-  public void testFPGFormatter() throws Exception {
-    RegexMapper mapper = new RegexMapper();
-    Configuration conf = getConfiguration();
-    conf.set(RegexMapper.REGEX, "(?<=(\\?|&)q=).*?(?=&|$)");
-    conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName());
-    conf.set(RegexMapper.FORMATTER_CLASS, FPGFormatter.class.getName());
-    DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<>();
-    Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter
-            .build(mapper, conf, mapWriter);
-
-    mapper.setup(mapContext);
-    RegexFormatter formatter = new FPGFormatter();
-    for (int i = 0; i < RegexUtilsTest.TEST_STRS.length; i++) {
-      String testStr = RegexUtilsTest.TEST_STRS[i];
-
-      LongWritable key = new LongWritable(i);
-      mapper.map(key, new Text(testStr), mapContext);
-      List<Text> value = mapWriter.getValue(key);
-      if (!RegexUtilsTest.GOLD[i].isEmpty()) {
-        assertEquals(1, value.size());
-        assertEquals(formatter.format(RegexUtilsTest.GOLD[i]), value.get(0).toString());
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java b/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
deleted file mode 100644
index 8ae10a5..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.regex;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.regex.Pattern;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-public final class RegexUtilsTest extends MahoutTestCase {
-
-  static final String[] TEST_STRS = {
-          "127.0.0.1 -  -  [01/10/2011:00:01:51 +0000] \"GET /solr/collection1/browse?q=foo&rows=10&wt=json&hl=true&hl.fl=body&hl.fl=content",
-          "127.0.0.1 -  -  [01/10/2011:00:20:58 +0000] \"GET /solr/collection1/browse?q=Using+Solr+Search+RDBMS&fq=%7B%21tag%3Dsource%7D%28%28source%3Alucid+AND+lucid_facet%3A%28site%29%29%29&rows=10",
-          "127.0.0.1 -  -  [01/10/2011:00:21:21 +0000] \"GET /solr/collection1/browse?q=language+detection&start=560&rows=10 HTTP/1.1\" 200 45071",
-          "127.0.0.1 -  -  [01/10/2011:00:21:21 +0000] \"GET /solr/collection1/browse?q=&start=560&rows=10 HTTP/1.1\" 200 45071"
-  };
-  static final String[] GOLD = {"foo", "Using Solr Search RDBMS", "language detection", ""};
-
-  @Test
-  public void testExtract() throws Exception {
-    Pattern pattern = Pattern.compile("(?<=(\\?|&)q=).*?(?=&|$)");
-    String line = "127.0.0.1 -  -  [24/05/2010:01:19:22 +0000] \"GET /solr/select?q=import statement&start=1 HTTP/1.1\" 200 37571";
-    String res = RegexUtils.extract(line, pattern, Collections.<Integer>emptyList(), " ", RegexUtils.IDENTITY_TRANSFORMER);
-    assertEquals(res, "import statement", res);
-
-    for (int i = 0; i < TEST_STRS.length; i++) {
-      String testStr = TEST_STRS[i];
-      res = RegexUtils.extract(testStr, pattern, Collections.<Integer>emptyList(), " ", new URLDecodeTransformer());
-      assertEquals(GOLD[i], res);
-    }
-
-    pattern = Pattern.compile("((?<=(\\?|&)q=)(.*?)(?=(&|$))|(?<=((\\?|&)start=))(\\d+))");
-    res = RegexUtils.extract(line, pattern, Collections.<Integer>emptyList(), " ", RegexUtils.IDENTITY_TRANSFORMER);
-    assertEquals(res, "import statement 1", res);
-
-    pattern = Pattern.compile("(start=1) HTTP");
-    Collection<Integer> groupsToKeep = new ArrayList<>();
-    groupsToKeep.add(1);
-    res = RegexUtils.extract(line, pattern, groupsToKeep, " ", RegexUtils.IDENTITY_TRANSFORMER);
-    assertEquals(res, "start=1", res);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java b/integration/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
deleted file mode 100644
index 2ddce14..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors;
-
-import java.util.Iterator;
-import java.util.Random;
-
-import com.google.common.base.Function;
-import com.google.common.collect.Iterators;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.common.iterator.CountingIterator;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.function.DoubleFunction;
-
-public final class RandomVectorIterable implements Iterable<Vector> {
-
-  public enum VectorType {DENSE, SPARSE}
-
-  private final int numItems;
-  private final VectorType type;
-  
-  public RandomVectorIterable() {
-    this(100, VectorType.SPARSE);
-  }
-  
-  public RandomVectorIterable(int numItems) {
-    this(numItems, VectorType.SPARSE);
-  }
-  
-  public RandomVectorIterable(int numItems, VectorType type) {
-    this.numItems = numItems;
-    this.type = type;
-  }
-  
-  @Override
-  public Iterator<Vector> iterator() {
-    return Iterators.transform(
-        new CountingIterator(numItems),
-        new Function<Integer, Vector>() {
-          private final Random random = RandomUtils.getRandom();
-          @Override
-          public Vector apply(Integer dummy) {
-            Vector result =
-                type == VectorType.SPARSE ? new RandomAccessSparseVector(numItems) : new DenseVector(numItems);
-            result.assign(new DoubleFunction() {
-              @Override
-              public double apply(double ignored) {
-                return random.nextDouble();
-              }
-            });
-            return result;
-          }
-        });
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
deleted file mode 100644
index c55fd8d..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/VectorHelperTest.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors;
-
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.Vector;
-import org.junit.Before;
-import org.junit.Test;
-
-public final class VectorHelperTest extends MahoutTestCase {
-
-  private static final int NUM_DOCS = 100;
-
-  private Path inputPathOne;
-  private Path inputPathTwo;
-
-  private Configuration conf;
-
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    conf = getConfiguration();
-
-    inputPathOne = getTestTempFilePath("documents/docs-one.file");
-    FileSystem fs = FileSystem.get(inputPathOne.toUri(), conf);
-    try (SequenceFile.Writer writer =
-             new SequenceFile.Writer(fs, conf, inputPathOne, Text.class, IntWritable.class)) {
-      Random rd = RandomUtils.getRandom();
-      for (int i = 0; i < NUM_DOCS; i++) {
-        // Make all indices higher than dictionary size
-        writer.append(new Text("Document::ID::" + i), new IntWritable(NUM_DOCS + rd.nextInt(NUM_DOCS)));
-      }
-    }
-
-    inputPathTwo = getTestTempFilePath("documents/docs-two.file");
-    fs = FileSystem.get(inputPathTwo.toUri(), conf);
-    try (SequenceFile.Writer writer =
-             new SequenceFile.Writer(fs, conf, inputPathTwo, Text.class, IntWritable.class)) {
-      Random rd = RandomUtils.getRandom();
-      for (int i = 0; i < NUM_DOCS; i++) {
-        // Keep indices within number of documents
-        writer.append(new Text("Document::ID::" + i), new IntWritable(rd.nextInt(NUM_DOCS)));
-      }
-    }
-  }
-
-  @Test
-  public void testJsonFormatting() throws Exception {
-    Vector v = new SequentialAccessSparseVector(10);
-    v.set(2, 3.1);
-    v.set(4, 1.0);
-    v.set(6, 8.1);
-    v.set(7, -100);
-    v.set(9, 12.2);
-    String UNUSED = "UNUSED";
-    String[] dictionary = {
-        UNUSED, UNUSED, "two", UNUSED, "four", UNUSED, "six", "seven", UNUSED, "nine"
-    };
-
-    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1}",
-        VectorHelper.vectorToJson(v, dictionary, 3, true));
-    assertEquals("unsorted form incorrect: ", "{two:3.1,four:1.0}",
-        VectorHelper.vectorToJson(v, dictionary, 2, false));
-    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1,four:1.0}",
-        VectorHelper.vectorToJson(v, dictionary, 4, true));
-    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1,four:1.0,seven:-100.0}",
-        VectorHelper.vectorToJson(v, dictionary, 5, true));
-    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1}",
-        VectorHelper.vectorToJson(v, dictionary, 2, true));
-    assertEquals("unsorted form incorrect: ", "{two:3.1,four:1.0}",
-        VectorHelper.vectorToJson(v, dictionary, 2, false));
-  }
-
-  @Test
-  public void testTopEntries() throws Exception {
-    Vector v = new SequentialAccessSparseVector(10);
-    v.set(2, 3.1);
-    v.set(4, 1.0);
-    v.set(6, 8.1);
-    v.set(7, -100);
-    v.set(9, 12.2);
-    v.set(1, 0.0);
-    v.set(3, 0.0);
-    v.set(8, 2.7);
-    // check if sizeOFNonZeroElementsInVector = maxEntries
-    assertEquals(6, VectorHelper.topEntries(v, 6).size());
-    // check if sizeOfNonZeroElementsInVector < maxEntries
-    assertTrue(VectorHelper.topEntries(v, 9).size() < 9);
-    // check if sizeOfNonZeroElementsInVector > maxEntries
-    assertTrue(VectorHelper.topEntries(v, 5).size() < v.getNumNonZeroElements());
-  }
-
-  @Test
-  public void testTopEntriesWhenAllZeros() throws Exception {
-    Vector v = new SequentialAccessSparseVector(10);
-    v.set(2, 0.0);
-    v.set(4, 0.0);
-    v.set(6, 0.0);
-    v.set(7, 0);
-    v.set(9, 0.0);
-    v.set(1, 0.0);
-    v.set(3, 0.0);
-    v.set(8, 0.0);
-    assertEquals(0, VectorHelper.topEntries(v, 6).size());
-  }
-
-  @Test
-  public void testLoadTermDictionary() throws Exception {
-    // With indices higher than dictionary size
-    VectorHelper.loadTermDictionary(conf, inputPathOne.toString());
-    // With dictionary size higher than indices
-    VectorHelper.loadTermDictionary(conf, inputPathTwo.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
deleted file mode 100644
index 2ea8b89..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFTypeTest.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.arff;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-public final class ARFFTypeTest extends MahoutTestCase {
-
-  @Test
-  public void removeQuotes() {
-    assertNull(ARFFType.removeQuotes(null));
-    assertEquals("", ARFFType.removeQuotes("\"\""));
-    assertEquals("", ARFFType.removeQuotes("''"));
-    assertEquals("", ARFFType.removeQuotes(""));
-    assertEquals("", ARFFType.removeQuotes("  "));
-    assertEquals("single", ARFFType.removeQuotes("'single'"));
-    assertEquals("double", ARFFType.removeQuotes("\"double\""));
-    assertEquals("trim", ARFFType.removeQuotes(" trim "));
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
deleted file mode 100644
index 4c7f17a..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
+++ /dev/null
@@ -1,289 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.arff;
-
-import java.io.IOException;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Locale;
-import java.util.Map;
-
-import com.google.common.io.Resources;
-import org.apache.commons.io.Charsets;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.Vector;
-import org.junit.Test;
-
-public final class ARFFVectorIterableTest extends MahoutTestCase {
-
-  @Test
-  public void testValues() throws Exception {
-    ARFFVectorIterable iterable = readModelFromResource("sample.arff");
-
-    assertEquals("Mahout", iterable.getModel().getRelation());
-    Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
-    assertNotNull(bindings);
-    assertEquals(5, bindings.size());
-    Iterator<Vector> iter = iterable.iterator();
-    assertTrue(iter.hasNext());
-    Vector next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals(1.0, next.get(0), EPSILON);
-    assertEquals(2.0, next.get(1), EPSILON);
-    assertTrue(iter.hasNext());
-    next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals(2.0, next.get(0), EPSILON);
-    assertEquals(3.0, next.get(1), EPSILON);
-
-    assertTrue(iter.hasNext());
-    next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector);
-    assertEquals(5.0, next.get(0), EPSILON);
-    assertEquals(23.0, next.get(1), EPSILON);
-
-    assertFalse(iter.hasNext());
-  }
-
-  @Test
-  public void testDense() throws Exception {
-    Iterable<Vector> iterable = readModelFromResource("sample-dense.arff");
-    Vector firstVector = iterable.iterator().next();
-    assertEquals(1.0, firstVector.get(0), 0);
-    assertEquals(65.0, firstVector.get(1), 0);
-    assertEquals(1.0, firstVector.get(3), 0);
-    assertEquals(1.0, firstVector.get(4), 0);
-
-    int count = 0;
-    for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof DenseVector);
-      count++;
-    }
-    assertEquals(5, count);
-  }
-
-  @Test
-  public void testSparse() throws Exception {
-    Iterable<Vector> iterable = readModelFromResource("sample-sparse.arff");
-
-    Vector firstVector = iterable.iterator().next();
-    assertEquals(23.1, firstVector.get(1), 0);
-    assertEquals(3.23, firstVector.get(2), 0);
-    assertEquals(1.2, firstVector.get(3), 0);
-
-    int count = 0;
-    for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
-      count++;
-    }
-    assertEquals(9, count);
-  }
-
-  @Test
-  public void testNonNumeric() throws Exception {
-    MapBackedARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = getVectors("non-numeric-1.arff", model);
-    int count = 0;
-    for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
-      count++;
-    }
-
-    iterable = getVectors("non-numeric-1.arff", model);
-    Iterator<Vector> iter = iterable.iterator();
-    Vector firstVector = iter.next();
-
-    assertEquals(1.0, firstVector.get(2), 0);
-
-    assertEquals(10, count);
-    Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(1, nominalMap.size());
-    Map<String, Integer> noms = nominalMap.get("bar");
-    assertNotNull("nominals for bar are null", noms);
-    assertEquals(5, noms.size());
-    Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
-    assertNotNull("Type map null", integerARFFTypeMap);
-    assertEquals(5, integerARFFTypeMap.size());
-    Map<String, Long> words = model.getWords();
-    assertNotNull("words null", words);
-    assertEquals(10, words.size());
-    Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
-    assertNotNull("date format null", integerDateFormatMap);
-    assertEquals(1, integerDateFormatMap.size());
-  }
-
-  /**
-   * Reads the first vector of {@code date.arff} and checks that columns 1 to 6 hold the
-   * epoch-millisecond value obtained by parsing a reference date string with the
-   * corresponding {@link SimpleDateFormat} pattern.
-   */
-  @Test
-  public void testDate() throws Exception {
-    ARFFVectorIterable iterable = readModelFromResource("date.arff");
-    Iterator<Vector> vectors = iterable.iterator();
-    Vector firstVector = vectors.next();
-
-    // Each row is {pattern, text to parse}; row i is compared against vector column i + 1.
-    String[][] patternsAndTexts = {
-      {"yyyy-MM-dd'T'HH:mm:ss", "2001-07-04T12:08:56"},
-      {"yyyy.MM.dd G 'at' HH:mm:ss z", "2001.07.04 AD at 12:08:56 PDT"},
-      {"EEE, MMM d, ''yy", "Wed, Jul 4, '01,4 0:08 PM, PDT"},
-      {"K:mm a, z", "0:08 PM, PDT"},
-      {"yyyyy.MMMMM.dd GGG hh:mm aaa", "02001.July.04 AD 12:08 PM"},
-      {"EEE, d MMM yyyy HH:mm:ss Z", "Wed, 4 Jul 2001 12:08:56 -0700"},
-    };
-
-    for (int i = 0; i < patternsAndTexts.length; i++) {
-      DateFormat format = new SimpleDateFormat(patternsAndTexts[i][0], Locale.ENGLISH);
-      Date parsed = format.parse(patternsAndTexts[i][1]);
-      long expectedMillis = parsed.getTime();
-      assertEquals(expectedMillis, firstVector.get(i + 1), 0);
-    }
-  }
-
-  /**
-   * Parses two ARFF resources into the same {@link MapBackedARFFModel} and checks the
-   * nominal, type, word and date maps of the model after the first file, then checks
-   * that the nominal map has grown after the second file.
-   */
-  @Test
-  public void testMultipleNoms() throws Exception {
-    MapBackedARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = getVectors("non-numeric-1.arff", model);
-    int count = 0;
-    for (Vector vector : iterable) {
-      assertTrue("Vector is not sparse", vector instanceof RandomAccessSparseVector);
-      count++;
-    }
-    assertEquals(10, count);
-    Map<String,Map<String,Integer>> nominalMap = iterable.getModel().getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(1, nominalMap.size());
-    Map<String,Integer> noms = nominalMap.get("bar");
-    assertNotNull("nominals for bar are null", noms);
-    assertEquals(5, noms.size());
-    Map<Integer,ARFFType> integerARFFTypeMap = model.getTypeMap();
-    assertNotNull("Type map null", integerARFFTypeMap);
-    assertEquals(5, integerARFFTypeMap.size());
-    Map<String,Long> words = model.getWords();
-    assertNotNull("words null", words);
-    assertEquals(10, words.size());
-
-    Map<Integer,DateFormat> integerDateFormatMap = model.getDateMap();
-    assertNotNull("date format null", integerDateFormatMap);
-    assertEquals(1, integerDateFormatMap.size());
-
-    // Second file goes into the SAME model instance. The vectors are still iterated so that
-    // every row is parsed, but they are not counted: no expected row count is asserted for it.
-    iterable = getVectors("non-numeric-2.arff", model);
-    for (Vector vector : iterable) {
-      assertTrue("Vector is not sparse", vector instanceof RandomAccessSparseVector);
-    }
-    nominalMap = model.getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(2, nominalMap.size());
-    noms = nominalMap.get("test");
-    assertNotNull("nominals for test are null", noms);
-    assertEquals(2, noms.size());
-  }
-
-  @Test
-  public void testNumerics() throws Exception {
-    String arff = "@RELATION numerics\n"
-      + "@ATTRIBUTE theNumeric NUMERIC\n"
-      + "@ATTRIBUTE theInteger INTEGER\n"
-      + "@ATTRIBUTE theReal REAL\n"
-      + "@DATA\n"
-      + "1.0,2,3.0";
-    ARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(arff, model);
-    model = iterable.getModel();
-    assertNotNull(model);
-    assertEquals(3, model.getLabelSize());
-    assertEquals(ARFFType.NUMERIC, model.getARFFType(0));
-    assertEquals(ARFFType.INTEGER, model.getARFFType(1));
-    assertEquals(ARFFType.REAL, model.getARFFType(2));
-    Iterator<Vector> it = iterable.iterator();
-    Vector vector = it.next();
-    assertEquals(1.0, vector.get(0), EPSILON);
-    assertEquals(2.0, vector.get(1), EPSILON);
-    assertEquals(3.0, vector.get(2), EPSILON);
-  }
-
-  @Test
-  public void testQuotes() throws Exception {
-    // ARFF allows quotes on identifiers
-    ARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = getVectors("quoted-id.arff", model);
-    model = iterable.getModel();
-    assertNotNull(model);
-    assertEquals("quotes", model.getRelation());
-
-    // check attribute labels
-    assertEquals(4, model.getLabelSize());
-    assertEquals(ARFFType.NUMERIC, model.getARFFType(0));
-    assertEquals(ARFFType.INTEGER, model.getARFFType(1));
-    assertEquals(ARFFType.REAL, model.getARFFType(2));
-    assertEquals(ARFFType.NOMINAL, model.getARFFType(3));
-
-    Map<String, Integer> labelBindings = model.getLabelBindings();
-    assertTrue(labelBindings.keySet().contains("thenumeric"));
-    assertTrue(labelBindings.keySet().contains("theinteger"));
-    assertTrue(labelBindings.keySet().contains("thereal"));
-    assertTrue(labelBindings.keySet().contains("thenominal"));
-
-    // check nominal values
-    Map<String, Integer> nominalMap = model.getNominalMap().get("thenominal");
-    assertNotNull(nominalMap);
-    assertEquals(3, nominalMap.size());
-    assertTrue(nominalMap.keySet().contains("double-quote"));
-    assertTrue(nominalMap.keySet().contains("single-quote"));
-    assertTrue(nominalMap.keySet().contains("no-quote"));
-
-    // check data values
-    Iterator<Vector> it = iterable.iterator();
-    Vector vector = it.next();
-    assertEquals(nominalMap.get("no-quote"), vector.get(3), EPSILON);
-    assertEquals(nominalMap.get("single-quote"), it.next().get(3), EPSILON);
-    assertEquals(nominalMap.get("double-quote"), it.next().get(3), EPSILON);
-  }
-
-  /**
-   * Loads a classpath resource as UTF-8 text and wraps it in an {@link ARFFVectorIterable}.
-   *
-   * @param resourceName name of the ARFF resource to load
-   * @param model model passed to the {@link ARFFVectorIterable} constructor
-   * @return an iterable over the vectors of the resource
-   * @throws IOException if the resource cannot be read
-   */
-  static ARFFVectorIterable getVectors(String resourceName, ARFFModel model) throws IOException {
-    return new ARFFVectorIterable(
-        Resources.toString(Resources.getResource(resourceName), Charsets.UTF_8),
-        model);
-  }
-
-  /**
-   * Same as {@link #getVectors(String, ARFFModel)}, but with a new {@link MapBackedARFFModel}.
-   *
-   * @param resourceName name of the ARFF resource to load
-   * @return an iterable over the vectors of the resource
-   * @throws IOException if the resource cannot be read
-   */
-  private static ARFFVectorIterable readModelFromResource(String resourceName) throws IOException {
-    ARFFModel freshModel = new MapBackedARFFModel();
-    ARFFVectorIterable vectors = getVectors(resourceName, freshModel);
-    return vectors;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
deleted file mode 100644
index 7e7623e..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/DriverTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.utils.vectors.arff;
-
-import java.io.IOException;
-import java.io.StringWriter;
-
-import com.google.common.io.Resources;
-import org.apache.commons.io.Charsets;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-/**
- * Test case for {@link Driver}: checks the label bindings written for
- * {@code sample-dense.arff} in delimited and in JSON form.
- */
-public class DriverTest extends MahoutTestCase {
-
-  /** Writes the label bindings with a comma delimiter and compares them with the expected CSV. */
-  @Test
-  public void dictionary() throws IOException {
-    ARFFModel model = parseSampleModel();
-    StringWriter out = new StringWriter();
-    Driver.writeLabelBindings(out, model, ",");
-    assertMatchesEither(out, "expected-arff-dictionary.csv", "expected-arff-dictionary-2.csv");
-  }
-
-  /** Writes the label bindings as JSON and compares them with the expected schema. */
-  @Test
-  public void dictionaryJSON() throws IOException {
-    ARFFModel model = parseSampleModel();
-    StringWriter out = new StringWriter();
-    Driver.writeLabelBindingsJSON(out, model);
-    assertMatchesEither(out, "expected-arff-schema.json", "expected-arff-schema-2.json");
-  }
-
-  /** Builds a model and runs {@code sample-dense.arff} through it; the returned vectors are not needed. */
-  private static ARFFModel parseSampleModel() throws IOException {
-    ARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterableTest.getVectors("sample-dense.arff", model);
-    return model;
-  }
-
-  /**
-   * Passes when the written text equals the content of either resource.
-   * NOTE(review): two accepted outputs presumably cover different map iteration orders - confirm.
-   */
-  private static void assertMatchesEither(StringWriter written, String firstResource, String secondResource)
-    throws IOException {
-    String firstExpected = Resources.toString(Resources.getResource(firstResource), Charsets.UTF_8);
-    String secondExpected = Resources.toString(Resources.getResource(secondResource), Charsets.UTF_8);
-    String actual = written.toString();
-    assertTrue(firstExpected.equals(actual) || secondExpected.equals(actual));
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
deleted file mode 100644
index 2867640..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModelTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright 2013 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.arff;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-import java.util.Map;
-
-/**
- * Tests nominal registration and numeric value parsing of {@link MapBackedARFFModel}.
- */
-public class MapBackedARFFModelTest extends MahoutTestCase {
-
-  /** Three nominals registered under one label must end up in one map under that label. */
-  @Test
-  public void processNominal() {
-    String windy = "windy";
-    String breezy = "breezy";
-
-    ARFFModel model = new MapBackedARFFModel();
-    model.addNominal(windy, breezy, 77);
-    model.addNominal(windy, "strong", 23);
-    model.addNominal(windy, "nuking", 55);
-    Map<String, Map<String, Integer>> nominalMap = model.getNominalMap();
-
-    assertEquals(1, nominalMap.size());
-    Map<String, Integer> windyValues = nominalMap.get(windy);
-    assertNotNull("nominals for windy are null", windyValues);
-    // Check every registered nominal, not just the first one.
-    assertEquals(3, windyValues.size());
-    assertEquals(77, windyValues.get(breezy).intValue());
-    assertEquals(23, windyValues.get("strong").intValue());
-    assertEquals(55, windyValues.get("nuking").intValue());
-  }
-
-  /** A REAL column whose text is not a number must yield NaN. */
-  @Test
-  public void processBadNumeric() {
-    ARFFModel model = new MapBackedARFFModel();
-    model.addLabel("b1shkt70694difsmmmdv0ikmoh", 77);
-    model.addType(77, ARFFType.REAL);
-    assertTrue(Double.isNaN(model.getValue("b1shkt70694difsmmmdv0ikmoh", 77)));
-  }
-
-  /** Well-formed INTEGER and REAL text must be returned as the exact numeric value. */
-  @Test
-  public void processGoodNumeric() {
-    ARFFModel model = new MapBackedARFFModel();
-    model.addLabel("1234", 77);
-    model.addType(77, ARFFType.INTEGER);
-    // Zero delta keeps the original exact comparison but reports the actual value on failure.
-    assertEquals(1234.0, model.getValue("1234", 77), 0.0);
-    model.addLabel("131.34", 78);
-    model.addType(78, ARFFType.REAL);
-    assertEquals(131.34, model.getValue("131.34", 78), 0.0);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
deleted file mode 100644
index e76cf70..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIteratorTest.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.csv;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.io.StringWriter;
-import java.util.Iterator;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.utils.vectors.RandomVectorIterable;
-import org.apache.mahout.utils.vectors.VectorHelper;
-import org.apache.mahout.utils.vectors.io.TextualVectorWriter;
-import org.junit.Test;
-
-/**
- * Round-trip test: vectors written as CSV text must be read back by {@link CSVVectorIterator}.
- */
-public class CSVVectorIteratorTest extends MahoutTestCase {
-
-  /** Writes 50 random vectors as CSV and checks that the iterator yields 50 vectors. */
-  @Test
-  public void testCount() throws Exception {
-
-    StringWriter csvBuffer = new StringWriter();
-    // The per-vector write is overridden so that each vector is emitted via vectorToCSVString.
-    try (TextualVectorWriter csvWriter = new TextualVectorWriter(csvBuffer) {
-      @Override
-      public void write(Vector vector) throws IOException {
-        getWriter().write(VectorHelper.vectorToCSVString(vector, false));
-      }
-    }) {
-      Iterable<Vector> randomVectors = new RandomVectorIterable(50);
-      csvWriter.write(randomVectors);
-    }
-
-    String csvText = csvBuffer.getBuffer().toString();
-    Iterator<Vector> rows = new CSVVectorIterator(new StringReader(csvText));
-    int parsed = 0;
-    for (; rows.hasNext(); parsed++) {
-      rows.next();
-    }
-    assertEquals(50, parsed);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
deleted file mode 100644
index e2f7032..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.io;
-
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.apache.mahout.utils.vectors.RandomVectorIterable;
-import org.junit.Test;
-
-/**
- * Tests for the sequence-file and textual vector writers.
- */
-public final class VectorWriterTest extends MahoutTestCase {
-
-  /**
-   * Writes 50 random vectors through a {@link SequenceFileVectorWriter} and checks that
-   * 50 records are counted in the resulting file.
-   */
-  @Test
-  public void testSFVW() throws Exception {
-    Path outputPath = getTestTempFilePath("sfvw");
-    Configuration hadoopConf = getConfiguration();
-    FileSystem fileSystem = FileSystem.get(hadoopConf);
-    SequenceFile.Writer sequenceWriter =
-        new SequenceFile.Writer(fileSystem, hadoopConf, outputPath, LongWritable.class, VectorWritable.class);
-    try (SequenceFileVectorWriter vectorWriter = new SequenceFileVectorWriter(sequenceWriter)) {
-      vectorWriter.write(new RandomVectorIterable(50));
-    }
-
-    long recordCount = HadoopUtil.countRecords(outputPath, hadoopConf);
-    assertEquals(50, recordCount);
-  }
-
-  /** Writes two dense vectors through a {@link TextualVectorWriter} and checks that some text was produced. */
-  @Test
-  public void testTextOutputSize() throws Exception {
-    StringWriter textOutput = new StringWriter();
-    try (VectorWriter textWriter = new TextualVectorWriter(textOutput)) {
-      double[] firstValues = {0.3, 1.5, 4.5};
-      double[] secondValues = {1.3, 1.5, 3.5};
-      Collection<Vector> vectors = new ArrayList<>();
-      vectors.add(new DenseVector(firstValues));
-      vectors.add(new DenseVector(secondValues));
-      textWriter.write(vectors);
-    }
-    String written = textOutput.toString();
-    assertNotNull(written);
-    assertFalse(written.isEmpty());
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
deleted file mode 100644
index 890a14b..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.lucene;
-
-
-import java.io.IOException;
-
-import com.google.common.io.Closeables;
-
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Tests {@link CachedTermInfo} against a small in-memory index with two fields,
- * {@code content} (from {@link #DOCS}) and {@code content2} (from {@link #DOCS2}).
- */
-public class CachedTermInfoTest extends MahoutTestCase {
-  private RAMDirectory directory;
-
-  /** Text of the {@code content} field, one entry per indexed document. */
-  private static final String[] DOCS = {
-          "a a b b c c",
-          "a b a b a b a b",
-          "a b a",
-          "a",
-          "b",
-          "a",
-          "a"
-  };
-
-  /** Text of the {@code content2} field; indexed by the same position as {@link #DOCS}. */
-  private static final String[] DOCS2 = {
-          "d d d d",
-          "e e e e",
-          "d e d e",
-          "d",
-          "e",
-          "d",
-          "e"
-  };
-
-  /** Builds a fresh in-memory index before each test. */
-  @Before
-  public void before() throws IOException {
-    directory = new RAMDirectory();
-
-    FieldType fieldType = new FieldType();
-    fieldType.setStored(false);
-    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-    fieldType.setTokenized(true);
-    fieldType.setStoreTermVectors(false);
-    fieldType.setStoreTermVectorPositions(false);
-    fieldType.setStoreTermVectorOffsets(false);
-    fieldType.freeze();
-
-    directory = createTestIndex(fieldType, directory, 0);
-  }
-
-  /** Checks the cached terms of {@code content} with no filter, with a minDf filter and with a maxDfPercent filter. */
-  @Test
-  public void test() throws Exception {
-    // The reader holds index resources, so close it even when an assertion fails.
-    try (IndexReader reader = DirectoryReader.open(directory)) {
-      CachedTermInfo cti = new CachedTermInfo(reader, "content", 0, 100);
-      assertEquals(3, cti.totalTerms("content"));
-      assertNotNull(cti.getTermEntry("content", "a"));
-      assertNull(cti.getTermEntry("content", "e"));
-      //minDf
-      cti = new CachedTermInfo(reader, "content", 3, 100);
-      assertEquals(2, cti.totalTerms("content"));
-      assertNotNull(cti.getTermEntry("content", "a"));
-      assertNull(cti.getTermEntry("content", "c"));
-      //maxDFPercent, a is in 6 of 7 docs: numDocs * maxDfPercent / 100 < 6 to exclude, 85% should suffice to exclude a
-      cti = new CachedTermInfo(reader, "content", 0, 85);
-      assertEquals(2, cti.totalTerms("content"));
-      assertNotNull(cti.getTermEntry("content", "b"));
-      assertNotNull(cti.getTermEntry("content", "c"));
-      assertNull(cti.getTermEntry("content", "a"));
-    }
-  }
-
-  /**
-   * Indexes one document per entry of {@link #DOCS} into the given directory. Each document has a
-   * stored {@code id} field ({@code "doc_" + (i + startingId)}), a {@code content} field and a
-   * {@code content2} field.
-   *
-   * @param fieldType field type used for both content fields
-   * @param directory directory to write the index into
-   * @param startingId offset added to the document position to form the id
-   * @return the directory that was passed in
-   * @throws IOException if writing the index fails
-   */
-  static RAMDirectory createTestIndex(FieldType fieldType,
-                                      RAMDirectory directory,
-                                      int startingId) throws IOException {
-    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new WhitespaceAnalyzer()));
-
-    try {
-      for (int i = 0; i < DOCS.length; i++) {
-        Document doc = new Document();
-        Field id = new StringField("id", "doc_" + (i + startingId), Field.Store.YES);
-        doc.add(id);
-        Field text = new Field("content", DOCS[i], fieldType);
-        doc.add(text);
-        Field text2 = new Field("content2", DOCS2[i], fieldType);
-        doc.add(text2);
-        writer.addDocument(doc);
-      }
-    } finally {
-      Closeables.close(writer, false);
-    }
-    return directory;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java b/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java
deleted file mode 100644
index 86c8305..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils.vectors.lucene;
-
-import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.SimpleFSDirectory;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.Set;
-
-/**
- * Test case for the Lucene {@link Driver}: builds a small on-disk index, runs the driver on it
- * and checks the terms written to the sequence-file dictionary.
- */
-public class DriverTest extends MahoutTestCase {
-
-  private File indexDir;
-  private File outputDir;
-  private Configuration conf;
-
-  @Before
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    indexDir = getTestTempDir("intermediate");
-    indexDir.delete();
-    outputDir = getTestTempDir("output");
-    outputDir.delete();
-
-    conf = getConfiguration();
-  }
-
-  /** Wraps one line of text in a document with a single {@code text} field. */
-  private Document asDocument(String line) {
-    Document doc = new Document();
-    doc.add(new TextFieldWithTermVectors("text", line));
-    return doc;
-  }
-
-  /** Stored, tokenized field with term vectors enabled and norms omitted. */
-  static class TextFieldWithTermVectors extends Field {
-
-    public static final FieldType TYPE = new FieldType();
-
-    static {
-      TYPE.setOmitNorms(true);
-      TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
-      TYPE.setStored(true);
-      TYPE.setTokenized(true);
-      TYPE.setStoreTermVectors(true);
-      TYPE.freeze();
-    }
-
-    public TextFieldWithTermVectors(String name, String value) {
-      super(name, value, TYPE);
-    }
-  }
-
-  /**
-   * Indexes four lines of text, runs {@link Driver#main(String[])} with {@code --seqDictOut}
-   * and checks that the sequence-file dictionary holds exactly the expected terms.
-   */
-  @Test
-  public void sequenceFileDictionary() throws IOException {
-
-    // Both the directory and the writer hold file resources; close them (writer first) even on failure.
-    try (Directory index = new SimpleFSDirectory(Paths.get(indexDir.getAbsolutePath()))) {
-      Analyzer analyzer = new StandardAnalyzer();
-      IndexWriterConfig config = new IndexWriterConfig(analyzer);
-      config.setCommitOnClose(true);
-      try (IndexWriter writer = new IndexWriter(index, config)) {
-        writer.addDocument(asDocument("One Ring to rule them all"));
-        writer.addDocument(asDocument("One Ring to find them,"));
-        writer.addDocument(asDocument("One Ring to bring them all"));
-        writer.addDocument(asDocument("and in the darkness bind them"));
-      }
-    }
-
-    File seqDict = new File(outputDir, "dict.seq");
-
-    Driver.main(new String[] {
-        "--dir", indexDir.getAbsolutePath(),
-        "--output", new File(outputDir, "out").getAbsolutePath(),
-        "--field", "text",
-        "--dictOut", new File(outputDir, "dict.txt").getAbsolutePath(),
-        "--seqDictOut", seqDict.getAbsolutePath(),
-    });
-
-    SequenceFile.Reader reader = null;
-    Set<String> indexTerms = Sets.newHashSet();
-    try {
-      reader = new SequenceFile.Reader(FileSystem.getLocal(conf), new Path(seqDict.getAbsolutePath()), conf);
-      Text term = new Text();
-      IntWritable termIndex = new IntWritable();
-
-      while (reader.next(term, termIndex)) {
-        indexTerms.add(term.toString());
-      }
-    } finally {
-      Closeables.close(reader, true);
-    }
-
-    Set<String> expectedIndexTerms = Sets.newHashSet("all", "bind", "bring", "darkness", "find", "one", "ring", "rule");
-
-    // should contain the same terms as expected; comparing the sets directly also fails when
-    // terms are missing, which a comparison of the union's size would not detect
-    assertEquals(expectedIndexTerms, indexTerms);
-  }
-}