Posted to commits@mahout.apache.org by ap...@apache.org on 2017/01/26 04:24:22 UTC
[4/5] mahout git commit: MAHOUT-1885: Initial commit of VCL bindings.
closes apache/mahout#269 closes apache/mahout#261
http://git-wip-us.apache.org/repos/asf/mahout/blob/034790cc/spark/src/test/scala/org/apache/mahout/drivers/ItemSimilarityDriverSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/drivers/ItemSimilarityDriverSuite.scala b/spark/src/test/scala/org/apache/mahout/drivers/ItemSimilarityDriverSuite.scala
index 628d981..fc84577 100644
--- a/spark/src/test/scala/org/apache/mahout/drivers/ItemSimilarityDriverSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/drivers/ItemSimilarityDriverSuite.scala
@@ -1,832 +1,832 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.drivers
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{Path, FileSystem}
-import org.apache.mahout.math.indexeddataset.{BiDictionary, IndexedDataset}
-import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
-import org.scalatest.{ConfigMap, FunSuite}
-import org.apache.mahout.sparkbindings._
-import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
-import org.apache.mahout.math.drm._
-import org.apache.mahout.math.scalabindings._
-
-import scala.collection.immutable.HashMap
-
-//todo: take out, only for temp tests
-
-import org.apache.mahout.math.scalabindings._
-import RLikeOps._
-import org.apache.mahout.math.drm._
-import RLikeDrmOps._
-import scala.collection.JavaConversions._
-
-
-class ItemSimilarityDriverSuite extends FunSuite with DistributedSparkSuite {
-
- /*
- final val matrixLLRCoocAtAControl = dense(
- (0.0, 0.6331745808516107, 0.0, 0.0, 0.0),
- (0.6331745808516107, 0.0, 0.0, 0.0, 0.0),
- (0.0, 0.0, 0.0, 0.6331745808516107, 0.0),
- (0.0, 0.0, 0.6331745808516107, 0.0, 0.0),
- (0.0, 0.0, 0.0, 0.0, 0.0))
-
- final val matrixLLRCoocBtAControl = dense(
- (1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 0.0),
- (0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.0),
- (0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.0),
- (1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 0.0),
- (0.0, 0.0, 0.6795961471815897, 0.0, 4.498681156950466))
- */
-
-
- final val SelfSimilairtyLines = Iterable(
- "galaxy\tnexus:1.7260924347106847",
- "ipad\tiphone:1.7260924347106847",
- "nexus\tgalaxy:1.7260924347106847",
- "iphone\tipad:1.7260924347106847",
- "surface")
-
- val CrossSimilarityLines = Iterable(
- "iphone\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
- "ipad\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
- "nexus\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
- "galaxy\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
- "surface\tsurface:4.498681156950466 nexus:0.6795961471815897")
-
- // todo: a better test would be to sort each vector by itemID and compare rows, tokens misses some error cases
- final val SelfSimilairtyTokens = tokenize(Iterable(
- "galaxy\tnexus:1.7260924347106847",
- "ipad\tiphone:1.7260924347106847",
- "nexus\tgalaxy:1.7260924347106847",
- "iphone\tipad:1.7260924347106847",
- "surface"))
-
- val CrossSimilarityTokens = tokenize(Iterable(
- "iphone\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
- "ipad\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
- "nexus\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
- "galaxy\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
- "surface\tsurface:4.498681156950466 nexus:0.6795961471815897"))
-
- /*
- //Clustered Spark and HDFS, not a good everyday build test
- ItemSimilarityDriver.main(Array(
- "--input", "hdfs://occam4:54310/user/pat/spark-itemsimilarity/cf-data.txt",
- "--output", "hdfs://occam4:54310/user/pat/spark-itemsimilarity/similarityMatrices/",
- "--master", "spark://occam4:7077",
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1"))
- */
- // local multi-threaded Spark with HDFS using large dataset
- // not a good build test.
- /*
- ItemSimilarityDriver.main(Array(
- "--input", "hdfs://occam4:54310/user/pat/xrsj/ratings_data.txt",
- "--output", "hdfs://occam4:54310/user/pat/xrsj/similarityMatrices/",
- "--master", "local[4]",
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1"))
- */
-
- test("ItemSimilarityDriver, non-full-spec CSV") {
-
- val InFile = TmpDir + "in-file.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,iphone",
- "u1,view,ipad",
- "u1,view,nexus",
- "u1,view,galaxy",
- "u2,view,iphone",
- "u2,view,ipad",
- "u2,view,nexus",
- "u2,view,galaxy",
- "u3,view,surface",
- "u3,view,nexus",
- "u4,view,iphone",
- "u4,view,ipad",
- "u4,view,galaxy")
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1",
- "--writeAllDatasets"))
-
- // todo: these comparisons rely on a sort producing the same lines, which could possibly
- // fail since the sort is on value and these can be the same for all items in a vector
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
- }
-
-
-
- test("ItemSimilarityDriver TSV ") {
-
- val InFile = TmpDir + "in-file.tsv/"
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1\tpurchase\tiphone",
- "u1\tpurchase\tipad",
- "u2\tpurchase\tnexus",
- "u2\tpurchase\tgalaxy",
- "u3\tpurchase\tsurface",
- "u4\tpurchase\tiphone",
- "u4\tpurchase\tgalaxy",
- "u1\tview\tiphone",
- "u1\tview\tipad",
- "u1\tview\tnexus",
- "u1\tview\tgalaxy",
- "u2\tview\tiphone",
- "u2\tview\tipad",
- "u2\tview\tnexus",
- "u2\tview\tgalaxy",
- "u3\tview\tsurface",
- "u3\tview\tnexus",
- "u4\tview\tiphone",
- "u4\tview\tipad",
- "u4\tview\tgalaxy")
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", "[,\t]",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1"))
-
- // todo: a better test would be to get sorted vectors and compare rows instead of tokens, this might miss
- // some error cases
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
-
- }
-
- test("ItemSimilarityDriver log-ish files") {
-
- val InFile = TmpDir + "in-file.log/"
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "2014-06-23 14:46:53.115\tu1\tpurchase\trandom text\tiphone",
- "2014-06-23 14:46:53.115\tu1\tpurchase\trandom text\tipad",
- "2014-06-23 14:46:53.115\tu2\tpurchase\trandom text\tnexus",
- "2014-06-23 14:46:53.115\tu2\tpurchase\trandom text\tgalaxy",
- "2014-06-23 14:46:53.115\tu3\tpurchase\trandom text\tsurface",
- "2014-06-23 14:46:53.115\tu4\tpurchase\trandom text\tiphone",
- "2014-06-23 14:46:53.115\tu4\tpurchase\trandom text\tgalaxy",
- "2014-06-23 14:46:53.115\tu1\tview\trandom text\tiphone",
- "2014-06-23 14:46:53.115\tu1\tview\trandom text\tipad",
- "2014-06-23 14:46:53.115\tu1\tview\trandom text\tnexus",
- "2014-06-23 14:46:53.115\tu1\tview\trandom text\tgalaxy",
- "2014-06-23 14:46:53.115\tu2\tview\trandom text\tiphone",
- "2014-06-23 14:46:53.115\tu2\tview\trandom text\tipad",
- "2014-06-23 14:46:53.115\tu2\tview\trandom text\tnexus",
- "2014-06-23 14:46:53.115\tu2\tview\trandom text\tgalaxy",
- "2014-06-23 14:46:53.115\tu3\tview\trandom text\tsurface",
- "2014-06-23 14:46:53.115\tu3\tview\trandom text\tnexus",
- "2014-06-23 14:46:53.115\tu4\tview\trandom text\tiphone",
- "2014-06-23 14:46:53.115\tu4\tview\trandom text\tipad",
- "2014-06-23 14:46:53.115\tu4\tview\trandom text\tgalaxy")
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", "\t",
- "--itemIDColumn", "4",
- "--rowIDColumn", "1",
- "--filterColumn", "2"))
-
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
-
- }
-
- test("ItemSimilarityDriver legacy supported file format") {
-
- val InDir = TmpDir + "in-dir/"
- val InFilename = "in-file.tsv"
- val InPath = InDir + InFilename
-
- val OutPath = TmpDir + "similarity-matrices"
-
- val lines = Array(
- "0,0,1",
- "0,1,1",
- "1,2,1",
- "1,3,1",
- "2,4,1",
- "3,0,1",
- "3,3,1")
-
- val Answer = tokenize(Iterable(
- "0\t1:1.7260924347106847",
- "3\t2:1.7260924347106847",
- "1\t0:1.7260924347106847",
- "4",
- "2\t3:1.7260924347106847"))
-
- // this creates one part-0000 file in the directory
- mahoutCtx.parallelize(lines).coalesce(1, shuffle = true).saveAsTextFile(InDir)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- val fs = FileSystem.get(new Configuration())
- //rename part-00000 to something.tsv
- fs.rename(new Path(InDir + "part-00000"), new Path(InPath))
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InPath,
- "--output", OutPath,
- "--master", masterUrl))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs Answer
-
- }
-
- test("ItemSimilarityDriver write search engine output") {
-
- val InDir = TmpDir + "in-dir/"
- val InFilename = "in-file.tsv"
- val InPath = InDir + InFilename
-
- val OutPath = TmpDir + "similarity-matrices"
-
- val lines = Array(
- "0,0,1",
- "0,1,1",
- "1,2,1",
- "1,3,1",
- "2,4,1",
- "3,0,1",
- "3,3,1")
-
- val Answer = tokenize(Iterable(
- "0\t1",
- "3\t2",
- "1\t0",
- "4",
- "2\t3"))
-
- // this creates one part-0000 file in the directory
- mahoutCtx.parallelize(lines).coalesce(1, shuffle = true).saveAsTextFile(InDir)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- val fs = FileSystem.get(new Configuration())
- //rename part-00000 to something.tsv
- fs.rename(new Path(InDir + "part-00000"), new Path(InPath))
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InPath,
- "--output", OutPath,
- "--master", masterUrl,
- "--omitStrength"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs Answer
-
- }
-
- test("ItemSimilarityDriver recursive file discovery using filename patterns") {
- //directory structure using the following
- // tmp/data/m1.tsv
- // tmp/data/more-data/another-dir/m2.tsv
- val M1Lines = Array(
- "u1\tpurchase\tiphone",
- "u1\tpurchase\tipad",
- "u2\tpurchase\tnexus",
- "u2\tpurchase\tgalaxy",
- "u3\tpurchase\tsurface",
- "u4\tpurchase\tiphone",
- "u4\tpurchase\tgalaxy")
-
- val M2Lines = Array(
- "u1\tview\tiphone",
- "u1\tview\tipad",
- "u1\tview\tnexus",
- "u1\tview\tgalaxy",
- "u2\tview\tiphone",
- "u2\tview\tipad",
- "u2\tview\tnexus",
- "u2\tview\tgalaxy",
- "u3\tview\tsurface",
- "u3\tview\tnexus",
- "u4\tview\tiphone",
- "u4\tview\tipad",
- "u4\tview\tgalaxy")
-
- val InFilenameM1 = "m1.tsv"
- val InDirM1 = TmpDir + "data/"
- val InPathM1 = InDirM1 + InFilenameM1
- val InFilenameM2 = "m2.tsv"
- val InDirM2 = TmpDir + "data/more-data/another-dir/"
- val InPathM2 = InDirM2 + InFilenameM2
-
- val InPathStart = TmpDir + "data/"
- val OutPath = TmpDir + "similarity-matrices"
-
- // this creates one part-0000 file in the directory
- mahoutCtx.parallelize(M1Lines).coalesce(1, shuffle = true).saveAsTextFile(InDirM1)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- val fs = FileSystem.get(new Configuration())
- //rename part-00000 to something.tsv
- fs.rename(new Path(InDirM1 + "part-00000"), new Path(InPathM1))
-
- // this creates one part-0000 file in the directory
- mahoutCtx.parallelize(M2Lines).coalesce(1, shuffle = true).saveAsTextFile(InDirM2)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- //rename part-00000 to tmp/some-location/something.tsv
- fs.rename(new Path(InDirM2 + "part-00000"), new Path(InPathM2))
-
- // local multi-threaded Spark with default FS, suitable for build tests but need better location for data
-
- ItemSimilarityDriver.main(Array(
- "--input", InPathStart,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", "\t",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1",
- "--filenamePattern", "m..tsv",
- "--recursive"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
-
- }
-
- test("ItemSimilarityDriver, two input paths") {
-
- val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
- val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,iphone",
- "u1,view,ipad",
- "u1,view,nexus",
- "u1,view,galaxy",
- "u2,view,iphone",
- "u2,view,ipad",
- "u2,view,nexus",
- "u2,view,galaxy",
- "u3,view,surface",
- "u3,view,nexus",
- "u4,view,iphone",
- "u4,view,ipad",
- "u4,view,galaxy")
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
- val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile1,
- "--input2", InFile2,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
-
- }
-
- test("ItemSimilarityDriver, two inputs of different dimensions") {
-
- val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
- val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- // remove one user so A'B will be of different dimensions
- // ItemSimilarityDriver should create one unified user dictionary and so account for this
- // discrepancy as a blank row: "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,iphone",
- "u1,view,ipad",
- "u1,view,nexus",
- "u1,view,galaxy",
- "u2,view,iphone",
- "u2,view,ipad",
- "u2,view,nexus",
- "u2,view,galaxy",
- "u3,view,surface",
- "u3,view,nexus",
- "u4,view,iphone",
- "u4,view,ipad",
- "u4,view,galaxy")
-
- val UnequalDimensionsSelfSimilarity = tokenize(Iterable(
- "ipad\tiphone:1.7260924347106847",
- "iphone\tipad:1.7260924347106847",
- "nexus\tgalaxy:1.7260924347106847",
- "galaxy\tnexus:1.7260924347106847"))
-
- //only surface purchase was removed so no cross-similarity for surface
- val UnequalDimensionsCrossSimilarity = tokenize(Iterable(
- "galaxy\tgalaxy:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 nexus:1.7260924347106847",
- "iphone\tgalaxy:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 nexus:1.7260924347106847",
- "ipad\tgalaxy:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 nexus:0.6795961471815897",
- "nexus\tiphone:0.6795961471815897 ipad:0.6795961471815897 nexus:0.6795961471815897 galaxy:0.6795961471815897"))
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
- val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile1,
- "--input2", InFile2,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs UnequalDimensionsSelfSimilarity
- tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarity
-
- }
-
- test("ItemSimilarityDriver cross similarity two separate items spaces") {
- /* cross-similarity with category views, same user space
- phones tablets mobile_acc soap
- u1 0 1 1 0
- u2 1 1 1 0
- u3 0 0 1 0
- u4 1 1 0 1
- */
- val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
- val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,phones",
- "u1,view,mobile_acc",
- "u2,view,phones",
- "u2,view,tablets",
- "u2,view,mobile_acc",
- "u3,view,mobile_acc",
- "u4,view,phones",
- "u4,view,tablets",
- "u4,view,soap")
-
- val UnequalDimensionsCrossSimilarityLines = tokenize(Iterable(
- "iphone\tmobile_acc:1.7260924347106847 soap:1.7260924347106847 phones:1.7260924347106847",
- "surface\tmobile_acc:0.6795961471815897",
- "nexus\ttablets:1.7260924347106847 mobile_acc:0.6795961471815897 phones:0.6795961471815897",
- "galaxy\ttablets:5.545177444479561 soap:1.7260924347106847 phones:1.7260924347106847 " +
- "mobile_acc:1.7260924347106847",
- "ipad\tmobile_acc:0.6795961471815897 phones:0.6795961471815897"))
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
- val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile1,
- "--input2", InFile2,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1",
- "--writeAllDatasets"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
-
- }
-
- test("A.t %*% B after changing row cardinality of A") {
- // todo: move to math tests but this is Spark specific
-
- val a = dense(
- (1.0, 1.0))
-
- val b = dense(
- (1.0, 1.0),
- (1.0, 1.0),
- (1.0, 1.0))
-
- val inCoreABiggertBAnswer = dense(
- (1.0, 1.0),
- (1.0, 1.0))
-
- val drmA = drmParallelize(m = a, numPartitions = 2)
- val drmB = drmParallelize(m = b, numPartitions = 2)
-
- // modified to return a new CheckpointedDrm so maintains immutability but still only increases the row cardinality
- // by returning new CheckpointedDrmSpark[K](rdd, n, ncol, _cacheStorageLevel ) Hack for now.
- val drmABigger = drmWrap[Int](drmA.rdd, 3, 2)
-
-
- val ABiggertB = drmABigger.t %*% drmB
- val inCoreABiggertB = ABiggertB.collect
-
- assert(inCoreABiggertB === inCoreABiggertBAnswer)
-
- val bp = 0
- }
-
- test("Changing row cardinality of an IndexedDataset") {
-
- val a = dense(
- (1.0, 1.0))
-
- val drmA = drmParallelize(m = a, numPartitions = 2)
- val emptyIDs = new BiDictionary(new HashMap[String, Int]())
- val indexedDatasetA = new IndexedDatasetSpark(drmA, emptyIDs, emptyIDs)
- val biggerIDSA = indexedDatasetA.newRowCardinality(5)
-
- assert(biggerIDSA.matrix.nrow == 5)
-
- }
-
- test("ItemSimilarityDriver cross similarity two separate items spaces, missing rows in B") {
- /* cross-similarity with category views, same user space
- phones tablets mobile_acc soap
- u1 0 1 1 0
- u2 1 1 1 0
-removed ==> u3 0 0 1 0
- u4 1 1 0 1
- */
- val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
- val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,phones",
- "u1,view,mobile_acc",
- "u2,view,phones",
- "u2,view,tablets",
- "u2,view,mobile_acc",
- //"u3,view,mobile_acc",// if this line is removed the cross-cooccurrence should work
- "u4,view,phones",
- "u4,view,tablets",
- "u4,view,soap")
-
- val UnequalDimensionsCrossSimilarityLines = tokenize(Iterable(
- "galaxy\ttablets:5.545177444479561 soap:1.7260924347106847 phones:1.7260924347106847",
- "ipad\tmobile_acc:1.7260924347106847 phones:0.6795961471815897",
- "surface",
- "nexus\tmobile_acc:1.7260924347106847 tablets:1.7260924347106847 phones:0.6795961471815897",
- "iphone\tsoap:1.7260924347106847 phones:1.7260924347106847"))
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
- val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile1,
- "--input2", InFile2,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1",
- "--writeAllDatasets"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
- tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
- }
-
- test("ItemSimilarityDriver cross similarity two separate items spaces, adding rows in B") {
- /* cross-similarity with category views, same user space
- phones tablets mobile_acc soap
- u1 0 1 1 0
- u2 1 1 1 0
-removed ==> u3 0 0 1 0
- u4 1 1 0 1
- */
- val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
- val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
- val OutPath = TmpDir + "similarity-matrices/"
-
- val lines = Array(
- "u1,purchase,iphone",
- "u1,purchase,ipad",
- "u2,purchase,nexus",
- "u2,purchase,galaxy",
- "u3,purchase,surface",
- "u4,purchase,iphone",
- "u4,purchase,galaxy",
- "u1,view,phones",
- "u1,view,mobile_acc",
- "u2,view,phones",
- "u2,view,tablets",
- "u2,view,mobile_acc",
- "u3,view,mobile_acc",// if this line is removed the cross-cooccurrence should work
- "u4,view,phones",
- "u4,view,tablets",
- "u4,view,soap",
- "u5,view,soap")
-
- val UnequalDimensionsSimilarityTokens = List(
- "galaxy",
- "nexus:2.231435513142097",
- "iphone:0.13844293808390518",
- "nexus",
- "galaxy:2.231435513142097",
- "ipad",
- "iphone:2.231435513142097",
- "surface",
- "iphone",
- "ipad:2.231435513142097",
- "galaxy:0.13844293808390518")
-
- val UnequalDimensionsCrossSimilarityLines = List(
- "galaxy",
- "tablets:6.730116670092563",
- "phones:2.9110316603236868",
- "soap:0.13844293808390518",
- "mobile_acc:0.13844293808390518",
- "nexus",
- "tablets:2.231435513142097",
- "mobile_acc:1.184939225613002",
- "phones:1.184939225613002",
- "ipad", "mobile_acc:1.184939225613002",
- "phones:1.184939225613002",
- "surface",
- "mobile_acc:1.184939225613002",
- "iphone",
- "phones:2.9110316603236868",
- "soap:0.13844293808390518",
- "tablets:0.13844293808390518",
- "mobile_acc:0.13844293808390518")
-
- // this will create multiple part-xxxxx files in the InFile dir but other tests will
- // take account of one actual file
- val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
- val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
-
- // local multi-threaded Spark with default HDFS
- ItemSimilarityDriver.main(Array(
- "--input", InFile1,
- "--input2", InFile2,
- "--output", OutPath,
- "--master", masterUrl,
- "--filter1", "purchase",
- "--filter2", "view",
- "--inDelim", ",",
- "--itemIDColumn", "2",
- "--rowIDColumn", "0",
- "--filterColumn", "1",
- "--writeAllDatasets"))
-
- val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
- val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
- tokenize(similarityLines) should contain theSameElementsAs UnequalDimensionsSimilarityTokens
- tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
- }
-
- // convert into an Iterable of tokens for 'should contain theSameElementsAs Iterable'
- def tokenize(a: Iterable[String]): Iterable[String] = {
- var r: Iterable[String] = Iterable()
- a.foreach { l =>
- l.split("\t").foreach { s =>
- r = r ++ s.split("[\t ]")
- }
- }
- r
- }
-
- override protected def beforeAll(configMap: ConfigMap) {
- super.beforeAll(configMap)
- ItemSimilarityDriver.useContext(mahoutCtx)
- }
-
-}
+///*
+// * Licensed to the Apache Software Foundation (ASF) under one or more
+// * contributor license agreements. See the NOTICE file distributed with
+// * this work for additional information regarding copyright ownership.
+// * The ASF licenses this file to You under the Apache License, Version 2.0
+// * (the "License"); you may not use this file except in compliance with
+// * the License. You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// */
+//
+//package org.apache.mahout.drivers
+//
+//import org.apache.hadoop.conf.Configuration
+//import org.apache.hadoop.fs.{Path, FileSystem}
+//import org.apache.mahout.math.indexeddataset.{BiDictionary, IndexedDataset}
+//import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark
+//import org.scalatest.{ConfigMap, FunSuite}
+//import org.apache.mahout.sparkbindings._
+//import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+//import org.apache.mahout.math.drm._
+//import org.apache.mahout.math.scalabindings._
+//
+//import scala.collection.immutable.HashMap
+//
+////todo: take out, only for temp tests
+//
+//import org.apache.mahout.math.scalabindings._
+//import RLikeOps._
+//import org.apache.mahout.math.drm._
+//import RLikeDrmOps._
+//import scala.collection.JavaConversions._
+//
+//
+//class ItemSimilarityDriverSuite extends FunSuite with DistributedSparkSuite {
+//
+// /*
+// final val matrixLLRCoocAtAControl = dense(
+// (0.0, 0.6331745808516107, 0.0, 0.0, 0.0),
+// (0.6331745808516107, 0.0, 0.0, 0.0, 0.0),
+// (0.0, 0.0, 0.0, 0.6331745808516107, 0.0),
+// (0.0, 0.0, 0.6331745808516107, 0.0, 0.0),
+// (0.0, 0.0, 0.0, 0.0, 0.0))
+//
+// final val matrixLLRCoocBtAControl = dense(
+// (1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 0.0),
+// (0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.0),
+// (0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.6795961471815897, 0.0),
+// (1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 1.7260924347106847, 0.0),
+// (0.0, 0.0, 0.6795961471815897, 0.0, 4.498681156950466))
+// */
+//
+//
+// final val SelfSimilairtyLines = Iterable(
+// "galaxy\tnexus:1.7260924347106847",
+// "ipad\tiphone:1.7260924347106847",
+// "nexus\tgalaxy:1.7260924347106847",
+// "iphone\tipad:1.7260924347106847",
+// "surface")
+//
+// val CrossSimilarityLines = Iterable(
+// "iphone\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
+// "ipad\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
+// "nexus\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
+// "galaxy\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
+// "surface\tsurface:4.498681156950466 nexus:0.6795961471815897")
+//
+// // todo: a better test would be to sort each vector by itemID and compare rows, tokens misses some error cases
+// final val SelfSimilairtyTokens = tokenize(Iterable(
+// "galaxy\tnexus:1.7260924347106847",
+// "ipad\tiphone:1.7260924347106847",
+// "nexus\tgalaxy:1.7260924347106847",
+// "iphone\tipad:1.7260924347106847",
+// "surface"))
+//
+// val CrossSimilarityTokens = tokenize(Iterable(
+// "iphone\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
+// "ipad\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
+// "nexus\tnexus:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 galaxy:0.6795961471815897",
+// "galaxy\tnexus:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 galaxy:1.7260924347106847",
+// "surface\tsurface:4.498681156950466 nexus:0.6795961471815897"))
+//
+// /*
+// //Clustered Spark and HDFS, not a good everyday build test
+// ItemSimilarityDriver.main(Array(
+// "--input", "hdfs://occam4:54310/user/pat/spark-itemsimilarity/cf-data.txt",
+// "--output", "hdfs://occam4:54310/user/pat/spark-itemsimilarity/similarityMatrices/",
+// "--master", "spark://occam4:7077",
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1"))
+// */
+// // local multi-threaded Spark with HDFS using large dataset
+// // not a good build test.
+// /*
+// ItemSimilarityDriver.main(Array(
+// "--input", "hdfs://occam4:54310/user/pat/xrsj/ratings_data.txt",
+// "--output", "hdfs://occam4:54310/user/pat/xrsj/similarityMatrices/",
+// "--master", "local[4]",
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1"))
+// */
+//
+// test("ItemSimilarityDriver, non-full-spec CSV") {
+//
+// val InFile = TmpDir + "in-file.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,iphone",
+// "u1,view,ipad",
+// "u1,view,nexus",
+// "u1,view,galaxy",
+// "u2,view,iphone",
+// "u2,view,ipad",
+// "u2,view,nexus",
+// "u2,view,galaxy",
+// "u3,view,surface",
+// "u3,view,nexus",
+// "u4,view,iphone",
+// "u4,view,ipad",
+// "u4,view,galaxy")
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1",
+// "--writeAllDatasets"))
+//
+// // todo: these comparisons rely on a sort producing the same lines, which could possibly
+// // fail since the sort is on value and these can be the same for all items in a vector
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
+// }
+//
+//
+//
+// test("ItemSimilarityDriver TSV ") {
+//
+// val InFile = TmpDir + "in-file.tsv/"
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1\tpurchase\tiphone",
+// "u1\tpurchase\tipad",
+// "u2\tpurchase\tnexus",
+// "u2\tpurchase\tgalaxy",
+// "u3\tpurchase\tsurface",
+// "u4\tpurchase\tiphone",
+// "u4\tpurchase\tgalaxy",
+// "u1\tview\tiphone",
+// "u1\tview\tipad",
+// "u1\tview\tnexus",
+// "u1\tview\tgalaxy",
+// "u2\tview\tiphone",
+// "u2\tview\tipad",
+// "u2\tview\tnexus",
+// "u2\tview\tgalaxy",
+// "u3\tview\tsurface",
+// "u3\tview\tnexus",
+// "u4\tview\tiphone",
+// "u4\tview\tipad",
+// "u4\tview\tgalaxy")
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", "[,\t]",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1"))
+//
+// // todo: a better test would be to get sorted vectors and compare rows instead of tokens, this might miss
+// // some error cases
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
+//
+// }
+//
+// test("ItemSimilarityDriver log-ish files") {
+//
+// val InFile = TmpDir + "in-file.log/"
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "2014-06-23 14:46:53.115\tu1\tpurchase\trandom text\tiphone",
+// "2014-06-23 14:46:53.115\tu1\tpurchase\trandom text\tipad",
+// "2014-06-23 14:46:53.115\tu2\tpurchase\trandom text\tnexus",
+// "2014-06-23 14:46:53.115\tu2\tpurchase\trandom text\tgalaxy",
+// "2014-06-23 14:46:53.115\tu3\tpurchase\trandom text\tsurface",
+// "2014-06-23 14:46:53.115\tu4\tpurchase\trandom text\tiphone",
+// "2014-06-23 14:46:53.115\tu4\tpurchase\trandom text\tgalaxy",
+// "2014-06-23 14:46:53.115\tu1\tview\trandom text\tiphone",
+// "2014-06-23 14:46:53.115\tu1\tview\trandom text\tipad",
+// "2014-06-23 14:46:53.115\tu1\tview\trandom text\tnexus",
+// "2014-06-23 14:46:53.115\tu1\tview\trandom text\tgalaxy",
+// "2014-06-23 14:46:53.115\tu2\tview\trandom text\tiphone",
+// "2014-06-23 14:46:53.115\tu2\tview\trandom text\tipad",
+// "2014-06-23 14:46:53.115\tu2\tview\trandom text\tnexus",
+// "2014-06-23 14:46:53.115\tu2\tview\trandom text\tgalaxy",
+// "2014-06-23 14:46:53.115\tu3\tview\trandom text\tsurface",
+// "2014-06-23 14:46:53.115\tu3\tview\trandom text\tnexus",
+// "2014-06-23 14:46:53.115\tu4\tview\trandom text\tiphone",
+// "2014-06-23 14:46:53.115\tu4\tview\trandom text\tipad",
+// "2014-06-23 14:46:53.115\tu4\tview\trandom text\tgalaxy")
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(InFile)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", "\t",
+// "--itemIDColumn", "4",
+// "--rowIDColumn", "1",
+// "--filterColumn", "2"))
+//
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
+//
+// }
+//
+// test("ItemSimilarityDriver legacy supported file format") {
+//
+// val InDir = TmpDir + "in-dir/"
+// val InFilename = "in-file.tsv"
+// val InPath = InDir + InFilename
+//
+// val OutPath = TmpDir + "similarity-matrices"
+//
+// val lines = Array(
+// "0,0,1",
+// "0,1,1",
+// "1,2,1",
+// "1,3,1",
+// "2,4,1",
+// "3,0,1",
+// "3,3,1")
+//
+// val Answer = tokenize(Iterable(
+// "0\t1:1.7260924347106847",
+// "3\t2:1.7260924347106847",
+// "1\t0:1.7260924347106847",
+// "4",
+// "2\t3:1.7260924347106847"))
+//
+// // this creates one part-0000 file in the directory
+// mahoutCtx.parallelize(lines).coalesce(1, shuffle = true).saveAsTextFile(InDir)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// val fs = FileSystem.get(new Configuration())
+// //rename part-00000 to something.tsv
+// fs.rename(new Path(InDir + "part-00000"), new Path(InPath))
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InPath,
+// "--output", OutPath,
+// "--master", masterUrl))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs Answer
+//
+// }
+//
+// test("ItemSimilarityDriver write search engine output") {
+//
+// val InDir = TmpDir + "in-dir/"
+// val InFilename = "in-file.tsv"
+// val InPath = InDir + InFilename
+//
+// val OutPath = TmpDir + "similarity-matrices"
+//
+// val lines = Array(
+// "0,0,1",
+// "0,1,1",
+// "1,2,1",
+// "1,3,1",
+// "2,4,1",
+// "3,0,1",
+// "3,3,1")
+//
+// val Answer = tokenize(Iterable(
+// "0\t1",
+// "3\t2",
+// "1\t0",
+// "4",
+// "2\t3"))
+//
+// // this creates one part-0000 file in the directory
+// mahoutCtx.parallelize(lines).coalesce(1, shuffle = true).saveAsTextFile(InDir)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// val fs = FileSystem.get(new Configuration())
+// //rename part-00000 to something.tsv
+// fs.rename(new Path(InDir + "part-00000"), new Path(InPath))
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InPath,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--omitStrength"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs Answer
+//
+// }
+//
+// test("ItemSimilarityDriver recursive file discovery using filename patterns") {
+// //directory structure using the following
+// // tmp/data/m1.tsv
+// // tmp/data/more-data/another-dir/m2.tsv
+// val M1Lines = Array(
+// "u1\tpurchase\tiphone",
+// "u1\tpurchase\tipad",
+// "u2\tpurchase\tnexus",
+// "u2\tpurchase\tgalaxy",
+// "u3\tpurchase\tsurface",
+// "u4\tpurchase\tiphone",
+// "u4\tpurchase\tgalaxy")
+//
+// val M2Lines = Array(
+// "u1\tview\tiphone",
+// "u1\tview\tipad",
+// "u1\tview\tnexus",
+// "u1\tview\tgalaxy",
+// "u2\tview\tiphone",
+// "u2\tview\tipad",
+// "u2\tview\tnexus",
+// "u2\tview\tgalaxy",
+// "u3\tview\tsurface",
+// "u3\tview\tnexus",
+// "u4\tview\tiphone",
+// "u4\tview\tipad",
+// "u4\tview\tgalaxy")
+//
+// val InFilenameM1 = "m1.tsv"
+// val InDirM1 = TmpDir + "data/"
+// val InPathM1 = InDirM1 + InFilenameM1
+// val InFilenameM2 = "m2.tsv"
+// val InDirM2 = TmpDir + "data/more-data/another-dir/"
+// val InPathM2 = InDirM2 + InFilenameM2
+//
+// val InPathStart = TmpDir + "data/"
+// val OutPath = TmpDir + "similarity-matrices"
+//
+// // this creates one part-0000 file in the directory
+// mahoutCtx.parallelize(M1Lines).coalesce(1, shuffle = true).saveAsTextFile(InDirM1)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// val fs = FileSystem.get(new Configuration())
+// //rename part-00000 to something.tsv
+// fs.rename(new Path(InDirM1 + "part-00000"), new Path(InPathM1))
+//
+// // this creates one part-0000 file in the directory
+// mahoutCtx.parallelize(M2Lines).coalesce(1, shuffle = true).saveAsTextFile(InDirM2)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// //rename part-00000 to tmp/some-location/something.tsv
+// fs.rename(new Path(InDirM2 + "part-00000"), new Path(InPathM2))
+//
+// // local multi-threaded Spark with default FS, suitable for build tests but need better location for data
+//
+// ItemSimilarityDriver.main(Array(
+// "--input", InPathStart,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", "\t",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1",
+// "--filenamePattern", "m..tsv",
+// "--recursive"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
+//
+// }
+//
+// test("ItemSimilarityDriver, two input paths") {
+//
+// val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
+// val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,iphone",
+// "u1,view,ipad",
+// "u1,view,nexus",
+// "u1,view,galaxy",
+// "u2,view,iphone",
+// "u2,view,ipad",
+// "u2,view,nexus",
+// "u2,view,galaxy",
+// "u3,view,surface",
+// "u3,view,nexus",
+// "u4,view,iphone",
+// "u4,view,ipad",
+// "u4,view,galaxy")
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
+// val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile1,
+// "--input2", InFile2,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(crossSimilarityLines) should contain theSameElementsAs CrossSimilarityTokens
+//
+// }
+//
+// test("ItemSimilarityDriver, two inputs of different dimensions") {
+//
+// val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
+// val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// // remove one user so A'B will be of different dimensions
+// // ItemSimilarityDriver should create one unified user dictionary and so account for this
+// // discrepancy as a blank row: "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,iphone",
+// "u1,view,ipad",
+// "u1,view,nexus",
+// "u1,view,galaxy",
+// "u2,view,iphone",
+// "u2,view,ipad",
+// "u2,view,nexus",
+// "u2,view,galaxy",
+// "u3,view,surface",
+// "u3,view,nexus",
+// "u4,view,iphone",
+// "u4,view,ipad",
+// "u4,view,galaxy")
+//
+// val UnequalDimensionsSelfSimilarity = tokenize(Iterable(
+// "ipad\tiphone:1.7260924347106847",
+// "iphone\tipad:1.7260924347106847",
+// "nexus\tgalaxy:1.7260924347106847",
+// "galaxy\tnexus:1.7260924347106847"))
+//
+// //only surface purchase was removed so no cross-similarity for surface
+// val UnequalDimensionsCrossSimilarity = tokenize(Iterable(
+// "galaxy\tgalaxy:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 nexus:1.7260924347106847",
+// "iphone\tgalaxy:1.7260924347106847 iphone:1.7260924347106847 ipad:1.7260924347106847 nexus:1.7260924347106847",
+// "ipad\tgalaxy:0.6795961471815897 iphone:0.6795961471815897 ipad:0.6795961471815897 nexus:0.6795961471815897",
+// "nexus\tiphone:0.6795961471815897 ipad:0.6795961471815897 nexus:0.6795961471815897 galaxy:0.6795961471815897"))
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
+// val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile1,
+// "--input2", InFile2,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs UnequalDimensionsSelfSimilarity
+// tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarity
+//
+// }
+//
+// test("ItemSimilarityDriver cross similarity two separate items spaces") {
+// /* cross-similarity with category views, same user space
+// phones tablets mobile_acc soap
+// u1 0 1 1 0
+// u2 1 1 1 0
+// u3 0 0 1 0
+// u4 1 1 0 1
+// */
+// val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
+// val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,phones",
+// "u1,view,mobile_acc",
+// "u2,view,phones",
+// "u2,view,tablets",
+// "u2,view,mobile_acc",
+// "u3,view,mobile_acc",
+// "u4,view,phones",
+// "u4,view,tablets",
+// "u4,view,soap")
+//
+// val UnequalDimensionsCrossSimilarityLines = tokenize(Iterable(
+// "iphone\tmobile_acc:1.7260924347106847 soap:1.7260924347106847 phones:1.7260924347106847",
+// "surface\tmobile_acc:0.6795961471815897",
+// "nexus\ttablets:1.7260924347106847 mobile_acc:0.6795961471815897 phones:0.6795961471815897",
+// "galaxy\ttablets:5.545177444479561 soap:1.7260924347106847 phones:1.7260924347106847 " +
+// "mobile_acc:1.7260924347106847",
+// "ipad\tmobile_acc:0.6795961471815897 phones:0.6795961471815897"))
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
+// val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile1,
+// "--input2", InFile2,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1",
+// "--writeAllDatasets"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
+//
+// }
+//
+// test("A.t %*% B after changing row cardinality of A") {
+// // todo: move to math tests but this is Spark specific
+//
+// val a = dense(
+// (1.0, 1.0))
+//
+// val b = dense(
+// (1.0, 1.0),
+// (1.0, 1.0),
+// (1.0, 1.0))
+//
+// val inCoreABiggertBAnswer = dense(
+// (1.0, 1.0),
+// (1.0, 1.0))
+//
+// val drmA = drmParallelize(m = a, numPartitions = 2)
+// val drmB = drmParallelize(m = b, numPartitions = 2)
+//
+// // modified to return a new CheckpointedDrm so maintains immutability but still only increases the row cardinality
+// // by returning new CheckpointedDrmSpark[K](rdd, n, ncol, _cacheStorageLevel ) Hack for now.
+// val drmABigger = drmWrap[Int](drmA.rdd, 3, 2)
+//
+//
+// val ABiggertB = drmABigger.t %*% drmB
+// val inCoreABiggertB = ABiggertB.collect
+//
+// assert(inCoreABiggertB === inCoreABiggertBAnswer)
+//
+// val bp = 0
+// }
+//
+// test("Changing row cardinality of an IndexedDataset") {
+//
+// val a = dense(
+// (1.0, 1.0))
+//
+// val drmA = drmParallelize(m = a, numPartitions = 2)
+// val emptyIDs = new BiDictionary(new HashMap[String, Int]())
+// val indexedDatasetA = new IndexedDatasetSpark(drmA, emptyIDs, emptyIDs)
+// val biggerIDSA = indexedDatasetA.newRowCardinality(5)
+//
+// assert(biggerIDSA.matrix.nrow == 5)
+//
+// }
+//
+// test("ItemSimilarityDriver cross similarity two separate items spaces, missing rows in B") {
+// /* cross-similarity with category views, same user space
+// phones tablets mobile_acc soap
+// u1 0 1 1 0
+// u2 1 1 1 0
+//removed ==> u3 0 0 1 0
+// u4 1 1 0 1
+// */
+// val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
+// val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,phones",
+// "u1,view,mobile_acc",
+// "u2,view,phones",
+// "u2,view,tablets",
+// "u2,view,mobile_acc",
+// //"u3,view,mobile_acc",// if this line is removed the cross-cooccurrence should work
+// "u4,view,phones",
+// "u4,view,tablets",
+// "u4,view,soap")
+//
+// val UnequalDimensionsCrossSimilarityLines = tokenize(Iterable(
+// "galaxy\ttablets:5.545177444479561 soap:1.7260924347106847 phones:1.7260924347106847",
+// "ipad\tmobile_acc:1.7260924347106847 phones:0.6795961471815897",
+// "surface",
+// "nexus\tmobile_acc:1.7260924347106847 tablets:1.7260924347106847 phones:0.6795961471815897",
+// "iphone\tsoap:1.7260924347106847 phones:1.7260924347106847"))
+//
+// // this will create multiple part-xxxxx files in the InFile dir but other tests will
+// // take account of one actual file
+// val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
+// val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile1,
+// "--input2", InFile2,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1",
+// "--writeAllDatasets"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs SelfSimilairtyTokens
+// tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
+// }
+//
+// test("ItemSimilarityDriver cross similarity two separate items spaces, adding rows in B") {
+// /* cross-similarity with category views; u5 appears only in B, so B gains a row
+// phones tablets mobile_acc soap
+// u1 1 0 1 0
+// u2 1 1 1 0
+// u3 0 0 1 0
+// u4 1 1 0 1
+//added ==> u5 0 0 0 1
+// */
+// val InFile1 = TmpDir + "in-file1.csv/" //using part files, not single file
+// val InFile2 = TmpDir + "in-file2.csv/" //using part files, not single file
+// val OutPath = TmpDir + "similarity-matrices/"
+//
+// val lines = Array(
+// "u1,purchase,iphone",
+// "u1,purchase,ipad",
+// "u2,purchase,nexus",
+// "u2,purchase,galaxy",
+// "u3,purchase,surface",
+// "u4,purchase,iphone",
+// "u4,purchase,galaxy",
+// "u1,view,phones",
+// "u1,view,mobile_acc",
+// "u2,view,phones",
+// "u2,view,tablets",
+// "u2,view,mobile_acc",
+// "u3,view,mobile_acc",// if this line is removed the cross-cooccurrence should work
+// "u4,view,phones",
+// "u4,view,tablets",
+// "u4,view,soap",
+// "u5,view,soap")
+//
+// val UnequalDimensionsSimilarityTokens = List(
+// "galaxy",
+// "nexus:2.231435513142097",
+// "iphone:0.13844293808390518",
+// "nexus",
+// "galaxy:2.231435513142097",
+// "ipad",
+// "iphone:2.231435513142097",
+// "surface",
+// "iphone",
+// "ipad:2.231435513142097",
+// "galaxy:0.13844293808390518")
+//
+// val UnequalDimensionsCrossSimilarityLines = List(
+// "galaxy",
+// "tablets:6.730116670092563",
+// "phones:2.9110316603236868",
+// "soap:0.13844293808390518",
+// "mobile_acc:0.13844293808390518",
+// "nexus",
+// "tablets:2.231435513142097",
+// "mobile_acc:1.184939225613002",
+// "phones:1.184939225613002",
+// "ipad", "mobile_acc:1.184939225613002",
+// "phones:1.184939225613002",
+// "surface",
+// "mobile_acc:1.184939225613002",
+// "iphone",
+// "phones:2.9110316603236868",
+// "soap:0.13844293808390518",
+// "tablets:0.13844293808390518",
+// "mobile_acc:0.13844293808390518")
+//
+// // this will create multiple part-xxxxx files in the InFile dirs; other tests
+// // cover reading from a single file
+// val linesRdd1 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile1)
+// val linesRdd2 = mahoutCtx.parallelize(lines).saveAsTextFile(InFile2)
+//
+// // local multi-threaded Spark with default HDFS
+// ItemSimilarityDriver.main(Array(
+// "--input", InFile1,
+// "--input2", InFile2,
+// "--output", OutPath,
+// "--master", masterUrl,
+// "--filter1", "purchase",
+// "--filter2", "view",
+// "--inDelim", ",",
+// "--itemIDColumn", "2",
+// "--rowIDColumn", "0",
+// "--filterColumn", "1",
+// "--writeAllDatasets"))
+//
+// val similarityLines = mahoutCtx.textFile(OutPath + "/similarity-matrix/").collect.toIterable
+// val crossSimilarityLines = mahoutCtx.textFile(OutPath + "/cross-similarity-matrix/").collect.toIterable
+// tokenize(similarityLines) should contain theSameElementsAs UnequalDimensionsSimilarityTokens
+// tokenize(crossSimilarityLines) should contain theSameElementsAs UnequalDimensionsCrossSimilarityLines
+// }
+//
+// // convert into an Iterable of tokens for 'should contain theSameElementsAs Iterable'
+// def tokenize(a: Iterable[String]): Iterable[String] = {
+// var r: Iterable[String] = Iterable()
+// a.foreach { l =>
+// l.split("\t").foreach { s =>
+// r = r ++ s.split("[\t ]")
+// }
+// }
+// r
+// }
+//
+// override protected def beforeAll(configMap: ConfigMap) {
+// super.beforeAll(configMap)
+// ItemSimilarityDriver.useContext(mahoutCtx)
+// }
+//
+//}
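
The commented-out cardinality tests above hinge on drmWrap re-wrapping an existing RDD with a larger declared row count, and on IndexedDataset.newRowCardinality doing the same at the dataset level. A minimal standalone sketch of that pattern, assuming the drmWrap, BiDictionary, and newRowCardinality signatures used in the tests and an implicit Mahout Spark context in scope:

import org.apache.mahout.math.indexeddataset.BiDictionary
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.sparkbindings._
import org.apache.mahout.sparkbindings.indexeddataset.IndexedDatasetSpark

import scala.collection.immutable.HashMap

val a = dense((1.0, 1.0))
val drmA = drmParallelize(m = a, numPartitions = 2)

// re-wrap the same RDD declaring 3 rows instead of 1; a new CheckpointedDrm is
// returned, so drmA itself is never mutated
val drmABigger = drmWrap[Int](drmA.rdd, 3, 2)

// the IndexedDataset equivalent: newRowCardinality copies the dataset with a
// larger declared row count
val emptyIDs = new BiDictionary(new HashMap[String, Int]())
val indexedDatasetA = new IndexedDatasetSpark(drmA, emptyIDs, emptyIDs)
val biggerIDSA = indexedDatasetA.newRowCardinality(5)
assert(biggerIDSA.matrix.nrow == 5)
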
http://git-wip-us.apache.org/repos/asf/mahout/blob/034790cc/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala b/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala
index eccddb1..e6f917c 100644
--- a/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/drivers/RowSimilarityDriverSuite.scala
@@ -1,139 +1,139 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.drivers
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path}
-import org.apache.mahout.math.drm.RLikeDrmOps._
-import org.apache.mahout.math.drm._
-import org.apache.mahout.math.scalabindings.RLikeOps._
-import org.apache.mahout.math.scalabindings._
-import org.apache.mahout.sparkbindings._
-import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
-import org.scalatest.{ConfigMap, FunSuite}
-
-
-class RowSimilarityDriverSuite extends FunSuite with DistributedSparkSuite {
-
- val TextDocs = Array(
- "doc1\tNow is the time for all good people to come to aid of their party",
- "doc2\tNow is the time for all good people to come to aid of their country",
- "doc3\tNow is the time for all good people to come to aid of their hood",
- "doc4\tNow is the time for all good people to come to aid of their friends",
- "doc5\tNow is the time for all good people to come to aid of their looser brother",
- "doc6\tThe quick brown fox jumped over the lazy dog",
- "doc7\tThe quick brown fox jumped over the lazy boy",
- "doc8\tThe quick brown fox jumped over the lazy cat",
- "doc9\tThe quick brown fox jumped over the lazy wolverine",
- "doc10\tThe quick brown fox jumped over the lazy cantelope")// yes that's spelled correctly.
-
- test("RowSimilarityDriver text docs no strengths") {
-
- val firstFiveSimDocsTokens = tokenize(Iterable(
- "doc1\tdoc3 doc2 doc4 doc5"))
-
- val lastFiveSimDocsTokens = tokenize(Iterable(
- "doc6\tdoc8 doc10 doc7 doc9"))
-
- val inDir = TmpDir + "in-dir/"
- val inFilename = "in-file.tsv"
- val inPath = inDir + inFilename
-
- val outPath = TmpDir + "similarity-matrices/"
-
-
- // this creates one part-00000 file in the directory
- mahoutCtx.parallelize(TextDocs).coalesce(1, shuffle=true).saveAsTextFile(inDir)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- val fs = FileSystem.get(new Configuration())
- //rename part-00000 to something.tsv
- fs.rename(new Path(inDir + "part-00000"), new Path(inPath))
-
- // local multi-threaded Spark with default HDFS
- RowSimilarityDriver.main(Array(
- "--input", inPath,
- "--output", outPath,
- "--omitStrength",
- "--maxSimilaritiesPerRow", "4", // would get all docs similar if we didn't limit them
- "--master", masterUrl))
-
- val simLines = mahoutCtx.textFile(outPath).collect
- simLines.foreach { line =>
- val lineTokens = line.split("[\t ]")
- if (lineTokens.contains("doc1") ) // docs are two flavors so if only 4 similarities it will effectively classify
- lineTokens should contain theSameElementsAs firstFiveSimDocsTokens
- else
- lineTokens should contain theSameElementsAs lastFiveSimDocsTokens
- }
-
- }
-
- test("RowSimilarityDriver text docs") {
-
- val simDocsTokens = tokenize(Iterable(
- "doc1\tdoc3:27.87301122947484 doc2:27.87301122947484 doc4:27.87301122947484 doc5:23.42278065550721",
- "doc2\tdoc4:27.87301122947484 doc3:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
- "doc3\tdoc4:27.87301122947484 doc2:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
- "doc4\tdoc3:27.87301122947484 doc2:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
- "doc5\tdoc4:23.42278065550721 doc2:23.42278065550721 doc3:23.42278065550721 doc1:23.42278065550721",
- "doc6\tdoc8:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463",
- "doc7\tdoc6:22.936393049704463 doc8:22.936393049704463 doc10:22.936393049704463 doc9:22.936393049704463",
- "doc8\tdoc6:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463",
- "doc9\tdoc6:22.936393049704463 doc8:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463",
- "doc10\tdoc6:22.936393049704463 doc8:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463"))
-
- val inDir = TmpDir + "in-dir/"
- val inFilename = "in-file.tsv"
- val inPath = inDir + inFilename
-
- val outPath = TmpDir + "similarity-matrix/"
-
-
- // this creates one part-00000 file in the directory
- mahoutCtx.parallelize(TextDocs).coalesce(1, shuffle=true).saveAsTextFile(inDir)
-
- // to change from using part files to a single .tsv file we'll need to use HDFS
- val fs = FileSystem.get(new Configuration())
- //rename part-00000 to something.tsv
- fs.rename(new Path(inDir + "part-00000"), new Path(inPath))
-
- // local multi-threaded Spark with default HDFS
- RowSimilarityDriver.main(Array(
- "--input", inPath,
- "--output", outPath,
- "--maxSimilaritiesPerRow", "4", // would get all docs similar if we didn't limit them
- "--master", masterUrl))
-
- val simLines = mahoutCtx.textFile(outPath).collect
- tokenize(simLines) should contain theSameElementsAs simDocsTokens
- }
-
- // convert into an Iterable of tokens for 'should contain theSameElementsAs Iterable'
- def tokenize(a: Iterable[String], splitString: String = "[\t ]"): Iterable[String] = {
- var r: Iterable[String] = Iterable()
- a.foreach ( l => r = r ++ l.split(splitString) )
- r
- }
-
- override protected def beforeAll(configMap: ConfigMap) {
- super.beforeAll(configMap)
- RowSimilarityDriver.useContext(mahoutCtx)
- }
-
-}
+///*
+// * Licensed to the Apache Software Foundation (ASF) under one or more
+// * contributor license agreements. See the NOTICE file distributed with
+// * this work for additional information regarding copyright ownership.
+// * The ASF licenses this file to You under the Apache License, Version 2.0
+// * (the "License"); you may not use this file except in compliance with
+// * the License. You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// */
+//
+//package org.apache.mahout.drivers
+//
+//import org.apache.hadoop.conf.Configuration
+//import org.apache.hadoop.fs.{FileSystem, Path}
+//import org.apache.mahout.math.drm.RLikeDrmOps._
+//import org.apache.mahout.math.drm._
+//import org.apache.mahout.math.scalabindings.RLikeOps._
+//import org.apache.mahout.math.scalabindings._
+//import org.apache.mahout.sparkbindings._
+//import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+//import org.scalatest.{ConfigMap, FunSuite}
+//
+//
+//class RowSimilarityDriverSuite extends FunSuite with DistributedSparkSuite {
+//
+// val TextDocs = Array(
+// "doc1\tNow is the time for all good people to come to aid of their party",
+// "doc2\tNow is the time for all good people to come to aid of their country",
+// "doc3\tNow is the time for all good people to come to aid of their hood",
+// "doc4\tNow is the time for all good people to come to aid of their friends",
+// "doc5\tNow is the time for all good people to come to aid of their looser brother",
+// "doc6\tThe quick brown fox jumped over the lazy dog",
+// "doc7\tThe quick brown fox jumped over the lazy boy",
+// "doc8\tThe quick brown fox jumped over the lazy cat",
+// "doc9\tThe quick brown fox jumped over the lazy wolverine",
+// "doc10\tThe quick brown fox jumped over the lazy cantelope")// yes that's spelled correctly.
+//
+// test("RowSimilarityDriver text docs no strengths") {
+//
+// val firstFiveSimDocsTokens = tokenize(Iterable(
+// "doc1\tdoc3 doc2 doc4 doc5"))
+//
+// val lastFiveSimDocsTokens = tokenize(Iterable(
+// "doc6\tdoc8 doc10 doc7 doc9"))
+//
+// val inDir = TmpDir + "in-dir/"
+// val inFilename = "in-file.tsv"
+// val inPath = inDir + inFilename
+//
+// val outPath = TmpDir + "similarity-matrices/"
+//
+//
+// // this creates one part-00000 file in the directory
+// mahoutCtx.parallelize(TextDocs).coalesce(1, shuffle=true).saveAsTextFile(inDir)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// val fs = FileSystem.get(new Configuration())
+// //rename part-00000 to something.tsv
+// fs.rename(new Path(inDir + "part-00000"), new Path(inPath))
+//
+// // local multi-threaded Spark with default HDFS
+// RowSimilarityDriver.main(Array(
+// "--input", inPath,
+// "--output", outPath,
+// "--omitStrength",
+// "--maxSimilaritiesPerRow", "4", // would get all docs similar if we didn't limit them
+// "--master", masterUrl))
+//
+// val simLines = mahoutCtx.textFile(outPath).collect
+// simLines.foreach { line =>
+// val lineTokens = line.split("[\t ]")
+// if (lineTokens.contains("doc1") ) // docs come in two flavors, so capping at 4 similarities effectively classifies them
+// lineTokens should contain theSameElementsAs firstFiveSimDocsTokens
+// else
+// lineTokens should contain theSameElementsAs lastFiveSimDocsTokens
+// }
+//
+// }
+//
+// test("RowSimilarityDriver text docs") {
+//
+// val simDocsTokens = tokenize(Iterable(
+// "doc1\tdoc3:27.87301122947484 doc2:27.87301122947484 doc4:27.87301122947484 doc5:23.42278065550721",
+// "doc2\tdoc4:27.87301122947484 doc3:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
+// "doc3\tdoc4:27.87301122947484 doc2:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
+// "doc4\tdoc3:27.87301122947484 doc2:27.87301122947484 doc1:27.87301122947484 doc5:23.42278065550721",
+// "doc5\tdoc4:23.42278065550721 doc2:23.42278065550721 doc3:23.42278065550721 doc1:23.42278065550721",
+// "doc6\tdoc8:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463",
+// "doc7\tdoc6:22.936393049704463 doc8:22.936393049704463 doc10:22.936393049704463 doc9:22.936393049704463",
+// "doc8\tdoc6:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463",
+// "doc9\tdoc6:22.936393049704463 doc8:22.936393049704463 doc10:22.936393049704463 doc7:22.936393049704463",
+// "doc10\tdoc6:22.936393049704463 doc8:22.936393049704463 doc7:22.936393049704463 doc9:22.936393049704463"))
+//
+// val inDir = TmpDir + "in-dir/"
+// val inFilename = "in-file.tsv"
+// val inPath = inDir + inFilename
+//
+// val outPath = TmpDir + "similarity-matrix/"
+//
+//
+// // this creates one part-00000 file in the directory
+// mahoutCtx.parallelize(TextDocs).coalesce(1, shuffle=true).saveAsTextFile(inDir)
+//
+// // to change from using part files to a single .tsv file we'll need to use HDFS
+// val fs = FileSystem.get(new Configuration())
+// //rename part-00000 to something.tsv
+// fs.rename(new Path(inDir + "part-00000"), new Path(inPath))
+//
+// // local multi-threaded Spark with default HDFS
+// RowSimilarityDriver.main(Array(
+// "--input", inPath,
+// "--output", outPath,
+// "--maxSimilaritiesPerRow", "4", // would get all docs similar if we didn't limit them
+// "--master", masterUrl))
+//
+// val simLines = mahoutCtx.textFile(outPath).collect
+// tokenize(simLines) should contain theSameElementsAs simDocsTokens
+// }
+//
+// // convert into an Iterable of tokens for 'should contain theSameElementsAs Iterable' (an idiomatic flatMap form follows this file's diff)
+// def tokenize(a: Iterable[String], splitString: String = "[\t ]"): Iterable[String] = {
+// var r: Iterable[String] = Iterable()
+// a.foreach ( l => r = r ++ l.split(splitString) )
+// r
+// }
+//
+// override protected def beforeAll(configMap: ConfigMap) {
+// super.beforeAll(configMap)
+// RowSimilarityDriver.useContext(mahoutCtx)
+// }
+//
+//}
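
Both suites above carry a tokenize helper built around a mutable accumulator. An equivalent, more idiomatic form (a sketch; it should produce the same token multiset as the commented helpers):

// flatMap replaces the var + foreach accumulation; each line is split on the
// same "[\t ]" pattern and the per-line token arrays are concatenated
def tokenize(a: Iterable[String], splitString: String = "[\t ]"): Iterable[String] =
  a.flatMap(_.split(splitString))
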
http://git-wip-us.apache.org/repos/asf/mahout/blob/034790cc/spark/src/test/scala/org/apache/mahout/drivers/TextDelimitedReaderWriterSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/drivers/TextDelimitedReaderWriterSuite.scala b/spark/src/test/scala/org/apache/mahout/drivers/TextDelimitedReaderWriterSuite.scala
index 5d92cca..8e56f1e 100644
--- a/spark/src/test/scala/org/apache/mahout/drivers/TextDelimitedReaderWriterSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/drivers/TextDelimitedReaderWriterSuite.scala
@@ -1,53 +1,53 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.drivers
-
-import org.apache.mahout.math.indexeddataset.DefaultIndexedDatasetReadSchema
-import org.apache.mahout.sparkbindings._
-import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
-import org.scalatest.FunSuite
-
-import scala.collection.JavaConversions._
-
-class TextDelimitedReaderWriterSuite extends FunSuite with DistributedSparkSuite {
- test("indexedDatasetDFSRead should read sparse matrix file with null rows") {
- val OutFile = TmpDir + "similarity-matrices/part-00000"
-
- val lines = Array(
- "galaxy\tnexus:1.0",
- "ipad\tiphone:2.0",
- "nexus\tgalaxy:3.0",
- "iphone\tipad:4.0",
- "surface"
- )
- val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(OutFile)
-
- val data = mahoutCtx.indexedDatasetDFSRead(OutFile, DefaultIndexedDatasetReadSchema)
-
- data.rowIDs.toMap.keySet should equal(Set("galaxy", "ipad", "nexus", "iphone", "surface"))
- data.columnIDs.toMap.keySet should equal(Set("nexus", "iphone", "galaxy", "ipad"))
-
- val a = data.matrix.collect
- a.setRowLabelBindings(mapAsJavaMap(data.rowIDs.toMap).asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
- a.setColumnLabelBindings(mapAsJavaMap(data.columnIDs.toMap).asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
- a.get("galaxy", "nexus") should equal(1.0)
- a.get("ipad", "iphone") should equal(2.0)
- a.get("nexus", "galaxy") should equal(3.0)
- a.get("iphone", "ipad") should equal(4.0)
- }
-}
+///*
+// * Licensed to the Apache Software Foundation (ASF) under one or more
+// * contributor license agreements. See the NOTICE file distributed with
+// * this work for additional information regarding copyright ownership.
+// * The ASF licenses this file to You under the Apache License, Version 2.0
+// * (the "License"); you may not use this file except in compliance with
+// * the License. You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// */
+//
+//package org.apache.mahout.drivers
+//
+//import org.apache.mahout.math.indexeddataset.DefaultIndexedDatasetReadSchema
+//import org.apache.mahout.sparkbindings._
+//import org.apache.mahout.sparkbindings.test.DistributedSparkSuite
+//import org.scalatest.FunSuite
+//
+//import scala.collection.JavaConversions._
+//
+//class TextDelimitedReaderWriterSuite extends FunSuite with DistributedSparkSuite {
+// test("indexedDatasetDFSRead should read sparse matrix file with null rows") {
+// val OutFile = TmpDir + "similarity-matrices/part-00000"
+//
+// val lines = Array(
+// "galaxy\tnexus:1.0",
+// "ipad\tiphone:2.0",
+// "nexus\tgalaxy:3.0",
+// "iphone\tipad:4.0",
+// "surface"
+// )
+// val linesRdd = mahoutCtx.parallelize(lines).saveAsTextFile(OutFile)
+//
+// val data = mahoutCtx.indexedDatasetDFSRead(OutFile, DefaultIndexedDatasetReadSchema)
+//
+// data.rowIDs.toMap.keySet should equal(Set("galaxy", "ipad", "nexus", "iphone", "surface"))
+// data.columnIDs.toMap.keySet should equal(Set("nexus", "iphone", "galaxy", "ipad"))
+//
+// val a = data.matrix.collect
+// a.setRowLabelBindings(mapAsJavaMap(data.rowIDs.toMap).asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
+// a.setColumnLabelBindings(mapAsJavaMap(data.columnIDs.toMap).asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
+// a.get("galaxy", "nexus") should equal(1.0)
+// a.get("ipad", "iphone") should equal(2.0)
+// a.get("nexus", "galaxy") should equal(3.0)
+// a.get("iphone", "ipad") should equal(4.0)
+// }
+//}
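
The reader test above makes label-based cell lookups possible by collecting the matrix and attaching the dataset's row and column dictionaries as label bindings. That conversion could be factored into a small helper; a sketch using the same calls as the test (the withLabelBindings name is ours):

import org.apache.mahout.math.Matrix
import org.apache.mahout.math.indexeddataset.IndexedDataset

import scala.collection.JavaConversions._

// collect the distributed matrix and bind the string dictionaries so that
// m.get("galaxy", "nexus")-style lookups work, as in the test above
def withLabelBindings(data: IndexedDataset): Matrix = {
  val m = data.matrix.collect
  m.setRowLabelBindings(mapAsJavaMap(data.rowIDs.toMap)
    .asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
  m.setColumnLabelBindings(mapAsJavaMap(data.columnIDs.toMap)
    .asInstanceOf[java.util.Map[java.lang.String, java.lang.Integer]])
  m
}
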
http://git-wip-us.apache.org/repos/asf/mahout/blob/034790cc/spark/src/test/scala/org/apache/mahout/sparkbindings/SparkBindingsSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/SparkBindingsSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/SparkBindingsSuite.scala
index 61244a1..dece685 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/SparkBindingsSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/SparkBindingsSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.sparkbindings
import java.io.{Closeable, File}
@@ -8,9 +25,7 @@ import org.scalatest.FunSuite
import scala.collection._
-/**
- * @author dmitriy
- */
+
class SparkBindingsSuite extends FunSuite with DistributedSparkSuite {
// This test will succeed only when MAHOUT_HOME is set in the environment. So we keep it for
@@ -26,7 +41,8 @@ class SparkBindingsSuite extends FunSuite with DistributedSparkSuite {
}
mahoutJars.size should be > 0
- mahoutJars.size shouldBe 4
+ // this will depend on the viennacl profile.
+ // mahoutJars.size shouldBe 4
} finally {
IOUtilsScala.close(closeables)
}
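
The hard-coded mahoutJars.size shouldBe 4 had to be commented out because the jar count now varies with the viennacl build profile. One profile-aware alternative (a sketch only; the mahout.test.expected.jars property is hypothetical, not part of the build):

// pin the expected jar count per build profile via a system property instead of
// a hard-coded literal; when the property is unset, only the non-empty check runs
val expectedJarCount = sys.props.get("mahout.test.expected.jars").map(_.toInt)
expectedJarCount.foreach(n => mahoutJars.size shouldBe n)
mahoutJars.size should be > 0
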
http://git-wip-us.apache.org/repos/asf/mahout/blob/034790cc/spark/src/test/scala/org/apache/mahout/sparkbindings/test/DistributedSparkSuite.scala
----------------------------------------------------------------------
diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/test/DistributedSparkSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/test/DistributedSparkSuite.scala
index 4c75e75..48d84f8 100644
--- a/spark/src/test/scala/org/apache/mahout/sparkbindings/test/DistributedSparkSuite.scala
+++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/test/DistributedSparkSuite.scala
@@ -33,7 +33,7 @@ trait DistributedSparkSuite extends DistributedMahoutSuite with LoggerConfigurat
protected var masterUrl = null.asInstanceOf[String]
protected def initContext() {
- masterUrl = System.getProperties.getOrElse("test.spark.master", "local[3]")
+ masterUrl = System.getProperties.getOrElse("test.spark.master", "local[1]")
val isLocal = masterUrl.startsWith("local")
mahoutCtx = mahoutSparkContext(masterUrl = this.masterUrl,
appName = "MahoutUnitTests",