Posted to commits@spark.apache.org by jk...@apache.org on 2015/07/01 05:19:53 UTC

spark git commit: [SPARK-8471] [ML] Rename DiscreteCosineTransformer to DCT

Repository: spark
Updated Branches:
  refs/heads/master 3bee0f146 -> f45756988


[SPARK-8471] [ML] Rename DiscreteCosineTransformer to DCT

Rename DiscreteCosineTransformer and related classes to DCT.
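
For context, a minimal usage sketch of the renamed transformer, mirroring the new
DCTSuite/JavaDCTSuite tests in this commit (the column names and input values are
illustrative, and an existing SQLContext named sqlContext is assumed to be in scope):

  import org.apache.spark.ml.feature.DCT
  import org.apache.spark.mllib.linalg.Vectors

  // Illustrative input: a one-column DataFrame of mllib Vectors.
  val data = Seq(
    Vectors.dense(0.0, 1.0, -2.0, 3.0),
    Vectors.dense(-1.0, 2.0, 4.0, -7.0))
  val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("vec")

  // Forward scaled DCT-II; setInverse(true) would apply the inverse transform instead.
  val dct = new DCT()
    .setInputCol("vec")
    .setOutputCol("resultVec")
    .setInverse(false)

  dct.transform(df).select("resultVec").show()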

Author: Feynman Liang <fl...@databricks.com>

Closes #7138 from feynmanliang/dct-features and squashes the following commits:

e547b3e [Feynman Liang] Fix renaming bug
9d5c9e4 [Feynman Liang] Lowercase JavaDCTSuite variable
f9a8958 [Feynman Liang] Remove old files
f8fe794 [Feynman Liang] Merge branch 'master' into dct-features
894d0b2 [Feynman Liang] Rename DiscreteCosineTransformer to DCT
433dbc7 [Feynman Liang] Test refactoring
91e9636 [Feynman Liang] Style guide and test helper refactor
b5ac19c [Feynman Liang] Use Vector types, add Java test
530983a [Feynman Liang] Tests for other numeric datatypes
195d7aa [Feynman Liang] Implement support for arbitrary numeric types
95d4939 [Feynman Liang] Working DCT for 1D Doubles


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4575698
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4575698
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4575698

Branch: refs/heads/master
Commit: f457569886e9de9256ad269cb4a3d73a8918766d
Parents: 3bee0f1
Author: Feynman Liang <fl...@databricks.com>
Authored: Tue Jun 30 20:19:43 2015 -0700
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Tue Jun 30 20:19:43 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/ml/feature/DCT.scala | 72 ++++++++++++++++++
 .../ml/feature/DiscreteCosineTransformer.scala  | 72 ------------------
 .../apache/spark/ml/feature/JavaDCTSuite.java   | 78 ++++++++++++++++++++
 .../JavaDiscreteCosineTransformerSuite.java     | 78 --------------------
 .../org/apache/spark/ml/feature/DCTSuite.scala  | 73 ++++++++++++++++++
 .../DiscreteCosineTransformerSuite.scala        | 73 ------------------
 6 files changed, 223 insertions(+), 223 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
new file mode 100644
index 0000000..2283476
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature
+
+import edu.emory.mathcs.jtransforms.dct._
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.ml.UnaryTransformer
+import org.apache.spark.ml.param.BooleanParam
+import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors}
+import org.apache.spark.sql.types.DataType
+
+/**
+ * :: Experimental ::
+ * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero
+ * padding is performed on the input vector.
+ * It returns a real vector of the same length representing the DCT. The return vector is scaled
+ * such that the transform matrix is unitary (aka scaled DCT-II).
+ *
+ * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
+ */
+@Experimental
+class DCT(override val uid: String)
+  extends UnaryTransformer[Vector, Vector, DCT] {
+
+  def this() = this(Identifiable.randomUID("dct"))
+
+  /**
+   * Indicates whether to perform the inverse DCT (true) or forward DCT (false).
+   * Default: false
+   * @group param
+   */
+  def inverse: BooleanParam = new BooleanParam(
+    this, "inverse", "Set transformer to perform inverse DCT")
+
+  /** @group setParam */
+  def setInverse(value: Boolean): this.type = set(inverse, value)
+
+  /** @group getParam */
+  def getInverse: Boolean = $(inverse)
+
+  setDefault(inverse -> false)
+
+  override protected def createTransformFunc: Vector => Vector = { vec =>
+    val result = vec.toArray
+    val jTransformer = new DoubleDCT_1D(result.length)
+    if ($(inverse)) jTransformer.inverse(result, true) else jTransformer.forward(result, true)
+    Vectors.dense(result)
+  }
+
+  override protected def validateInputType(inputType: DataType): Unit = {
+    require(inputType.isInstanceOf[VectorUDT], s"Input type must be VectorUDT but got $inputType.")
+  }
+
+  override protected def outputDataType: DataType = new VectorUDT
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala
deleted file mode 100644
index a2f4d59..0000000
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/DiscreteCosineTransformer.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml.feature
-
-import edu.emory.mathcs.jtransforms.dct._
-
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.ml.UnaryTransformer
-import org.apache.spark.ml.param.BooleanParam
-import org.apache.spark.ml.util.Identifiable
-import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors}
-import org.apache.spark.sql.types.DataType
-
-/**
- * :: Experimental ::
- * A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero
- * padding is performed on the input vector.
- * It returns a real vector of the same length representing the DCT. The return vector is scaled
- * such that the transform matrix is unitary (aka scaled DCT-II).
- *
- * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
- */
-@Experimental
-class DiscreteCosineTransformer(override val uid: String)
-  extends UnaryTransformer[Vector, Vector, DiscreteCosineTransformer] {
-
-  def this() = this(Identifiable.randomUID("dct"))
-
-  /**
-   * Indicates whether to perform the inverse DCT (true) or forward DCT (false).
-   * Default: false
-   * @group param
-   */
-  def inverse: BooleanParam = new BooleanParam(
-    this, "inverse", "Set transformer to perform inverse DCT")
-
-  /** @group setParam */
-  def setInverse(value: Boolean): this.type = set(inverse, value)
-
-  /** @group getParam */
-  def getInverse: Boolean = $(inverse)
-
-  setDefault(inverse -> false)
-
-  override protected def createTransformFunc: Vector => Vector = { vec =>
-    val result = vec.toArray
-    val jTransformer = new DoubleDCT_1D(result.length)
-    if ($(inverse)) jTransformer.inverse(result, true) else jTransformer.forward(result, true)
-    Vectors.dense(result)
-  }
-
-  override protected def validateInputType(inputType: DataType): Unit = {
-    require(inputType.isInstanceOf[VectorUDT], s"Input type must be VectorUDT but got $inputType.")
-  }
-
-  override protected def outputDataType: DataType = new VectorUDT
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
new file mode 100644
index 0000000..845eed6
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature;
+
+import com.google.common.collect.Lists;
+import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+public class JavaDCTSuite {
+  private transient JavaSparkContext jsc;
+  private transient SQLContext jsql;
+
+  @Before
+  public void setUp() {
+    jsc = new JavaSparkContext("local", "JavaDCTSuite");
+    jsql = new SQLContext(jsc);
+  }
+
+  @After
+  public void tearDown() {
+    jsc.stop();
+    jsc = null;
+  }
+
+  @Test
+  public void javaCompatibilityTest() {
+    double[] input = new double[] {1D, 2D, 3D, 4D};
+    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+      RowFactory.create(Vectors.dense(input))
+    ));
+    DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{
+      new StructField("vec", (new VectorUDT()), false, Metadata.empty())
+    }));
+
+    double[] expectedResult = input.clone();
+    (new DoubleDCT_1D(input.length)).forward(expectedResult, true);
+
+    DCT dct = new DCT()
+      .setInputCol("vec")
+      .setOutputCol("resultVec");
+
+    Row[] result = dct.transform(dataset).select("resultVec").collect();
+    Vector resultVec = result[0].getAs("resultVec");
+
+    Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java
----------------------------------------------------------------------
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java
deleted file mode 100644
index 28bc5f6..0000000
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDiscreteCosineTransformerSuite.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml.feature;
-
-import com.google.common.collect.Lists;
-import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.VectorUDT;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.RowFactory;
-import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.types.Metadata;
-import org.apache.spark.sql.types.StructField;
-import org.apache.spark.sql.types.StructType;
-
-public class JavaDiscreteCosineTransformerSuite {
-  private transient JavaSparkContext jsc;
-  private transient SQLContext jsql;
-
-  @Before
-  public void setUp() {
-    jsc = new JavaSparkContext("local", "JavaDiscreteCosineTransformerSuite");
-    jsql = new SQLContext(jsc);
-  }
-
-  @After
-  public void tearDown() {
-    jsc.stop();
-    jsc = null;
-  }
-
-  @Test
-  public void javaCompatibilityTest() {
-    double[] input = new double[] {1D, 2D, 3D, 4D};
-    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
-      RowFactory.create(Vectors.dense(input))
-    ));
-    DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{
-      new StructField("vec", (new VectorUDT()), false, Metadata.empty())
-    }));
-
-    double[] expectedResult = input.clone();
-    (new DoubleDCT_1D(input.length)).forward(expectedResult, true);
-
-    DiscreteCosineTransformer DCT = new DiscreteCosineTransformer()
-      .setInputCol("vec")
-      .setOutputCol("resultVec");
-
-    Row[] result = DCT.transform(dataset).select("resultVec").collect();
-    Vector resultVec = result[0].getAs("resultVec");
-
-    Assert.assertArrayEquals(expectedResult, resultVec.toArray(), 1e-6);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
new file mode 100644
index 0000000..37ed236
--- /dev/null
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature
+
+import scala.beans.BeanInfo
+
+import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
+import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.sql.{DataFrame, Row}
+
+@BeanInfo
+case class DCTTestData(vec: Vector, wantedVec: Vector)
+
+class DCTSuite extends SparkFunSuite with MLlibTestSparkContext {
+
+  test("forward transform of discrete cosine matches jTransforms result") {
+    val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray)
+    val inverse = false
+
+    testDCT(data, inverse)
+  }
+
+  test("inverse transform of discrete cosine matches jTransforms result") {
+    val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray)
+    val inverse = true
+
+    testDCT(data, inverse)
+  }
+
+  private def testDCT(data: Vector, inverse: Boolean): Unit = {
+    val expectedResultBuffer = data.toArray.clone()
+    if (inverse) {
+      (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true)
+    } else {
+      (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true)
+    }
+    val expectedResult = Vectors.dense(expectedResultBuffer)
+
+    val dataset = sqlContext.createDataFrame(Seq(
+      DCTTestData(data, expectedResult)
+    ))
+
+    val transformer = new DCT()
+      .setInputCol("vec")
+      .setOutputCol("resultVec")
+      .setInverse(inverse)
+
+    transformer.transform(dataset)
+      .select("resultVec", "wantedVec")
+      .collect()
+      .foreach { case Row(resultVec: Vector, wantedVec: Vector) =>
+      assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/f4575698/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala
deleted file mode 100644
index ed0fc11..0000000
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/DiscreteCosineTransformerSuite.scala
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml.feature
-
-import scala.beans.BeanInfo
-
-import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D
-
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
-import org.apache.spark.mllib.util.MLlibTestSparkContext
-import org.apache.spark.sql.{DataFrame, Row}
-
-@BeanInfo
-case class DCTTestData(vec: Vector, wantedVec: Vector)
-
-class DiscreteCosineTransformerSuite extends SparkFunSuite with MLlibTestSparkContext {
-
-  test("forward transform of discrete cosine matches jTransforms result") {
-    val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray)
-    val inverse = false
-
-    testDCT(data, inverse)
-  }
-
-  test("inverse transform of discrete cosine matches jTransforms result") {
-    val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray)
-    val inverse = true
-
-    testDCT(data, inverse)
-  }
-
-  private def testDCT(data: Vector, inverse: Boolean): Unit = {
-    val expectedResultBuffer = data.toArray.clone()
-    if (inverse) {
-      (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true)
-    } else {
-      (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true)
-    }
-    val expectedResult = Vectors.dense(expectedResultBuffer)
-
-    val dataset = sqlContext.createDataFrame(Seq(
-      DCTTestData(data, expectedResult)
-    ))
-
-    val transformer = new DiscreteCosineTransformer()
-      .setInputCol("vec")
-      .setOutputCol("resultVec")
-      .setInverse(inverse)
-
-    transformer.transform(dataset)
-      .select("resultVec", "wantedVec")
-      .collect()
-      .foreach { case Row(resultVec: Vector, wantedVec: Vector) =>
-      assert(Vectors.sqdist(resultVec, wantedVec) < 1e-6)
-    }
-  }
-}

