Posted to commits@spark.apache.org by rx...@apache.org on 2015/01/14 09:39:02 UTC

spark git commit: [SPARK-5167][SQL] Move Row into sql package and make it usable for Java.

Repository: spark
Updated Branches:
  refs/heads/master f9969098c -> d5eeb3516


[SPARK-5167][SQL] Move Row into sql package and make it usable for Java.

Mostly just moving code around. This should remain source compatible, since Row was previously exposed as a type alias at org.apache.spark.sql.Row.

Added the following APIs to Row:
```scala
def getMap[K, V](i: Int): scala.collection.Map[K, V]
def getJavaMap[K, V](i: Int): java.util.Map[K, V]
def getSeq[T](i: Int): Seq[T]
def getList[T](i: Int): java.util.List[T]
def getStruct(i: Int): Row
```
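
For a sense of how these accessors fit together, here is a minimal usage sketch (the row contents are assumptions for this example; note that `getStruct` hands the nested struct back as a `Row`):

```scala
import org.apache.spark.sql.Row

// A row with an array-typed, a map-typed, and a struct-typed column.
val row = Row(Seq(1, 2, 3), Map("a" -> 1), Row("nested", 42))

val xs: Seq[Int] = row.getSeq[Int](0)                        // Scala view of the array column
val xsJava: java.util.List[Int] = row.getList[Int](0)        // Java view of the same column
val m: scala.collection.Map[String, Int] = row.getMap[String, Int](1)
val mJava: java.util.Map[String, Int] = row.getJavaMap[String, Int](1)
val nested: Row = row.getStruct(2)                           // struct columns come back as Row
```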

Author: Reynold Xin <rx...@databricks.com>

Closes #4030 from rxin/sql-row and squashes the following commits:

6c85c29 [Reynold Xin] Fixed style violation by adding a new line to Row.scala.
82b064a [Reynold Xin] [SPARK-5167][SQL] Move Row into sql package and make it usable for Java.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d5eeb351
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d5eeb351
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d5eeb351

Branch: refs/heads/master
Commit: d5eeb35167e1ab72fab7778757163ff0aacaef2c
Parents: f996909
Author: Reynold Xin <rx...@databricks.com>
Authored: Wed Jan 14 00:38:55 2015 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Jan 14 00:38:55 2015 -0800

----------------------------------------------------------------------
 .../java/org/apache/spark/sql/RowFactory.java   |  34 +++
 .../main/scala/org/apache/spark/sql/Row.scala   | 240 ++++++++++++++++
 .../spark/sql/catalyst/expressions/Row.scala    | 272 -------------------
 .../sql/catalyst/expressions/package.scala      |   4 +
 .../spark/sql/catalyst/expressions/rows.scala   | 207 ++++++++++++++
 .../org/apache/spark/sql/types/dataTypes.scala  |   2 +-
 .../org/apache/spark/sql/api/java/Row.scala     |   2 +-
 .../scala/org/apache/spark/sql/package.scala    |  83 ------
 8 files changed, 487 insertions(+), 357 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
new file mode 100644
index 0000000..62fcec8
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/RowFactory.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql;
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow;
+
+/**
+ * A factory class used to construct {@link Row} objects.
+ */
+public class RowFactory {
+
+  /**
+   * Create a {@link Row} from an array of values. Position i in the array becomes position i
+   * in the created {@link Row} object.
+   */
+  public static Row create(Object[] values) {
+    return new GenericRow(values);
+  }
+}
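
A hedged sketch of calling this factory from Scala (the explicit boxing via `Int.box` is an assumption of this example, needed because `create` takes an `Object[]`):

```scala
import org.apache.spark.sql.{Row, RowFactory}

// Position i of the array becomes position i of the created Row.
val row: Row = RowFactory.create(Array[AnyRef](Int.box(1), "a string", null))
assert(row.getInt(0) == 1)   // primitive access unboxes the stored Integer
assert(row.isNullAt(2))      // nulls must be checked before primitive access
```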

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
new file mode 100644
index 0000000..d7a4e01
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+
+
+object Row {
+  /**
+   * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
+   * {{{
+   * import org.apache.spark.sql._
+   *
+   * val pairs = sql("SELECT key, value FROM src").rdd.map {
+   *   case Row(key: Int, value: String) =>
+   *     key -> value
+   * }
+   * }}}
+   */
+  def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
+
+  /**
+   * This method can be used to construct a [[Row]] with the given values.
+   */
+  def apply(values: Any*): Row = new GenericRow(values.toArray)
+
+  /**
+   * This method can be used to construct a [[Row]] from a [[Seq]] of values.
+   */
+  def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
+}
+
+
+/**
+ * Represents one row of output from a relational operator.  Allows both generic access by
+ * ordinal, which will incur boxing overhead for primitives, and native primitive access.
+ *
+ * It is invalid to use the native primitive interface to retrieve a value that is null; instead, a
+ * user must check `isNullAt` before attempting to retrieve a value that might be null.
+ *
+ * To create a new Row, use [[RowFactory.create()]] in Java or [[Row.apply()]] in Scala.
+ *
+ * A [[Row]] object can be constructed by providing field values. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * // Create a Row from values.
+ * Row(value1, value2, value3, ...)
+ * // Create a Row from a Seq of values.
+ * Row.fromSeq(Seq(value1, value2, ...))
+ * }}}
+ *
+ * A value of a row can be accessed through both generic access by ordinal,
+ * which will incur boxing overhead for primitives, and native primitive access.
+ * An example of generic access by ordinal:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val row = Row(1, true, "a string", null)
+ * // row: Row = [1,true,a string,null]
+ * val firstValue = row(0)
+ * // firstValue: Any = 1
+ * val fourthValue = row(3)
+ * // fourthValue: Any = null
+ * }}}
+ *
+ * For native primitive access, it is invalid to use the native primitive interface to retrieve
+ * a value that is null; instead, a user must check `isNullAt` before attempting to retrieve a
+ * value that might be null.
+ * An example of native primitive access:
+ * {{{
+ * // using the row from the previous example.
+ * val firstValue = row.getInt(0)
+ * // firstValue: Int = 1
+ * val isNull = row.isNullAt(3)
+ * // isNull: Boolean = true
+ * }}}
+ *
+ * Interfaces related to native primitive access are:
+ *
+ * `isNullAt(i: Int): Boolean`
+ *
+ * `getInt(i: Int): Int`
+ *
+ * `getLong(i: Int): Long`
+ *
+ * `getDouble(i: Int): Double`
+ *
+ * `getFloat(i: Int): Float`
+ *
+ * `getBoolean(i: Int): Boolean`
+ *
+ * `getShort(i: Int): Short`
+ *
+ * `getByte(i: Int): Byte`
+ *
+ * `getString(i: Int): String`
+ *
+ * In Scala, fields in a [[Row]] object can be extracted in a pattern match. Example:
+ * {{{
+ * import org.apache.spark.sql._
+ *
+ * val pairs = sql("SELECT key, value FROM src").rdd.map {
+ *   case Row(key: Int, value: String) =>
+ *     key -> value
+ * }
+ * }}}
+ *
+ * @group row
+ */
+trait Row extends Seq[Any] with Serializable {
+  def apply(i: Int): Any
+
+  /** Returns the value at position i. If the value is null, null is returned. */
+  def get(i: Int): Any = apply(i)
+
+  /** Checks whether the value at position i is null. */
+  def isNullAt(i: Int): Boolean
+
+  /**
+   * Returns the value at position i as a primitive int.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getInt(i: Int): Int
+
+  /**
+   * Returns the value at position i as a primitive long.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getLong(i: Int): Long
+
+  /**
+   * Returns the value at position i as a primitive double.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getDouble(i: Int): Double
+
+  /**
+   * Returns the value at position i as a primitive float.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getFloat(i: Int): Float
+
+  /**
+   * Returns the value at position i as a primitive boolean.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getBoolean(i: Int): Boolean
+
+  /**
+   * Returns the value at position i as a primitive short.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getShort(i: Int): Short
+
+  /**
+   * Returns the value at position i as a primitive byte.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getByte(i: Int): Byte
+
+  /**
+   * Returns the value at position i as a String object.
+   * Throws an exception if the type mismatches or if the value is null.
+   */
+  def getString(i: Int): String
+
+  /**
+   * Returns the value at position i of array type as a Scala Seq.
+   * Throws an exception if the type mismatches.
+   */
+  def getSeq[T](i: Int): Seq[T] = apply(i).asInstanceOf[Seq[T]]
+
+  /**
+   * Returns the value at position i of array type as a [[java.util.List]].
+   * Throws an exception if the type mismatches.
+   */
+  def getList[T](i: Int): java.util.List[T] = {
+    scala.collection.JavaConversions.seqAsJavaList(getSeq[T](i))
+  }
+
+  /**
+   * Returns the value at position i of map type as a Scala Map.
+   * Throws an exception if the type mismatches.
+   */
+  def getMap[K, V](i: Int): scala.collection.Map[K, V] = apply(i).asInstanceOf[Map[K, V]]
+
+  /**
+   * Returns the value at position i of map type as a [[java.util.Map]].
+   * Throws an exception if the type mismatches.
+   */
+  def getJavaMap[K, V](i: Int): java.util.Map[K, V] = {
+    scala.collection.JavaConversions.mapAsJavaMap(getMap[K, V](i))
+  }
+
+  /**
+   * Returns the value at position i of struct type as a [[Row]] object.
+   * Throws an exception if the type mismatches.
+   */
+  def getStruct(i: Int): Row = getAs[Row](i)
+
+  /**
+   * Returns the value at position i.
+   * Throws an exception if the type mismatches.
+   */
+  def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
+
+  override def toString(): String = s"[${this.mkString(",")}]"
+
+  /**
+   * Make a copy of the current [[Row]] object.
+   */
+  def copy(): Row
+
+  /** Returns true if there are any NULL values in this row. */
+  def anyNull: Boolean = {
+    val l = length
+    var i = 0
+    while (i < l) {
+      if (isNullAt(i)) { return true }
+      i += 1
+    }
+    false
+  }
+}
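
A small sketch of the generic accessors and null checks defined above (the values are arbitrary choices for this example):

```scala
import org.apache.spark.sql.Row

val row = Row(1, null, "s")
val s = row.getAs[String](2)   // "s": an unchecked cast to the requested type
val n = row.isNullAt(1)        // true
val any = row.anyNull          // true, since position 1 is null
```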

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
deleted file mode 100644
index dcda53b..0000000
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.expressions
-
-import org.apache.spark.sql.types.NativeType
-
-object Row {
-  /**
-   * This method can be used to extract fields from a [[Row]] object in a pattern match. Example:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * val pairs = sql("SELECT key, value FROM src").rdd.map {
-   *   case Row(key: Int, value: String) =>
-   *     key -> value
-   * }
-   * }}}
-   */
-  def unapplySeq(row: Row): Some[Seq[Any]] = Some(row)
-
-  /**
-   * This method can be used to construct a [[Row]] with the given values.
-   */
-  def apply(values: Any*): Row = new GenericRow(values.toArray)
-
-  /**
-   * This method can be used to construct a [[Row]] from a [[Seq]] of values.
-   */
-  def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
-}
-
-/**
- * Represents one row of output from a relational operator.  Allows both generic access by ordinal,
- * which will incur boxing overhead for primitives, as well as native primitive access.
- *
- * It is invalid to use the native primitive interface to retrieve a value that is null, instead a
- * user must check [[isNullAt]] before attempting to retrieve a value that might be null.
- */
-trait Row extends Seq[Any] with Serializable {
-  def apply(i: Int): Any
-
-  def isNullAt(i: Int): Boolean
-
-  def getInt(i: Int): Int
-  def getLong(i: Int): Long
-  def getDouble(i: Int): Double
-  def getFloat(i: Int): Float
-  def getBoolean(i: Int): Boolean
-  def getShort(i: Int): Short
-  def getByte(i: Int): Byte
-  def getString(i: Int): String
-  def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
-
-  override def toString() =
-    s"[${this.mkString(",")}]"
-
-  def copy(): Row
-
-  /** Returns true if there are any NULL values in this row. */
-  def anyNull: Boolean = {
-    var i = 0
-    while (i < length) {
-      if (isNullAt(i)) { return true }
-      i += 1
-    }
-    false
-  }
-}
-
-/**
- * An extended interface to [[Row]] that allows the values for each column to be updated.  Setting
- * a value through a primitive function implicitly marks that column as not null.
- */
-trait MutableRow extends Row {
-  def setNullAt(i: Int): Unit
-
-  def update(ordinal: Int, value: Any)
-
-  def setInt(ordinal: Int, value: Int)
-  def setLong(ordinal: Int, value: Long)
-  def setDouble(ordinal: Int, value: Double)
-  def setBoolean(ordinal: Int, value: Boolean)
-  def setShort(ordinal: Int, value: Short)
-  def setByte(ordinal: Int, value: Byte)
-  def setFloat(ordinal: Int, value: Float)
-  def setString(ordinal: Int, value: String)
-}
-
-/**
- * A row with no data.  Calling any methods will result in an error.  Can be used as a placeholder.
- */
-object EmptyRow extends Row {
-  def apply(i: Int): Any = throw new UnsupportedOperationException
-
-  def iterator = Iterator.empty
-  def length = 0
-  def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
-
-  def getInt(i: Int): Int = throw new UnsupportedOperationException
-  def getLong(i: Int): Long = throw new UnsupportedOperationException
-  def getDouble(i: Int): Double = throw new UnsupportedOperationException
-  def getFloat(i: Int): Float = throw new UnsupportedOperationException
-  def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
-  def getShort(i: Int): Short = throw new UnsupportedOperationException
-  def getByte(i: Int): Byte = throw new UnsupportedOperationException
-  def getString(i: Int): String = throw new UnsupportedOperationException
-  override def getAs[T](i: Int): T = throw new UnsupportedOperationException
-
-  def copy() = this
-}
-
-/**
- * A row implementation that uses an array of objects as the underlying storage.  Note that, while
- * the array is not copied, and thus could technically be mutated after creation, this is not
- * allowed.
- */
-class GenericRow(protected[sql] val values: Array[Any]) extends Row {
-  /** No-arg constructor for serialization. */
-  def this() = this(null)
-
-  def this(size: Int) = this(new Array[Any](size))
-
-  def iterator = values.iterator
-
-  def length = values.length
-
-  def apply(i: Int) = values(i)
-
-  def isNullAt(i: Int) = values(i) == null
-
-  def getInt(i: Int): Int = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive int value.")
-    values(i).asInstanceOf[Int]
-  }
-
-  def getLong(i: Int): Long = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive long value.")
-    values(i).asInstanceOf[Long]
-  }
-
-  def getDouble(i: Int): Double = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive double value.")
-    values(i).asInstanceOf[Double]
-  }
-
-  def getFloat(i: Int): Float = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive float value.")
-    values(i).asInstanceOf[Float]
-  }
-
-  def getBoolean(i: Int): Boolean = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive boolean value.")
-    values(i).asInstanceOf[Boolean]
-  }
-
-  def getShort(i: Int): Short = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive short value.")
-    values(i).asInstanceOf[Short]
-  }
-
-  def getByte(i: Int): Byte = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive byte value.")
-    values(i).asInstanceOf[Byte]
-  }
-
-  def getString(i: Int): String = {
-    if (values(i) == null) sys.error("Failed to check null bit for primitive String value.")
-    values(i).asInstanceOf[String]
-  }
-
-  // Custom hashCode function that matches the efficient code generated version.
-  override def hashCode(): Int = {
-    var result: Int = 37
-
-    var i = 0
-    while (i < values.length) {
-      val update: Int =
-        if (isNullAt(i)) {
-          0
-        } else {
-          apply(i) match {
-            case b: Boolean => if (b) 0 else 1
-            case b: Byte => b.toInt
-            case s: Short => s.toInt
-            case i: Int => i
-            case l: Long => (l ^ (l >>> 32)).toInt
-            case f: Float => java.lang.Float.floatToIntBits(f)
-            case d: Double =>
-              val b = java.lang.Double.doubleToLongBits(d)
-              (b ^ (b >>> 32)).toInt
-            case other => other.hashCode()
-          }
-        }
-      result = 37 * result + update
-      i += 1
-    }
-    result
-  }
-
-  def copy() = this
-}
-
-class GenericMutableRow(v: Array[Any]) extends GenericRow(v) with MutableRow {
-  /** No-arg constructor for serialization. */
-  def this() = this(null)
-
-  def this(size: Int) = this(new Array[Any](size))
-
-  override def setBoolean(ordinal: Int, value: Boolean): Unit = { values(ordinal) = value }
-  override def setByte(ordinal: Int, value: Byte): Unit = { values(ordinal) = value }
-  override def setDouble(ordinal: Int, value: Double): Unit = { values(ordinal) = value }
-  override def setFloat(ordinal: Int, value: Float): Unit = { values(ordinal) = value }
-  override def setInt(ordinal: Int, value: Int): Unit = { values(ordinal) = value }
-  override def setLong(ordinal: Int, value: Long): Unit = { values(ordinal) = value }
-  override def setString(ordinal: Int, value: String): Unit = { values(ordinal) = value }
-
-  override def setNullAt(i: Int): Unit = { values(i) = null }
-
-  override def setShort(ordinal: Int, value: Short): Unit = { values(ordinal) = value }
-
-  override def update(ordinal: Int, value: Any): Unit = { values(ordinal) = value }
-
-  override def copy() = new GenericRow(values.clone())
-}
-
-
-class RowOrdering(ordering: Seq[SortOrder]) extends Ordering[Row] {
-  def this(ordering: Seq[SortOrder], inputSchema: Seq[Attribute]) =
-    this(ordering.map(BindReferences.bindReference(_, inputSchema)))
-
-  def compare(a: Row, b: Row): Int = {
-    var i = 0
-    while (i < ordering.size) {
-      val order = ordering(i)
-      val left = order.child.eval(a)
-      val right = order.child.eval(b)
-
-      if (left == null && right == null) {
-        // Both null, continue looking.
-      } else if (left == null) {
-        return if (order.direction == Ascending) -1 else 1
-      } else if (right == null) {
-        return if (order.direction == Ascending) 1 else -1
-      } else {
-        val comparison = order.dataType match {
-          case n: NativeType if order.direction == Ascending =>
-            n.ordering.asInstanceOf[Ordering[Any]].compare(left, right)
-          case n: NativeType if order.direction == Descending =>
-            n.ordering.asInstanceOf[Ordering[Any]].reverse.compare(left, right)
-        }
-        if (comparison != 0) return comparison
-      }
-      i += 1
-    }
-    return 0
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 55d9599..fbc97b2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -49,6 +49,10 @@ package org.apache.spark.sql.catalyst
  */
 package object expressions  {
 
+  type Row = org.apache.spark.sql.Row
+
+  val Row = org.apache.spark.sql.Row
+
   /**
   * Converts a [[Row]] to another Row given a sequence of expressions that define each column of the
    * new row. If the schema of the input row is specified, then the given expression will be bound

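
The effect of the alias, as a minimal sketch (the `=:=` check is only there to show that the two names now denote a single type):

```scala
import org.apache.spark.sql
import org.apache.spark.sql.catalyst.expressions.Row  // the alias added above

val r: Row = sql.Row(1, "a")
implicitly[Row =:= sql.Row]   // compiles: the alias makes these the same type
```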
http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
new file mode 100644
index 0000000..c22b842
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.sql.types.NativeType
+
+
+/**
+ * An extended interface to [[Row]] that allows the values for each column to be updated.  Setting
+ * a value through a primitive function implicitly marks that column as not null.
+ */
+trait MutableRow extends Row {
+  def setNullAt(i: Int): Unit
+
+  def update(ordinal: Int, value: Any)
+
+  def setInt(ordinal: Int, value: Int)
+  def setLong(ordinal: Int, value: Long)
+  def setDouble(ordinal: Int, value: Double)
+  def setBoolean(ordinal: Int, value: Boolean)
+  def setShort(ordinal: Int, value: Short)
+  def setByte(ordinal: Int, value: Byte)
+  def setFloat(ordinal: Int, value: Float)
+  def setString(ordinal: Int, value: String)
+}
+
+/**
+ * A row with no data.  Calling any method will result in an error.  Can be used as a placeholder.
+ */
+object EmptyRow extends Row {
+  override def apply(i: Int): Any = throw new UnsupportedOperationException
+  override def iterator = Iterator.empty
+  override def length = 0
+  override def isNullAt(i: Int): Boolean = throw new UnsupportedOperationException
+  override def getInt(i: Int): Int = throw new UnsupportedOperationException
+  override def getLong(i: Int): Long = throw new UnsupportedOperationException
+  override def getDouble(i: Int): Double = throw new UnsupportedOperationException
+  override def getFloat(i: Int): Float = throw new UnsupportedOperationException
+  override def getBoolean(i: Int): Boolean = throw new UnsupportedOperationException
+  override def getShort(i: Int): Short = throw new UnsupportedOperationException
+  override def getByte(i: Int): Byte = throw new UnsupportedOperationException
+  override def getString(i: Int): String = throw new UnsupportedOperationException
+  override def getAs[T](i: Int): T = throw new UnsupportedOperationException
+  def copy() = this
+}
+
+/**
+ * A row implementation that uses an array of objects as the underlying storage.  Note that,
+ * while the array is not copied (and thus could technically be mutated after creation),
+ * mutating it is not allowed.
+ */
+class GenericRow(protected[sql] val values: Array[Any]) extends Row {
+  /** No-arg constructor for serialization. */
+  def this() = this(null)
+
+  def this(size: Int) = this(new Array[Any](size))
+
+  override def iterator = values.iterator
+
+  override def length = values.length
+
+  override def apply(i: Int) = values(i)
+
+  override def isNullAt(i: Int) = values(i) == null
+
+  override def getInt(i: Int): Int = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive int value.")
+    values(i).asInstanceOf[Int]
+  }
+
+  override def getLong(i: Int): Long = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive long value.")
+    values(i).asInstanceOf[Long]
+  }
+
+  override def getDouble(i: Int): Double = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive double value.")
+    values(i).asInstanceOf[Double]
+  }
+
+  override def getFloat(i: Int): Float = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive float value.")
+    values(i).asInstanceOf[Float]
+  }
+
+  override def getBoolean(i: Int): Boolean = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive boolean value.")
+    values(i).asInstanceOf[Boolean]
+  }
+
+  override def getShort(i: Int): Short = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive short value.")
+    values(i).asInstanceOf[Short]
+  }
+
+  override def getByte(i: Int): Byte = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive byte value.")
+    values(i).asInstanceOf[Byte]
+  }
+
+  override def getString(i: Int): String = {
+    if (values(i) == null) sys.error("Failed to check null bit for primitive String value.")
+    values(i).asInstanceOf[String]
+  }
+
+  // Custom hashCode function that matches the efficient code generated version.
+  override def hashCode(): Int = {
+    var result: Int = 37
+
+    var i = 0
+    while (i < values.length) {
+      val update: Int =
+        if (isNullAt(i)) {
+          0
+        } else {
+          apply(i) match {
+            case b: Boolean => if (b) 0 else 1
+            case b: Byte => b.toInt
+            case s: Short => s.toInt
+            case i: Int => i
+            case l: Long => (l ^ (l >>> 32)).toInt
+            case f: Float => java.lang.Float.floatToIntBits(f)
+            case d: Double =>
+              val b = java.lang.Double.doubleToLongBits(d)
+              (b ^ (b >>> 32)).toInt
+            case other => other.hashCode()
+          }
+        }
+      result = 37 * result + update
+      i += 1
+    }
+    result
+  }
+
+  def copy() = this
+}
+
+class GenericMutableRow(v: Array[Any]) extends GenericRow(v) with MutableRow {
+  /** No-arg constructor for serialization. */
+  def this() = this(null)
+
+  def this(size: Int) = this(new Array[Any](size))
+
+  override def setBoolean(ordinal: Int, value: Boolean): Unit = { values(ordinal) = value }
+  override def setByte(ordinal: Int, value: Byte): Unit = { values(ordinal) = value }
+  override def setDouble(ordinal: Int, value: Double): Unit = { values(ordinal) = value }
+  override def setFloat(ordinal: Int, value: Float): Unit = { values(ordinal) = value }
+  override def setInt(ordinal: Int, value: Int): Unit = { values(ordinal) = value }
+  override def setLong(ordinal: Int, value: Long): Unit = { values(ordinal) = value }
+  override def setString(ordinal: Int, value: String): Unit = { values(ordinal) = value }
+
+  override def setNullAt(i: Int): Unit = { values(i) = null }
+
+  override def setShort(ordinal: Int, value: Short): Unit = { values(ordinal) = value }
+
+  override def update(ordinal: Int, value: Any): Unit = { values(ordinal) = value }
+
+  override def copy() = new GenericRow(values.clone())
+}
+
+
+class RowOrdering(ordering: Seq[SortOrder]) extends Ordering[Row] {
+  def this(ordering: Seq[SortOrder], inputSchema: Seq[Attribute]) =
+    this(ordering.map(BindReferences.bindReference(_, inputSchema)))
+
+  def compare(a: Row, b: Row): Int = {
+    var i = 0
+    while (i < ordering.size) {
+      val order = ordering(i)
+      val left = order.child.eval(a)
+      val right = order.child.eval(b)
+
+      if (left == null && right == null) {
+        // Both null, continue looking.
+      } else if (left == null) {
+        return if (order.direction == Ascending) -1 else 1
+      } else if (right == null) {
+        return if (order.direction == Ascending) 1 else -1
+      } else {
+        val comparison = order.dataType match {
+          case n: NativeType if order.direction == Ascending =>
+            n.ordering.asInstanceOf[Ordering[Any]].compare(left, right)
+          case n: NativeType if order.direction == Descending =>
+            n.ordering.asInstanceOf[Ordering[Any]].reverse.compare(left, right)
+        }
+        if (comparison != 0) return comparison
+      }
+      i += 1
+    }
+    return 0
+  }
+}
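
A hedged sketch of the mutable interface above (the column positions and values are arbitrary choices for this example):

```scala
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow

val mutable = new GenericMutableRow(2)  // two columns, both initially null
mutable.setInt(0, 42)                   // setting a primitive marks the column non-null
mutable.setNullAt(1)
val snapshot = mutable.copy()           // copy() hands back an immutable GenericRow
```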

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
index fa0a355..e38ad63 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala
@@ -727,7 +727,7 @@ object StructType {
  * //   StructType(List(StructField(b,LongType,false), StructField(c,BooleanType,false)))
  * }}}
  *
- * A [[org.apache.spark.sql.catalyst.expressions.Row]] object is used as a value of the StructType.
+ * A [[org.apache.spark.sql.Row]] object is used as a value of the StructType.
  * Example:
  * {{{
  * import org.apache.spark.sql._

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
index 207e280..4faa79a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/Row.scala
@@ -23,7 +23,7 @@ import scala.collection.JavaConversions
 import scala.math.BigDecimal
 
 import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap
-import org.apache.spark.sql.catalyst.expressions.{Row => ScalaRow}
+import org.apache.spark.sql.{Row => ScalaRow}
 
 /**
  * A result row from a Spark SQL query.

http://git-wip-us.apache.org/repos/asf/spark/blob/d5eeb351/sql/core/src/main/scala/org/apache/spark/sql/package.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
index b75266d..6dd39be 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala
@@ -34,89 +34,6 @@ import org.apache.spark.sql.execution.SparkPlan
 package object sql {
 
   /**
-   * :: DeveloperApi ::
-   *
-   * Represents one row of output from a relational operator.
-   * @group row
-   */
-  @DeveloperApi
-  type Row = catalyst.expressions.Row
-
-  /**
-   * :: DeveloperApi ::
-   *
-   * A [[Row]] object can be constructed by providing field values. Example:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * // Create a Row from values.
-   * Row(value1, value2, value3, ...)
-   * // Create a Row from a Seq of values.
-   * Row.fromSeq(Seq(value1, value2, ...))
-   * }}}
-   *
-   * A value of a row can be accessed through both generic access by ordinal,
-   * which will incur boxing overhead for primitives, as well as native primitive access.
-   * An example of generic access by ordinal:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * val row = Row(1, true, "a string", null)
-   * // row: Row = [1,true,a string,null]
-   * val firstValue = row(0)
-   * // firstValue: Any = 1
-   * val fourthValue = row(3)
-   * // fourthValue: Any = null
-   * }}}
-   *
-   * For native primitive access, it is invalid to use the native primitive interface to retrieve
-   * a value that is null, instead a user must check `isNullAt` before attempting to retrieve a
-   * value that might be null.
-   * An example of native primitive access:
-   * {{{
-   * // using the row from the previous example.
-   * val firstValue = row.getInt(0)
-   * // firstValue: Int = 1
-   * val isNull = row.isNullAt(3)
-   * // isNull: Boolean = true
-   * }}}
-   *
-   * Interfaces related to native primitive access are:
-   *
-   * `isNullAt(i: Int): Boolean`
-   *
-   * `getInt(i: Int): Int`
-   *
-   * `getLong(i: Int): Long`
-   *
-   * `getDouble(i: Int): Double`
-   *
-   * `getFloat(i: Int): Float`
-   *
-   * `getBoolean(i: Int): Boolean`
-   *
-   * `getShort(i: Int): Short`
-   *
-   * `getByte(i: Int): Byte`
-   *
-   * `getString(i: Int): String`
-   *
-   * Fields in a [[Row]] object can be extracted in a pattern match. Example:
-   * {{{
-   * import org.apache.spark.sql._
-   *
-   * val pairs = sql("SELECT key, value FROM src").rdd.map {
-   *   case Row(key: Int, value: String) =>
-   *     key -> value
-   * }
-   * }}}
-   *
-   * @group row
-   */
-  @DeveloperApi
-  val Row = catalyst.expressions.Row
-
-  /**
    * Converts a logical plan into zero or more SparkPlans.
    */
   @DeveloperApi

