You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2016/08/03 00:23:36 UTC
spark git commit: [SQL][MINOR] use stricter type parameter to make it
clear that parquet reader returns UnsafeRow
Repository: spark
Updated Branches:
refs/heads/master 386127377 -> ae226283e
[SQL][MINOR] use stricter type parameter to make it clear that parquet reader returns UnsafeRow
## What changes were proposed in this pull request?
a small code style change, it's better to make the type parameter more accurate.
## How was this patch tested?
N/A
Author: Wenchen Fan <we...@databricks.com>
Closes #14458 from cloud-fan/parquet.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae226283
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae226283
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae226283
Branch: refs/heads/master
Commit: ae226283e19ce396216c73b0ae2470efa122b65b
Parents: 3861273
Author: Wenchen Fan <we...@databricks.com>
Authored: Wed Aug 3 08:23:26 2016 +0800
Committer: Cheng Lian <li...@databricks.com>
Committed: Wed Aug 3 08:23:26 2016 +0800
----------------------------------------------------------------------
.../execution/datasources/parquet/ParquetFileFormat.scala | 4 ++--
.../datasources/parquet/ParquetReadSupport.scala | 10 +++++-----
.../datasources/parquet/ParquetRecordMaterializer.scala | 6 +++---
3 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 772e031..c3e75f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -370,11 +370,11 @@ private[sql] class ParquetFileFormat
logDebug(s"Falling back to parquet-mr")
val reader = pushed match {
case Some(filter) =>
- new ParquetRecordReader[InternalRow](
+ new ParquetRecordReader[UnsafeRow](
new ParquetReadSupport,
FilterCompat.get(filter, null))
case _ =>
- new ParquetRecordReader[InternalRow](new ParquetReadSupport)
+ new ParquetRecordReader[UnsafeRow](new ParquetReadSupport)
}
reader.initialize(split, hadoopAttemptContext)
reader
http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
index 8a2e0d7..f1a35dd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
@@ -29,12 +29,12 @@ import org.apache.parquet.schema._
import org.apache.parquet.schema.Type.Repetition
import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types._
/**
* A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst
- * [[InternalRow]]s.
+ * [[UnsafeRow]]s.
*
* The API interface of [[ReadSupport]] is a little bit over complicated because of historical
* reasons. In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be
@@ -48,7 +48,7 @@ import org.apache.spark.sql.types._
* Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]]
* to [[prepareForRead()]], but use a private `var` for simplicity.
*/
-private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with Logging {
+private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging {
private var catalystRequestedSchema: StructType = _
/**
@@ -72,13 +72,13 @@ private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with
/**
* Called on executor side after [[init()]], before instantiating actual Parquet record readers.
* Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet
- * records to Catalyst [[InternalRow]]s.
+ * records to Catalyst [[UnsafeRow]]s.
*/
override def prepareForRead(
conf: Configuration,
keyValueMetaData: JMap[String, String],
fileSchema: MessageType,
- readContext: ReadContext): RecordMaterializer[InternalRow] = {
+ readContext: ReadContext): RecordMaterializer[UnsafeRow] = {
log.debug(s"Preparing for read Parquet file with message type: $fileSchema")
val parquetRequestedSchema = readContext.getRequestedSchema
http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
index d12e780..4e49a0d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
import org.apache.parquet.schema.MessageType
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.types.StructType
/**
@@ -32,12 +32,12 @@ import org.apache.spark.sql.types.StructType
*/
private[parquet] class ParquetRecordMaterializer(
parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetSchemaConverter)
- extends RecordMaterializer[InternalRow] {
+ extends RecordMaterializer[UnsafeRow] {
private val rootConverter =
new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater)
- override def getCurrentRecord: InternalRow = rootConverter.currentRecord
+ override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord
override def getRootConverter: GroupConverter = rootConverter
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org