You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Naresh Kumar (JIRA)" <ji...@apache.org> on 2018/02/13 06:58:00 UTC
[jira] [Updated] (SPARK-23403)
java.lang.ArrayIndexOutOfBoundsException: 10
[ https://issues.apache.org/jira/browse/SPARK-23403?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Naresh Kumar updated SPARK-23403:
---------------------------------
Docs Text:
val washing_flat=sc.textFile("hdfs://ip-172-31-53-45:8020/user/narine91267897/washing_flat.csv")
washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[
24] at textFile at <console>:33
scala> val schema=StructType(Array(
| StructField("id",StringType,true),
| StructField("rev",StringType,true),
| StructField("count",LongType,true),
| StructField("flowrate",LongType,true),
| StructField("fluidlevel",StringType,true),
| StructField("frequency",LongType,true),
| StructField("hardness",LongType,true),
| StructField("speed",LongType,true),
| StructField("temperature",LongType,true),
| StructField("ts",LongType,true),
| StructField("voltage",LongType,true)))
scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0)
| ,row(1)
| ,row(2),
| row(3),
| row(4),
| row(5),
| row(6),
| row(7),
| row(8),
| row(9),
| row(10)))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35
scala> val washing_df=spark.createDataFrame(rowRDD,schema)
washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 9 more fields]
scala> washing_df.printSchema
root
|-- id: string (nullable = true)
|-- rev: string (nullable = true)
|-- count: long (nullable = true)
|-- flowrate: long (nullable = true)
|-- fluidlevel: string (nullable = true)
|-- frequency: long (nullable = true)
|-- hardness: long (nullable = true)
|-- speed: long (nullable = true)
|-- temperature: long (nullable = true)
|-- ts: long (nullable = true)
|-- voltage: long (nullable = true)
scala> washing_df.show(5)
18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5)
java.lang.ArrayIndexOutOfBoundsException: 10
was:
val washing_flat=sc.textFile("hdfs://ip-172-31-53-48.ec2.internal:8020/user/narine91267897/washing_flat.csv")
washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[
24] at textFile at <console>:33
scala> val schema=StructType(Array(
| StructField("id",StringType,true),
| StructField("rev",StringType,true),
| StructField("count",LongType,true),
| StructField("flowrate",LongType,true),
| StructField("fluidlevel",StringType,true),
| StructField("frequency",LongType,true),
| StructField("hardness",LongType,true),
| StructField("speed",LongType,true),
| StructField("temperature",LongType,true),
| StructField("ts",LongType,true),
| StructField("voltage",LongType,true)))
scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0)
| ,row(1)
| ,row(2),
| row(3),
| row(4),
| row(5),
| row(6),
| row(7),
| row(8),
| row(9),
| row(10)))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35
scala> val washing_df=spark.createDataFrame(rowRDD,schema)
washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 9 more fields]
scala> washing_df.printSchema
root
|-- id: string (nullable = true)
|-- rev: string (nullable = true)
|-- count: long (nullable = true)
|-- flowrate: long (nullable = true)
|-- fluidlevel: string (nullable = true)
|-- frequency: long (nullable = true)
|-- hardness: long (nullable = true)
|-- speed: long (nullable = true)
|-- temperature: long (nullable = true)
|-- ts: long (nullable = true)
|-- voltage: long (nullable = true)
scala> washing_df.show(5)
18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5)
java.lang.ArrayIndexOutOfBoundsException: 10
> java.lang.ArrayIndexOutOfBoundsException: 10
> --------------------------------------------
>
> Key: SPARK-23403
> URL: https://issues.apache.org/jira/browse/SPARK-23403
> Project: Spark
> Issue Type: Bug
> Components: Spark Shell
> Affects Versions: 2.2.0
> Reporter: Naresh Kumar
> Priority: Major
>
> java.lang.ArrayIndexOutOfBoundsException: 10, while retrieving records from a DataFrame in spark-shell
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org