You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Naresh Kumar (JIRA)" <ji...@apache.org> on 2018/02/13 06:58:00 UTC
[jira] [Updated] (SPARK-23403)
java.lang.ArrayIndexOutOfBoundsException: 10
[ https://issues.apache.org/jira/browse/SPARK-23403?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Naresh Kumar updated SPARK-23403:
---------------------------------
Docs Text:
val washing_flat=sc.textFile("hdfs://ip-172-31-53-45:8020/user/narine91267897/washing_flat.csv")
washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[
24] at textFile at <console>:33
scala> val schema=StructType(Array(
| StructField("id",StringType,true),
| StructField("rev",StringType,true),
| StructField("count",LongType,true),
| StructField("flowrate",LongType,true),
| StructField("fluidlevel",StringType,true),
| StructField("frequency",LongType,true),
| StructField("hardness",LongType,true),
| StructField("speed",LongType,true),
| StructField("temperature",LongType,true),
| StructField("ts",LongType,true),
| StructField("voltage",LongType,true)))
scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0)
| ,row(1)
| ,row(2),
| row(3),
| row(4),
| row(5),
| row(6),
| row(7),
| row(8),
| row(9),
| row(10)))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35
scala> val washing_df=spark.createDataFrame(rowRDD,schema)
washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 9 more fields]
scala> washing_df.printSchema
root
|-- id: string (nullable = true)
|-- rev: string (nullable = true)
|-- count: long (nullable = true)
|-- flowrate: long (nullable = true)
|-- fluidlevel: string (nullable = true)
|-- frequency: long (nullable = true)
|-- hardness: long (nullable = true)
|-- speed: long (nullable = true)
|-- temperature: long (nullable = true)
|-- ts: long (nullable = true)
|-- voltage: long (nullable = true)
scala> washing_df.show(5)
18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5)
java.lang.ArrayIndexOutOfBoundsException: 10
was:
val washing_flat=sc.textFile("hdfs://ip-172-31-53-48.ec2.internal:8020/user/narine91267897/washing_flat.csv")
washing_flat: org.apache.spark.rdd.RDD[String] = hdfs://ip-172-31-55-77:8020/user/narine91267897/washing_flat.csv MapPartitionsRDD[
24] at textFile at <console>:33
scala> val schema=StructType(Array(
| StructField("id",StringType,true),
| StructField("rev",StringType,true),
| StructField("count",LongType,true),
| StructField("flowrate",LongType,true),
| StructField("fluidlevel",StringType,true),
| StructField("frequency",LongType,true),
| StructField("hardness",LongType,true),
| StructField("speed",LongType,true),
| StructField("temperature",LongType,true),
| StructField("ts",LongType,true),
| StructField("voltage",LongType,true)))
scala> val rowRDD=washing_flat.map(line => line.split(",")).map(row => Row(row(0)
| ,row(1)
| ,row(2),
| row(3),
| row(4),
| row(5),
| row(6),
| row(7),
| row(8),
| row(9),
| row(10)))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[26] at map at <console>:35
scala> val washing_df=spark.createDataFrame(rowRDD,schema)
washing_df: org.apache.spark.sql.DataFrame = [id: string, rev: string ... 9 more fields]
scala> washing_df.printSchema
root
|-- id: string (nullable = true)
|-- rev: string (nullable = true)
|-- count: long (nullable = true)
|-- flowrate: long (nullable = true)
|-- fluidlevel: string (nullable = true)
|-- frequency: long (nullable = true)
|-- hardness: long (nullable = true)
|-- speed: long (nullable = true)
|-- temperature: long (nullable = true)
|-- ts: long (nullable = true)
|-- voltage: long (nullable = true)
scala> washing_df.show(5)
18/02/13 05:54:51 ERROR executor.Executor: Exception in task 0.0 in stage 4.0 (TID 5)
java.lang.ArrayIndexOutOfBoundsException: 10
> java.lang.ArrayIndexOutOfBoundsException: 10
> --------------------------------------------
>
> Key: SPARK-23403
> URL: https://issues.apache.org/jira/browse/SPARK-23403
> Project: Spark
> Issue Type: Bug
> Components: Spark Shell
> Affects Versions: 2.2.0
> Reporter: Naresh Kumar
> Priority: Major
>
> java.lang.ArrayIndexOutOfBoundsException: 10, while retrieving records from a DataFrame in spark-shell
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org