Posted to issues@spark.apache.org by "Paul Wu (JIRA)" <ji...@apache.org> on 2015/07/24 18:01:05 UTC

[jira] [Issue Comment Deleted] (SPARK-9255) Timestamp handling incorrect for Spark 1.4.1 on Linux

     [ https://issues.apache.org/jira/browse/SPARK-9255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Paul Wu updated SPARK-9255:
---------------------------
    Comment: was deleted

(was: [~srowen], could you please see the attachment timestamp_bug2.zip? It has a detailed comment on how I created the project and how I tested it. It is a very, very simple project; I tested it many times and reached the same conclusion each time: the bug is indeed in 1.4.1 on Red Hat with JDK 1.8, but not in 1.3.0.)

> Timestamp handling incorrect for Spark 1.4.1 on Linux
> -----------------------------------------------------
>
>                 Key: SPARK-9255
>                 URL: https://issues.apache.org/jira/browse/SPARK-9255
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.4.1
>         Environment: Red Hat Linux, Java 8, and the Spark 1.4.1 release.
>            Reporter: Paul Wu
>         Attachments: timestamp_bug2.zip, tstest
>
>
> This is a very strange case involving timestamps. I can run the program without issues on Windows, using either the dev pom.xml (1.4.1) or the 1.4.1 or 1.3.0 releases downloaded from Apache. But when I run it on the Spark 1.4.1 release on Red Hat Linux, whether downloaded from Apache or built with Scala 2.11, it fails with the following error (the code I used follows the stack trace):
> 15/07/22 12:02:50  ERROR Executor 96: Exception in task 0.0 in stage 0.0 (TID 0)
> java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
>         at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)
>         at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)
>         at org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)
>         at org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)
>         at org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)
>         at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)
>         at org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>         at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>         at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
>         at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>         at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102)
>         at org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170)
>         at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261)
>         at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>         at org.apache.spark.scheduler.Task.run(Task.scala:70)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92)
>         at org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>         at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>         ... 25 more
> 15/07/22 12:02:50  ERROR TaskSetManager 75: Task 0 in stage 0.0 failed 1 times; aborting job
> Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
>         at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)
>         at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)
>         at org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)
>         at org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)
>         at org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)
>         at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)
>         at org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>         at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>         at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
>         at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>         at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102)
>         at org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170)
>         at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261)
>         at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
>         at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>         at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>         at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
>         at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
>         at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>         at org.apache.spark.scheduler.Task.run(Task.scala:70)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422)
>         at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74)
>         at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26)
>         at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92)
>         at org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>         at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>         ... 25 more
> Driver stacktrace:
>         at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
>         at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>         at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
>         at scala.Option.foreach(Option.scala:257)
>         at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
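> The trace shows where the failure originates: the min(ts) aggregate makes Catalyst (GeneratedAggregate / GenerateMutableProjection) generate projection code whose "<" comparison fails to compile against TimestampType's internal type. As a quick diagnostic, a sketch reusing the sqlContext and "timetable" from the code below (it is an assumption, not verified here, that the UDF is unnecessary to trigger the bug) would be to run the bare aggregate:
>
>         // Does min() on a timestamp column alone reproduce the codegen failure?
>         sqlContext.sql("select min(ts) from timetable").show();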
> ================================================
> Code: TruncHour.java
> package zwu.spark.sample;
> import java.sql.Timestamp;
> import org.apache.spark.sql.api.java.UDF1;
> /**
>  * UDF that truncates a timestamp to the top of its hour by zeroing
>  * minutes, seconds, and nanoseconds. Note: it mutates the input
>  * Timestamp in place and relies on deprecated java.util.Date setters.
>  */
> public class TruncHour implements UDF1<Timestamp, Timestamp> {
>     @Override
>     public Timestamp call(Timestamp t1) throws Exception {
>         t1.setMinutes(0); // deprecated Date setter
>         t1.setSeconds(0); // deprecated Date setter
>         t1.setNanos(0);   // clear fractional seconds
>         return t1;
>     }
> }
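> As a side note, the deprecated Date setters above also mutate the input in place. A minimal alternative sketch using java.util.Calendar (TruncHourCalendar is a hypothetical name, not part of the attachment) could be:
>
> package zwu.spark.sample;
> import java.sql.Timestamp;
> import java.util.Calendar;
> import org.apache.spark.sql.api.java.UDF1;
> public class TruncHourCalendar implements UDF1<Timestamp, Timestamp> {
>     @Override
>     public Timestamp call(Timestamp t1) throws Exception {
>         // Copy into a Calendar so the input Timestamp is left untouched.
>         Calendar cal = Calendar.getInstance();
>         cal.setTimeInMillis(t1.getTime());
>         cal.set(Calendar.MINUTE, 0);
>         cal.set(Calendar.SECOND, 0);
>         cal.set(Calendar.MILLISECOND, 0);
>         return new Timestamp(cal.getTimeInMillis());
>     }
> }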
> TimestampSample.java:
> package zwu.spark.sample;
> import java.io.Serializable;
> import java.sql.Timestamp;
> import java.util.ArrayList;
> import java.util.Date;
> import java.util.List;
> import org.apache.log4j.Logger;
> import org.apache.spark.SparkConf;
> import org.apache.spark.api.java.JavaRDD;
> import org.apache.spark.api.java.JavaSparkContext;
> import org.apache.spark.sql.DataFrame;
> import org.apache.spark.sql.types.DataTypes;
> /**
>  *
>  * @author zw251y
>  */
> public class TimestampSample {
>     private final static Logger log = Logger.getLogger(TimestampSample.class);
>     public static void main(String[] args) {
>         SparkConf sparkConf = new SparkConf().setAppName("TimeSample");
>         sparkConf.setMaster("local[1]"); // run locally on a single core
>         JavaSparkContext sc = new JavaSparkContext(sparkConf);
>         org.apache.spark.sql.hive.HiveContext sqlContext = new org.apache.spark.sql.hive.HiveContext(sc.sc());
>         List<TimeBean> tbList = new ArrayList<>();
>         TimeBean tb = new TimeBean();
>         tb.ts = new Timestamp(new Date().getTime());
>         tbList.add(tb);
>         JavaRDD<TimeBean> mytable = sc.parallelize(tbList);
>         // Apply a schema to an RDD of JavaBeans and register it as a table.
>         DataFrame timeTable = sqlContext.createDataFrame(mytable, TimeBean.class);
>         timeTable.registerTempTable("timetable");
>          
>         sqlContext.udf().register("truncHour", new TruncHour(), DataTypes.TimestampType);
>         // SQL can be run over tables registered from RDDs.
>         String test = "select p from (select min(ts) p, count(ts) from timetable group by truncHour(ts)) as mytable group by p";
>         DataFrame df = sqlContext.sql(test);
>         df.show();
>         sc.stop(); // shut the context down cleanly
>     }
>     public static class TimeBean implements Serializable {
>         private String name;
>         private Timestamp ts;
>         /**
>          * Get the value of ts
>          *
>          * @return the value of ts
>          */
>         public Timestamp getTs() {
>             return ts;
>         }
>         /**
>          * Set the value of ts
>          *
>          * @param d new value of ts
>          */
>         public void setTs(Timestamp d) {
>             this.ts = d;
>         }
>         public String getName() {
>             return name;
>         }
>         public void setName(String name) {
>             this.name = name;
>         }
>     }
> }
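> If Catalyst code generation is indeed the trigger, one mitigation to try on 1.4.x (assuming the Spark 1.x "spark.sql.codegen" configuration key; this is a hedged suggestion, not a verified fix for this bug) is to disable codegen before issuing the query:
>
>         // In main(), before sqlContext.sql(test):
>         // assumes the Spark 1.x "spark.sql.codegen" key disables Catalyst codegen
>         sqlContext.setConf("spark.sql.codegen", "false");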


