You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Paul Wu (JIRA)" <ji...@apache.org> on 2015/07/24 18:01:05 UTC
[jira] [Issue Comment Deleted] (SPARK-9255) Timestamp handling
incorrect for Spark 1.4.1 on Linux
[ https://issues.apache.org/jira/browse/SPARK-9255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Paul Wu updated SPARK-9255:
---------------------------
Comment: was deleted
(was: [~srowen], could you please see the attachment timestamp_bug2.zip? It has a detailed comment on how I created the project and how I tested it. It is a very, very simple project, and I tested it many times and reached the same conclusion: the bug is indeed in 1.4.1 on Redhat with JDK 1.8, but not in 1.3.0.)
> Timestamp handling incorrect for Spark 1.4.1 on Linux
> -----------------------------------------------------
>
> Key: SPARK-9255
> URL: https://issues.apache.org/jira/browse/SPARK-9255
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.4.1
> Environment: Redhat Linux, Java 8.0 and Spark 1.4.1 release.
> Reporter: Paul Wu
> Attachments: timestamp_bug2.zip, tstest
>
>
> This is a very strange case involving timestamps. I can run the program on Windows using the dev pom.xml (1.4.1), or the 1.4.1 or 1.3.0 release downloaded from Apache, without issues, but when I run it on Red Hat Linux with the Spark 1.4.1 release (either downloaded from Apache or built with Scala 2.11), it fails with the following error (the code I used is after this stack trace):
> 15/07/22 12:02:50 ERROR Executor 96: Exception in task 0.0 in stage 0.0 (TID 0)
> java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
> at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)
> at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)
> at org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)
> at org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)
> at org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)
> at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)
> at org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
> at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
> at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
> at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
> at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102)
> at org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170)
> at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261)
> at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
> at org.apache.spark.scheduler.Task.run(Task.scala:70)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92)
> at org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> ... 25 more
> 15/07/22 12:02:50 ERROR TaskSetManager 75: Task 0 in stage 0.0 failed 1 times; aborting job
> Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
> at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)
> at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)
> at org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)
> at org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)
> at org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)
> at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)
> at org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
> at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
> at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000)
> at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
> at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102)
> at org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170)
> at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261)
> at org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
> at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
> at org.apache.spark.scheduler.Task.run(Task.scala:70)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has failed:
> value < is not a member of TimestampType.this.InternalType
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422)
> at scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92)
> at org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> ... 25 more
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> ================================================
> Code: TruncHour.java
> /*
> * To change this license header, choose License Headers in Project Properties.
> * To change this template file, choose Tools | Templates
> * and open the template in the editor.
> */
> package zwu.spark.sample;
> import java.sql.Timestamp;
> import org.apache.spark.sql.api.java.UDF1;
> public class TruncHour implements UDF1<Timestamp, Timestamp> { // UDF that truncates a Timestamp down to the start of its hour
> @Override
> public Timestamp call(Timestamp t1) throws Exception { // no null check: a null t1 fails with NullPointerException on the first setter below
> t1.setMinutes(0); // zero the minutes field; this mutates the caller's Timestamp in place
> t1.setSeconds(0); // zero the seconds field
> t1.setNanos(0); // zero the fractional-second (nanosecond) field
> return t1; // returns the same instance that was passed in, not a copy
> //throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
> }
> }
> TimestampSample.java:
> package zwu.spark.sample;
> import java.io.Serializable;
> import java.sql.Timestamp;
> import java.util.ArrayList;
> import java.util.Date;
> import java.util.List;
> import org.apache.log4j.Logger;
> import org.apache.spark.SparkConf;
> import org.apache.spark.api.java.JavaRDD;
> import org.apache.spark.api.java.JavaSparkContext;
> import org.apache.spark.sql.DataFrame;
> import org.apache.spark.sql.types.DataTypes;
> /**
> * Driver program for the reproduction: builds a one-row table of {@link TimeBean},
> * registers {@link TruncHour} as the SQL function "truncHour", and runs a nested
> * GROUP BY query over the timestamp column, printing the result.
> *
> * @author zw251y
> */
> public class TimestampSample {
> private final static Logger log = Logger.getLogger(TimestampSample.class); // declared but not used anywhere in this class
> public static void main(String[] args) { // args is not read
> SparkConf sparkConf = new SparkConf().setAppName("TimeSample");
> //if (PUtil.isWindows) {
> sparkConf.setMaster("local[1]"); // master is hard-coded; the platform check around it is commented out
> //}
> JavaSparkContext sc = new JavaSparkContext(sparkConf);
> org.apache.spark.sql.hive.HiveContext sqlContext = new org.apache.spark.sql.hive.HiveContext(sc.sc()); // built on the Scala SparkContext wrapped by sc
> List<TimeBean> tbList = new ArrayList<>();
> TimeBean tb = new TimeBean();
> tb.ts = new Timestamp(new Date().getTime()); // current time; assigns the private field directly, bypassing setTs
> tbList.add(tb); // the table holds exactly one row; name is left null
> JavaRDD<TimeBean> mytable = sc.parallelize(tbList);
> //Apply a schema to an RDD of JavaBeans and register it as a table.
> //DataFrame schemaPeople = sqlContext.applySchema(people, TimeBean.class);
> DataFrame schemaPeople = sqlContext.createDataFrame(mytable, TimeBean.class);
> schemaPeople.registerTempTable("timetable"); // the query below refers to the data by this name
> 
> sqlContext.udf().register("truncHour", new TruncHour(), DataTypes.TimestampType); // declared return type of the UDF is TimestampType
> // SQL can be run over RDDs that have been registered as tables.
> //org.apache.spark.sql.types.TimestampType p = null;
> //p.
> String test = "select p from (SELECT min(ts) p , count(ts) from timetable group by truncHour(ts)) as mytable group by p "; // inner query: min(ts) and count(ts) per truncHour(ts) group; outer query: groups again by that minimum
> DataFrame df = sqlContext.sql(test);
> df.show(); // prints the result rows; NOTE(review): presumably the action that triggers the reported failure, confirm against the stack trace
> }
> public static class TimeBean implements Serializable { // bean with two properties, name and ts, used as the row type of the table
> private String name;
> private Timestamp ts;
> /**
> * Get the value of ts
> *
> * @return the value of ts
> */
> public Timestamp getTs() {
> return ts;
> }
> /**
> * Set the value of ts
> *
> * @param d new value of ts
> */
> public void setTs(Timestamp d) {
> this.ts = d;
> }
> public String getName() {
> return name;
> }
> public void setName(String name) {
> this.name = name;
> }
> }
> }
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org