Posted to user@spark.apache.org by whitebread <al...@me.com> on 2014/11/21 19:39:01 UTC

SparkSQL Timestamp query failure

Hi all,

I put some log files into sql tables through Spark and my schema looks like
this:

 |-- timestamp: timestamp (nullable = true)
 |-- c_ip: string (nullable = true)
 |-- cs_username: string (nullable = true)
 |-- s_ip: string (nullable = true)
 |-- s_port: string (nullable = true)
 |-- cs_method: string (nullable = true)
 |-- cs_uri_stem: string (nullable = true)
 |-- cs_query: string (nullable = true)
 |-- sc_status: integer (nullable = false)
 |-- sc_bytes: integer (nullable = false)
 |-- cs_bytes: integer (nullable = false)
 |-- time_taken: integer (nullable = false)
 |-- User_Agent: string (nullable = true)
 |-- Referrer: string (nullable = true)

As you can notice, I created a timestamp field, which I read is supported by
Spark (Date wouldn't work, as far as I understood). I would love to use it in
queries like "where timestamp>(2012-10-08 16:10:36.0)", but when I run them I
keep getting errors.
I tried the 2 following syntax forms.
For the second one I parse a string, so I'm sure I'm actually passing it in a
timestamp format.
I use 2 functions: /parse/ and /date2timestamp/.
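
(For reference, /date2timestamp/ is a thin wrapper over java.sql.Timestamp;
its definition, as given later in this thread, is:

    implicit def date2timestamp(date: java.util.Date) = new java.sql.Timestamp(date.getTime)

and /parse/ is java.text.SimpleDateFormat.parse.)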

*Any hint on how I should handle timestamp values?* 

Thanks,

Alessandro

1)
scala> sqlContext.sql("SELECT * FROM Logs as l where l.timestamp=(2012-10-08 16:10:36.0)").collect
java.lang.RuntimeException: [1.55] failure: ``)'' expected but 16 found

SELECT * FROM Logs as l where l.timestamp=(2012-10-08 16:10:36.0)
                                                      ^
	at scala.sys.package$.error(package.scala:27)
	at org.apache.spark.sql.catalyst.SqlParser.apply(SqlParser.scala:60)
	at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:73)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:260)
	at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:21)
	at $iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
	at $iwC$$iwC$$iwC.<init>(<console>:28)
	at $iwC$$iwC.<init>(<console>:30)
	at $iwC.<init>(<console>:32)
	at <init>(<console>:34)
	at .<init>(<console>:38)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:615)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:646)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:610)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:814)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:859)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:771)
	at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:616)
	at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:624)
	at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:629)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:954)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:997)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

2)
sqlContext.sql("SELECT * FROM Logs as l where
l.timestamp="+date2timestamp(formatTime3.parse("2012-10-08
16:10:36.0"))).collect
java.lang.RuntimeException: [1.54] failure: ``UNION'' expected but 16 found

SELECT * FROM Logs as l where l.timestamp=2012-10-08 16:10:36.0
                                                     ^
	at scala.sys.package$.error(package.scala:27)
	at org.apache.spark.sql.catalyst.SqlParser.apply(SqlParser.scala:60)
	at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:73)
	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:260)
	at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:23)
	at $iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
	at $iwC$$iwC$$iwC.<init>(<console>:30)
	at $iwC$$iwC.<init>(<console>:32)
	at $iwC.<init>(<console>:34)
	at <init>(<console>:36)
	at .<init>(<console>:40)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789)
	at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:615)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:646)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:610)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:814)
	at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:859)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:771)
	at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:616)
	at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:624)
	at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:629)
	at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:954)
	at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
	at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:902)
	at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:997)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)







Re: SparkSQL Timestamp query failure

Posted by anu <an...@gmail.com>.
Thank you Alessandro :)

On Tue, Mar 3, 2015 at 10:03 AM, whitebread [via Apache Spark User List] <
ml-node+s1001560n21884h2@n3.nabble.com> wrote:

> Anu,
>
> 1) I defined my class Header as follows:
>
> case class Header(timestamp: java.sql.Timestamp, c_ip: String,
> cs_username: String, s_ip: String, s_port: String, cs_method: String,
> cs_uri_stem: String, cs_query: String, sc_status: Int, sc_bytes: Int,
> cs_bytes: Int, time_taken: Int, User_Agent: String, Referrer: String)
>
> 2) Defined a function to transform date to timestamp:
>
> implicit def date2timestamp(date: java.util.Date) = new
> java.sql.Timestamp(date.getTime)
>
> 3) Defined the format of my timestamp
>
> val formatTime3 = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
>
> 4) I was then able to parse my data:
>
> val tableMod = toProcessLogs.map(_.split(" ")).map(p =>
> (Header(date2timestamp(formatTime3.parse(p(0)+" "+p(1))),p(2), p(3), p(4),
> p(5), p(6), p(7), p(8), p(9).trim.toInt, p(10).trim.toInt,
> p(11).trim.toInt, p(12).trim.toInt, p(13), p(14))))
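>
> 5) Then I registered the result as a table and queried it (sketching from
> memory; on Spark 1.1 registerTempTable works once the implicit
> RDD-to-SchemaRDD conversion is in scope):
>
> import sqlContext.createSchemaRDD
> tableMod.registerTempTable("Logs")
> sqlContext.sql("SELECT * FROM Logs as l where cast(l.timestamp as string) = '2012-10-08 16:10:36.0'").collect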
>
> Hope this helps,
>
> Alessandro
>





Re: SparkSQL Timestamp query failure

Posted by anu <an...@gmail.com>.
Hi Alessandro

Could you specify which query you were able to run successfully?

1. sqlContext.sql("SELECT * FROM Logs as l where l.timestamp = '2012-10-08 16:10:36' ").collect

OR

2. sqlContext.sql("SELECT * FROM Logs as l where cast(l.timestamp as string) = '2012-10-08 16:10:36.0'").collect

I am able to run only the second query, i.e. the one with the timestamp cast
to string. What is the use of even parsing my data to store timestamp values
when I can't do >= and <= comparisons on timestamps?

In the above query, I am ultimately doing string comparisons, while I
actually want to do comparison on timestamp values.

*My Spark version is 1.1.0*

Could somebody please clarify why I am not able to perform queries like

Select * from table1 where endTime >= '2015-01-01 00:00:00' and endTime <= '2015-01-10 00:00:00'

without getting anything in the output.

Even the following doesn't work:

Select * from table1 where endTime >= CAST('2015-01-01 00:00:00' as timestamp) and endTime <= CAST('2015-01-10 00:00:00' as timestamp)

I get this error: *java.lang.RuntimeException: [1.99] failure: ``STRING''
expected but identifier timestamp found*
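
For now the only form I can get past the parser is a range on the cast string
(a sketch; the fixed-width "yyyy-MM-dd HH:mm:ss" layout is what makes string
order match time order, and since the cast output carries a trailing ".0" the
upper bound needs one too):

Select * from table1 where cast(endTime as string) >= '2015-01-01 00:00:00' and cast(endTime as string) <= '2015-01-10 00:00:00.0'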

Thanks





Re: SparkSQL Timestamp query failure

Posted by anu <an...@gmail.com>.
Can you please post how you overcame this issue?





Re: SparkSQL Timestamp query failure

Posted by whitebread <al...@me.com>.
Thanks Cheng,

thanks to you I found out that, as you guessed, the problem was a precision one. I was comparing against

2012-10-08 16:10:36 instead of 2012-10-08 16:10:36.0
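
With the full precision the comparison finally matches; for the record, the
form that worked for me (a sketch from my shell):

    sqlContext.sql("SELECT * FROM Logs as l where cast(l.timestamp as string) = '2012-10-08 16:10:36.0'").collect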


Thanks again.

Alessandro


> On Nov 23, 2014, at 11:10 PM, Cheng, Hao [via Apache Spark User List] <ml...@n3.nabble.com> wrote:
> 
> Can you try a query like “SELECT timestamp, CAST(timestamp as string) FROM logs LIMIT 5”? I guess you probably ran into the timestamp precision or the timezone shifting problem.
> 
> (And it’s not mandatory, but you’d better change the field name from “timestamp” to something else, as “timestamp” is a data type keyword in Hive/Spark SQL.)
> 
> [...]

RE: SparkSQL Timestamp query failure

Posted by "Cheng, Hao" <ha...@intel.com>.
Can you try a query like “SELECT timestamp, CAST(timestamp as string) FROM logs LIMIT 5”? I guess you probably ran into the timestamp precision or the timezone shifting problem.

(And it’s not mandatory, but you’d better change the field name from “timestamp” to something else, as “timestamp” is a data type keyword in Hive/Spark SQL.)
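
In the shell that diagnostic looks like this (a sketch; “Logs” is the table
name used earlier in the thread):

    sqlContext.sql("SELECT timestamp, CAST(timestamp AS string) FROM Logs LIMIT 5").collect.foreach(println)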

From: Alessandro Panebianco [mailto:ale.panebianco@me.com]
Sent: Monday, November 24, 2014 11:12 AM
To: Wang, Daoyuan
Cc: user@spark.incubator.apache.org
Subject: Re: SparkSQL Timestamp query failure

[...]


Re: SparkSQL Timestamp query failure

Posted by Alessandro Panebianco <al...@me.com>.
Hey Daoyuan, 

following your suggestion I obtain the same result as when I do:

where l.timestamp = '2012-10-08 16:10:36.0'

What happens, using either your suggestion or simply single quotes as in the example I just typed, is that the query does not fail, but it doesn't return anything either, although it should.

If I do a simple:

sqlContext.sql("SELECT timestamp FROM Logs limit 5").collect.foreach(println)

I get: 

[2012-10-08 16:10:36.0]
[2012-10-08 16:10:36.0]
[2012-10-08 16:10:36.0]
[2012-10-08 16:10:41.0]
[2012-10-08 16:10:41.0]

that is why I am sure that querying for one of those timestamps should not return an empty array.

I'd really love to find a solution to this problem. Since Spark supports Timestamp, it should in my opinion support simple comparisons on them.

Any other help would be greatly appreciated.

Alessandro


> On Nov 23, 2014, at 8:10 PM, Wang, Daoyuan <da...@intel.com> wrote:
> 
> Hi,
> 
> I think you can try
> cast(l.timestamp as string)='2012-10-08 16:10:36.0'
> 
> Thanks,
> Daoyuan
> 
> [...]

RE: SparkSQL Timestamp query failure

Posted by "Wang, Daoyuan" <da...@intel.com>.
Hi,

I think you can try
 cast(l.timestamp as string)='2012-10-08 16:10:36.0'
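
Spelled out against the thread's table, that would be (a sketch):

    sqlContext.sql("SELECT * FROM Logs as l where cast(l.timestamp as string) = '2012-10-08 16:10:36.0'").collect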

Thanks,
Daoyuan

-----Original Message-----
From: whitebread [mailto:ale.panebianco@me.com]
Sent: Sunday, November 23, 2014 12:11 AM
To: user@spark.incubator.apache.org
Subject: Re: SparkSQL Timestamp query failure

[...]


Re: SparkSQL Timestamp query failure

Posted by whitebread <al...@me.com>.
Thanks for your answer Akhil,

I have already tried that, and the query actually doesn't fail, but it
doesn't return anything either, although it should.
With single quotes I think it reads the value as a string and not as a timestamp.

I don't know how to solve this. Any other hint by any chance?

Thanks,

Alessandro





Re: SparkSQL Timestamp query failure

Posted by Akhil Das <ak...@sigmoidanalytics.com>.
What about

sqlContext.sql("SELECT * FROM Logs as l where l.timestamp=*'2012-10-08
16:10:36.0'*").collect

You might need to quote the timestamp it looks like.

Thanks
Best Regards

On Sat, Nov 22, 2014 at 12:09 AM, whitebread <al...@me.com> wrote:

> Hi all,
>
> I put some log files into sql tables through Spark and my schema looks like
> this:
>
> [...]
>
> *Any hint on how I should handle timestamp values?*
>
> Thanks,
>
> Alessandro