You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Yingyi Bu (JIRA)" <ji...@apache.org> on 2016/02/24 19:21:18 UTC

[jira] [Updated] (ASTERIXDB-1322) Compiler exception for queries with complex UDFs

     [ https://issues.apache.org/jira/browse/ASTERIXDB-1322?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Yingyi Bu updated ASTERIXDB-1322:
---------------------------------
    Description: 
The project push-down is not done correctly with respect to subplans.

The following query causes a compiler exception because of undefined variables (which are projected away in the query plan subtree):

{noformat}
/*
 * Closed record type for the records stored in the HRM dataset.
 * The reproducer query below reads the sid, date, day, time and RR fields.
 */
create type HRMType as closed {
  row_id: int32,
  sid: int32,
  date: date,
  day: int32,
  time: time,
  bpm: int32,
  RR: float
};

/* Dataset of HRMType records with row_id as its primary key. */
create dataset HRM(HRMType) primary key row_id;

/*
 * median($x): returns the avg of the items of $x, taken in `order by $j`
 * order, whose position $p satisfies floor(($c-1)/2.0) <= $p <= $c/2,
 * where $c = count($x).
 * NOTE(review): which items that window selects depends on whether the
 * positional variable $p is 0- or 1-based -- confirm against the AQL manual.
 */
declare function median($x)
{
     let $c := count($x)
     return avg(
                /* the inner FLWOR sorts $x; $p is the position of $i in that sorted sequence */
                for $i at $p in (for $j in $x order by $j return $j)
                where $p >= floor(($c-1)/2.0) and $p <= $c/2
                return $i
               )
}


/*
 * MAD($x): median of the absolute deviations abs($j - $m) of the items $j
 * of $x from $m = median($x).
 * $m is bound once with a let clause and used in both the order-by key and
 * the returned value.
 */
declare function MAD($x)
{
     let $m := median($x)
     return median(for $j in $x order by abs($j-$m) return abs($j-$m))
}


/*
 * Reproducer query: groups the HRM records by sid, date, day and the
 * interval-bin of time (anchor time("00:00:00"), bin size PT1M), then
 * returns one record of per-group statistics over RR.
 * `with $i` carries the grouped records into the return clause, where they
 * are iterated as $ii.
 */
for $i in dataset HRM
group by $sid := $i.sid, $gdate := $i.date, $gday := $i.day,
$timebin := interval-bin($i.time, time("00:00:00"), day-time-duration("PT1M")) with $i
return
{ 
  "sid": $sid,
  "gdate": $gdate,
  "gday": $gday,
  "timebin": $timebin,
  /* (avg of RR squared - square of avg RR) raised to the power 0.5 */
  "stdv": (avg(for $ii in $i return $ii.RR * $ii.RR) - 
          avg(for $ii in $i return $ii.RR) * avg(for $ii in $i return $ii.RR))^(0.5),
  /* passes the group's RR values to the user-defined MAD function */
  "MAD": MAD(for $ii in $i return $ii.RR)
}
{noformat}


A modified version of MAD can work:
{noformat}
/*
 * Variant of MAD($x) that calls median($x) inline in both the order-by key
 * and the returned value instead of binding it once with a let clause.
 */
declare function MAD($x)
{
     median(for $j in $x order by abs($j-median($x)) return abs($j-median($x)))
}
{noformat}

  was:
The push project down is not done right regarding to subplans.

The following query will cause an compiler exception because of undefined variables (which is projected away in the query plan subtree):

{noformat}
create type HRMType as closed {
  row_id: int32,
  sid: int32,
  date: date,
  day: int32,
  time: time,
  bpm: int32,
  RR: float
};

create dataset HRM(HRMType) primary key row_id;

declare function median($x)
{
     let $c := count($x)
     return avg(
                for $i at $p in (for $j in $x order by $j return $j)
                where $p >= floor(($c-1)/2.0) and $p <= $c/2
                return $i
               )
}


declare function MAD($x)
{
     let $m := median($x)
     return median(for $j in $x order by abs($j-$m)) return abs($j-$m))
}


for $i in dataset HRM
group by $sid := $i.sid, $gdate := $i.date, $gday := $i.day,
$timebin := interval-bin($i.time, time("00:00:00"), day-time-duration("PT1M")) with $i
return
{ 
  "sid": $sid,
  "gdate": $gdate,
  "gday": $gday,
  "timebin": $timebin,
  "stdv": (avg(for $ii in $i return $ii.RR * $ii.RR) - 
          avg(for $ii in $i return $ii.RR) * avg(for $ii in $i return $ii.RR))^(0.5),
  "MAD": MAD(for $ii in $i return $ii.RR)
}
{nonformat}


A modified version of MAD can work:
{noformat}
declare function MAD($x)
{
     median(for $j in $x order by abs($j-median($x)) return abs($j-median($x)))
}
{noformat}


> Compiler exception for queries with complex UDFs
> ------------------------------------------------
>
>                 Key: ASTERIXDB-1322
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1322
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: Optimizer
>            Reporter: Yingyi Bu
>            Assignee: Yingyi Bu
>
> The project push-down is not done correctly with respect to subplans.
> The following query causes a compiler exception because of undefined variables (which are projected away in the query plan subtree):
> {noformat}
> create type HRMType as closed {
>   row_id: int32,
>   sid: int32,
>   date: date,
>   day: int32,
>   time: time,
>   bpm: int32,
>   RR: float
> };
> create dataset HRM(HRMType) primary key row_id;
> declare function median($x)
> {
>      let $c := count($x)
>      return avg(
>                 for $i at $p in (for $j in $x order by $j return $j)
>                 where $p >= floor(($c-1)/2.0) and $p <= $c/2
>                 return $i
>                )
> }
> /*
>  * MAD($x): median of the absolute deviations abs($j - $m) of the items $j
>  * of $x from $m = median($x).
>  */
> declare function MAD($x)
> {
>      let $m := median($x)
>      return median(for $j in $x order by abs($j-$m) return abs($j-$m))
> }
> for $i in dataset HRM
> group by $sid := $i.sid, $gdate := $i.date, $gday := $i.day,
> $timebin := interval-bin($i.time, time("00:00:00"), day-time-duration("PT1M")) with $i
> return
> { 
>   "sid": $sid,
>   "gdate": $gdate,
>   "gday": $gday,
>   "timebin": $timebin,
>   "stdv": (avg(for $ii in $i return $ii.RR * $ii.RR) - 
>           avg(for $ii in $i return $ii.RR) * avg(for $ii in $i return $ii.RR))^(0.5),
>   "MAD": MAD(for $ii in $i return $ii.RR)
> }
> {noformat}
> A modified version of MAD can work:
> {noformat}
> declare function MAD($x)
> {
>      median(for $j in $x order by abs($j-median($x)) return abs($j-median($x)))
> }
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)