You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Michael J. Carey (JIRA)" <ji...@apache.org> on 2017/12/23 08:35:00 UTC

[jira] [Created] (ASTERIXDB-2212) Variable binding/tracking bug in complex GROUP BY query

Michael J. Carey created ASTERIXDB-2212:
-------------------------------------------

             Summary: Variable binding/tracking bug in complex GROUP BY query
                 Key: ASTERIXDB-2212
                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-2212
             Project: Apache AsterixDB
          Issue Type: Bug
            Reporter: Michael J. Carey
            Assignee: Dmitry Lychagin


Try the following sequence of steps (after getting the 'mondial.adm' data file and fixing its path for your machine) and look at the query result - then un-comment out the GROUP BY near the end and look again. You will see that the SQL++ compiler apparently gets confused about which variable/field is which?  Weird bug!  (Reported by U-Wash.)

DROP DATAVERSE hw5 IF EXISTS;
CREATE DATAVERSE hw5;
USE hw5;

CREATE TYPE worldType AS {auto_id:uuid };
CREATE DATASET world(worldType)  PRIMARY KEY auto_id AUTOGENERATED;
LOAD DATASET world USING localfs
        (("path"="127.0.0.1:///Users/mikejcarey/uwash/mondial.adm"),("format"="adm"));

WITH instr AS (select distinct z.`#text` AS ethnic_group, floor(sum((float(z.`-percentage`)/ 100 * float(y.population)))) AS total_population, count(*) AS num_countries
FROM world x, x.mondial.country y, 
     (CASE WHEN is_array(y.ethnicgroups) THEN y.ethnicgroups ELSE [y.ethnicgroups] END) z
WHERE y.ethnicgroups IS NOT MISSING
GROUP BY ethnic_group
ORDER BY total_population DESC
),
stud AS (WITH g AS
(SELECT a.`#text` as ethnicgroup, SUM((float(a.`-percentage`)/100 * float(y.population))) as numEth
FROM world x, x.mondial.country y, 
CASE 
WHEN y.ethnicgroups is missing then [[]] 
WHEN is_object(y.ethnicgroups) then [[y.ethnicgroups]]
ELSE [y.ethnicgroups] END z
UNNEST z a
GROUP BY ethnicgroup),
h as (SELECT a.`#text` as ethnicgroup, COUNT(y.name) as count
FROM world x, x.mondial.country y, 
CASE 
WHEN y.ethnicgroups is missing then [[]] 
WHEN is_object(y.ethnicgroups) then [[y.ethnicgroups]]
ELSE [y.ethnicgroups] END z
UNNEST z a
GROUP BY ethnicgroup)
SELECT g.ethnicgroup AS ethnic_group, h.count AS num_countries, floor(g.numEth) AS total_population
FROM g, h
WHERE g.ethnicgroup = h.ethnicgroup
ORDER BY total_population DESC
)
SELECT subq.ethnic_group, subq.num_countries, subq.total_population
FROM (select total_population, 
   num_countries, ethnic_group FROM instr
UNION ALL
   select total_population, num_countries, ethnic_group FROM stud) AS subq
-- GROUP BY subq.ethnic_group, subq.num_countries, subq.total_population
ORDER BY subq.ethnic_group;




--
This message was sent by Atlassian JIRA
(v6.4.14#64029)