You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Josh Rosen (JIRA)" <ji...@apache.org> on 2016/09/29 20:21:20 UTC

[jira] [Created] (SPARK-17733) InferFiltersFromConstraints rule never terminates for query

Josh Rosen created SPARK-17733:
----------------------------------

             Summary: InferFiltersFromConstraints rule never terminates for query
                 Key: SPARK-17733
                 URL: https://issues.apache.org/jira/browse/SPARK-17733
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.0.0
            Reporter: Josh Rosen
            Priority: Critical
         Attachments: SparkSubmit-2016-09-29-1_snapshot___Users_joshrosen_Snapshots__-_YourKit_Java_Profiler_2013_build_13088_-_64-bit.png

The following (complicated) example becomes stuck in the {{InferFiltersFromConstraints}} rule and never runs. However, it doesn't fail with a stack overflow and doesn't hit the limit on optimization passes, so I think there's some sort of non-obvious infinite loop within the rule itself.


{code:title=Table Creation|borderStyle=solid}
 -- Query #0

CREATE TEMPORARY VIEW table_4(float_col_1, boolean_col_2, decimal2610_col_3, boolean_col_4, timestamp_col_5, boolean_col_6, bigint_col_7, timestamp_col_8) AS VALUES
  (CAST(21.920416 AS FLOAT), false, -182.0700000000BD, true, TIMESTAMP('1996-10-24 00:00:00.0'), true, CAST(-993 AS BIGINT), TIMESTAMP('2007-01-13 00:00:00.0')),
  (CAST(722.4906 AS FLOAT), true, 497.5400000000BD, true, TIMESTAMP('2015-12-14 00:00:00.0'), false, CAST(268 AS BIGINT), TIMESTAMP('2021-04-19 00:00:00.0')),
  (CAST(534.9996 AS FLOAT), true, -470.8300000000BD, true, TIMESTAMP('1996-01-31 00:00:00.0'), false, CAST(-910 AS BIGINT), TIMESTAMP('2019-10-16 00:00:00.0')),
  (CAST(-289.6454 AS FLOAT), false, 892.2500000000BD, false, TIMESTAMP('2014-03-14 00:00:00.0'), false, CAST(-462 AS BIGINT), CAST(NULL AS TIMESTAMP)),
  (CAST(46.395535 AS FLOAT), true, -662.8900000000BD, true, TIMESTAMP('2000-10-16 00:00:00.0'), false, CAST(-656 AS BIGINT), TIMESTAMP('2024-09-01 00:00:00.0')),
  (CAST(-555.36285 AS FLOAT), true, -938.9300000000BD, true, TIMESTAMP('2007-04-10 00:00:00.0'), true, CAST(252 AS BIGINT), TIMESTAMP('2028-12-03 00:00:00.0')),
  (CAST(826.29004 AS FLOAT), true, 53.1800000000BD, false, TIMESTAMP('2004-06-11 00:00:00.0'), false, CAST(437 AS BIGINT), TIMESTAMP('1994-04-04 00:00:00.0')),
  (CAST(-15.276999 AS FLOAT), CAST(NULL AS BOOLEAN), -889.3100000000BD, true, TIMESTAMP('1991-05-23 00:00:00.0'), true, CAST(226 AS BIGINT), TIMESTAMP('2023-07-08 00:00:00.0')),
  (CAST(385.27386 AS FLOAT), CAST(NULL AS BOOLEAN), -9.9500000000BD, false, TIMESTAMP('2022-10-22 00:00:00.0'), true, CAST(430 AS BIGINT), TIMESTAMP('2013-09-29 00:00:00.0')),
  (CAST(988.7868 AS FLOAT), CAST(NULL AS BOOLEAN), 715.1700000000BD, false, TIMESTAMP('2026-10-03 00:00:00.0'), true, CAST(-696 AS BIGINT), TIMESTAMP('1990-08-10 00:00:00.0'))
     ;


 -- Query #1

CREATE TEMPORARY VIEW table_1(double_col_1, boolean_col_2, timestamp_col_3, smallint_col_4, boolean_col_5, int_col_6, timestamp_col_7, varchar0008_col_8, int_col_9, string_col_10) AS VALUES
  (CAST(-147.818640624 AS DOUBLE), CAST(NULL AS BOOLEAN), TIMESTAMP('2012-10-19 00:00:00.0'), CAST(9 AS SMALLINT), false, 77, TIMESTAMP('2014-07-01 00:00:00.0'), '-945', -646, '722'),
  (CAST(594.195125271 AS DOUBLE), false, TIMESTAMP('2016-12-04 00:00:00.0'), CAST(NULL AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), TIMESTAMP('1999-12-26 00:00:00.0'), '250', -861, '55'),
  (CAST(-454.171126363 AS DOUBLE), false, TIMESTAMP('2008-12-13 00:00:00.0'), CAST(NULL AS SMALLINT), false, -783, TIMESTAMP('2010-05-28 00:00:00.0'), '211', -959, CAST(NULL AS STRING)),
  (CAST(437.670945524 AS DOUBLE), true, TIMESTAMP('2011-10-16 00:00:00.0'), CAST(952 AS SMALLINT), true, 297, TIMESTAMP('2013-01-13 00:00:00.0'), '262', CAST(NULL AS INT), '936'),
  (CAST(-387.226759334 AS DOUBLE), false, TIMESTAMP('2019-10-03 00:00:00.0'), CAST(-496 AS SMALLINT), CAST(NULL AS BOOLEAN), -925, TIMESTAMP('2028-06-27 00:00:00.0'), '-657', 948, '18'),
  (CAST(-306.138230875 AS DOUBLE), true, TIMESTAMP('1997-10-07 00:00:00.0'), CAST(332 AS SMALLINT), false, 744, TIMESTAMP('1990-09-22 00:00:00.0'), '-345', 566, '-574'),
  (CAST(675.402140308 AS DOUBLE), false, TIMESTAMP('2017-06-26 00:00:00.0'), CAST(972 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('2026-06-10 00:00:00.0'), '518', 683, '-320'),
  (CAST(734.839647174 AS DOUBLE), true, TIMESTAMP('1995-06-01 00:00:00.0'), CAST(-792 AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), TIMESTAMP('2021-07-11 00:00:00.0'), '-318', 564, '142'),
  (CAST(-836.513475295 AS DOUBLE), true, TIMESTAMP('2027-01-02 00:00:00.0'), CAST(-446 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('1993-09-01 00:00:00.0'), '771', CAST(NULL AS INT), '977'),
  (CAST(-768.883638815 AS DOUBLE), false, TIMESTAMP('1994-02-11 00:00:00.0'), CAST(-244 AS SMALLINT), true, -493, TIMESTAMP('1994-01-02 00:00:00.0'), '-921', CAST(NULL AS INT), '-409')
     ;


 -- Query #2

CREATE TEMPORARY VIEW table_5(float_col_1, varchar0138_col_2, string_col_3, decimal2211_col_4, float_col_5, string_col_6, timestamp_col_7, varchar0207_col_8) AS VALUES
  (CAST(-885.7606 AS FLOAT), '-740', '680', -929.06000000000BD, CAST(NULL AS FLOAT), '-915', TIMESTAMP('1994-09-12 00:00:00.0'), CAST(NULL AS STRING)),
  (CAST(NULL AS FLOAT), '489', '692', -220.60000000000BD, CAST(939.18964 AS FLOAT), '-514', CAST(NULL AS TIMESTAMP), '181'),
  (CAST(210.7055 AS FLOAT), '44', CAST(NULL AS STRING), -174.70000000000BD, CAST(760.21045 AS FLOAT), '325', TIMESTAMP('2019-09-25 00:00:00.0'), '505'),
  (CAST(952.8074 AS FLOAT), '838', '705', CAST(NULL AS DECIMAL(22,11)), CAST(NULL AS FLOAT), '-62', TIMESTAMP('2029-05-22 00:00:00.0'), CAST(NULL AS STRING)),
  (CAST(-113.300446 AS FLOAT), '-210', '765', CAST(NULL AS DECIMAL(22,11)), CAST(-819.2468 AS FLOAT), '-829', CAST(NULL AS TIMESTAMP), '465'),
  (CAST(-739.9902 AS FLOAT), '614', '-393', -509.22000000000BD, CAST(-339.78568 AS FLOAT), '568', TIMESTAMP('2013-05-14 00:00:00.0'), '305'),
  (CAST(976.0611 AS FLOAT), '670', '71', 663.23000000000BD, CAST(-685.9362 AS FLOAT), '42', CAST(NULL AS TIMESTAMP), '150'),
  (CAST(NULL AS FLOAT), '302', '-404', -349.42000000000BD, CAST(2.113715 AS FLOAT), '-703', TIMESTAMP('2003-01-09 00:00:00.0'), '-863'),
  (CAST(-40.604317 AS FLOAT), '856', '632', 844.57000000000BD, CAST(-730.8376 AS FLOAT), '151', TIMESTAMP('2021-05-11 00:00:00.0'), '494'),
  (CAST(884.62714 AS FLOAT), '-195', '960', -664.40000000000BD, CAST(374.4844 AS FLOAT), '814', TIMESTAMP('2006-06-12 00:00:00.0'), '-900')
     ;


 -- Query #3

CREATE TEMPORARY VIEW table_2(bigint_col_1, boolean_col_2, double_col_3, double_col_4, double_col_5, varchar0164_col_6) AS VALUES
  (CAST(-374 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(939.626553676 AS DOUBLE), CAST(-777.275379746 AS DOUBLE), CAST(235.613760023 AS DOUBLE), '86'),
  (CAST(324 AS BIGINT), true, CAST(-507.23760783 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(966.753434439 AS DOUBLE), '304'),
  (CAST(882 AS BIGINT), false, CAST(-366.529706229 AS DOUBLE), CAST(787.000491043 AS DOUBLE), CAST(-331.333188698 AS DOUBLE), '158'),
  (CAST(-510 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(-855.344932257 AS DOUBLE), CAST(-858.167264921 AS DOUBLE), CAST(NULL AS DOUBLE), '-419'),
  (CAST(-13 AS BIGINT), false, CAST(589.966987492 AS DOUBLE), CAST(NULL AS DOUBLE), CAST(-653.515783257 AS DOUBLE), '970'),
  (CAST(-361 AS BIGINT), true, CAST(-413.021011259 AS DOUBLE), CAST(-716.638705947 AS DOUBLE), CAST(-936.480108205 AS DOUBLE), '807'),
  (CAST(815 AS BIGINT), true, CAST(-643.690268711 AS DOUBLE), CAST(-684.206112496 AS DOUBLE), CAST(335.557479371 AS DOUBLE), '-872'),
  (CAST(617 AS BIGINT), true, CAST(-93.3806447556 AS DOUBLE), CAST(-322.66171021 AS DOUBLE), CAST(-951.18299435 AS DOUBLE), '-167'),
  (CAST(-876 AS BIGINT), false, CAST(-481.774062168 AS DOUBLE), CAST(-204.40537387 AS DOUBLE), CAST(224.889845986 AS DOUBLE), '-986'),
  (CAST(2 AS BIGINT), false, CAST(462.843898322 AS DOUBLE), CAST(-9.85549856798 AS DOUBLE), CAST(-549.875829922 AS DOUBLE), '121')
     ;


 -- Query #4

CREATE TEMPORARY VIEW table_3(string_col_1, float_col_2, timestamp_col_3, boolean_col_4, timestamp_col_5, decimal3317_col_6) AS VALUES
  ('-450', CAST(-903.6053 AS FLOAT), CAST(NULL AS TIMESTAMP), true, TIMESTAMP('2020-08-22 00:00:00.0'), -376.39000000000000000BD),
  ('698', CAST(402.56534 AS FLOAT), TIMESTAMP('2013-10-13 00:00:00.0'), true, TIMESTAMP('2012-11-06 00:00:00.0'), -498.81000000000000000BD),
  ('139', CAST(-895.7336 AS FLOAT), TIMESTAMP('2018-09-08 00:00:00.0'), true, TIMESTAMP('2019-03-13 00:00:00.0'), CAST(NULL AS DECIMAL(33,17))),
  ('616', CAST(-464.9475 AS FLOAT), TIMESTAMP('2028-05-18 00:00:00.0'), true, TIMESTAMP('2016-05-22 00:00:00.0'), -109.88000000000000000BD),
  ('943', CAST(605.42303 AS FLOAT), TIMESTAMP('1996-08-04 00:00:00.0'), false, TIMESTAMP('2028-05-18 00:00:00.0'), 201.36000000000000000BD),
  ('-764', CAST(-503.56726 AS FLOAT), TIMESTAMP('1990-02-28 00:00:00.0'), false, CAST(NULL AS TIMESTAMP), 211.25000000000000000BD),
  ('-587', CAST(84.67886 AS FLOAT), TIMESTAMP('2013-06-06 00:00:00.0'), true, TIMESTAMP('2022-05-07 00:00:00.0'), 90.75000000000000000BD),
  ('712', CAST(141.08926 AS FLOAT), TIMESTAMP('2001-05-12 00:00:00.0'), true, TIMESTAMP('2019-11-22 00:00:00.0'), 929.89000000000000000BD),
  ('948', CAST(0.74294764 AS FLOAT), TIMESTAMP('2002-06-14 00:00:00.0'), false, TIMESTAMP('1990-01-13 00:00:00.0'), -100.90000000000000000BD),
  ('-201', CAST(366.82578 AS FLOAT), TIMESTAMP('2015-11-28 00:00:00.0'), false, CAST(NULL AS TIMESTAMP), 196.33000000000000000BD)
     ;
{code}

{code:title=Query|borderStyle=solid}
SELECT
t1.int_col_2,
(t1.bigint_col_7) / (t2.double_col_3) AS float_col,
TRIM(t2.varchar0164_col_6) AS char_col
FROM (
SELECT
COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7) AS int_col,
t1.bigint_col_7,
t2.bigint_col_7 AS int_col_1,
t1.bigint_col_7 AS int_col_2,
COALESCE(t2.bigint_col_7, (COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7)) - (t1.bigint_col_7), MIN(t2.bigint_col_7)) AS int_col_3
FROM table_4 t1
INNER JOIN table_4 t2 ON ((t2.timestamp_col_5) = (t1.timestamp_col_8)) AND ((t2.decimal2610_col_3) = (t1.decimal2610_col_3))
WHERE
(t1.bigint_col_7) IN (t2.bigint_col_7, t2.bigint_col_7)
GROUP BY
COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7),
t1.bigint_col_7,
t2.bigint_col_7,
t1.bigint_col_7
HAVING
(MIN(COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7))) NOT IN (423.13, t2.bigint_col_7)
UNION
SELECT
MIN((436) * (927)) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC ROWS BETWEEN 96 PRECEDING AND 16 PRECEDING) AS int_col,
(t2.int_col_6) * (NULL) AS decimal_col,
(t1.smallint_col_4) - (t2.int_col_6) AS int_col_1,
LAG((449) * (-157.519107824), 46) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC) AS float_col,
AVG((669) - (-773)) OVER (ORDER BY (t2.int_col_6) * (NULL) ASC, (t1.smallint_col_4) - (t2.int_col_6) ASC ROWS BETWEEN CURRENT ROW AND 62 FOLLOWING) AS float_col_1
FROM table_1 t1
INNER JOIN table_1 t2 ON ((t2.smallint_col_4) = (t1.int_col_9)) AND ((t2.smallint_col_4) = (t1.int_col_6))
) t1
INNER JOIN table_2 t2 ON (((t2.bigint_col_1) = (t1.bigint_col_7)) AND ((t2.bigint_col_1) = (t1.int_col))) AND ((t2.bigint_col_1) = (t1.int_col_1))
GROUP BY
t1.int_col_2,
(t1.bigint_col_7) / (t2.double_col_3),
TRIM(t2.varchar0164_col_6)
{code}

I attached YourKit to my Spark process and recorded some stack traces. See the attached screenshots showing the distribution of time for the hung query.





--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org