You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@arrow.apache.org by "David Li (Jira)" <ji...@apache.org> on 2021/05/06 14:21:00 UTC

[jira] [Created] (ARROW-12668) [C++][Dataset] CountRows occasionally segfaulting

David Li created ARROW-12668:
--------------------------------

             Summary: [C++][Dataset] CountRows occasionally segfaulting
                 Key: ARROW-12668
                 URL: https://issues.apache.org/jira/browse/ARROW-12668
             Project: Apache Arrow
          Issue Type: Improvement
          Components: C++
            Reporter: David Li
            Assignee: David Li
             Fix For: 5.0.0


[https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312525]
{noformat}
Start test: dim() correctly determine numbers of rows and columns on arrow_dplyr_query object

 *** caught segfault ***
address 0x7ff7cf2cf8f8, cause 'invalid permissions'

Traceback:
 1: dataset___Scanner__CountRows(self)
 2: scanner$CountRows()
 3: dim.arrow_dplyr_query(.)
 4: dim(.)
 5: ds %>% filter(chr == "a") %>% dim()
 6: eval_bare(expr, quo_get_env(quo))
 7: quasi_label(enquo(object), label, arg = "object")
 8: expect_identical(ds %>% filter(chr == "a") %>% dim(), c(2L, 7L))
 9: eval(code, test_env)
10: eval(code, test_env)
11: withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error)
12: doTryCatch(return(expr), name, parentenv, handler)
13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
14: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
15: doTryCatch(return(expr), name, parentenv, handler)
16: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     names[nh], parentenv, handlers[[nh]])
17: tryCatchList(expr, classes, parentenv, handlers)
18: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error), error = handle_fatal,     skip = function(e) {    })
19: test_code(desc, code, env = parent.frame(), reporter = reporter)
20: testthat::test_that(what, {    skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available")    code})
21: test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_query object",     {        skip_if_not_available("parquet")        ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))        expect_identical(ds %>% filter(chr == "a") %>% dim(),             c(2L, 7L))        expect_equal(ds %>% select(chr, fct, int) %>% dim(),             c(20L, 3L))        expect_identical(ds %>% select(chr, fct, int) %>% filter(chr ==             "a") %>% dim(), c(2L, 3L))    })
22: eval(code, test_env)
23: eval(code, test_env)
24: withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error)
25: doTryCatch(return(expr), name, parentenv, handler)
26: tryCatchOne(expr, names, parentenv, handlers[[1L]])
27: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
28: doTryCatch(return(expr), name, parentenv, handler)
29: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     names[nh], parentenv, handlers[[nh]])
30: tryCatchList(expr, classes, parentenv, handlers)
31: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error), error = handle_fatal,     skip = function(e) {    })
32: test_code(NULL, exprs, env)
33: source_file(path, child_env(env), wrap = wrap)
34: FUN(X[[i]], ...)
35: lapply(test_paths, test_one_file, env = env, wrap = wrap)
36: force(code)
37: doWithOneRestart(return(expr), restart)
38: withOneRestart(expr, restarts[[1L]])
39: withRestarts(testthat_abort_reporter = function() NULL, force(code))
40: with_reporter(reporters$multi, lapply(test_paths, test_one_file,     env = env, wrap = wrap))
41: test_files(test_dir = test_dir, test_package = test_package,     test_paths = test_paths, load_helpers = load_helpers, reporter = reporter,     env = env, stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap = wrap, load_package = load_package)
42: test_files(test_dir = path, test_paths = test_paths, test_package = package,     reporter = reporter, load_helpers = load_helpers, env = env,     stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap = wrap, load_package = load_package, parallel = parallel)
43: test_dir("testthat", package = package, reporter = reporter,     ..., load_package = "installed")
44: test_check("arrow", reporter = arrow_reporter)
An irrecoverable exception occurred. R is aborting now ...{noformat}
The test also seems to give the wrong results sometimes ([https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312803])
{noformat}
== Failed tests ================================================================
-- Failure (test-dataset.R:148:3): dim() correctly determine numbers of rows and columns on arrow_dplyr_query object --
ds %>% filter(chr == "a") %>% dim() not identical to c(2L, 7L).
1/2 mismatches
[1] 1 - 2 == -1
 {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)