You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2024/02/23 20:33:19 UTC

[I] [R] Caught segfault on benchmark run for TPCH query 21 (scale factor 10) [arrow]

paleolimbot opened a new issue, #40217:
URL: https://github.com/apache/arrow/issues/40217

   ### Describe the bug, including details regarding any error messages, version, and platform.
   
   From the performance report for #40197, apparently we get:
   
   ```
    ' *** caught segfault ***', "address 0x3d, cause 'memory not mapped'", '', 'Traceback:', ' 1: RecordBatchReader__UnsafeDelete(self)', ' 2: reader$.unsafe_delete()', ' 3: as_arrow_table.arrow_dplyr_query(x)',
   ```
   
   for the job:
   
   ```
   engine=arrow, format=parquet, language=R, memory_map=False, query_id=TPCH-21, scale_factor=10
   ```
   
   `RecordBatchReader__UnsafeDelete()` is something I added, but I would have to look into its use again to ensure it is not getting called twice. I believe it was introduced to ensure that open files were closed promptly, since files left open caused problems on Windows.
   
   Also, there was a huge regression last July:
   
   <img width="443" alt="Screenshot 2024-02-23 at 4 32 28 PM" src="https://github.com/apache/arrow/assets/10995762/b6170627-1c00-48ad-bb1c-38a5a37914ca">
   
   
   https://conbench.ursa.dev/benchmark-results/065d8d9e6ab17d1e8000cb6422edfa64/
   
   Full dump:
   
   <details>
   
   ```
                   ['', ' *** caught segfault ***', "address 0x3d, cause 'memory not mapped'", '', 'Traceback:', ' 1: RecordBatchReader__UnsafeDelete(self)', ' 2: reader$.unsafe_delete()', ' 3: as_arrow_table.arrow_dplyr_query(x)', ' 4: as_arrow_table(x)', ' 5: doTryCatch(return(expr), name, parentenv, handler)', ' 6: tryCatchOne(expr, names, parentenv, handlers[[1L]])', ' 7: tryCatchList(expr, classes, parentenv, handlers)', ' 8: tryCatch(as_arrow_table(x), error = function(e, call = caller_env(n = 4)) {    augment_io_error_msg(e, call, schema = schema())})', ' 9: compute.arrow_dplyr_query(x)', '10: collect.arrow_dplyr_query(.)', '11: collect_func(.)', '12: input_func("supplier") %>% inner_join(line_items, by = c(s_suppkey = "l_suppkey")) %>%     filter(l_receiptdate > l_commitdate) %>% inner_join(input_func("nation"),     by = c(s_nationkey = "n_nationkey")) %>% filter(n_name ==     "SAUDI ARABIA") %>% group_by(s_name) %>% summarise(numwait = n()) %>%     ungroup() %>% arrange(desc
 (numwait), s_name) %>% head(100) %>%     collect_func()', '13: query(input_func, collect_func, con)', '14: eval(bm$run, envir = ctx)', '15: eval(bm$run, envir = ctx)', '16: eval(expr, p)', '17: eval.parent(...)', '18: as_bench_time(.Call(system_time_, substitute(expr), parent.frame()))', '19: stats::setNames(as_bench_time(.Call(system_time_, substitute(expr),     parent.frame())), c("process", "real"))', '20: bench::bench_time(eval.parent(...))', '21: eval(expr, p)', '22: eval.parent(expr)', '23: with_profiling(profiling, {    timings <- bench::bench_time(eval.parent(...))})', '24: force(expr)', '25: with_gc_info({    prof_file <- with_profiling(profiling, {        timings <- bench::bench_time(eval.parent(...))    })})', '26: measure(eval(bm$run, envir = ctx), profiling = profiling, drop_caches = drop_caches)', '27: run_iteration(bm = bm, ctx = ctx, profiling = profiling, drop_caches = global_params[["drop_caches"]])', '28: withCallingHandlers({    results[[i]] <- run_iteration(bm =
  bm, ctx = ctx, profiling = profiling,         drop_caches = global_params[["drop_caches"]])}, warning = function(w) {    warnings <<- c(warnings, list(list(warning = as.character(w),         stack_trace = vapply(traceback(3), function(x) paste(x,             collapse = "\\n"), character(1)))))})', '29: doTryCatch(return(expr), name, parentenv, handler)', '30: tryCatchOne(expr, names, parentenv, handlers[[1L]])', '31: tryCatchList(expr, classes, parentenv, handlers)', '32: tryCatch(withCallingHandlers({    results[[i]] <- run_iteration(bm = bm, ctx = ctx, profiling = profiling,         drop_caches = global_params[["drop_caches"]])}, warning = function(w) {    warnings <<- c(warnings, list(list(warning = as.character(w),         stack_trace = vapply(traceback(3), function(x) paste(x,             collapse = "\\n"), character(1)))))}), error = function(e) {    error <<- list(error = as.character(e), stack_trace = vapply(traceback(3),         function(x) paste(x, collapse = "\\n"), char
 acter(1)))})', '33: run_bm(format = "parquet", scale_factor = 10, engine = "arrow",     memory_map = FALSE, query_id = 21, bm = structure(list(name = "tpch",         setup = function(engine = "arrow", query_id = 1:22, format = c("native",             "parquet"), scale_factor = c(1, 10), memory_map = FALSE,             output = "data_frame", chunk_size = NULL) {            engine <- match.arg(engine, c("arrow", "duckdb",                 "duckdb_sql", "dplyr"))            format <- match.arg(format, c("parquet", "feather",                 "native"))            stopifnot(`query_id must be an int` = query_id%%1 ==                 0, `query_id must 1-22` = query_id >= 1 & query_id <=                 22)            output <- match.arg(output, c("arrow_table", "data_frame"))            library("dplyr", warn.conflicts = FALSE)            collect_func <- collect            if (output == "data_frame") {                collect_func <- collect            } else if (output == "arrow_table") {   
              collect_func <- compute            }            con <- NULL            if (engine %in% c("duckdb", "duckdb_sql")) {                con <- DBI::dbConnect(duckdb::duckdb())                DBI::dbExecute(con, paste0("PRAGMA threads=",                   getOption("Ncpus")))            }            BenchEnvironment(input_func = get_input_func(engine = engine,                 scale_factor = scale_factor, query_id = query_id,                 format = format, con = con, memory_map = memory_map,                 chunk_size = chunk_size), query = get_query_func(query_id,                 engine), engine = engine, con = con, scale_factor = scale_factor,                 query_id = query_id, collect_func = collect_func)        }, before_each = quote({            result <- NULL        }), run = quote({            result <- query(input_func, collect_func, con)        }), after_each = quote({            if (scale_factor %in% c(0.01, 0.1, 1, 10)) {                answer <- tpch_answer(sca
 le_factor, query_id)                result <- dplyr::as_tibble(result)                all_equal_out <- waldo::compare(result, answer,                   tolerance = 0.01)                if (length(all_equal_out) != 0) {                  warning(paste0("\\n", all_equal_out, "\\n"))                  stop("The answer does not match")                }            } else {                warning("There is no validation for scale_factors other than 0.01, 0.1, 1, and 10. Be careful with these results!")            }            result <- NULL        }), teardown = quote({            if (!is.null(con)) {                DBI::dbDisconnect(con, shutdown = TRUE)            }        }), valid_params = function(params) {            drop <- (params$engine != "arrow" & params$format ==                 "feather") | (params$engine != "arrow" & params$output ==                 "arrow_table") | (params$engine != "arrow" &                 params$memory_map == TRUE) | (params$engine ==                 "dply
 r" & params$format == "native")            params[!drop, ]        }, case_version = function(params) NULL, batch_id_fun = function(params) {            batch_id <- uuid()            paste0(batch_id, "-", params$scale_factor, substr(params$format,                 1, 1))        }, tags_fun = function(params) {            params$query_id <- sprintf("TPCH-%02d", params$query_id)            if (!is.null(params$output) && params$output == "data_frame") {                params$output <- NULL            }            params        }, packages_used = function(params) {            c(params$engine, "dplyr", "lubridate")        }), class = "Benchmark"), n_iter = 1, batch_id = NULL,     profiling = FALSE, global_params = list(cpu_count = NULL,         lib_path = "latest"), run_id = NULL, run_name = NULL,     run_reason = NULL)', 'An irrecoverable exception occurred. R is aborting now ...', 'Segmentation fault (core dumped)']
   ```
   
   </details>
   
   ### Component(s)
   
   R


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org