You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/11/22 17:25:26 UTC

(arrow) branch main updated: GH-38684: [Integration] Try to strengthen C Data Interface testing (#38846)

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 37267731a5 GH-38684: [Integration] Try to strengthen C Data Interface testing (#38846)
37267731a5 is described below

commit 37267731a5722455b7f905b0bcfa798c1d5d0769
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Nov 22 18:25:17 2023 +0100

    GH-38684: [Integration] Try to strengthen C Data Interface testing (#38846)
    
    ### Rationale for this change
    
    C Data Interface integration tests sometimes crash with a complicated traceback involving Java, Go and C# signal handlers.
    
    ### What changes are included in this PR?
    
    * Update JDK version in integration build
    * Disable signal-based memory management in the JVM so as to improve cooperation with other runtimes
    * Set memory limits to the various managed runtimes (Java, .Net, Go)
    * Enable some checks in the Go runtime
    * Enable debug allocator in Arrow Java
    
    ### Are these changes tested?
    
    Yes. I am not able to reproduce any sporadic crash using these changes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #38684
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 .github/workflows/java.yml                       |  1 -
 .github/workflows/java_jni.yml                   |  1 -
 ci/scripts/integration_arrow.sh                  |  5 +++++
 dev/archery/archery/integration/tester_cpp.py    |  1 +
 dev/archery/archery/integration/tester_csharp.py |  1 +
 dev/archery/archery/integration/tester_go.py     |  3 +++
 dev/archery/archery/integration/tester_java.py   | 13 ++++++++++---
 docker-compose.yml                               |  3 ++-
 8 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml
index 69adc184b7..a29a859f85 100644
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -48,7 +48,6 @@ env:
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
-
   ubuntu:
     name: AMD64 Ubuntu 22.04 Java JDK ${{ matrix.jdk }} Maven ${{ matrix.maven }}
     runs-on: ubuntu-latest
diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml
index 76b10b828e..455e874cd4 100644
--- a/.github/workflows/java_jni.yml
+++ b/.github/workflows/java_jni.yml
@@ -48,7 +48,6 @@ env:
   DOCKER_VOLUME_PREFIX: ".docker/"
 
 jobs:
-
   docker:
     name: AMD64 manylinux2014 Java JNI
     runs-on: ubuntu-latest
diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh
index 6d1d4befa6..a5a012ad2c 100755
--- a/ci/scripts/integration_arrow.sh
+++ b/ci/scripts/integration_arrow.sh
@@ -43,6 +43,11 @@ fi
 # Get more detailed context on crashes
 export PYTHONFAULTHANDLER=1
 
+# Due to how Go reads environment variables, we have to set them from the calling
+# process, or they would get ignored.
+# (see https://forum.golangbridge.org/t/are-godebug-and-other-env-vars-ignored-when-loading-a-go-dll-from-foreign-code/33694)
+export GOMEMLIMIT=200MiB
+export GODEBUG=gctrace=1,clobberfree=1
 
 # Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
 time archery integration \
diff --git a/dev/archery/archery/integration/tester_cpp.py b/dev/archery/archery/integration/tester_cpp.py
index 02c110c0e2..2a47bc8308 100644
--- a/dev/archery/archery/integration/tester_cpp.py
+++ b/dev/archery/archery/integration/tester_cpp.py
@@ -167,6 +167,7 @@ _cpp_c_data_entrypoints = """
 
 @functools.lru_cache
 def _load_ffi(ffi, lib_path=_ARROW_DLL):
+    os.environ['ARROW_DEBUG_MEMORY_POOL'] = 'trap'
     ffi.cdef(_cpp_c_data_entrypoints)
     dll = ffi.dlopen(lib_path)
     dll.ArrowCpp_CDataIntegration_ExportSchemaFromJson
diff --git a/dev/archery/archery/integration/tester_csharp.py b/dev/archery/archery/integration/tester_csharp.py
index 7dca525673..4f77656411 100644
--- a/dev/archery/archery/integration/tester_csharp.py
+++ b/dev/archery/archery/integration/tester_csharp.py
@@ -38,6 +38,7 @@ def _load_clr():
     global _clr_loaded
     if not _clr_loaded:
         _clr_loaded = True
+        os.environ['DOTNET_GCHeapHardLimit'] = '0xC800000'  # 200 MiB
         import pythonnet
         pythonnet.load("coreclr")
         import clr
diff --git a/dev/archery/archery/integration/tester_go.py b/dev/archery/archery/integration/tester_go.py
index 5368f06a31..b59cd9d113 100644
--- a/dev/archery/archery/integration/tester_go.py
+++ b/dev/archery/archery/integration/tester_go.py
@@ -159,6 +159,9 @@ _go_c_data_entrypoints = """
 
 @functools.lru_cache
 def _load_ffi(ffi, lib_path=_INTEGRATION_DLL):
+    # NOTE that setting Go environment variables here (such as GODEBUG)
+    # would be ignored by the Go runtime. The environment variables need
+    # to be set from the process calling Archery.
     ffi.cdef(_go_c_data_entrypoints)
     dll = ffi.dlopen(lib_path)
     return dll
diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py
index 5684798d79..d71479986c 100644
--- a/dev/archery/archery/integration/tester_java.py
+++ b/dev/archery/archery/integration/tester_java.py
@@ -34,11 +34,13 @@ def load_version_from_pom():
     return version_tag.text
 
 
-# XXX Should we add "-Darrow.memory.debug.allocator=true"? It adds a couple
-# minutes to total CPU usage of the integration test suite.
+# NOTE: we don't add "-Darrow.memory.debug.allocator=true" here as it adds a
+# couple minutes to total CPU usage of the integration test suite
+# (see setup_jpype() below).
 _JAVA_OPTS = [
     "-Dio.netty.tryReflectionSetAccessible=true",
     "-Darrow.struct.conflict.policy=CONFLICT_APPEND",
+    "--add-opens=java.base/java.nio=ALL-UNNAMED",
 ]
 
 _arrow_version = load_version_from_pom()
@@ -80,7 +82,12 @@ def setup_jpype():
     jar_path = f"{_ARROW_TOOLS_JAR}:{_ARROW_C_DATA_JAR}"
     # XXX Didn't manage to tone down the logging level here (DEBUG -> INFO)
     jpype.startJVM(jpype.getDefaultJVMPath(),
-                   "-Djava.class.path=" + jar_path, *_JAVA_OPTS)
+                   "-Djava.class.path=" + jar_path,
+                   # This flag is too heavy for IPC and Flight tests
+                   "-Darrow.memory.debug.allocator=true",
+                   # Reduce internal use of signals by the JVM
+                   "-Xrs",
+                   *_JAVA_OPTS)
 
 
 class _CDataBase:
diff --git a/docker-compose.yml b/docker-compose.yml
index 8cc05903e9..74f2c262aa 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1706,7 +1706,8 @@ services:
       args:
         repo: ${REPO}
         arch: ${ARCH}
-        jdk: ${JDK}
+        # Use a newer JDK as it seems to improve stability
+        jdk: 17
         # conda-forge doesn't have 3.5.4 so pinning explicitly, but this should
         # be set to ${MAVEN}
         maven: 3.5