You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/06/29 03:53:15 UTC

[arrow] branch main updated: GH-36200: [CI][Docs] Avoid "No space left on device" (#36230)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 63b8091d45 GH-36200: [CI][Docs] Avoid "No space left on device" (#36230)
63b8091d45 is described below

commit 63b8091d452ac35ede94eaaf7101ee0de505273f
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Thu Jun 29 12:53:07 2023 +0900

    GH-36200: [CI][Docs] Avoid "No space left on device" (#36230)
    
    ### Rationale for this change
    
    Our build requires a lot of disk space.
    
    ### What changes are included in this PR?
    
    Remove unused files.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #36200
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 .github/workflows/docs.yml                         |   2 +-
 ci/docker/conda-python.dockerfile                  |   1 +
 ci/docker/ubuntu-20.04-cpp.dockerfile              |   1 +
 ci/docker/ubuntu-22.04-cpp.dockerfile              |   1 +
 ci/scripts/cpp_build.sh                            |  13 +
 cpp/cmake_modules/AWSSDKVariables.cmake            | 388 +++++++++++++++++++++
 cpp/cmake_modules/ThirdpartyToolchain.cmake        |  55 ++-
 .../aws_sdk_cpp_generate_variables.sh              |  72 ++++
 dev/tasks/docs/github.linux.yml                    |   4 +-
 dev/tasks/linux-packages/github.linux.yml          |  52 +--
 docker-compose.yml                                 | 107 +++---
 11 files changed, 608 insertions(+), 88 deletions(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 9193af1d92..8bef50335a 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -22,7 +22,7 @@ on:
 
 permissions:
   contents: read
-  
+
 env:
   ARROW_ENABLE_TIMING_TESTS: OFF
   DOCKER_VOLUME_PREFIX: ".docker/"
diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile
index 917b60dab9..ba50d26730 100644
--- a/ci/docker/conda-python.dockerfile
+++ b/ci/docker/conda-python.dockerfile
@@ -45,6 +45,7 @@ ENV ARROW_ACERO=ON \
     ARROW_CSV=ON \
     ARROW_DATASET=ON \
     ARROW_FILESYSTEM=ON \
+    ARROW_GDB=ON \
     ARROW_HDFS=ON \
     ARROW_JSON=ON \
     ARROW_TENSORFLOW=ON \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 12dfe2d67a..a5c1f0cdc1 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -108,6 +108,7 @@ RUN apt-get update -y -q && \
         python3-dev \
         python3-pip \
         python3-rados \
+        python3-venv \
         rados-objclass-dev \
         rapidjson-dev \
         rsync \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index e6fd44ff2d..61c33f8feb 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -107,6 +107,7 @@ RUN apt-get update -y -q && \
         protobuf-compiler-grpc \
         python3-dev \
         python3-pip \
+        python3-venv \
         rapidjson-dev \
         rsync \
         tzdata \
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index 2ef9c0ee22..91a570be97 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -40,6 +40,19 @@ elif [ -x "$(command -v xcrun)" ]; then
   export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)"
 fi
 
+if [ "${GITHUB_ACTIONS:-false}" = "true" ]; then
+  case "$(uname)" in
+    Linux|Darwin|MINGW*)
+      if [ "${ARROW_GDB:-OFF}" != "ON" ]; then
+        : ${ARROW_C_FLAGS_DEBUG:=-g1}
+        : ${ARROW_CXX_FLAGS_DEBUG:=-g1}
+      fi
+      ;;
+    *)
+      ;;
+  esac
+fi
+
 if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
     echo -e "===\n=== ccache statistics before build\n==="
     ccache -sv 2>/dev/null || ccache -s
diff --git a/cpp/cmake_modules/AWSSDKVariables.cmake b/cpp/cmake_modules/AWSSDKVariables.cmake
new file mode 100644
index 0000000000..729790dd0f
--- /dev/null
+++ b/cpp/cmake_modules/AWSSDKVariables.cmake
@@ -0,0 +1,388 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by:
+#   $ cpp/cmake_modules/aws_sdk_cpp_generate_variables.sh 1.10.55
+
+set(AWSSDK_UNUSED_DIRECTORIES
+    .github
+    AndroidSDKTesting
+    CI
+    Docs
+    android-build
+    android-unified-tests
+    aws-cpp-sdk-AWSMigrationHub
+    aws-cpp-sdk-access-management
+    aws-cpp-sdk-accessanalyzer
+    aws-cpp-sdk-account
+    aws-cpp-sdk-acm
+    aws-cpp-sdk-acm-pca
+    aws-cpp-sdk-alexaforbusiness
+    aws-cpp-sdk-amp
+    aws-cpp-sdk-amplify
+    aws-cpp-sdk-amplifybackend
+    aws-cpp-sdk-amplifyuibuilder
+    aws-cpp-sdk-apigateway
+    aws-cpp-sdk-apigatewaymanagementapi
+    aws-cpp-sdk-apigatewayv2
+    aws-cpp-sdk-appconfig
+    aws-cpp-sdk-appconfigdata
+    aws-cpp-sdk-appflow
+    aws-cpp-sdk-appintegrations
+    aws-cpp-sdk-application-autoscaling
+    aws-cpp-sdk-application-insights
+    aws-cpp-sdk-applicationcostprofiler
+    aws-cpp-sdk-appmesh
+    aws-cpp-sdk-apprunner
+    aws-cpp-sdk-appstream
+    aws-cpp-sdk-appsync
+    aws-cpp-sdk-arc-zonal-shift
+    aws-cpp-sdk-athena
+    aws-cpp-sdk-auditmanager
+    aws-cpp-sdk-autoscaling
+    aws-cpp-sdk-autoscaling-plans
+    aws-cpp-sdk-awstransfer
+    aws-cpp-sdk-backup
+    aws-cpp-sdk-backup-gateway
+    aws-cpp-sdk-backupstorage
+    aws-cpp-sdk-batch
+    aws-cpp-sdk-billingconductor
+    aws-cpp-sdk-braket
+    aws-cpp-sdk-budgets
+    aws-cpp-sdk-ce
+    aws-cpp-sdk-chime
+    aws-cpp-sdk-chime-sdk-identity
+    aws-cpp-sdk-chime-sdk-media-pipelines
+    aws-cpp-sdk-chime-sdk-meetings
+    aws-cpp-sdk-chime-sdk-messaging
+    aws-cpp-sdk-chime-sdk-voice
+    aws-cpp-sdk-cleanrooms
+    aws-cpp-sdk-cloud9
+    aws-cpp-sdk-cloudcontrol
+    aws-cpp-sdk-clouddirectory
+    aws-cpp-sdk-cloudformation
+    aws-cpp-sdk-cloudfront
+    aws-cpp-sdk-cloudfront-integration-tests
+    aws-cpp-sdk-cloudhsm
+    aws-cpp-sdk-cloudhsmv2
+    aws-cpp-sdk-cloudsearch
+    aws-cpp-sdk-cloudsearchdomain
+    aws-cpp-sdk-cloudtrail
+    aws-cpp-sdk-codeartifact
+    aws-cpp-sdk-codebuild
+    aws-cpp-sdk-codecatalyst
+    aws-cpp-sdk-codecommit
+    aws-cpp-sdk-codedeploy
+    aws-cpp-sdk-codeguru-reviewer
+    aws-cpp-sdk-codeguruprofiler
+    aws-cpp-sdk-codepipeline
+    aws-cpp-sdk-codestar
+    aws-cpp-sdk-codestar-connections
+    aws-cpp-sdk-codestar-notifications
+    aws-cpp-sdk-cognito-idp
+    aws-cpp-sdk-cognito-sync
+    aws-cpp-sdk-cognitoidentity-integration-tests
+    aws-cpp-sdk-comprehend
+    aws-cpp-sdk-comprehendmedical
+    aws-cpp-sdk-compute-optimizer
+    aws-cpp-sdk-connect
+    aws-cpp-sdk-connect-contact-lens
+    aws-cpp-sdk-connectcampaigns
+    aws-cpp-sdk-connectcases
+    aws-cpp-sdk-connectparticipant
+    aws-cpp-sdk-controltower
+    aws-cpp-sdk-cur
+    aws-cpp-sdk-custom-service-integration-tests
+    aws-cpp-sdk-customer-profiles
+    aws-cpp-sdk-databrew
+    aws-cpp-sdk-dataexchange
+    aws-cpp-sdk-datapipeline
+    aws-cpp-sdk-datasync
+    aws-cpp-sdk-dax
+    aws-cpp-sdk-detective
+    aws-cpp-sdk-devicefarm
+    aws-cpp-sdk-devops-guru
+    aws-cpp-sdk-directconnect
+    aws-cpp-sdk-discovery
+    aws-cpp-sdk-dlm
+    aws-cpp-sdk-dms
+    aws-cpp-sdk-docdb
+    aws-cpp-sdk-docdb-elastic
+    aws-cpp-sdk-drs
+    aws-cpp-sdk-ds
+    aws-cpp-sdk-dynamodb
+    aws-cpp-sdk-dynamodb-integration-tests
+    aws-cpp-sdk-dynamodbstreams
+    aws-cpp-sdk-ebs
+    aws-cpp-sdk-ec2
+    aws-cpp-sdk-ec2-instance-connect
+    aws-cpp-sdk-ec2-integration-tests
+    aws-cpp-sdk-ecr
+    aws-cpp-sdk-ecr-public
+    aws-cpp-sdk-ecs
+    aws-cpp-sdk-eks
+    aws-cpp-sdk-elastic-inference
+    aws-cpp-sdk-elasticache
+    aws-cpp-sdk-elasticbeanstalk
+    aws-cpp-sdk-elasticfilesystem
+    aws-cpp-sdk-elasticfilesystem-integration-tests
+    aws-cpp-sdk-elasticloadbalancing
+    aws-cpp-sdk-elasticloadbalancingv2
+    aws-cpp-sdk-elasticmapreduce
+    aws-cpp-sdk-elastictranscoder
+    aws-cpp-sdk-email
+    aws-cpp-sdk-emr-containers
+    aws-cpp-sdk-emr-serverless
+    aws-cpp-sdk-es
+    aws-cpp-sdk-eventbridge
+    aws-cpp-sdk-eventbridge-tests
+    aws-cpp-sdk-events
+    aws-cpp-sdk-evidently
+    aws-cpp-sdk-finspace
+    aws-cpp-sdk-finspace-data
+    aws-cpp-sdk-firehose
+    aws-cpp-sdk-fis
+    aws-cpp-sdk-fms
+    aws-cpp-sdk-forecast
+    aws-cpp-sdk-forecastquery
+    aws-cpp-sdk-frauddetector
+    aws-cpp-sdk-fsx
+    aws-cpp-sdk-gamelift
+    aws-cpp-sdk-gamesparks
+    aws-cpp-sdk-glacier
+    aws-cpp-sdk-globalaccelerator
+    aws-cpp-sdk-glue
+    aws-cpp-sdk-grafana
+    aws-cpp-sdk-greengrass
+    aws-cpp-sdk-greengrassv2
+    aws-cpp-sdk-groundstation
+    aws-cpp-sdk-guardduty
+    aws-cpp-sdk-health
+    aws-cpp-sdk-healthlake
+    aws-cpp-sdk-honeycode
+    aws-cpp-sdk-iam
+    aws-cpp-sdk-identitystore
+    aws-cpp-sdk-imagebuilder
+    aws-cpp-sdk-importexport
+    aws-cpp-sdk-inspector
+    aws-cpp-sdk-inspector2
+    aws-cpp-sdk-iot
+    aws-cpp-sdk-iot-data
+    aws-cpp-sdk-iot-jobs-data
+    aws-cpp-sdk-iot-roborunner
+    aws-cpp-sdk-iot1click-devices
+    aws-cpp-sdk-iot1click-projects
+    aws-cpp-sdk-iotanalytics
+    aws-cpp-sdk-iotdeviceadvisor
+    aws-cpp-sdk-iotevents
+    aws-cpp-sdk-iotevents-data
+    aws-cpp-sdk-iotfleethub
+    aws-cpp-sdk-iotfleetwise
+    aws-cpp-sdk-iotsecuretunneling
+    aws-cpp-sdk-iotsitewise
+    aws-cpp-sdk-iotthingsgraph
+    aws-cpp-sdk-iottwinmaker
+    aws-cpp-sdk-iotwireless
+    aws-cpp-sdk-ivs
+    aws-cpp-sdk-ivschat
+    aws-cpp-sdk-kafka
+    aws-cpp-sdk-kafkaconnect
+    aws-cpp-sdk-kendra
+    aws-cpp-sdk-kendra-ranking
+    aws-cpp-sdk-keyspaces
+    aws-cpp-sdk-kinesis
+    aws-cpp-sdk-kinesis-integration-tests
+    aws-cpp-sdk-kinesis-video-archived-media
+    aws-cpp-sdk-kinesis-video-media
+    aws-cpp-sdk-kinesis-video-signaling
+    aws-cpp-sdk-kinesis-video-webrtc-storage
+    aws-cpp-sdk-kinesisanalytics
+    aws-cpp-sdk-kinesisanalyticsv2
+    aws-cpp-sdk-kinesisvideo
+    aws-cpp-sdk-kms
+    aws-cpp-sdk-lakeformation
+    aws-cpp-sdk-lambda
+    aws-cpp-sdk-lambda-integration-tests
+    aws-cpp-sdk-lex
+    aws-cpp-sdk-lex-models
+    aws-cpp-sdk-lexv2-models
+    aws-cpp-sdk-lexv2-runtime
+    aws-cpp-sdk-license-manager
+    aws-cpp-sdk-license-manager-linux-subscriptions
+    aws-cpp-sdk-license-manager-user-subscriptions
+    aws-cpp-sdk-lightsail
+    aws-cpp-sdk-location
+    aws-cpp-sdk-logs
+    aws-cpp-sdk-logs-integration-tests
+    aws-cpp-sdk-lookoutequipment
+    aws-cpp-sdk-lookoutmetrics
+    aws-cpp-sdk-lookoutvision
+    aws-cpp-sdk-m2
+    aws-cpp-sdk-machinelearning
+    aws-cpp-sdk-macie
+    aws-cpp-sdk-macie2
+    aws-cpp-sdk-managedblockchain
+    aws-cpp-sdk-marketplace-catalog
+    aws-cpp-sdk-marketplace-entitlement
+    aws-cpp-sdk-marketplacecommerceanalytics
+    aws-cpp-sdk-mediaconnect
+    aws-cpp-sdk-mediaconvert
+    aws-cpp-sdk-medialive
+    aws-cpp-sdk-mediapackage
+    aws-cpp-sdk-mediapackage-vod
+    aws-cpp-sdk-mediastore
+    aws-cpp-sdk-mediastore-data
+    aws-cpp-sdk-mediastore-data-integration-tests
+    aws-cpp-sdk-mediatailor
+    aws-cpp-sdk-memorydb
+    aws-cpp-sdk-meteringmarketplace
+    aws-cpp-sdk-mgn
+    aws-cpp-sdk-migration-hub-refactor-spaces
+    aws-cpp-sdk-migrationhub-config
+    aws-cpp-sdk-migrationhuborchestrator
+    aws-cpp-sdk-migrationhubstrategy
+    aws-cpp-sdk-mobile
+    aws-cpp-sdk-monitoring
+    aws-cpp-sdk-mq
+    aws-cpp-sdk-mturk-requester
+    aws-cpp-sdk-mwaa
+    aws-cpp-sdk-neptune
+    aws-cpp-sdk-network-firewall
+    aws-cpp-sdk-networkmanager
+    aws-cpp-sdk-nimble
+    aws-cpp-sdk-oam
+    aws-cpp-sdk-omics
+    aws-cpp-sdk-opensearch
+    aws-cpp-sdk-opensearchserverless
+    aws-cpp-sdk-opsworks
+    aws-cpp-sdk-opsworkscm
+    aws-cpp-sdk-organizations
+    aws-cpp-sdk-outposts
+    aws-cpp-sdk-panorama
+    aws-cpp-sdk-personalize
+    aws-cpp-sdk-personalize-events
+    aws-cpp-sdk-personalize-runtime
+    aws-cpp-sdk-pi
+    aws-cpp-sdk-pinpoint
+    aws-cpp-sdk-pinpoint-email
+    aws-cpp-sdk-pinpoint-sms-voice-v2
+    aws-cpp-sdk-pipes
+    aws-cpp-sdk-polly
+    aws-cpp-sdk-polly-sample
+    aws-cpp-sdk-pricing
+    aws-cpp-sdk-privatenetworks
+    aws-cpp-sdk-proton
+    aws-cpp-sdk-qldb
+    aws-cpp-sdk-qldb-session
+    aws-cpp-sdk-queues
+    aws-cpp-sdk-quicksight
+    aws-cpp-sdk-ram
+    aws-cpp-sdk-rbin
+    aws-cpp-sdk-rds
+    aws-cpp-sdk-rds-data
+    aws-cpp-sdk-rds-integration-tests
+    aws-cpp-sdk-redshift
+    aws-cpp-sdk-redshift-data
+    aws-cpp-sdk-redshift-integration-tests
+    aws-cpp-sdk-redshift-serverless
+    aws-cpp-sdk-rekognition
+    aws-cpp-sdk-resiliencehub
+    aws-cpp-sdk-resource-explorer-2
+    aws-cpp-sdk-resource-groups
+    aws-cpp-sdk-resourcegroupstaggingapi
+    aws-cpp-sdk-robomaker
+    aws-cpp-sdk-rolesanywhere
+    aws-cpp-sdk-route53
+    aws-cpp-sdk-route53-recovery-cluster
+    aws-cpp-sdk-route53-recovery-control-config
+    aws-cpp-sdk-route53-recovery-readiness
+    aws-cpp-sdk-route53domains
+    aws-cpp-sdk-route53resolver
+    aws-cpp-sdk-rum
+    aws-cpp-sdk-sagemaker
+    aws-cpp-sdk-sagemaker-a2i-runtime
+    aws-cpp-sdk-sagemaker-edge
+    aws-cpp-sdk-sagemaker-featurestore-runtime
+    aws-cpp-sdk-sagemaker-geospatial
+    aws-cpp-sdk-sagemaker-metrics
+    aws-cpp-sdk-sagemaker-runtime
+    aws-cpp-sdk-savingsplans
+    aws-cpp-sdk-scheduler
+    aws-cpp-sdk-schemas
+    aws-cpp-sdk-sdb
+    aws-cpp-sdk-secretsmanager
+    aws-cpp-sdk-securityhub
+    aws-cpp-sdk-securitylake
+    aws-cpp-sdk-serverlessrepo
+    aws-cpp-sdk-service-quotas
+    aws-cpp-sdk-servicecatalog
+    aws-cpp-sdk-servicecatalog-appregistry
+    aws-cpp-sdk-servicediscovery
+    aws-cpp-sdk-sesv2
+    aws-cpp-sdk-shield
+    aws-cpp-sdk-signer
+    aws-cpp-sdk-simspaceweaver
+    aws-cpp-sdk-sms
+    aws-cpp-sdk-sms-voice
+    aws-cpp-sdk-snow-device-management
+    aws-cpp-sdk-snowball
+    aws-cpp-sdk-sns
+    aws-cpp-sdk-sqs
+    aws-cpp-sdk-sqs-integration-tests
+    aws-cpp-sdk-ssm
+    aws-cpp-sdk-ssm-contacts
+    aws-cpp-sdk-ssm-incidents
+    aws-cpp-sdk-ssm-sap
+    aws-cpp-sdk-sso
+    aws-cpp-sdk-sso-admin
+    aws-cpp-sdk-sso-oidc
+    aws-cpp-sdk-states
+    aws-cpp-sdk-storagegateway
+    aws-cpp-sdk-support
+    aws-cpp-sdk-support-app
+    aws-cpp-sdk-swf
+    aws-cpp-sdk-synthetics
+    aws-cpp-sdk-text-to-speech
+    aws-cpp-sdk-text-to-speech-tests
+    aws-cpp-sdk-textract
+    aws-cpp-sdk-timestream-query
+    aws-cpp-sdk-timestream-write
+    aws-cpp-sdk-transcribe
+    aws-cpp-sdk-transcribestreaming
+    aws-cpp-sdk-transcribestreaming-integration-tests
+    aws-cpp-sdk-translate
+    aws-cpp-sdk-voice-id
+    aws-cpp-sdk-waf
+    aws-cpp-sdk-waf-regional
+    aws-cpp-sdk-wafv2
+    aws-cpp-sdk-wellarchitected
+    aws-cpp-sdk-wisdom
+    aws-cpp-sdk-workdocs
+    aws-cpp-sdk-worklink
+    aws-cpp-sdk-workmail
+    aws-cpp-sdk-workmailmessageflow
+    aws-cpp-sdk-workspaces
+    aws-cpp-sdk-workspaces-web
+    aws-cpp-sdk-xray
+    code-generation
+    crt
+    doc_crosslinks
+    doc_crosslinks_new
+    doxygen
+    generated
+    scripts
+    testing-resources)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f7233846c1..4a19e226f7 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4160,11 +4160,22 @@ macro(build_google_cloud_cpp_storage)
       "${GOOGLE_CLOUD_CPP_INSTALL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}google_cloud_cpp_common${CMAKE_STATIC_LIBRARY_SUFFIX}"
   )
 
+  # Remove unused directories to save build directory storage.
+  # 141MB -> 79MB
+  set(GOOGLE_CLOUD_CPP_PATCH_COMMAND ${CMAKE_COMMAND} -E)
+  if(CMAKE_VERSION VERSION_LESS 3.17)
+    list(APPEND GOOGLE_CLOUD_CPP_PATCH_COMMAND remove_directory)
+  else()
+    list(APPEND GOOGLE_CLOUD_CPP_PATCH_COMMAND rm -rf)
+  endif()
+  list(APPEND GOOGLE_CLOUD_CPP_PATCH_COMMAND ci)
+
   externalproject_add(google_cloud_cpp_ep
                       ${EP_COMMON_OPTIONS}
                       INSTALL_DIR ${GOOGLE_CLOUD_CPP_INSTALL_PREFIX}
                       URL ${google_cloud_cpp_storage_SOURCE_URL}
                       URL_HASH "SHA256=${ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM}"
+                      PATCH_COMMAND ${GOOGLE_CLOUD_CPP_PATCH_COMMAND}
                       CMAKE_ARGS ${GOOGLE_CLOUD_CPP_CMAKE_ARGS}
                       BUILD_BYPRODUCTS ${GOOGLE_CLOUD_CPP_STATIC_LIBRARY_STORAGE}
                                        ${GOOGLE_CLOUD_CPP_STATIC_LIBRARY_REST_INTERNAL}
@@ -4616,24 +4627,16 @@ endif()
 # ----------------------------------------------------------------------
 # AWS SDK for C++
 
+include(AWSSDKVariables)
+
 macro(build_awssdk)
   message(STATUS "Building AWS C++ SDK from source")
   set(AWSSDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/awssdk_ep-install")
   set(AWSSDK_INCLUDE_DIR "${AWSSDK_PREFIX}/include")
 
-  if(WIN32)
-    # On Windows, need to match build types
-    set(AWSSDK_BUILD_TYPE ${CMAKE_BUILD_TYPE})
-  else()
-    # Otherwise, always build in release mode.
-    # Especially with gcc, debug builds can fail with "asm constraint" errors:
-    # https://github.com/TileDB-Inc/TileDB/issues/1351
-    set(AWSSDK_BUILD_TYPE release)
-  endif()
-
   set(AWSSDK_COMMON_CMAKE_ARGS
       ${EP_COMMON_CMAKE_ARGS}
-      -DCMAKE_BUILD_TYPE=${AWSSDK_BUILD_TYPE}
+      -DCPP_STANDARD=${CMAKE_CXX_STANDARD}
       -DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX}
       -DCMAKE_PREFIX_PATH=${AWSSDK_PREFIX}
       -DENABLE_TESTING=OFF
@@ -4655,6 +4658,15 @@ macro(build_awssdk)
       -DBUILD_DEPS=OFF
       -DBUILD_ONLY=config\\$<SEMICOLON>s3\\$<SEMICOLON>transfer\\$<SEMICOLON>identity-management\\$<SEMICOLON>sts
       -DMINIMIZE_SIZE=ON)
+  # Remove unused directories to save build directory storage.
+  # 807MB -> 31MB
+  set(AWSSDK_PATCH_COMMAND ${CMAKE_COMMAND} -E)
+  if(CMAKE_VERSION VERSION_LESS 3.17)
+    list(APPEND AWSSDK_PATCH_COMMAND remove_directory)
+  else()
+    list(APPEND AWSSDK_PATCH_COMMAND rm -rf)
+  endif()
+  list(APPEND AWSSDK_PATCH_COMMAND ${AWSSDK_UNUSED_DIRECTORIES})
 
   if(UNIX)
     # on Linux and macOS curl seems to be required
@@ -4754,16 +4766,33 @@ macro(build_awssdk)
                       BUILD_BYPRODUCTS ${AWS_C_COMMON_STATIC_LIBRARY})
   add_dependencies(AWS::aws-c-common aws_c_common_ep)
 
+  set(AWS_CHECKSUMS_CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS})
+  if(NOT WIN32)
+    # On non-Windows, always build in release mode.
+    # Especially with gcc, debug builds can fail with "asm constraint" errors:
+    # https://github.com/TileDB-Inc/TileDB/issues/1351
+    list(APPEND AWS_CHECKSUMS_CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release)
+  endif()
   externalproject_add(aws_checksums_ep
                       ${EP_COMMON_OPTIONS}
                       URL ${AWS_CHECKSUMS_SOURCE_URL}
                       URL_HASH "SHA256=${ARROW_AWS_CHECKSUMS_BUILD_SHA256_CHECKSUM}"
-                      CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS}
+                      CMAKE_ARGS ${AWS_CHECKSUMS_CMAKE_ARGS}
                       BUILD_BYPRODUCTS ${AWS_CHECKSUMS_STATIC_LIBRARY}
                       DEPENDS aws_c_common_ep)
   add_dependencies(AWS::aws-checksums aws_checksums_ep)
 
   if("s2n-tls" IN_LIST _AWSSDK_LIBS)
+    # Remove unused directories to save build directory storage.
+    # 169MB -> 105MB
+    set(AWS_LC_PATCH_COMMAND ${CMAKE_COMMAND} -E)
+    if(CMAKE_VERSION VERSION_LESS 3.17)
+      list(APPEND AWS_LC_PATCH_COMMAND remove_directory)
+    else()
+      list(APPEND AWS_LC_PATCH_COMMAND rm -rf)
+    endif()
+    list(APPEND AWS_LC_PATCH_COMMAND fuzz)
+
     set(AWS_LC_C_FLAGS ${EP_C_FLAGS})
     string(APPEND AWS_LC_C_FLAGS " -Wno-error=overlength-strings -Wno-error=pedantic")
     # Link time optimization is causing trouble like #34349
@@ -4778,6 +4807,7 @@ macro(build_awssdk)
                         ${EP_COMMON_OPTIONS}
                         URL ${AWS_LC_SOURCE_URL}
                         URL_HASH "SHA256=${ARROW_AWS_LC_BUILD_SHA256_CHECKSUM}"
+                        PATCH_COMMAND ${AWS_LC_PATCH_COMMAND}
                         CMAKE_ARGS ${AWS_LC_CMAKE_ARGS}
                         BUILD_BYPRODUCTS ${AWS_LC_STATIC_LIBRARY})
     add_dependencies(AWS::crypto aws_lc_ep)
@@ -4916,6 +4946,7 @@ macro(build_awssdk)
                       ${EP_COMMON_OPTIONS}
                       URL ${AWSSDK_SOURCE_URL}
                       URL_HASH "SHA256=${ARROW_AWSSDK_BUILD_SHA256_CHECKSUM}"
+                      PATCH_COMMAND ${AWSSDK_PATCH_COMMAND}
                       CMAKE_ARGS ${AWSSDK_CMAKE_ARGS}
                       BUILD_BYPRODUCTS ${AWS_CPP_SDK_COGNITO_IDENTITY_STATIC_LIBRARY}
                                        ${AWS_CPP_SDK_CORE_STATIC_LIBRARY}
diff --git a/cpp/cmake_modules/aws_sdk_cpp_generate_variables.sh b/cpp/cmake_modules/aws_sdk_cpp_generate_variables.sh
new file mode 100755
index 0000000000..79b560a4a1
--- /dev/null
+++ b/cpp/cmake_modules/aws_sdk_cpp_generate_variables.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu
+
+version=$1
+
+base_dir="$(dirname "$0")"
+output="${base_dir}/AWSSDKVariables.cmake"
+
+cat <<HEADER > ${output}
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Generated by:
+#   $ cpp/cmake_modules/aws_sdk_cpp_generate_variables.sh ${version}
+
+HEADER
+
+rm -f ${version}.tar.gz
+wget https://github.com/aws/aws-sdk-cpp/archive/${version}.tar.gz
+base_name=aws-sdk-cpp-${version}
+rm -rf ${base_name}
+tar xf ${version}.tar.gz
+
+echo "set(AWSSDK_UNUSED_DIRECTORIES" >> ${output}
+find ${base_name} -mindepth 1 -maxdepth 1 -type d | \
+  sort | \
+  grep -v cmake | \
+  grep -v toolchains | \
+  grep -v aws-cpp-sdk-cognito-identity | \
+  grep -v aws-cpp-sdk-core | \
+  grep -v aws-cpp-sdk-config | \
+  grep -v aws-cpp-sdk-s3 | \
+  grep -v aws-cpp-sdk-transfer | \
+  grep -v aws-cpp-sdk-identity-management | \
+  grep -v aws-cpp-sdk-sts | \
+  sed -E -e "s,^${base_name}/,    ,g" >> ${output}
+echo ")" >> ${output}
+
+rm -rf ${base_name}
+rm -f ${version}.tar.gz
diff --git a/dev/tasks/docs/github.linux.yml b/dev/tasks/docs/github.linux.yml
index e3a62f5883..f9b2e111e8 100644
--- a/dev/tasks/docs/github.linux.yml
+++ b/dev/tasks/docs/github.linux.yml
@@ -21,7 +21,7 @@
 
 jobs:
   test:
-    name: Docs Preview 
+    name: Docs Preview
     runs-on: ubuntu-latest
 {{ macros.github_set_env(env) }}
     steps:
@@ -55,7 +55,7 @@ jobs:
           {%- raw %}
           AWS_ACCESS_KEY_ID: ${{ secrets.CROSSBOW_DOCS_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.CROSSBOW_DOCS_AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: ${{ secrets.CROSSBOW_DOCS_S3_BUCKET_REGION }} 
+          AWS_DEFAULT_REGION: ${{ secrets.CROSSBOW_DOCS_S3_BUCKET_REGION }}
           BUCKET: ${{ secrets.CROSSBOW_DOCS_S3_BUCKET }}
           {% endraw %}
         run: |
diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml
index 3a00849a4d..bf28cf10e9 100644
--- a/dev/tasks/linux-packages/github.linux.yml
+++ b/dev/tasks/linux-packages/github.linux.yml
@@ -38,29 +38,39 @@ jobs:
           env.ARCHITECTURE == 'amd64'
         run: |
           df -h
-          du -hsc /opt/* /usr/local/*
-          du -hsc /opt/hostedtoolcache/*
-          du -hs /usr/local/bin
+          echo "::group::/usr/local/*"
+          du -hsc /usr/local/*
+          echo "::endgroup::"
+          echo "::group::/usr/local/bin/*"
+          du -hsc /usr/local/bin/*
+          echo "::endgroup::"
           # ~1GB (From 1.2GB to 214MB)
-          sudo rm  -rf /usr/local/bin/aliyun \
-                /usr/local/bin/azcopy \
-                /usr/local/bin/bicep \
-                /usr/local/bin/cmake-gui \
-                /usr/local/bin/cpack \
-                /usr/local/bin/helm \
-                /usr/local/bin/hub \
-                /usr/local/bin/kubectl \
-                /usr/local/bin/minikube \
-                /usr/local/bin/node \
-                /usr/local/bin/packer \
-                /usr/local/bin/pulumi* \
-                /usr/local/bin/stack \
-                /usr/local/bin/terraform
-          du -hs /usr/local/bin
-          du -hs /usr/local/share
+          sudo rm -rf \
+            /usr/local/bin/aliyun \
+            /usr/local/bin/azcopy \
+            /usr/local/bin/bicep \
+            /usr/local/bin/cmake-gui \
+            /usr/local/bin/cpack \
+            /usr/local/bin/helm \
+            /usr/local/bin/hub \
+            /usr/local/bin/kubectl \
+            /usr/local/bin/minikube \
+            /usr/local/bin/node \
+            /usr/local/bin/packer \
+            /usr/local/bin/pulumi* \
+            /usr/local/bin/stack \
+            /usr/local/bin/terraform || :
+          echo "::group::/usr/local/share/*"
+          du -hsc /usr/local/share/*
+          echo "::endgroup::"
           # 1.3GB
-          sudo rm -rf /usr/local/share/powershell
-          du -hs /usr/local/share
+          sudo rm -rf /usr/local/share/powershell || :
+          echo "::group::/opt/*"
+          du -hsc /opt/*
+          echo "::endgroup::"
+          echo "::group::/opt/hostedtoolcache/*"
+          du -hsc /opt/hostedtoolcache/*
+          echo "::endgroup::"
           # 5.3GB
           sudo rm -rf /opt/hostedtoolcache/CodeQL || :
           # 1.4GB
diff --git a/docker-compose.yml b/docker-compose.yml
index ee24691116..285754c38d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -55,6 +55,9 @@
 
 version: '3.5'
 
+x-common: &common
+  GITHUB_ACTIONS:
+
 x-ccache: &ccache
   CCACHE_COMPILERCHECK: content
   CCACHE_COMPRESS: 1
@@ -224,7 +227,7 @@ services:
     ulimits: &ulimits
       core: ${ULIMIT_CORE}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_MIMALLOC: "ON"
     volumes: &alpine-linux-volumes
@@ -275,7 +278,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_BUILD_BENCHMARKS: "ON"
       ARROW_BUILD_EXAMPLES: "ON"
       ARROW_ENABLE_TIMING_TESTS:  # inherit
@@ -310,7 +313,7 @@ services:
         arch: ${ARCH}
     shm_size: *shm-size
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_CXXFLAGS: "-Og"  # Shrink test runtime by enabling minimal optimizations
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_FLIGHT: "OFF"
@@ -345,7 +348,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_MIMALLOC: "ON"
     volumes: &debian-volumes
@@ -386,7 +389,7 @@ services:
       - apparmor:unconfined
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_MIMALLOC: "ON"
     volumes: &ubuntu-volumes
@@ -422,7 +425,7 @@ services:
       - apparmor:unconfined
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_HOME: /arrow
       ARROW_DEPENDENCY_SOURCE: BUNDLED
       LIBARROW_MINIMAL: "false"
@@ -444,7 +447,7 @@ services:
     volumes:
       - .:/arrow:delegated
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_DEPENDENCY_SOURCE: BUNDLED
       ARROW_HOME: /arrow
       LIBARROW_MINIMAL: "false"
@@ -466,7 +469,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_DEPENDENCY_SOURCE: BUNDLED
       CMAKE_GENERATOR: "Unix Makefiles"
     volumes: *ubuntu-volumes
@@ -487,7 +490,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_BUILD_UTILITIES: "OFF"
       ARROW_COMPUTE: "OFF"
       ARROW_CSV: "OFF"
@@ -534,7 +537,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [ *ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_BUILD_UTILITIES: "OFF"
       ARROW_COMPUTE: "OFF"
       ARROW_CSV: "OFF"
@@ -584,7 +587,7 @@ services:
     shm_size: *shm-size
     volumes: *ubuntu-volumes
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       CC: clang-${CLANG_TOOLS}
       CXX: clang++-${CLANG_TOOLS}
       # Avoid creating huge static libraries
@@ -626,7 +629,7 @@ services:
     shm_size: *shm-size
     volumes: *ubuntu-volumes
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       CC: clang-${CLANG_TOOLS}
       CXX: clang++-${CLANG_TOOLS}
       ARROW_BUILD_STATIC: "OFF"
@@ -658,7 +661,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_ENABLE_TIMING_TESTS:  # inherit
       ARROW_MIMALLOC: "ON"
       Protobuf_SOURCE: "BUNDLED"  # Need Protobuf >= 3.15
@@ -681,7 +684,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: *sccache
+      <<: [*common, *sccache]
     volumes:
       - .:/arrow:delegated
     command: >-
@@ -711,9 +714,8 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       BUILD_DOCS_C_GLIB: "ON"
-      GITHUB_ACTIONS:
     volumes: *debian-volumes
     command: &c-glib-command >
       /bin/bash -c "
@@ -740,8 +742,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: *ccache
-      GITHUB_ACTIONS:
+      <<: [*common, *ccache]
     volumes: *ubuntu-volumes
     command: *c-glib-command
 
@@ -771,9 +772,8 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       BUILD_DOCS_C_GLIB: "ON"
-      GITHUB_ACTIONS:
     volumes: *debian-volumes
     command: &ruby-command >
       /bin/bash -c "
@@ -802,8 +802,7 @@ services:
     shm_size: *shm-size
     ulimits: *ulimits
     environment:
-      <<: *ccache
-      GITHUB_ACTIONS:
+      <<: [*common, *ccache]
     volumes: *ubuntu-volumes
     command: *ruby-command
 
@@ -830,7 +829,7 @@ services:
         python: ${PYTHON}
     shm_size: *shm-size
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
     volumes: *conda-volumes
     command: &python-conda-command
       ["
@@ -857,7 +856,7 @@ services:
         numba: ${NUMBA}
     shm_size: *shm-size
     environment:
-      <<: [ *ccache, *sccache ] 
+      <<: [*common, *ccache, *sccache]
       ARROW_BUILD_UTILITIES: "OFF"
       ARROW_COMPUTE: "ON"
       ARROW_CSV: "ON"
@@ -902,7 +901,7 @@ services:
         base: ${REPO}:${ARCH}-debian-${DEBIAN}-cpp
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes: *debian-volumes
     command: *python-command
 
@@ -924,7 +923,7 @@ services:
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes: *ubuntu-volumes
     command: *python-command
 
@@ -961,7 +960,7 @@ services:
         base: ${REPO}:${ARCH}-fedora-${FEDORA}-cpp
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       Protobuf_SOURCE: "BUNDLED"  # Need Protobuf >= 3.15
     volumes: *fedora-volumes
     command: *python-command
@@ -1005,7 +1004,7 @@ services:
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       # Bundled build of OpenTelemetry needs a git client
       ARROW_WITH_OPENTELEMETRY: "OFF"
       PYARROW_VERSION: ${PYARROW_VERSION:-}
@@ -1035,7 +1034,7 @@ services:
       cache_from:
         - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes:
       - .:/arrow:delegated
       - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated
@@ -1058,7 +1057,7 @@ services:
       cache_from:
         - ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2-28-vcpkg-${VCPKG}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes:
       - .:/arrow:delegated
       - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux-2-28-ccache:/ccache:delegated
@@ -1070,6 +1069,7 @@ services:
     volumes:
       - .:/arrow:delegated
     environment:
+      <<: *common
       CHECK_IMPORTS: "ON"
       CHECK_UNITTESTS: "OFF"
     command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
@@ -1088,6 +1088,7 @@ services:
     volumes:
       - .:/arrow:delegated
     environment:
+      <<: *common
       CHECK_IMPORTS: "OFF"
       CHECK_UNITTESTS: "ON"
     command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow
@@ -1137,7 +1138,7 @@ services:
       cache_from:
         - ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes:
       - .:/arrow:delegated
       - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
@@ -1176,7 +1177,7 @@ services:
         pandas: ${PANDAS}
     shm_size: *shm-size
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       PARQUET_REQUIRE_ENCRYPTION:  # inherit
       PYTEST_ARGS:  # inherit
       HYPOTHESIS_PROFILE:  # inherit
@@ -1193,7 +1194,7 @@ services:
     # the CI pipeline.
     image: ${REPO}:${ARCH}-conda-python-${PYTHON}-pandas-${PANDAS}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_SUBSTRAIT: "ON"
       LC_ALL: "C.UTF-8"
       LANG: "C.UTF-8"
@@ -1235,7 +1236,7 @@ services:
         dask: ${DASK}
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes: *conda-volumes
     command:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
@@ -1261,7 +1262,7 @@ services:
         python: ${PYTHON}
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_SUBSTRAIT: "ON"
     volumes: *conda-volumes
     command:
@@ -1288,7 +1289,7 @@ services:
         python: ${PYTHON}
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_FLIGHT: "OFF"
       ARROW_FLIGHT_SQL: "OFF"
       ARROW_GANDIVA: "OFF"
@@ -1319,7 +1320,7 @@ services:
         llvm: ${LLVM}
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_DATASET: "OFF"
       ARROW_FLIGHT: "OFF"
       ARROW_GANDIVA: "OFF"
@@ -1363,7 +1364,7 @@ services:
         r_duckdb_dev: ${R_DUCKDB_DEV:-}
     shm_size: *shm-size
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_R_CXXFLAGS: '-Werror'
       ARROW_FLIGHT: 'ON'
       LIBARROW_BUILD: 'false'
@@ -1379,6 +1380,7 @@ services:
 
   ubuntu-r-only-r:
     environment:
+      <<: *common
       ARROW_DEPENDENCY_SOURCE: ''
       ARROW_SOURCE_HOME: '/arrow'
       FORCE_BUNDLED_BUILD: 'true'
@@ -1410,7 +1412,7 @@ services:
         r_custom_ccache: ${R_CUSTOM_CCACHE}
     shm_size: *shm-size
     environment:
-      <<: *sccache
+      <<: [*common, *sccache]
       LIBARROW_DOWNLOAD: "false"
       ARROW_SOURCE_HOME: "/arrow"
       ARROW_R_DEV: ${ARROW_R_DEV}
@@ -1442,7 +1444,7 @@ services:
         tz: ${TZ}
         r_prune_deps: ${R_PRUNE_DEPS}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_SOURCE_HOME: "/arrow"
     volumes: *ubuntu-volumes
     command: >
@@ -1465,6 +1467,7 @@ services:
         r_prune_deps: ${R_PRUNE_DEPS}
     shm_size: *shm-size
     environment:
+      <<: *common
       LIBARROW_DOWNLOAD: "false"
       ARROW_SOURCE_HOME: "/arrow"
       ARROW_R_DEV: ${ARROW_R_DEV}
@@ -1492,7 +1495,7 @@ services:
         r_bin: RDvalgrind
         tz: ${TZ}
     environment:
-      <<: [*ccache, *sccache]
+      <<: [*common, *ccache, *sccache]
       ARROW_R_DEV: ${ARROW_R_DEV}
       # AVX512 not supported by Valgrind (similar to ARROW-9851) some runners support AVX512 and some do not
       # so some build might pass without this setting, but we want to ensure that we stay to AVX2 regardless of runner.
@@ -1519,6 +1522,7 @@ services:
         tz: ${TZ}
     shm_size: *shm-size
     environment:
+      <<: *common
       N_JOBS:
       ARROW_REVDEP_WORKERS:
       ARROW_R_DEV: "true"
@@ -1565,6 +1569,7 @@ services:
     shm_size: *shm-size
     volumes: *debian-volumes
     environment:
+      <<: *common
       ARROW_GO_TESTCGO: "1"
     command: *go-command
 
@@ -1605,6 +1610,7 @@ services:
     shm_size: *shm-size
     volumes: *debian-volumes
     environment:
+      <<: *common
       BUILD_DOCS_JS: "ON"
     command: &js-command >
       /bin/bash -c "
@@ -1690,7 +1696,7 @@ services:
         go: ${GO}
     volumes: *conda-volumes
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       # tell archery where the arrow binaries are located
       ARROW_CPP_EXE_PATH: /build/cpp/debug
       ARCHERY_INTEGRATION_WITH_RUST: 0
@@ -1723,7 +1729,7 @@ services:
         node: ${NODE}
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_JAVA_SKIP_GIT_PLUGIN:
       ARROW_CUDA: "ON"
       ARROW_SUBSTRAIT: "ON"
@@ -1761,7 +1767,7 @@ services:
         base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
         clang_tools: ${CLANG_TOOLS}
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes: *ubuntu-volumes
     command: >
       /bin/bash -c "
@@ -1826,7 +1832,7 @@ services:
     links:
       - impala:impala
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       ARROW_ENGINE: "OFF"
       ARROW_FLIGHT: "OFF"
       ARROW_FLIGHT_SQL: "OFF"
@@ -1870,7 +1876,7 @@ services:
         numpy: ${NUMPY}
     shm_size: *shm-size
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
     volumes: *conda-maven-volumes
     command:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
@@ -1887,11 +1893,10 @@ services:
       - ${DOCKER_VOLUME_PREFIX}conda-ccache:/ccache:delegated
     shm_size: '1gb'
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       CMAKE_GENERATOR: Ninja
       DEBIAN_FRONTEND: "noninteractive"
       DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
-      GITHUB_ACTIONS:
       TEST_APT: 0  # would require docker-in-docker
       TEST_YUM: 0
       USE_CONDA: 1
@@ -1921,9 +1926,8 @@ services:
       - ${DOCKER_VOLUME_PREFIX}almalinux-ccache:/ccache:delegated
     shm_size: '1gb'
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       CMAKE_GENERATOR: Ninja
-      GITHUB_ACTIONS:
       TEST_APT: 0  # would require docker-in-docker
       TEST_YUM: 0
     command: >
@@ -1950,9 +1954,8 @@ services:
       - ${DOCKER_VOLUME_PREFIX}ubuntu-ccache:/ccache:delegated
     shm_size: '1gb'
     environment:
-      <<: *ccache
+      <<: [*common, *ccache]
       CMAKE_GENERATOR: Ninja
-      GITHUB_ACTIONS:
       TEST_APT: 0  # would require docker-in-docker
       TEST_YUM: 0
     command: >