You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by dl...@apache.org on 2022/02/14 20:05:27 UTC
[accumulo-testing] branch main updated: Terraform configurations for creating test infrastructure (#185)
This is an automated email from the ASF dual-hosted git repository.
dlmarion pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo-testing.git
The following commit(s) were added to refs/heads/main by this push:
new e8e69a0 Terraform configurations for creating test infrastructure (#185)
e8e69a0 is described below
commit e8e69a054f46979cec499724e6931bfb77947f23
Author: Dave Marion <dl...@apache.org>
AuthorDate: Mon Feb 14 15:05:21 2022 -0500
Terraform configurations for creating test infrastructure (#185)
This change includes Terraform configurations for creating and destroying compute resources for testing on AWS and Azure. The configurations install ZooKeeper, Hadoop, Accumulo, and Accumulo-Testing. Users can supply options for the versions of the software that should be installed or can supply their own binary tarballs for installation. See the README for detailed documentation.
Co-authored-by: Brian Loss <br...@gmail.com>
Co-authored-by: domgarguilo <do...@gmail.com>
---
.../terraform-testing-infrastructure/.gitignore | 7 +
.../terraform-testing-infrastructure/QUICKSTART | 74 +
contrib/terraform-testing-infrastructure/README.md | 501 ++++
.../terraform-testing-infrastructure/aws/main.tf | 320 +++
.../aws/variables.tf | 197 ++
.../terraform-testing-infrastructure/azure/main.tf | 402 ++++
.../azure/variables.tf | 246 ++
.../files/update-hosts-genders.sh | 68 +
.../modules/cloud-init-config/main.tf | 101 +
.../cloud-init-config/templates/cloud-init.tftpl | 151 ++
.../grafana_dashboards/accumulo-dashboard.json | 2512 ++++++++++++++++++++
.../grafana_dashboards/accumulo-dashboard.yaml | 6 +
.../modules/config-files/main.tf | 265 +++
.../templates/accumulo-client-properties.tftpl | 122 +
.../templates/accumulo-properties.tftpl | 25 +
.../config-files/templates/cluster.yaml.tftpl | 29 +
.../config-files/templates/core-site.xml.tftpl | 13 +
.../modules/config-files/templates/genders.tftpl | 4 +
.../templates/hadoop-datanode.service.tftpl | 18 +
.../templates/hadoop-namenode.service.tftpl | 18 +
.../templates/hadoop_bash_profile.tftpl | 5 +
.../config-files/templates/hadoop_bashrc.tftpl | 17 +
.../config-files/templates/hdfs-site.xml.tftpl | 9 +
.../modules/config-files/templates/hosts.tftpl | 4 +
.../templates/initialize_accumulo.sh.tftpl | 45 +
.../templates/initialize_hadoop.sh.tftpl | 36 +
.../config-files/templates/install_sw.sh.tftpl | 161 ++
.../config-files/templates/telegraf.conf.tftpl | 301 +++
.../templates/yarn-nodemanager.service.tftpl | 18 +
.../templates/yarn-resourcemanager.service.tftpl | 18 +
.../config-files/templates/yarn-site.xml.tftpl | 13 +
.../modules/config-files/templates/zoo.cfg.tftpl | 29 +
.../config-files/templates/zookeeper.service.tftpl | 18 +
.../modules/configure-nodes/main.tf | 57 +
.../modules/upload-software/main.tf | 33 +
.../shared_state/aws/main.tf | 65 +
.../shared_state/aws/variables.tf | 50 +
.../shared_state/azure/main.tf | 49 +
.../shared_state/azure/variables.tf | 47 +
pom.xml | 6 +
40 files changed, 6060 insertions(+)
diff --git a/contrib/terraform-testing-infrastructure/.gitignore b/contrib/terraform-testing-infrastructure/.gitignore
new file mode 100644
index 0000000..888e1f6
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/.gitignore
@@ -0,0 +1,7 @@
+**/.terraform.lock.hcl
+**/.terraform/
+conf/
+**/terraform.tfstate
+**/terraform.tfstate.backup
+**/*.auto.tfvars.json
+**/*.auto.tfvars
diff --git a/contrib/terraform-testing-infrastructure/QUICKSTART b/contrib/terraform-testing-infrastructure/QUICKSTART
new file mode 100644
index 0000000..a041f38
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/QUICKSTART
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+1. Download and Install Terraform
+
+ wget https://releases.hashicorp.com/terraform/1.1.5/terraform_1.1.5_linux_amd64.zip
+ unzip into /usr/local/bin
+
+2. Create the Shared State
+
+ NOTE: You only need to do this once. If you are sharing the cluster with a team,
+ then only one person needs to do it and they need to share the bucket with
+ the other team members.
+
+ cd shared_state/aws
+ terraform init
+ terraform apply
+
+3. Create the Configuration
+
+ You will need to create a configuration file that includes values for the
+ variables that do not have a default value. See the Variables section in
+ the README. For example, you can create a file "aws.auto.tfvars" file in
+ the aws directory with the following content (replace as appropriate):
+
+create_route53_records = "true"
+private_network = "true"
+accumulo_root_password = "secret"
+security_group = "sg-ABCDEF001"
+route53_zone = "some.domain.com"
+us_east_1b_subnet = "subnet-ABCDEF123"
+us_east_1e_subnet = "subnet-ABCDEF124"
+ami_owner = "000000000001"
+ami_name_pattern = "MY_AMI_*"
+authorized_ssh_keys = [
+ "ssh-rsa .... user1",
+ "ssh-rsa .... user2",
+ "ssh-rsa .... user3"
+]
+
+
+4. Create the Resources
+
+   cd aws
+
+   Create a configuration file as described in the Configuration section of
+   the README. Example in HCL syntax:
+
+   terraform init --backend-config=bucket=<bucket-name-goes-here>
+   terraform apply
+
+5. Accessing the cluster
+
+ The output of the apply step above will include the IP addresses of the
+ resources that were created. If created correctly, you should be able to
+ ssh to the nodes using "ssh hadoop@ip". If you created DNS addresses for
+ the nodes, then you should be able to ssh using those addresses also. You
+ should also be able to access the web pages (see the "Accessing Web
+ Pages" section of the README for ports)
+
diff --git a/contrib/terraform-testing-infrastructure/README.md b/contrib/terraform-testing-infrastructure/README.md
new file mode 100644
index 0000000..693d87f
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/README.md
@@ -0,0 +1,501 @@
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+# Accumulo Testing Infrastructure
+
+## Description
+
+This Git repository contains several [Terraform](https://www.terraform.io/) configurations.
+
+ - `shared_state` creates Terraform state storage in either Azure or AWS, which is a prerequisite
+ for the Terraform configurations
+ in `aws` or `azure`.
+ - `shared_state/aws` creates an AWS S3 Bucket and DynamoDB table that are a prerequisite for
+ the Terraform configuration in `aws`.
+ - `shared_state/azure` creates an Azure resource group and storage account that are a
+ prerequisite for the Terraform configuration in `azure`.
+ - `aws` creates the following AWS resources:
+ 1. Creates one or more EC2 nodes for running the different components. Currently, the
+ configuration uses the m5.2xlarge instance type which provides 8 vCPUs, 32GB RAM, and an EBS
+ backed root volume.
+ 2. Runs commands on the EC2 nodes after they are started (5 minutes according to the docs) to
+ install software and configure them.
+ 3. Creates DNS A records for the EC2 nodes.
+ - `azure` creates the following Azure resources:
+ 1. Creates a resource group to hold all of the created resources.
+ 2. Creates networking resources (vnet, subnet, network security group).
+ 3. Creates two or more Azure VMs (along with associated NICs and public IP addresses) for
+ running the different components. The default configuration creates
+ [D8s v4](https://docs.microsoft.com/en-us/azure/virtual-machines/dv4-dsv4-series#dsv4-series)
+ VMs, providing 8 vCPUs and 32GiB RAM with an Azure storage backed OS drive.
+ 4. Runs commands on the VMs after cloud-init provisioning is complete in order to install and
+ configure Hadoop, Zookeeper, Accumulo, and the Accumulo Testing repository.
+
+## Prerequisites
+
+You will need to download and install the correct Terraform [CLI](https://www.terraform.io/downloads)
+for your platform. Put the `terraform` binary on your PATH. You can optionally install
+[Terraform Docs](https://terraform-docs.io/user-guide/installation/) if you want to be able
+to generate documentation or an example variables file for either the shared state or
+`aws` or `azure` configurations.
+
+## Shared State
+
+The `shared_state` directory contains Terraform configurations for creating either an AWS S3 bucket
+and DynamoDB table, or an Azure resource group, storage account, and container. These objects only
+need to be created once and are used for sharing the Terraform state with a team. To read more
+about this see [remote state](https://www.terraform.io/docs/language/state/remote.html). The AWS
+shared state instructions are based on
+[this article](https://blog.gruntwork.io/how-to-manage-terraform-state-28f5697e68fa).
+
+To generate the storage, run `terraform init` followed by `terraform apply`.
+
+The default AWS configuration generates the S3 bucket name when `terraform apply` is run. This
+ensures that a globally unique S3 bucket name is used. It is not required to set any variables for
+the shared state. However, if you wish to override any variable values, this can be done by
+creating an `aws.auto.tfvars` file in the `shared_state/aws` directory. For example:
+```bash
+cd shared_state/aws
+cat > aws.auto.tfvars << EOF
+bucket_force_destroy = true
+EOF
+```
+
+Assuming the bucket variable is not overridden, the generated S3 bucket name will appear in the
+`terraform` apply output, like the following example:
+```
+Outputs:
+
+bucket_name = "terraform-20220209131315353700000001"
+```
+This value should be supplied to `terraform init` in the [aws](./aws) directory as described below.
+Using the example above, the init command for the aws directory would be:
+```bash
+terraform init -backend-config=bucket=terraform-20220209131315353700000001
+```
+
+If you change any of the backend storage configuration parameters over their defaults, you will
+need to override them when you initialize terraform for the `aws` or `azure` configuration
+below. For example, if you change the region where the S3 bucket is deployed from `us-east-1` to
+`us-west-2`, then you would need to run `terraform init` in the `aws` directory (not the
+shared_state initialization, but the main `aws` directory initialization) with:
+```bash
+terraform init -backend-config=region=us-west-2
+```
+
+The following backend configuration can be overridden from with `-backend-config=<name>=<value>`
+options to `terraform init`. This prevents the need to modify the `backend` sections in
+[aws/main.tf](./aws/main.tf) or [azure/main.tf](./azure/main.tf).
+
+For AWS:
+* `-backend-config=bucket=<bucket_name>`: Override the S3 bucket name
+* `-backend-config=key=<key_name>`: Override the key in the S3 bucket
+* `-backend-config=region=<region>`: Override AWS region
+* `-backend-config=dynamodb_table=<dynamodb_table_name>`: Override the DynamoDB table name
+
+For Azure:
+* `-backend-config=resource_group_name=<resource_group_name>`: Override the resource group where the storage account is located
+* `-backend-config=storage_account_name=<storage_account_name>`: Override the name of the Azure storage account holding Terraform state
+* `-backend-config=container_name=<container_name>`: Override the name of the container within the storage account that is holding Terraform state
+* `-backend-config=key=<blob_name>`: Override the name of the blob within the container that will be used to hold Terraform state
+
+
+## Test Cluster
+
+The `aws` and `azure` directories contain Terraform configurations for creating an Accumulo cluster
+on AWS or Azure respectively. The `aws` and `azure` directories contain the following Terraform
+configuration items:
+ - main.tf - The Terraform configuration file
+ - variables.tf - The declaration and default values for Terraform variables
+These configurations both use shared Terraform module and configuration files that can be found in
+the following directories/files:
+ - modules/ - This contains several shared Terraform modules that are used by the `aws` and `azure`
+ Terraform configurations
+ - `cloud-init-config` - contains templates to generate a
+ [Cloud Init](https://cloudinit.readthedocs.org/) configuration to configure AWS instances or
+ Azure VMs with necessary Linux packages, user accounts, etc.
+ - `config-files` - contains template configuration files for various components of the cluster
+ (e.g., HDFS, Accumulo, Grafana, etc.) as well as helper scripts to install the software
+ components that cannot be installed via cloud-init.
+ - `upload-software` - if pre-built binaries for downloaded software components (Hadoop, Accumulo,
+ Zookeeper, Maven) are included, this module uploads them to the cluster
+ - `configure-nodes` - this module is responsible for executing scripts on the cluster to install
+ and configure software, initialize the HDFS filesystem and Accumulo cluster, and start them.
+ - conf/ - a non-git tracked directory that contains rendered template files with variables replaced
+ by selected runtime configuration. These files are uploaded to the cluster.
+
+### AWS Variables
+
+The table below lists the variables and their default values that are used in the `aws` configuration.
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| accumulo\_branch\_name | The name of the branch to build and install | `string` | `"main"` | no |
+| accumulo\_dir | The Accumulo directory on each EC2 node | `string` | `"/data/accumulo"` | no |
+| accumulo\_instance\_name | The accumulo instance name. | `string` | `"accumulo-testing"` | no |
+| accumulo\_repo | URL of the Accumulo git repo | `string` | `"https://github.com/apache/accumulo.git"` | no |
+| accumulo\_root\_password | The password for the accumulo root user. A randomly generated password will be used if none is specified here. | `string` | `null` | no |
+| accumulo\_testing\_branch\_name | The name of the branch to build and install | `string` | `"main"` | no |
+| accumulo\_testing\_repo | URL of the Accumulo Testing git repo | `string` | `"https://github.com/apache/accumulo-testing.git"` | no |
+| accumulo\_version | The branch of Accumulo to download and install | `string` | `"2.1.0-SNAPSHOT"` | no |
+| ami\_name\_pattern | The pattern of the name of the AMI to use | `any` | n/a | yes |
+| ami\_owner | The id of the AMI owner | `any` | n/a | yes |
+| authorized\_ssh\_key\_files | List of SSH public key files for the developers that will log into the cluster | `list(string)` | `[]` | no |
+| authorized\_ssh\_keys | List of SSH keys for the developers that will log into the cluster | `list(string)` | n/a | yes |
+| cloudinit\_merge\_type | Describes the merge behavior for overlapping config blocks in cloud-init. | `string` | `null` | no |
+| create\_route53\_records | Indicates whether or not route53 records will be created | `bool` | `false` | no |
+| hadoop\_dir | The Hadoop directory on each EC2 node | `string` | `"/data/hadoop"` | no |
+| hadoop\_version | The version of Hadoop to download and install | `string` | `"3.3.1"` | no |
+| instance\_count | The number of EC2 instances to create | `string` | `"2"` | no |
+| instance\_type | The type of EC2 instances to create | `string` | `"m5.2xlarge"` | no |
+| local\_sources\_dir | Directory on local machine that contains Maven, ZooKeeper or Hadoop binary distributions or Accumulo source tarball | `string` | `""` | no |
+| maven\_version | The version of Maven to download and install | `string` | `"3.8.4"` | no |
+| optional\_cloudinit\_config | An optional config block for the cloud-init script. If you set this, you should consider setting cloudinit\_merge\_type to handle merging with the default script as you need. | `string` | `null` | no |
+| private\_network | Indicates whether or not the user is on a private network and access to hosts should be through the private IP addresses rather than public ones. | `bool` | `false` | no |
+| root\_volume\_gb | The size, in GB, of the EC2 instance root volume | `string` | `"300"` | no |
+| route53\_zone | The name of the Route53 zone in which to create DNS addresses | `any` | n/a | yes |
+| security\_group | The Security Group to use when creating AWS objects | `any` | n/a | yes |
+| software\_root | The full directory root where software will be installed | `string` | `"/opt/accumulo-testing"` | no |
+| us\_east\_1b\_subnet | The AWS subnet id for the us-east-1b subnet | `any` | n/a | yes |
+| us\_east\_1e\_subnet | The AWS subnet id for the us-east-1e subnet | `any` | n/a | yes |
+| zookeeper\_dir | The ZooKeeper directory on each EC2 node | `string` | `"/data/zookeeper"` | no |
+| zookeeper\_version | The version of ZooKeeper to download and install | `string` | `"3.5.9"` | no |
+
+The following outputs are returned by the `aws` Terraform configuration.
+
+| Name | Description |
+|------|-------------|
+| accumulo\_root\_password | The supplied, or automatically generated Accumulo root user password. |
+| manager\_ip | The IP address of the manager instance. |
+| worker\_ips | The IP addresses of the worker instances. |
+
+### Azure Variables
+
+The table below lists the variables and their default values that are used in the `azure` configuration.
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| accumulo\_branch\_name | The name of the branch to build and install | `string` | `"main"` | no |
+| accumulo\_dir | The Accumulo directory on each node | `string` | `"/data/accumulo"` | no |
+| accumulo\_instance\_name | The accumulo instance name. | `string` | `"accumulo-testing"` | no |
+| accumulo\_repo | URL of the Accumulo git repo | `string` | `"https://github.com/apache/accumulo.git"` | no |
+| accumulo\_root\_password | The password for the accumulo root user. A randomly generated password will be used if none is specified here. | `string` | `null` | no |
+| accumulo\_testing\_branch\_name | The name of the branch to build and install | `string` | `"main"` | no |
+| accumulo\_testing\_repo | URL of the Accumulo Testing git repo | `string` | `"https://github.com/apache/accumulo-testing.git"` | no |
+| accumulo\_version | The branch of Accumulo to download and install | `string` | `"2.1.0-SNAPSHOT"` | no |
+| admin\_username | The username of the admin user, that can be authenticated with the first public ssh key. | `string` | `"azureuser"` | no |
+| authorized\_ssh\_key\_files | List of SSH public key files for the developers that will log into the cluster | `list(string)` | `[]` | no |
+| authorized\_ssh\_keys | List of SSH keys for the developers that will log into the cluster | `list(string)` | n/a | yes |
+| cloudinit\_merge\_type | Describes the merge behavior for overlapping config blocks in cloud-init. | `string` | `null` | no |
+| create\_resource\_group | Indicates whether or not resource\_group\_name should be created or is an existing resource group. | `bool` | `true` | no |
+| hadoop\_dir | The Hadoop directory on each node | `string` | `"/data/hadoop"` | no |
+| hadoop\_version | The version of Hadoop to download and install | `string` | `"3.3.1"` | no |
+| local\_sources\_dir | Directory on local machine that contains Maven, ZooKeeper or Hadoop binary distributions or Accumulo source tarball | `string` | `""` | no |
+| location | The Azure region where resources are to be created. If an existing resource group is specified, this value is ignored and the resource group's location is used. | `string` | n/a | yes |
+| maven\_version | The version of Maven to download and install | `string` | `"3.8.4"` | no |
+| network\_address\_space | The network address space to use for the virtual network. | `list(string)` | <pre>[<br> "10.0.0.0/16"<br>]</pre> | no |
+| optional\_cloudinit\_config | An optional config block for the cloud-init script. If you set this, you should consider setting cloudinit\_merge\_type to handle merging with the default script as you need. | `string` | `null` | no |
+| os\_disk\_caching | The type of caching to use for the OS disk. Possible values are None, ReadOnly, and ReadWrite. | `string` | `"ReadOnly"` | no |
+| os\_disk\_size\_gb | The size, in GB, of the OS disk | `number` | `300` | no |
+| os\_disk\_type | The disk type to use for OS disks. Possible values are Standard\_LRS, StandardSSD\_LRS, and Premium\_LRS. | `string` | `"Standard_LRS"` | no |
+| resource\_group\_name | The name of the resource group to create or reuse. If not specified, the name is generated based on resource\_name\_prefix. | `string` | `""` | no |
+| resource\_name\_prefix | A prefix applied to all resource names created by this template. | `string` | `"accumulo-testing"` | no |
+| software\_root | The full directory root where software will be installed | `string` | `"/opt/accumulo-testing"` | no |
+| subnet\_address\_prefixes | The subnet address prefixes to use for the accumulo testing subnet. | `list(string)` | <pre>[<br> "10.0.2.0/24"<br>]</pre> | no |
+| vm\_image | n/a | <pre>object({<br> publisher = string<br> offer = string<br> sku = string<br> version = string<br> })</pre> | <pre>{<br> "offer": "0001-com-ubuntu-server-focal",<br> "publisher": "Canonical",<br> "sku": "20_04-lts-gen2",<br> "version": "latest"<br>}</pre> | no |
+| vm\_sku | The SKU of Azure VMs to create | `string` | `"Standard_D8s_v4"` | no |
+| worker\_count | The number of worker VMs to create | `number` | `1` | no |
+| zookeeper\_dir | The ZooKeeper directory on each node | `string` | `"/data/zookeeper"` | no |
+| zookeeper\_version | The version of ZooKeeper to download and install | `string` | `"3.5.9"` | no |
+
+The following outputs are returned by the `azure` Terraform configuration.
+
+| Name | Description |
+|------|-------------|
+| accumulo\_root\_password | The user-supplied or automatically generated Accumulo root user password. |
+| manager\_ip | The public IP address of the manager VM. |
+| worker\_ips | The public IP addresses of the worker VMs. |
+
+### Configuration
+
+When using either the `aws` or `azure` configuration, you will need to supply values for required
+variables that have no default value. There are several
+[ways](https://www.terraform.io/language/values/variables#assigning-values-to-root-module-variables)
+to do this. If you installed Terraform Docs, it can generate the file for you. You can then edit the
+generated file to configure values as desired:
+
+```bash
+CLOUD=<enter either aws or azure>
+cd $CLOUD
+terraform-docs tfvars hcl . > ${CLOUD}.auto.tfvars
+# If you prefer JSON over HCL, then the command would be
+# terraform-docs tfvars json . > ${CLOUD}.auto.tfvars.json
+```
+
+Note that these generated variable files will include values for all variables, where those with
+defaults will be set to their default value. You can also refer to the tables above and simply
+add the values that are required (and have no default, or a default that you wish to change).
+Below is an example JSON file containing configuration for `aws`. This content can be customized
+and placed in the `aws` directory in a file whose name ends with `.auto.tfvars.json`. Any variable
+files whose name ends in `.auto.tfvars` or `.auto.tfvars.json` are automatically included when
+`terraform` commands are executed.
+
+```json
+{
+ "security_group": "sg-ABCDEF001",
+ "route53_zone": "some.domain.com",
+ "us_east_1b_subnet": "subnet-ABCDEF123",
+ "us_east_1e_subnet": "subnet-ABCDEF124",
+ "ami_owner": "000000000001",
+ "ami_name_pattern": "MY_AMI_*",
+ "authorized_ssh_keys": [
+ "ssh-rsa dev_key_1",
+ "ssh-rsa dev_key_2"
+ ]
+}
+```
+
+#### Cloud-Init Customization
+
+The cloud-init template can be found in [cloud-init.tftpl](./modules/cloud-init-config/templates/cloud-init.tftpl).
+If you need to customize this configuration, one method is to use the Terraform variable
+`optional_cloudinit_config` to supply your own additional configuration. For example, some CentOS 7
+images are out of date, and will need software packages to be updated before the rest of the
+software download/install will work. This can be accomplished by adding the following to your
+`.auto.tfvars` file:
+
+```hcl
+optional_cloudinit_config = <<-EOT
+ package_upgrade: true
+EOT
+```
+
+You can add any other cloud-init configuration that you wish here. One factor to consider here is
+the cloud-init [merging behavior](https://cloudinit.readthedocs.io/en/latest/topics/merging.html)
+with sections in the default template. The merging behavior can be controlled by setting the
+`cloudinit_merge_type` variable to your desired merge algorithm. The default is set to
+`dict(recurse_array,no_replace)+list(append)` which will attempt to keep all lists from the default
+configuration, rather than new ones overwriting them.
+
+Another factor to consider is the size of the generated cloud-init template. Cloud providers place
+a limit on the size of this file. AWS limits this content to 16KB, before Base64 encoding, and
+Azure limits it to 64KB after Base64 encoding.
+
+## AWS Resources
+
+This Terraform configuration creates:
+
+ 1. `${instance_count}` EC2 nodes of `${instance_type}` with the latest AMI matching
+ `${ami_name_pattern}` from the `${ami_owner}`. Each EC2 node will have a `${root_volume_gb}`GB
+ root volume. The EFS filesystem is NFS mounted to each node at `${software_root}`.
+ 2. DNS entries in Route53 for each EC2 node.
+
+## Software Layout
+
+This Terraform configuration:
+
+ 1. Downloads, if necessary, the Apache Maven `${maven_version}` binary tarball to
+ `${software_root}/sources`, then untars it to `${software_root}/apache-maven/apache-maven-${maven_version}`
+ 2. Downloads, if necessary, the Apache Zookeeper `${zookeeper_version}` binary tarball to
+ `${software_root}/sources`, then untars it to `${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin`
+ 3. Downloads, if necessary, the Apache Hadoop `${hadoop_version}` binary tarball to
+ `${software_root}/sources`, then untars it to `${software_root}/hadoop/hadoop-${hadoop_version}`
+ 4. Clones, if necessary, the Apache Accumulo Git repo from `${accumulo_repo}` into
+ `${software_root}/sources/accumulo-repo`. It switches to the `${accumulo_branch_name}` branch
+ and builds the software using Maven, then untars the binary tarball to
+ `${software_root}/accumulo/accumulo-${accumulo_version}`
+ 5. Downloads the [OpenTelemetry](https://opentelemetry.io/) Java Agent jar file and copies it to
+ `${software_root}/accumulo/accumulo-${accumulo_version}/lib/opentelemetry-javaagent-1.7.1.jar`
+ 6. Copies the Accumulo `test` jar to `${software_root}/accumulo/accumulo-${accumulo_version}/lib`
+ so that `org.apache.accumulo.test.metrics.TestStatsDRegistryFactory` is on the classpath
+ 7. Downloads the [Micrometer](https://micrometer.io/) StatsD Registry jar file and copies it to
+ `${software_root}/accumulo/accumulo-${accumulo_version}/lib/micrometer-registry-statsd-1.7.4.jar`
+ 8. Clones, if necessary, the Apache Accumulo Testing Git repo from `${accumulo_testing_repo}`
+ into `${software_root}/sources/accumulo-testing-repo`. It switches to the
+ `${accumulo_testing_branch_name}` branch and builds the software using Maven.
+
+### Supplying your own software
+
+If you want to supply your own Apache Maven, Apache ZooKeeper, Apache Hadoop, Apache Accumulo, or
+Apache Accumulo Testing binary tar files, then you can put them into a directory on your local
+machine and set the `${local_sources_dir}` variable to the full path to the directory. These files
+will be uploaded to `${software_root}/sources` and the installation script will use them instead of
+downloading them. If the version of the supplied binary tarball is different than the default
+version, then you will also need to override that property. Supplying your own binary tarballs does
+speed up the deployment. However, if you provide the Apache Accumulo binary tarball, then it will
+be harder to update the software on the cluster.
+
+**NOTE**: If you supply your own binary tarball of Accumulo, then you will need to copy the
+`accumulo-test-${accumulo_version}.jar` file to the `lib` directory manually as it's not part of
+the binary tarball.
+
+### Updating Apache Accumulo on the cluster
+
+If you did not provide a binary tarball, then you can update the software running on the cluster by
+doing the following and then restarting Accumulo:
+
+```bash
+cd ${software_root}/sources/accumulo-repo
+git pull
+mvn -s ${software_root}/apache-maven/settings.xml clean package -DskipTests -DskipITs
+tar zxf assemble/target/accumulo-${accumulo_version}-bin.tar.gz -C ${software_root}/accumulo
+# Sync the Accumulo changes with the worker nodes
+pdsh -R exec -g worker rsync -az ${software_root}/accumulo/ %h:${software_root}/accumulo/
+```
+
+### Updating Apache Accumulo Testing on the cluster
+
+If you did not provide a binary tarball, then you can update the software running on the cluster by
+doing the following:
+
+```bash
+cd ${software_root}/sources/accumulo-testing-repo
+git pull
+mvn -s ${software_root}/apache-maven/settings.xml clean package -DskipTests -DskipITs
+```
+
+## Deployment Overview
+
+The first node that is created is called the `manager`, the others are `worker` nodes. The
+following components will run on the `manager` node:
+
+- Apache ZooKeeper
+- Apache Hadoop NameNode
+- Apache Hadoop Yarn ResourceManager
+- Apache Accumulo Manager
+- Apache Accumulo Monitor
+- Apache Accumulo GarbageCollector
+- Apache Accumulo CompactionCoordinator
+- Docker
+- Jaeger Tracing Docker Container
+- Telegraf/InfluxDB/Grafana Docker Container
+
+The following components will run on the `worker` nodes:
+
+- Apache Hadoop DataNode
+- Apache Hadoop Yarn NodeManager
+- Apache Accumulo TabletServer
+- Apache Accumulo Compactor(s)
+
+### Logs
+
+The logs for each service (zookeeper, hadoop, accumulo) are located in their respective local
+directory on each node (`/data/${service}/logs` unless you changed the properties).
+
+### DNS entries
+
+The `aws` Terraform configuration creates DNS entries of the following form:
+
+ <node_name>-<branch_name>-<workspace_name>.${route53_zone}
+
+For example:
+
+- manager-main-default.${route53_zone}
+- worker#-main-default.${route53_zone} (where # is 0, 1, 2, ...)
+
+The `azure` configuration does not currently create public DNS entries for the nodes, and it is
+recommended that the public IP addresses be used instead.
+
+## Instructions
+
+ 1. Once you have created a `.auto.tfvars.json` file, or set the properties some other way, run
+ `terraform init`. If you have modified shared_state backend configuration over the default,
+ you can override the values here. For example, the following configuration updates the
+ `resource_group_name` and `storage_account_name` for the `azurerm` backend:
+ ```bash
+ terraform init -backend-config=resource_group_name=my-tfstate-resource-group -backend-config=storage_account_name=mystorageaccountname
+ ```
+ Once values are supplied to `terraform init`, they are stored in the local state and it is not
+ necessary to supply these overrides to the `terraform apply` or `terraform destroy` commands.
+ 2. Run `terraform apply` to create the AWS/Azure resources.
+ 3. Run `terraform destroy` to tear down the AWS/Azure resources.
+
+**NOTE**: If you are working with `aws` and get an Access Denied error, then try setting the AWS
+Short Term access keys in your environment.
+
+### Accessing Web Pages
+
+For an `aws` cluster, you can access the software configuration/management web pages here:
+- Hadoop NameNode: http://manager-main-default.${route53_zone}:9870
+- Yarn ResourceManager: http://manager-main-default.${route53_zone}:8088
+- Hadoop DataNode: http://worker#-main-default.${route53_zone}:9864
+- Yarn NodeManager: http://worker#-main-default.${route53_zone}:8042
+- Accumulo Monitor: http://manager-main-default.${route53_zone}:9995
+- Jaeger Tracing UI: http://manager-main-default.${route53_zone}:16686
+- Grafana: http://manager-main-default.${route53_zone}:3003
+
+The `azure` cluster creates a network security group that limits public access to port 22 (SSH).
+Therefore, to access configuration/management web pages, you should create a SOCKS proxy and use
+a browser plugin such as
+[FoxyProxy Standard](https://chrome.google.com/webstore/detail/foxyproxy-standard/gcknhkkoolaabfmlnjonogaaifnjlfnp)
+to point the browser to the SOCKS proxy. Create the proxy with
+```bash
+ssh -C2qTnNf -D 9876 hadoop@<manager-public-ip-address>
+```
+Configure FoxyProxy (or your browser directly) to connect to the proxy on localhost port 9876
+(change the port specified in the `-D` option above to use a different proxy port). If you
+configure FoxyProxy with a SOCKS 5 proxy to match the URL regex patterns `https?://manager:.*` and
+`https?://worker[0-9]+:.*`, then you can leave FoxyProxy set to
+"Use proxies based on their pre-defined patterns and priorities" and access the web pages through
+the proxy while other web pages will not use the proxy.
+- Hadoop NameNode: http://manager:9870
+- Yarn ResourceManager: http://manager:8088
+- Hadoop DataNode: http://worker#:9864
+- Yarn NodeManager: http://worker#:8042
+- Accumulo Monitor: http://manager:9995
+- Jaeger Tracing UI: http://manager:16686
+- Grafana: http://manager:3003
+
+
+## Accessing the cluster nodes
+
+The [cloud-init](https://cloudinit.readthedocs.io/en/latest/) configuration applied to each
+AWS instance or Azure VM creates a `hadoop` user. Any public SSH keys specified in the Terraform
+configuration variable `authorized_ssh_keys` (or public key file named in
+`authorized_ssh_key_files`) will be included in the cloud-init template as an authorized key for
+the `hadoop` user.
+
+If you wish to use your default ssh key, typically stored in `~/.ssh/id_rsa.pub`, you would add the
+following to your HCL `.auto.tfvars` file:
+
+```hcl
+authorized_ssh_key_files = [ "~/.ssh/id_rsa.pub" ]
+```
+
+Then, when the cluster is created, you can log in to a node with
+`ssh hadoop@<node-public-ip-address>`.
+
+### SSH'ing to other nodes
+
+The `/etc/hosts` file on each node has been updated with the names (manager, worker0, worker1,
+etc.) and IP addresses of the nodes. `pdsh` has been installed and `/etc/genders` has been
+configured. You should be able to `ssh` to any node as the `hadoop` user without a password.
+Likewise, you should be able to `pdsh` commands to groups of nodes as the hadoop user. The `pdsh`
+genders group `manager` specifies the manager node, and the `worker` group will specify all
+worker nodes.
+
+## Shutdown / Startup Instructions
+
+Once the cluster is created you can simply stop or start the nodes from the AWS console or Azure
+portal. Terraform is just for creating, updating, or destroying the resources. ZooKeeper and Hadoop
+are setup to use SystemD service files, but Accumulo is not. You could log into the manager node
+and run `accumulo-cluster stop` before stopping the nodes. Or, you could just shut them down and
+force Accumulo to recover (which might be good for testing). When restarting the nodes from the AWS
+Console/Azure Portal, ZooKeeper and Hadoop should start on their own. For Accumulo, you should only
+need to run `accumulo-cluster start` on the manager node.
diff --git a/contrib/terraform-testing-infrastructure/aws/main.tf b/contrib/terraform-testing-infrastructure/aws/main.tf
new file mode 100644
index 0000000..f4444f3
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/aws/main.tf
@@ -0,0 +1,320 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# To install Terraform download the appropriate version from https://www.terraform.io/downloads.html
+# and copy the binary to /usr/local/bin or some other location on your PATH.
+#
+# Run "terraform init" in this directory to download the plugins that Terraform will need to run
+# this plan.
+#
+# Run "terraform plan" to see the changes that would be made if you applied this plan.
+#
+# Run "terraform apply" to see the changes that would be made and to optionally apply the plan.
+#
+#
+# This Terraform configuration does the following:
+#
+# 1. Creates one or more EC2 nodes for running the different components. Currently
+# the configuration uses the m5.2xlarge instance type which provides 8 vCPUs, 32GB RAM,
+# and an EBS backed root volume.
+#
+# 2. Runs commands on the EC2 nodes after they are started (5 minutes according
+# to the docs) to install software and configure them.
+#
+# 3. Creates DNS entries for the manager (the first node created) and the workers (the remaining nodes).
+#
+#
+# PRICING:
+#
+# As of Dec 7 2021:
+#
+# Each m5.2xlarge costs $0.384 per hour
+# A 300GB EBS volume running for 40 hours per month is $1.50
+#
+# Currently the storage used is about 2.5GB with Maven, ZooKeeper, Hadoop,
+# Accumulo, and Accumulo-Testing installed and built.
+#
+
+################################
+# Core Terraform Configuration #
+################################
+
+terraform {
+ required_version = ">= 1.1.0"
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = "~> 3.68.0"
+ }
+ }
+ backend "s3" {
+ bucket = "accumulo-testing-tf-state"
+ key = "accumulo-testing/terraform.tfstate"
+ region = "us-east-1"
+ dynamodb_table = "accumulo-testing-tf-locks"
+ encrypt = true
+ }
+}
+
+provider "aws" {
+ region = "us-east-1"
+}
+
+
+#
+# Retrieves Account Information from AWS #
+#
+data "aws_caller_identity" "current" {}
+data "aws_partition" "current" {}
+data "aws_region" "current" {}
+
+#
+# Looks up the subnet us-east-1b
+#
+data "aws_subnet" "subnet_1b" {
+ id = var.us_east_1b_subnet
+}
+
+#
+# Looks up the subnet us-east-1e
+#
+data "aws_subnet" "subnet_1e" {
+ id = var.us_east_1e_subnet
+}
+
+#
+# Looks up the latest CentOS AMI
+#
+data "aws_ami" "centos_ami" {
+ owners = ["${var.ami_owner}"]
+ filter {
+ name = "name"
+ values = ["${var.ami_name_pattern}"]
+ }
+}
+
+#
+# Lookup the AWS private zone
+#
+data "aws_route53_zone" "private_zone" {
+ count = var.create_route53_records ? 1 : 0
+ name = var.route53_zone
+ private_zone = true
+}
+
+# Generate cloud-init data to use when creating EC2 nodes.
+module "cloud_init_config" {
+ source = "../modules/cloud-init-config"
+
+ software_root = var.software_root
+ zookeeper_dir = var.zookeeper_dir
+ hadoop_dir = var.hadoop_dir
+ accumulo_dir = var.accumulo_dir
+ maven_version = var.maven_version
+ zookeeper_version = var.zookeeper_version
+ hadoop_version = var.hadoop_version
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_version = var.accumulo_version
+ authorized_ssh_keys = local.ssh_keys[*]
+
+ optional_cloudinit_config = var.optional_cloudinit_config
+ cloudinit_merge_type = var.cloudinit_merge_type
+}
+
+##########################
+# EC2 Node Configuration #
+##########################
+
+#
+# Definition for the EC2 nodes to include:
+#
+# 1. AMI
+# 2. Instance Type
+# 3. Number of instances to create
+# 4. Availability Zone subnet
+# 5. VPC security group
+# 6. Size of the root block device
+# 7. Cloud-init script (see https://cloudinit.readthedocs.io/en/latest/) which
+# - creates the hadoop group and user
+# - installs packages via yum
+# - creates some files on the filesystem to use later
+#
+resource "aws_instance" "accumulo-testing" {
+ ami = data.aws_ami.centos_ami.id
+ instance_type = var.instance_type
+ count = var.instance_count
+ subnet_id = data.aws_subnet.subnet_1b.id
+ vpc_security_group_ids = [var.security_group]
+ root_block_device {
+ volume_size = var.root_volume_gb
+ delete_on_termination = true
+ tags = {
+ Name = "accumulo-testing-${var.accumulo_branch_name}-branch-${terraform.workspace}-ws-${count.index}"
+ }
+ }
+ #
+ # User data section will run cloud-init configuration that
+ # was created above from the template
+ #
+ user_data = module.cloud_init_config.cloud_init_data
+ #
+ # Wait for cloud-init to complete, so we're sure the instance is fully provisioned.
+ #
+ provisioner "remote-exec" {
+ inline = [
+ "echo Waiting for cloud init to complete...",
+ "sudo cloud-init status --wait > /dev/null",
+ "sudo cloud-init status --long"
+ ]
+ connection {
+ type = "ssh"
+ host = var.private_network ? self.private_ip : self.public_ip
+ user = "hadoop"
+ }
+ }
+ tags = {
+ Name = "accumulo-testing-${var.accumulo_branch_name}-branch-${terraform.workspace}-ws-${count.index}"
+ Branch = "${var.accumulo_branch_name}"
+ Workspace = "${terraform.workspace}"
+ noshutdown = true
+ nostartup = true
+ }
+}
+
+##############################
+# Create configuration files #
+##############################
+
+#
+# This section creates the ZooKeeper, Hadoop, and Accumulo configuration files
+# using templates in the templates directory and IP addresses from the EC2
+# nodes that we created above and variables.
+#
+
+locals {
+ ssh_keys = toset(concat(var.authorized_ssh_keys, [for k in var.authorized_ssh_key_files : file(k)]))
+ manager_ip = aws_instance.accumulo-testing[0].public_ip
+ worker_ips = var.instance_count > 1 ? slice(aws_instance.accumulo-testing[*].public_ip, 1, var.instance_count) : aws_instance.accumulo-testing[0].public_ip[*]
+ manager_private_ip = aws_instance.accumulo-testing[0].private_ip
+ worker_private_ips = var.instance_count > 1 ? slice(aws_instance.accumulo-testing[*].private_ip, 1, var.instance_count) : aws_instance.accumulo-testing[0].private_ip[*]
+}
+
+##############################
+# Cluster Configuration #
+##############################
+
+#
+# This section creates the ZooKeeper, Hadoop, and Accumulo configuration files
+# using templates in the templates directory and IP addresses from the EC2
+# nodes that we created above and variables.
+#
+module "config_files" {
+ source = "../modules/config-files"
+
+ software_root = var.software_root
+ upload_host = var.private_network ? local.manager_private_ip : local.manager_ip
+ manager_ip = local.manager_private_ip
+ worker_ips = local.worker_private_ips
+
+ zookeeper_dir = var.zookeeper_dir
+ hadoop_dir = var.hadoop_dir
+ accumulo_dir = var.accumulo_dir
+
+ maven_version = var.maven_version
+ zookeeper_version = var.zookeeper_version
+ hadoop_version = var.hadoop_version
+ accumulo_version = var.accumulo_version
+
+ accumulo_repo = var.accumulo_repo
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_testing_repo = var.accumulo_testing_repo
+ accumulo_testing_branch_name = var.accumulo_testing_branch_name
+
+ accumulo_instance_name = var.accumulo_instance_name
+ accumulo_root_password = var.accumulo_root_password
+}
+
+#
+# This module uploads any local tarballs to the manager VM and
+# stores them on the NFS share.
+#
+module "upload_software" {
+ source = "../modules/upload-software"
+
+ local_sources_dir = var.local_sources_dir
+ upload_dir = var.software_root
+ upload_host = var.private_network ? local.manager_private_ip : local.manager_ip
+}
+
+#
+# This section performs final configuration of the Accumulo cluster.
+#
+module "configure_nodes" {
+ source = "../modules/configure-nodes"
+
+ software_root = var.software_root
+ upload_host = var.private_network ? local.manager_private_ip : local.manager_ip
+
+ accumulo_instance_name = module.config_files.accumulo_instance_name
+ accumulo_root_password = module.config_files.accumulo_root_password
+
+ depends_on = [
+ module.upload_software,
+ module.config_files
+ ]
+}
+
+####################################################
+# Create the Route53 A record for the manager node #
+####################################################
+
+resource "aws_route53_record" "manager" {
+ count = var.create_route53_records ? 1 : 0
+ zone_id = data.aws_route53_zone.private_zone[0].zone_id
+ name = "manager-${var.accumulo_branch_name}-${terraform.workspace}.${data.aws_route53_zone.private_zone[0].name}"
+ type = "A"
+ ttl = "300"
+ records = [var.private_network ? local.manager_private_ip : local.manager_ip]
+}
+
+resource "aws_route53_record" "worker" {
+ count = var.create_route53_records ? length(local.worker_ips) : 0
+ zone_id = data.aws_route53_zone.private_zone[0].zone_id
+ name = "worker${count.index}-${var.accumulo_branch_name}-${terraform.workspace}.${data.aws_route53_zone.private_zone[0].name}"
+ type = "A"
+ ttl = "300"
+ records = [var.private_network ? local.worker_private_ips[count.index] : local.worker_ips[count.index]]
+}
+
+##############################
+# Outputs #
+##############################
+output "manager_ip" {
+ value = var.private_network ? local.manager_private_ip : local.manager_ip
+ description = "The IP address of the manager instance."
+}
+
+output "worker_ips" {
+ value = var.private_network ? local.worker_private_ips : local.worker_ips
+ description = "The IP addresses of the worker instances."
+}
+
+output "accumulo_root_password" {
+ value = module.config_files.accumulo_root_password
+ description = "The supplied, or automatically generated Accumulo root user password."
+}
diff --git a/contrib/terraform-testing-infrastructure/aws/variables.tf b/contrib/terraform-testing-infrastructure/aws/variables.tf
new file mode 100644
index 0000000..b19c2a5
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/aws/variables.tf
@@ -0,0 +1,197 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "instance_count" {
+ default = "2"
+ description = "The number of EC2 instances to create"
+ nullable = false
+}
+
+variable "instance_type" {
+ default = "m5.2xlarge"
+ description = "The type of EC2 instances to create"
+ nullable = false
+}
+
+variable "root_volume_gb" {
+ default = "300"
+ description = "The size, in GB, of the EC2 instance root volume"
+ nullable = false
+}
+
+variable "software_root" {
+ default = "/opt/accumulo-testing"
+ description = "The full directory root where software will be installed"
+ nullable = false
+}
+
+variable "security_group" {
+ description = "The Security Group to use when creating AWS objects"
+ nullable = false
+}
+
+variable "us_east_1b_subnet" {
+ description = "The AWS subnet id for the us-east-1b subnet"
+ nullable = false
+}
+
+variable "us_east_1e_subnet" {
+ description = "The AWS subnet id for the us-east-1e subnet"
+ nullable = false
+}
+
+variable "route53_zone" {
+ description = "The name of the Route53 zone in which to create DNS addresses"
+ nullable = false
+}
+
+variable "create_route53_records" {
+ default = false
+ description = "Indicates whether or not route53 records will be created"
+ type = bool
+ nullable = false
+}
+
+variable "private_network" {
+ default = false
+ description = "Indicates whether or not the user is on a private network and access to hosts should be through the private IP addresses rather than public ones."
+ type = bool
+ nullable = false
+}
+
+variable "ami_owner" {
+ description = "The id of the AMI owner"
+ nullable = false
+}
+
+variable "ami_name_pattern" {
+ description = "The pattern of the name of the AMI to use"
+ nullable = false
+}
+
+variable "authorized_ssh_keys" {
+ description = "List of SSH keys for the developers that will log into the cluster"
+ type = list(string)
+ nullable = false
+}
+
+variable "authorized_ssh_key_files" {
+ default = []
+ description = "List of SSH public key files for the developers that will log into the cluster"
+ type = list(string)
+ nullable = false
+}
+
+variable "accumulo_instance_name" {
+ default = "accumulo-testing"
+ type = string
+ description = "The accumulo instance name."
+ nullable = false
+}
+
+variable "accumulo_root_password" {
+ default = null
+ type = string
+ description = "The password for the accumulo root user. A randomly generated password will be used if none is specified here."
+ nullable = true
+}
+
+variable "zookeeper_dir" {
+ default = "/data/zookeeper"
+ description = "The ZooKeeper directory on each EC2 node"
+ nullable = false
+}
+
+variable "hadoop_dir" {
+ default = "/data/hadoop"
+ description = "The Hadoop directory on each EC2 node"
+ nullable = false
+}
+
+variable "accumulo_dir" {
+ default = "/data/accumulo"
+ description = "The Accumulo directory on each EC2 node"
+ nullable = false
+}
+
+variable "maven_version" {
+ default = "3.8.4"
+ description = "The version of Maven to download and install"
+ nullable = false
+}
+
+variable "zookeeper_version" {
+ default = "3.5.9"
+ description = "The version of ZooKeeper to download and install"
+ nullable = false
+}
+
+variable "hadoop_version" {
+ default = "3.3.1"
+ description = "The version of Hadoop to download and install"
+ nullable = false
+}
+
+variable "accumulo_version" {
+ default = "2.1.0-SNAPSHOT"
+ description = "The version of Accumulo to download and install"
+ nullable = false
+}
+
+variable "accumulo_repo" {
+ default = "https://github.com/apache/accumulo.git"
+ description = "URL of the Accumulo git repo"
+ nullable = false
+}
+
+variable "accumulo_branch_name" {
+ default = "main"
+ description = "The name of the branch to build and install"
+ nullable = false
+}
+
+variable "accumulo_testing_repo" {
+ default = "https://github.com/apache/accumulo-testing.git"
+ description = "URL of the Accumulo Testing git repo"
+ nullable = false
+}
+
+variable "accumulo_testing_branch_name" {
+ default = "main"
+ description = "The name of the branch to build and install"
+ nullable = false
+}
+
+variable "local_sources_dir" {
+ default = ""
+ description = "Directory on local machine that contains Maven, ZooKeeper or Hadoop binary distributions or Accumulo source tarball"
+ nullable = true
+}
+
+variable "optional_cloudinit_config" {
+ default = null
+ type = string
+ description = "An optional config block for the cloud-init script. If you set this, you should consider setting cloudinit_merge_type to handle merging with the default script as you need."
+ nullable = true
+}
+
+variable "cloudinit_merge_type" {
+ default = null
+ type = string
+ description = "Describes the merge behavior for overlapping config blocks in cloud-init."
+ nullable = true
+}
diff --git a/contrib/terraform-testing-infrastructure/azure/main.tf b/contrib/terraform-testing-infrastructure/azure/main.tf
new file mode 100644
index 0000000..4e11749
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/azure/main.tf
@@ -0,0 +1,402 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# To install Terraform download the appropriate version from https://www.terraform.io/downloads.html
+# and copy the binary to /usr/local/bin or some other location on your PATH.
+#
+# Run "terraform init" in this directory to download the plugins that Terraform will need to run
+# this plan.
+#
+# Run "terraform plan" to see the changes that would be made if you applied this plan.
+#
+# Run "terraform apply" to see the changes that would be made and to optionally apply the plan.
+#
+#
+# This Terraform configuration does the following:
+#
+# 1. Create a virtual network, subnet, and network security group in Azure.
+#
+# 2. Create a NIC (attached to the security group) and VM in Azure for the manager.
+#
+# 3. Create a NIC (attached to the security group) and VM in Azure for each worker node.
+#
+# 4. VMs are created with a customized cloud-init script, and we wait for this script to complete.
+#
+# 5. Upload config files and installs the software on each node.
+#
+
+################################
+# Core Terraform Configuration #
+################################
+
+terraform {
+ required_version = ">= 1.1.0"
+ required_providers {
+ azurerm = {
+ source = "hashicorp/azurerm"
+ version = "~> 2.91.0"
+ }
+ }
+ backend "azurerm" {
+ resource_group_name = "accumulo-testing-tf-state"
+ storage_account_name = "accumulotesttfsteast"
+ container_name = "accumulo-testing-tf-state"
+ key = "accumulo-testing/terraform.tfstate"
+ }
+}
+
+provider "azurerm" {
+ features {}
+}
+
+locals {
+ os_type = can(regex("^.*[Uu]buntu.*$", var.vm_image.offer)) ? "ubuntu" : "centos"
+
+ ssh_keys = toset(concat(var.authorized_ssh_keys, [for k in var.authorized_ssh_key_files : file(k)]))
+
+ # Save the public/private IP addresses of the VMs to pass to sub-modules.
+ manager_ip = azurerm_linux_virtual_machine.manager.public_ip_address
+ worker_ips = azurerm_linux_virtual_machine.workers[*].public_ip_address
+ manager_private_ip = azurerm_linux_virtual_machine.manager.private_ip_address
+ worker_private_ips = azurerm_linux_virtual_machine.workers[*].private_ip_address
+
+ # This script is run on all nodes to ensure a "ready" state.
+ # Ready means ready to continue provisioning.
+ ready_script = [
+ "echo Waiting for cloud init to complete...",
+ "sudo cloud-init status --wait > /dev/null",
+ "sudo cloud-init status --long"
+ ]
+}
+
+# Place all resources in a resource group
+resource "azurerm_resource_group" "rg" {
+ count = var.create_resource_group ? 1 : 0
+ name = var.resource_group_name
+ location = var.location
+}
+
+#########################
+# Network Configuration #
+#########################
+
+# Creates a virtual network for use by this cluster.
+resource "azurerm_virtual_network" "accumulo_vnet" {
+ name = "${var.resource_name_prefix}-vnet"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ location = azurerm_resource_group.rg[0].location
+ address_space = var.network_address_space
+}
+
+# Create a subnet for this cluster. Give storage a service endpoint
+# so that we'll be able to create an NFS share.
+resource "azurerm_subnet" "internal" {
+ name = "${var.resource_name_prefix}-subnet"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ virtual_network_name = azurerm_virtual_network.accumulo_vnet.name
+ address_prefixes = var.subnet_address_prefixes
+}
+
+# Create a Network Security Group that only allows SSH (22)
+# traffic from the internet and denies everything else.
+resource "azurerm_network_security_group" "nsg" {
+ name = "${var.resource_name_prefix}-nsg"
+ location = azurerm_resource_group.rg[0].location
+ resource_group_name = azurerm_resource_group.rg[0].name
+
+ security_rule {
+ name = "allow-ssh"
+ priority = 1001
+ direction = "Inbound"
+ access = "Allow"
+ protocol = "Tcp"
+ source_port_range = "*"
+ destination_port_range = "22"
+ source_address_prefix = "*"
+ destination_address_prefix = "*"
+ }
+}
+
+####################
+# VM Configuration #
+####################
+
+# Generate cloud-init data to use when creating nodes.
+module "cloud_init_config" {
+ source = "../modules/cloud-init-config"
+
+ software_root = var.software_root
+ zookeeper_dir = var.zookeeper_dir
+ hadoop_dir = var.hadoop_dir
+ accumulo_dir = var.accumulo_dir
+ maven_version = var.maven_version
+ zookeeper_version = var.zookeeper_version
+ hadoop_version = var.hadoop_version
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_version = var.accumulo_version
+ authorized_ssh_keys = local.ssh_keys[*]
+ os_type = local.os_type
+
+ optional_cloudinit_config = var.optional_cloudinit_config
+ cloudinit_merge_type = var.cloudinit_merge_type
+}
+
+# Create a static public IP address for the manager node.
+resource "azurerm_public_ip" "manager" {
+ name = "${var.resource_name_prefix}-manager-ip"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ location = azurerm_resource_group.rg[0].location
+ allocation_method = "Static"
+}
+
+# Create a NIC for the manager node.
+resource "azurerm_network_interface" "manager" {
+ name = "${var.resource_name_prefix}-manager-nic"
+ location = azurerm_resource_group.rg[0].location
+ resource_group_name = azurerm_resource_group.rg[0].name
+
+ enable_accelerated_networking = true
+
+ ip_configuration {
+ name = "internal"
+ subnet_id = azurerm_subnet.internal.id
+ public_ip_address_id = azurerm_public_ip.manager.id
+ private_ip_address_allocation = "Dynamic"
+ }
+}
+
+# Associate the manager node's NIC with the network security group.
+resource "azurerm_network_interface_security_group_association" "manager" {
+ network_interface_id = azurerm_network_interface.manager.id
+ network_security_group_id = azurerm_network_security_group.nsg.id
+}
+
+# Create a static public IP for each of the worker nodes.
+resource "azurerm_public_ip" "workers" {
+ count = var.worker_count
+ name = "${var.resource_name_prefix}-worker${count.index}-ip"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ location = azurerm_resource_group.rg[0].location
+ allocation_method = "Static"
+}
+
+# Create a NIC for each of the worker nodes.
+resource "azurerm_network_interface" "workers" {
+ count = var.worker_count
+ name = "${var.resource_name_prefix}-worker${count.index}-nic"
+ location = azurerm_resource_group.rg[0].location
+ resource_group_name = azurerm_resource_group.rg[0].name
+
+ enable_accelerated_networking = true
+
+ ip_configuration {
+ name = "internal"
+ subnet_id = azurerm_subnet.internal.id
+ public_ip_address_id = azurerm_public_ip.workers[count.index].id
+ private_ip_address_allocation = "Dynamic"
+ }
+}
+
+# Associate each of the worker nodes' NIC with the network security group.
+resource "azurerm_network_interface_security_group_association" "workers" {
+ count = var.worker_count
+ network_interface_id = azurerm_network_interface.workers[count.index].id
+ network_security_group_id = azurerm_network_security_group.nsg.id
+}
+
+# Create the manager VM.
+# Add a login user that can SSH to the VM using the first supplied SSH key.
+resource "azurerm_linux_virtual_machine" "manager" {
+ name = "${var.resource_name_prefix}-manager"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ location = azurerm_resource_group.rg[0].location
+ size = var.vm_sku
+ computer_name = "manager"
+ admin_username = var.admin_username
+ custom_data = base64encode(module.cloud_init_config.cloud_init_data)
+
+ disable_password_authentication = true
+
+ network_interface_ids = [
+ azurerm_network_interface.manager.id,
+ ]
+
+ dynamic "admin_ssh_key" {
+ for_each = local.ssh_keys
+ content {
+ username = var.admin_username
+ public_key = admin_ssh_key.value
+ }
+ }
+
+ os_disk {
+ storage_account_type = var.os_disk_type
+ caching = var.os_disk_caching
+ disk_size_gb = var.os_disk_size_gb
+ }
+
+ source_image_reference {
+ publisher = var.vm_image.publisher
+ offer = var.vm_image.offer
+ sku = var.vm_image.sku
+ version = var.vm_image.version
+ }
+
+ provisioner "remote-exec" {
+ inline = local.ready_script
+ connection {
+ type = "ssh"
+ user = self.admin_username
+ host = self.public_ip_address
+ }
+ }
+}
+
+# Create the worker VMs.
+# Add a login user that can SSH to the VM using the first supplied SSH key.
+resource "azurerm_linux_virtual_machine" "workers" {
+ count = var.worker_count
+ name = "${var.resource_name_prefix}-worker${count.index}"
+ resource_group_name = azurerm_resource_group.rg[0].name
+ location = azurerm_resource_group.rg[0].location
+ size = var.vm_sku
+ computer_name = "worker${count.index}"
+ admin_username = var.admin_username
+ custom_data = base64encode(module.cloud_init_config.cloud_init_data)
+
+ disable_password_authentication = true
+
+ network_interface_ids = [
+ azurerm_network_interface.workers[count.index].id
+ ]
+
+ dynamic "admin_ssh_key" {
+ for_each = local.ssh_keys
+ content {
+ username = var.admin_username
+ public_key = admin_ssh_key.value
+ }
+ }
+
+ os_disk {
+ storage_account_type = var.os_disk_type
+ caching = var.os_disk_caching
+ disk_size_gb = var.os_disk_size_gb
+ }
+
+ source_image_reference {
+ publisher = var.vm_image.publisher
+ offer = var.vm_image.offer
+ sku = var.vm_image.sku
+ version = var.vm_image.version
+ }
+
+ provisioner "remote-exec" {
+ inline = local.ready_script
+ connection {
+ type = "ssh"
+ user = self.admin_username
+ host = self.public_ip_address
+ }
+ }
+}
+
+##############################
+# Cluster Configuration #
+##############################
+
+#
+# This section creates the ZooKeeper, Hadoop, and Accumulo configuration files
+# using templates in the templates directory and IP addresses from the EC2
+# nodes that we created above and variables.
+#
+module "config_files" {
+ source = "../modules/config-files"
+
+ os_type = local.os_type
+
+ software_root = var.software_root
+ upload_host = local.manager_ip
+ manager_ip = local.manager_private_ip
+ worker_ips = local.worker_private_ips
+
+ zookeeper_dir = var.zookeeper_dir
+ hadoop_dir = var.hadoop_dir
+ accumulo_dir = var.accumulo_dir
+
+ maven_version = var.maven_version
+ zookeeper_version = var.zookeeper_version
+ hadoop_version = var.hadoop_version
+ accumulo_version = var.accumulo_version
+
+ accumulo_repo = var.accumulo_repo
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_testing_repo = var.accumulo_testing_repo
+ accumulo_testing_branch_name = var.accumulo_testing_branch_name
+
+ accumulo_instance_name = var.accumulo_instance_name
+ accumulo_root_password = var.accumulo_root_password
+}
+
+#
+# This module uploads any local tarballs to the manager VM and
+# stores them on the NFS share.
+#
+module "upload_software" {
+ source = "../modules/upload-software"
+
+ local_sources_dir = var.local_sources_dir
+ upload_dir = var.software_root
+ upload_host = local.manager_ip
+}
+
+#
+# This section performs final configuration of the Accumulo cluster.
+#
+module "configure_nodes" {
+ source = "../modules/configure-nodes"
+
+ software_root = var.software_root
+ upload_host = local.manager_ip
+
+ accumulo_instance_name = module.config_files.accumulo_instance_name
+ accumulo_root_password = module.config_files.accumulo_root_password
+
+ depends_on = [
+ module.upload_software,
+ module.config_files
+ ]
+}
+
+##############################
+# Outputs #
+##############################
+output "manager_ip" {
+ value = local.manager_ip
+ description = "The public IP address of the manager VM."
+}
+
+output "worker_ips" {
+ value = local.worker_ips
+ description = "The public IP addresses of the worker VMs."
+}
+
+output "accumulo_root_password" {
+ value = module.config_files.accumulo_root_password
+ description = "The user-supplied or automatically generated Accumulo root user password."
+}
diff --git a/contrib/terraform-testing-infrastructure/azure/variables.tf b/contrib/terraform-testing-infrastructure/azure/variables.tf
new file mode 100644
index 0000000..9edf84b
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/azure/variables.tf
@@ -0,0 +1,246 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "create_resource_group" {
+ default = true
+ type = bool
+ description = "Indicates whether or not resource_group_name should be created or is an existing resource group."
+ nullable = false
+}
+
+variable "resource_name_prefix" {
+ default = "accumulo-testing"
+ type = string
+ description = "A prefix applied to all resource names created by this template."
+ nullable = false
+}
+
+variable "resource_group_name" {
+ default = ""
+ type = string
+ description = "The name of the resource group to create or reuse. If not specified, the name is generated based on resource_name_prefix."
+ nullable = false
+}
+
+variable "location" {
+ type = string
+ description = "The Azure region where resources are to be created. If an existing resource group is specified, this value is ignored and the resource group's location is used."
+ nullable = false
+}
+
+variable "network_address_space" {
+ default = ["10.0.0.0/16"]
+ type = list(string)
+ description = "The network address space to use for the virtual network."
+ nullable = false
+}
+
+variable "subnet_address_prefixes" {
+ default = ["10.0.2.0/24"]
+ type = list(string)
+ description = "The subnet address prefixes to use for the accumulo testing subnet."
+ nullable = false
+}
+
+variable "worker_count" {
+ default = 1
+ type = number
+ description = "The number of worker VMs to create"
+ nullable = false
+ validation {
+ condition = var.worker_count > 0
+ error_message = "The number of VMs must be at least 1."
+ }
+}
+
+variable "vm_sku" {
+ default = "Standard_D8s_v4"
+ description = "The SKU of Azure VMs to create"
+ nullable = false
+}
+
+variable "admin_username" {
+ default = "azureuser"
+ type = string
+ description = "The username of the admin user, who can be authenticated with the first public ssh key."
+ nullable = false
+}
+
+variable "vm_image" {
+ default = {
+ "publisher" = "Canonical"
+ "offer" = "0001-com-ubuntu-server-focal"
+ "sku" = "20_04-lts-gen2"
+ "version" = "latest"
+ }
+ type = object({
+ publisher = string
+ offer = string
+ sku = string
+ version = string
+ })
+}
+
+variable "os_disk_size_gb" {
+ default = 300
+ type = number
+ description = "The size, in GB, of the OS disk"
+ nullable = false
+ validation {
+ condition = var.os_disk_size_gb >= 30
+ error_message = "The OS disk size must be >= 30GB."
+ }
+}
+
+variable "os_disk_type" {
+ default = "Standard_LRS"
+ type = string
+ description = "The disk type to use for OS disks. Possible values are Standard_LRS, StandardSSD_LRS, and Premium_LRS."
+ validation {
+ condition = contains(["Standard_LRS", "StandardSSD_LRS", "Premium_LRS"], var.os_disk_type)
+ error_message = "The value of os_disk_type must be one of Standard_LRS, StandardSSD_LRS, or Premium_LRS."
+ }
+}
+
+variable "os_disk_caching" {
+ default = "ReadOnly"
+ type = string
+ description = "The type of caching to use for the OS disk. Possible values are None, ReadOnly, and ReadWrite."
+ validation {
+ condition = contains(["None", "ReadOnly", "ReadWrite"], var.os_disk_caching)
+ error_message = "The value of os_disk_caching must be one of None, ReadOnly, or ReadWrite."
+ }
+}
+
+variable "software_root" {
+ default = "/opt/accumulo-testing"
+ description = "The full directory root where software will be installed"
+ nullable = false
+}
+
+variable "authorized_ssh_keys" {
+ description = "List of SSH keys for the developers that will log into the cluster"
+ type = list(string)
+ nullable = false
+}
+
+variable "authorized_ssh_key_files" {
+ default = []
+ description = "List of SSH public key files for the developers that will log into the cluster"
+ type = list(string)
+ nullable = false
+}
+
+variable "accumulo_instance_name" {
+ default = "accumulo-testing"
+ type = string
+ description = "The accumulo instance name."
+ nullable = false
+}
+
+variable "accumulo_root_password" {
+ default = null
+ type = string
+ description = "The password for the accumulo root user. A randomly generated password will be used if none is specified here."
+ nullable = true
+}
+
+variable "zookeeper_dir" {
+ default = "/data/zookeeper"
+ description = "The ZooKeeper directory on each node"
+ nullable = false
+}
+
+variable "hadoop_dir" {
+ default = "/data/hadoop"
+ description = "The Hadoop directory on each node"
+ nullable = false
+}
+
+variable "accumulo_dir" {
+ default = "/data/accumulo"
+ description = "The Accumulo directory on each node"
+ nullable = false
+}
+
+variable "maven_version" {
+ default = "3.8.4"
+ description = "The version of Maven to download and install"
+ nullable = false
+}
+
+variable "zookeeper_version" {
+ default = "3.5.9"
+ description = "The version of ZooKeeper to download and install"
+ nullable = false
+}
+
+variable "hadoop_version" {
+ default = "3.3.1"
+ description = "The version of Hadoop to download and install"
+ nullable = false
+}
+
+variable "accumulo_version" {
+ default = "2.1.0-SNAPSHOT"
+ description = "The version of Accumulo to download and install"
+ nullable = false
+}
+
+variable "accumulo_repo" {
+ default = "https://github.com/apache/accumulo.git"
+ description = "URL of the Accumulo git repo"
+ nullable = false
+}
+
+variable "accumulo_branch_name" {
+ default = "main"
+ description = "The name of the branch to build and install"
+ nullable = false
+}
+
+variable "accumulo_testing_repo" {
+ default = "https://github.com/apache/accumulo-testing.git"
+ description = "URL of the Accumulo Testing git repo"
+ nullable = false
+}
+
+variable "accumulo_testing_branch_name" {
+ default = "main"
+ description = "The name of the branch to build and install"
+ nullable = false
+}
+
+variable "local_sources_dir" {
+ default = ""
+ description = "Directory on local machine that contains Maven, ZooKeeper or Hadoop binary distributions or Accumulo source tarball"
+ nullable = true
+}
+
+variable "optional_cloudinit_config" {
+ default = null
+ type = string
+ description = "An optional config block for the cloud-init script. If you set this, you should consider setting cloudinit_merge_type to handle merging with the default script as you need."
+ nullable = true
+}
+
+variable "cloudinit_merge_type" {
+ default = null
+ type = string
+ description = "Describes the merge behavior for overlapping config blocks in cloud-init."
+ nullable = true
+}
diff --git a/contrib/terraform-testing-infrastructure/modules/cloud-init-config/files/update-hosts-genders.sh b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/files/update-hosts-genders.sh
new file mode 100644
index 0000000..84edf6f
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/files/update-hosts-genders.sh
@@ -0,0 +1,68 @@
+#! /usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -eo pipefail
+
+if [ $# -ne 2 ]; then
+ echo "usage: $0 additional_hosts_file additional_genders_file" >&2
+ exit 1
+fi
+
+HOSTS_ADDITIONS=$1
+GENDERS_ADDITIONS=$2
+
+begin_hosts_marker="##### BEGIN GENERATED HOSTS #####"
+end_hosts_marker="##### END GENERATED HOSTS #####"
+begin_genders_marker="##### BEGIN GENERATED GENDERS #####"
+end_genders_marker="##### END GENERATED GENDERS #####"
+
+# Update the hosts file locally
+# Wrap the supplied host additions with markers that we'll use to strip it back out.
+TMPHOSTS=/tmp/hosts$$
+cat > $TMPHOSTS <<EOF
+$begin_hosts_marker
+##### DO NOT EDIT THIS SECTION #####
+$(<"$HOSTS_ADDITIONS")
+$end_hosts_marker
+EOF
+# Strip out any previously applied hosts additions, and then tack the new ones on to the end of /etc/hosts.
+sudo sed -ri '/^'"$begin_hosts_marker"'$/,/^'"$end_hosts_marker"'$/d' /etc/hosts
+cat "$TMPHOSTS" | sudo tee -a /etc/hosts > /dev/null
+
+# Update the genders file locally
+TMPGENDERS=/tmp/genders$$
+cat > $TMPGENDERS <<EOF
+$begin_genders_marker
+$(<"$GENDERS_ADDITIONS")
+$end_genders_marker
+EOF
+[[ -f /etc/genders ]] && sudo sed -ri '/^'"$begin_genders_marker"'$/,/^'"$end_genders_marker"'$/d' /etc/genders
+cat "$TMPGENDERS" | sudo tee -a /etc/genders > /dev/null
+echo "Check genders file validity..."
+nodeattr -k
+
+# Now copy hosts updates to the workers and apply
+pdcp -g worker $TMPHOSTS $TMPHOSTS
+pdsh -S -g worker 'sudo sed -ri '"'"'/^'"$begin_hosts_marker"'$/,/^'"$end_hosts_marker"'$/d'"'"' /etc/hosts'
+pdsh -S -g worker 'cat '$TMPHOSTS' | sudo tee -a /etc/hosts > /dev/null && rm -f $TMPHOSTS'
+rm -f $TMPHOSTS
+
+# Copy genders updates to the workers and apply
+pdcp -g worker $TMPGENDERS $TMPGENDERS
+pdsh -S -g worker "sudo cp $TMPGENDERS /etc/genders && rm -f $TMPGENDERS"
+rm -f $TMPGENDERS
diff --git a/contrib/terraform-testing-infrastructure/modules/cloud-init-config/main.tf b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/main.tf
new file mode 100644
index 0000000..1de3a15
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/main.tf
@@ -0,0 +1,101 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "software_root" {}
+variable "zookeeper_dir" {}
+variable "hadoop_dir" {}
+variable "accumulo_dir" {}
+variable "maven_version" {}
+variable "zookeeper_version" {}
+variable "hadoop_version" {}
+variable "accumulo_branch_name" {}
+variable "accumulo_version" {}
+variable "authorized_ssh_keys" {}
+variable "cloudinit_merge_type" {
+ default = "dict(recurse_array,no_replace)+list(append)"
+ nullable = false
+}
+variable "optional_cloudinit_config" {
+ default = ""
+ nullable = false
+}
+variable "os_type" {
+ default = "centos"
+ type = string
+ nullable = false
+ validation {
+ condition = contains(["centos", "ubuntu"], var.os_type)
+ error_message = "The value of os_type must be either 'centos' or 'ubuntu'."
+ }
+}
+
+#####################
+# Create Hadoop Key #
+#####################
+resource "tls_private_key" "hadoop" {
+ algorithm = "RSA"
+ rsa_bits = "4096"
+}
+
+################################
+# Generate Cloud Init Template #
+################################
+locals {
+ ssh_keys = concat(var.authorized_ssh_keys, tls_private_key.hadoop.public_key_openssh[*])
+ cloud_init_script = templatefile("${path.module}/templates/cloud-init.tftpl", {
+ files_path = "${path.module}/files"
+ software_root = var.software_root
+ zookeeper_dir = var.zookeeper_dir
+ hadoop_dir = var.hadoop_dir
+ accumulo_dir = var.accumulo_dir
+ maven_version = var.maven_version
+ zookeeper_version = var.zookeeper_version
+ hadoop_version = var.hadoop_version
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_version = var.accumulo_version
+ authorized_ssh_keys = local.ssh_keys[*]
+ os_type = var.os_type
+ hadoop_public_key = indent(6, tls_private_key.hadoop.public_key_openssh)
+ hadoop_private_key = indent(6, tls_private_key.hadoop.private_key_pem)
+ })
+}
+
+data "cloudinit_config" "cfg" {
+ gzip = false
+ base64_encode = false
+ part {
+ filename = "init.cfg"
+ content_type = "text/cloud-config"
+ content = local.cloud_init_script
+ }
+
+ # Allow for a user-specified cloud-init script to be passed in.
+ # This will always be included, but if it's empty then cloud-init
+ # will ignore it.
+ part {
+ filename = "userdefined.cfg"
+ content_type = "text/cloud-config"
+ merge_type = var.cloudinit_merge_type
+ content = var.optional_cloudinit_config
+ }
+}
+
+output "cloud_init_data" {
+ value = data.cloudinit_config.cfg.rendered
+}
+
+
diff --git a/contrib/terraform-testing-infrastructure/modules/cloud-init-config/templates/cloud-init.tftpl b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/templates/cloud-init.tftpl
new file mode 100644
index 0000000..30cc806
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/cloud-init-config/templates/cloud-init.tftpl
@@ -0,0 +1,151 @@
+#cloud-config
+#
+# Don't log key information
+#
+no_ssh_fingerprints: true
+# Create the hadoop and docker groups
+groups:
+ - hadoop
+ - docker
+# Add default auto created user to docker group
+system_info:
+ default_user:
+ groups: [docker]
+# Create users
+users:
+ - default
+ - name: hadoop
+ homedir: /home/hadoop
+ no_create_home: false
+ primary_group: hadoop
+ groups: docker
+ shell: /bin/bash
+ sudo: ALL=(ALL) NOPASSWD:ALL
+ #
+ # NOTE: The ssh_authorized_keys section of the hadoop user configuration should contain
+ # the public key for every developer that is going to log into the nodes. This
+ # allows the developer to log into the nodes using the command: ssh hadoop@<ip>
+ #
+ ssh_authorized_keys:
+%{ for key in authorized_ssh_keys ~}
+ - ${key}
+%{ endfor ~}
+
+%{ if os_type == "centos" ~}
+yum_repos:
+ docker:
+ name: Docker CE Stable - $basearch
+ baseurl: https://download.docker.com/linux/centos/$releasever/$basearch/stable
+ enabled: true
+ gpgcheck: true
+ gpgkey: https://download.docker.com/linux/centos/gpg
+%{ endif ~}
+%{ if os_type == "ubuntu" ~}
+apt:
+ sources:
+ docker.list:
+ source: deb [arch=amd64] https://download.docker.com/linux/ubuntu $RELEASE stable
+ keyid: 9DC858229FC7DD38854AE2D88D81803C0EBFCD88
+%{ endif ~}
+
+#
+# yum/apt install the following packages
+#
+packages:
+%{ if os_type == "centos" ~}
+ - epel-release
+ - yum-utils
+ - gcc-c++
+ - java-11-openjdk-devel
+ - git
+%{ endif ~}
+%{ if os_type == "ubuntu" ~}
+ - net-tools
+ - g++
+ - openjdk-11-jdk-headless
+ - pdsh
+ - make
+%{ endif ~}
+ - docker-ce
+ - docker-ce-cli
+ - containerd.io
+ - wget
+
+#
+# Make directories on each node
+#
+runcmd:
+ - mkdir -p ${software_root} ${zookeeper_dir} ${hadoop_dir} ${accumulo_dir}
+ - chown hadoop.hadoop ${software_root} ${zookeeper_dir} ${hadoop_dir} ${accumulo_dir}
+ - systemctl enable docker
+ - systemctl start docker
+%{ if os_type == "ubuntu" ~}
+ # Use bash instead of dash for the default shell
+ - ln -s bash /bin/sh.bash
+ - mv /bin/sh.bash /bin/sh
+%{ endif ~}
+%{ if os_type == "centos" ~}
+ # This has to be done here vs in the packages section because
+ # we install the epel-release package there and can't update
+ # the yum repo in the middle to make it know about pdsh-mod-genders
+ - yum -y update
+ - yum -y install pdsh-mod-genders
+%{ endif ~}
+ - sysctl -w vm.swappiness=0
+ - sysctl -p
+ # Move Hadoop ssh files in place. We couldn't do this directly in write_files
+ # because some distros with cloud-init have the write_files module run before
+ # the users are created.
+ - mv /run/hadoop_ssh/* /home/hadoop/.ssh
+ - rm -rf /run/hadoop_ssh
+ - chown -R hadoop:hadoop /home/hadoop/.ssh
+
+#
+# Write files to the filesystem, will be copied into place
+# or invoked later
+#
+write_files:
+# Set up PDSH to skip strict host key checking. Also, on ubuntu, we need
+# to set ssh as the default method for the rcmd module.
+%{ if os_type == "ubuntu" ~}
+ - path: /etc/pdsh/rcmd_default
+ permissions: '0644'
+ content: |
+ ssh
+%{ endif ~}
+ - path: /etc/profile.d/pdsh.sh
+ permissions: '0755'
+ content: |
+ export PDSH_SSH_ARGS_APPEND="-o StrictHostKeyChecking=no"
+ # Increase open files limits for the Hadoop user
+ - path: /etc/security/limits.conf
+ append: true
+ content: |
+ hadoop soft nofile 4096
+ hadoop hard nofile 65535
+ # Set up files for Hadoop's home. Due to a cloud-init issue with some
+ # cloud provider images, the write_files module will run before users
+ # are created, so we cannot change ownership to hadoop here. We must
+ # do that as a runcmd, which we know runs later.
+ - path: /run/hadoop_ssh/config
+ defer: true
+ permissions: '0600'
+ content: |
+ Host *
+ Compression yes
+ StrictHostKeyChecking no
+ - path: /run/hadoop_ssh/id_rsa
+ defer: true
+ permissions: '0600'
+ content: |
+ ${hadoop_private_key}
+ - path: /run/hadoop_ssh/id_rsa.pub
+ defer: true
+ permissions: '0644'
+ content: |
+ ${hadoop_public_key}
+ # Install some utility scripts
+ - path: /usr/local/bin/update-hosts-genders.sh
+ permissions: '0755'
+ content: |
+ ${indent(6, file("${files_path}/update-hosts-genders.sh"))}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.json b/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.json
new file mode 100644
index 0000000..a6c4254
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.json
@@ -0,0 +1,2512 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": 1,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 20,
+ "panels": [],
+ "title": "Tablet Server Activity",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 0,
+ "y": 1
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_thrift_execute",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "count"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Thrift Execution Times",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 5,
+ "y": 1
+ },
+ "hiddenSeries": false,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_thrift_idle",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "count"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Thrift Idle Time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 11,
+ "y": 1
+ },
+ "hiddenSeries": false,
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_compactions_minc_queued",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "count"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ },
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_compactions_minc_running",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "count"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queued and Running Minor Compactions",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 17,
+ "y": 1
+ },
+ "hiddenSeries": false,
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_majc_queued",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "query": "SELECT mean(\"count\") FROM \"accumulo_tserver_compactions_majc_queued\" WHERE $timeFilter GROUP BY time($__interval) fill(0)",
+ "queryType": "randomWalk",
+ "rawQuery": false,
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ },
+ {
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_majc_running",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "refId": "B",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queued and Running Major Compactions",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 0,
+ "y": 9
+ },
+ "hiddenSeries": false,
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_ingest_mutations",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Ingest Mutations",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 5,
+ "y": 9
+ },
+ "hiddenSeries": false,
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_ingest_bytes",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Ingest Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 10,
+ "y": 9
+ },
+ "hiddenSeries": false,
+ "id": 23,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "linear"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_queries",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Lookup Count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 15,
+ "y": 9
+ },
+ "hiddenSeries": false,
+ "id": 24,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_scan_scanned_entries",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Lookup Scanned K/V",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 0,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 25,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_scan_results",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Lookup Scan Results",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 5,
+ "x": 5,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 26,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "process.name"
+ ],
+ "type": "tag"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_tserver_scan_results_bytes",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ },
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "non_negative_derivative"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TServer Lookup Scan Result Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 25
+ },
+ "id": 12,
+ "panels": [],
+ "title": "Garbage Collection Activity",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 0,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_candidates",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "RFile Delete Candidate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 5,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_deleted",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "RFiles Deleted",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 11,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 14,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_in_use",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "RFiles In Use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 17,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 15,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_errors",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "RFile GC Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 5,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "1s"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_wal_candidates",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "last"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WALog Delete Candidates",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 5,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 16,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "previous"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_wal_deleted",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WALogs Deleted",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 11,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_wal_in_use",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WALog In Use",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "InfluxDB",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 17,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.3.5",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "groupBy": [
+ {
+ "params": [
+ "$__interval"
+ ],
+ "type": "time"
+ },
+ {
+ "params": [
+ "0"
+ ],
+ "type": "fill"
+ }
+ ],
+ "measurement": "accumulo_gc_wal_errors",
+ "orderByTime": "ASC",
+ "policy": "default",
+ "queryType": "randomWalk",
+ "refId": "A",
+ "resultFormat": "time_series",
+ "select": [
+ [
+ {
+ "params": [
+ "value"
+ ],
+ "type": "field"
+ },
+ {
+ "params": [],
+ "type": "mean"
+ }
+ ]
+ ],
+ "tags": []
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WALog GC Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 26,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Accumulo Micrometer Test Dashboard",
+ "uid": "0govCeDnk",
+ "version": 8
+}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.yaml b/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.yaml
new file mode 100644
index 0000000..72eb773
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/files/grafana_dashboards/accumulo-dashboard.yaml
@@ -0,0 +1,6 @@
+apiVersion: 1
+providers:
+ - name: Accumulo-Dashboard
+ type: file
+ options:
+ path: /etc/grafana/provisioning/dashboards/accumulo-dashboard.json
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/main.tf b/contrib/terraform-testing-infrastructure/modules/config-files/main.tf
new file mode 100644
index 0000000..ec3940a
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/main.tf
@@ -0,0 +1,265 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "os_type" {
+ default = "centos"
+}
+variable "software_root" {}
+variable "upload_host" {}
+variable "manager_ip" {}
+variable "worker_ips" {}
+
+variable "zookeeper_dir" {}
+variable "hadoop_dir" {}
+variable "accumulo_dir" {}
+
+variable "maven_version" {}
+variable "zookeeper_version" {}
+variable "hadoop_version" {}
+variable "accumulo_version" {}
+
+variable "accumulo_repo" {}
+variable "accumulo_branch_name" {}
+variable "accumulo_testing_repo" {}
+variable "accumulo_testing_branch_name" {}
+
+variable "accumulo_instance_name" {
+ default = "accumulo-testing"
+ type = string
+ nullable = false
+}
+variable "accumulo_root_password" {
+ default = ""
+ type = string
+ nullable = false
+}
+
+
+locals {
+ conf_dir = "${path.module}/conf"
+ files_dir = "${path.module}/files"
+ templates_dir = "${path.module}/templates"
+
+ java_home = var.os_type == "ubuntu" ? "/usr/lib/jvm/java-11-openjdk-amd64" : "/usr/lib/jvm/java-11-openjdk"
+ accumulo_root_pw = coalesce(var.accumulo_root_password, random_string.accumulo_root_password.result)
+
+ template_vars = {
+ manager_ip = var.manager_ip
+ worker_ips = var.worker_ips
+ java_home = local.java_home
+ accumulo_branch_name = var.accumulo_branch_name
+ accumulo_dir = var.accumulo_dir
+ accumulo_repo = var.accumulo_repo
+ accumulo_testing_repo = var.accumulo_testing_repo
+ accumulo_testing_branch_name = var.accumulo_testing_branch_name
+ accumulo_version = var.accumulo_version
+ software_root = var.software_root
+ hadoop_dir = var.hadoop_dir
+ hadoop_version = var.hadoop_version
+ maven_version = var.maven_version
+ zookeeper_dir = var.zookeeper_dir
+ zookeeper_version = var.zookeeper_version
+ accumulo_instance_name = var.accumulo_instance_name
+ accumulo_root_password = local.accumulo_root_pw,
+ accumulo_instance_secret = random_string.accumulo_instance_secret.result
+ }
+}
+
+resource "random_string" "accumulo_root_password" {
+ length = 12
+ special = false
+}
+
+resource "random_string" "accumulo_instance_secret" {
+ length = 12
+ special = false
+}
+
+resource "local_file" "etc-hosts" {
+ filename = "${local.conf_dir}/hosts"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/hosts.tftpl", local.template_vars)
+}
+
+resource "local_file" "etc-genders" {
+ filename = "${local.conf_dir}/genders"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/genders.tftpl", local.template_vars)
+}
+
+resource "local_file" "zookeeper-config" {
+ filename = "${local.conf_dir}/zoo.cfg"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/zoo.cfg.tftpl", local.template_vars)
+}
+
+resource "local_file" "hadoop-core-config" {
+ filename = "${local.conf_dir}/core-site.xml"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/core-site.xml.tftpl", local.template_vars)
+}
+
+resource "local_file" "hadoop-hdfs-config" {
+ filename = "${local.conf_dir}/hdfs-site.xml"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/hdfs-site.xml.tftpl", local.template_vars)
+}
+
+resource "local_file" "hadoop-yarn-config" {
+ filename = "${local.conf_dir}/yarn-site.xml"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/yarn-site.xml.tftpl", local.template_vars)
+}
+
+resource "local_file" "accumulo-cluster-config" {
+ filename = "${local.conf_dir}/cluster.yaml"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/cluster.yaml.tftpl", local.template_vars)
+}
+
+resource "local_file" "accumulo-properties-config" {
+ filename = "${local.conf_dir}/accumulo.properties"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/accumulo-properties.tftpl", local.template_vars)
+}
+
+resource "local_file" "accumulo-client-properties-config" {
+ filename = "${local.conf_dir}/accumulo-client.properties"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/accumulo-client-properties.tftpl", local.template_vars)
+}
+
+resource "local_file" "telegraf-config" {
+ filename = "${local.conf_dir}/telegraf.conf"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/telegraf.conf.tftpl", local.template_vars)
+}
+
+resource "local_file" "namenode-systemd" {
+ filename = "${local.conf_dir}/hadoop-namenode.service"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/hadoop-namenode.service.tftpl", local.template_vars)
+}
+
+resource "local_file" "datanode-systemd" {
+ filename = "${local.conf_dir}/hadoop-datanode.service"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/hadoop-datanode.service.tftpl", local.template_vars)
+}
+
+resource "local_file" "resourcemanager-systemd" {
+ filename = "${local.conf_dir}/yarn-resourcemanager.service"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/yarn-resourcemanager.service.tftpl", local.template_vars)
+}
+
+resource "local_file" "nodemanager-systemd" {
+ filename = "${local.conf_dir}/yarn-nodemanager.service"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/yarn-nodemanager.service.tftpl", local.template_vars)
+}
+
+resource "local_file" "zookeeper-systemd" {
+ filename = "${local.conf_dir}/zookeeper.service"
+ file_permission = "644"
+ content = templatefile("${local.templates_dir}/zookeeper.service.tftpl", local.template_vars)
+}
+
+resource "local_file" "hadoop-bash-profile" {
+ filename = "${local.conf_dir}/hadoop_bash_profile"
+ file_permission = "600"
+ content = templatefile("${local.templates_dir}/hadoop_bash_profile.tftpl", local.template_vars)
+}
+
+resource "local_file" "hadoop-bashrc" {
+ filename = "${local.conf_dir}/hadoop_bashrc"
+ file_permission = "600"
+ content = templatefile("${local.templates_dir}/hadoop_bashrc.tftpl", local.template_vars)
+}
+
+resource "local_file" "install-software" {
+ filename = "${local.conf_dir}/install_sw.sh"
+ file_permission = "755"
+ content = templatefile("${local.templates_dir}/install_sw.sh.tftpl", local.template_vars)
+}
+
+resource "local_file" "initialize-hadoop" {
+ filename = "${local.conf_dir}/initialize-hadoop.sh"
+ file_permission = "755"
+ content = templatefile("${local.templates_dir}/initialize_hadoop.sh.tftpl", local.template_vars)
+}
+
+resource "local_file" "initialize-accumulo" {
+ filename = "${local.conf_dir}/initialize-accumulo.sh"
+ file_permission = "755"
+ content = templatefile("${local.templates_dir}/initialize_accumulo.sh.tftpl", local.template_vars)
+}
+
+resource "null_resource" "upload_config_files" {
+ depends_on = [
+ local_file.etc-hosts,
+ local_file.etc-genders,
+ local_file.zookeeper-config,
+ local_file.hadoop-core-config,
+ local_file.hadoop-hdfs-config,
+ local_file.hadoop-yarn-config,
+ local_file.accumulo-cluster-config,
+ local_file.accumulo-properties-config,
+ local_file.accumulo-client-properties-config,
+ local_file.telegraf-config,
+ local_file.namenode-systemd,
+ local_file.datanode-systemd,
+ local_file.resourcemanager-systemd,
+ local_file.nodemanager-systemd,
+ local_file.zookeeper-systemd,
+ local_file.hadoop-bash-profile,
+ local_file.hadoop-bashrc,
+ local_file.install-software,
+ local_file.initialize-hadoop,
+ local_file.initialize-accumulo
+ ]
+ connection {
+ type = "ssh"
+ host = var.upload_host
+ user = "hadoop"
+ }
+ provisioner "remote-exec" {
+ inline = [
+ "mkdir -p ${var.software_root}/grafana/dashboards"
+ ]
+ }
+ provisioner "file" {
+ source = local.conf_dir
+ destination = var.software_root
+ }
+ provisioner "file" {
+ source = "${local.files_dir}/grafana_dashboards/"
+ destination = "${var.software_root}/grafana/dashboards/"
+ }
+}
+
+output "conf_dir" {
+ value = local.conf_dir
+}
+
+output "accumulo_instance_name" {
+ value = var.accumulo_instance_name
+}
+
+output "accumulo_root_password" {
+ value = local.accumulo_root_pw
+}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-client-properties.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-client-properties.tftpl
new file mode 100644
index 0000000..d9fa383
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-client-properties.tftpl
@@ -0,0 +1,122 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+################################
+## Accumulo client configuration
+################################
+
+## NOTE - All properties that have a default are set with it. Properties that
+## are uncommented must be set by the user.
+
+## Instance properties
+## --------------
+## Name of Accumulo instance to connect to
+instance.name=${accumulo_instance_name}
+
+## Zookeeper connection information for Accumulo instance
+instance.zookeepers=${manager_ip}:2181
+
+## Zookeeper session timeout
+#instance.zookeepers.timeout=30s
+
+
+## Authentication properties
+## --------------
+## Authentication method (i.e password, kerberos, PasswordToken, KerberosToken, etc)
+auth.type=password
+
+## Accumulo principal/username for chosen authentication method
+auth.principal=root
+
+## Authentication token (ex. mypassword, /path/to/keytab)
+auth.token=${accumulo_root_password}
+
+
+## Batch Writer properties
+## --------------
+## The durability used to write to the write-ahead log. Legal values are: none, which skips the write-ahead log; log, which sends the data to the write-ahead log, but does nothing to make it durable; flush, which pushes data to the file system; and sync, which ensures the data is written to disk. Setting this property will change the durability for the BatchWriter session. A value of "default" will use the table's durability setting.
+#batch.writer.durability=default
+
+## Max amount of time (in seconds) to hold data in memory before flushing it
+#batch.writer.latency.max=120s
+
+## Max memory (in bytes) to batch before writing
+#batch.writer.memory.max=50M
+
+## Maximum number of threads to use for writing data to tablet servers.
+#batch.writer.threads.max=3
+
+## Max amount of time (in seconds) an unresponsive server will be re-tried. An exception is thrown when this timeout is exceeded. Set to zero for no timeout.
+#batch.writer.timeout.max=0
+
+
+## Batch Scanner properties
+## --------------
+## Number of concurrent query threads to spawn for querying
+#batch.scanner.num.query.threads=3
+
+
+## Scanner properties
+## --------------
+## Number of key/value pairs that will be fetched at time from tablet server
+#scanner.batch.size=1000
+
+
+## SSL properties
+## --------------
+## Enable SSL for client RPC
+#ssl.enabled=false
+
+## Password used to encrypt keystore
+#ssl.keystore.password=
+
+## Path to SSL keystore file
+#ssl.keystore.path=
+
+## Type of SSL keystore
+#ssl.keystore.type=jks
+
+## Password used to encrypt truststore
+#ssl.truststore.password=
+
+## Path to SSL truststore file
+#ssl.truststore.path=
+
+## Type of SSL truststore
+#ssl.truststore.type=jks
+
+## Use JSSE system properties to configure SSL
+#ssl.use.jsse=false
+
+
+## SASL properties
+## --------------
+## Enable SASL for client RPC
+#sasl.enabled=false
+
+## Kerberos principal/primary that Accumulo servers use to login
+#sasl.kerberos.server.primary=accumulo
+
+## SASL quality of protection. Valid values are 'auth', 'auth-int', and 'auth-conf'
+#sasl.qop=auth
+
+
+## Tracing properties
+## --------------
+## A list of span receiver classes to send trace spans
+#trace.span.receivers=org.apache.accumulo.tracer.ZooTraceClient
+
+## The zookeeper node where tracers are registered
+#trace.zookeeper.path=/tracers
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-properties.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-properties.tftpl
new file mode 100644
index 0000000..8c4934e
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/accumulo-properties.tftpl
@@ -0,0 +1,25 @@
+# This is the main configuration file for Apache Accumulo. Available configuration properties can be
+# found at https://accumulo.apache.org/docs/2.x/configuration/server-properties
+
+## Sets location in HDFS where Accumulo will store data
+instance.volumes=hdfs://${manager_ip}:8000/accumulo
+
+## Sets location of Zookeepers
+instance.zookeeper.host=${manager_ip}:2181
+
+## All Accumulo servers must have same secret. This secret is randomly generated by terraform.
+instance.secret=${accumulo_instance_secret}
+
+## Set to false if 'accumulo-util build-native' fails
+tserver.memory.maps.native.enabled=true
+
+# Run multiple compactors per node
+compactor.port.search=true
+
+# OpenTelemetry settings
+general.opentelemetry.enabled=true
+
+# Micrometer settings
+general.micrometer.enabled=true
+general.micrometer.jvm.metrics.enabled=true
+general.micrometer.factory=org.apache.accumulo.test.metrics.TestStatsDRegistryFactory
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/cluster.yaml.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/cluster.yaml.tftpl
new file mode 100644
index 0000000..ce33571
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/cluster.yaml.tftpl
@@ -0,0 +1,29 @@
+manager:
+ - ${manager_ip}
+
+monitor:
+ - ${manager_ip}
+
+gc:
+ - ${manager_ip}
+
+tserver:
+%{ for ip in worker_ips ~}
+ - ${ip}
+%{ endfor ~}
+
+compaction:
+ coordinator:
+ - ${manager_ip}
+ compactor:
+ - queue:
+ - q1
+ - q2
+ - q1:
+%{ for ip in worker_ips ~}
+ - ${ip}
+%{ endfor ~}
+ - q2:
+%{ for ip in worker_ips ~}
+ - ${ip}
+%{ endfor ~}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/core-site.xml.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/core-site.xml.tftpl
new file mode 100644
index 0000000..9f67480
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/core-site.xml.tftpl
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://${manager_ip}:8000</value>
+ </property>
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>${hadoop_dir}</value>
+ </property>
+</configuration>
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/genders.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/genders.tftpl
new file mode 100644
index 0000000..15d2043
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/genders.tftpl
@@ -0,0 +1,4 @@
+manager manager
+%{ for index, ip in worker_ips ~}
+worker${index} worker
+%{ endfor ~}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-datanode.service.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-datanode.service.tftpl
new file mode 100644
index 0000000..96cd6cf
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-datanode.service.tftpl
@@ -0,0 +1,18 @@
+[Unit]
+Description=Hadoop DataNode start/stop
+After=remote-fs.target
+
+[Service]
+Environment=JAVA_HOME=${java_home}
+Environment=HADOOP_HOME=${software_root}/hadoop/hadoop-${hadoop_version}
+Environment=HADOOP_LOG_DIR=${hadoop_dir}/logs
+User=hadoop
+Group=hadoop
+Type=oneshot
+ExecStart=${software_root}/hadoop/hadoop-${hadoop_version}/bin/hdfs --daemon start datanode
+ExecStop=${software_root}/hadoop/hadoop-${hadoop_version}/bin/hdfs --daemon stop datanode
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-namenode.service.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-namenode.service.tftpl
new file mode 100644
index 0000000..eb016bc
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop-namenode.service.tftpl
@@ -0,0 +1,18 @@
+[Unit]
+Description=Hadoop NameNode start/stop
+After=remote-fs.target
+
+[Service]
+Environment=JAVA_HOME=${java_home}
+Environment=HADOOP_HOME=${software_root}/hadoop/hadoop-${hadoop_version}
+Environment=HADOOP_LOG_DIR=${hadoop_dir}/logs
+User=hadoop
+Group=hadoop
+Type=oneshot
+ExecStart=${software_root}/hadoop/hadoop-${hadoop_version}/bin/hdfs --daemon start namenode
+ExecStop=${software_root}/hadoop/hadoop-${hadoop_version}/bin/hdfs --daemon stop namenode
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bash_profile.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bash_profile.tftpl
new file mode 100644
index 0000000..fdff0a1
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bash_profile.tftpl
@@ -0,0 +1,5 @@
+# .bash_profile
+# Get the aliases and functions
+if [ -f ~/.bashrc ]; then
+ source ~/.bashrc
+fi
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bashrc.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bashrc.tftpl
new file mode 100644
index 0000000..6343669
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hadoop_bashrc.tftpl
@@ -0,0 +1,17 @@
+# .bashrc
+if [ -f /etc/bashrc ]; then
+ source /etc/bashrc
+fi
+export JAVA_HOME=${java_home}
+export ZOOKEEPER_HOME=${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin
+export HADOOP_HOME=${software_root}/hadoop/hadoop-${hadoop_version}
+export ACCUMULO_HOME=${software_root}/accumulo/accumulo-${accumulo_version}
+export ACCUMULO_LOG_DIR=${accumulo_dir}/logs
+export M2_HOME=${software_root}/apache-maven/apache-maven-${maven_version}
+
+export ACCUMULO_JAVA_OPTS="-javaagent:${software_root}/accumulo/accumulo-${accumulo_version}/lib/opentelemetry-javaagent-1.7.1.jar -Dotel.traces.exporter=jaeger -Dotel.exporter.jaeger.endpoint=http://${manager_ip}:14250 -Dtest.meter.registry.host=${manager_ip} -Dtest.meter.registry.port=8125"
+
+# User specific environment and startup programs
+PATH=$PATH:$HOME/.local/bin:$HOME/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ACCUMULO_HOME/bin:$M2_HOME/bin
+export PATH
+ulimit -n 32768
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hdfs-site.xml.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hdfs-site.xml.tftpl
new file mode 100644
index 0000000..f81e9c3
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hdfs-site.xml.tftpl
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>dfs.datanode.synconclose</name>
+ <value>true</value>
+ </property>
+</configuration>
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/hosts.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hosts.tftpl
new file mode 100644
index 0000000..1bac9e2
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/hosts.tftpl
@@ -0,0 +1,4 @@
+${manager_ip} manager
+%{ for idx, ip in worker_ips ~}
+${ip} worker${idx}
+%{ endfor ~}
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_accumulo.sh.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_accumulo.sh.tftpl
new file mode 100755
index 0000000..b1cbcb1
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_accumulo.sh.tftpl
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -eo pipefail
+
+if [ $# -ne 2 ]; then
+ echo "usage: $0 instance_name root_password" >&2
+ exit 1
+fi
+
+#
+# Initialize Accumulo with the supplied instance name and root user password
+#
+accumulo init --instance-name "$1" --password "$2"
+
+#
+# Launch jaegertracing and telegraf containers
+#
+docker run -d --name jaeger \
+ --restart always \
+ -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
+ -p 5775:5775/udp -p 6831:6831/udp \
+ -p 6832:6832/udp -p 5778:5778 \
+ -p 16686:16686 -p 14268:14268 \
+ -p 14250:14250 -p 9411:9411 \
+ jaegertracing/all-in-one:1.29
+
+docker run --ulimit nofile=66000:66000 -d \
+ --restart always \
+ --name tig-stack \
+ -p 3003:3003 \
+ -p 3004:8888 \
+ -p 8086:8086 \
+ -p 22022:22 \
+ -p 8125:8125/udp \
+ -v /data/metrics/influxdb:/var/lib/influxdb \
+ -v /data/metrics/grafana:/var/lib/grafana \
+ -v ${software_root}/telegraf/conf:/etc/telegraf \
+ -v ${software_root}/grafana/dashboards:/etc/grafana/provisioning/dashboards \
+ artlov/docker-telegraf-influxdb-grafana:latest
+
+#
+# Start the Accumulo cluster
+# accumulo-cluster start doesn't return a proper error code, so make it true here so the script doesn't fail.
+#
+accumulo-cluster start || true
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_hadoop.sh.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_hadoop.sh.tftpl
new file mode 100644
index 0000000..1d97aa7
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/initialize_hadoop.sh.tftpl
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+set -eo pipefail
+
+#
+# Copy local systemctl unit files into place and enable zookeeper, the namenode, and the resourcemanager
+#
+sudo cp ${software_root}/conf/zookeeper.service /etc/systemd/system/zookeeper.service
+sudo cp ${software_root}/conf/hadoop-namenode.service /etc/systemd/system/hadoop-namenode.service
+sudo cp ${software_root}/conf/yarn-resourcemanager.service /etc/systemd/system/yarn-resourcemanager.service
+sudo systemctl daemon-reload
+sudo systemctl enable zookeeper
+sudo systemctl enable hadoop-namenode
+sudo systemctl enable yarn-resourcemanager
+
+#
+# Copy the datanode and nodemanager systemd unit file to each worker, and enable them there.
+#
+pdcp -g worker ${software_root}/conf/hadoop-datanode.service ${software_root}/conf/yarn-nodemanager.service /tmp/.
+pdsh -S -g worker "sudo cp /tmp/{hadoop-datanode,yarn-nodemanager}.service /etc/systemd/system/. && rm -f /tmp/{hadoop-datanode,yarn-nodemanager}.service"
+pdsh -S -g worker sudo systemctl daemon-reload
+pdsh -S -g worker sudo systemctl enable hadoop-datanode yarn-nodemanager
+
+#
+# Startup HDFS cluster
+# 1. Start zookeeper
+# 2. Format the namenode
+# 3. Start the namenode
+# 4. Start the resource manager
+# 5. Start datanodes and nodemanagers on the worker nodes
+#
+sudo systemctl start zookeeper
+hdfs namenode -format
+sudo systemctl start hadoop-namenode
+sudo systemctl start yarn-resourcemanager
+pdsh -S -g worker 'sudo systemctl start hadoop-datanode yarn-nodemanager'
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/install_sw.sh.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/install_sw.sh.tftpl
new file mode 100644
index 0000000..dc3b55a
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/install_sw.sh.tftpl
@@ -0,0 +1,161 @@
+#! /bin/bash
+
+set -eo pipefail
+
+export JAVA_HOME=${java_home}
+
+SOURCES_DIR="${software_root}/sources"
+
+cd ${software_root}
+mkdir -p $SOURCES_DIR
+
+#
+# Download and configure Maven
+#
+MVN_URL="https://dlcdn.apache.org/maven/maven-3/${maven_version}/binaries/apache-maven-${maven_version}-bin.tar.gz"
+MVN_SRC="$${SOURCES_DIR}/apache-maven-${maven_version}-bin.tar.gz"
+
+if [ ! -f $MVN_SRC ]; then
+ wget $MVN_URL -O $MVN_SRC
+fi
+if [ ! -d ${software_root}/apache-maven/apache-maven-${maven_version} ]; then
+ mkdir -p ${software_root}/apache-maven
+ tar zxf $MVN_SRC -C ${software_root}/apache-maven
+ [ -d ~/.m2 ] || mkdir ~/.m2
+ cat << 'END' >> ~/.m2/settings.xml
+ <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 https://maven.apache.org/xsd/settings-1.0.0.xsd">
+ <localRepository>${software_root}/apache-maven/repository</localRepository>
+ </settings>
+END
+fi
+
+#
+# Download and Install ZooKeeper
+#
+ZK_URL="https://dlcdn.apache.org/zookeeper/zookeeper-${zookeeper_version}/apache-zookeeper-${zookeeper_version}-bin.tar.gz"
+ZK_SRC="$${SOURCES_DIR}/apache-zookeeper-${zookeeper_version}-bin.tar.gz"
+
+if [ ! -f $ZK_SRC ]; then
+ wget $ZK_URL -O $ZK_SRC
+fi
+if [ ! -d ${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin ]; then
+ mkdir -p ${software_root}/zookeeper
+ tar zxf $ZK_SRC -C ${software_root}/zookeeper
+fi
+
+#
+# Download and Install Hadoop
+#
+HADOOP_URL="https://downloads.apache.org/hadoop/common/hadoop-${hadoop_version}/hadoop-${hadoop_version}.tar.gz"
+HADOOP_SRC="$${SOURCES_DIR}/hadoop-${hadoop_version}.tar.gz"
+
+if [ ! -f $HADOOP_SRC ]; then
+ wget $HADOOP_URL -O $HADOOP_SRC
+fi
+if [ ! -d ${software_root}/hadoop/hadoop-${hadoop_version} ]; then
+ mkdir -p ${software_root}/hadoop
+ tar zxf $HADOOP_SRC -C ${software_root}/hadoop
+fi
+
+#
+# If Accumulo binary tarball provided, then untar it and use it
+#
+ACCUMULO_SRC="$${SOURCES_DIR}/accumulo-${accumulo_version}-bin.tar.gz"
+
+if [ -f $ACCUMULO_SRC ]; then
+ echo "Binary tarball found, untarring it..."
+ mkdir -p ${software_root}/accumulo
+ tar zxf $ACCUMULO_SRC -C ${software_root}/accumulo
+else
+#
+# Download, build, and install Accumulo
+ echo "Binary tarball not found, cloning Accumulo repo from ${accumulo_repo}"
+ rm -rf $SOURCES_DIR/accumulo-repo
+ cd $SOURCES_DIR
+ git clone ${accumulo_repo} accumulo-repo
+ cd accumulo-repo
+ git checkout ${accumulo_branch_name}
+ ${software_root}/apache-maven/apache-maven-${maven_version}/bin/mvn -ntp clean package -DskipTests -DskipITs
+ mkdir -p ${software_root}/accumulo
+ tar zxf assemble/target/accumulo-${accumulo_version}-bin.tar.gz -C ${software_root}/accumulo
+fi
+${software_root}/accumulo/accumulo-${accumulo_version}/bin/accumulo-util build-native
+
+#
+# OpenTelemetry dependencies
+#
+if [ ! -f ${software_root}/accumulo/accumulo-${accumulo_version}/lib/opentelemetry-javaagent-1.7.1.jar ]; then
+ wget https://search.maven.org/remotecontent?filepath=io/opentelemetry/javaagent/opentelemetry-javaagent/1.7.1/opentelemetry-javaagent-1.7.1.jar -O ${software_root}/accumulo/accumulo-${accumulo_version}/lib/opentelemetry-javaagent-1.7.1.jar
+fi
+#
+# Micrometer dependencies
+#
+if [ ! -f ${software_root}/accumulo/accumulo-${accumulo_version}/lib/accumulo-test-${accumulo_version}.jar ]; then
+ if [ -f $SOURCES_DIR/accumulo-repo/test/target/accumulo-test-${accumulo_version}.jar ]; then
+ cp $SOURCES_DIR/accumulo-repo/test/target/accumulo-test-${accumulo_version}.jar ${software_root}/accumulo/accumulo-${accumulo_version}/lib/.
+ else
+ echo "accumulo-test-${accumulo_version}.jar not found, metrics won't work..."
+ fi
+fi
+if [ ! -f ${software_root}/accumulo/accumulo-${accumulo_version}/lib/micrometer-registry-statsd-1.7.4.jar ]; then
+ wget https://search.maven.org/remotecontent?filepath=io/micrometer/micrometer-registry-statsd/1.7.4/micrometer-registry-statsd-1.7.4.jar -O ${software_root}/accumulo/accumulo-${accumulo_version}/lib/micrometer-registry-statsd-1.7.4.jar
+fi
+
+#
+# Download and build Accumulo-Testing
+#
+TESTING_SRC="$${SOURCES_DIR}/accumulo-testing.zip"
+
+cd ${software_root}
+if [ -f $TESTING_SRC ]; then
+ echo "Accumulo Testing tarball found, untarring it..."
+ mkdir -p $SOURCES_DIR/accumulo-testing-repo
+ tar zxf $TESTING_SRC -C $SOURCES_DIR/accumulo-testing-repo
+else
+ # Download, build, and install Accumulo Testing
+ rm -rf $SOURCES_DIR/accumulo-testing-repo
+ cd $SOURCES_DIR
+ git clone ${accumulo_testing_repo} accumulo-testing-repo
+ cd accumulo-testing-repo
+ git checkout ${accumulo_testing_branch_name}
+ ${software_root}/apache-maven/apache-maven-${maven_version}/bin/mvn -ntp clean package -DskipTests -DskipITs
+fi
+
+#
+# Copy the configuration files to the correct places
+#
+cp ${software_root}/conf/zoo.cfg ${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin/conf/zoo.cfg
+cp ${software_root}/conf/hdfs-site.xml ${software_root}/hadoop/hadoop-${hadoop_version}/etc/hadoop/hdfs-site.xml
+cp ${software_root}/conf/core-site.xml ${software_root}/hadoop/hadoop-${hadoop_version}/etc/hadoop/core-site.xml
+cp ${software_root}/conf/yarn-site.xml ${software_root}/hadoop/hadoop-${hadoop_version}/etc/hadoop/yarn-site.xml
+cp ${software_root}/conf/cluster.yaml ${software_root}/accumulo/accumulo-${accumulo_version}/conf/cluster.yaml
+cp ${software_root}/conf/accumulo.properties ${software_root}/accumulo/accumulo-${accumulo_version}/conf/accumulo.properties
+cp ${software_root}/conf/accumulo-client.properties ${software_root}/accumulo/accumulo-${accumulo_version}/conf/accumulo-client.properties
+mkdir -p ${software_root}/telegraf/conf
+cp ${software_root}/conf/telegraf.conf ${software_root}/telegraf/conf/.
+
+# Update configuration properties for accumulo-testing
+defaultFS=$(hdfs getconf -confKey fs.defaultFS)
+sed -ri "s|^test.common.hdfs.root=.*$|test.common.hdfs.root=$${defaultFS}|" $SOURCES_DIR/accumulo-testing-repo/conf/accumulo-testing.properties
+sed -ri "s|^test.common.yarn.resource.manager=.*$|test.common.yarn.resource.manager=${manager_ip}|" $SOURCES_DIR/accumulo-testing-repo/conf/accumulo-testing.properties
+
+#
+# Make directories that will be needed for metrics collection
+#
+sudo mkdir -p /data/metrics/influxdb
+sudo mkdir -p /data/metrics/grafana
+sudo chown -R hadoop:hadoop /data/metrics
+sudo chmod 777 /data/metrics/influxdb
+
+# Sync the software root dir to all of the worker nodes, but exclude accumulo source, maven, and conf dirs.
+echo "Syncing ${software_root} to worker nodes..."
+tar cf /tmp/accumulo-testing.tar -C ${software_root} \
+ --exclude=./conf \
+ --exclude=./apache-maven \
+ --exclude=./accumulo-repo \
+ --exclude=./sources .
+pdcp -g worker /tmp/accumulo-testing.tar /tmp/.
+pdsh -S -g worker tar xf /tmp/accumulo-testing.tar -C ${software_root}
+rm -f /tmp/accumulo-testing.tar
+pdsh -S -g worker rm -f /tmp/accumulo-testing.tar
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/telegraf.conf.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/telegraf.conf.tftpl
new file mode 100644
index 0000000..70888e7
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/telegraf.conf.tftpl
@@ -0,0 +1,301 @@
+# Telegraf Configuration
+#
+# Telegraf is entirely plugin driven. All metrics are gathered from the
+# declared inputs, and sent to the declared outputs.
+#
+# Plugins must be declared in here to be active.
+# To deactivate a plugin, comment out the name and any variables.
+#
+# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
+# file would generate.
+#
+
+
+# Global tags can be specified here in key="value" format.
+[global_tags]
+ # dc = "us-east-1" # will tag all metrics with dc=us-east-1
+ # rack = "1a"
+ ## Environment variables can be used as tags, and throughout the config file
+ # user = "$USER"
+
+
+# Configuration for telegraf agent
+[agent]
+ ## Default data collection interval for all inputs
+ interval = "10s"
+ ## Rounds collection interval to 'interval'
+ ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
+ round_interval = true
+
+ ## Telegraf will send metrics to outputs in batches of at most
+ ## metric_batch_size metrics.
+ ## This controls the size of writes that Telegraf sends to output plugins.
+ metric_batch_size = 1000
+
+ ## Maximum number of unwritten metrics per output. Increasing this value
+ ## allows for longer periods of output downtime without dropping metrics at the
+ ## cost of higher maximum memory usage.
+ metric_buffer_limit = 100000
+
+ ## Collection jitter is used to jitter the collection by a random amount.
+ ## Each plugin will sleep for a random time within jitter before collecting.
+ ## This can be used to avoid many plugins querying things like sysfs at the
+ ## same time, which can have a measurable effect on the system.
+ collection_jitter = "0s"
+
+ ## Default flushing interval for all outputs. Maximum flush_interval will be
+ ## flush_interval + flush_jitter
+ flush_interval = "5s"
+
+ ## Jitter the flush interval by a random amount. This is primarily to avoid
+ ## large write spikes for users running a large number of telegraf instances.
+ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
+ flush_jitter = "0s"
+
+ ## By default or when set to "0s", precision will be set to the same
+ ## timestamp order as the collection interval, with the maximum being 1s.
+ ## ie, when interval = "10s", precision will be "1s"
+ ## when interval = "250ms", precision will be "1ms"
+ ## Precision will NOT be used for service inputs. It is up to each individual
+ ## service input to set the timestamp at the appropriate precision.
+ ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
+ precision = ""
+
+ ## Log at debug level.
+ # debug = false
+ ## Log only error level messages.
+ # quiet = false
+
+ ## Log target controls the destination for logs and can be one of "file",
+ ## "stderr" or, on Windows, "eventlog". When set to "file", the output file
+ ## is determined by the "logfile" setting.
+ # logtarget = "file"
+
+ ## Name of the file to be logged to when using the "file" logtarget. If set to
+ ## the empty string then logs are written to stderr.
+ # logfile = ""
+
+ ## The logfile will be rotated after the time interval specified. When set
+ ## to 0 no time based rotation is performed. Logs are rotated only when
+ ## written to, if there is no log activity rotation may be delayed.
+ # logfile_rotation_interval = "0d"
+
+ ## The logfile will be rotated when it becomes larger than the specified
+ ## size. When set to 0 no size based rotation is performed.
+ # logfile_rotation_max_size = "0MB"
+
+ ## Maximum number of rotated archives to keep, any older logs are deleted.
+ ## If set to -1, no archives are removed.
+ # logfile_rotation_max_archives = 5
+
+ ## Override default hostname, if empty use os.Hostname()
+ hostname = ""
+ ## If set to true, do not set the "host" tag in the telegraf agent.
+ omit_hostname = false
+
+
+###############################################################################
+# OUTPUT PLUGINS #
+###############################################################################
+
+
+# Configuration for sending metrics to InfluxDB
+[[outputs.influxdb]]
+ ## The full HTTP or UDP URL for your InfluxDB instance.
+ ##
+ ## Multiple URLs can be specified for a single cluster, only ONE of the
+ ## urls will be written to each interval.
+ # urls = ["unix:///var/run/influxdb.sock"]
+ # urls = ["udp://127.0.0.1:8089"]
+ # urls = ["http://127.0.0.1:8086"]
+ urls = ["http://${manager_ip}:8086"]
+
+ ## The target database for metrics; will be created as needed.
+ ## For UDP url endpoint database needs to be configured on server side.
+ database = "telegraf"
+
+ ## The value of this tag will be used to determine the database. If this
+ ## tag is not set the 'database' option is used as the default.
+ # database_tag = ""
+
+ ## If true, the 'database_tag' will not be included in the written metric.
+ # exclude_database_tag = false
+
+ ## If true, no CREATE DATABASE queries will be sent. Set to true when using
+ ## Telegraf with a user without permissions to create databases or when the
+ ## database already exists.
+ # skip_database_creation = false
+
+ ## Name of existing retention policy to write to. Empty string writes to
+ ## the default retention policy. Only takes effect when using HTTP.
+ # retention_policy = ""
+
+ ## The value of this tag will be used to determine the retention policy. If this
+ ## tag is not set the 'retention_policy' option is used as the default.
+ # retention_policy_tag = ""
+
+ ## If true, the 'retention_policy_tag' will not be included in the written metric.
+ # exclude_retention_policy_tag = false
+
+ ## Write consistency (clusters only), can be: "any", "one", "quorum", "all".
+ ## Only takes effect when using HTTP.
+ # write_consistency = "any"
+
+ ## Timeout for HTTP messages.
+ # timeout = "5s"
+
+ ## HTTP Basic Auth
+ username = "telegraf"
+ password = "telegraf"
+
+ ## HTTP User-Agent
+ # user_agent = "telegraf"
+
+ ## UDP payload size is the maximum packet size to send.
+ # udp_payload = "512B"
+
+ ## Optional TLS Config for use on HTTP connections.
+ # tls_ca = "/etc/telegraf/ca.pem"
+ # tls_cert = "/etc/telegraf/cert.pem"
+ # tls_key = "/etc/telegraf/key.pem"
+ ## Use TLS but skip chain & host verification
+ # insecure_skip_verify = false
+
+ ## HTTP Proxy override, if unset values the standard proxy environment
+ ## variables are consulted to determine which proxy, if any, should be used.
+ # http_proxy = "http://corporate.proxy:3128"
+
+ ## Additional HTTP headers
+ # http_headers = {"X-Special-Header" = "Special-Value"}
+
+ ## HTTP Content-Encoding for write request body, can be set to "gzip" to
+ ## compress body or "identity" to apply no encoding.
+ # content_encoding = "identity"
+
+ ## When true, Telegraf will output unsigned integers as unsigned values,
+ ## i.e.: "42u". You will need a version of InfluxDB supporting unsigned
+ ## integer values. Enabling this option will result in field type errors if
+ ## existing data has been written.
+ # influx_uint_support = false
+
+
+###############################################################################
+# INPUT PLUGINS #
+###############################################################################
+
+
+# Read metrics about cpu usage
+[[inputs.cpu]]
+ ## Whether to report per-cpu stats or not
+ percpu = true
+ ## Whether to report total system cpu stats or not
+ totalcpu = true
+ ## If true, collect raw CPU time metrics.
+ collect_cpu_time = false
+ ## If true, compute and report the sum of all non-idle CPU states.
+ report_active = false
+
+
+# Read metrics about disk usage by mount point
+[[inputs.disk]]
+ ## By default stats will be gathered for all mount points.
+ ## Set mount_points will restrict the stats to only the specified mount points.
+ # mount_points = ["/"]
+
+ ## Ignore mount points by filesystem type.
+ ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
+
+
+# Read metrics about disk IO by device
+[[inputs.diskio]]
+ # no configuration
+
+# Get kernel statistics from /proc/stat
+[[inputs.kernel]]
+ # no configuration
+
+
+# Read metrics about memory usage
+[[inputs.mem]]
+ # no configuration
+
+
+# Get the number of processes and group them by status
+[[inputs.processes]]
+ # no configuration
+
+
+# Read metrics about swap memory usage
+[[inputs.swap]]
+ # no configuration
+
+
+# Read metrics about system load & uptime
+[[inputs.system]]
+ ## Uncomment to remove deprecated metrics.
+ # fielddrop = ["uptime_format"]
+
+
+###############################################################################
+# SERVICE INPUT PLUGINS #
+###############################################################################
+
+
+# Statsd UDP/TCP Server
+[[inputs.statsd]]
+ ## Protocol, must be "tcp", "udp", "udp4" or "udp6" (default=udp)
+ protocol = "udp"
+
+ ## MaxTCPConnection - applicable when protocol is set to tcp (default=250)
+ max_tcp_connections = 250
+
+ ## Enable TCP keep alive probes (default=false)
+ tcp_keep_alive = false
+
+ ## Specifies the keep-alive period for an active network connection.
+ ## Only applies to TCP sockets and will be ignored if tcp_keep_alive is false.
+ ## Defaults to the OS configuration.
+ # tcp_keep_alive_period = "2h"
+
+ ## Address and port to host UDP listener on
+ service_address = ":8125"
+
+ ## The following configuration options control when telegraf clears its cache
+ ## of previous values. If set to false, then telegraf will only clear its
+ ## cache when the daemon is restarted.
+ ## Reset gauges every interval (default=true)
+ delete_gauges = false
+ ## Reset counters every interval (default=true)
+ delete_counters = false
+ ## Reset sets every interval (default=true)
+ delete_sets = true
+ ## Reset timings & histograms every interval (default=true)
+ delete_timings = true
+
+ ## Percentiles to calculate for timing & histogram stats
+ percentiles = [50.0, 90.0, 99.0, 99.9, 99.95, 100.0]
+
+ ## separator to use between elements of a statsd metric
+ metric_separator = "_"
+
+ ## Parses tags in the datadog statsd format
+ ## http://docs.datadoghq.com/guides/dogstatsd/
+ parse_data_dog_tags = true
+
+ ## Parses datadog extensions to the statsd format
+ datadog_extensions = true
+
+ ## Statsd data translation templates, more info can be read here:
+ ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
+ # templates = [
+ # "cpu.* measurement*"
+ # ]
+
+ ## Number of UDP messages allowed to queue up, once filled,
+ ## the statsd server will start dropping packets
+ allowed_pending_messages = 1000000
+
+ ## Number of timing/histogram values to track per-measurement in the
+ ## calculation of percentiles. Raising this limit increases the accuracy
+ ## of percentiles but also increases the memory usage and cpu time.
+ percentile_limit = 1000
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-nodemanager.service.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-nodemanager.service.tftpl
new file mode 100644
index 0000000..a85b3b1
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-nodemanager.service.tftpl
@@ -0,0 +1,18 @@
+[Unit]
+Description=Yarn NodeManager start/stop
+After=hadoop-datanode.service
+
+[Service]
+Environment=JAVA_HOME=${java_home}
+Environment=YARN_HOME=${software_root}/hadoop/hadoop-${hadoop_version}
+Environment=HADOOP_LOG_DIR=${hadoop_dir}/logs
+User=hadoop
+Group=hadoop
+Type=oneshot
+ExecStart=${software_root}/hadoop/hadoop-${hadoop_version}/bin/yarn --daemon start nodemanager
+ExecStop=${software_root}/hadoop/hadoop-${hadoop_version}/bin/yarn --daemon stop nodemanager
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-resourcemanager.service.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-resourcemanager.service.tftpl
new file mode 100644
index 0000000..85ca210
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-resourcemanager.service.tftpl
@@ -0,0 +1,18 @@
+[Unit]
+Description=Yarn ResourceManager start/stop
+After=hadoop-namenode.service
+
+[Service]
+Environment=JAVA_HOME=${java_home}
+Environment=YARN_HOME=${software_root}/hadoop/hadoop-${hadoop_version}
+Environment=HADOOP_LOG_DIR=${hadoop_dir}/logs
+User=hadoop
+Group=hadoop
+Type=oneshot
+ExecStart=${software_root}/hadoop/hadoop-${hadoop_version}/bin/yarn --daemon start resourcemanager
+ExecStop=${software_root}/hadoop/hadoop-${hadoop_version}/bin/yarn --daemon stop resourcemanager
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-site.xml.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-site.xml.tftpl
new file mode 100644
index 0000000..bbef225
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/yarn-site.xml.tftpl
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>yarn.resourcemanager.hostname</name>
+ <value>${manager_ip}</value>
+ </property>
+ <property>
+ <name>yarn.nodemanager.aux-services</name>
+ <value>mapreduce_shuffle</value>
+ </property>
+</configuration>
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/zoo.cfg.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/zoo.cfg.tftpl
new file mode 100644
index 0000000..3eba821
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/zoo.cfg.tftpl
@@ -0,0 +1,29 @@
+# The number of milliseconds of each tick
+tickTime=2000
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+# the directory where the snapshot is stored.
+# do not use /tmp for storage, /tmp here is just
+# example sakes.
+dataDir=${zookeeper_dir}
+# the port at which the clients will connect
+clientPort=2181
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+#autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+#autopurge.purgeInterval=1
+
diff --git a/contrib/terraform-testing-infrastructure/modules/config-files/templates/zookeeper.service.tftpl b/contrib/terraform-testing-infrastructure/modules/config-files/templates/zookeeper.service.tftpl
new file mode 100644
index 0000000..0b9d045
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/config-files/templates/zookeeper.service.tftpl
@@ -0,0 +1,18 @@
+[Unit]
+Description=ZooKeeper start/stop
+After=remote-fs.target
+
+[Service]
+Environment=JAVA_HOME=${java_home}
+Environment=ZOOKEEPER_HOME=${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin
+Environment=ZOO_LOG_DIR=${zookeeper_dir}/logs
+User=hadoop
+Group=hadoop
+Type=oneshot
+ExecStart=${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin/bin/zkServer.sh start
+ExecStop=${software_root}/zookeeper/apache-zookeeper-${zookeeper_version}-bin/bin/zkServer.sh stop
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/contrib/terraform-testing-infrastructure/modules/configure-nodes/main.tf b/contrib/terraform-testing-infrastructure/modules/configure-nodes/main.tf
new file mode 100644
index 0000000..016d821
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/configure-nodes/main.tf
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "software_root" {}
+variable "upload_host" {}
+variable "accumulo_instance_name" {}
+variable "accumulo_root_password" {}
+
+#####################################################
+# Run the install software script on the first node #
+#####################################################
+
+#
+# This connects to the first node and runs the install_sw.sh script.
+#
+resource "null_resource" "configure_manager_node" {
+ connection {
+ type = "ssh"
+ host = var.upload_host
+ user = "hadoop"
+ }
+ provisioner "remote-exec" {
+ inline = [<<-EOT
+ set -eo pipefail
+
+ # Put local bashrc/bash_profile in place and source it before doing anything else.
+ cp ${var.software_root}/conf/hadoop_bash_profile /home/hadoop/.bash_profile
+ cp ${var.software_root}/conf/hadoop_bashrc /home/hadoop/.bashrc
+ source /home/hadoop/.bash_profile
+
+ # Update the hosts and genders files across the cluster. This applies changes
+ # locally first, then uses pdcp/pdsh to apply them across the cluster.
+ /usr/local/bin/update-hosts-genders.sh ${var.software_root}/conf/hosts ${var.software_root}/conf/genders
+ # Now that genders is set up properly, we can use it to copy the hadoop .bashrc and .bash_profile out.
+ pdcp -g worker /home/hadoop/.bashrc /home/hadoop/.bash_profile /home/hadoop/.
+
+ bash ${var.software_root}/conf/install_sw.sh
+ bash ${var.software_root}/conf/initialize-hadoop.sh
+ bash -l ${var.software_root}/conf/initialize-accumulo.sh "${var.accumulo_instance_name}" "${var.accumulo_root_password}"
+ EOT
+ ]
+ }
+}
diff --git a/contrib/terraform-testing-infrastructure/modules/upload-software/main.tf b/contrib/terraform-testing-infrastructure/modules/upload-software/main.tf
new file mode 100644
index 0000000..5df890f
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/modules/upload-software/main.tf
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "local_sources_dir" {}
+variable "upload_dir" {}
+variable "upload_host" {}
+
+resource "null_resource" "upload_software" {
+ count = var.local_sources_dir == "" ? 0 : 1
+ connection {
+ type = "ssh"
+ host = var.upload_host
+ user = "hadoop"
+ }
+ provisioner "file" {
+ source = var.local_sources_dir
+ destination = "${var.upload_dir}/sources"
+ }
+}
diff --git a/contrib/terraform-testing-infrastructure/shared_state/aws/main.tf b/contrib/terraform-testing-infrastructure/shared_state/aws/main.tf
new file mode 100644
index 0000000..7a8d3e9
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/shared_state/aws/main.tf
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+terraform {
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = "~> 3.68.0"
+ }
+ }
+}
+
+provider "aws" {
+ region = var.region
+}
+
+resource "aws_s3_bucket" "terraform_state" {
+ bucket = var.bucket
+ acl = var.bucket_acl
+ force_destroy = var.bucket_force_destroy
+ # Enable versioning so we can see the full revision history of our
+ # state files
+ versioning {
+ enabled = true
+ }
+ # Enable server-side encryption by default
+ server_side_encryption_configuration {
+ rule {
+ apply_server_side_encryption_by_default {
+ sse_algorithm = "AES256"
+ }
+ }
+ }
+ tags = {
+ Name = "accumulo-testing-tf-state"
+ }
+}
+
+resource "aws_dynamodb_table" "terraform_locks" {
+ name = var.dynamodb_table_name
+ billing_mode = "PAY_PER_REQUEST"
+ hash_key = "LockID"
+ attribute {
+ name = "LockID"
+ type = "S"
+ }
+}
+
+output "bucket_name" {
+ value = aws_s3_bucket.terraform_state.id
+}
diff --git a/contrib/terraform-testing-infrastructure/shared_state/aws/variables.tf b/contrib/terraform-testing-infrastructure/shared_state/aws/variables.tf
new file mode 100644
index 0000000..a80e644
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/shared_state/aws/variables.tf
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "bucket" {
+ default = ""
+ type = string
+ description = "S3 bucket name for storing shared state. If not supplied, a name will be generated."
+}
+
+variable "bucket_acl" {
+ default = "private"
+ type = string
+ description = "The ACL to use for the S3 bucket. Defaults to private."
+ validation {
+ condition = contains(["private", "public-read", "public-read-write", "aws-exec-read", "authenticated-read", "log-delivery-write"], var.bucket_acl)
+ error_message = "The value of bucket_acl must be one of private, public-read, public-read-write, aws-exec-read, authenticated-read, or log-delivery-write."
+ }
+}
+
+variable "bucket_force_destroy" {
+ default = false
+ type = bool
+ description = "If true, upon terraform destroy, the bucket will be deleted even if it is not empty."
+}
+
+variable "region" {
+ type = string
+ default = "us-east-1"
+ description = "AWS region to use for S3 bucket."
+}
+
+variable "dynamodb_table_name" {
+ default = "accumulo-testing-tf-locks"
+ type = string
+ description = "DynamoDB table name for storing shared state."
+}
diff --git a/contrib/terraform-testing-infrastructure/shared_state/azure/main.tf b/contrib/terraform-testing-infrastructure/shared_state/azure/main.tf
new file mode 100644
index 0000000..e49d40d
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/shared_state/azure/main.tf
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+terraform {
+ required_providers {
+ azurerm = {
+ source = "hashicorp/azurerm"
+ version = "~> 2.91.0"
+ }
+ }
+}
+
+provider "azurerm" {
+ features {}
+}
+
+resource "azurerm_resource_group" "rg" {
+ name = var.resource_group_name
+ location = var.location
+}
+
+resource "azurerm_storage_account" "sa" {
+ name = var.storage_account_name
+ resource_group_name = azurerm_resource_group.rg.name
+ location = var.location
+ account_tier = "Standard"
+ account_replication_type = "LRS"
+ account_kind = "StorageV2"
+}
+
+resource "azurerm_storage_container" "example" {
+ name = var.storage_container_name
+ storage_account_name = azurerm_storage_account.sa.name
+ container_access_type = "private"
+}
diff --git a/contrib/terraform-testing-infrastructure/shared_state/azure/variables.tf b/contrib/terraform-testing-infrastructure/shared_state/azure/variables.tf
new file mode 100644
index 0000000..c367243
--- /dev/null
+++ b/contrib/terraform-testing-infrastructure/shared_state/azure/variables.tf
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+variable "location" {
+ default = "eastus"
+ type = string
+ description = "The region where resources will be deployed."
+}
+
+variable "resource_group_name" {
+ default = "accumulo-testing-tf-state"
+ type = string
+ description = "Name of the resource group that holds the shared state storage account."
+}
+
+variable "storage_account_name" {
+ default = "accumulotesttfsteast"
+ type = string
+ description = "Name of the storage account that will hold shared state."
+ validation {
+ condition = can(regex("^[a-z0-9]{3,24}$", var.storage_account_name))
+ error_message = "The storage_account_name variable must be lowercase letters and numbers only, and be 3-24 characters in length."
+ }
+}
+
+variable "storage_container_name" {
+ default = "accumulo-testing-tf-state"
+ type = string
+ description = "Name of the storage container that will hold shared state."
+ validation {
+ condition = can(regex("^[-a-z0-9]{3,63}$", var.storage_container_name))
+ error_message = "The storage_container_name variable must be lowercase letters, numbers, and hyphens, and be 3-63 characters in length."
+ }
+}
diff --git a/pom.xml b/pom.xml
index c0a78bc..783ba1f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -252,6 +252,12 @@
<excludes combine.children="append">
<exclude>test/bench/lib/data/*</exclude>
<exclude>test/compat/japi-compliance/*</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/**/terraform.tfstate*</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/**/*.tftpl</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/**/.terraform/**</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/**/.terraform*</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/**/*auto.tfvars*</exclude>
+ <exclude>contrib/terraform-testing-infrastructure/modules/config-files/**</exclude>
</excludes>
</configuration>
<executions>