You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/01/20 21:11:11 UTC
[1/2] kudu git commit: [kudu-jepsen] Kudu Jepsen tests
Repository: kudu
Updated Branches:
refs/heads/master e8381bb44 -> fb3bbbc8c
[kudu-jepsen] Kudu Jepsen tests
This patch contains David's code for the initial kudu-jepsen tests
as it was before KUDU-798 was resolved (i.e. as it was when it was
failing) and additional updates/fixes:
* Extra nemeses for the read/write register linearizability test
* Run multiple test scenarios in the scope of the register test
* Starting up master server: wait for the catalog manager
* Other assorted fixes for more robust operation
The clojure code is integrated into the Kudu maven build and is compiled
along with the other projects in a separate 'jepsen' profile.
The patch also adds functionality to run the kudu-jepsen tests
from the clojure-maven-plugin. The test uses the build machine
as the Jepsen control node, running the control logic and the Kudu
Java client there.
Restrictions:
1. Kudu nodes should run recent Debian/Ubuntu Linux distro
(that's due to the internal Jepsen's restrictions).
2. The 'kudu-master', 'kudu-tserver' and 'kudu' binaries in
the $KUDU_HOME/build/latest/bin should be built for the OS/distro
running on the Kudu cluster nodes.
Change-Id: I590c6e78840304b3131666c7037ff9a08dc77dea
Reviewed-on: http://gerrit.cloudera.org:8080/5492
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6985a544
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6985a544
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6985a544
Branch: refs/heads/master
Commit: 6985a544342014e29fe2615ddcb02ccdf4f8bd39
Parents: e8381bb
Author: Alexey Serbin <as...@cloudera.com>
Authored: Thu Jan 5 00:33:22 2017 -0800
Committer: David Ribeiro Alves <dr...@apache.org>
Committed: Fri Jan 20 21:05:28 2017 +0000
----------------------------------------------------------------------
java/kudu-jepsen/.gitignore | 24 ++
java/kudu-jepsen/README.adoc | 164 ++++++++
java/kudu-jepsen/pom.xml | 152 +++++++
java/kudu-jepsen/resources/kudu.flags | 21 +
java/kudu-jepsen/resources/ntp.conf.common | 30 ++
java/kudu-jepsen/resources/ntp.conf.server | 3 +
.../src/main/clojure/jepsen/kudu.clj | 104 +++++
.../src/main/clojure/jepsen/kudu/client.clj | 95 +++++
.../src/main/clojure/jepsen/kudu/nemesis.clj | 132 ++++++
.../src/main/clojure/jepsen/kudu/register.clj | 93 ++++
.../src/main/clojure/jepsen/kudu/table.clj | 107 +++++
.../src/main/clojure/jepsen/kudu/util.clj | 421 +++++++++++++++++++
.../src/test/clojure/jepsen/kudu_test.clj | 84 ++++
java/kudu-jepsen/src/utils/kudu_test_runner.clj | 115 +++++
java/pom.xml | 12 +
15 files changed, 1557 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/.gitignore
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/.gitignore b/java/kudu-jepsen/.gitignore
new file mode 100644
index 0000000..2125254
--- /dev/null
+++ b/java/kudu-jepsen/.gitignore
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Ignore files containing information on prior Leiningen runs/sessions.
+.lein-failures
+.lein-repl-history
+# Ignore jepsen files generated by local test runs.
+store/
+# Ignore the result of processing README.adoc with the asciidoctor tool.
+README.html
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/README.adoc
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/README.adoc b/java/kudu-jepsen/README.adoc
new file mode 100644
index 0000000..4c32428
--- /dev/null
+++ b/java/kudu-jepsen/README.adoc
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+= jepsen.kudu
+
+:author: Kudu Team
+
+A link:http://clojure.org[Clojure] library designed to run
+link:http://kudu.apache.org[Apache Kudu] consistency tests using
+the link:https://aphyr.com/tags/Jepsen[Jepsen] framework. Currently, a simple
+linearizability test for a read/write register is implemented and run
+for several fault injection scenarios.
+
+== Prerequisites and Requirements
+=== Operating System Requirements
+Only Debian/Ubuntu Linux is supported as a platform for the master and tablet
+server nodes. Tested to work on Debian 8 Jessie.
+
+== Overview
+The Clojure code is integrated into the project using the
+link:https://github.com/talios/clojure-maven-plugin[clojure-maven-plugin].
+The kudu-jepsen tests are invoked by executing the `clojure:run`
+plugin-specific goal. The parameters are passed via the standard
+`-D<property>=<value>` notation. There is a dedicated Clojure wrapper script
+`kudu_test_runner.clj` in `$KUDU_HOME/java/kudu-jepsen/src/utils` which
+populates the test environment with appropriate properties and iteratively
+runs all the registered tests with different nemeses scenarios.
+
+== Usage
+=== Building
+To build the library the following components are required:
+
+* JDK 8
+* Apache Maven version 3.3.6 or higher
+
+To build the project, run in the parent directory (i.e. `$KUDU_HOME/java`)
+[listing]
+----
+$ mvn clean compile test-compile -Pjepsen
+----
+
+=== Running
+The machines for Kudu master and tserver nodes should be created prior
+to running the test: the test does not create them itself. The machines should
+be up and running when starting the test.
+
+To run the test, the following components are required at the control node:
+
+* JDK 8
+* Apache Maven version 3.3.6 or higher
+* SSH client (and optionally, SSH authentication agent)
+* gnuplot (to visualize test results)
+
+Jepsen uses SSH to perform operations at DB nodes. The kudu-jepsen assumes
+that SSH keys are installed accordingly:
+
+* The public part of the SSH key should be added into the `authorized_keys` file
+ at all DB nodes for the `root` user
+* For the SSH private key the options are:
+** Add the key to the SSH authentication agent running at the control node
+** Specify the path to the file with the key in plain (non-encrypted) format
+ via the `sshKeyPath` property.
+
+If using SSH authentication agent to hold the SSH key for DB nodes access,
+run in the current directory:
+[listing]
+----
+$ mvn clojure:run -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0"
+----
+
+If not using SSH authentication agent, specify the SSH key location via the
+`sshKeyPath` property:
+[listing]
+----
+$ mvn clojure:run -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0" -DsshKeyPath="./vm_root_id_rsa"
+----
+
+Note that commas (not spaces) are used to separate the names of the nodes. The
+DNS resolver should be properly configured to resolve the specified hostnames
+into IP addresses.
+
+The `tserverNodes` property is used to specify the set of nodes where to run
+Kudu tablet servers. The `masterNodes` property is used to specify the set of
+nodes to run Kudu master servers.
+
+In the Jepsen terminology, Kudu master and tserver nodes are playing
+*Jepsen DB node* roles. The machine where the above mentioned maven command
+is run plays *Jepsen control node* role.
+
+=== Troubleshooting
+The majority of the kudu-jepsen test failures can be put into two classification
+buckets:
+
+* An error happened while setting up the testing environment, contacting
+ machines at the Kudu cluster, starting up Kudu server-side components, etc.
+* Jepsen's analysis detected an inconsistent history of operations.
+
+The former class of failures might be a manifestation of wrong configuration,
+a problem with the test environment or a bug in the test code itself.
+Those issues manifest themselves in messages like the following:
+[listing]
+----
+21:41:42 Ran 10 tests containing 10 assertions.
+21:41:42 0 failures, 10 errors.
+----
+To get more details, take a closer look at the output of `mvn clojure:run`
+or at particular `jepsen.log` files under
+$KUDU_HOME/java/kudu-jepsen/store/rw-register/<test_timestamp> directories.
+
+The latter class of failures represents more serious issues: manifestations
+of a non-linearizable history of operations. If Jepsen finds such an
+inconsistency, it outputs something like the following into the log:
+[listing]
+----
+Analysis invalid! (ﾉಥ益ಥ）ﾉ ┻━┻
+----
+To troubleshoot, first it's necessary to find where the failed test stores
+the results: it should be one of the timestamp-named sub-directories
+(e.g. `20170109T071938.000-0800`) under
+`$KUDU_HOME/java/kudu-jepsen/store/rw-register`. One of the possible ways
+to find the directory:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name jepsen.log | xargs grep 'Analysis invalid'
+./20170109T071938.000-0800/jepsen.log:Analysis invalid! (ﾉಥ益ಥ）ﾉ ┻━┻
+$
+----
+Another way is to find sub-directories where the `linear.svg` file is present:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name linear.svg
+./20170109T071938.000-0800/linear.svg
+$
+----
+Along with `jepsen.log` and `history.txt` files the failed test generates
+`linear.svg` file (gnuplot is required for that). The diagram in `linear.svg`
+illustrates the part of the history which Jepsen found inconsistent:
+the diagram shows the time/client operation status/system state relationship
+and the sequences of legal/illegal operations paths. From this point, the next
+step is to locate the corresponding part of the history in the `history.txt`
+file. Usually the problem appears around an activation interval of the test
+nemesis scenario. Once found, it's possible to tie the vicinity of the
+inconsistent operation sequence with the timestamps in the `jepsen.log` file.
+Having the timestamps of the operations and their sequence, it's possible to
+find the related messages in the `kudu-tserver.log` and `kudu-master.log` log files
+in sub-directories named as Kudu cluster nodes. Hopefully, that information
+is enough to create a reproducible scenario for further troubleshooting
+and debugging.
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/pom.xml
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/pom.xml b/java/kudu-jepsen/pom.xml
new file mode 100644
index 0000000..b443f4c
--- /dev/null
+++ b/java/kudu-jepsen/pom.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.kudu</groupId>
+ <artifactId>kudu-parent</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>kudu-jepsen</artifactId>
+ <name>Kudu Jepsen Tests</name>
+
+ <properties>
+ <!-- Jepsen tests require specific infrastructure and do not run as part of the
+ regular java tests.-->
+ <skipTests>true</skipTests>
+ <clojure.version>1.8.0</clojure.version>
+ <jepsen.version>0.1.3</jepsen.version>
+ <clojure.maven.plugin.version>1.7.1</clojure.maven.plugin.version>
+ <!-- List of Kudu Master nodes (e.g. "m0" or "m0,m1,m2") -->
+ <masterNodes>m0</masterNodes>
+ <!-- List of Kudu Tablet Server nodes (e.g. "t0,t1,t2") -->
+ <tserverNodes>t0,t1,t2,t3,t4</tserverNodes>
+ <!-- Path to the SSH key to access Kudu Master and
+ Tablet Server nodes as the 'root' user. If left empty, the test
+ will try to use keys from the SSH agent, if any. -->
+ <sshKeyPath></sshKeyPath>
+ <!-- Number of iterations to run the test suite in cycle. -->
+ <iterNum>1</iterNum>
+ </properties>
+
+ <packaging>clojure</packaging>
+
+ <repositories>
+ <repository>
+ <id>clojars</id>
+ <url>http://clojars.org/repo/</url>
+ </repository>
+ </repositories>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.clojure</groupId>
+ <artifactId>clojure</artifactId>
+ <version>${clojure.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>jepsen</groupId>
+ <artifactId>jepsen</artifactId>
+ <version>${jepsen.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.clojure</groupId>
+ <artifactId>tools.cli</artifactId>
+ <version>0.3.5</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.kudu</groupId>
+ <artifactId>kudu-client</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kudu</groupId>
+ <artifactId>kudu-client</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <!-- Jepsen imports its own slf4j-->
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.kudu</groupId>
+ <artifactId>interface-annotations</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>com.theoryinpractise</groupId>
+ <artifactId>clojure-maven-plugin</artifactId>
+ <version>${clojure.maven.plugin.version}</version>
+ <!-- Load maven extensions (like type and packaging handlers) for
+ the clojure maven plugin-->
+ <extensions>true</extensions>
+ <configuration>
+ <script>src/utils/kudu_test_runner.clj</script>
+ <args>
+ --masters=${masterNodes}
+ --tservers=${tserverNodes}
+ --ssh-key-path=${sshKeyPath}
+ --iter-num=${iterNum}
+ </args>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Do not run the checkstyle plugin for clojure sources -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>2.17</version>
+ <executions>
+ <execution>
+ <id>validate</id>
+ <phase>none</phase>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ <resources>
+ <resource>
+ <directory>resources</directory>
+ <filtering>false</filtering>
+ <includes>
+ <include>kudu.flags</include>
+ <include>ntp.conf.common</include>
+ <include>ntp.conf.server</include>
+ </includes>
+ </resource>
+ </resources>
+ </build>
+</project>
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/kudu.flags
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/kudu.flags b/java/kudu-jepsen/resources/kudu.flags
new file mode 100644
index 0000000..0890740
--- /dev/null
+++ b/java/kudu-jepsen/resources/kudu.flags
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This overrides all flags in a flag file
+--unlock_experimental_flags
+--unlock_unsafe_flags
+--logtostderr
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/ntp.conf.common
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/ntp.conf.common b/java/kudu-jepsen/resources/ntp.conf.common
new file mode 100644
index 0000000..6543eb1
--- /dev/null
+++ b/java/kudu-jepsen/resources/ntp.conf.common
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+tinker panic 0
+enable kernel
+enable ntp
+enable stats
+statistics loopstats peerstats clockstats sysstats
+filegen loopstats file loopstats type day enable
+filegen peerstats file peerstats type day enable
+filegen clockstats file clockstats type day enable
+filegen sysstats file sysstats type day enable
+logconfig =syncall +clockall +sysall +peerall
+logfile /var/log/ntpd.log
+statsdir /var/log/ntpstats/
+driftfile /var/lib/ntp/ntp.drift
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/ntp.conf.server
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/ntp.conf.server b/java/kudu-jepsen/resources/ntp.conf.server
new file mode 100644
index 0000000..aeac471
--- /dev/null
+++ b/java/kudu-jepsen/resources/ntp.conf.server
@@ -0,0 +1,3 @@
+enable calibrate
+server 127.127.1.0 burst iburst minpoll 4 maxpoll 4
+fudge 127.127.1.0 stratum 10
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
new file mode 100644
index 0000000..a871c63
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
@@ -0,0 +1,104 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu
+ "Tests for Apache Kudu"
+ (:require [clojure.tools.logging :refer :all]
+ [clojure.java.io :as io]
+ [clojure.java.shell :refer [sh]]
+ [clojure.string :as str]
+ [clojure.pprint :refer [pprint]]
+ [jepsen
+ [control :as c :refer [|]]
+ [db :as db]
+ [net :as net]
+ [tests :as tests]
+ [util :as util :refer [meh]]]
+ [jepsen.control.net :as cnet :refer [heal]]
+ [jepsen.control.util :as cu]
+ [jepsen.os.debian :as debian]
+ [jepsen.kudu.nemesis :as kn]
+ [jepsen.kudu.util :as ku]))
+
+(defn db
+ "Returns an object implementing the setup/teardown procedure (db/DB) for
+ a Kudu node. A node can run either a master or a tablet server: its role
+ is determined by whether the node is listed under :tservers or :masters
+ in the test map. The returned object also implements db/LogFiles, where
+ log-files returns a vector with the tablet server and/or master log file,
+ depending on the role(s) of the node."
+ []
+ (reify db/DB
+ (setup! [_ test node]
+ (info node "Setting up environment")
+ (c/su
+ ;; Restore the network. This is to clean-up left-overs from prior
+ ;; nemesis-induced grudges. The call is wrapped into `meh`, i.e. it is
+ ;; a best-effort step (presumably failures are ignored -- see
+ ;; jepsen.util/meh).
+ (meh (cnet/heal))
+
+ (c/exec :service :rsyslog :start)
+
+ ;; The helpers below come from jepsen.kudu.util and are run in this
+ ;; order: prepare-node, sync-time, start-kudu.
+ (ku/prepare-node test node)
+ (ku/sync-time test node)
+ (ku/start-kudu test node))
+ (info node "Kudu ready"))
+
+ (teardown! [_ test node]
+ (info node "Tearing down Kudu")
+ (c/su
+ ;; Stop only the Kudu component(s) the node is configured to run.
+ (when (.contains (:tservers test) node)
+ (ku/stop-kudu-tserver test node))
+ (when (.contains (:masters test) node)
+ (ku/stop-kudu-master test node)))
+ ;; TODO collect table data for debugging
+ (info node "Kudu stopped"))
+
+ db/LogFiles
+ (log-files [_ test node]
+ ;; Start from an empty vector and append a log file per role of the node.
+ (cond-> []
+ (.contains (:tservers test) node) (conj ku/kudu-tserver-log-file)
+ (.contains (:masters test) node) (conj ku/kudu-master-log-file)))))
+
+
+(defn merge-options
+  "Builds the complete option map for a Kudu test. The entries of `opts`
+  take precedence over the built-in defaults (:os, :net, :db, :tservers,
+  :masters, :table-name, the heartbeat settings and :ranges). The
+  :master-addresses and :nodes entries are always computed from the
+  resulting :masters and :tservers, overriding any such entries in `opts`."
+  [opts]
+  (let [defaults {:os debian/os
+                  :net net/iptables
+                  :db (db)
+                  ;; Nodes to run the tablet servers.
+                  :tservers [:n1 :n2 :n3 :n4 :n5]
+                  ;; Nodes to run the Kudu master.
+                  :masters [:m1]
+                  :table-name (str (:name opts)
+                                   "-"
+                                   (System/currentTimeMillis))
+                  :ts-hb-interval-ms 1000
+                  :ts-hb-max-failures-before-backoff 3
+                  :ts-raft-hb-interval-ms 50
+                  :ranges []}
+        effective (merge defaults opts)
+        masters (:masters effective)
+        tservers (:tservers effective)]
+    ;; The derived entries are put last, so they always win.
+    (assoc effective
+           :master-addresses (ku/concatenate-addresses ku/master-rpc-port
+                                                       masters)
+           :nodes (vec (concat tservers masters)))))
+
+;; Common setup for all kudu tests.
+(defn kudu-test
+ "Sets up the test parameters: returns the result of applying
+ `merge-options` to the test-specific options `opts`."
+ [opts]
+ (merge-options opts))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
new file mode 100644
index 0000000..31ab90c
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
@@ -0,0 +1,95 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.client
+ "Thin wrappers around Kudu Java client."
+ (:require [clojure.tools.logging :refer :all]
+ [clojure.pprint :refer [pprint]])
+ (:import [org.apache.kudu ColumnSchema
+ ColumnSchema$ColumnSchemaBuilder
+ Schema
+ Type])
+ (:import [org.apache.kudu.client AbstractKuduScannerBuilder
+ AsyncKuduScanner$ReadMode
+ BaseKuduTest
+ CreateTableOptions
+ KuduClient
+ KuduClient$KuduClientBuilder
+ KuduPredicate
+ KuduPredicate$ComparisonOp
+ KuduScanner
+ KuduSession
+ KuduTable
+ OperationResponse
+ PartialRow
+ RowResult
+ RowResultIterator]))
+
+(defn sync-client
+  "Creates a new synchronous Kudu client using a client builder constructed
+  from `master-addresses` and returns the client."
+  [master-addresses]
+  (.build (KuduClient$KuduClientBuilder. master-addresses)))
+
+(defn close-client
+  "Closes the given Kudu client. An exception thrown while closing is logged
+  at the info level and not propagated."
+  [sync-client]
+  (try
+    (.close sync-client)
+    (catch Exception ex
+      (info "Error closing client: " ex))))
+
+(defn column-schema
+  "Builds a ColumnSchema with the given name and type. The `key?` flag is
+  passed to the builder's `key` setter; it is false when omitted."
+  ([name type]
+   (column-schema name type false))
+  ([name type key?]
+   (let [builder (ColumnSchema$ColumnSchemaBuilder. name type)]
+     (.build (.key builder key?)))))
+
+(defn create-table
+ "Calls `createTable` on the given client with the specified table name,
+ schema and table options; returns whatever that call returns."
+ [sync-client name schema options]
+ (.createTable sync-client name schema options))
+
+(defn open-table
+ "Calls `openTable` on the given client for the table with the specified
+ name; returns whatever that call returns."
+ [sync-client name]
+ (.openTable sync-client name))
+
+(defn rr->tuple
+  "Transforms a RowResult into a tuple: a map from the keywordized column
+  name to the value of that column in the row. A column whose cell is NULL
+  is mapped to nil. Throws IllegalArgumentException (from `condp`) if a
+  column is of a type not listed below."
+  [row-result]
+  (let [columns (-> row-result .getSchema .getColumns)]
+    (into {}
+          (map-indexed
+            (fn [idx column]
+              (let [type (.getType column)
+                    ;; The typed getters are not meant for NULL cells, so
+                    ;; check for NULL first and represent it as nil.
+                    value (when-not (.isNull row-result idx)
+                            (condp = type
+                              Type/INT8 (.getByte row-result idx)
+                              Type/INT16 (.getShort row-result idx)
+                              Type/INT32 (.getInt row-result idx)
+                              Type/INT64 (.getLong row-result idx)
+                              Type/BINARY (.getBinaryCopy row-result idx)
+                              Type/STRING (.getString row-result idx)
+                              Type/BOOL (.getBoolean row-result idx)
+                              Type/FLOAT (.getFloat row-result idx)
+                              Type/DOUBLE (.getDouble row-result idx)
+                              Type/UNIXTIME_MICROS (.getLong row-result idx)))]
+                [(keyword (.getName column)) value]))
+            columns))))
+
+(defn drain-scanner-to-tuples
+  "Reads all the remaining rows from the scanner and returns them as a vector
+  of tuples (see `rr->tuple`), in the order the scanner returned them."
+  [scanner]
+  (loop [tuples []]
+    (if-not (.hasMoreRows scanner)
+      tuples
+      (let [rows (.nextRows scanner)]
+        ;; Convert every row right after fetching it from the iterator,
+        ;; one at a time.
+        (recur (loop [acc tuples]
+                 (if (.hasNext rows)
+                   (recur (conj acc (rr->tuple (.next rows))))
+                   acc)))))))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
new file mode 100644
index 0000000..ca106e0
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
@@ -0,0 +1,132 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.nemesis
+ "Nemeses for Apache Kudu."
+ (:refer-clojure :exclude [test])
+ (:require [jepsen
+ [client :as client]
+ [control :as c]
+ [nemesis :as nm]
+ [net :as net]
+ [util :as util]]
+ [clojure.tools.logging :refer :all]
+ [jepsen.kudu.util :as ku]))
+
+
+(defn tserver-partitioner
+ "Tablet server partitioner. Returns a nemesis (a client/Client) which
+ cuts network links between tablet servers in response to a :start
+ operation and restores them in response to a :stop operation. The links
+ to cut are defined by (grudge nodes), where `nodes` is the :tservers list
+ of the test. The network is also healed on setup and teardown."
+ [grudge]
+ (reify client/Client
+ (setup! [this test _]
+ (net/heal! (:net test) test)
+ this)
+
+ (invoke! [this test op]
+ ;; NOTE: `case` has no default clause, so an operation with :f other
+ ;; than :start or :stop results in an IllegalArgumentException.
+ (case (:f op)
+ ;; Only the tablet servers are passed to the grudge function. The local
+ ;; `grudge` below shadows the function and holds the computed grudge.
+ :start (let [grudge (grudge (:tservers test))]
+ (nm/partition! test grudge)
+ (assoc op :value (str "Cut off " (pr-str grudge))))
+ :stop (do (net/heal! (:net test) test)
+ (assoc op :value "fully connected"))))
+
+ (teardown! [this test]
+ (net/heal! (:net test) test))))
+
+
+(defn tserver-start-stopper
+ "Takes a targeting function which, given the list of tablet server nodes
+ (the :tservers of the test), returns a single node or collection of nodes
+ to affect, and two functions `(start! test node)` invoked on nemesis start,
+ and `(stop! test node)` invoked on nemesis stop.
+ Returns a nemesis which responds to :start and :stop by running the start!
+ and stop! fns on each of the given nodes. During `start!` and `stop!`, binds
+ the `jepsen.control` session to the given node, so you can just call `(c/exec
+ ...)`.
+
+ Re-selects a fresh node (or nodes) for each start--if targeter returns nil,
+ skips the start. A start arriving while a previous one has not been stopped
+ yet does not run start! again. The return values from the start and stop
+ fns will become the :values of the returned :info operations from the
+ nemesis, e.g.:
+
+ {:value {:n1 [:killed :kudu-tserver]}}"
+ [targeter start! stop!]
+ ;; `nodes` holds the nodes currently affected by the nemesis, or nil if
+ ;; the nemesis is not active.
+ (let [nodes (atom nil)]
+ (reify client/Client
+ (setup! [this test _] this)
+
+ (invoke! [this test op]
+ ;; Operations are handled one at a time: the atom is used as a monitor.
+ (locking nodes
+ (assoc op :type :info, :value
+ (case (:f op)
+ :start (if-let [ns (-> test :tservers targeter util/coll)]
+ ;; Remember the targets only if nothing is being disrupted now.
+ (if (compare-and-set! nodes nil ns)
+ (c/on-many ns (start! test c/*host*))
+ (str "nemesis already disrupting "
+ (pr-str @nodes)))
+ :no-target)
+ :stop (if-let [ns @nodes]
+ ;; Run stop! on the remembered targets, then mark the nemesis
+ ;; as inactive.
+ (let [value (c/on-many ns (stop! test c/*host*))]
+ (reset! nodes nil)
+ value)
+ :not-started)))))
+
+ (teardown! [this test]))))
+
+
+(defn tserver-partition-random-halves
+  "Returns a tablet server partitioner which shuffles the tablet servers,
+  bisects the shuffled list and cuts the network between the two halves."
+  []
+  (tserver-partitioner
+    (fn [nodes]
+      (-> nodes shuffle nm/bisect nm/complete-grudge))))
+
+
+(defn tserver-partition-majorities-ring
+ "Returns a tablet server partitioner using the `nm/majorities-ring` grudge:
+ a grudge in which every tablet server can see a majority, but no server
+ sees the *same* majority as any other."
+ []
+ (tserver-partitioner nm/majorities-ring))
+
+
+(defn kill-restart-tserver
+  "Returns a nemesis which kills and then restarts tablet servers. On
+  `{:f :start}` it sends SIGKILL to the kudu-tserver processes on the node(s)
+  picked by `(targeter list-of-nodes)`; on `{:f :stop}` it starts the tablet
+  server on those nodes again. Targeter may return either a single node or
+  a collection of nodes."
+  [targeter]
+  (letfn [(kill! [_test _node]
+            (c/su (c/exec :killall :-s :SIGKILL :kudu-tserver))
+            [:killed :kudu-tserver])
+          (restart! [test node]
+            (ku/start-kudu-tserver test node)
+            [:started :kudu-tserver])]
+    (tserver-start-stopper targeter kill! restart!)))
+
+(defn tserver-hammer-time
+  "Returns a nemesis which pauses and resumes tablet servers. On
+  `{:f :start}` it sends SIGSTOP to the kudu-tserver processes on the node(s)
+  picked by `(targeter list-of-nodes)`; on `{:f :stop}` it resumes them with
+  SIGCONT. The targeter defaults to `rand-nth` and may return either a single
+  node or a collection of nodes."
+  ([]
+   (tserver-hammer-time rand-nth))
+  ([targeter]
+   (let [signal-tserver (fn [sig]
+                          (c/su (c/exec :killall :-s sig :kudu-tserver)))]
+     (tserver-start-stopper
+       targeter
+       (fn pause [_test _node]
+         (signal-tserver "STOP")
+         [:paused :kudu-tserver])
+       (fn resume [_test _node]
+         (signal-tserver "CONT")
+         [:resumed :kudu-tserver])))))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
new file mode 100644
index 0000000..b0753ba
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
@@ -0,0 +1,93 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.register
+ "Simple linearizability test for a read/write register."
+ (:refer-clojure :exclude [test])
+ (:require [jepsen
+ [kudu :as kudu]
+ [client :as client]
+ [util :refer [majority]]
+ [checker :as checker]
+ [generator :as gen]
+ [nemesis :as nemesis]]
+ [jepsen.kudu.client :as kc]
+ [jepsen.kudu.table :as kt]
+ [jepsen.kudu.nemesis :as kn]
+ [clojure.tools.logging :refer :all]
+ [knossos.model :as model]))
+
+;; The key of the row which plays the role of the register: the client below
+;; reads and writes the value stored under this key.
+(def register-key "x")
+
+;; Operation generators. Both ignore their two arguments: 'r' yields a read
+;; invocation with no value, 'w' yields a write invocation of a random
+;; integer in the range [0, 10).
+(defn r [_ _] {:type :invoke, :f :read, :value nil})
+(defn w [_ _] {:type :invoke, :f :write, :value (rand-int 10)})
+
+;; Returns an implementation of the client/Client protocol for the register
+;; test.
+;;   table-created?  an atom holding a boolean; it is flipped from false to
+;;                   true by the first setup! call which then creates the table
+;;   kclient, ktable the Kudu client and the table used by invoke! and
+;;                   teardown!; both may be nil for an instance which is only
+;;                   used to call setup! on (setup! returns a new instance
+;;                   with its own client and table handle)
+(defn client
+ [table-created? kclient ktable]
+ (reify client/Client
+ (setup! [_ test _]
+ ;; Create the client and create/open the table.
+ (let [kclient (kc/sync-client (:master-addresses test))
+ table-name (:table-name test)
+ ;; The lock is taken on the shared atom: only the caller which wins the
+ ;; compare-and-set! creates the table, and every caller opens the table
+ ;; afterwards while still holding the lock.
+ ktable (locking table-created?
+ (when (compare-and-set! table-created? false true)
+ (kc/create-table
+ kclient
+ table-name
+ kt/kv-table-schema
+ ;; Range partitioning is used when :table-ranges is set in the test,
+ ;; otherwise the table is hash-partitioned with one bucket per tserver.
+ (let [ranges (:table-ranges test)
+ rep-factor (:num-replicas test)]
+ (if (nil? ranges)
+ (kt/kv-table-options-hash
+ rep-factor (count (:tservers test)))
+ (kt/kv-table-options-range
+ rep-factor ranges)))))
+ (kc/open-table kclient table-name))]
+ (client table-created? kclient ktable)))
+
+ (invoke! [_ _ op]
+ ;; Only :read and :write are handled: 'case' has no default clause, so
+ ;; any other :f value results in an exception.
+ (case (:f op)
+ :read (assoc op :type :ok,
+ :value (kt/kv-read kclient ktable register-key))
+ :write (do (kt/kv-write kclient ktable register-key (:value op))
+ (assoc op :type :ok))))
+
+ (teardown! [_ _]
+ (kc/close-client kclient))))
+
+;; Builds the read/write register test: the defaults below are merged with
+;; 'opts' (entries of 'opts' take precedence since they come last in the merge)
+;; and the result is passed to kudu/kudu-test.
+(defn register-test
+ [opts]
+ (kudu/kudu-test
+ (merge
+ {:name "rw-register"
+ ;; A template client: the Kudu client and the table are set up in setup!.
+ :client (client (atom false) nil nil)
+ :concurrency 10
+ :num-replicas 5
+ :nemesis nemesis/noop
+ :model (model/register)
+ ;; The nemesis schedule is an endless cycle of: sleep, :start, sleep, :stop.
+ ;; NOTE(review): the sleep and time-limit values are presumably in
+ ;; seconds -- confirm against the jepsen.generator documentation.
+ :generator (->> (gen/reserve 5 (gen/mix [w r]) r)
+ (gen/stagger 1/3)
+ (gen/nemesis
+ (gen/seq (cycle [(gen/sleep 5)
+ {:type :info, :f :start}
+ (gen/sleep 5)
+ {:type :info, :f :stop}])))
+ (gen/time-limit 60))
+ :checker (checker/compose
+ {:perf (checker/perf)
+ :linear checker/linearizable})}
+ opts)))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
new file mode 100644
index 0000000..5338dad
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
@@ -0,0 +1,107 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.table
+ "Utilities to work with kudu tables, for testing."
+ (:require [clojure.tools.logging :refer :all]
+ [clojure.pprint :refer [pprint]]
+ [jepsen.kudu.client :as c])
+ (:import [org.apache.kudu ColumnSchema
+ ColumnSchema$ColumnSchemaBuilder
+ Schema
+ Type])
+ (:import [org.apache.kudu.client AbstractKuduScannerBuilder
+ AsyncKuduScanner$ReadMode
+ BaseKuduTest
+ CreateTableOptions
+ KuduClient
+ KuduClient$KuduClientBuilder
+ KuduPredicate
+ KuduPredicate$ComparisonOp
+ KuduScanner
+ KuduSession
+ KuduTable
+ OperationResponse
+ PartialRow
+ RowResult
+ RowResultIterator
+ Upsert]))
+;;
+;; KV Table utilities
+;;
+
+;; The schema of the Key/Value table: a string column "key" (created with
+;; the 'true' flag) and an int32 column "value" (created with 'false').
+(def kv-table-schema
+  (Schema. [(c/column-schema "key" Type/STRING true)
+            (c/column-schema "value" Type/INT32 false)]))
+
+
+(defn kv-table-options-range
+  "Builds the options to create a K/V table which is range-partitioned on
+  the 'key' column. 'ranges' should be a vector of [start, end) key pairs:
+  one range partition is added per pair, so the resulting table has
+  (count ranges) tablets covering exactly those ranges. The table is created
+  with the specified replication factor."
+  [num-replicas ranges]
+  (let [table-opts (doto (CreateTableOptions.)
+                     (.setRangePartitionColumns ["key"])
+                     (.setNumReplicas num-replicas))]
+    (doseq [bounds ranges]
+      (let [lower-row (.newPartialRow kv-table-schema)
+            upper-row (.newPartialRow kv-table-schema)]
+        (.addString lower-row "key" (get bounds 0))
+        (.addString upper-row "key" (get bounds 1))
+        (.addRangePartition table-opts lower-row upper-row)))
+    table-opts))
+
+
+(defn kv-table-options-hash
+  "Builds the options to create a K/V table whose 'key' column is hash
+  partitioned into 'buckets-num' buckets. The table is created with
+  the specified replication factor."
+  [num-replicas buckets-num]
+  (doto (CreateTableOptions.)
+    (.setRangePartitionColumns ["key"])
+    (.setNumReplicas num-replicas)
+    (.addHashPartitions ["key"] buckets-num)))
+
+
+(defn kv-write
+  "Upserts the row (key, value) into a KV table. Returns nil.
+
+  sync-client  the synchronous Kudu client to create the session with
+  table       the opened KV table
+  key         the string key of the row
+  value       the value of the row, coerced to int
+
+  An assertion fails if the response to the upsert carries a row error."
+  [sync-client table key value]
+  (let [upsert (.newUpsert table)
+        row    (.getRow upsert)]
+    (.addString row "key" key)
+    (.addInt row "value" (int value))
+    ;; A dedicated session is created for every write. Close it once done:
+    ;; otherwise the sessions pile up for the whole lifetime of the client.
+    (let [session (.newSession sync-client)]
+      (try
+        (let [response (.apply session upsert)]
+          (assert (not (.hasRowError response))
+                  (str "Got a row error: " response)))
+        (finally
+          (.close session))))))
+
+(defn kv-read
+  "Scans a KV table in READ_AT_SNAPSHOT mode for the row with the given key.
+  Returns the :value of the row found, or nil if there is no such row.
+  An assertion fails if the scan returns more than one row."
+  [sync-client table key]
+  (let [builder    (.newScannerBuilder sync-client table)
+        key-equals (KuduPredicate/newComparisonPredicate
+                     (c/column-schema "key" Type/STRING)
+                     KuduPredicate$ComparisonOp/EQUAL
+                     key)]
+    (doto builder
+      (.readMode AsyncKuduScanner$ReadMode/READ_AT_SNAPSHOT)
+      (.addPredicate key-equals))
+    (let [found     (c/drain-scanner-to-tuples (.build builder))
+          found-num (count found)]
+      (cond
+        (= 0 found-num) nil
+        (= 1 found-num) (:value (get found 0))
+        :else (assert false
+                      (str "Expected 0 or 1 rows. Got: " found-num))))))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
new file mode 100644
index 0000000..2e21794
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
@@ -0,0 +1,421 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.util
+ "Utilities for Apache Kudu jepsen tests"
+ (:require [clojure.tools.logging :refer :all]
+ [clojure.java.io :as io]
+ [clojure.string :as str]
+ [jepsen
+ [control :as c :refer [|]]
+ [util :as util :refer [meh]]]
+ [jepsen.control.util :as cu]
+ [jepsen.os.debian :as debian]))
+
+(defn path
+  "Joins the given path components with the Unix path separator ('/') and
+  returns the resulting filesystem path as a string."
+  [& components]
+  (apply str (interpose "/" components)))
+
+;; TODO(aserbin): make it possible to set the version via a run-time option.
+;;
+;; The empty string corresponds to the latest snapshot from the main trunk.
+;; To run against some other branch, set to "<major>.<minor>.<patch>";
+;; e.g. set "1.2.0" to run against packages built for Kudu 1.2.0 release.
+(def kudu-pkg-version "")
+
+;; The URL of the APT repository to install the Kudu packages from. With the
+;; empty kudu-pkg-version the path starts with 'kudu-nightly/'; a version is
+;; spliced in verbatim right after 'kudu'.
+(def kudu-repo-url
+ (str "http://repos.jenkins.cloudera.com/kudu" kudu-pkg-version
+ "-nightly/debian/jessie/amd64/kudu"))
+;; The name of the repository: used to name the file under
+;; /etc/apt/sources.list.d (see prepare-node-with-pkgs).
+(def kudu-repo-name "kudu-nightly")
+(def kudu-repo-apt-line (str "deb " kudu-repo-url " jessie-kudu contrib"))
+(def kudu-required-packages
+ "The set of the required system packages (more are installed by dependency)."
+ [:libsasl2-modules
+ :libsasl2-modules-gssapi-mit
+ :lsb-release
+ :ntp
+ :openssl])
+;; The names of the packages installed on master and tserver nodes
+;; when Kudu is installed from packages.
+(def kudu-master-pkg :kudu-master)
+(def kudu-tserver-pkg :kudu-tserver)
+
+;; The local directory to upload the Kudu binaries from: the binaries are
+;; taken from its 'bin' sub-directory (see prepare-node-with-binaries).
+;; NOTE(review): the path is relative, so it is presumably resolved against
+;; the working directory of the process running the tests -- confirm.
+(def kudu-build-dir "../../build/latest")
+
+;; Locations of the configuration files and of the uploaded binaries
+;; at the Kudu nodes.
+(def kudu-conf-dir "/etc/kudu/conf")
+(def kudu-master-gflagfile (path kudu-conf-dir "master.gflagfile"))
+(def kudu-tserver-gflagfile (path kudu-conf-dir "tserver.gflagfile"))
+(def kudu-target-bin-dir "/opt/local/bin")
+(def kudu-target-sbin-dir "/opt/local/sbin")
+;; The user and the group which own the Kudu directories; the daemons are run
+;; under this user when started from the uploaded binaries.
+(def kudu-uname "kudu")
+(def kudu-uid 999)
+(def kudu-gname "kudu")
+(def kudu-gid 999)
+;; Data, log and pid-file locations at the Kudu nodes.
+(def kudu-home-dir "/var/lib/kudu")
+(def kudu-master-home-dir (path kudu-home-dir "master"))
+(def kudu-tserver-home-dir (path kudu-home-dir "tserver"))
+(def kudu-log-dir "/var/log/kudu")
+(def kudu-master-log-file (path kudu-log-dir "kudu-master.log"))
+(def kudu-tserver-log-file (path kudu-log-dir "kudu-tserver.log"))
+(def kudu-run-dir "/var/run/kudu")
+(def kudu-master-pid-file (path kudu-run-dir "kudu-master-kudu.pid"))
+(def kudu-tserver-pid-file (path kudu-run-dir "kudu-tserver-kudu.pid"))
+
+;; The RPC ports the masters and the tablet servers are configured to bind to
+;; (see kudu-cfg-master and kudu-cfg-tserver).
+(def master-rpc-port 7051)
+(def tserver-rpc-port 7050)
+
+(defn kudu-cli
+  "Returns the command to run the Kudu CLI tool with: the full path to the
+  uploaded binary or, when Kudu is installed from packages, just the name of
+  the binary."
+  [test node]
+  (if-not (:use-packages? test)
+    (path kudu-target-bin-dir "kudu")
+    ;; Installed from packages: rely on the standard PATH env variable.
+    "kudu"))
+
+
+(defn concatenate-addresses
+  "Given a port and a list of hostnames, returns the comma-separated list
+  of the addresses as a string of form 'h0:port,h1:port,h2:port'."
+  [port hosts]
+  (->> hosts
+       (map (fn [host] (str (name host) ":" port)))
+       (str/join ",")))
+
+
+(defn group-exist?
+  "Returns true if /etc/group contains an entry for the group with exactly
+  the specified name, false otherwise (i.e. when the egrep command run via
+  c/exec fails with a RuntimeException)."
+  [group-name]
+  ;; The name of a group is the first colon-separated field of its entry.
+  ;; The trailing ':' in the pattern is necessary to avoid false positives
+  ;; for the groups whose names just start with 'group-name'
+  ;; (e.g. 'kudu-admins' when looking for 'kudu').
+  (try (c/exec :egrep (str "^" group-name ":") "/etc/group")
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn user-exist?
+ "Does the specified user exist? Returns true if the 'id <user-name>' command
+ run via c/exec succeeds, false if it fails with a RuntimeException."
+ [user-name]
+ (try (c/exec :id user-name)
+ true
+ (catch RuntimeException _ false)))
+
+
+(defn ntp-in-sync?
+ "Is the NTP server in sync state? This function should be called in the
+ context of already established SSH session at the node. Returns true if
+ 'ntp-wait -n1 -s1' succeeds, false if it fails with a RuntimeException."
+ []
+ (try (c/exec :ntp-wait :-n1 :-s1)
+ true
+ (catch RuntimeException _ false)))
+
+
+(defn kudu-master-in-service?
+ "Is the Kudu master process at the specified node in service already?
+ This function should be called in the context of already established SSH
+ session at the node. The master is considered to be in service when
+ 'kudu table list <node>' succeeds; a RuntimeException thrown by c/exec
+ is reported as false."
+ [test node]
+ (try (c/exec (kudu-cli test node) :table :list node)
+ true
+ (catch RuntimeException _ false)))
+
+
+(defn kudu-master-see-tservers?
+  "Does the Kudu master at the specified node see the specified number of
+  tablet servers? The answer is found by grepping the output of
+  'kudu cluster ksck <node>' for the corresponding message; a failure of
+  the command pipeline is reported as false."
+  [test node tservers-count]
+  (let [expected-msg (str "Fetched info from all " tservers-count
+                          " Tablet Servers")]
+    (try
+      (c/exec (kudu-cli test node) :cluster :ksck node | :grep expected-msg)
+      true
+      (catch RuntimeException _
+        false))))
+
+
+(defn kudu-tserver-in-service?
+ "Is the Kudu tserver process at the specified node up and running?
+ This function should be called in the context of already established SSH
+ session at the node. The tserver is considered to be up when
+ 'kudu tserver status <node>' succeeds; a RuntimeException thrown by c/exec
+ is reported as false."
+ [test node]
+ (try (c/exec (kudu-cli test node) :tserver :status node)
+ true
+ (catch RuntimeException _ false)))
+
+
+(defn start-kudu-master
+ "Start Kudu master daemon at the specified node. This function should
+ be called in the super-user context (jepsen.control/su). After starting
+ the daemon, the function waits for the master to get into service and then
+ for the master to see all the tablet servers listed in (:tservers test)."
+ [test node]
+ (info node "Starting Kudu Master")
+ (let [use-svc-scripts? (:use-packages? test)]
+ (if use-svc-scripts?
+ (c/exec :service :kudu-master :start)
+ ;; No service scripts without packages: run the uploaded binary as
+ ;; the kudu user via start-stop-daemon, appending its output to the log file.
+ (c/exec :sudo :-u kudu-uname :start-stop-daemon
+ :--start
+ :--background
+ :--make-pidfile
+ :--pidfile kudu-master-pid-file
+ :--chdir kudu-home-dir
+ :--no-close
+ :--oknodo
+ :--exec (path kudu-target-sbin-dir "kudu-master")
+ :--
+ :--flagfile kudu-master-gflagfile
+ :>> kudu-master-log-file (c/lit "2>&1"))))
+
+ ;; Wait for the master services to become available (i.e. wait for the
+ ;; catalog manager): poll every 500 ms, giving up after about 100 attempts.
+ (loop [iteration 0]
+ (when-not (kudu-master-in-service? test node)
+ (if (> iteration 100)
+ ;; Running 'false' makes the remote command fail: that is how the timeout
+ ;; is turned into an error (presumably an exception thrown by c/exec).
+ (c/exec :echo "timeout waiting for master server to start" (c/lit ";")
+ :false)
+ (do
+ (Thread/sleep 500)
+ (recur (inc iteration))))))
+
+ ;; Wait until the master sees all tservers in the cluster. Otherwise
+ ;; the client would not be able to create a table with the desired
+ ;; replication factor when not all tservers have registered.
+ (let [tservers-count (count (:tservers test))]
+ (loop [iteration 0]
+ (when-not (kudu-master-see-tservers? test node tservers-count)
+ (if (> iteration 200)
+ (c/exec :echo "timeout waiting for all tservers to start" (c/lit ";")
+ :false)
+ (do
+ (Thread/sleep 500)
+ (recur (inc iteration))))))))
+
+
+(defn stop-kudu-master
+  "Stops the Kudu master daemon at the specified node: via the service
+  script when Kudu is installed from packages (a failure is ignored),
+  via the pid file otherwise. This function should be called in
+  the super-user context (jepsen.control/su)."
+  [test node]
+  (info node "Stopping Kudu Master")
+  (if-not (:use-packages? test)
+    (cu/stop-daemon! "kudu-master" kudu-master-pid-file)
+    (meh (c/exec :service :kudu-master :stop))))
+
+
+(defn start-kudu-tserver
+ "Start Kudu tablet server daemon at the specified node. This function
+ should be called in the super-user context (jepsen.control/su). After
+ starting the daemon, the function waits for the tablet server to respond
+ to the status request (see kudu-tserver-in-service?)."
+ [test node]
+ (info node "Starting Kudu Tablet Server")
+ (let [use-svc-scripts? (:use-packages? test)]
+ (if use-svc-scripts?
+ (c/exec :service :kudu-tserver :start)
+ ;; No service scripts without packages: run the uploaded binary as
+ ;; the kudu user via start-stop-daemon, appending its output to the log file.
+ (c/exec :sudo :-u kudu-uname :start-stop-daemon
+ :--start
+ :--background
+ :--make-pidfile
+ :--pidfile kudu-tserver-pid-file
+ :--chdir kudu-home-dir
+ :--no-close
+ :--oknodo
+ :--exec (path kudu-target-sbin-dir "kudu-tserver")
+ :--
+ :--flagfile kudu-tserver-gflagfile
+ :>> kudu-tserver-log-file (c/lit "2>&1"))))
+
+ ;; Wait for the tablet server to become on-line: poll every 500 ms,
+ ;; giving up after about 100 attempts.
+ (loop [iteration 0]
+ (when-not (kudu-tserver-in-service? test node)
+ (if (> iteration 100)
+ ;; Running 'false' makes the remote command fail: that is how the timeout
+ ;; is turned into an error (presumably an exception thrown by c/exec).
+ (c/exec :echo "timeout waiting for tablet server to start" (c/lit ";")
+ :false)
+ (do
+ (Thread/sleep 500)
+ (recur (inc iteration)))))))
+
+
+(defn stop-kudu-tserver
+  "Stops the Kudu Tablet Server on the specified node: via the service script
+  when Kudu is installed from packages (a failure is ignored), via the pid
+  file otherwise."
+  [test node]
+  (info node "Stopping Kudu Tablet Server")
+  (if-not (:use-packages? test)
+    (cu/stop-daemon! "kudu-tserver" kudu-tserver-pid-file)
+    (meh (c/exec :service :kudu-tserver :stop))))
+
+
+(defn kudu-cfg-master
+  "Returns the contents of the Kudu master flags file: one flag per line."
+  [test]
+  (let [masters    (:masters test)
+        base-flags [(str "--fs_wal_dir=" kudu-master-home-dir)
+                    (str "--fs_data_dirs=" kudu-master-home-dir)
+                    (str "--log_dir=" kudu-log-dir)
+                    (str "--rpc_bind_addresses=0.0.0.0:" master-rpc-port)]
+        ;; The master addresses are set only in case of multiple masters.
+        all-flags  (cond-> base-flags
+                     (> (count masters) 1)
+                     (conj (str "--master_addresses="
+                                (concatenate-addresses master-rpc-port
+                                                       masters))))]
+    (str/join "\n" all-flags)))
+
+
+(defn kudu-cfg-tserver
+  "Returns the contents of the Kudu tserver flags file: one flag per line.
+  The heartbeat-related settings are taken from the test map, the addresses
+  of the masters are built from (:masters test)."
+  [test]
+  (let [master-addrs (concatenate-addresses master-rpc-port (:masters test))]
+    (str/join
+      "\n"
+      [(str "--fs_wal_dir=" kudu-tserver-home-dir)
+       (str "--fs_data_dirs=" kudu-tserver-home-dir)
+       (str "--log_dir=" kudu-log-dir)
+       (str "--rpc_bind_addresses=0.0.0.0:" tserver-rpc-port)
+       (str "--heartbeat_interval_ms=" (:ts-hb-interval-ms test))
+       (str "--raft_heartbeat_interval_ms=" (:ts-raft-hb-interval-ms test))
+       (str "--heartbeat_max_failures_before_backoff="
+            (:ts-hb-max-failures-before-backoff test))
+       (str "--tserver_master_addrs=" master-addrs)])))
+
+
+(defn ntp-server-config
+  "Returns the contents of ntp.conf for a Kudu master node: the common options
+  followed by the server-specific ones, both read from the resources."
+  []
+  (str (slurp (io/resource "ntp.conf.common"))
+       "\n"
+       (slurp (io/resource "ntp.conf.server"))))
+
+
+(defn ntp-slave-config
+  "Returns the contents of ntp.conf for a Kudu tserver node: the common
+  options read from the resources followed by a 'server' line for every
+  host of the specified servers."
+  [servers]
+  (let [common     (slurp (io/resource "ntp.conf.common"))
+        directives (for [srv servers]
+                     (str "server " (name srv)
+                          " burst iburst prefer minpoll 4 maxpoll 4"))]
+    (str common "\n" (str/join "\n" directives))))
+
+
+(defn prepare-node-with-pkgs
+  "Prepare a Kudu node: install Kudu using packages. Registers the Kudu
+  package repository along with its key unless the repository file is
+  already present, then installs the master and/or the tserver package,
+  depending on the role(s) of the node listed in (:masters test) and
+  (:tservers test)."
+  [test node]
+  (let [repo-file (str "/etc/apt/sources.list.d/"
+                       (name kudu-repo-name) ".list")]
+    ;; The presence of the file is the sign of the already registered repo.
+    (when-not (cu/exists? repo-file)
+      (info node "Adding " kudu-repo-name " package repository")
+      (debian/add-repo! kudu-repo-name kudu-repo-apt-line)
+      (info node "Fetching " kudu-repo-name " package key")
+      (c/exec :curl :-fLSs (str kudu-repo-url "/" "archive.key") |
+              :apt-key :add :-)
+      (info node "Updating package index")
+      (debian/update!)))
+
+  (when (.contains (:masters test) node)
+    (when-not (debian/installed? kudu-master-pkg)
+      (info node "Installing kudu-master package")
+      (debian/install kudu-master-pkg)))
+  (when (.contains (:tservers test) node)
+    (when-not (debian/installed? kudu-tserver-pkg)
+      (info node "Installing kudu-tserver package")
+      (debian/install kudu-tserver-pkg))))
+
+
+(defn prepare-node-with-binaries
+  "Prepare Kudu node: create the kudu user and group if they are missing,
+  create the directory structure and upload the necessary Kudu binaries
+  from the local build directory. The master and the tserver binaries are
+  uploaded only to the nodes of the corresponding role, the CLI tool is
+  uploaded to every node."
+  [test node]
+
+  (when-not (group-exist? kudu-gname)
+    (c/exec :groupadd :-o :-g kudu-gid kudu-gname))
+  (when-not (user-exist? kudu-uname)
+    (c/exec :useradd :-o :-u kudu-uid :-g kudu-gname :-d kudu-home-dir
+            :-s "/usr/sbin/nologin" kudu-uname))
+
+  ;; Prepare directory structure for the files.
+  (c/exec :mkdir :-p kudu-conf-dir)
+  ;; The home directories are already full paths: pass them to mkdir as is.
+  (when (.contains (:masters test) node)
+    (c/exec :mkdir :-p kudu-master-home-dir))
+  (when (.contains (:tservers test) node)
+    (c/exec :mkdir :-p kudu-tserver-home-dir))
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-home-dir)
+
+  (c/exec :mkdir :-p kudu-run-dir)
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-run-dir)
+
+  (c/exec :mkdir :-p kudu-log-dir)
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-log-dir)
+
+  ;; Start with empty directories for the binaries: get rid of the leftovers
+  ;; from prior runs, if any.
+  (c/exec :rm :-rf kudu-target-bin-dir)
+  (c/exec :mkdir :-p kudu-target-bin-dir)
+
+  (c/exec :rm :-rf kudu-target-sbin-dir)
+  (c/exec :mkdir :-p kudu-target-sbin-dir)
+
+  ;; Copy appropriate binaries to the node.
+  (when (.contains (:masters test) node)
+    (let [master-binary-src (path kudu-build-dir "bin" "kudu-master")
+          master-binary-dst (path kudu-target-sbin-dir "kudu-master")]
+      (c/upload master-binary-src kudu-target-sbin-dir)
+      (c/exec :chmod 755 master-binary-dst)))
+  (when (.contains (:tservers test) node)
+    (let [tserver-binary-src (path kudu-build-dir "bin" "kudu-tserver")
+          tserver-binary-dst (path kudu-target-sbin-dir "kudu-tserver")]
+      (c/upload tserver-binary-src kudu-target-sbin-dir)
+      (c/exec :chmod 755 tserver-binary-dst)))
+  (let [kudu-cli-binary-src (path kudu-build-dir "bin" "kudu")
+        kudu-cli-binary-dst (path kudu-target-bin-dir "kudu")]
+    (c/upload kudu-cli-binary-src kudu-target-bin-dir)
+    (c/exec :chmod 755 kudu-cli-binary-dst)))
+
+
+(defn prepare-node
+ "Prepare Kudu node using either packaged Kudu software or
+ assorted Kudu binaries for the server-side components. Besides installing
+ the software, removes the data directory and the log file of the master
+ and/or of the tserver and writes the corresponding flag file."
+ [test node]
+ (when-not (debian/installed? kudu-required-packages)
+ (info node "Installing required packages")
+ (debian/install kudu-required-packages))
+
+
+ (if (:use-packages? test)
+ (prepare-node-with-pkgs test node)
+ (prepare-node-with-binaries test node))
+
+ ;; Wipe the data and the log left from prior runs and generate the flag
+ ;; file: the contents of the 'kudu.flags' resource followed by
+ ;; the role-specific flags.
+ (when (.contains (:masters test) node)
+ (c/exec :rm :-rf kudu-master-home-dir)
+ (c/exec :rm :-f kudu-master-log-file)
+ (c/exec :echo (str (slurp (io/resource "kudu.flags"))
+ "\n" (kudu-cfg-master test)) :> kudu-master-gflagfile))
+ (when (.contains (:tservers test) node)
+ (c/exec :rm :-rf kudu-tserver-home-dir)
+ (c/exec :rm :-f kudu-tserver-log-file)
+ (c/exec :echo (str (slurp (io/resource "kudu.flags"))
+ "\n" (kudu-cfg-tserver test)) :> kudu-tserver-gflagfile)))
+
+
+(defn sync-time
+ "When ntpd is not in synchronized state, revamps its configs and restarts
+ the ntpd daemon. Does nothing if ntpd is in synchronized state already
+ (see ntp-in-sync?)."
+ [test node]
+ (when-not (ntp-in-sync?)
+ ;; A failure to stop the service is ignored ('|| true').
+ (c/exec :service :ntp :stop "||" :true)
+ (c/exec :echo "NTPD_OPTS='-g -N'" :> "/etc/default/ntp")
+ ;; Master nodes get the server configuration, tserver nodes get
+ ;; the configuration listing the master nodes as their NTP servers.
+ (when (.contains (:masters test) node)
+ (c/exec :echo (ntp-server-config) :> "/etc/ntp.conf"))
+ (when (.contains (:tservers test) node)
+ (c/exec :echo (ntp-slave-config (:masters test)):> "/etc/ntp.conf"))
+ (c/exec :service :ntp :start)
+ ;; Wait for 5 minutes max for ntpd to get into synchronized state.
+ (c/exec :ntp-wait :-s1 :-n300)))
+
+
+(defn start-kudu
+ "Start Kudu services on the node: the master if the node is listed in
+ (:masters test) and then the tablet server if the node is listed in
+ (:tservers test)."
+ [test node]
+ (when (.contains (:masters test) node) (start-kudu-master test node))
+ (when (.contains (:tservers test) node) (start-kudu-tserver test node)))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj b/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
new file mode 100644
index 0000000..d00c26a
--- /dev/null
+++ b/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
@@ -0,0 +1,84 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu-test
+ (:require [clojure.test :refer :all]
+ [jepsen.core :as jepsen]
+ [jepsen.nemesis :as jn]
+ [jepsen.tests :as tests]
+ [jepsen.kudu :as kudu]
+ [jepsen.kudu.nemesis :as kn]
+ [jepsen.kudu.register :as kudu-register]))
+
+;; Runs the Jepsen test built by calling 'tcasefun' with 'opts' and asserts
+;; (via clojure.test/is) that the results of the run are marked as valid.
+(defn check
+ [tcasefun opts]
+ (is (:valid? (:results (jepsen/run! (tcasefun opts))))))
+
+;; Defines a clojure.test test named '<tfun>-<tsuffix>' which calls 'check'
+;; with the test function 'tfun' and the options 'topts'.
+(defmacro dt
+ [tfun tsuffix topts]
+ (let [tname# (symbol (str (name tfun) "-" tsuffix))]
+ `(clojure.test/deftest ~tname# (check ~tfun ~topts))))
+
+;; Returns the (not yet expanded) form calling the 'dt' macro with the
+;; specified arguments. Being a function, it can be mapped over sequences
+;; of arguments, unlike the macro itself.
+(defn dt-func
+ [tfun tscenario topts]
+ `(dt ~tfun ~tscenario ~topts))
+
+;; Expands into a 'do' form defining one test (see 'dt') per entry of 'config'.
+;;   tfun    the symbol of the function building the Jepsen test from options
+;;   config  a form which evaluates (at macro expansion time) to a sequence
+;;           of maps with the :scenario and :nemesis keys: the scenario becomes
+;;           the suffix of the name of the test, the nemesis is added to
+;;           the options of the test under the :nemesis key
+;;   topts   a form which evaluates (at macro expansion time) to the map of
+;;           the options common for all the tests
+;; Both 'config' and 'topts' are evaluated just once.
+(defmacro instantiate-tests
+  [tfun config topts]
+  (let [scenarios (eval config)
+        base-opts (eval topts)]
+    `(do ~@(map (fn [{:keys [scenario nemesis]}]
+                  (dt-func tfun scenario (merge base-opts {:nemesis nemesis})))
+                scenarios))))
+
+;; Configurations for tests. Every configuration corresponds to running
+;; a test with particular nemesis (let's call it "scenario").
+;;
+;; The nemeses are specified as quoted forms: a form is not evaluated here,
+;; it becomes a part of the code of the generated test instead (see
+;; 'instantiate-tests' and 'dt'). So, the 'jn' and 'kn' aliases used in
+;; the forms presumably have to be defined in the namespace where the tests
+;; are instantiated as well.
+(def register-test kudu-register/register-test)
+(def register-test-configs
+ [
+ {:scenario "noop-nemesis"
+ :nemesis '((fn [] jn/noop))}
+ {:scenario "tserver-random-halves"
+ :nemesis '(kn/tserver-partition-random-halves)}
+ {:scenario "tserver-majorities-ring"
+ :nemesis '(kn/tserver-partition-majorities-ring)}
+ {:scenario "kill-restart-2-tservers"
+ :nemesis '(kn/kill-restart-tserver (comp (partial take 2) shuffle))}
+ {:scenario "kill-restart-3-tservers"
+ :nemesis '(kn/kill-restart-tserver (comp (partial take 3) shuffle))}
+ {:scenario "kill-restart-all-tservers"
+ :nemesis '(kn/kill-restart-tserver shuffle)}
+ {:scenario "all-random-halves"
+ :nemesis '(jn/partition-random-halves)}
+ {:scenario "all-majorities-ring"
+ :nemesis '(jn/partition-majorities-ring)}
+ {:scenario "hammer-2-tservers"
+ :nemesis '(kn/tserver-hammer-time (comp (partial take 2) shuffle))}
+ {:scenario "hammer-3-tservers"
+ :nemesis '(kn/tserver-hammer-time (comp (partial take 3) shuffle))}
+ {:scenario "hammer-all-tservers"
+ :nemesis '(kn/tserver-hammer-time shuffle)}
+ ])
+
+;; Defines all the Kudu Jepsen tests: one register test per scenario listed
+;; in 'register-test-configs', each run with the options 'opts'.
+(defmacro instantiate-all-kudu-tests
+ [opts]
+ `(instantiate-tests register-test register-test-configs ~opts))
+
+;; Instantiate the tests with no custom options in this namespace.
+(instantiate-all-kudu-tests {})
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/utils/kudu_test_runner.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/utils/kudu_test_runner.clj b/java/kudu-jepsen/src/utils/kudu_test_runner.clj
new file mode 100644
index 0000000..b43e3e6
--- /dev/null
+++ b/java/kudu-jepsen/src/utils/kudu_test_runner.clj
@@ -0,0 +1,115 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;; http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+;;
+;; This is a starter script for clojure-maven-plugin. The script parses the
+;; passed command-line arguments and starts Kudu jepsen tests with appropriate
+;; parameters.
+;;
+;; The script is invoked via calling 'mvn clojure:run'. The 'clojure:run' target
+;; is used instead of 'clojure:test' because the latter does not allow passing
+;; the necessary customization parameters for the tests.
+;;
+;; The script accepts the following command-line options:
+;; --masters=<list_of_kudu_master_hostnames>
+;; --tservers=<list_of_kudu_tserver_hostnames>
+;; --ssh-key-path=<path_to_private_ssh_key_to_login_into_kudu_nodes or empty>
+;; --iter-num=<number_of_iterations_to_run> (default is 1)
+;; The list of nodes/hostnames can be separated either by single space or comma.
+;;
+
+(ns jepsen.kudu-test-runner
+ "Run Kudu jepsen tests via clojure-maven-plugin on 'mvn clojure:run'"
+ (:require [clojure.tools.logging :refer :all]
+ [clojure.string :as string]
+ [clojure.test :refer [run-tests]]
+ [clojure.tools.cli :refer [parse-opts]]
+ [jepsen.nemesis :as jn]
+ [jepsen.control :as jc]
+ [jepsen.kudu.nemesis :as kn]
+ [jepsen.kudu-test :refer [instantiate-all-kudu-tests]]))
+
+;; Splits the string with the list of hostnames separated by commas and/or
+;; spaces into a vector of hostnames. Empty items (e.g. those produced by
+;; a comma followed by a space, or by an empty input string) are dropped,
+;; so the result never contains empty hostnames.
+(def parse-hostnames
+  (fn [input]
+    (vec (remove string/blank? (string/split input #"[, ]")))))
+
+(defn parse-path
+  "Returns the input string with the leading and the trailing whitespace
+  removed, or nil if nothing is left after the trimming."
+  [input]
+  ;; A local binding instead of 'def': the latter would define a global var.
+  (let [trimmed (string/trim input)]
+    (if (= 0 (count trimmed)) nil trimmed)))
+
+;; The specification of the command-line options accepted by the script:
+;; it is passed to parse-opts (see get-cmd-line-opts). The values of
+;; the --masters and --tservers options are split into the hostnames by
+;; parse-hostnames, the empty value of --ssh-key-path is turned into nil by
+;; parse-path, the value of --iter-num is parsed as an integer.
+(def cli-options
+ [
+ [:long-opt "--masters"
+ :required "<nodes>"
+ :desc "Set of Kudu master nodes"
+ :missing "Kudu master nodes are missing"
+ :parse-fn parse-hostnames]
+ [:long-opt "--tservers"
+ :required "<nodes>"
+ :desc "Set of Kudu tserver nodes"
+ :missing "Kudu tserver nodes are missing"
+ :parse-fn parse-hostnames]
+ [:long-opt "--ssh-key-path"
+ :required "<path_to_private_ssh_key>"
+ :desc "Path to the SSH private key to login into the Kudu nodes.
+ If not specified or empty, keys are retrieved from SSH agent."
+ :missing "Path to the SSH private key is not specified, using SSH agent."
+ :parse-fn parse-path]
+ [:long-opt "--iter-num"
+ :required "<number_of_iterations>"
+ :default 1
+ :desc "Number of iterations to run the test suite in cycle."
+ :parse-fn #(Integer/parseInt %)]
+ ])
+
+(defn get-cmd-line-opts
+  "Parses the command-line arguments of the script as specified by
+  cli-options and returns the map of the parsed options. The messages
+  reported by the parser (e.g. on missing or malformed options) are printed
+  into the standard error stream as warnings; they do not stop the script."
+  []
+  (let [{:keys [options errors]}
+        (parse-opts *command-line-args* cli-options)]
+    ;; Do not drop the diagnostics silently: some messages are informational
+    ;; (e.g. on using the SSH agent), so they are reported but are not fatal.
+    (when (seq errors)
+      (binding [*out* *err*]
+        (doseq [msg errors]
+          (println "WARNING:" msg))))
+    options))
+
+;; The entry point of the script: parse the command-line options, instantiate
+;; the tests with those options and run the test suite the requested number of
+;; times. The process exits with status 1 on the first iteration with failed
+;; tests or errors, and with status 0 if all the iterations have passed.
+(do
+ (def cmd-line-opts (get-cmd-line-opts))
+ ;; The options of the runner itself are not passed to the tests.
+ (def test-opts (dissoc cmd-line-opts :ssh-key-path :iter-num))
+ (def private-key-path (:ssh-key-path cmd-line-opts))
+ (def iter-num (:iter-num cmd-line-opts))
+ ;; Custom reporting for the tests.
+ (defmulti custom-report :type)
+ (def old-report clojure.test/report)
+ ;; Everything but the :begin-test-var events is reported in the regular way.
+ (defmethod custom-report :default [m]
+ (old-report m))
+ ;; Print the name of the test upon starting it.
+ (defmethod custom-report :begin-test-var [m]
+ (println (-> m :var meta :name)))
+ (println "Running" iter-num "iteration(s) of the test suite")
+ (println "Running with ssh key:" private-key-path)
+ (println "Running with test options:" test-opts)
+ ;; The macro expands here, so the tests are presumably defined in this
+ ;; namespace: that is the namespace passed to run-tests below.
+ (jepsen.kudu-test/instantiate-all-kudu-tests test-opts)
+ (binding [jc/*strict-host-key-checking* :no
+ jc/*private-key-path* private-key-path
+ clojure.test/report custom-report]
+ (loop [iteration 0]
+ (when (< iteration iter-num)
+ (let [summary (run-tests 'jepsen.kudu-test-runner)]
+ (when-not (= 0 (:fail summary))
+ (println "FAILURE: tests failed.")
+ (System/exit 1))
+ (when-not (= 0 (:error summary))
+ (println "ERROR: encountered errors while running the tests.")
+ (System/exit 1))
+ (println "SUCCESS: all tests passed; no errors.")
+ (recur (inc iteration)))))
+ (System/exit 0)))
http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index 4a15908..cdc5153 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -305,5 +305,17 @@
<module>kudu-csd</module>
</modules>
</profile>
+
+ <!-- Build the jepsen test for Kudu.
+
+ Disabled by default since Java 8 and Maven >= 3.3.6 are required to
+ build the kudu-jepsen artifact. To enable, add '-Pjepsen'
+ to the maven command line. -->
+ <profile>
+ <id>jepsen</id>
+ <modules>
+ <module>kudu-jepsen</module>
+ </modules>
+ </profile>
</profiles>
</project>
[2/2] kudu git commit: env_util: Factor out helper
DeleteExcessFilesByPattern()
Posted by ad...@apache.org.
env_util: Factor out helper DeleteExcessFilesByPattern()
The logic contained in here is generally useful for log rotation
purposes, and we can reuse it for minidump rotation in a later patch.
Change-Id: I6e76911b0a68f7c1397d93eb027b6a5c99e2fbed
Reviewed-on: http://gerrit.cloudera.org:8080/5740
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fb3bbbc8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fb3bbbc8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fb3bbbc8
Branch: refs/heads/master
Commit: fb3bbbc8ce8527a63144b268cbad4aee92da4c1f
Parents: 6985a54
Author: Mike Percy <mp...@apache.org>
Authored: Wed Jan 18 17:49:05 2017 -0800
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Fri Jan 20 21:10:47 2017 +0000
----------------------------------------------------------------------
src/kudu/util/env_util-test.cc | 44 ++++++++++++++++++++++++++++++++++---
src/kudu/util/env_util.cc | 31 ++++++++++++++++++++++++++
src/kudu/util/env_util.h | 7 ++++++
src/kudu/util/logging.cc | 25 ++-------------------
4 files changed, 81 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util-test.cc b/src/kudu/util/env_util-test.cc
index 90b308d..7c79068 100644
--- a/src/kudu/util/env_util-test.cc
+++ b/src/kudu/util/env_util-test.cc
@@ -15,15 +15,20 @@
// specific language governing permissions and limitations
// under the License.
+#include <sys/statvfs.h>
+#include <sys/time.h>
#include <unistd.h>
-#include "kudu/util/env_util.h"
+#include <memory>
+#include <unordered_set>
#include <gflags/gflags.h>
-#include <memory>
-#include <sys/statvfs.h>
+#include <glog/stl_logging.h>
+#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
+#include "kudu/util/env_util.h"
#include "kudu/util/path_util.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
@@ -33,6 +38,7 @@ DECLARE_int64(disk_reserved_bytes_free_for_testing);
using std::string;
using std::unique_ptr;
+using std::unordered_set;
using strings::Substitute;
namespace kudu {
@@ -108,5 +114,37 @@ TEST_F(EnvUtilTest, TestCreateDirsRecursively) {
ASSERT_TRUE(is_dir);
}
+// Ensure that DeleteExcessFilesByPattern() works.
+// We check that the number of files remaining after running it is the number
+// expected, and we manually set the modification times on the relevant files
+// (2 seconds apart, in creation order) to test that deletion is oldest-first.
+TEST_F(EnvUtilTest, TestDeleteExcessFilesByPattern) {
+ string dir = JoinPathSegments(test_dir_, "excess");
+ ASSERT_OK(env_->CreateDir(dir));
+ vector<string> filenames = {"a", "b", "c", "d"};
+ const time_t now_sec = GetCurrentTimeMicros() / 1000000; // Micros -> whole seconds.
+ for (int i = 0; i < filenames.size(); i++) {
+ const string& filename = filenames[i];
+ string path = JoinPathSegments(dir, filename);
+ unique_ptr<WritableFile> file;
+ ASSERT_OK(env_->NewWritableFile(path, &file));
+ ASSERT_OK(file->Close());
+
+ // Set the last-modified time of the file; later files get newer times.
+ struct timeval target_time { .tv_sec = now_sec + (i * 2), .tv_usec = 0 };
+ struct timeval times[2] = { target_time, target_time }; // { atime, mtime }
+ ASSERT_EQ(0, utimes(path.c_str(), times)) << errno;
+ }
+ vector<string> children;
+ ASSERT_OK(env_->GetChildren(dir, &children));
+ ASSERT_EQ(6, children.size()); // 4 files plus "." and "..".
+ ASSERT_OK(DeleteExcessFilesByPattern(env_, dir + "/*", 2));
+ ASSERT_OK(env_->GetChildren(dir, &children));
+ ASSERT_EQ(4, children.size()); // 2 files plus "." and "..".
+ unordered_set<string> children_set(children.begin(), children.end());
+ unordered_set<string> expected_set({".", "..", "c", "d"}); // "a", "b" are oldest.
+ ASSERT_EQ(expected_set, children_set) << children;
+}
+
} // namespace env_util
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc
index e398831..178d391 100644
--- a/src/kudu/util/env_util.cc
+++ b/src/kudu/util/env_util.cc
@@ -236,6 +236,37 @@ Status CopyFile(Env* env, const string& source_path, const string& dest_path,
return Status::OK();
}
+Status DeleteExcessFilesByPattern(Env* env, const string& pattern, int max_matches) {
+ // Negative numbers don't make sense for our interface.
+ DCHECK_GE(max_matches, 0);
+
+ vector<string> matching_files;
+ RETURN_NOT_OK(env->Glob(pattern, &matching_files));
+
+ if (matching_files.size() <= static_cast<size_t>(max_matches)) {
+ return Status::OK(); // Nothing in excess of the limit; delete nothing.
+ }
+
+ vector<pair<int64_t, string>> matching_file_mtimes; // (mtime, path) pairs.
+ for (string& matching_file_path : matching_files) {
+ int64_t mtime;
+ RETURN_NOT_OK(env->GetFileModifiedTime(matching_file_path, &mtime));
+ matching_file_mtimes.emplace_back(mtime, std::move(matching_file_path));
+ }
+
+ // Use mtime to determine which matching files to delete. This could
+ // potentially be ambiguous, depending on the resolution of last-modified
+ // timestamp in the filesystem, but that is part of the contract.
+ std::sort(matching_file_mtimes.begin(), matching_file_mtimes.end());
+ matching_file_mtimes.resize(matching_file_mtimes.size() - max_matches); // Keep only the oldest.
+
+ for (const auto& matching_file : matching_file_mtimes) {
+ RETURN_NOT_OK(env->DeleteFile(matching_file.second));
+ }
+
+ return Status::OK();
+}
+
ScopedFileDeleter::ScopedFileDeleter(Env* env, std::string path)
: env_(DCHECK_NOTNULL(env)), path_(std::move(path)), should_delete_(true) {}
http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.h b/src/kudu/util/env_util.h
index c54bfa8..884b17c 100644
--- a/src/kudu/util/env_util.h
+++ b/src/kudu/util/env_util.h
@@ -81,6 +81,13 @@ Status CreateDirsRecursively(Env* env, const std::string& path);
Status CopyFile(Env* env, const std::string& source_path, const std::string& dest_path,
WritableFileOptions opts);
+// Deletes files matching the glob 'pattern' (as expanded by Env::Glob()) in
+// excess of 'max_matches' files; nothing is deleted if there is no excess.
+// 'max_matches' must be greater than or equal to 0.
+// The oldest files are deleted first, as determined by last modified time. If
+// multiple files share a last modified time, their deletion order is undefined.
+Status DeleteExcessFilesByPattern(Env* env, const std::string& pattern, int max_matches);
+
// Deletes a file or directory when this object goes out of scope.
//
// The deletion may be cancelled by calling .Cancel().
http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/logging.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.cc b/src/kudu/util/logging.cc
index 4438380..8332349 100644
--- a/src/kudu/util/logging.cc
+++ b/src/kudu/util/logging.cc
@@ -38,6 +38,7 @@
#include "kudu/util/debug-util.h"
#include "kudu/util/debug/leakcheck_disabler.h"
#include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
#include "kudu/util/flag_tags.h"
#include "kudu/util/status.h"
@@ -359,23 +360,10 @@ Status DeleteExcessLogFiles(Env* env) {
if (max_log_files <= 0) return Status::OK();
for (int severity = 0; severity < google::NUM_SEVERITIES; ++severity) {
- vector<string> logfiles;
// Build glob pattern for input
// e.g. /var/log/kudu/kudu-master.*.INFO.*
string pattern = strings::Substitute("$0/$1.*.$2.*", FLAGS_log_dir, FLAGS_log_filename,
google::GetLogSeverityName(severity));
- RETURN_NOT_OK(env->Glob(pattern, &logfiles));
-
- if (logfiles.size() <= max_log_files) {
- continue;
- }
-
- vector<pair<time_t, string>> logfile_mtimes;
- for (string& logfile : logfiles) {
- int64_t mtime;
- RETURN_NOT_OK(env->GetFileModifiedTime(logfile, &mtime));
- logfile_mtimes.emplace_back(mtime, std::move(logfile));
- }
// Keep the 'max_log_files' most recent log files, as compared by
// modification time. Glog files contain a second-granularity timestamp in
@@ -383,16 +371,7 @@ Status DeleteExcessLogFiles(Env* env) {
// guaranteed by glob, however this code has been adapted from Impala which
// uses mtime to determine which files to delete, and there haven't been any
// issues in production settings.
- std::sort(logfile_mtimes.begin(), logfile_mtimes.end());
- logfile_mtimes.resize(logfile_mtimes.size() - max_log_files);
-
- VLOG(2) << "Deleting " << logfile_mtimes.size()
- << " excess glog files at " << google::GetLogSeverityName(severity)
- << " severity";
-
- for (const auto& logfile: logfile_mtimes) {
- RETURN_NOT_OK(env->DeleteFile(logfile.second));
- }
+ RETURN_NOT_OK(env_util::DeleteExcessFilesByPattern(env, pattern, max_log_files));
}
return Status::OK();
}