You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/01/20 21:11:11 UTC

[1/2] kudu git commit: [kudu-jepsen] Kudu Jepsen tests

Repository: kudu
Updated Branches:
  refs/heads/master e8381bb44 -> fb3bbbc8c


[kudu-jepsen] Kudu Jepsen tests

This patch contains David's code for the initial kudu-jepsen tests
as it was before KUDU-798 was resolved (i.e. as it was when it was
failing) and additional updates/fixes:
  * Extra nemeses for the read/write register linearizability test
  * Run multiple test scenarios in the scope of the register test
  * Starting up master server: wait for the catalog manager
  * Other assorted fixes for more robust operation

The clojure code is integrated into the Kudu maven build and is compiled
along with the other projects in a separate 'jepsen' profile.

The patch also adds functionality to run the kudu-jepsen tests
from the clojure-maven-plugin.  The test uses the build machine
as the Jepsen control node, running the control logic and the Kudu
Java client there.

Restrictions:
  1. Kudu nodes should run recent Debian/Ubuntu Linux distro
     (that's due to the internal Jepsen's restrictions).

  2. The 'kudu-master', 'kudu-tserver' and 'kudu' binaries in
     the $KUDU_HOME/build/latest/bin should be built for the OS/distro
     running on the Kudu cluster nodes.

Change-Id: I590c6e78840304b3131666c7037ff9a08dc77dea
Reviewed-on: http://gerrit.cloudera.org:8080/5492
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6985a544
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6985a544
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6985a544

Branch: refs/heads/master
Commit: 6985a544342014e29fe2615ddcb02ccdf4f8bd39
Parents: e8381bb
Author: Alexey Serbin <as...@cloudera.com>
Authored: Thu Jan 5 00:33:22 2017 -0800
Committer: David Ribeiro Alves <dr...@apache.org>
Committed: Fri Jan 20 21:05:28 2017 +0000

----------------------------------------------------------------------
 java/kudu-jepsen/.gitignore                     |  24 ++
 java/kudu-jepsen/README.adoc                    | 164 ++++++++
 java/kudu-jepsen/pom.xml                        | 152 +++++++
 java/kudu-jepsen/resources/kudu.flags           |  21 +
 java/kudu-jepsen/resources/ntp.conf.common      |  30 ++
 java/kudu-jepsen/resources/ntp.conf.server      |   3 +
 .../src/main/clojure/jepsen/kudu.clj            | 104 +++++
 .../src/main/clojure/jepsen/kudu/client.clj     |  95 +++++
 .../src/main/clojure/jepsen/kudu/nemesis.clj    | 132 ++++++
 .../src/main/clojure/jepsen/kudu/register.clj   |  93 ++++
 .../src/main/clojure/jepsen/kudu/table.clj      | 107 +++++
 .../src/main/clojure/jepsen/kudu/util.clj       | 421 +++++++++++++++++++
 .../src/test/clojure/jepsen/kudu_test.clj       |  84 ++++
 java/kudu-jepsen/src/utils/kudu_test_runner.clj | 115 +++++
 java/pom.xml                                    |  12 +
 15 files changed, 1557 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/.gitignore
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/.gitignore b/java/kudu-jepsen/.gitignore
new file mode 100644
index 0000000..2125254
--- /dev/null
+++ b/java/kudu-jepsen/.gitignore
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Ignore files containing information on prior Leiningen runs/sessions.
+.lein-failures
+.lein-repl-history
+# Ignore jepsen files generated by local test runs.
+store/
+# Ignore the result of processing README.adoc with the asciidoctor tool.
+README.html

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/README.adoc
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/README.adoc b/java/kudu-jepsen/README.adoc
new file mode 100644
index 0000000..4c32428
--- /dev/null
+++ b/java/kudu-jepsen/README.adoc
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+= jepsen.kudu
+
+:author: Kudu Team
+
+A link:http://clojure.org[Clojure] library designed to run
+link:http://kudu.apache.org[Apache Kudu] consistency tests using
+the link:https://aphyr.com/tags/Jepsen[Jepsen] framework. Currently, a simple
+linearizability test for a read/write register is implemented and run
+for several fault injection scenarios.
+
+== Prerequisites and Requirements
+=== Operating System Requirements
+Only Debian/Ubuntu Linux is supported as a platform for the master and tablet
+server nodes. Tested to work on Debian 8 Jessie.
+
+== Overview
+The Clojure code is integrated into the project using the
+link:https://github.com/talios/clojure-maven-plugin[clojure-maven-plugin].
+The kudu-jepsen tests are invoked by executing the `clojure:run`
+plugin-specific goal. The parameters are passed via the standard
+`-D<property>=<value>` notation. There is a dedicated Clojure wrapper script
+`kudu_test_runner.clj` in `$KUDU_HOME/java/kudu-jepsen/src/utils` which
+populates the test environment with appropriate properties and iteratively
+runs all the registered tests with different nemeses scenarios.
+
+== Usage
+=== Building
+To build the library the following components are required:
+
+* JDK 8
+* Apache Maven version 3.3.6 or higher
+
+To build the project, run in the parent directory (i.e. `$KUDU_HOME/java`)
+[listing]
+----
+$ mvn clean compile test-compile -Pjepsen
+----
+
+=== Running
+The machines for Kudu master and tserver nodes should be created prior
+to running the test: the test does not create them itself. The machines should
+be up and running when starting the test.
+
+To run the test, the following components are required at the control node:
+
+* JDK 8
+* Apache Maven version 3.3.6 or higher
+* SSH client (and optionally, SSH authentication agent)
+* gnuplot (to visualize test results)
+
+Jepsen uses SSH to perform operations at DB nodes. The kudu-jepsen assumes
+that SSH keys are installed accordingly:
+
+* The public part of the SSH key should be added into the `authorized_keys` file
+  at all DB nodes for the `root` user
+* For the SSH private key the options are:
+** Add the key to the SSH authentication agent running at the control node
+** Specify the path to the file with the key in plain (non-encrypted) format
+   via the `sshKeyPath` property.
+
+If using SSH authentication agent to hold the SSH key for DB nodes access,
+run in the current directory:
+[listing]
+----
+$ mvn clojure:run -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0"
+----
+
+If not using SSH authentication agent, specify the SSH key location via the
+`sshKeyPath` property:
+[listing]
+----
+$ mvn clojure:run -DtserverNodes="t0,t1,t2,t3,t4" -DmasterNodes="m0" -DsshKeyPath="./vm_root_id_rsa"
+----
+
+Note that commas (not spaces) are used to separate the names of the nodes. The
+DNS resolver should be properly configured to resolve the specified hostnames
+into IP addresses.
+
+The `tserverNodes` property is used to specify the set of nodes where to run
+Kudu tablet servers. The `masterNodes` property is used to specify the set of
+nodes to run Kudu master servers.
+
+In Jepsen terminology, the Kudu master and tserver nodes play the
+*Jepsen DB node* role. The machine where the above-mentioned maven command
+is run plays the *Jepsen control node* role.
+
+=== Troubleshooting
+The majority of the kudu-jepsen test failures can be put into two classification
+buckets:
+
+* An error happened while setting up the testing environment, contacting
+  machines at the Kudu cluster, starting up Kudu server-side components, etc.
+* Jepsen's analysis detected an inconsistent history of operations.
+
+The former class of failures might be a manifestation of wrong configuration,
+a problem with the test environment or a bug in the test code itself.
+Those issues manifest themselves in messages like the following:
+[listing]
+----
+21:41:42 Ran 10  tests containing 10 assertions.
+21:41:42 0 failures, 10 errors.
+----
+To get more details, take a closer look at the output of `mvn clojure:run`
+or at particular `jepsen.log` files under
+`$KUDU_HOME/java/kudu-jepsen/store/rw-register/<test_timestamp>` directories.
+
+The latter class of failures represents more serious issues: manifestations
+of a non-linearizable history of operations. If Jepsen finds such an
+inconsistency, it outputs something like the following into the log:
+[listing]
+----
+Analysis invalid! (\uff89\u0ca5\u76ca\u0ca5\uff09\uff89 \u253b\u2501\u253b
+----
+To troubleshoot, first it's necessary to find where the failed test stores
+the results: it should be one of the timestamp-named sub-directories
+(e.g. `20170109T071938.000-0800`) under
+`$KUDU_HOME/java/kudu-jepsen/store/rw-register`. One of the possible ways
+to find the directory:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name jepsen.log | xargs grep 'Analysis invalid'
+./20170109T071938.000-0800/jepsen.log:Analysis invalid! (\uff89\u0ca5\u76ca\u0ca5\uff09\uff89 \u253b\u2501\u253b
+$
+----
+Another way is to find sub-directories where the `linear.svg` file is present:
+[listing]
+----
+$ cd $KUDU_HOME/java/kudu-jepsen/store/rw-register
+$ find . -name linear.svg
+./20170109T071938.000-0800/linear.svg
+$
+----
+Along with the `jepsen.log` and `history.txt` files, the failed test generates
+a `linear.svg` file (gnuplot is required for that). The diagram in `linear.svg`
+illustrates the part of the history which Jepsen found inconsistent:
+the diagram shows the time/client operation status/system state relationship
+and the sequences of legal/illegal operations paths. From this point, the next
+step is to locate the corresponding part of the history in the `history.txt`
+file. Usually the problem appears around an activation interval of the test
+nemesis scenario. Once found, it's possible to tie the vicinity of the
+inconsistent operation sequence with the timestamps in the `jepsen.log` file.
+Having the timestamps of the operations and their sequence, it's possible to
+find the relevant messages in the `kudu-tserver.log` and `kudu-master.log` files
+in sub-directories named as Kudu cluster nodes. Hopefully, that information
+is enough to create a reproducible scenario for further troubleshooting
+and debugging.

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/pom.xml
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/pom.xml b/java/kudu-jepsen/pom.xml
new file mode 100644
index 0000000..b443f4c
--- /dev/null
+++ b/java/kudu-jepsen/pom.xml
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.kudu</groupId>
+        <artifactId>kudu-parent</artifactId>
+        <version>1.3.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>kudu-jepsen</artifactId>
+    <name>Kudu Jepsen Tests</name>
+
+    <properties>
+        <!-- Jepsen tests require specific infrastructure and do not run as part of the
+             regular java tests.-->
+        <skipTests>true</skipTests>
+        <clojure.version>1.8.0</clojure.version>
+        <jepsen.version>0.1.3</jepsen.version>
+        <clojure.maven.plugin.version>1.7.1</clojure.maven.plugin.version>
+        <!-- List of Kudu Master nodes (e.g. "m0" or "m0,m1,m2") -->
+        <masterNodes>m0</masterNodes>
+        <!-- List of Kudu Tablet Server nodes (e.g. "t0,t1,t2") -->
+        <tserverNodes>t0,t1,t2,t3,t4</tserverNodes>
+        <!-- Path to the SSH key to access Kudu Master and
+             Tablet Server nodes as the 'root' user.  If left empty, the test
+             will try to use keys from the SSH agent, if any. -->
+        <sshKeyPath></sshKeyPath>
+        <!-- Number of iterations to run the test suite in cycle. -->
+        <iterNum>1</iterNum>
+    </properties>
+
+    <packaging>clojure</packaging>
+
+    <repositories>
+        <repository>
+            <id>clojars</id>
+            <url>http://clojars.org/repo/</url>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.clojure</groupId>
+            <artifactId>clojure</artifactId>
+            <version>${clojure.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>jepsen</groupId>
+            <artifactId>jepsen</artifactId>
+            <version>${jepsen.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.clojure</groupId>
+            <artifactId>tools.cli</artifactId>
+            <version>0.3.5</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>kudu-client</artifactId>
+            <version>${project.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-api</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>kudu-client</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <!-- Jepsen imports its own slf4j-->
+            <exclusions>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-api</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>interface-annotations</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+              <groupId>com.theoryinpractise</groupId>
+              <artifactId>clojure-maven-plugin</artifactId>
+              <version>${clojure.maven.plugin.version}</version>
+              <!-- Load maven extensions (like type and packaging handlers) for
+                   the clojure maven plugin-->
+              <extensions>true</extensions>
+              <configuration>
+                <script>src/utils/kudu_test_runner.clj</script>
+                <args>
+                  --masters=${masterNodes}
+                  --tservers=${tserverNodes}
+                  --ssh-key-path=${sshKeyPath}
+                  --iter-num=${iterNum}
+                </args>
+              </configuration>
+            </plugin>
+            <plugin>
+              <!-- Do not run the checkstyle plugin for clojure sources -->
+              <groupId>org.apache.maven.plugins</groupId>
+              <artifactId>maven-checkstyle-plugin</artifactId>
+              <version>2.17</version>
+              <executions>
+                <execution>
+                  <id>validate</id>
+                  <phase>none</phase>
+                </execution>
+              </executions>
+            </plugin>
+        </plugins>
+        <resources>
+          <resource>
+            <directory>resources</directory>
+            <filtering>false</filtering>
+            <includes>
+              <include>kudu.flags</include>
+              <include>ntp.conf.common</include>
+              <include>ntp.conf.server</include>
+            </includes>
+          </resource>
+        </resources>
+    </build>
+</project>

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/kudu.flags
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/kudu.flags b/java/kudu-jepsen/resources/kudu.flags
new file mode 100644
index 0000000..0890740
--- /dev/null
+++ b/java/kudu-jepsen/resources/kudu.flags
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This overrides all flags in a flag file
+--unlock_experimental_flags
+--unlock_unsafe_flags
+--logtostderr

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/ntp.conf.common
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/ntp.conf.common b/java/kudu-jepsen/resources/ntp.conf.common
new file mode 100644
index 0000000..6543eb1
--- /dev/null
+++ b/java/kudu-jepsen/resources/ntp.conf.common
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+tinker panic 0
+enable kernel
+enable ntp
+enable stats
+statistics loopstats peerstats clockstats sysstats
+filegen loopstats file loopstats type day enable
+filegen peerstats file peerstats type day enable
+filegen clockstats file clockstats type day enable
+filegen sysstats file sysstats type day enable
+logconfig =syncall +clockall +sysall +peerall
+logfile /var/log/ntpd.log
+statsdir /var/log/ntpstats/
+driftfile /var/lib/ntp/ntp.drift

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/resources/ntp.conf.server
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/resources/ntp.conf.server b/java/kudu-jepsen/resources/ntp.conf.server
new file mode 100644
index 0000000..aeac471
--- /dev/null
+++ b/java/kudu-jepsen/resources/ntp.conf.server
@@ -0,0 +1,3 @@
+enable calibrate
+server 127.127.1.0 burst iburst minpoll 4 maxpoll 4
+fudge 127.127.1.0 stratum 10

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
new file mode 100644
index 0000000..a871c63
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu.clj
@@ -0,0 +1,104 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu
+  "Tests for Apache Kudu"
+  (:require [clojure.tools.logging :refer :all]
+            [clojure.java.io :as io]
+            [clojure.java.shell :refer [sh]]
+            [clojure.string :as str]
+            [clojure.pprint :refer [pprint]]
+            [jepsen
+             [control :as c :refer [|]]
+             [db :as db]
+             [net :as net]
+             [tests :as tests]
+             [util :as util :refer [meh]]]
+            [jepsen.control.net :as cnet :refer [heal]]
+            [jepsen.control.util :as cu]
+            [jepsen.os.debian :as debian]
+            [jepsen.kudu.nemesis :as kn]
+            [jepsen.kudu.util :as ku]))
+
+(defn db
+  "Builds the Jepsen DB implementation used for every Kudu node.  Whether a
+  node hosts a tablet server, a master, or both is decided by looking the
+  node up in the :tservers and :masters lists of the test."
+  []
+  (let [hosts? (fn [t n role] (.contains (role t) n))]
+    (reify db/DB
+      (setup! [_ test node]
+        (info node "Setting up environment")
+        (c/su
+          ;; Heal the network first: grudges induced by a nemesis during
+          ;; a prior run might have left some links cut.
+          (meh (cnet/heal))
+
+          (c/exec :service :rsyslog :start)
+
+          (ku/prepare-node test node)
+          (ku/sync-time test node)
+          (ku/start-kudu test node))
+        (info node "Kudu ready"))
+
+      (teardown! [_ test node]
+        (info node "Tearing down Kudu")
+        (c/su
+          ;; The tablet server (if any) is stopped before the master.
+          (when (hosts? test node :tservers)
+            (ku/stop-kudu-tserver test node))
+          (when (hosts? test node :masters)
+            (ku/stop-kudu-master test node)))
+          ;; TODO collect table data for debugging
+        (info node "Kudu stopped"))
+
+      db/LogFiles
+      (log-files [_ test node]
+        ;; Collect only the logs of the daemons this node is listed for.
+        (cond-> []
+          (hosts? test node :tservers) (conj ku/kudu-tserver-log-file)
+          (hosts? test node :masters)  (conj ku/kudu-master-log-file))))))
+
+
+(defn merge-options
+  "Combines the defaults shared by all Kudu tests with the options of
+  a particular test; an entry supplied in opts takes precedence over the
+  corresponding default.  The :master-addresses and :nodes entries are always
+  derived from the resulting :masters and :tservers lists, replacing any
+  values passed under those keys in opts."
+  [opts]
+  (let [defaults {:os         debian/os
+                  :net        net/iptables
+                  :db         (db)
+                  ;; Nodes to run the tablet servers on.
+                  :tservers   [:n1 :n2 :n3 :n4 :n5]
+                  ;; Nodes to run the Kudu master on.
+                  :masters    [:m1]
+                  ;; Test name suffixed with the current time in milliseconds.
+                  :table-name
+                    (str (:name opts) "-" (System/currentTimeMillis))
+                  :ts-hb-interval-ms 1000
+                  :ts-hb-max-failures-before-backoff 3
+                  :ts-raft-hb-interval-ms 50
+                  :ranges      []}
+
+        merged (merge defaults opts)]
+    (assoc merged
+           :master-addresses (ku/concatenate-addresses ku/master-rpc-port
+                                                       (:masters merged))
+           :nodes (vec (concat (:tservers merged)
+                               (:masters merged))))))
+
+;; Entry point shared by all the Kudu tests.
+(defn kudu-test
+  "Returns the test map for the given test-specific options, as produced
+  by merge-options."
+  [opts]
+  (-> opts
+      merge-options))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
new file mode 100644
index 0000000..31ab90c
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/client.clj
@@ -0,0 +1,95 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.client
+  "Thin wrappers around Kudu Java client."
+  (:require [clojure.tools.logging :refer :all]
+            [clojure.pprint :refer [pprint]])
+  (:import [org.apache.kudu ColumnSchema
+                            ColumnSchema$ColumnSchemaBuilder
+                            Schema
+                            Type])
+  (:import [org.apache.kudu.client AbstractKuduScannerBuilder
+                                   AsyncKuduScanner$ReadMode
+                                   BaseKuduTest
+                                   CreateTableOptions
+                                   KuduClient
+                                   KuduClient$KuduClientBuilder
+                                   KuduPredicate
+                                   KuduPredicate$ComparisonOp
+                                   KuduScanner
+                                   KuduSession
+                                   KuduTable
+                                   OperationResponse
+                                   PartialRow
+                                   RowResult
+                                   RowResultIterator]))
+
+(defn sync-client
+  "Creates a new synchronous Kudu client for the given master addresses."
+  [master-addresses]
+  (.build (KuduClient$KuduClientBuilder. master-addresses)))
+
+(defn close-client
+  "Closes the given client.  An exception thrown while closing is logged
+  and not propagated to the caller."
+  [sync-client]
+  (try
+    (.close sync-client)
+    (catch Exception ex
+      (info "Error closing client: " ex))))
+
+(defn column-schema
+  "Builds a ColumnSchema with the given name and type.  The optional key?
+  flag is passed to the builder's key setting and defaults to false."
+  ([name type]
+   (column-schema name type false))
+  ([name type key?]
+   (let [builder (ColumnSchema$ColumnSchemaBuilder. name type)]
+     (.build (.key builder key?)))))
+
+(defn create-table
+  "Calls createTable on the given client with the specified table name,
+  schema and options, returning whatever the client returns."
+  [sync-client name schema options]
+  (. sync-client (createTable name schema options)))
+
+(defn open-table
+  "Calls openTable on the given client with the specified table name,
+  returning whatever the client returns."
+  [sync-client name]
+  (. sync-client (openTable name)))
+
+(defn rr->tuple
+  "Converts a RowResult into a map keyed by the column names (as keywords)
+  whose values are the contents of the corresponding cells of the row."
+  [row-result]
+  (let [;; Reads the cell at the given index using the getter which
+        ;; corresponds to the type of the column.
+        read-cell (fn [idx col-type]
+                    (condp = col-type
+                      Type/INT8 (.getByte row-result idx)
+                      Type/INT16 (.getShort row-result idx)
+                      Type/INT32 (.getInt row-result idx)
+                      Type/INT64 (.getLong row-result idx)
+                      Type/BINARY (.getBinaryCopy row-result idx)
+                      Type/STRING (.getString row-result idx)
+                      Type/BOOL (.getBoolean row-result idx)
+                      Type/FLOAT (.getFloat row-result idx)
+                      Type/DOUBLE (.getDouble row-result idx)
+                      Type/UNIXTIME_MICROS (.getLong row-result idx)))
+        to-entry (fn [idx column]
+                   (let [col-name (.getName column)
+                         col-type (.getType column)]
+                     [(keyword col-name) (read-cell idx col-type)]))
+        columns (.getColumns (.getSchema row-result))]
+    (into {} (map-indexed to-entry columns))))
+
+(defn drain-scanner-to-tuples
+  "Reads all the remaining rows from the scanner and returns them as
+  a vector of tuples (see rr->tuple), in the order they were scanned."
+  [scanner]
+  (loop [tuples []]
+    (if (.hasMoreRows scanner)
+      (let [batch (.nextRows scanner)]
+        ;; Convert every row before advancing the batch iterator.
+        (recur (loop [acc tuples]
+                 (if (.hasNext batch)
+                   (recur (conj acc (rr->tuple (.next batch))))
+                   acc))))
+      tuples)))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
new file mode 100644
index 0000000..ca106e0
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/nemesis.clj
@@ -0,0 +1,132 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.nemesis
+  "Nemeses for Apache Kudu."
+  (:refer-clojure :exclude [test])
+  (:require [jepsen
+             [client :as client]
+             [control :as c]
+             [nemesis :as nm]
+             [net :as net]
+             [util :as util]]
+            [clojure.tools.logging :refer :all]
+            [jepsen.kudu.util :as ku]))
+
+
+(defn tserver-partitioner
+  "Nemesis that partitions the network between tablet servers.  On a :start
+  operation it applies the links computed by `(grudge (:tservers test))`; on
+  a :stop operation it heals the network again.  The network is also healed
+  on setup and on teardown."
+  [grudge]
+  (letfn [(heal-all! [test]
+            (net/heal! (:net test) test))]
+    (reify client/Client
+      (setup! [this test _]
+        (heal-all! test)
+        this)
+
+      (invoke! [this test op]
+        (case (:f op)
+          :start (let [cuts (grudge (:tservers test))]
+                   (nm/partition! test cuts)
+                   (assoc op :value (str "Cut off " (pr-str cuts))))
+          :stop  (do
+                   (heal-all! test)
+                   (assoc op :value "fully connected"))))
+
+      (teardown! [this test]
+        (heal-all! test)))))
+
+
+(defn tserver-start-stopper
+  "Takes a targeting function which, given a list of nodes, returns a single
+  node or collection of nodes to affect, and two functions `(start! test node)`
+  invoked on nemesis start, and `(stop! test node)` invoked on nemesis stop.
+  Returns a nemesis which responds to :start and :stop by running the start!
+  and stop! fns on each of the given nodes. During `start!` and `stop!`, binds
+  the `jepsen.control` session to the given node, so you can just call `(c/exec
+  ...)`.
+
+  Re-selects a fresh node (or nodes) for each start--if targeter returns nil,
+  skips the start. The return values from the start and stop fns will become
+  the :values of the returned :info operations from the nemesis, e.g.:
+
+      {:value {:n1 [:killed \"java\"]}}"
+  [targeter start! stop!]
+  ;; `nodes` holds the currently disrupted nodes, or nil when idle.
+  (let [nodes (atom nil)]
+    (reify client/Client
+      (setup! [this test _] this)
+
+      (invoke! [this test op]
+        ;; Serialize invocations so a :start and a :stop cannot interleave.
+        (locking nodes
+          (assoc op :type :info, :value
+                 (case (:f op)
+                   ;; Targets are picked among the tablet servers only.
+                   :start (if-let [ns (-> test :tservers targeter util/coll)]
+                            ;; Only start when nothing is disrupted yet.
+                            (if (compare-and-set! nodes nil ns)
+                              (c/on-many ns (start! test c/*host*))
+                              (str "nemesis already disrupting "
+                                   (pr-str @nodes)))
+                            :no-target)
+                   :stop (if-let [ns @nodes]
+                           (let [value (c/on-many ns (stop! test c/*host*))]
+                             ;; Clear the state only after stop! has run.
+                             (reset! nodes nil)
+                             value)
+                           :not-started)))))
+
+      (teardown! [this test]))))
+
+
+(defn tserver-partition-random-halves
+  "Partitioner nemesis that shuffles the tablet servers, bisects the shuffled
+  list and cuts every link between the two halves."
+  []
+  (tserver-partitioner
+    (fn [tservers]
+      (-> tservers
+          shuffle
+          nm/bisect
+          nm/complete-grudge))))
+
+
+(defn tserver-partition-majorities-ring
+  "Returns a tserver-partitioner nemesis that uses `nm/majorities-ring` as its
+  grudge function: a grudge in which every tablet server can see a majority,
+  but no server sees the *same* majority as any other."
+  []
+  (tserver-partitioner nm/majorities-ring))
+
+
+(defn kill-restart-tserver
+  "Nemesis that kills and restarts tablet servers.  On `{:f :start}` it sends
+  SIGKILL to the kudu-tserver processes on the targeted node(s); on
+  `{:f :stop}` it starts the tablet server there again.  Nodes are chosen
+  with `(targeter list-of-nodes)`, which may return a single node or a
+  collection of nodes."
+  [targeter]
+  (let [kill! (fn [t n]
+                (c/su (c/exec :killall :-s :SIGKILL :kudu-tserver))
+                [:killed :kudu-tserver])
+        restart! (fn [t n]
+                   (ku/start-kudu-tserver t n)
+                   [:started :kudu-tserver])]
+    (tserver-start-stopper targeter kill! restart!)))
+
+(defn tserver-hammer-time
+  "Nemesis that pauses and resumes tablet servers.  On `{:f :start}` it sends
+  SIGSTOP to the kudu-tserver processes on the targeted node(s); on
+  `{:f :stop}` it sends SIGCONT to resume them.  Nodes are chosen with
+  `(targeter list-of-nodes)`, which defaults to `rand-nth` and may return
+  either a single node or a collection of nodes."
+  ([] (tserver-hammer-time rand-nth))
+  ([targeter]
+   (let [signal! (fn [sig]
+                   (c/su (c/exec :killall :-s sig :kudu-tserver)))
+         pause! (fn [t n]
+                  (signal! "STOP")
+                  [:paused :kudu-tserver])
+         resume! (fn [t n]
+                   (signal! "CONT")
+                   [:resumed :kudu-tserver])]
+     (tserver-start-stopper targeter pause! resume!))))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
new file mode 100644
index 0000000..b0753ba
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/register.clj
@@ -0,0 +1,93 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.register
+  "Simple linearizability test for a read/write register."
+  (:refer-clojure :exclude [test])
+  (:require [jepsen
+             [kudu :as kudu]
+             [client :as client]
+             [util :refer [majority]]
+             [checker    :as checker]
+             [generator  :as gen]
+             [nemesis    :as nemesis]]
+            [jepsen.kudu.client :as kc]
+            [jepsen.kudu.table :as kt]
+            [jepsen.kudu.nemesis :as kn]
+            [clojure.tools.logging :refer :all]
+            [knossos.model :as model]))
+
+;; Key of the single row that plays the role of the register.
+(def register-key "x")
+
+;; Operation generators: `r` produces a read invocation with no value yet,
+;; `w` produces a write invocation carrying a random integer in [0, 10).
+(defn r   [_ _] {:type :invoke, :f :read, :value nil})
+(defn w   [_ _] {:type :invoke, :f :write, :value (rand-int 10)})
+
+(defn client
+  "Jepsen client for the read/write register test.  `table-created?` is an
+  atom holding a boolean that is shared by all client instances; `kclient`
+  and `ktable` are the Kudu client and table used by invoke!, and are nil
+  for the prototype instance that only serves as a factory via setup!."
+  [table-created? kclient ktable]
+  (reify client/Client
+    (setup! [_ test _]
+      ;; Create the client and create/open the table.
+      (let [kclient (kc/sync-client (:master-addresses test))
+            table-name (:table-name test)
+            ;; The lock plus the compare-and-set ensure that only the first
+            ;; client to get here creates the table; every client opens it.
+            ktable (locking table-created?
+                     (when (compare-and-set! table-created? false true)
+                       (kc/create-table
+                         kclient
+                         table-name
+                         kt/kv-table-schema
+                         ;; Hash partitioning unless explicit ranges are set.
+                         (let [ranges (:table-ranges test)
+                               rep-factor (:num-replicas test)]
+                           (if (nil? ranges)
+                             (kt/kv-table-options-hash
+                               rep-factor (count (:tservers test)))
+                             (kt/kv-table-options-range
+                               rep-factor ranges)))))
+                       (kc/open-table kclient table-name))]
+        ;; Return a fresh client instance bound to the new connection.
+        (client table-created? kclient ktable)))
+
+    (invoke! [_ _ op]
+      (case (:f op)
+        :read  (assoc op :type :ok,
+                         :value (kt/kv-read kclient ktable register-key))
+        :write (do (kt/kv-write kclient ktable register-key (:value op))
+                   (assoc op :type :ok))))
+
+    (teardown! [_ _]
+      (kc/close-client kclient))))
+
+(defn register-test
+  "Builds the read/write register linearizability test.  The defaults below
+  are merged with `opts` (entries in `opts` win) and the result is handed to
+  `kudu/kudu-test`."
+  [opts]
+  (let [;; Nemesis schedule: sleep 5, :start, sleep 5, :stop, repeated.
+        nemesis-schedule (gen/seq (cycle [(gen/sleep 5)
+                                          {:type :info, :f :start}
+                                          (gen/sleep 5)
+                                          {:type :info, :f :stop}]))
+        workload (->> (gen/reserve 5 (gen/mix [w r]) r)
+                      (gen/stagger 1/3)
+                      (gen/nemesis nemesis-schedule)
+                      (gen/time-limit 60))
+        defaults {:name    "rw-register"
+                  :client (client (atom false) nil nil)
+                  :concurrency 10
+                  :num-replicas 5
+                  :nemesis  nemesis/noop
+                  :model   (model/register)
+                  :generator workload
+                  :checker (checker/compose
+                             {:perf   (checker/perf)
+                              :linear checker/linearizable})}]
+    (kudu/kudu-test (merge defaults opts))))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
new file mode 100644
index 0000000..5338dad
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/table.clj
@@ -0,0 +1,107 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.table
+  "Utilities to work with kudu tables, for testing."
+  (:require [clojure.tools.logging :refer :all]
+            [clojure.pprint :refer [pprint]]
+            [jepsen.kudu.client :as c])
+  (:import [org.apache.kudu ColumnSchema
+                            ColumnSchema$ColumnSchemaBuilder
+                            Schema
+                            Type])
+  (:import [org.apache.kudu.client AbstractKuduScannerBuilder
+                                   AsyncKuduScanner$ReadMode
+                                   BaseKuduTest
+                                   CreateTableOptions
+                                   KuduClient
+                                   KuduClient$KuduClientBuilder
+                                   KuduPredicate
+                                   KuduPredicate$ComparisonOp
+                                   KuduScanner
+                                   KuduSession
+                                   KuduTable
+                                   OperationResponse
+                                   PartialRow
+                                   RowResult
+                                   RowResultIterator
+                                   Upsert]))
+;;
+;; KV Table utilities
+;;
+
+;; Schema of the Key/Value table: a string column "key", which is the key
+;; column, and an INT32 column "value".
+(def kv-table-schema
+  (let [key-column (c/column-schema "key" Type/STRING true)
+        value-column (c/column-schema "value" Type/INT32 false)]
+    (Schema. [key-column value-column])))
+
+
+(defn kv-table-options-range
+  "Builds CreateTableOptions for a K/V table that is range-partitioned on
+  \"key\".  `ranges` is a sequence of [start end] vectors; one range
+  partition is added per element, with the lower bound taken from index 0
+  and the upper bound from index 1.  The replication factor is set to
+  `num-replicas`."
+  [num-replicas ranges]
+  (let [bound-row (fn [key-value]
+                    (doto (.newPartialRow kv-table-schema)
+                      (.addString "key" key-value)))
+        options (doto (CreateTableOptions.)
+                  (.setRangePartitionColumns ["key"])
+                  (.setNumReplicas num-replicas))]
+    (doseq [bounds ranges]
+      (.addRangePartition options
+                          (bound-row (get bounds 0))
+                          (bound-row (get bounds 1))))
+    options))
+
+
+(defn kv-table-options-hash
+  "Builds CreateTableOptions for a K/V table whose \"key\" column is hash
+  partitioned into `buckets-num` buckets, with the replication factor set to
+  `num-replicas`."
+  [num-replicas buckets-num]
+  (doto (CreateTableOptions.)
+    (.setRangePartitionColumns ["key"])
+    (.setNumReplicas num-replicas)
+    (.addHashPartitions ["key"] buckets-num)))
+
+
+(defn kv-write
+  "Upserts the row (key, value) into a KV table.
+
+  `sync-client` is the Kudu client used to open a session, `table` the opened
+  KV table, `key` a string and `value` a number coerced with `int`.  Fails
+  with an assertion error when the response reports a row error.  Returns
+  nil on success."
+  [sync-client table key value]
+  (let [upsert (.newUpsert table)
+        row (.getRow upsert)]
+    (.addString row "key" key)
+    (.addInt row "value" (int value))
+    ;; A session is opened per write, so close it once the operation has
+    ;; been applied; otherwise every write leaves an open session behind.
+    (let [session (.newSession sync-client)]
+      (try
+        (let [response (.apply session upsert)]
+          (assert (not (.hasRowError response))
+                  (str "Got a row error: " response)))
+        (finally
+          (.close session))))))
+
+(defn kv-read
+  "Looks up `key` in a KV table with a READ_AT_SNAPSHOT scan restricted by an
+  equality predicate on the \"key\" column.  Returns the :value of the single
+  matching row, or nil when no row matches; more than one match trips an
+  assertion."
+  [sync-client table key]
+  (let [key-equals (KuduPredicate/newComparisonPredicate
+                     (c/column-schema "key" Type/STRING)
+                     KuduPredicate$ComparisonOp/EQUAL
+                     key)
+        builder (doto (.newScannerBuilder sync-client table)
+                  (.readMode AsyncKuduScanner$ReadMode/READ_AT_SNAPSHOT)
+                  (.addPredicate key-equals))
+        rows (c/drain-scanner-to-tuples (.build builder))
+        found (count rows)]
+    (cond
+      (= 0 found) nil
+      (= 1 found) (:value (get rows 0))
+      :else (assert false (str "Expected 0 or 1 rows. Got: " (count rows))))))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
new file mode 100644
index 0000000..2e21794
--- /dev/null
+++ b/java/kudu-jepsen/src/main/clojure/jepsen/kudu/util.clj
@@ -0,0 +1,421 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu.util
+  "Utilities for Apache Kudu jepsen tests"
+  (:require [clojure.tools.logging :refer :all]
+            [clojure.java.io :as io]
+            [clojure.string :as str]
+            [jepsen
+             [control :as c :refer [|]]
+             [util :as util :refer [meh]]]
+            [jepsen.control.util :as cu]
+            [jepsen.os.debian :as debian]))
+
+(defn path
+  "Joins the given path components with the Unix path separator '/' and
+  returns the resulting string."
+  [& components]
+  (->> components
+       (interpose "/")
+       (apply str)))
+
+;; TODO(aserbin): make it possible to set the version via a run-time option.
+;;
+;; The empty string corresponds to the latest snapshot from the main trunk.
+;; To run against some other branch, set to "<major>.<minor>.<patch>";
+;; e.g. set "1.2.0" to run against packages built for Kudu 1.2.0 release.
+(def kudu-pkg-version "")
+
+;; APT repository the Kudu packages are fetched from; the version above is
+;; spliced into the URL.
+(def kudu-repo-url
+  (str "http://repos.jenkins.cloudera.com/kudu" kudu-pkg-version
+       "-nightly/debian/jessie/amd64/kudu"))
+(def kudu-repo-name "kudu-nightly")
+(def kudu-repo-apt-line (str "deb " kudu-repo-url " jessie-kudu contrib"))
+(def kudu-required-packages
+  "The set of the required system packages (more are installed by dependency)."
+  [:libsasl2-modules
+   :libsasl2-modules-gssapi-mit
+   :lsb-release
+   :ntp
+   :openssl])
+(def kudu-master-pkg :kudu-master)
+(def kudu-tserver-pkg :kudu-tserver)
+
+;; Local directory the Kudu binaries are uploaded from.
+;; NOTE(review): the path is relative, presumably to the directory the tests
+;; are launched from -- confirm.
+(def kudu-build-dir "../../build/latest")
+
+;; Locations used on the Kudu nodes.
+(def kudu-conf-dir "/etc/kudu/conf")
+(def kudu-master-gflagfile (path kudu-conf-dir "master.gflagfile"))
+(def kudu-tserver-gflagfile (path kudu-conf-dir "tserver.gflagfile"))
+(def kudu-target-bin-dir "/opt/local/bin")
+(def kudu-target-sbin-dir "/opt/local/sbin")
+;; User and group for the Kudu daemons.
+(def kudu-uname "kudu")
+(def kudu-uid 999)
+(def kudu-gname "kudu")
+(def kudu-gid 999)
+(def kudu-home-dir "/var/lib/kudu")
+(def kudu-master-home-dir (path kudu-home-dir "master"))
+(def kudu-tserver-home-dir (path kudu-home-dir "tserver"))
+(def kudu-log-dir "/var/log/kudu")
+(def kudu-master-log-file (path kudu-log-dir "kudu-master.log"))
+(def kudu-tserver-log-file (path kudu-log-dir "kudu-tserver.log"))
+(def kudu-run-dir "/var/run/kudu")
+(def kudu-master-pid-file (path kudu-run-dir "kudu-master-kudu.pid"))
+(def kudu-tserver-pid-file (path kudu-run-dir "kudu-tserver-kudu.pid"))
+
+;; RPC ports the master and the tablet server bind to.
+(def master-rpc-port 7051)
+(def tserver-rpc-port 7050)
+
+(defn kudu-cli
+  "Returns the command used to invoke the Kudu CLI tool: the full path under
+  the target bin directory when the binaries were uploaded, or just the
+  binary name when Kudu was installed from packages."
+  [test node]
+  (if-not (:use-packages? test)
+    (path kudu-target-bin-dir "kudu")
+    ;; Packaged installs rely on the standard PATH env variable.
+    "kudu"))
+
+
+(defn concatenate-addresses
+  "Builds a comma-separated address list of the form
+  'h0:port,h1:port,h2:port' from a port and a list of hostnames."
+  [port hosts]
+  (let [address (fn [host] (str (name host) ":" port))]
+    (->> hosts
+         (map address)
+         (str/join ","))))
+
+
+(defn group-exist?
+  "Returns true when /etc/group on the current node has an entry for the
+  group called exactly `group-name`, false otherwise.  Must be called in the
+  context of an established control session, since it runs `egrep` there."
+  [group-name]
+  ;; Entries look like 'name:password:gid:members'.  Including the ':' in the
+  ;; pattern stops 'kudu' from matching unrelated groups such as 'kudu-admin'.
+  (try (c/exec :egrep (str "^" group-name ":") "/etc/group")
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn user-exist?
+  "Returns true when running `id user-name` on the current node succeeds,
+  false when it raises a RuntimeException."
+  [user-name]
+  (try (c/exec :id user-name)
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn ntp-in-sync?
+  "Is the NTP server in sync state?  Returns true when `ntp-wait -n1 -s1`
+  succeeds and false when it raises a RuntimeException.  This function
+  should be called in the context of already established SSH session at
+  the node."
+  []
+  (try (c/exec :ntp-wait :-n1 :-s1)
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn kudu-master-in-service?
+  "Is the Kudu master process at the specified node in service already?
+  Probes the master by running the Kudu CLI `table list` command against
+  `node`; returns true when it succeeds and false when it raises a
+  RuntimeException.  This function should be called in the context of
+  already established SSH session at the node."
+  [test node]
+  (try (c/exec (kudu-cli test node) :table :list node)
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn kudu-master-see-tservers?
+  "Returns true when the output of the Kudu CLI `cluster ksck` command, run
+  against `node`, contains the line reporting that info was fetched from
+  all `tservers-count` tablet servers; false when the pipeline fails."
+  [test node tservers-count]
+  (let [expected-line (str "Fetched info from all " tservers-count
+                           " Tablet Servers")]
+    (try
+      (c/exec (kudu-cli test node) :cluster :ksck node | :grep expected-line)
+      true
+      (catch RuntimeException _
+        false))))
+
+
+(defn kudu-tserver-in-service?
+  "Is the Kudu tserver process at the specified node up and running?
+  Probes the tablet server by running the Kudu CLI `tserver status` command
+  against `node`; returns true when it succeeds and false when it raises a
+  RuntimeException.  This function should be called in the context of
+  already established SSH session at the node."
+  [test node]
+  (try (c/exec (kudu-cli test node) :tserver :status node)
+       true
+       (catch RuntimeException _ false)))
+
+
+(defn start-kudu-master
+  "Start Kudu master daemon at the specified node and wait until it is in
+  service and sees all tablet servers listed in the test. This function
+  should be called in the super-user context (jepsen.control/su)."
+  [test node]
+  (info node "Starting Kudu Master")
+  ;; Packaged installs use the service script; otherwise the uploaded binary
+  ;; is launched via start-stop-daemon as the kudu user.
+  (let [use-svc-scripts? (:use-packages? test)]
+    (if use-svc-scripts?
+      (c/exec :service :kudu-master :start)
+      (c/exec :sudo :-u kudu-uname :start-stop-daemon
+              :--start
+              :--background
+              :--make-pidfile
+              :--pidfile      kudu-master-pid-file
+              :--chdir        kudu-home-dir
+              :--no-close
+              :--oknodo
+              :--exec         (path kudu-target-sbin-dir "kudu-master")
+              :--
+              :--flagfile kudu-master-gflagfile
+              :>> kudu-master-log-file (c/lit "2>&1"))))
+
+  ;; Wait for the master services to become available (i.e. wait for the
+  ;; catalog manager), polling every 500 ms.
+  (loop [iteration 0]
+    (when-not (kudu-master-in-service? test node)
+      (if (> iteration 100)
+        ;; Out of retries: run a command that ends with `false` so that the
+        ;; remote command line fails.
+        (c/exec :echo "timeout waiting for master server to start" (c/lit ";")
+                :false)
+        (do
+          (Thread/sleep 500)
+          (recur (inc iteration))))))
+
+  ;; Wait until the master sees all tservers in the cluster. Otherwise
+  ;; the client would not be able to create a table with the desired
+  ;; replication factor when not all tservers have registered.
+  (let [tservers-count (count (:tservers test))]
+    (loop [iteration 0]
+      (when-not (kudu-master-see-tservers? test node tservers-count)
+        (if (> iteration 200)
+          (c/exec :echo "timeout waiting for all tservers to start" (c/lit ";")
+                  :false)
+          (do
+            (Thread/sleep 500)
+            (recur (inc iteration))))))))
+
+
+(defn stop-kudu-master
+  "Stops the Kudu master daemon at the specified node: through the service
+  script for packaged installs (errors ignored), otherwise via its pid
+  file.  This function should be called in the super-user context
+  (jepsen.control/su)."
+  [test node]
+  (info node "Stopping Kudu Master")
+  (if-not (:use-packages? test)
+    (cu/stop-daemon! "kudu-master" kudu-master-pid-file)
+    (meh (c/exec :service :kudu-master :stop))))
+
+
+(defn start-kudu-tserver
+  "Start Kudu tablet server daemon at the specified node and wait until it
+  responds to a status request. This function should be called in the
+  super-user context (jepsen.control/su)."
+  [test node]
+  (info node "Starting Kudu Tablet Server")
+  ;; Packaged installs use the service script; otherwise the uploaded binary
+  ;; is launched via start-stop-daemon as the kudu user.
+  (let [use-svc-scripts? (:use-packages? test)]
+    (if use-svc-scripts?
+      (c/exec :service :kudu-tserver :start)
+      (c/exec :sudo :-u kudu-uname :start-stop-daemon
+              :--start
+              :--background
+              :--make-pidfile
+              :--pidfile      kudu-tserver-pid-file
+              :--chdir        kudu-home-dir
+              :--no-close
+              :--oknodo
+              :--exec         (path kudu-target-sbin-dir "kudu-tserver")
+              :--
+              :--flagfile kudu-tserver-gflagfile
+              :>> kudu-tserver-log-file (c/lit "2>&1"))))
+
+  ;; Wait for the tablet server to come online, polling every 500 ms.
+  (loop [iteration 0]
+    (when-not (kudu-tserver-in-service? test node)
+      (if (> iteration 100)
+        ;; Out of retries: run a command that ends with `false` so that the
+        ;; remote command line fails.
+        (c/exec :echo "timeout waiting for tablet server to start" (c/lit ";")
+                :false)
+        (do
+          (Thread/sleep 500)
+          (recur (inc iteration)))))))
+
+
+(defn stop-kudu-tserver
+  "Stops the Kudu Tablet Server on the specified node: through the service
+  script for packaged installs (errors ignored), otherwise via its pid
+  file."
+  [test node]
+  (info node "Stopping Kudu Tablet Server")
+  (if-not (:use-packages? test)
+    (cu/stop-daemon! "kudu-tserver" kudu-tserver-pid-file)
+    (meh (c/exec :service :kudu-tserver :stop))))
+
+
+(defn kudu-cfg-master
+  "Returns the contents of the Kudu master flags file: one flag per line."
+  [test]
+  (let [masters (:masters test)
+        base-flags [(str "--fs_wal_dir=" kudu-master-home-dir)
+                    (str "--fs_data_dirs=" kudu-master-home-dir)
+                    (str "--log_dir=" kudu-log-dir)
+                    (str "--rpc_bind_addresses=0.0.0.0:" master-rpc-port)]
+        multi-master? (> (count masters) 1)]
+    (str/join
+      "\n"
+      ;; Only set the master addresses when there is more than one master.
+      (cond-> base-flags
+        multi-master? (conj (str "--master_addresses="
+                                 (concatenate-addresses master-rpc-port
+                                                        masters)))))))
+
+
+(defn kudu-cfg-tserver
+  "Returns the contents of the Kudu tserver flags file: one flag per line,
+  with the heartbeat settings taken from the test map and the master
+  addresses listed last."
+  [test]
+  (let [master-addrs (concatenate-addresses master-rpc-port (:masters test))]
+    (str/join
+      "\n"
+      [(str "--fs_wal_dir=" kudu-tserver-home-dir)
+       (str "--fs_data_dirs=" kudu-tserver-home-dir)
+       (str "--log_dir=" kudu-log-dir)
+       (str "--rpc_bind_addresses=0.0.0.0:" tserver-rpc-port)
+       (str "--heartbeat_interval_ms=" (:ts-hb-interval-ms test))
+       (str "--raft_heartbeat_interval_ms=" (:ts-raft-hb-interval-ms test))
+       (str "--heartbeat_max_failures_before_backoff="
+            (:ts-hb-max-failures-before-backoff test))
+       (str "--tserver_master_addrs=" master-addrs)])))
+
+
+(defn ntp-server-config
+  "Returns ntp.conf contents for a Kudu master node: the bundled
+  ntp.conf.common resource followed by ntp.conf.server, separated by a
+  newline."
+  []
+  (str (slurp (io/resource "ntp.conf.common"))
+       "\n"
+       (slurp (io/resource "ntp.conf.server"))))
+
+
+(defn ntp-slave-config
+  "Returns ntp.conf contents for a Kudu tserver node: the bundled
+  ntp.conf.common resource followed by one `server` line per entry of
+  `servers`."
+  [servers]
+  (let [common-opts (slurp (io/resource "ntp.conf.common"))
+        server-line (fn [server]
+                      (str "server " (name server)
+                           " burst iburst prefer minpoll 4 maxpoll 4"))]
+    (str common-opts
+         "\n"
+         (->> servers
+              (map server-line)
+              (str/join "\n")))))
+
+
+(defn prepare-node-with-pkgs
+  "Prepare a Kudu node: install Kudu using packages.
+
+  Registers the Kudu APT repository and its key unless the repository list
+  file already exists, then installs the kudu-master and/or kudu-tserver
+  package depending on whether `node` is listed in (:masters test) and/or
+  (:tservers test)."
+  [test node]
+  (let [repo-file (str "/etc/apt/sources.list.d/"
+                       (name kudu-repo-name) ".list")]
+    (when-not (cu/exists? repo-file)
+      ;; The logging macros already put a space between their arguments, so
+      ;; the message fragments carry no padding of their own.
+      (info node "Adding" kudu-repo-name "package repository")
+      (debian/add-repo! kudu-repo-name kudu-repo-apt-line)
+      (info node "Fetching" kudu-repo-name "package key")
+      (c/exec :curl :-fLSs (str kudu-repo-url "/" "archive.key") |
+              :apt-key :add :-)
+      (info node "Updating package index")
+      (debian/update!)))
+
+  (when (.contains (:masters test) node)
+    (when-not (debian/installed? kudu-master-pkg)
+      (info node "Installing kudu-master package")
+      (debian/install kudu-master-pkg)))
+  (when (.contains (:tservers test) node)
+    (when-not (debian/installed? kudu-tserver-pkg)
+      (info node "Installing kudu-tserver package")
+      (debian/install kudu-tserver-pkg))))
+
+
+(defn prepare-node-with-binaries
+  "Prepare Kudu node: create the directory structure and put the necessary
+  Kudu binaries in place.
+
+  Creates the kudu group and user when missing, creates the configuration,
+  home, run and log directories, recreates the target bin/sbin directories
+  and uploads the kudu-master and/or kudu-tserver binary (depending on
+  whether `node` is in (:masters test) and/or (:tservers test)) together
+  with the kudu CLI tool from the local build directory."
+  [test node]
+
+  (when-not (group-exist? kudu-gname)
+    (c/exec :groupadd :-o :-g kudu-gid kudu-gname))
+  (when-not (user-exist? kudu-uname)
+    (c/exec :useradd :-o :-u kudu-uid :-g kudu-gname :-d kudu-home-dir
+            :-s "/usr/sbin/nologin" kudu-uname))
+
+  ;; Prepare directory structure for the files.
+  (c/exec :mkdir :-p kudu-conf-dir)
+  ;; The home directories are already complete paths; only they are passed
+  ;; to mkdir.
+  (when (.contains (:masters test) node)
+    (c/exec :mkdir :-p kudu-master-home-dir))
+  (when (.contains (:tservers test) node)
+    (c/exec :mkdir :-p kudu-tserver-home-dir))
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-home-dir)
+
+  (c/exec :mkdir :-p kudu-run-dir)
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-run-dir)
+
+  (c/exec :mkdir :-p kudu-log-dir)
+  (c/exec :chown :-R (str kudu-uname ":" kudu-gname) kudu-log-dir)
+
+  ;; Start from empty bin/sbin directories so that stale binaries from a
+  ;; previous run cannot be picked up.
+  (c/exec :rm :-rf kudu-target-bin-dir)
+  (c/exec :mkdir :-p kudu-target-bin-dir)
+
+  (c/exec :rm :-rf kudu-target-sbin-dir)
+  (c/exec :mkdir :-p kudu-target-sbin-dir)
+
+  ;; Copy appropriate binaries to the node.
+  (when (.contains (:masters test) node)
+    (let [master-binary-src (path kudu-build-dir "bin" "kudu-master")
+          master-binary-dst (path kudu-target-sbin-dir "kudu-master")]
+      (c/upload master-binary-src kudu-target-sbin-dir)
+      (c/exec :chmod 755 master-binary-dst)))
+  (when (.contains (:tservers test) node)
+    (let [tserver-binary-src (path kudu-build-dir "bin" "kudu-tserver")
+          tserver-binary-dst (path kudu-target-sbin-dir "kudu-tserver")]
+      (c/upload tserver-binary-src kudu-target-sbin-dir)
+      (c/exec :chmod 755 tserver-binary-dst)))
+  (let [kudu-cli-binary-src (path kudu-build-dir "bin" "kudu")
+        kudu-cli-binary-dst (path kudu-target-bin-dir "kudu")]
+    (c/upload kudu-cli-binary-src kudu-target-bin-dir)
+    (c/exec :chmod 755 kudu-cli-binary-dst)))
+
+
+(defn prepare-node
+  "Prepares a Kudu node: installs the required system packages when missing,
+  installs Kudu either from packages or from uploaded binaries, then wipes
+  the home directory and log file of each role the node has and writes that
+  role's flags file."
+  [test node]
+  (let [has-role? (fn [role] (.contains (role test) node))
+        ;; Removes old state of one role and writes its gflagfile, which is
+        ;; the bundled kudu.flags resource followed by the role's own flags.
+        reset-role! (fn [home-dir log-file cfg-fn gflagfile]
+                      (c/exec :rm :-rf home-dir)
+                      (c/exec :rm :-f log-file)
+                      (c/exec :echo (str (slurp (io/resource "kudu.flags"))
+                                         "\n" (cfg-fn test))
+                              :> gflagfile))]
+    (when-not (debian/installed? kudu-required-packages)
+      (info node "Installing required packages")
+      (debian/install kudu-required-packages))
+
+    (if (:use-packages? test)
+      (prepare-node-with-pkgs test node)
+      (prepare-node-with-binaries test node))
+
+    (when (has-role? :masters)
+      (reset-role! kudu-master-home-dir kudu-master-log-file
+                   kudu-cfg-master kudu-master-gflagfile))
+    (when (has-role? :tservers)
+      (reset-role! kudu-tserver-home-dir kudu-tserver-log-file
+                   kudu-cfg-tserver kudu-tserver-gflagfile))))
+
+
+(defn sync-time
+  "When ntpd is not in synchronized state, revamps its configs and restarts
+  the ntpd daemon, then waits for it to report a synchronized state.  Does
+  nothing when ntpd is already in sync."
+  [test node]
+  (when-not (ntp-in-sync?)
+    ;; Stopping must not fail the whole step, hence the '|| true'.
+    (c/exec :service :ntp :stop "||" :true)
+    (c/exec :echo "NTPD_OPTS='-g -N'" :> "/etc/default/ntp")
+    ;; Masters get the server config; tservers get a config listing the
+    ;; masters as their servers.  For a node that is both, the second write
+    ;; replaces the first.
+    (when (.contains (:masters test) node)
+      (c/exec :echo (ntp-server-config) :> "/etc/ntp.conf"))
+    (when (.contains (:tservers test) node)
+      (c/exec :echo (ntp-slave-config (:masters test)):> "/etc/ntp.conf"))
+    (c/exec :service :ntp :start)
+    ;; Wait for 5 minutes max for ntpd to get into synchronized state.
+    (c/exec :ntp-wait :-s1 :-n300)))
+
+
+(defn start-kudu
+  "Starts the Kudu services that belong on `node`: the master when the node
+  is listed in (:masters test), then the tablet server when it is listed in
+  (:tservers test)."
+  [test node]
+  (let [has-role? (fn [role] (.contains (role test) node))]
+    (when (has-role? :masters)
+      (start-kudu-master test node))
+    (when (has-role? :tservers)
+      (start-kudu-tserver test node))))

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj b/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
new file mode 100644
index 0000000..d00c26a
--- /dev/null
+++ b/java/kudu-jepsen/src/test/clojure/jepsen/kudu_test.clj
@@ -0,0 +1,84 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+(ns jepsen.kudu-test
+  (:require [clojure.test :refer :all]
+            [jepsen.core :as jepsen]
+            [jepsen.nemesis :as jn]
+            [jepsen.tests :as tests]
+            [jepsen.kudu :as kudu]
+            [jepsen.kudu.nemesis :as kn]
+            [jepsen.kudu.register :as kudu-register]))
+
+(defn check
+  "Runs the Jepsen test built by (tcasefun opts) and asserts its results are valid."
+  [tcasefun opts] (is (:valid? (:results (jepsen/run! (tcasefun opts))))))
+
+(defmacro dt
+  "Defines a clojure.test test named <tfun>-<tsuffix> that calls (check tfun topts)."
+  [tfun tsuffix topts]
+  `(clojure.test/deftest ~(symbol (str (name tfun) "-" tsuffix)) (check ~tfun ~topts)))
+
+(defn dt-func
+  "Builds, without evaluating it, the `dt` form for a test, scenario and options."
+  [tfun tscenario topts] (list `dt tfun tscenario topts))
+
+(defmacro instantiate-tests
+  "Expands into a `do` form defining one test per entry of `config`.  Both
+  `config` and `topts` are evaluated once, at macro-expansion time; the
+  entry's :scenario becomes the test name suffix and its :nemesis form is
+  merged into the test options."
+  [tfun config topts]
+  (let [scenarios (eval config)
+        base-opts (eval topts)]
+    `(do ~@(for [{:keys [scenario nemesis]} scenarios]
+             (dt-func tfun scenario (merge base-opts {:nemesis nemesis}))))))
+
+;; Configurations for tests.  Each one names a "scenario" (the test name suffix)
+;; and carries the quoted nemesis form to run the register test with.
+(def register-test kudu-register/register-test)
+(def register-test-configs
+  [
+   {:scenario "noop-nemesis"
+    :nemesis '((fn [] jn/noop))}
+   {:scenario "tserver-random-halves"
+    :nemesis '(kn/tserver-partition-random-halves)}
+   {:scenario "tserver-majorities-ring"
+    :nemesis '(kn/tserver-partition-majorities-ring)}
+   {:scenario "kill-restart-2-tservers"
+    :nemesis '(kn/kill-restart-tserver (comp (partial take 2) shuffle))}
+   {:scenario "kill-restart-3-tservers"
+    :nemesis '(kn/kill-restart-tserver (comp (partial take 3) shuffle))}
+   {:scenario "kill-restart-all-tservers"
+    :nemesis '(kn/kill-restart-tserver shuffle)}
+   {:scenario "all-random-halves"
+    :nemesis '(jn/partition-random-halves)}
+   {:scenario "all-majorities-ring"
+    :nemesis '(jn/partition-majorities-ring)}
+   {:scenario "hammer-2-tservers"
+    :nemesis '(kn/tserver-hammer-time (comp (partial take 2) shuffle))}
+   {:scenario "hammer-3-tservers"
+    :nemesis '(kn/tserver-hammer-time (comp (partial take 3) shuffle))}
+   {:scenario "hammer-all-tservers"
+    :nemesis '(kn/tserver-hammer-time shuffle)}
+   ])
+
+(defmacro instantiate-all-kudu-tests
+  "Expands into test definitions for all register test configs, run with `opts`."
+  [opts] `(instantiate-tests register-test register-test-configs ~opts))
+
+(instantiate-all-kudu-tests {})

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/kudu-jepsen/src/utils/kudu_test_runner.clj
----------------------------------------------------------------------
diff --git a/java/kudu-jepsen/src/utils/kudu_test_runner.clj b/java/kudu-jepsen/src/utils/kudu_test_runner.clj
new file mode 100644
index 0000000..b43e3e6
--- /dev/null
+++ b/java/kudu-jepsen/src/utils/kudu_test_runner.clj
@@ -0,0 +1,115 @@
+;; Licensed to the Apache Software Foundation (ASF) under one
+;; or more contributor license agreements. See the NOTICE file
+;; distributed with this work for additional information
+;; regarding copyright ownership. The ASF licenses this file
+;; to you under the Apache License, Version 2.0 (the
+;; "License"); you may not use this file except in compliance
+;; with the License. You may obtain a copy of the License at
+;;
+;;   http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing,
+;; software distributed under the License is distributed on an
+;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+;; KIND, either express or implied. See the License for the
+;; specific language governing permissions and limitations
+;; under the License.
+
+;;
+;; This is a starter script for clojure-maven-plugin. The script parses the
+;; command-line arguments passed to it and starts the Kudu Jepsen tests with
+;; the appropriate parameters.
+;;
+;; The script is invoked by calling 'mvn clojure:run'. The 'clojure:run' goal
+;; is used instead of 'clojure:test' because the latter does not allow
+;; passing the customization parameters the tests need.
+;;
+;; The script accepts the following command-line options:
+;;   --masters=<list_of_kudu_master_hostnames>
+;;   --tservers=<list_of_kudu_tserver_hostnames>
+;;   --ssh-key-path=<path_to_private_ssh_key_to_login_into_kudu_nodes or empty>
+;;   --iter-num=<number_of_iterations_to_run> (default is 1)
+;; Hostnames in a list are separated by either a single space or a comma.
+;;
+
+(ns jepsen.kudu-test-runner
+  "Run Kudu jepsen tests via clojure-maven-plugin on 'mvn clojure:run'"
+  (:require [clojure.tools.logging :refer :all]
+            [clojure.string :as string]
+            [clojure.test :refer [run-tests]]
+            [clojure.tools.cli :refer [parse-opts]]
+            [jepsen.nemesis :as jn]
+            [jepsen.control :as jc]
+            [jepsen.kudu.nemesis :as kn]
+            [jepsen.kudu-test :refer [instantiate-all-kudu-tests]]))
+
+(defn parse-hostnames [hosts] (string/split hosts #"[, ]"))
+
+(defn parse-path
+  "Returns `input` trimmed of surrounding whitespace, or nil if nothing is left."
+  [input]
+  (not-empty (string/trim input)))
+
+(def cli-options  ; Option specs consumed by parse-opts in get-cmd-line-opts.
+  [
+   [:long-opt "--masters"
+    :required "<nodes>"
+    :desc "Set of Kudu master nodes"
+    :missing "Kudu master nodes are missing"
+    :parse-fn parse-hostnames]
+   [:long-opt "--tservers"
+    :required "<nodes>"
+    :desc "Set of Kudu tserver nodes"
+    :missing "Kudu tserver nodes are missing"
+    :parse-fn parse-hostnames]
+   [:long-opt "--ssh-key-path"
+    :required "<path_to_private_ssh_key>"
+    :desc "Path to the SSH private key to login into the Kudu nodes.
+          If not specified or empty, keys are retrieved from SSH agent."
+    :missing "Path to the SSH private key is not specified, using SSH agent."
+    :parse-fn parse-path]  ; a blank value is turned into nil
+   [:long-opt "--iter-num"
+    :required "<number_of_iterations>"
+    :default 1
+    :desc "Number of iterations to run the test suite in cycle."
+    :parse-fn #(Integer/parseInt %)]
+   ])
+
+(defn get-cmd-line-opts
+  "Parses *command-line-args* against `cli-options` and returns the :options
+  map only; the parse errors and the usage summary are discarded."
+  []
+  (:options (parse-opts *command-line-args* cli-options)))
+
+(do
+  (def cmd-line-opts (get-cmd-line-opts))
+  (def test-opts (dissoc cmd-line-opts :ssh-key-path :iter-num))  ; the rest goes to the tests
+  (def private-key-path (:ssh-key-path cmd-line-opts))
+  (def iter-num (:iter-num cmd-line-opts))
+  ;; Custom test reporter, dispatched on :type; defaults to the stock reporter.
+  (defmulti custom-report :type)
+  (def old-report clojure.test/report)
+  (defmethod custom-report :default [m]
+  (old-report m))
+  ;; On :begin-test-var, print the name of the test being started instead.
+  (defmethod custom-report :begin-test-var [m]
+  (println (-> m :var meta :name)))
+  (println "Running" iter-num "iteration(s) of the test suite")
+  (println "Running with ssh key:" private-key-path)
+  (println "Running with test options:" test-opts)
+  (jepsen.kudu-test/instantiate-all-kudu-tests test-opts)  ; defines the tests in this namespace
+  (binding [jc/*strict-host-key-checking* :no
+            jc/*private-key-path* private-key-path
+            clojure.test/report custom-report]
+    (loop [iteration 0]
+      (when (< iteration iter-num)
+        (let [summary (run-tests 'jepsen.kudu-test-runner)]
+          (when-not (= 0 (:fail summary))
+            (println "FAILURE: tests failed.")
+            (System/exit 1))
+          (when-not (= 0 (:error summary))
+            (println "ERROR: encountered errors while running the tests.")
+            (System/exit 1))
+          (println "SUCCESS: all tests passed; no errors.")
+          (recur (inc iteration)))))
+          (System/exit 0)))  ; reached only when every iteration passed

http://git-wip-us.apache.org/repos/asf/kudu/blob/6985a544/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index 4a15908..cdc5153 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -305,5 +305,17 @@
         <module>kudu-csd</module>
       </modules>
     </profile>
+
+    <!-- Build the jepsen test for Kudu.
+
+         Disabled by default since Java 8 and maven >= 3.3.6 are required to
+         build the kudu-jepsen artifact.  To enable, add '-Pjepsen'
+         to the maven command line. -->
+    <profile>
+      <id>jepsen</id>
+      <modules>
+        <module>kudu-jepsen</module>
+      </modules>
+    </profile>
   </profiles>
 </project>


[2/2] kudu git commit: env_util: Factor out helper DeleteExcessFilesByPattern()

Posted by ad...@apache.org.
env_util: Factor out helper DeleteExcessFilesByPattern()

The logic contained in here is generally useful for log rotation
purposes, and we can reuse it for minidump rotation in a later patch.

Change-Id: I6e76911b0a68f7c1397d93eb027b6a5c99e2fbed
Reviewed-on: http://gerrit.cloudera.org:8080/5740
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fb3bbbc8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fb3bbbc8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fb3bbbc8

Branch: refs/heads/master
Commit: fb3bbbc8ce8527a63144b268cbad4aee92da4c1f
Parents: 6985a54
Author: Mike Percy <mp...@apache.org>
Authored: Wed Jan 18 17:49:05 2017 -0800
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Fri Jan 20 21:10:47 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/env_util-test.cc | 44 ++++++++++++++++++++++++++++++++++---
 src/kudu/util/env_util.cc      | 31 ++++++++++++++++++++++++++
 src/kudu/util/env_util.h       |  7 ++++++
 src/kudu/util/logging.cc       | 25 ++-------------------
 4 files changed, 81 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util-test.cc b/src/kudu/util/env_util-test.cc
index 90b308d..7c79068 100644
--- a/src/kudu/util/env_util-test.cc
+++ b/src/kudu/util/env_util-test.cc
@@ -15,15 +15,20 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <sys/statvfs.h>
+#include <sys/time.h>
 #include <unistd.h>
 
-#include "kudu/util/env_util.h"
+#include <memory>
+#include <unordered_set>
 
 #include <gflags/gflags.h>
-#include <memory>
-#include <sys/statvfs.h>
+#include <glog/stl_logging.h>
 
+#include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
@@ -33,6 +38,7 @@ DECLARE_int64(disk_reserved_bytes_free_for_testing);
 
 using std::string;
 using std::unique_ptr;
+using std::unordered_set;
 using strings::Substitute;
 
 namespace kudu {
@@ -108,5 +114,37 @@ TEST_F(EnvUtilTest, TestCreateDirsRecursively) {
   ASSERT_TRUE(is_dir);
 }
 
+// Ensure that DeleteExcessFilesByPattern() works.
+// We ensure that the number of files remaining after running it is the number
+// expected, and we manually set the modification times on the relevant files
+// (two seconds apart) to allow us to test that files are deleted oldest-first.
+TEST_F(EnvUtilTest, TestDeleteExcessFilesByPattern) {
+  string dir = JoinPathSegments(test_dir_, "excess");
+  ASSERT_OK(env_->CreateDir(dir));
+  vector<string> filenames = {"a", "b", "c", "d"};
+  const time_t now_sec = GetCurrentTimeMicros() / 1000000;  // Microseconds -> seconds.
+  for (int i = 0; i < filenames.size(); i++) {
+    const string& filename = filenames[i];
+    string path = JoinPathSegments(dir, filename);
+    unique_ptr<WritableFile> file;
+    ASSERT_OK(env_->NewWritableFile(path, &file));
+    ASSERT_OK(file->Close());
+
+    // Set the last-modified time of the file.
+    struct timeval target_time { .tv_sec = now_sec + (i * 2), .tv_usec = 0 };
+    struct timeval times[2] = { target_time, target_time };
+    ASSERT_EQ(0, utimes(path.c_str(), times)) << errno;
+  }
+  vector<string> children;
+  ASSERT_OK(env_->GetChildren(dir, &children));
+  ASSERT_EQ(6, children.size()); // 4 files plus "." and "..".
+  ASSERT_OK(DeleteExcessFilesByPattern(env_, dir + "/*", 2));
+  ASSERT_OK(env_->GetChildren(dir, &children));
+  ASSERT_EQ(4, children.size()); // 2 files plus "." and "..".
+  unordered_set<string> children_set(children.begin(), children.end());
+  unordered_set<string> expected_set({".", "..", "c", "d"});
+  ASSERT_EQ(expected_set, children_set) << children;
+}
+
 } // namespace env_util
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc
index e398831..178d391 100644
--- a/src/kudu/util/env_util.cc
+++ b/src/kudu/util/env_util.cc
@@ -236,6 +236,37 @@ Status CopyFile(Env* env, const string& source_path, const string& dest_path,
   return Status::OK();
 }
 
+Status DeleteExcessFilesByPattern(Env* env, const string& pattern, int max_matches) {
+  // A negative retention count is a caller bug; catch it in debug builds.
+  DCHECK_GE(max_matches, 0);
+
+  vector<string> paths;
+  RETURN_NOT_OK(env->Glob(pattern, &paths));
+
+  // Nothing to prune unless more files match than we are allowed to keep.
+  if (paths.size() <= max_matches) {
+    return Status::OK();
+  }
+  vector<pair<time_t, string>> by_mtime;
+  for (string& path : paths) {
+    int64_t mtime;
+    RETURN_NOT_OK(env->GetFileModifiedTime(path, &mtime));
+    by_mtime.emplace_back(mtime, std::move(path));
+  }
+
+  // Order oldest-first (ties fall back to comparing paths) and keep only the
+  // leading entries, i.e. the ones in excess of 'max_matches'. How well
+  // mtime discriminates depends on the filesystem's timestamp resolution,
+  // which is part of the documented contract.
+  std::sort(by_mtime.begin(), by_mtime.end());
+  by_mtime.resize(by_mtime.size() - max_matches);
+
+  for (const auto& doomed : by_mtime) {
+    RETURN_NOT_OK(env->DeleteFile(doomed.second));
+  }
+  return Status::OK();
+}
+
 ScopedFileDeleter::ScopedFileDeleter(Env* env, std::string path)
     : env_(DCHECK_NOTNULL(env)), path_(std::move(path)), should_delete_(true) {}
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.h b/src/kudu/util/env_util.h
index c54bfa8..884b17c 100644
--- a/src/kudu/util/env_util.h
+++ b/src/kudu/util/env_util.h
@@ -81,6 +81,13 @@ Status CreateDirsRecursively(Env* env, const std::string& path);
 Status CopyFile(Env* env, const std::string& source_path, const std::string& dest_path,
                 WritableFileOptions opts);
 
+// Deletes files matching 'pattern' in excess of 'max_matches' files.
+// 'max_matches' must be greater than or equal to 0.
+// The oldest files are deleted first, as determined by last modified time.
+// In the case that multiple files have the same last modified time, it is not
+// defined which file will be deleted first.
+Status DeleteExcessFilesByPattern(Env* env, const std::string& pattern, int max_matches);
+
 // Deletes a file or directory when this object goes out of scope.
 //
 // The deletion may be cancelled by calling .Cancel().

http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/logging.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.cc b/src/kudu/util/logging.cc
index 4438380..8332349 100644
--- a/src/kudu/util/logging.cc
+++ b/src/kudu/util/logging.cc
@@ -38,6 +38,7 @@
 #include "kudu/util/debug-util.h"
 #include "kudu/util/debug/leakcheck_disabler.h"
 #include "kudu/util/env.h"
+#include "kudu/util/env_util.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/status.h"
 
@@ -359,23 +360,10 @@ Status DeleteExcessLogFiles(Env* env) {
   if (max_log_files <= 0) return Status::OK();
 
   for (int severity = 0; severity < google::NUM_SEVERITIES; ++severity) {
-    vector<string> logfiles;
     // Build glob pattern for input
     // e.g. /var/log/kudu/kudu-master.*.INFO.*
     string pattern = strings::Substitute("$0/$1.*.$2.*", FLAGS_log_dir, FLAGS_log_filename,
                                          google::GetLogSeverityName(severity));
-    RETURN_NOT_OK(env->Glob(pattern, &logfiles));
-
-    if (logfiles.size() <= max_log_files) {
-      continue;
-    }
-
-    vector<pair<time_t, string>> logfile_mtimes;
-    for (string& logfile : logfiles) {
-      int64_t mtime;
-      RETURN_NOT_OK(env->GetFileModifiedTime(logfile, &mtime));
-      logfile_mtimes.emplace_back(mtime, std::move(logfile));
-    }
 
     // Keep the 'max_log_files' most recent log files, as compared by
     // modification time. Glog files contain a second-granularity timestamp in
@@ -383,16 +371,7 @@ Status DeleteExcessLogFiles(Env* env) {
     // guaranteed by glob, however this code has been adapted from Impala which
     // uses mtime to determine which files to delete, and there haven't been any
     // issues in production settings.
-    std::sort(logfile_mtimes.begin(), logfile_mtimes.end());
-    logfile_mtimes.resize(logfile_mtimes.size() - max_log_files);
-
-    VLOG(2) << "Deleting " << logfile_mtimes.size()
-            << " excess glog files at " << google::GetLogSeverityName(severity)
-            << " severity";
-
-    for (const auto& logfile: logfile_mtimes) {
-      RETURN_NOT_OK(env->DeleteFile(logfile.second));
-    }
+    RETURN_NOT_OK(env_util::DeleteExcessFilesByPattern(env, pattern, max_log_files));
   }
   return Status::OK();
 }