You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by if...@apache.org on 2020/09/07 06:27:24 UTC
[cassandra-harry] 01/02: Harry: generator library and extensible
framework for fuzz testing Apache Cassandra
This is an automated email from the ASF dual-hosted git repository.
ifesdjeen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/cassandra-harry.git
commit 8a03e5cf6f3ab77216939af1798076912c02168c
Author: Alex Petrov <al...@apple.com>
AuthorDate: Thu Sep 3 20:19:57 2020 +0200
Harry: generator library and extensible framework for fuzz testing Apache Cassandra
Patch by Alex Petrov; reviewed by Aleksey Yeschenko for CASSANDRA-15348
---
.gitignore | 25 +
LICENSE.txt | 209 +++++
Makefile | 11 +
README.md | 538 ++++++++++++
conf/example.yaml | 74 ++
docker/Dockerfile.local | 19 +
docker/run-local.sh | 1 +
docker/run.sh | 89 ++
harry-core/pom.xml | 68 ++
harry-core/src/harry/core/Configuration.java | 908 +++++++++++++++++++++
harry-core/src/harry/core/Run.java | 73 ++
.../src/harry/corruptor/AddExtraRowCorruptor.java | 82 ++
.../src/harry/corruptor/ChangeValueCorruptor.java | 85 ++
.../src/harry/corruptor/HideRowCorruptor.java | 49 ++
.../src/harry/corruptor/HideValueCorruptor.java | 74 ++
.../harry/corruptor/QueryResponseCorruptor.java | 71 ++
harry-core/src/harry/corruptor/RowCorruptor.java | 50 ++
.../src/harry/corruptor/ShowValueCorruptor.java | 77 ++
harry-core/src/harry/data/ResultSetRow.java | 60 ++
harry-core/src/harry/ddl/ColumnSpec.java | 388 +++++++++
harry-core/src/harry/ddl/SchemaGenerators.java | 295 +++++++
harry-core/src/harry/ddl/SchemaSpec.java | 329 ++++++++
harry-core/src/harry/generators/Bijections.java | 380 +++++++++
.../src/harry/generators/BooleanGenerator.java | 29 +
harry-core/src/harry/generators/Bytes.java | 42 +
harry-core/src/harry/generators/Collections.java | 248 ++++++
.../src/harry/generators/DataGenerators.java | 486 +++++++++++
harry-core/src/harry/generators/Generator.java | 131 +++
harry-core/src/harry/generators/Generators.java | 92 +++
harry-core/src/harry/generators/PCGFastPure.java | 145 ++++
harry-core/src/harry/generators/PcgRSUFast.java | 81 ++
.../src/harry/generators/RandomGenerator.java | 87 ++
harry-core/src/harry/generators/RngUtils.java | 96 +++
.../src/harry/generators/StringBijection.java | 170 ++++
harry-core/src/harry/generators/Surjections.java | 158 ++++
.../generators/distribution/Distribution.java | 137 ++++
harry-core/src/harry/model/DataTracker.java | 123 +++
.../src/harry/model/DescriptorSelectorBuilder.java | 104 +++
harry-core/src/harry/model/DoNothingModel.java | 38 +
harry-core/src/harry/model/ExhaustiveChecker.java | 647 +++++++++++++++
harry-core/src/harry/model/Model.java | 69 ++
harry-core/src/harry/model/OpSelectors.java | 623 ++++++++++++++
harry-core/src/harry/model/QuiescentChecker.java | 110 +++
harry-core/src/harry/model/SelectHelper.java | 127 +++
.../harry/model/StatelessVisibleRowsChecker.java | 124 +++
harry-core/src/harry/model/VisibleRowsChecker.java | 248 ++++++
.../model/clock/ApproximateMonotonicClock.java | 280 +++++++
harry-core/src/harry/model/clock/OffsetClock.java | 61 ++
harry-core/src/harry/model/sut/NoOpSut.java | 44 +
harry-core/src/harry/model/sut/PrintlnSut.java | 48 ++
.../src/harry/model/sut/SystemUnderTest.java | 54 ++
.../src/harry/operations/CompiledStatement.java | 56 ++
harry-core/src/harry/operations/DeleteHelper.java | 127 +++
harry-core/src/harry/operations/Relation.java | 350 ++++++++
harry-core/src/harry/operations/WriteHelper.java | 148 ++++
harry-core/src/harry/reconciler/Reconciler.java | 237 ++++++
.../src/harry/runner/AbstractPartitionVisitor.java | 103 +++
.../runner/DefaultPartitionVisitorFactory.java | 198 +++++
harry-core/src/harry/runner/DefaultRowVisitor.java | 68 ++
harry-core/src/harry/runner/PartitionVisitor.java | 26 +
harry-core/src/harry/runner/Query.java | 299 +++++++
harry-core/src/harry/runner/QuerySelector.java | 279 +++++++
harry-core/src/harry/runner/RowVisitor.java | 61 ++
harry-core/src/harry/runner/Runner.java | 396 +++++++++
harry-core/src/harry/runner/Validator.java | 119 +++
harry-core/src/harry/util/BitSet.java | 202 +++++
harry-core/src/harry/util/Ranges.java | 150 ++++
harry-core/src/harry/util/TestRunner.java | 62 ++
.../test/harry/generators/DataGeneratorsTest.java | 470 +++++++++++
.../test/harry/generators/ExtensionsTest.java | 41 +
.../test/harry/generators/RandomGeneratorTest.java | 131 +++
.../test/harry/generators/SurjectionsTest.java | 62 ++
.../test/harry/model/ApproximateClockTest.java | 182 +++++
harry-core/test/harry/model/OpSelectorsTest.java | 263 ++++++
harry-core/test/harry/op/RowVisitorTest.java | 100 +++
harry-core/test/harry/operations/RelationTest.java | 212 +++++
harry-core/test/harry/util/BitSetTest.java | 63 ++
harry-core/test/harry/util/RangesTest.java | 100 +++
harry-integration/pom.xml | 48 ++
.../src/harry/model/sut/ExternalClusterSut.java | 149 ++++
.../src/harry/model/sut/InJvmSut.java | 166 ++++
.../test/harry/QuickTheoriesAdapter.java | 69 ++
.../test/harry/ddl/SchemaGenTest.java | 206 +++++
.../model/ExhaustiveCheckerIntegrationTest.java | 254 ++++++
.../harry/model/ExhaustiveCheckerUnitTest.java | 161 ++++
.../test/harry/model/IntegrationTestBase.java | 99 +++
harry-integration/test/harry/model/MockSchema.java | 115 +++
harry-integration/test/harry/model/ModelTest.java | 147 ++++
.../harry/model/QuerySelectorNegativeTest.java | 144 ++++
.../test/harry/model/QuerySelectorTest.java | 153 ++++
.../model/QuiescentCheckerIntegrationTest.java | 170 ++++
.../test/harry/model/TestBaseImpl.java | 32 +
harry-runner/pom.xml | 37 +
harry-runner/src/harry/runner/HarryRunner.java | 204 +++++
harry-runner/src/harry/runner/Reproduce.java | 68 ++
pom.xml | 234 ++++++
test/conf/logback-dtest.xml | 76 ++
97 files changed, 15594 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..652b7d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,25 @@
+.java-version
+
+operation.log
+
+*.class
+
+# Package Files #
+*.jar
+*.war
+*.ear
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+*.iml
+
+target
+.idea
+
+*~
+dumps/*
+build/*
+repro/*
+*/build/*
+shared/*
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..d5c4984
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,209 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+THIRD-PARTY DEPENDENCIES
+========================
+Convenience copies of some third-party dependencies are distributed with
+Apache Cassandra as Java jar files in lib/. Licensing information for
+these files can be found in the lib/licenses directory.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..71dad4b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+DOCKER_REPO = org.apache.cassandra/harry/harry-runner
+
+img:
+ rm -fr shared/*
+ mvn clean && mvn package -DskipTests && docker build -t ${DOCKER_REPO}:latest-local ./ -f docker/Dockerfile.local
+
+run: img
+ docker run -v `pwd`/shared:/shared -it ${DOCKER_REPO}:latest-local
+
+run-last:
+ docker run -v `pwd`/shared:/shared -it ${DOCKER_REPO}:latest-local
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0d920d4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,538 @@
+# Harry, a fuzz testing tool for Apache Cassandra
+
+Project aims to generate _reproducible_ workloads that are as close to real-life
+as possible, while being able to _efficiently_ verify the cluster state against
+the model without pausing the workload itself.
+
+_Reproducibility_ is achieved by using the PCG family of random number
+generators and generating schema, configuration, and every step of the workload
+from the repeatable sequence of random numbers. Schema and configuration are
+generated from the _seed_. Each operation is assigned its own monotonically
+increasing _logical timestamp_, which preserves logical operation order between
+different runs.
+
+_Efficiency_ is achieved by employing the features of the PCG random number
+generator (walking the sequence of random numbers back and forth), and writing
+value generators in a way that preserves properties of the descriptor it was
+generated from.
+
+Given a `long` _descriptor_ can be _inflated_ into some value:
+ * value can be _deflated_ back to the descriptor it was generated from
+ (in other words, generation is *invertible*)
+ * two inflated values will sort the same way as the two descriptors they
+ were generated from (in other words, generation is *order-preserving*)
+
+These properties are also preserved for the composite values, such
+as clustering and partition keys.
+
+# Components
+
+Every Harry run starts from Configuration. You can find an example configuration
+under `conf/example.yaml`.
+
+*Clock* is a component responsible for mapping _logical_ timestamps to
+_real-time_ ones. When reproducing test failures, and for validation purposes, a
+snapshot of the clock can be taken to map a real-time timestamp from a value
+retrieved from the database back to the logical timestamp of the operation that
+wrote this value. Given a real-time timestamp, the clock can return a logical
+timestamp, and vice versa.
+
+*Runner* is a component that schedules operations that change the cluster
+(system under test) and model state.
+
+*System under test*: a Cassandra node or cluster. Default implementation is
+in_jvm (in-JVM DTest cluster). Harry also supports external clusters.
+
+*Model* is responsible for tracking logical timestamps and validating the state
+of the system under test against them.
+
+*Partition descriptor selector* controls how partitions are selected based on
+the current logical timestamp. The default implementation is a sliding window of
+partition descriptors that will visit one partition after the other in the
+window `slide_after_repeats` times. After that, it will retire one partition
+descriptor, and pick a new one to replace it.
+
+*Clustering descriptor selector* controls how clustering keys are picked within
+the partition: how many rows there can be in a partition, how many rows are
+visited for a logical timestamp, how many operations there will be in batch,
+what kinds of operations there will be, and how often each kind of operation is going
+to occur.
+
+# Formal Relations Between Entities
+
+To be able to implement efficient models, we had to reduce the amount of state,
+required for validation to a minimum and try to operate on primitive data values
+most of the time. Any Harry run starts with a `seed`. Given the same
+configuration, and the same seed, we're able to make runs deterministic (in
+other words, records in two clusters created from the same seed are going to
+have different timestamps, but will be otherwise identical).
+
+Since it's clear how to generate things like random schemas, cluster
+configurations, etc., let's discuss how we're generating data, and why this type
+of generation makes validation efficient.
+
+First, we're using PCG family of random number generators, which, besides having
+nice characteristics that any RNG should have, have two important features:
+
+ * Streams: for single seed, we can have several independent _different_
+ streams of random numbers.
+ * Walkability: PCG generators generate a stream of numbers you can walk _back_
+ and _forth_. That is, for any number _n_ that represents a _position_ of the
+ random number in the stream of random numbers, we can get the random number
+ at this position. Conversely, given a random number, we can determine what
+ is its position in the stream. Moreover, knowing a random number, we can
+ determine which number precedes it in the stream of random numbers, and,
+ finally, we can determine how many numbers there are in a stream between the
+ two random numbers.
+
+Out of these operations, determining the _next_ random number in the sequence
+can be done in constant time, `O(1)`. Advancing generator by _n_ steps can be
+done in `O(log(n))` steps. Since generation is cyclical, advancing the iterator
+backward is equivalent to advancing it by `cardinality - 1` steps. If we're
+generating 64 bits of entropy, advancing by `-1` can be done in 64 steps.
+
+Let's introduce some definitions:
+ * `lts` is a *logical timestamp*, an entity (number in our case), given by the
+ clock, on which some action occurs
+ * `m` is a *modification id*, a sequential number of the modification that
+ occurs on `lts`
+ * `rts` is an approximate real-time as of clock for this run
+ * `pid` is a partition position, a number between `0` and N, for `N` unique
+ generated partitions
+ * `pd` is a partition descriptor, a unique descriptor identifying the
+ partition
+ * `cd` is a clustering descriptor, a unique descriptor identifying row within
+ some partition
+
+A small introduction that can help to understand the relation between these
+entities. Hierarchically, the generation process looks as follows:
+
+ * `lts` is an entry point, from which the decision process starts
+ * `pd` is picked from `lts`, and determines which partition is going to be
+ visited
+ * for `(pd, lts)` combination, `#mods` (the number of modification batches)
+ and `#rows` (the number of rows per modification batch) is determined. `m`
+ is an index of the modification batch, and `i` is an index of the operation
+ in the modification batch.
+ * `cd` is picked based on `(pd, lts)`, and `n`, a sequential number of the
+ operation among all modification batches
+ * operation type, used columns, and values for the modification are picked
+ depending on the `pd`, `lts`, `m`, and `i`
+
+Most of this formalization is implemented in `OpSelectors`, and is relied upon
+in `PartitionVisitor` and any implementation of a `Model`.
+
+Random number generation (see `OpSelectors#Rng`):
+
+ * `rng(i, stream[, e])`: returns i'th number drawn from random sequence
+ `stream` producing values with `e` bits of entropy (64 bits for now).
+ * `rng'(s, stream[, e])`: returns `i` of the random number `s` drawn from the
+ random sequence `stream`. This function is an inverse of `rng`.
+ * `next(rnd, stream[, e])` and `prev(rnd, stream[, e])`: the next/previous
+ number relative to `rnd` drawn from random sequence `stream`.
+
+A simple example of a partition descriptor selector is one that is based on a
+sliding window of a size `s`, that slides every `n` iterations. First, we
+determine _where_ the window should start for a given `lts` (in other words, how
+many times it has already slid). After that, we determine which `pd` we pick out
+of `s` available ones. After picking each one of the `s` descriptors `n` times,
+we retire the oldest descriptor and add a new one to the window. Window start
+and offset are then used as input for the `rng(start + offset, stream)` to make
+sure descriptors are uniformly distributed.
+
+We can build a clustering descriptor selector in a similar manner. Each
+partition will use its `pd` as a stream id, and pick `cd` from a universe of
+possible `cds` of size `#cds`. On each `lts`, we pick a random `offset`, and
+start picking `#ops` clusterings from this `offset < #cds`, and wrap around to
+index 0 after that. This way, each operation maps to a unique `cd`, and
+`#op` can be determined from `cd` deterministically.
+
+# Data Generation
+
+So far, we have established how to generate partition, clustering, and value
+_descriptors_. Now, we need to understand how we can generate data modification
+statements out of these descriptors in a way that helps us to validate data
+later.
+
+Since every run has a predefined schema, and by the time we visit a partition we
+have a logical timestamp, we can make the rest of the decisions: pick a number
+of batches we're about to perform, determine what kind of operations each one of
+the batches is going to contain, which rows we're going to visit (clustering for
+each modification operation).
+
+To generate a write, we need to know _which partition_ we're going to visit (in
+other words, partition descriptor), _which row_ we'd like to modify (in other
+words, clustering descriptor), _which columns_ we're modifying (in other words,
+a column mask), and, for each modified column - its value. By the time we're
+ready to make an actual query to the database, we already know `pd`, `cd`,
+`rts`, and `vds[]`, which is all we need to "inflate" a write.
+
+To inflate each value descriptor, we take a generator for its datatype, and turn
+its descriptor into the object. This generation process has the following
+important properties:
+
+ * it is invertible: for every `inflate(vd) -> value`, there's `deflate(value) -> vd`
+ * it is order-preserving: `compare(vd1, vd2) == compare(inflate(vd1), inflate(vd2))`
+
+Inflating `pd` and `cd` is slightly more involved than inflating `vds`, since
+partition and clustering keys are often composite. This means that `inflate(pd)`
+returns an array of objects, rather than just a single object: `inflate(pd) ->
+value[]`, and `deflate(value[]) -> pd`. Just like inflating value descriptors,
+inflating keys preserves order.
+
+It is easy to see that, given two modifications: `Update(pd1, cd1, [vd1_1,
+vd2_1, vd3_1], lts1)` and `Update(pd1, cd1, [vd1_2, vd3_2], lts2)`, we will end
+up with a resultset that contains effects of both operations: `ResultSetRow(pd1,
+cd1, [vd1_2@rts2, vd2_1@rts1, vd3_2@rts2])`.
+
+# Model
+
+Model in Harry ties the rest of the components together and allows us to check
+whether or not data returned by the cluster actually makes sense. The model
+relies on the clock since we have to convert real-time timestamps of the
+returned values back to logical timestamps, on descriptor selectors to pick the
+right partition and rows.
+
+## Visible Rows Checker
+
+Let's try to put it all together and build a simple model. The simplest one is a
+visible row checker. It can check if any row in the response returned from the
+database could have been produced by one of the operations. However, it won't be
+able to find errors related to missing rows, and will only notice some cases of
+erroneously overwritten rows.
+
+In the model, we can see a response from the database in its deflated state. In
+other words, instead of the actual values returned, we see their descriptors.
+Every resultset row consists of `pd`, `cd`, `vds[]` (value descriptors), and
+`lts[]` (logical timestamps at which these values were written).
+
+To validate, we need to iterate through all operations for this partition,
+starting with the latest one the model is aware of. This model has no internal
+state, and validates entire partitions:
+
+```
+void validatePartitionState(long validationLts, List<ResultSetRow> rows) {
+ long maxLtsForPd = pdSelector.maxLts(validationLts);
+ long pd = pdSelector.pd(validationLts, schema);
+
+ for (ResultSetRow row : rows) {
+ // iterator that gives us unique lts from the row in descending order
+ LongIterator rowLtsIter = descendingIterator(row.lts);
+ // iterator that gives us unique lts from the model in descending order
+ LongIterator modelLtsIter = descendingIterator(pdSelector, validationLts);
+
+ outer:
+ while (rowLtsIter.hasNext()) {
+ long rowLts = rowLtsIter.nextLong();
+
+ // this model can not check columns whose values were never written or were deleted
+ if (rowLts == NO_TIMESTAMP)
+ continue outer;
+
+ if (!modelLtsIter.hasNext())
+ throw new ValidationException(String.format("Model iterator is exhausted, could not verify %d lts for the row: \n%s %s",
+ rowLts, row, query));
+
+ while (modelLtsIter.hasNext()) {
+ long modelLts = modelLtsIter.nextLong();
+ // column was written by the operation that has a lower lts than the current one from the model
+ if (modelLts > rowLts)
+ continue;
+ // column was written by the operation that has a higher lts, which contradicts to the model, since otherwise we'd validate it by now
+ if (modelLts < rowLts)
+ throw new RuntimeException("Can't find a corresponding event id in the model for: " + rowLts + " " + modelLts);
+
+ // Compare values for columns that were supposed to be written with this lts
+ for (int col = 0; col < row.lts.length; col++) {
+ if (row.lts[col] != rowLts)
+ continue;
+
+ long m = descriptorSelector.modificationId(pd, row.cd, rowLts, row.vds[col], col);
+ long vd = descriptorSelector.vd(pd, row.cd, rowLts, m, col);
+
+ // If the value model predicts doesn't match the one received from the database, throw an exception
+ if (vd != row.vds[col])
+ throw new RuntimeException("Returned value doesn't match the model");
+ }
+ continue outer;
+ }
+ }
+ }
+}
+```
+
+As you can see, all validation is done using deflated `ResultSetRows`, which
+contain enough data to say which logical timestamp each value was written with,
+and which value descriptor each value has. This model can also validate data
+concurrently to the ongoing data modification operations.
+
+## Quiescent Checker
+
+Let's consider one more checker. It'll be more powerful than the visible rows
+checker in one way since it can find any inconsistency in data (incorrect
+timestamp, missing or additional row, rows coming in the wrong order, etc), but
+it'll also have one limitation: it won't be able to run concurrently with data
+modification statements. This means that for this model to be used, we should
+have no _in-flight_ queries, and all queries have to be in a deterministic state
+by the time we're validating their results.
+
+For this checker, we assume that we have a component that is called
+`Reconciler`, which can inflate partition state _up to some_ `lts`. `Reconciler`
+works by simply applying each modification in the same order they were applied
+to the cluster, and using standard Cassandra data reconciliation rules (last
+write wins / DELETE wins over INSERT in case of a timestamp collision).
+
+With this component, and knowing that there can be no in-flight queries, we can
+validate data in the following way:
+
+```
+public void validatePartitionState(long verificationLts, Iterator<ResultSetRow> actual, Query query) {
+ // find out up the highest completed logical timestamp
+ long maxCompleteLts = tracker.maxComplete();
+
+ // get the expected state from reconciler
+ Iterator<Reconciler.RowState> expected = reconciler.inflatePartitionState(query.pd, maxCompleteLts, query).iterator(query.reverse);
+
+ // compare actual and expected rows one-by-one in-order
+ while (actual.hasNext() && expected.hasNext()) {
+ ResultSetRow actualRowState = actual.next();
+ Reconciler.RowState expectedRowState = expected.next();
+
+ if (actualRowState.cd != expectedRowState.cd)
+ throw new ValidationException("Found a row in the model that is not present in the resultset:\nExpected: %s\nActual: %s",
+ expectedRowState, actualRowState);
+
+ if (!Arrays.equals(actualRowState.vds, expectedRowState.vds))
+ throw new ValidationException("Returned row state doesn't match the one predicted by the model:\nExpected: %s (%s)\nActual: %s (%s).",
+ Arrays.toString(expectedRowState.vds), expectedRowState,
+ Arrays.toString(actualRowState.vds), actualRowState);
+
+ if (!Arrays.equals(actualRowState.lts, expectedRowState.lts))
+ throw new ValidationException("Timestamps in the row state don't match ones predicted by the model:\nExpected: %s (%s)\nActual: %s (%s).",
+ Arrays.toString(expectedRowState.lts), expectedRowState,
+ Arrays.toString(actualRowState.lts), actualRowState);
+ }
+
+ if (actual.hasNext() || expected.hasNext()) {
+ throw new ValidationException("Expected results to have the same number of results, but %s result iterator has more results",
+ actual.hasNext() ? "actual" : "expected");
+ }
+}
+```
+
+If there's any mismatch, it'll be caught right away: if there's an extra row
+(for example, there were issues in Cassandra that caused it to have duplicate
+rows), or if some row or even value in the row is missing.
+
+## Exhaustive Checker
+
+To be able to both run validation concurrently to modifications and be able to
+catch all kinds of inconsistencies, we need a more involved checker.
+
+In this checker, we rely on inflating partition state. However, we're most
+interested in `lts`, `opId`, and visibility (whether or not it is still
+in-flight) of each modification operation. To be able to give a reliable result,
+we need to make sure we follow these rules:
+
+ * every operation the model _thinks_ should be visible has to be visible
+ * every operation the model _thinks_ should be invisible has to be invisible
+ * every operation the model doesn't know the state of (i.e., it is still
+ in-flight) can be _either_ visible _or_ invisible
+ * there can be no state in the database the model is not aware of (in other words,
+ we either can _explain_ how a row came to be, or we conclude that the row is
+ erroneous)
+
+A naive way to do this would be to inflate every possible partition state, where
+every in-flight operation would be either visible or invisible, but this gets
+costly very quickly since the number of possible combinations grows
+exponentially. A better (and simpler) way to do this is to iterate all
+operations and keep the state of "explained" operations:
+
+
+```
+public class RowValidationState {
+ // every column starts in UNOBSERVED, and has to move to either REMOVED, or OBSERVED state
+ private final ColumnState[] columnStates;
+ // keep track of operations related to each column state
+ private final Operation[] causingOperations;
+}
+```
+
+Now, we move through all operations for a given row, starting from the _newest_ ones, towards
+the oldest ones:
+
+```
+public void validatePartitionState(long verificationLts, PeekingIterator<ResultSetRow> actual_, Query query) {
+ // get a list of operations for each cd
+ NavigableMap<Long, List<Operation>> operations = inflatePartitionState(query);
+
+ for (Map.Entry<Long, List<Operation>> entry : operations.entrySet()) {
+ long cd = entry.getKey();
+ List<Operation> ops = entry.getValue();
+
+ // Found a row that is present both in the model and in the resultset
+ if (actual.hasNext() && actual.peek().cd == cd) {
+ validateRow(new RowValidationState(actual.next()), operations);
+ } else {
+ validateNoRow(cd, operations);
+
+ // Row is not present in the resultset, and we currently look at modifications with a clustering past it
+ if (actual.hasNext() && cmp.compare(actual.peek().cd, cd) < 0)
+ throw new ValidationException("Couldn't find a corresponding explanation for the row in the model");
+ }
+ }
+
+ // if there are more rows in the resultset, and we don't have model explanation for them, we've found an issue
+ if (actual.hasNext())
+ throw new ValidationException("Observed unvalidated rows");
+}
+```
+
+Now, we have to implement `validateRow` and `validateNoRow`. `validateNoRow` is
+easy: we only need to make sure that a set of operations results in an invisible
+row. Since we're iterating operations in reverse order, if we encounter a delete
+not followed by any writes, we can conclude that the row is invisible and exit
+early. If there's a write that is not followed by a delete, and the row isn't
+covered by a range tombstone, we know it's an error.
+
+`validateRow` only has to iterate operations in reverse order until it can
+explain the value in every column. For example, if a value is `UNOBSERVED`, and
+the first thing we encounter is a `DELETE` that removes this column, we only
+need to make sure that the value is actually `null`, in which case we can
+conclude that the value can be explained as `REMOVED`.
+
+Similarly, if we encounter an operation that has written the expected value, we
+conclude that the value is `OBSERVED`. If there are any seeming inconsistencies
+between the model and resultset, we have to check whether or not the operation
+in question is still in flight. If it is, its results may still not be visible,
+so we can't reliably say it's an error.
+
+To summarize, in order for us to implement an exhaustive checker, we have to
+iterate operations for each of the rows present in the model in reverse order
+until we either detect inconsistency that can't be explained by an in-flight
+operation or until we explain every value in the row.
+
+
+## Conclusion
+
+As you can see, all checkers up till now are almost entirely stateless.
+Exhaustive and quiescent models rely on `DataTracker` component that is aware of
+the in-flight and completed `lts`, but don't need any other state apart from
+that, since we can always inflate a complete partition from scratch every time
+we validate.
+
+While not relying on the state is a useful feature, at least _some_ state is
+useful to have. For example, if we're validating just a few rows in the
+partition, right now we have to iterate through each and every `lts` that has
+visited this partition and filter out only modifications that have visited it.
+However, since the model is notified of each _started_, and, later, finished
+modification via `recordEvent`, we can keep track of `pd -> (cd -> lts)` map.
+You can check out `VisibleRowsChecker` as an example of that.
+
+# Usage
+
+To use Harry, you first need to build a Cassandra in-JVM dtest jar. At the moment
+of writing, there's no official repository where these jars are released, so you'll
+have to build it manually:
+
+```
+git clone git@github.com:apache/cassandra.git
+cd cassandra
+./build-shaded-dtest-jar.sh 4.0-beta4 4.0.0-SNAPSHOT
+cd ~/../harry/
+mvn package
+```
+
+`4.0-beta4` is a version of Cassandra which you can find in `build.xml`, and
+`4.0.0-SNAPSHOT` is a version of dtest jar that'll be installed under
+`org.apache.cassandra:cassandra-dtest-local` in your `~/.m2/repository`.
+
+Alternatively, you can use a docker container. For that just run:
+
+```
+git clone git@github.com:apache/cassandra.git
+cd cassandra
+./build-shaded-dtest-jar.sh 4.0-beta4 4.0.0-SNAPSHOT
+cd ~/../harry/
+make run
+```
+
+For best effect, uncomment `scheduleCorruption(run, executor);` in `HarryRunner`, which
+will corrupt data in some way so that you could see how Harry detects this corruption.
+
+Each Harry failure contains a complete cluster state, operation log, failure
+description, and a run configuration. Most of the time, you'll be able to just load
+up the existing cluster state with `harry.runner.Reproduce` class, which picks up
+a run configuration from `shared/run.yml`, and see the same error you've seen
+in your failure log.
+
+When reproducing, make sure to point `system_under_test/root` in the yaml
+file to the dump, which is something like `~/harry/shared/cluster-state/1599155256261`,
+and make sure to point validation to the same LTS as the failed one with
+`run.validator.validatePartition(...L)`.
+
+Because of how our corruptor works, some errors are only reproducible on a specific
+lts, since they're kind of writing the data "from the future", so you should also make
+sure you set the following values from the corresponding values in `failure.dump`.
+
+```
+model:
+ exhaustive_checker:
+ max_seen_lts: your_value
+ max_complete_lts: your_value
+```
+
+# What's missing
+
+Harry is by no means feature-complete. Main things that are missing are:
+
+ * Some types (such as collections) are not deflatable
+ * Some types are implemented, but are not hooked up (`blob` and `text`) to DSL/generator
+ * Partition deletions are not implemented
+ * 2i queries are not implemented
+ * Compact storage is not implemented
+ * Static columns are not implemented
+ * Fault injection is not implemented
+ * Runner and scheduler are rather rudimentary and require significant rework and proper scheduling
+ * TTL is not supported
+ * Some SELECT queries are not supported: `LIMIT`, `IN`, `GROUP BY`, token range queries
+ * Pagination is not implemented
+
+Some things, even though are implemented, can be improved or optimized:
+
+ * RNG should be able to yield less than 64 bits of entropy per step
+ * State tracking should be done in a compact off-heap data structure
+ * Inflated partition state and per-row operation log should be done in a compact
+ off-heap data structure
+ * Exhaustive checker can be significantly optimized
+ * Harry shouldn't rely on java-driver for query generation
+ * Exhaustive checker should use more precise information from data tracker, not
+ just watermarks
+ * Decision-making about _when_ we visit partitions and/or rows should be improved
+
+This list of improvements is incomplete, and should only give the reader a rough
+idea about the state of the project. Main goal for the initial release was to make it
+useful, now we can make it fast and feature-complete!
+
+# Contributors
+
+ * [Alex Petrov](https://github.com/ifesdjeen)
+ * [Benedict Elliot Smith](https://github.com/belliottsmith)
+
+Special thanks to [Aleksey Yeschenko](https://github.com/iamaleksey),
+[Sam Tunnicliffe](https://github.com/beobal), [Marcus Eriksson](https://github.com/krummas),
+and [Scott Andreas](https://github.com/cscotta).
+
+# License
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/conf/example.yaml b/conf/example.yaml
new file mode 100644
index 0000000..19c26ca
--- /dev/null
+++ b/conf/example.yaml
@@ -0,0 +1,74 @@
+seed: 1596731732524
+
+# Default schema provider generates random schema
+schema_provider:
+ default: {}
+
+drop_schema: false
+create_schema: true
+truncate_table: false
+
+# Clock is a component responsible for mapping _logical_ timestamps to _real-time_ ones.
+#
+# When reproducing test failures, and for validation purposes, a snapshot of such clock can
+# be taken to map a real-time timestamp from the value retrieved from the database in order
+# to map it back to the logical timestamp of the operation that wrote this value.
+clock:
+ approximate_monotonic:
+ history_size: 7300
+ epoch_length: 1
+ epoch_time_unit: "SECONDS"
+
+# Runner is a component that schedules operations that change the cluster (system under test)
+# and model state.
+runner:
+ concurrent:
+ writer_threads: 2
+ round_robin_validator_threads: 1
+ recent_partition_validator_threads: 1
+
+run_time: 2
+run_time_unit: "HOURS"
+
+# System under test: a Cassandra node or cluster. Default implementation is in_jvm (in-jvm DTest cluster).
+# Harry also supports external clusters.
+system_under_test:
+ in_jvm:
+ nodes: 3
+ worker_threads: 10
+ root: "/tmp/harry/"
+
+# Model is responsible for tracking logical timestamps and validating data returned
+# by the system under test against its predictions.
+model:
+ exhaustive_checker:
+ max_seen_lts: 19
+ max_complete_lts: 16
+
+# Partition descriptor selector controls how partitions are selected based on the current logical
+# timestamp. Default implementation is a sliding window of partition descriptors that will visit
+# one partition after the other in the window `slide_after_repeats` times. After that will
+# retire one partition descriptor, and pick one instead of it.
+partition_descriptor_selector:
+ default:
+ window_size: 10
+ slide_after_repeats: 100
+
+# Clustering descriptor selector controls how clusterings are picked within the partition:
+# how many rows there can be in a partition, how many rows will be visited for a logical timestamp,
+# how many operations there will be in batch, what kind of operations there will be and how often
+# each kind of operation is going to occur.
+clustering_descriptor_selector:
+ default:
+ modifications_per_lts:
+ type: "constant"
+ constant: 10
+ rows_per_modification:
+ type: "constant"
+ constant: 10
+ operation_kind_weights:
+ WRITE: 97
+ DELETE_RANGE: 1
+ DELETE_ROW: 1
+ DELETE_COLUMN: 1
+ column_mask_bitsets: null
+ max_partition_size: 100
diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local
new file mode 100644
index 0000000..cf57b36
--- /dev/null
+++ b/docker/Dockerfile.local
@@ -0,0 +1,19 @@
+FROM adoptopenjdk/openjdk11
+
+# MAINTAINER has been deprecated since Docker 1.13; LABEL is the replacement.
+LABEL maintainer="Apache Cassandra <de...@cassandra.apache.org>"
+
+# Create the whole directory layout in a single layer instead of four.
+RUN mkdir -p /opt/harry/lib /opt/harry/test /opt/harry/logs /cassandra/harry
+
+# Harry core and runner jars plus their runtime dependencies.
+COPY ./harry-core/target/lib/* /opt/harry/lib/
+COPY ./harry-core/target/*.jar /opt/harry/lib/
+COPY ./harry-runner/target/lib/* /opt/harry/lib/
+COPY ./harry-runner/target/*.jar /opt/harry/
+COPY ./test/conf/logback-dtest.xml /opt/harry/test/conf/logback-dtest.xml
+COPY ./docker/run.sh /opt/harry/
+
+WORKDIR /opt/harry
+COPY ./docker/run-local.sh /opt/harry/
+ENTRYPOINT /opt/harry/run-local.sh
diff --git a/docker/run-local.sh b/docker/run-local.sh
new file mode 100755
index 0000000..56dde11
--- /dev/null
+++ b/docker/run-local.sh
@@ -0,0 +1 @@
+/opt/harry/run.sh local_run
diff --git a/docker/run.sh b/docker/run.sh
new file mode 100755
index 0000000..9a18b22
--- /dev/null
+++ b/docker/run.sh
@@ -0,0 +1,89 @@
+cd /opt/harry/
+
+HARRY_DIR=/cassandra/harry/
+ls -1 /mnt/harry-failures/* | xargs rm -fr
+local_run=false
+if [ $1 = "local_run" ]; then
+ local_run=true
+fi
+
+if [ "$local_run" = true ] ; then
+ echo "Running locally..."
+fi
+
+while true; do
+ java -ea \
+ -Xms4g \
+ -Xmx4g \
+ -XX:MaxRAM=4g \
+ -XX:MaxMetaspaceSize=384M \
+ -XX:MetaspaceSize=128M \
+ -XX:SoftRefLRUPolicyMSPerMB=0 \
+ -XX:MaxDirectMemorySize=2g \
+ -Dcassandra.memtable_row_overhead_computation_step=100 \
+ -Djdk.attach.allowAttachSelf=true \
+ -XX:+HeapDumpOnOutOfMemoryError \
+ -XX:-UseBiasedLocking \
+ -XX:+UseTLAB \
+ -XX:+ResizeTLAB \
+ -XX:+UseNUMA \
+ -XX:+PerfDisableSharedMem \
+ -XX:+UseConcMarkSweepGC \
+ -XX:+CMSParallelRemarkEnabled \
+ -XX:SurvivorRatio=8 \
+ -XX:MaxTenuringThreshold=1 \
+ -XX:CMSInitiatingOccupancyFraction=75 \
+ -XX:+UseCMSInitiatingOccupancyOnly \
+ -XX:CMSWaitDuration=10000 \
+ -XX:+CMSParallelInitialMarkEnabled \
+ -XX:+CMSEdenChunksRecordAlways \
+ -XX:+CMSClassUnloadingEnabled \
+ -XX:+UseCondCardMark \
+ -XX:OnOutOfMemoryError=kill \
+ --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
+ --add-exports java.base/jdk.internal.ref=ALL-UNNAMED \
+ --add-exports java.base/sun.nio.ch=ALL-UNNAMED \
+ --add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED \
+ --add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED \
+ --add-exports java.rmi/sun.rmi.server=ALL-UNNAMED \
+ --add-exports java.sql/java.sql=ALL-UNNAMED \
+ --add-opens java.base/java.lang.module=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.loader=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.ref=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.reflect=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.math=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.module=ALL-UNNAMED \
+ --add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED \
+ --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED \
+ -Dorg.apache.cassandra.test.logback.configurationFile=file:///opt/harry/test/conf/logback-dtest.xml \
+ -cp /opt/harry/lib/*:/opt/harry/harry-runner-0.0.1-SNAPSHOT.jar \
+ -Dharry.root=${HARRY_DIR} \
+ harry.runner.HarryRunner
+
+ if [ $? -ne 0 ]; then
+ if [ -e "failure.dump" ]; then
+ echo "Creating failure dump..."
+ FAILURES_DIR="/opt/harry/dump/"
+ RUN="run-$(date +%Y%m%d%H%M%S)-${RANDOM}"
+ mkdir ${FAILURES_DIR}
+ mkdir ${FAILURES_DIR}cluster-state
+ mv ${HARRY_DIR}* ${FAILURES_DIR}/cluster-state
+ mv operation.log ${FAILURES_DIR}/
+ mv failure.dump ${FAILURES_DIR}/
+ mv run.yaml ${FAILURES_DIR}/
+
+ if [ "$local_run" = true ] ; then
+ mv ${FAILURES_DIR}/* /shared/
+ else
+ echo "TODO"
+ fi
+ fi
+ fi
+
+ if [ "$local_run" = true ] ; then
+ exit 0
+ else
+ rm -fr ${HARRY_DIR}*
+ sleep 1
+ fi
+done
diff --git a/harry-core/pom.xml b/harry-core/pom.xml
new file mode 100755
index 0000000..21747a3
--- /dev/null
+++ b/harry-core/pom.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <packaging>jar</packaging>
+
+ <parent>
+ <groupId>org.apache.cassandra</groupId>
+ <version>0.0.1-SNAPSHOT</version>
+ <artifactId>harry-parent</artifactId>
+ </parent>
+
+ <artifactId>harry-core</artifactId>
+ <name>Harry Core</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>com.datastax.cassandra</groupId>
+ <artifactId>cassandra-driver-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>jackson-dataformat-yaml</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.cassandra</groupId>
+ <artifactId>cassandra-dtest-shaded</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.cassandra</groupId>
+ <artifactId>dtest-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
+
diff --git a/harry-core/src/harry/core/Configuration.java b/harry-core/src/harry/core/Configuration.java
new file mode 100644
index 0000000..3a68053
--- /dev/null
+++ b/harry-core/src/harry/core/Configuration.java
@@ -0,0 +1,908 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.core;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator;
+import harry.ddl.SchemaGenerators;
+import harry.ddl.SchemaSpec;
+import harry.generators.Surjections;
+import harry.generators.distribution.Distribution;
+import harry.model.ExhaustiveChecker;
+import harry.model.Model;
+import harry.model.OpSelectors;
+import harry.model.QuiescentChecker;
+import harry.model.clock.ApproximateMonotonicClock;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.DefaultPartitionVisitorFactory;
+import harry.runner.DefaultRowVisitor;
+import harry.runner.PartitionVisitor;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+import harry.runner.RowVisitor;
+import harry.runner.Runner;
+import harry.runner.Validator;
+import harry.util.BitSet;
+
+public class Configuration
+{
+ // Shared YAML ObjectMapper used by all (de)serialization helpers below.
+ private static final ObjectMapper mapper;
+
+ static
+ {
+ // Native type ids and doc-start markers are disabled so the emitted YAML
+ // stays plain and human-editable; arrays are indented for readability.
+ mapper = new ObjectMapper(new YAMLFactory()
+ .disable(YAMLGenerator.Feature.USE_NATIVE_TYPE_ID)
+ .disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER)
+ .disable(YAMLGenerator.Feature.CANONICAL_OUTPUT)
+ .enable(YAMLGenerator.Feature.INDENT_ARRAYS));
+ // Register the built-in configuration subtypes so Jackson can resolve the
+ // @JsonTypeName wrapper objects (e.g. "approximate_monotonic") in YAML.
+ mapper.registerSubtypes(Configuration.DebugApproximateMonotonicClockConfiguration.class);
+ mapper.registerSubtypes(Configuration.ConcurrentRunnerConfig.class);
+
+ mapper.registerSubtypes(Configuration.ExhaustiveCheckerConfig.class);
+ mapper.registerSubtypes(Configuration.DefaultCDSelectorConfiguration.class);
+ mapper.registerSubtypes(Configuration.DefaultPDSelectorConfiguration.class);
+ mapper.registerSubtypes(Configuration.ConstantDistributionConfig.class);
+ mapper.registerSubtypes(DefaultSchemaProviderConfiguration.class);
+ mapper.registerSubtypes(DefaultRowVisitorConfiguration.class);
+ }
+
+ // Field names intentionally use snake_case: they double as the YAML keys
+ // this configuration is serialized to and from.
+ public final long seed;
+ public final SchemaProviderConfiguration schema_provider;
+
+ public final boolean drop_schema;
+ public final boolean create_schema;
+ public final boolean truncate_table;
+
+ public final ClockConfiguration clock;
+ public final RunnerConfiguration runner;
+ public final SutConfiguration system_under_test;
+ public final ModelConfiguration model;
+ public final RowVisitorConfiguration row_visitor;
+
+ public final PDSelectorConfiguration partition_descriptor_selector;
+ public final CDSelectorConfiguration clustering_descriptor_selector;
+
+ public final long run_time;
+ public final TimeUnit run_time_unit;
+
+ /**
+ * Jackson creator binding each YAML key to its corresponding final field.
+ */
+ @JsonCreator
+ public Configuration(@JsonProperty("seed") long seed,
+ @JsonProperty("schema_provider") SchemaProviderConfiguration schema_provider,
+ @JsonProperty("drop_schema") boolean drop_schema,
+ @JsonProperty("create_schema") boolean create_schema,
+ // BUG FIX: property was named "truncate_schema", which matched neither the
+ // field name nor the "truncate_table" key used in conf/example.yaml, so the
+ // value could never be bound from YAML.
+ @JsonProperty("truncate_table") boolean truncate_table,
+ @JsonProperty("clock") ClockConfiguration clock,
+ @JsonProperty("runner") RunnerConfiguration runner,
+ @JsonProperty("system_under_test") SutConfiguration system_under_test,
+ @JsonProperty("model") ModelConfiguration model,
+ @JsonProperty("row_visitor") RowVisitorConfiguration row_visitor,
+ @JsonProperty("partition_descriptor_selector") PDSelectorConfiguration partition_descriptor_selector,
+ @JsonProperty("clustering_descriptor_selector") CDSelectorConfiguration clustering_descriptor_selector,
+ @JsonProperty(value = "run_time", defaultValue = "2") long run_time,
+ @JsonProperty(value = "run_time_unit", defaultValue = "HOURS") TimeUnit run_time_unit)
+ {
+ this.seed = seed;
+ this.schema_provider = schema_provider;
+ this.drop_schema = drop_schema;
+ this.create_schema = create_schema;
+ this.truncate_table = truncate_table;
+ this.clock = clock;
+ this.runner = runner;
+ this.system_under_test = system_under_test;
+ this.model = model;
+ this.row_visitor = row_visitor;
+ this.partition_descriptor_selector = partition_descriptor_selector;
+ this.clustering_descriptor_selector = clustering_descriptor_selector;
+ this.run_time = run_time;
+ this.run_time_unit = run_time_unit;
+ }
+
+ // Lets callers register additional Jackson subtypes so externally-defined
+ // configuration components can participate in YAML (de)serialization.
+ public static void registerSubtypes(Class<?>... classes)
+ {
+ mapper.registerSubtypes(classes);
+ }
+
+ // Serializes a Configuration to YAML.
+ // NOTE(review): catching Throwable (rather than Exception) also traps Errors
+ // such as OutOfMemoryError — confirm this breadth is intentional.
+ public static String toYamlString(Configuration config)
+ {
+ try
+ {
+ return mapper.writeValueAsString(config);
+ }
+ catch (Throwable t)
+ {
+ throw new RuntimeException(t);
+ }
+ }
+
+ // Parses a Configuration from a YAML string; wraps any parse failure in
+ // an unchecked exception (the original cause is preserved).
+ public static Configuration fromYamlString(String config)
+ {
+ try
+ {
+ return mapper.readValue(config, Configuration.class);
+ }
+ catch (Throwable t)
+ {
+ throw new RuntimeException(t);
+ }
+ }
+
+ // Parses a Configuration from a YAML file at the given path.
+ public static Configuration fromFile(String path)
+ {
+ try
+ {
+ return mapper.readValue(new File(path), Configuration.class);
+ }
+ catch (Throwable t)
+ {
+ throw new RuntimeException(t);
+ }
+ }
+
+ // Placeholder for semantic validation of a parsed configuration; currently a no-op.
+ public static void validate(Configuration config)
+ {
+ // TODO: validation
+ //assert historySize * clockEpochTimeUnit.toMillis(clockEpoch) > runTimePeriod.toMillis(runTime) : "History size is too small for this run";
+ }
+
+ // Convenience instance wrapper around the static factory below.
+ public Runner createRunner()
+ {
+ return createRunner(this);
+ }
+
+ // Convenience instance wrapper around the static factory below.
+ public Run createRun()
+ {
+ return createRun(this);
+ }
+
+ /**
+ * Assembles a complete {@code Run} from a configuration snapshot: builds the
+ * RNG, clock, schema, descriptor selectors, system under test, model,
+ * validator and visitors, all seeded deterministically from {@code snapshot.seed}.
+ */
+ public static Run createRun(Configuration snapshot)
+ {
+ long seed = snapshot.seed;
+
+ OpSelectors.Rng rng = new OpSelectors.PCGFast(seed);
+
+ OpSelectors.MonotonicClock clock = snapshot.clock.make();
+
+ // TODO: parsing schema
+ SchemaSpec schemaSpec = snapshot.schema_provider.make(seed);
+
+ OpSelectors.PdSelector pdSelector = snapshot.partition_descriptor_selector.make(rng);
+ OpSelectors.DescriptorSelector descriptorSelector = snapshot.clustering_descriptor_selector.make(rng, schemaSpec);
+
+ SystemUnderTest sut = snapshot.system_under_test.make();
+ // Query selector is currently hard-wired to slice/range clustering queries.
+ QuerySelector querySelector = new QuerySelector(schemaSpec,
+ pdSelector,
+ descriptorSelector,
+ Surjections.pick(Query.QueryKind.CLUSTERING_SLICE,
+ Query.QueryKind.CLUSTERING_RANGE),
+ rng);
+ Model model = snapshot.model.create(schemaSpec, pdSelector, descriptorSelector, clock, querySelector, sut);
+ Validator validator = new Validator(model, schemaSpec, clock, pdSelector, descriptorSelector, rng);
+
+ // Row visitor is optional in the config; fall back to the default implementation.
+ RowVisitor rowVisitor;
+ if (snapshot.row_visitor != null)
+ rowVisitor = snapshot.row_visitor.make(schemaSpec, clock, descriptorSelector, querySelector);
+ else
+ rowVisitor = new DefaultRowVisitor(schemaSpec, clock, descriptorSelector, querySelector);
+
+ // TODO: make this one configurable, too?
+ Supplier<PartitionVisitor> visitorFactory = new DefaultPartitionVisitorFactory(model, sut, pdSelector, descriptorSelector, schemaSpec, rowVisitor);
+ return new Run(rng,
+ clock,
+ pdSelector,
+ descriptorSelector,
+ schemaSpec,
+ model,
+ sut,
+ validator,
+ rowVisitor,
+ visitorFactory,
+ snapshot);
+ }
+
+ // Builds the Run, then hands it to the configured runner factory.
+ public static Runner createRunner(Configuration snapshot)
+ {
+ Run run = createRun(snapshot);
+ return snapshot.runner.make(run);
+ }
+
+ /**
+ * Mutable builder for {@link Configuration}. Fields mirror the YAML keys;
+ * some carry sensible defaults (schema provider, row visitor, PD selector,
+ * run time), while the rest must be set before {@link #build()} is called.
+ */
+ public static class ConfigurationBuilder
+ {
+ long seed;
+ SchemaProviderConfiguration schema_provider = new DefaultSchemaProviderConfiguration();
+
+ boolean drop_schema;
+ boolean create_schema;
+ boolean truncate_table;
+
+ ClockConfiguration clock;
+ RunnerConfiguration runner;
+ SutConfiguration system_under_test;
+ ModelConfiguration model;
+ RowVisitorConfiguration row_visitor = new DefaultRowVisitorConfiguration();
+
+ // Defaults: sliding window of 10 partitions, sliding after 100 repeats.
+ PDSelectorConfiguration partition_descriptor_selector = new Configuration.DefaultPDSelectorConfiguration(10, 100);
+ CDSelectorConfiguration clustering_descriptor_selector; // TODO: sensible default value
+
+ long run_time = 2;
+ TimeUnit run_time_unit = TimeUnit.HOURS;
+
+ public ConfigurationBuilder setSeed(long seed)
+ {
+ this.seed = seed;
+ return this;
+ }
+
+ public ConfigurationBuilder setSchemaProvider(SchemaProviderConfiguration schema_provider)
+ {
+ this.schema_provider = schema_provider;
+ return this;
+ }
+
+ public ConfigurationBuilder setRunTime(long runTime, TimeUnit runTimeUnit)
+ {
+ this.run_time_unit = Objects.requireNonNull(runTimeUnit, "unit");
+ this.run_time = runTime;
+ return this;
+ }
+
+ public ConfigurationBuilder setClock(ClockConfiguration clock)
+ {
+ this.clock = clock;
+ return this;
+ }
+
+
+ public ConfigurationBuilder setSUT(SutConfiguration system_under_test)
+ {
+ this.system_under_test = system_under_test;
+ return this;
+ }
+
+ public ConfigurationBuilder setDropSchema(boolean drop_schema)
+ {
+ this.drop_schema = drop_schema;
+ return this;
+ }
+
+ public ConfigurationBuilder setCreateSchema(boolean create_schema)
+ {
+ this.create_schema = create_schema;
+ return this;
+ }
+
+ public ConfigurationBuilder setTruncateTable(boolean truncate_table)
+ {
+ this.truncate_table = truncate_table;
+ return this;
+ }
+
+ public ConfigurationBuilder setModel(ModelConfiguration model)
+ {
+ this.model = model;
+ return this;
+ }
+
+ public ConfigurationBuilder setRunner(RunnerConfiguration runner)
+ {
+ this.runner = runner;
+ return this;
+ }
+
+ public ConfigurationBuilder setPartitionDescriptorSelector(PDSelectorConfiguration partition_descriptor_selector)
+ {
+ this.partition_descriptor_selector = partition_descriptor_selector;
+ return this;
+ }
+
+ public ConfigurationBuilder setClusteringDescriptorSelector(CDSelectorConfiguration builder)
+ {
+ this.clustering_descriptor_selector = builder;
+ return this;
+ }
+
+ // Convenience overload: configure the CD selector through a lambda instead
+ // of constructing the nested builder by hand.
+ public ConfigurationBuilder setClusteringDescriptorSelector(Consumer<CDSelectorConfigurationBuilder> build)
+ {
+ CDSelectorConfigurationBuilder builder = new CDSelectorConfigurationBuilder();
+ build.accept(builder);
+ return setClusteringDescriptorSelector(builder.build());
+ }
+
+ public ConfigurationBuilder setRowVisitor(RowVisitorConfiguration row_visitor)
+ {
+ this.row_visitor = row_visitor;
+ return this;
+ }
+
+ // Validates mandatory components and produces the immutable Configuration.
+ public Configuration build()
+ {
+ return new Configuration(seed,
+ Objects.requireNonNull(schema_provider, "Schema provider should not be null"),
+ drop_schema,
+ create_schema,
+ truncate_table,
+
+ Objects.requireNonNull(clock, "Clock should not be null"),
+ runner,
+ Objects.requireNonNull(system_under_test, "System under test should not be null"),
+ Objects.requireNonNull(model, "Model should not be null"),
+ Objects.requireNonNull(row_visitor, "Row visitor should not be null"),
+
+ Objects.requireNonNull(partition_descriptor_selector, "Partition descriptor selector should not be null"),
+ Objects.requireNonNull(clustering_descriptor_selector, "Clustering descriptor selector should not be null"),
+
+ run_time,
+ run_time_unit);
+ }
+ }
+
/**
 * Returns a builder pre-populated with this configuration's values, so a modified
 * copy can be derived without mutating the original (the inverse of {@code build()}).
 */
public ConfigurationBuilder unbuild()
{
    ConfigurationBuilder builder = new ConfigurationBuilder();
    builder.seed = seed;
    builder.schema_provider = schema_provider;
    builder.drop_schema = drop_schema;
    builder.create_schema = create_schema;
    builder.truncate_table = truncate_table;

    builder.clock = clock;
    builder.runner = runner;
    builder.system_under_test = system_under_test;
    builder.model = model;
    builder.row_visitor = row_visitor;

    builder.partition_descriptor_selector = partition_descriptor_selector;
    builder.clustering_descriptor_selector = clustering_descriptor_selector;

    builder.run_time = run_time;
    builder.run_time_unit = run_time_unit;
    return builder;
}
+
/**
 * Marker interface for YAML/JSON-configurable clock factories; concrete implementations
 * are resolved polymorphically via their {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface ClockConfiguration extends OpSelectors.MonotonicClockFactory
{
}
+
/** Configuration for an {@link ApproximateMonotonicClock} driven by epoch-based history. */
@JsonTypeName("approximate_monotonic")
public static class ApproximateMonotonicClockConfiguration implements ClockConfiguration
{
    // Number of timestamps retained by the clock
    public final int history_size;
    // Epoch duration, expressed in epoch_time_unit
    public final int epoch_length;
    public final TimeUnit epoch_time_unit;

    @JsonCreator
    public ApproximateMonotonicClockConfiguration(@JsonProperty("history_size") int history_size,
                                                  @JsonProperty("epoch_length") int epoch_length,
                                                  @JsonProperty("epoch_time_unit") TimeUnit epoch_time_unit)
    {
        this.history_size = history_size;
        this.epoch_length = epoch_length;
        this.epoch_time_unit = epoch_time_unit;
    }

    public OpSelectors.MonotonicClock make()
    {
        return new ApproximateMonotonicClock(history_size,
                                             epoch_length,
                                             epoch_time_unit);
    }
}
+
/**
 * Debug variant of the approximate monotonic clock configuration: restores a clock from a
 * previously captured state (history array, lts, index) via
 * {@link ApproximateMonotonicClock#forDebug}, e.g. to reproduce a failing run.
 * Note: fields are camelCase here while the JSON keys remain snake_case.
 */
@JsonTypeName("debug_approximate_monotonic")
public static class DebugApproximateMonotonicClockConfiguration implements ClockConfiguration
{
    public final long startTimeMicros;
    public final int historySize;
    public final long[] history;
    public final long lts;
    public final int idx;
    public final long epochPeriod;
    public final TimeUnit epochTimeUnit;

    @JsonCreator
    public DebugApproximateMonotonicClockConfiguration(@JsonProperty("start_time_micros") long startTimeMicros,
                                                       @JsonProperty("history_size") int historySize,
                                                       @JsonProperty("history") long[] history,
                                                       @JsonProperty("lts") long lts,
                                                       @JsonProperty("idx") int idx,
                                                       @JsonProperty("epoch_period") long epochPeriod,
                                                       @JsonProperty("epoch_time_unit") TimeUnit epochTimeUnit)
    {
        this.startTimeMicros = startTimeMicros;
        this.historySize = historySize;
        this.history = history;
        this.lts = lts;
        this.idx = idx;
        this.epochPeriod = epochPeriod;
        this.epochTimeUnit = epochTimeUnit;
    }

    public OpSelectors.MonotonicClock make()
    {
        // Argument order differs from the constructor: forDebug takes the history array last.
        return ApproximateMonotonicClock.forDebug(startTimeMicros,
                                                  historySize,
                                                  lts,
                                                  idx,
                                                  epochPeriod,
                                                  epochTimeUnit,
                                                  history);
    }
}
+
/**
 * Marker interface for YAML/JSON-configurable runner factories; implementations are
 * resolved polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface RunnerConfiguration extends Runner.RunnerFactory
{
}
+
/** Configuration for {@link Runner.ConcurrentRunner}: thread counts per role. */
@JsonTypeName("concurrent")
public static class ConcurrentRunnerConfig implements RunnerConfiguration
{
    public final int writer_threads;
    public final int round_robin_validator_threads;
    public final int recent_partition_validator_threads;

    // NOTE(review): Jackson's @JsonProperty defaultValue is documentation-only metadata;
    // it does not populate missing properties -- confirm the defaults are enforced elsewhere.
    @JsonCreator
    public ConcurrentRunnerConfig(@JsonProperty(value = "writer_threads", defaultValue = "2") int writer_threads,
                                  @JsonProperty(value = "round_robin_validator_threads", defaultValue = "2") int round_robin_validator_threads,
                                  @JsonProperty(value = "recent_partition_validator_threads", defaultValue = "2") int recent_partition_validator_threads)
    {
        this.writer_threads = writer_threads;
        this.round_robin_validator_threads = round_robin_validator_threads;
        this.recent_partition_validator_threads = recent_partition_validator_threads;
    }

    public Runner make(Run run)
    {
        return new Runner.ConcurrentRunner(run, writer_threads, round_robin_validator_threads, recent_partition_validator_threads);
    }
}
+
+ @JsonTypeName("sequential")
+ public static class SequentialRunnerConfig implements RunnerConfiguration
+ {
+ private final int round_robin_validator_threads;
+ private final int check_recent_after;
+ private final int check_all_after;
+
+ @JsonCreator
+ public SequentialRunnerConfig(@JsonProperty(value = "round_robin_validator_threads", defaultValue = "2") int round_robin_validator_threads,
+ @JsonProperty(value = "check_recent_after", defaultValue = "100") int check_recent_after,
+ @JsonProperty(value = "check_all_after", defaultValue = "5000") int check_all_after)
+ {
+ this.round_robin_validator_threads = round_robin_validator_threads;
+ this.check_recent_after = check_recent_after;
+ this.check_all_after = check_all_after;
+ }
+
+ public Runner make(Run run)
+ {
+ return new Runner.SequentialRunner(run, round_robin_validator_threads, check_recent_after, check_all_after);
+ }
+ }
+
/**
 * Marker interface for YAML/JSON-configurable system-under-test factories; resolved
 * polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface SutConfiguration extends SystemUnderTest.SUTFactory
{
}
+
/**
 * Marker interface for YAML/JSON-configurable model (checker) factories; resolved
 * polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface ModelConfiguration extends Model.ModelFactory
{
}
+
/**
 * Configuration for the {@link ExhaustiveChecker} model. The lts values let a checker be
 * restored mid-run; -1 appears to mean "no forced state" -- confirm against forceLts.
 */
@JsonTypeName("exhaustive_checker")
public static class ExhaustiveCheckerConfig implements ModelConfiguration
{
    public final long max_seen_lts;
    public final long max_complete_lts;

    public ExhaustiveCheckerConfig()
    {
        this(-1, -1);
    }

    @JsonCreator
    public ExhaustiveCheckerConfig(@JsonProperty(value = "max_seen_lts", defaultValue = "-1") long max_seen_lts,
                                   @JsonProperty(value = "max_complete_lts", defaultValue = "-1") long max_complete_lts)
    {
        this.max_seen_lts = max_seen_lts;
        this.max_complete_lts = max_complete_lts;
    }

    public Model create(SchemaSpec schema, OpSelectors.PdSelector pdSelector, OpSelectors.DescriptorSelector descriptorSelector, OpSelectors.MonotonicClock clock, QuerySelector querySelector, SystemUnderTest sut)
    {
        ExhaustiveChecker exhaustiveChecker = new ExhaustiveChecker(schema,
                                                                    pdSelector,
                                                                    descriptorSelector,
                                                                    clock,
                                                                    querySelector,
                                                                    sut);
        exhaustiveChecker.forceLts(max_seen_lts, max_complete_lts);
        return exhaustiveChecker;
    }
}
+
+ @JsonTypeName("quiescent_checker")
+ public static class QuiescentCheckerConfig implements ModelConfiguration
+ {
+ public final long max_seen_lts;
+ public final long max_complete_lts;
+
+ public QuiescentCheckerConfig()
+ {
+ this(-1, -1);
+ }
+
+ @JsonCreator
+ public QuiescentCheckerConfig(@JsonProperty(value = "max_seen_lts", defaultValue = "-1") long max_seen_lts,
+ @JsonProperty(value = "max_complete_lts", defaultValue = "-1") long max_complete_lts)
+ {
+ this.max_seen_lts = max_seen_lts;
+ this.max_complete_lts = max_complete_lts;
+ }
+
+ public Model create(SchemaSpec schema, OpSelectors.PdSelector pdSelector, OpSelectors.DescriptorSelector descriptorSelector, OpSelectors.MonotonicClock clock, QuerySelector querySelector, SystemUnderTest sut)
+ {
+ QuiescentChecker exhaustiveChecker = new QuiescentChecker(schema,
+ pdSelector,
+ descriptorSelector,
+ clock,
+ querySelector,
+ sut);
+ exhaustiveChecker.forceLts(max_seen_lts, max_complete_lts);
+ return exhaustiveChecker;
+ }
+ }
+
+
/**
 * Marker interface for YAML/JSON-configurable partition-descriptor selector factories;
 * resolved polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface PDSelectorConfiguration extends OpSelectors.PdSelectorFactory
{
}
+
/**
 * Configuration for the sliding-window {@link OpSelectors.DefaultPdSelector}: a window of
 * window_size partitions slides after slide_after_repeats repetitions.
 */
@JsonTypeName("default")
public static class DefaultPDSelectorConfiguration implements PDSelectorConfiguration
{
    public final int window_size;
    public final int slide_after_repeats;

    @JsonCreator
    public DefaultPDSelectorConfiguration(@JsonProperty(value = "window_size", defaultValue = "10") int window_size,
                                          @JsonProperty(value = "slide_after_repeats", defaultValue = "100") int slide_after_repeats)
    {
        this.window_size = window_size;
        this.slide_after_repeats = slide_after_repeats;
    }

    public OpSelectors.PdSelector make(OpSelectors.Rng rng)
    {
        return new OpSelectors.DefaultPdSelector(rng, window_size, slide_after_repeats);
    }
}
+
/**
 * Marker interface for YAML/JSON-configurable clustering-descriptor selector factories;
 * resolved polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface CDSelectorConfiguration extends OpSelectors.DescriptorSelectorFactory
{
}
+
+ public static class WeightedSelectorBuilder<T>
+ {
+ private final Map<T, Integer> operation_kind_weights;
+
+ public WeightedSelectorBuilder()
+ {
+ operation_kind_weights = new HashMap<>();
+ }
+
+ public WeightedSelectorBuilder addWeight(T v, int weight)
+ {
+ operation_kind_weights.put(v, weight);
+ return this;
+ }
+
+ public Map<T, Integer> build()
+ {
+ return operation_kind_weights;
+ }
+ }
+
/** Weighted-selector builder specialised for {@link OpSelectors.OperationKind} weights. */
public static class OperationKindSelectorBuilder extends WeightedSelectorBuilder<OpSelectors.OperationKind>
{
}
+
+ // TODO: configure fractions/fractional builder
+ public static class CDSelectorConfigurationBuilder
+ {
+ private DistributionConfig modifications_per_lts = new ConstantDistributionConfig(10);
+ private DistributionConfig rows_per_modification = new ConstantDistributionConfig(10);
+ private int max_partition_size = 100;
+ private Map<OpSelectors.OperationKind, Integer> operation_kind_weights = new OperationKindSelectorBuilder()
+ .addWeight(OpSelectors.OperationKind.DELETE_ROW, 1)
+ .addWeight(OpSelectors.OperationKind.DELETE_COLUMN, 1)
+ .addWeight(OpSelectors.OperationKind.WRITE, 98)
+ .build();
+ private Map<OpSelectors.OperationKind, long[]> column_mask_bitsets;
+ private int[] fractions;
+
+ public CDSelectorConfigurationBuilder setNumberOfModificationsDistribution(DistributionConfig modifications_per_lts)
+ {
+ this.modifications_per_lts = modifications_per_lts;
+ return this;
+ }
+
+ public CDSelectorConfigurationBuilder setRowsPerModificationDistribution(DistributionConfig rows_per_modification)
+ {
+ this.rows_per_modification = rows_per_modification;
+ return this;
+ }
+
+ public CDSelectorConfigurationBuilder setMaxPartitionSize(int max_partition_size)
+ {
+ if (max_partition_size <= 0)
+ throw new IllegalArgumentException("Max partition size should be positive");
+ this.max_partition_size = max_partition_size;
+ return this;
+ }
+
+ public CDSelectorConfigurationBuilder setOperationKindWeights(Map<OpSelectors.OperationKind, Integer> operation_kind_weights)
+ {
+ this.operation_kind_weights = operation_kind_weights;
+ return this;
+ }
+
+ public CDSelectorConfigurationBuilder setColumnMasks(Map<OpSelectors.OperationKind, long[]> column_mask_bitsets)
+ {
+ this.column_mask_bitsets = column_mask_bitsets;
+ return this;
+ }
+
+ public void setFractions(int[] fractions)
+ {
+ this.fractions = fractions;
+ }
+
+ public DefaultCDSelectorConfiguration build()
+ {
+ if (fractions == null)
+ {
+ return new DefaultCDSelectorConfiguration(modifications_per_lts,
+ rows_per_modification,
+ max_partition_size,
+ operation_kind_weights,
+ column_mask_bitsets);
+ }
+ else
+ {
+ return new HierarchicalCDSelectorConfiguration(modifications_per_lts,
+ rows_per_modification,
+ max_partition_size,
+ operation_kind_weights,
+ column_mask_bitsets,
+ fractions);
+ }
+ }
+ }
+
+ @JsonTypeName("default")
+ public static class DefaultCDSelectorConfiguration implements CDSelectorConfiguration
+ {
+ public final DistributionConfig modifications_per_lts;
+ public final DistributionConfig rows_per_modification;
+ public final int max_partition_size;
+ public final Map<OpSelectors.OperationKind, Integer> operation_kind_weights;
+ public final Map<OpSelectors.OperationKind, long[]> column_mask_bitsets;
+
+ @JsonCreator
+ public DefaultCDSelectorConfiguration(@JsonProperty("modifications_per_lts") DistributionConfig modifications_per_lts,
+ @JsonProperty("rows_per_modification") DistributionConfig rows_per_modification,
+ @JsonProperty(value = "window_size", defaultValue = "100") int max_partition_size,
+ @JsonProperty("operation_kind_weights") Map<OpSelectors.OperationKind, Integer> operation_kind_weights,
+ @JsonProperty("column_mask_bitsets") Map<OpSelectors.OperationKind, long[]> column_mask_bitsets)
+ {
+ this.modifications_per_lts = modifications_per_lts;
+ this.rows_per_modification = rows_per_modification;
+ this.max_partition_size = max_partition_size;
+ this.operation_kind_weights = operation_kind_weights;
+ this.column_mask_bitsets = column_mask_bitsets;
+ }
+
+ protected Function<OpSelectors.OperationKind, Surjections.Surjection<BitSet>> columnSelector(SchemaSpec schemaSpec)
+ {
+ Function<OpSelectors.OperationKind, Surjections.Surjection<BitSet>> columnSelector;
+ if (column_mask_bitsets == null)
+ {
+ columnSelector = OpSelectors.columnSelectorBuilder().forAll(schemaSpec.regularColumns.size()).build();
+ }
+ else
+ {
+ Map<OpSelectors.OperationKind, Surjections.Surjection<BitSet>> m = new HashMap<>();
+ for (Map.Entry<OpSelectors.OperationKind, long[]> entry : column_mask_bitsets.entrySet())
+ {
+ List<BitSet> bitSets = new ArrayList<>(entry.getValue().length);
+ for (long raw_bitset : entry.getValue())
+ bitSets.add(BitSet.create(raw_bitset, schemaSpec.regularColumns.size()));
+ Surjections.Surjection<BitSet> selector = Surjections.pick(bitSets);
+ m.put(entry.getKey(), selector);
+ }
+ columnSelector = m::get;
+ }
+
+ return columnSelector;
+ }
+
+ public OpSelectors.DescriptorSelector make(OpSelectors.Rng rng, SchemaSpec schemaSpec)
+ {
+ return new OpSelectors.DefaultDescriptorSelector(rng,
+ columnSelector(schemaSpec),
+ Surjections.weighted(operation_kind_weights),
+ modifications_per_lts.make(),
+ rows_per_modification.make(),
+ max_partition_size);
+ }
+ }
+
/**
 * Variant of {@link DefaultCDSelectorConfiguration} that builds a
 * {@link OpSelectors.HierarchicalDescriptorSelector} with the given fractions.
 * NOTE(review): no @JsonTypeName/@JsonCreator here, so this is presumably constructed only
 * via CDSelectorConfigurationBuilder rather than deserialized directly -- confirm.
 */
public static class HierarchicalCDSelectorConfiguration extends DefaultCDSelectorConfiguration
{
    private final int[] fractions;

    public HierarchicalCDSelectorConfiguration(DistributionConfig modifications_per_lts,
                                               DistributionConfig rows_per_modification,
                                               int max_partition_size,
                                               Map<OpSelectors.OperationKind, Integer> operation_kind_weights,
                                               Map<OpSelectors.OperationKind, long[]> column_mask_bitsets,
                                               int[] fractions)
    {
        super(modifications_per_lts, rows_per_modification, max_partition_size, operation_kind_weights, column_mask_bitsets);
        this.fractions = fractions;
    }

    public OpSelectors.DescriptorSelector make(OpSelectors.Rng rng, SchemaSpec schemaSpec)
    {
        return new OpSelectors.HierarchicalDescriptorSelector(rng,
                                                              fractions,
                                                              columnSelector(schemaSpec),
                                                              Surjections.weighted(operation_kind_weights),
                                                              modifications_per_lts.make(),
                                                              rows_per_modification.make(),
                                                              max_partition_size);
    }
}
+
/**
 * Marker interface for YAML/JSON-configurable distribution factories.
 * NOTE(review): uses As.PROPERTY with a "type" discriminator, unlike the other config
 * interfaces in this file which use As.WRAPPER_OBJECT -- confirm this is intentional.
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
public interface DistributionConfig extends Distribution.DistributionFactory
{
}
+
/** Stateless configuration producing an identity distribution. */
@JsonTypeName("identity")
public static class IdentityDistributionConfig implements DistributionConfig
{
    @JsonCreator
    public IdentityDistributionConfig()
    {
    }

    public Distribution make()
    {
        return new Distribution.IdentityDistribution();
    }
}
+
/** Stateless configuration producing a normal (Gaussian) distribution. */
@JsonTypeName("normal")
public static class NormalDistributionConfig implements DistributionConfig
{
    @JsonCreator
    public NormalDistributionConfig()
    {
    }

    public Distribution make()
    {
        return new Distribution.NormalDistribution();
    }
}
+
/** Configuration producing a distribution that always yields the given constant. */
@JsonTypeName("constant")
public static class ConstantDistributionConfig implements DistributionConfig
{
    public final long constant;

    @JsonCreator
    public ConstantDistributionConfig(@JsonProperty("constant") long constant)
    {
        this.constant = constant;
    }

    public Distribution make()
    {
        return new Distribution.ConstantDistribution(constant);
    }
}
+
+ @JsonTypeName("scaled")
+ public static class ScaledDistributionConfig implements DistributionConfig
+ {
+ private final long min;
+ private final long max;
+
+ @JsonCreator
+ public ScaledDistributionConfig(long min, long max)
+ {
+ this.min = min;
+ this.max = max;
+ }
+
+ public Distribution make()
+ {
+ return new Distribution.ScaledDistribution(min, max);
+ }
+ }
+
/**
 * Marker interface for YAML/JSON-configurable row-visitor factories; resolved
 * polymorphically via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface RowVisitorConfiguration extends RowVisitor.RowVisitorFactory
{
}
+
/** Stateless configuration producing the {@link DefaultRowVisitor}. */
@JsonTypeName("default")
public static class DefaultRowVisitorConfiguration implements RowVisitorConfiguration
{
    public RowVisitor make(SchemaSpec schema,
                           OpSelectors.MonotonicClock clock,
                           OpSelectors.DescriptorSelector descriptorSelector,
                           QuerySelector querySelector)
    {
        return new DefaultRowVisitor(schema,
                                     clock,
                                     descriptorSelector,
                                     querySelector);
    }
}
+
/**
 * Marker interface for YAML/JSON-configurable schema providers; resolved polymorphically
 * via {@code @JsonTypeName} (wrapper-object style).
 */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT)
public interface SchemaProviderConfiguration extends SchemaSpec.SchemaSpecFactory
{
}
+
/**
 * Default schema provider: inflates a generated schema (keyspace "harry", table "table0")
 * deterministically from the run seed.
 */
@JsonTypeName("default")
public static class DefaultSchemaProviderConfiguration implements SchemaProviderConfiguration
{
    public SchemaSpec make(long seed)
    {
        return SchemaGenerators.defaultSchemaSpecGen("harry", "table0")
                               .inflate(seed);
    }
}
+
+ // TODO: schema provider by DDL
+}
diff --git a/harry-core/src/harry/core/Run.java b/harry-core/src/harry/core/Run.java
new file mode 100644
index 0000000..fdff08e
--- /dev/null
+++ b/harry-core/src/harry/core/Run.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.core;
+
+import java.util.function.Supplier;
+
+import harry.ddl.SchemaSpec;
+import harry.model.Model;
+import harry.model.OpSelectors;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.PartitionVisitor;
+import harry.runner.RowVisitor;
+import harry.runner.Validator;
+
/**
 * Immutable bundle of every component a single Harry run needs: RNG, clock, descriptor
 * selectors, schema, model, SUT, validator and visitors, plus a {@link Configuration}
 * snapshot (presumably kept so a failing run can be reproduced -- the consumers are not
 * visible here).
 *
 * The constructor is package-private: instances are expected to be created from within
 * harry.core (likely by Configuration -- confirm).
 */
public class Run
{
    public final OpSelectors.Rng rng;
    public final OpSelectors.MonotonicClock clock;
    public final OpSelectors.PdSelector pdSelector;

    public final OpSelectors.DescriptorSelector descriptorSelector;

    public final SchemaSpec schemaSpec;
    public final Model model;
    public final SystemUnderTest sut;
    public final Validator validator;
    public final RowVisitor rowVisitor;
    // Factory rather than a single instance: each consumer gets its own PartitionVisitor
    public final Supplier<PartitionVisitor> visitorFactory;

    // Configuration this run was created from
    public final Configuration snapshot;

    Run(OpSelectors.Rng rng,
        OpSelectors.MonotonicClock clock,
        OpSelectors.PdSelector pdSelector,
        OpSelectors.DescriptorSelector descriptorSelector,

        SchemaSpec schemaSpec,
        Model model,
        SystemUnderTest sut,
        Validator validator,
        RowVisitor rowVisitor,
        Supplier<PartitionVisitor> visitorFactory,
        Configuration snapshot)
    {
        this.rng = rng;
        this.clock = clock;
        this.pdSelector = pdSelector;
        this.descriptorSelector = descriptorSelector;
        this.schemaSpec = schemaSpec;
        this.model = model;
        this.sut = sut;
        this.validator = validator;
        this.rowVisitor = rowVisitor;
        this.visitorFactory = visitorFactory;
        this.snapshot = snapshot;
    }
}
diff --git a/harry-core/src/harry/corruptor/AddExtraRowCorruptor.java b/harry-core/src/harry/corruptor/AddExtraRowCorruptor.java
new file mode 100644
index 0000000..85ae502
--- /dev/null
+++ b/harry-core/src/harry/corruptor/AddExtraRowCorruptor.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.OpSelectors;
+import harry.model.SelectHelper;
+import harry.model.sut.SystemUnderTest;
+import harry.operations.WriteHelper;
+import harry.runner.Query;
+
/**
 * Corrupts a query response by inserting an extra row the model never generated: picks a
 * clustering descriptor that matches the query but is absent from the current result set,
 * and writes a full row for it at a timestamp past the newest one observed.
 */
public class AddExtraRowCorruptor implements QueryResponseCorruptor
{
    private final SchemaSpec schema;
    private final OpSelectors.MonotonicClock clock;
    private final OpSelectors.DescriptorSelector descriptorSelector;

    public AddExtraRowCorruptor(SchemaSpec schema,
                                OpSelectors.MonotonicClock clock,
                                OpSelectors.DescriptorSelector descriptorSelector)
    {
        this.schema = schema;
        this.clock = clock;
        this.descriptorSelector = descriptorSelector;
    }

    public boolean maybeCorrupt(Query query, SystemUnderTest sut)
    {
        // Collect the clustering descriptors already present, and the largest lts seen,
        // so the corrupting write can be given a strictly newer timestamp.
        // NOTE(review): other call sites pass statement.cql()/bindings() to sut.execute;
        // confirm an execute(CompiledStatement) overload exists.
        Set<Long> cds = new HashSet<>();
        long maxLts = 0;
        for (Object[] obj : sut.execute(query.toSelectStatement()))
        {
            ResultSetRow row = SelectHelper.resultSetToRow(schema, clock, obj);
            // TODO: extract CD cheaper
            cds.add(row.cd);
            for (int i = 0; i < row.lts.length; i++)
                maxLts = Math.max(maxLts, row.lts[i]);
        }

        // Partition already full: there is no unused cd left to add.
        if (cds.size() >= descriptorSelector.maxPartitionSize())
            return false;

        // Probe for a cd that both matches the query and is not in the result set;
        // give up (no corruption) after 1000 attempts.
        long cd;
        long attempt = 0;
        do
        {
            cd = descriptorSelector.randomCd(query.pd, attempt, schema);
            if (attempt++ == 1000)
                return false;
        }
        while (!query.match(cd) || cds.contains(cd));

        long[] vds = descriptorSelector.vds(query.pd, cd, maxLts, 0, schema);

        // We do not know if the row was deleted. We could try inferring it, but that
        // still won't help since we can't use it anyways, since collisions between a
        // written value and tombstone are resolved in favour of tombstone, so we're
        // just going to take the next lts.
        sut.execute(WriteHelper.inflateInsert(schema, query.pd, cd, vds, clock.rts(maxLts) + 1));
        return true;
    }
}
\ No newline at end of file
diff --git a/harry-core/src/harry/corruptor/ChangeValueCorruptor.java b/harry-core/src/harry/corruptor/ChangeValueCorruptor.java
new file mode 100644
index 0000000..032f962
--- /dev/null
+++ b/harry-core/src/harry/corruptor/ChangeValueCorruptor.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import java.util.Arrays;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.generators.DataGenerators;
+import harry.generators.PcgRSUFast;
+import harry.generators.RandomGenerator;
+import harry.model.Model;
+import harry.model.OpSelectors;
+import harry.operations.CompiledStatement;
+import harry.operations.WriteHelper;
+
+/**
+ * Corrupts a single value written value in the row by writing a valid, invertible value with an incorrect
+ * descriptor, if row has any values written.
+ */
+public class ChangeValueCorruptor implements RowCorruptor
+{
+ private final SchemaSpec schema;
+ private final OpSelectors.MonotonicClock clock;
+ private final RandomGenerator rng;
+
+ public ChangeValueCorruptor(SchemaSpec schemaSpec,
+ OpSelectors.MonotonicClock clock)
+ {
+ this.schema = schemaSpec;
+ this.clock = clock;
+ this.rng = new PcgRSUFast(1, 1);
+ }
+
+ // Can corrupt any row that has at least one written non-null value
+ public boolean canCorrupt(ResultSetRow row)
+ {
+ for (int idx = 0; idx < row.lts.length; idx++)
+ {
+ // TODO: in addition to this, we should check if the value equals to the largest possible
+ // value, since otherwise it won't sort correctly.
+ if (row.lts[idx] != Model.NO_TIMESTAMP)
+ return true;
+ }
+ return false;
+ }
+
+ public CompiledStatement corrupt(ResultSetRow row)
+ {
+ long[] corruptedVds = new long[row.vds.length];
+ Arrays.fill(corruptedVds, DataGenerators.UNSET_DESCR);
+
+ int idx;
+ do
+ {
+ idx = rng.nextInt(corruptedVds.length - 1);
+ } while (row.lts[idx] == Model.NO_TIMESTAMP);
+
+ final long oldV = row.vds[idx];
+ do
+ {
+ corruptedVds[idx] = +rng.next();
+ }
+ // we need to find a value that sorts strictly greater than the current one
+ while (schema.regularColumns.get(idx).generator().compare(corruptedVds[idx], oldV) <= 0);
+
+ return WriteHelper.inflateInsert(schema, row.pd, row.cd, corruptedVds, clock.rts(row.lts[idx]));
+ }
+}
diff --git a/harry-core/src/harry/corruptor/HideRowCorruptor.java b/harry-core/src/harry/corruptor/HideRowCorruptor.java
new file mode 100644
index 0000000..a2a2648
--- /dev/null
+++ b/harry-core/src/harry/corruptor/HideRowCorruptor.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.OpSelectors;
+import harry.operations.CompiledStatement;
+import harry.operations.DeleteHelper;
+
+public class HideRowCorruptor implements RowCorruptor
+{
+ private final SchemaSpec schema;
+ private final OpSelectors.MonotonicClock clock;
+
+ public HideRowCorruptor(SchemaSpec schemaSpec,
+ OpSelectors.MonotonicClock clock)
+ {
+ this.schema = schemaSpec;
+ this.clock = clock;
+ }
+
+ // Can corrupt any row that has at least one written non-null value
+ public boolean canCorrupt(ResultSetRow row)
+ {
+ return row != null;
+ }
+
+ public CompiledStatement corrupt(ResultSetRow row)
+ {
+ return DeleteHelper.deleteRow(schema, row.pd, row.cd, clock.rts(clock.maxLts()) + 1);
+ }
+}
diff --git a/harry-core/src/harry/corruptor/HideValueCorruptor.java b/harry-core/src/harry/corruptor/HideValueCorruptor.java
new file mode 100644
index 0000000..871ac37
--- /dev/null
+++ b/harry-core/src/harry/corruptor/HideValueCorruptor.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.generators.PcgRSUFast;
+import harry.generators.RandomGenerator;
+import harry.model.Model;
+import harry.model.OpSelectors;
+import harry.operations.CompiledStatement;
+import harry.operations.DeleteHelper;
+import harry.util.BitSet;
+
+// removes/hides the value of one of the columns that was previously set
+public class HideValueCorruptor implements RowCorruptor
+{
+ private final SchemaSpec schema;
+ private final OpSelectors.MonotonicClock clock;
+ private final RandomGenerator rng;
+
+ public HideValueCorruptor(SchemaSpec schemaSpec,
+ OpSelectors.MonotonicClock clock)
+ {
+ this.schema = schemaSpec;
+ this.clock = clock;
+ this.rng = new PcgRSUFast(1, 1);
+ }
+
+ // Can corrupt any row that has at least one written non-null value
+ public boolean canCorrupt(ResultSetRow row)
+ {
+ for (int idx = 0; idx < row.lts.length; idx++)
+ {
+ if (row.lts[idx] != Model.NO_TIMESTAMP)
+ return true;
+ }
+ return false;
+ }
+
+ public CompiledStatement corrupt(ResultSetRow row)
+ {
+ int idx;
+ do
+ {
+ idx = rng.nextInt(row.lts.length - 1);
+ }
+ while (row.lts[idx] == Model.NO_TIMESTAMP);
+
+ BitSet mask = BitSet.allUnset(schema.regularColumns.size());
+ mask.set(idx);
+ return DeleteHelper.deleteColumn(schema,
+ row.pd,
+ row.cd,
+ mask,
+ clock.rts(clock.maxLts()) + 1);
+ }
+}
diff --git a/harry-core/src/harry/corruptor/QueryResponseCorruptor.java b/harry-core/src/harry/corruptor/QueryResponseCorruptor.java
new file mode 100644
index 0000000..aefce0f
--- /dev/null
+++ b/harry-core/src/harry/corruptor/QueryResponseCorruptor.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.OpSelectors;
+import harry.model.SelectHelper;
+import harry.model.sut.SystemUnderTest;
+import harry.operations.CompiledStatement;
+import harry.runner.Query;
+
+public interface QueryResponseCorruptor
+{
+ boolean maybeCorrupt(Query query, SystemUnderTest sut);
+
+ class SimpleQueryResponseCorruptor implements QueryResponseCorruptor
+ {
+ private final RowCorruptor rowCorruptor;
+ private final SchemaSpec schema;
+ private final OpSelectors.MonotonicClock clock;
+
+ public SimpleQueryResponseCorruptor(SchemaSpec schema,
+ OpSelectors.MonotonicClock clock,
+ RowCorruptor.RowCorruptorFactory factory)
+ {
+ this.rowCorruptor = factory.create(schema, clock);
+ this.schema = schema;
+ this.clock = clock;
+ }
+
+ public boolean maybeCorrupt(Query query, SystemUnderTest sut)
+ {
+ List<ResultSetRow> result = new ArrayList<>();
+ CompiledStatement statement = query.toSelectStatement();
+ for (Object[] obj : sut.execute(statement.cql(), statement.bindings()))
+ result.add(SelectHelper.resultSetToRow(schema, clock, obj));
+
+ // TODO: technically, we can do this just depends on corruption strategy
+ // we just need to corrupt results of the current query.
+ if (result.isEmpty())
+ return false;
+
+ for (ResultSetRow row : result)
+ {
+ if (rowCorruptor.maybeCorrupt(row, sut))
+ return true;
+ }
+ return false;
+ }
+ }
+}
diff --git a/harry-core/src/harry/corruptor/RowCorruptor.java b/harry-core/src/harry/corruptor/RowCorruptor.java
new file mode 100644
index 0000000..85261b2
--- /dev/null
+++ b/harry-core/src/harry/corruptor/RowCorruptor.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.OpSelectors;
+import harry.model.sut.SystemUnderTest;
+import harry.operations.CompiledStatement;
+
+public interface RowCorruptor
+{
+ boolean canCorrupt(ResultSetRow row);
+
+ CompiledStatement corrupt(ResultSetRow row);
+
+ // Returns true if it could corrupt a row, false otherwise
+ default boolean maybeCorrupt(ResultSetRow row, SystemUnderTest sut)
+ {
+ if (canCorrupt(row))
+ {
+ CompiledStatement statement = corrupt(row);
+ sut.execute(statement.cql(), statement.bindings());
+ return true;
+ }
+ return false;
+ }
+
+ interface RowCorruptorFactory
+ {
+ RowCorruptor create(SchemaSpec schemaSpec,
+ OpSelectors.MonotonicClock clock);
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/corruptor/ShowValueCorruptor.java b/harry-core/src/harry/corruptor/ShowValueCorruptor.java
new file mode 100644
index 0000000..450c687
--- /dev/null
+++ b/harry-core/src/harry/corruptor/ShowValueCorruptor.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.corruptor;
+
+import java.util.Arrays;
+
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.generators.DataGenerators;
+import harry.generators.PcgRSUFast;
+import harry.generators.RandomGenerator;
+import harry.model.Model;
+import harry.model.OpSelectors;
+import harry.operations.CompiledStatement;
+import harry.operations.WriteHelper;
+
+public class ShowValueCorruptor implements RowCorruptor
+{
+ private final SchemaSpec schema;
+ private final OpSelectors.MonotonicClock clock;
+ private final RandomGenerator rng;
+
+ public ShowValueCorruptor(SchemaSpec schemaSpec,
+ OpSelectors.MonotonicClock clock)
+ {
+ this.schema = schemaSpec;
+ this.clock = clock;
+ this.rng = new PcgRSUFast(1, 1);
+ }
+
+ // Can corrupt any row that has at least one written non-null value
+ public boolean canCorrupt(ResultSetRow row)
+ {
+ for (int idx = 0; idx < row.lts.length; idx++)
+ {
+ if (row.lts[idx] == Model.NO_TIMESTAMP)
+ return true;
+ }
+ return false;
+ }
+
+ public CompiledStatement corrupt(ResultSetRow row)
+ {
+ long[] corruptedVds = new long[row.lts.length];
+ Arrays.fill(corruptedVds, DataGenerators.UNSET_DESCR);
+
+ int idx;
+ do
+ {
+ idx = rng.nextInt(corruptedVds.length - 1);
+ }
+ while (row.lts[idx] != Model.NO_TIMESTAMP);
+
+ corruptedVds[idx] = rng.next();
+
+ // We do not know LTS of the deleted row. We could try inferring it, but that
+ // still won't help since we can't use it anyways, since collisions between a
+ // written value and tombstone are resolved in favour of tombstone.
+ return WriteHelper.inflateInsert(schema, row.pd, row.cd, corruptedVds, clock.rts(clock.maxLts()) + 1);
+ }
+}
diff --git a/harry-core/src/harry/data/ResultSetRow.java b/harry-core/src/harry/data/ResultSetRow.java
new file mode 100644
index 0000000..e2bf02b
--- /dev/null
+++ b/harry-core/src/harry/data/ResultSetRow.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.data;
+
/**
 * A result-set row in deflated (descriptor) form: the partition descriptor,
 * the clustering descriptor, and the value descriptors of the regular columns
 * together with their logical timestamps.
 */
public class ResultSetRow
{
    public final long pd;    // partition descriptor
    public final long cd;    // clustering descriptor
    public final long[] vds; // value descriptors, one per regular column
    public final long[] lts; // logical timestamps, parallel to vds

    public ResultSetRow(long pd,
                        long cd,
                        long[] vds,
                        long[] lts)
    {
        this.pd = pd;
        this.cd = cd;
        this.vds = vds;
        this.lts = lts;
    }

    public String toString()
    {
        return "resultSetRow("
               + pd +
               "L, " + cd +
               "L, values(" + toString(vds) + ")" +
               ", lts(" + toString(lts) + "))";
    }

    /**
     * Formats descriptors as comma-separated long literals, e.g. "1L,2L".
     * Uses a StringBuilder instead of repeated String concatenation in a loop.
     */
    public String toString(long[] arr)
    {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < arr.length; i++)
        {
            if (i > 0)
                sb.append(',');
            sb.append(arr[i]).append('L');
        }
        return sb.toString();
    }
}
diff --git a/harry-core/src/harry/ddl/ColumnSpec.java b/harry-core/src/harry/ddl/ColumnSpec.java
new file mode 100644
index 0000000..623a6e7
--- /dev/null
+++ b/harry-core/src/harry/ddl/ColumnSpec.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.ddl;
+
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.UUID;
+
+import com.google.common.collect.ImmutableList;
+
+import harry.generators.Bijections;
+import harry.generators.StringBijection;
+
+public class ColumnSpec<T>
+{
+ public final String name;
+ public final DataType<T> type;
+ public final Kind kind;
+ int columnIndex;
+
+ public ColumnSpec(String name,
+ DataType<T> type,
+ Kind kind)
+ {
+ this.name = name;
+ this.type = type;
+ this.kind = kind;
+ }
+
+ void setColumnIndex(int idx)
+ {
+ this.columnIndex = idx;
+ }
+
+ public int getColumnIndex()
+ {
+ return columnIndex;
+ }
+
+ public String toCQL()
+ {
+ return String.format("%s %s%s",
+ name,
+ type.toString(),
+ kind == Kind.STATIC ? " static" : "");
+ }
+
+ public boolean equals(Object o)
+ {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ ColumnSpec<?> that = (ColumnSpec<?>) o;
+ return Objects.equals(name, that.name) &&
+ Objects.equals(type, that.type) &&
+ kind == that.kind;
+ }
+
+ public int hashCode()
+ {
+ return Objects.hash(name, type, kind);
+ }
+
+ public String name()
+ {
+ return name;
+ }
+
+ public boolean isReversed()
+ {
+ return type.isReversed();
+ }
+
+ public String toString()
+ {
+ return name + '(' + type.toString() + ")";
+ }
+
+ public Bijections.Bijection<T> generator()
+ {
+ return type.generator();
+ }
+
+ public T inflate(long current)
+ {
+ return type.generator().inflate(current);
+ }
+
+ public long adjustEntropyDomain(long current)
+ {
+ return type.generator().adjustEntropyDomain(current);
+ }
+
+ public long deflate(T value)
+ {
+ return type.generator().deflate(value);
+ }
+
+ public static ColumnSpec<?> pk(String name, DataType<?> type)
+ {
+ return new ColumnSpec<>(name, type, Kind.PARTITION_KEY);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static ColumnSpec<?> ck(String name, DataType<?> type, boolean isReversed)
+ {
+ return new ColumnSpec(name, isReversed ? ReversedType.getInstance(type) : type, Kind.CLUSTERING);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static ColumnSpec<?> ck(String name, DataType<?> type)
+ {
+ return new ColumnSpec(name, type, Kind.CLUSTERING);
+ }
+
+ public static ColumnSpec<?> regularColumn(String name, DataType<?> type)
+ {
+ return new ColumnSpec<>(name, type, Kind.REGULAR);
+ }
+
+ public static ColumnSpec<?> staticColumn(String name, DataType<?> type)
+ {
+ return new ColumnSpec<>(name, type, Kind.CLUSTERING);
+ }
+
+ public enum Kind
+ {
+ CLUSTERING, REGULAR, STATIC, PARTITION_KEY
+ }
+
+ public static abstract class DataType<T>
+ {
+ protected final String cqlName;
+
+ protected DataType(String cqlName)
+ {
+ this.cqlName = cqlName;
+ }
+
+ public boolean isReversed()
+ {
+ return false;
+ }
+
+ public abstract Bijections.Bijection<T> generator();
+
+ public abstract int maxSize();
+
+ public String toString()
+ {
+ return cqlName;
+ }
+ }
+
+ public static final DataType<Byte> int8Type = new DataType<Byte>("tinyint")
+ {
+ public Bijections.Bijection<Byte> generator()
+ {
+ return Bijections.INT8_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Byte.BYTES;
+ }
+ };
+ public static final DataType<Short> int16Type = new DataType<Short>("smallint")
+ {
+ public Bijections.Bijection<Short> generator()
+ {
+ return Bijections.INT16_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Short.BYTES;
+ }
+ };
+ public static final DataType<Integer> int32Type = new DataType<Integer>("int")
+ {
+ public Bijections.Bijection<Integer> generator()
+ {
+ return Bijections.INT32_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Integer.BYTES;
+ }
+ };
+ public static final DataType<Long> int64Type = new DataType<Long>("bigint")
+ {
+ public Bijections.Bijection<Long> generator()
+ {
+ return Bijections.INT64_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+ public static final DataType<Boolean> booleanType = new DataType<Boolean>("boolean")
+ {
+ public Bijections.Bijection<Boolean> generator()
+ {
+ return Bijections.BOOLEAN_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Byte.BYTES;
+ }
+ };
+ public static final DataType<Float> floatType = new DataType<Float>("float")
+ {
+ public Bijections.Bijection<Float> generator()
+ {
+ return Bijections.FLOAT_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Float.BYTES;
+ }
+ };
+ public static final DataType<Double> doubleType = new DataType<Double>("double")
+ {
+ public Bijections.Bijection<Double> generator()
+ {
+ return Bijections.DOUBLE_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Double.BYTES;
+ }
+ };
+ public static final DataType<String> asciiType = new DataType<String>("ascii")
+ {
+ private final Bijections.Bijection<String> gen = new StringBijection();
+
+ public Bijections.Bijection<String> generator()
+ {
+ return gen;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+
+ public static DataType<String> asciiType(int nibbleSize, int maxRandomNibbles)
+ {
+ Bijections.Bijection<String> gen = new StringBijection(nibbleSize, maxRandomNibbles);
+
+ return new DataType<String>("ascii")
+ {
+ public Bijections.Bijection<String> generator()
+ {
+ return gen;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+ }
+
+ public static final DataType<UUID> uuidType = new DataType<UUID>("uuid")
+ {
+ public Bijections.Bijection<UUID> generator()
+ {
+ return Bijections.UUID_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+
+ public static final DataType<Date> timestampType = new DataType<Date>("timestamp")
+ {
+ public Bijections.Bijection<Date> generator()
+ {
+ return Bijections.TIMESTAMP_GENERATOR;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+
+ public static final Collection<DataType<?>> DATA_TYPES = ImmutableList.of(
+ ColumnSpec.int8Type,
+ ColumnSpec.int16Type,
+ ColumnSpec.int32Type,
+ ColumnSpec.int64Type,
+ ColumnSpec.booleanType,
+ ColumnSpec.floatType,
+ ColumnSpec.doubleType,
+ ColumnSpec.asciiType,
+ ColumnSpec.uuidType,
+ ColumnSpec.timestampType);
+
+ public static class ReversedType<T> extends DataType<T>
+ {
+ public static final Map<DataType<?>, ReversedType<?>> cache = new HashMap()
+ {{
+ put(int8Type, new ReversedType<>(int8Type));
+ put(int16Type, new ReversedType<>(int16Type));
+ put(int32Type, new ReversedType<>(int32Type));
+ put(int64Type, new ReversedType<>(int64Type));
+ put(booleanType, new ReversedType<>(booleanType));
+ put(floatType, new ReversedType<>(floatType, new Bijections.ReverseFloatGenerator()));
+ put(doubleType, new ReversedType<>(doubleType, new Bijections.ReverseDoubleGenerator()));
+ put(asciiType, new ReversedType<>(asciiType));
+ }};
+
+ private final DataType<T> baseType;
+ private final Bijections.Bijection<T> generator;
+
+ public ReversedType(DataType<T> baseType)
+ {
+ super(baseType.cqlName);
+ this.baseType = baseType;
+ this.generator = new Bijections.ReverseBijection<>(baseType.generator());
+ }
+
+ public ReversedType(DataType<T> baseType, Bijections.Bijection<T> generator)
+ {
+ super(baseType.cqlName);
+ this.baseType = baseType;
+ this.generator = generator;
+ }
+
+ public boolean isReversed()
+ {
+ return true;
+ }
+
+ public Bijections.Bijection<T> generator()
+ {
+ return generator;
+ }
+
+ public int maxSize()
+ {
+ return baseType.maxSize();
+ }
+
+ public DataType<T> baseType()
+ {
+ return baseType;
+ }
+
+ public static <T> DataType<T> getInstance(DataType<T> type)
+ {
+ ReversedType<T> t = (ReversedType<T>) cache.get(type);
+ if (t == null)
+ t = new ReversedType<>(type);
+ assert t.baseType == type : "Type mismatch";
+ return t;
+ }
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/ddl/SchemaGenerators.java b/harry-core/src/harry/ddl/SchemaGenerators.java
new file mode 100644
index 0000000..fd46bb8
--- /dev/null
+++ b/harry-core/src/harry/ddl/SchemaGenerators.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.ddl;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+import com.google.common.collect.ImmutableList;
+
+import harry.generators.Generator;
+import harry.generators.Surjections;
+
+public class SchemaGenerators
+{
+ private final static long SCHEMAGEN_STREAM_ID = 0x6264593273L;
+
+ public static Builder schema(String ks)
+ {
+ return new Builder(ks);
+ }
+
+ public static final Collection<ColumnSpec.DataType<?>> clusteringKeyTypes;
+ public static final Collection<ColumnSpec.DataType<?>> columnTypes;
+
+ static
+ {
+ ImmutableList.Builder<ColumnSpec.DataType<?>> builder = ImmutableList.builder();
+ builder.add(ColumnSpec.int8Type,
+ ColumnSpec.int16Type,
+ ColumnSpec.int32Type,
+ ColumnSpec.int64Type,
+// TODO re-enable boolean type; add it to ByteBufferUtil in Cassandra for that
+// ColumnSpec.booleanType,
+ ColumnSpec.floatType,
+ ColumnSpec.doubleType,
+ ColumnSpec.asciiType);
+ columnTypes = builder.build();
+ builder = ImmutableList.builder();
+ builder.addAll(columnTypes);
+ for (ColumnSpec.DataType<?> columnType : columnTypes)
+ {
+ builder.add(ColumnSpec.ReversedType.getInstance(columnType));
+ }
+ clusteringKeyTypes = builder.build();
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> Generator<T> fromValues(Collection<T> allValues)
+ {
+ return fromValues((T[]) allValues.toArray());
+ }
+
+ public static <T> Generator<T> fromValues(T[] allValues)
+ {
+ return (rng) -> {
+ return allValues[rng.nextInt(allValues.length - 1)];
+ };
+ }
+
+ @SuppressWarnings("unchecked")
+ public static Generator<ColumnSpec<?>> columnSpecGenerator(String prefix, ColumnSpec.Kind kind)
+ {
+ return fromValues(columnTypes)
+ .map(new Function<ColumnSpec.DataType<?>, ColumnSpec<?>>()
+ {
+ private int counter = 0;
+
+ public ColumnSpec<?> apply(ColumnSpec.DataType<?> type)
+ {
+ return new ColumnSpec<>(prefix + (counter++),
+ type,
+ kind);
+ }
+ });
+ }
+
+ @SuppressWarnings("unchecked")
+ public static Generator<ColumnSpec<?>> columnSpecGenerator(Collection<ColumnSpec.DataType<?>> columnTypes, String prefix, ColumnSpec.Kind kind)
+ {
+ return fromValues(columnTypes)
+ .map(new Function<ColumnSpec.DataType<?>, ColumnSpec<?>>()
+ {
+ private int counter = 0;
+
+ public ColumnSpec<?> apply(ColumnSpec.DataType<?> type)
+ {
+ return new ColumnSpec<>(prefix + (counter++),
+ type,
+ kind);
+ }
+ });
+ }
+
+ @SuppressWarnings("unchecked")
+ public static Generator<ColumnSpec<?>> clusteringColumnSpecGenerator(String prefix)
+ {
+ return fromValues(clusteringKeyTypes)
+ .map(new Function<ColumnSpec.DataType<?>, ColumnSpec<?>>()
+ {
+ private int counter = 0;
+
+ public ColumnSpec<?> apply(ColumnSpec.DataType<?> type)
+ {
+ return ColumnSpec.ck(prefix + (counter++), type);
+ }
+ });
+ }
+
+ private static AtomicInteger tableCounter = new AtomicInteger(1);
+
+ public static class Builder
+ {
+ private final String keyspace;
+ private final Supplier<String> tableNameSupplier;
+
+ private Generator<ColumnSpec<?>> pkGenerator = columnSpecGenerator("pk", ColumnSpec.Kind.PARTITION_KEY);
+ private Generator<ColumnSpec<?>> ckGenerator = clusteringColumnSpecGenerator("ck");
+ private Generator<ColumnSpec<?>> regularGenerator = columnSpecGenerator("regular", ColumnSpec.Kind.REGULAR);
+
+ private int minPks = 1;
+ private int maxPks = 1;
+ private int minCks = 0;
+ private int maxCks = 0;
+ private int minRegular = 0;
+ private int maxRegular = 0;
+
+ public Builder(String keyspace)
+ {
+ this(keyspace, () -> "table_" + tableCounter.getAndIncrement());
+ }
+
+ public Builder(String keyspace, Supplier<String> tableNameSupplier)
+ {
+ this.keyspace = keyspace;
+ this.tableNameSupplier = tableNameSupplier;
+ }
+
+ public Builder partitionKeyColumnCount(int numCols)
+ {
+ return partitionKeyColumnCount(numCols, numCols);
+ }
+
+ public Builder partitionKeyColumnCount(int minCols, int maxCols)
+ {
+ this.minPks = minCols;
+ this.maxPks = maxCols;
+ return this;
+ }
+
+ public Builder partitionKeySpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+ {
+ this.minPks = minCols;
+ this.maxPks = maxCols;
+ this.pkGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "pk", ColumnSpec.Kind.PARTITION_KEY);
+ return this;
+ }
+
+ public Builder clusteringColumnCount(int numCols)
+ {
+ return clusteringColumnCount(numCols, numCols);
+ }
+
+ public Builder clusteringColumnCount(int minCols, int maxCols)
+ {
+ this.minCks = minCols;
+ this.maxCks = maxCols;
+ return this;
+ }
+
+ public Builder clusteringKeySpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+ {
+ this.minCks = minCols;
+ this.maxCks = maxCols;
+ this.ckGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "ck", ColumnSpec.Kind.CLUSTERING);
+ return this;
+ }
+
+ public Builder regularColumnCount(int numCols)
+ {
+ return regularColumnCount(numCols, numCols);
+ }
+
+ public Builder regularColumnCount(int minCols, int maxCols)
+ {
+ this.minRegular = minCols;
+ this.maxRegular = maxCols;
+ return this;
+ }
+
+ public Builder regularColumnSpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+ {
+ this.minRegular = minCols;
+ this.maxRegular = maxCols;
+ this.regularGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "regular", ColumnSpec.Kind.REGULAR);
+ return this;
+ }
+
+ private static class ColumnCounts
+ {
+ private final int pks;
+ private final int cks;
+ private final int regulars;
+
+ private ColumnCounts(int pks, int cks, int regulars)
+ {
+ this.pks = pks;
+ this.cks = cks;
+ this.regulars = regulars;
+ }
+ }
+
+ public Generator<ColumnCounts> columnCountsGenerator()
+ {
+ return (rand) -> {
+ int pks = rand.nextInt(minPks, maxPks);
+ int cks = rand.nextInt(minCks, maxCks);
+ int regulars = rand.nextInt(minRegular, maxRegular);
+
+ return new ColumnCounts(pks, cks, regulars);
+ };
+ }
+
+ public Generator<SchemaSpec> generator()
+ {
+ Generator<ColumnCounts> columnCountsGenerator = columnCountsGenerator();
+
+ return columnCountsGenerator.flatMap(counts -> {
+ return rand -> {
+ List<ColumnSpec<?>> pk = pkGenerator.generate(rand, counts.pks);
+ List<ColumnSpec<?>> ck = ckGenerator.generate(rand, counts.cks);
+ return new SchemaSpec(keyspace,
+ tableNameSupplier.get(),
+ pk,
+ ck,
+ regularGenerator.generate(rand, counts.regulars));
+ };
+ });
+ }
+
+ public Surjections.Surjection<SchemaSpec> surjection()
+ {
+ return generator().toSurjection(SCHEMAGEN_STREAM_ID);
+ }
+ }
+
+ public static Surjections.Surjection<SchemaSpec> defaultSchemaSpecGen(String ks, String table)
+ {
+ return new SchemaGenerators.Builder(ks, () -> table)
+ .partitionKeySpec(1, 4,
+// ColumnSpec.int8Type,
+// ColumnSpec.int16Type,
+ ColumnSpec.int32Type,
+ ColumnSpec.int64Type,
+// ColumnSpec.floatType,
+// ColumnSpec.doubleType,
+ ColumnSpec.asciiType(4, 10))
+ .clusteringKeySpec(1, 4,
+// ColumnSpec.int8Type,
+// ColumnSpec.int16Type,
+ ColumnSpec.int32Type,
+ ColumnSpec.int64Type,
+// ColumnSpec.floatType,
+// ColumnSpec.doubleType,
+ ColumnSpec.asciiType(4, 10))
+ .regularColumnSpec(1, 10,
+// ColumnSpec.int8Type,
+// ColumnSpec.int16Type,
+ ColumnSpec.int32Type,
+ ColumnSpec.int64Type,
+// ColumnSpec.floatType,
+// ColumnSpec.doubleType,
+ ColumnSpec.asciiType(5, 10))
+ .surjection();
+ }
+}
diff --git a/harry-core/src/harry/ddl/SchemaSpec.java b/harry-core/src/harry/ddl/SchemaSpec.java
new file mode 100644
index 0000000..52c4767
--- /dev/null
+++ b/harry-core/src/harry/ddl/SchemaSpec.java
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.ddl;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+
+import harry.generators.DataGenerators;
+import harry.operations.CompiledStatement;
+import harry.operations.Relation;
+import harry.util.BitSet;
+
+// TODO: improve API of this class
+public class SchemaSpec
+{
+ public interface SchemaSpecFactory
+ {
+ public SchemaSpec make(long seed);
+ }
+
+ public final DataGenerators.KeyGenerator pkGenerator;
+ public final DataGenerators.KeyGenerator ckGenerator;
+
+ private final boolean isCompactStorage;
+
+ // These fields are immutable, and are safe as public
+ public final String keyspace;
+ public final String table;
+
+ public final List<ColumnSpec<?>> partitionKeys;
+ public final List<ColumnSpec<?>> clusteringKeys;
+ public final List<ColumnSpec<?>> regularColumns;
+ public final List<ColumnSpec<?>> allColumns;
+
+ public final BitSet ALL_COLUMNS_BITSET;
+
+ // TODO: forbid this constructor; add the one where column specs would be initialized through builder and have indexes
+ public SchemaSpec(String keyspace,
+ String table,
+ List<ColumnSpec<?>> partitionKeys,
+ List<ColumnSpec<?>> clusteringKeys,
+ List<ColumnSpec<?>> regularColumns)
+ {
+ this(keyspace, table, partitionKeys, clusteringKeys, regularColumns, false);
+ }
+
+ public SchemaSpec(String keyspace,
+ String table,
+ List<ColumnSpec<?>> partitionKeys,
+ List<ColumnSpec<?>> clusteringKeys,
+ List<ColumnSpec<?>> regularColumns,
+ boolean isCompactStorage)
+ {
+ assert !isCompactStorage || clusteringKeys.size() == 0 || regularColumns.size() <= 1;
+
+ this.keyspace = keyspace;
+ this.table = table;
+ this.isCompactStorage = isCompactStorage;
+ this.partitionKeys = ImmutableList.copyOf(partitionKeys);
+ for (int i = 0; i < partitionKeys.size(); i++)
+ partitionKeys.get(i).setColumnIndex(i);
+ this.clusteringKeys = ImmutableList.copyOf(clusteringKeys);
+ for (int i = 0; i < clusteringKeys.size(); i++)
+ clusteringKeys.get(i).setColumnIndex(i);
+ this.regularColumns = ImmutableList.copyOf(regularColumns);
+ for (int i = 0; i < regularColumns.size(); i++)
+ regularColumns.get(i).setColumnIndex(i);
+ this.allColumns = ImmutableList.copyOf(Iterables.concat(partitionKeys,
+ clusteringKeys,
+ regularColumns));
+ this.pkGenerator = DataGenerators.createKeyGenerator(partitionKeys);
+ this.ckGenerator = DataGenerators.createKeyGenerator(clusteringKeys);
+
+ this.ALL_COLUMNS_BITSET = BitSet.allSet(regularColumns.size());
+ }
+
+ public static interface AddRelationCallback
+ {
+ public void accept(ColumnSpec spec, Relation.RelationKind kind, Object value);
+ }
+
+ public void inflateRelations(long pd,
+ List<Relation> clusteringRelations,
+ AddRelationCallback consumer)
+ {
+ Object[] pk = inflatePartitionKey(pd);
+ for (int i = 0; i < pk.length; i++)
+ consumer.accept(partitionKeys.get(i), Relation.RelationKind.EQ, pk[i]);
+
+ for (Relation r : clusteringRelations)
+ consumer.accept(r.columnSpec, r.kind, r.value());
+ }
+
+ public Object[] inflatePartitionKey(long pd)
+ {
+ return pkGenerator.inflate(pd);
+ }
+
+ public Object[] inflateClusteringKey(long cd)
+ {
+ return ckGenerator.inflate(cd);
+ }
+
+ public Object[] inflateRegularColumns(long[] vds)
+ {
+ return DataGenerators.inflateData(regularColumns, vds);
+ }
+
+ // TODO: remove indirection; call directly
+ public long adjustPdEntropy(long descriptor)
+ {
+ return pkGenerator.adjustEntropyDomain(descriptor);
+ }
+
+ public long adjustCdEntropy(long descriptor)
+ {
+ return ckGenerator.adjustEntropyDomain(descriptor);
+ }
+
+ public long deflatePartitionKey(Object[] pk)
+ {
+ return pkGenerator.deflate(pk);
+ }
+
+ public long deflateClusteringKey(Object[] ck)
+ {
+ return ckGenerator.deflate(ck);
+ }
+
+ public long[] deflateRegularColumns(Object[] regulars)
+ {
+ return DataGenerators.deflateData(regularColumns, regulars);
+ }
+
+ public CompiledStatement compile()
+ {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("CREATE TABLE ");
+ sb.append(keyspace)
+ .append(".")
+ .append(table)
+ .append(" (");
+
+ SeparatorAppender commaAppender = new SeparatorAppender();
+ for (ColumnSpec<?> cd : partitionKeys)
+ {
+ commaAppender.accept(sb);
+ sb.append(cd.toCQL());
+ if (partitionKeys.size() == 1 && clusteringKeys.size() == 0)
+ sb.append(" PRIMARY KEY");
+ }
+
+ Stream.concat(clusteringKeys.stream(),
+ regularColumns.stream())
+ .forEach((cd) -> {
+ commaAppender.accept(sb);
+ sb.append(cd.toCQL());
+ });
+
+ if (clusteringKeys.size() > 0 || partitionKeys.size() > 1)
+ {
+ sb.append(", ").append(getPrimaryKeyCql());
+ }
+
+ sb.append(')');
+
+ Runnable appendWith = doOnce(() -> sb.append(" WITH "));
+
+ if (isCompactStorage)
+ {
+ appendWith.run();
+ sb.append("COMPACT STORAGE AND");
+ }
+
+ if (clusteringKeys.size() > 0)
+ {
+ appendWith.run();
+ sb.append(getClusteringOrderCql())
+ .append(';');
+ }
+
+ return new CompiledStatement(sb.toString());
+ }
+
+ private String getClusteringOrderCql()
+ {
+ StringBuilder sb = new StringBuilder();
+ if (clusteringKeys.size() > 0)
+ {
+ sb.append(" CLUSTERING ORDER BY (");
+
+ SeparatorAppender commaAppender = new SeparatorAppender();
+ for (ColumnSpec<?> column : clusteringKeys)
+ {
+ commaAppender.accept(sb);
+ sb.append(column.name).append(' ').append(column.isReversed() ? "DESC" : "ASC");
+ }
+
+ // TODO: test for this
+// sb.append(") AND read_repair='none'");
+ sb.append(")");
+ }
+
+ return sb.toString();
+ }
+
+ private String getPrimaryKeyCql()
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append("PRIMARY KEY (");
+ if (partitionKeys.size() > 1)
+ {
+ sb.append('(');
+ SeparatorAppender commaAppender = new SeparatorAppender();
+ for (ColumnSpec<?> cd : partitionKeys)
+ {
+ commaAppender.accept(sb);
+ sb.append(cd.name);
+ }
+ sb.append(')');
+ }
+ else
+ {
+ sb.append(partitionKeys.get(0).name);
+ }
+
+ for (ColumnSpec<?> cd : clusteringKeys)
+ sb.append(", ").append(cd.name);
+
+ return sb.append(')').toString();
+ }
+
+ public String toString()
+ {
+ return String.format("schema {cql=%s, columns=%s}", compile().toString(), allColumns);
+ }
+
+ private static Runnable doOnce(Runnable r)
+ {
+ return new Runnable()
+ {
+ boolean executed = false;
+
+ public void run()
+ {
+ if (executed)
+ return;
+
+ executed = true;
+ r.run();
+ }
+ };
+ }
+
+ public static class SeparatorAppender implements Consumer<StringBuilder>
+ {
+ boolean isFirst = true;
+ private final String separator;
+
+ public SeparatorAppender()
+ {
+ this(",");
+ }
+
+ public SeparatorAppender(String separator)
+ {
+ this.separator = separator;
+ }
+
+ public void accept(StringBuilder stringBuilder)
+ {
+ if (isFirst)
+ isFirst = false;
+ else
+ stringBuilder.append(separator);
+ }
+
+ public void accept(StringBuilder stringBuilder, String s)
+ {
+ accept(stringBuilder);
+ stringBuilder.append(s);
+ }
+
+
+ public void reset()
+ {
+ isFirst = true;
+ }
+ }
+
+ public boolean equals(Object o)
+ {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ SchemaSpec that = (SchemaSpec) o;
+ return Objects.equals(keyspace, that.keyspace) &&
+ Objects.equals(table, that.table) &&
+ Objects.equals(partitionKeys, that.partitionKeys) &&
+ Objects.equals(clusteringKeys, that.clusteringKeys) &&
+ Objects.equals(regularColumns, that.regularColumns);
+ }
+
+ public int hashCode()
+ {
+ return Objects.hash(keyspace, table, partitionKeys, clusteringKeys, regularColumns);
+ }
+}
diff --git a/harry-core/src/harry/generators/Bijections.java b/harry-core/src/harry/generators/Bijections.java
new file mode 100644
index 0000000..43d78c7
--- /dev/null
+++ b/harry-core/src/harry/generators/Bijections.java
@@ -0,0 +1,380 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.Date;
+import java.util.UUID;
+
+public class Bijections
+{
+ public static final Bijection<Byte> INT8_GENERATOR = new ByteGenerator();
+ public static final Bijection<Short> INT16_GENERATOR = new Int16Generator();
+ public static final Bijection<Integer> INT32_GENERATOR = new Int32Generator();
+ public static final Bijection<Long> INT64_GENERATOR = new LongGenerator();
+ public static final Bijection<Float> FLOAT_GENERATOR = new FloatGenerator();
+ public static final Bijection<Double> DOUBLE_GENERATOR = new DoubleGenerator();
+ public static final Bijection<Boolean> BOOLEAN_GENERATOR = new BooleanGenerator();
+
+ public static final Bijection<UUID> UUID_GENERATOR = new UUIDGenerator();
+ public static final Bijection<Date> TIMESTAMP_GENERATOR = new TimestampGenerator();
+
/**
 * A bijection between a long descriptor and a value of type {@code T}.
 * <p>
 * When generating a value, an invertible generator first draws a long from the random number
 * generator and passes it to the normalization function. Normalization scales the long down to
 * the range that corresponds to the generated value range: for Boolean the range has size 2,
 * for Integer 2^32, etc.
 * <p>
 * {@code deflate} has to be equal to the {@code adjustEntropyDomain} value.
 * <p>
 * When inflating, we should inflate up to {@code adjustEntropyDomain} values; this way,
 * deflated values correspond to inflated ones.
 */
public interface Bijection<T>
{
    /** Decodes a descriptor into a value. */
    T inflate(long descriptor);

    /** Encodes a value back into its descriptor; inverse of {@link #inflate(long)}. */
    long deflate(T value);

    /** Number of descriptor bytes this type consumes (at most {@link Long#BYTES}). */
    int byteSize();

    /** Compares two descriptors consistently with the ordering of their inflated values. */
    int compare(long l, long r);

    /** Trims a raw random long down to this type's entropy domain. */
    default long adjustEntropyDomain(long descriptor)
    {
        return descriptor & Bytes.bytePatternFor(byteSize());
    }
}
+
+ // TODO: two points:
+ // * We might be able to avoid boxing if we can generate straight to byte buffer (?)
+ // * since these data types are quite specialized, we do not strictly need complex interface for them, it might
+ // be easier to even create a special type for these. We need randomness source in cases of more complex generation,
+ // but not really here.
+ public static class ReverseBijection<T> implements Bijection<T>
+ {
+ private final Bijection<T> delegate;
+
+ public ReverseBijection(Bijection<T> delegate)
+ {
+ this.delegate = delegate;
+ }
+
+ public T inflate(long descriptor)
+ {
+ return delegate.inflate(descriptor * -1);
+ }
+
+ public long deflate(T value)
+ {
+ return -1 * delegate.deflate(value);
+ }
+
+ public int byteSize()
+ {
+ return delegate.byteSize();
+ }
+
+ public int compare(long l, long r)
+ {
+ return delegate.compare(r, l);
+ }
+
+ public long adjustEntropyDomain(long descriptor)
+ {
+ long pattern = Bytes.BYTES[byteSize() - 1];
+ return descriptor & (pattern >> 1);
+ }
+ }
+
+ public static class LongGenerator implements Bijection<Long>
+ {
+ public Long inflate(long current)
+ {
+ return current;
+ }
+
+ public long deflate(Long value)
+ {
+ return value;
+ }
+
+ public int compare(long l, long r)
+ {
+ return Long.compare(l, r);
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+ }
+
+ public static class Int32Generator implements Bijection<Integer>
+ {
+ public Integer inflate(long current)
+ {
+ return (int) current;
+ }
+
+ public long deflate(Integer value)
+ {
+ return value & 0xffffffffL;
+ }
+
+ public int compare(long l, long r)
+ {
+ return Integer.compare((int) l, (int) r);
+ }
+
+ public int byteSize()
+ {
+ return Integer.BYTES;
+ }
+ }
+
+ public static class Int16Generator implements Bijection<Short>
+ {
+ public Short inflate(long current)
+ {
+ return (short) current;
+ }
+
+ public long deflate(Short value)
+ {
+ return value & 0xffffL;
+ }
+
+ public int compare(long l, long r)
+ {
+ return Short.compare((short) l, (short) r);
+ }
+
+ public int byteSize()
+ {
+ return Short.BYTES;
+ }
+ }
+
+ public static class ByteGenerator implements Bijection<Byte>
+ {
+ public Byte inflate(long current)
+ {
+ return (byte) current;
+ }
+
+ public long deflate(Byte value)
+ {
+ return value & 0xffL;
+ }
+
+ public int compare(long l, long r)
+ {
+ return Byte.compare((byte) l, (byte) r);
+ }
+
+ public int byteSize()
+ {
+ return Byte.BYTES;
+ }
+ }
+
/**
 * Boolean bijection over the two descriptors {1, 2}: 1 decodes to false, 2 to true.
 */
public static class BooleanGenerator implements Bijection<Boolean>
{
    public Boolean inflate(long current)
    {
        return inflatePrimitive(current);
    }

    private boolean inflatePrimitive(long current)
    {
        return current == 2;
    }

    public long deflate(Boolean value)
    {
        return value ? 2 : 1;
    }

    public int byteSize()
    {
        return Byte.BYTES;
    }

    public int compare(long l, long r)
    {
        return Byte.compare((byte) l, (byte) r);
    }

    // Maps a random long onto {1, 2} rather than {0, 1}; the +1 presumably keeps boolean
    // descriptors away from 0, which is reserved as an "unset" descriptor elsewhere
    // (see DataGenerators.UNSET_DESCR == 0) -- TODO confirm.
    public long adjustEntropyDomain(long descriptor)
    {
        return (descriptor & 1) + 1;
    }
}
+
/**
 * Float bijection over the low 32 bits of a descriptor. deflate() XORs the bit pattern of
 * negative floats so that integer ordering of descriptors matches float ordering; inflate()
 * applies the inverse transform.
 * <p>
 * NOTE(review): in inflatePrimitive the shifted operand is a long in [0, 0xffffffff], so
 * (tmp >> 31) evaluates to 0 or 1 rather than the all-ones int produced in deflate(); the
 * two are only symmetric inside the adjustEntropyDomain() range (bit 31 cleared) — confirm
 * this asymmetry is intended.
 */
public static class FloatGenerator implements Bijection<Float>
{
    public Float inflate(long current)
    {
        return inflatePrimitive(current);
    }

    protected float inflatePrimitive(long current)
    {
        long tmp = current & 0xffffffffL;
        // undo the order-preserving transform applied by deflate()
        tmp ^= ((tmp >> 31) & 0x7fffffffL);
        return Float.intBitsToFloat((int) tmp);
    }

    public long deflate(Float value)
    {
        int tmp = Float.floatToRawIntBits(value);
        tmp ^= ((tmp >> 31) & 0x7fffffffL);
        return tmp;
    }

    // Order descriptors by the floats they decode to, not by raw bits.
    public int compare(long l, long r)
    {
        return Float.compare(inflatePrimitive(l), inflatePrimitive(r));
    }

    public int byteSize()
    {
        return Float.BYTES;
    }

    // Clears the sign bit and part of the exponent (mask 0x8F000000), presumably to keep
    // descriptors away from special encodings (NaN/Infinity) -- TODO confirm.
    public long adjustEntropyDomain(long descriptor)
    {
        return (~0x8F000000L & descriptor) & 0xffffffffL;
    }
}
+
+ public static class ReverseFloatGenerator extends FloatGenerator
+ {
+ public float inflatePrimitive(long current)
+ {
+ return super.inflatePrimitive(current) * -1;
+ }
+
+ public long deflate(Float value)
+ {
+ return super.deflate(value * -1);
+ }
+ }
+
/**
 * Double bijection over the full 64-bit descriptor. Bit patterns of negative doubles are
 * XOR-flipped (low 63 bits) so that signed-long ordering of descriptors matches double
 * ordering; the transform is its own inverse.
 */
public static class DoubleGenerator implements Bijection<Double>
{
    public Double inflate(long current)
    {
        return inflatePrimitive(current);
    }

    protected double inflatePrimitive(long current)
    {
        // arithmetic shift yields all-ones for negatives: flips the low 63 bits
        current = current ^ ((current >> 63) & 0x7fffffffffffffffL);
        return Double.longBitsToDouble(current);
    }

    public long deflate(Double value)
    {
        long current = Double.doubleToRawLongBits(value);
        current = current ^ ((current >> 63) & 0x7fffffffffffffffL);
        return current;
    }

    // Order descriptors by the doubles they decode to, not by raw bits.
    public int compare(long l, long r)
    {
        return Double.compare(inflatePrimitive(l), inflatePrimitive(r));
    }

    public int byteSize()
    {
        return Double.BYTES;
    }

    // Clears the sign bit and part of the exponent (mask 0x8F00000000000000), presumably
    // to keep descriptors away from special encodings (NaN/Infinity) -- TODO confirm.
    public long adjustEntropyDomain(long descriptor)
    {
        return (~0x8F00000000000000L & descriptor);
    }
}
+
+
+ public static class ReverseDoubleGenerator extends DoubleGenerator
+ {
+ public double inflatePrimitive(long current)
+ {
+ return super.inflatePrimitive(current) * -1;
+ }
+
+ public long deflate(Double value)
+ {
+ return super.deflate(value * -1);
+ }
+ }
+
+ public static class UUIDGenerator implements Bijection<UUID>
+ {
+ public UUID inflate(long current)
+ {
+ // order is determined by the top bits
+ return new UUID(current, current);
+ }
+
+ public long deflate(UUID value)
+ {
+ return value.getMostSignificantBits();
+ }
+
+ public int compare(long l, long r)
+ {
+ return Byte.compare((byte) l, (byte) r);
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+ }
+
+ public static class TimestampGenerator implements Bijection<Date>
+ {
+ public Date inflate(long descriptor)
+ {
+ return new Date(descriptor);
+ }
+
+ public long deflate(Date value)
+ {
+ return value.getTime();
+ }
+
+ public int compare(long l, long r)
+ {
+ return Byte.compare((byte) l, (byte) r);
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+
+ public long adjustEntropyDomain(long descriptor)
+ {
+ return descriptor & (Bytes.bytePatternFor(byteSize() >> 1));
+ }
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/BooleanGenerator.java b/harry-core/src/harry/generators/BooleanGenerator.java
new file mode 100644
index 0000000..74e3a0c
--- /dev/null
+++ b/harry-core/src/harry/generators/BooleanGenerator.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+public class BooleanGenerator implements Generator<Boolean>
+{
+ public static BooleanGenerator INSTANCE = new BooleanGenerator();
+
+ public Boolean generate(RandomGenerator rng)
+ {
+ return rng.nextBoolean();
+ }
+}
diff --git a/harry-core/src/harry/generators/Bytes.java b/harry-core/src/harry/generators/Bytes.java
new file mode 100644
index 0000000..09e0001
--- /dev/null
+++ b/harry-core/src/harry/generators/Bytes.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
/**
 * Bit-mask helpers for addressing the low {@code n} bytes of a long.
 */
public class Bytes
{
    /** BYTES[i] is an all-ones mask covering the low (i + 1) bytes. */
    public final static long[] BYTES;

    static
    {
        BYTES = new long[Long.BYTES];
        long mask = 0;
        for (int i = 0; i < BYTES.length; i++)
        {
            mask = (mask << Byte.SIZE) | 0xFFL;
            BYTES[i] = mask;
        }
    }

    /** All-ones mask covering the low {@code size} bytes (1 <= size <= 8). */
    public static long bytePatternFor(int size)
    {
        return BYTES[size - 1];
    }

    /** Mask with only the sign (top) bit of a {@code size}-byte value set. */
    public static long signMaskFor(int size)
    {
        return 1L << (Byte.SIZE * size - 1);
    }
}
diff --git a/harry-core/src/harry/generators/Collections.java b/harry-core/src/harry/generators/Collections.java
new file mode 100644
index 0000000..7e4674a
--- /dev/null
+++ b/harry-core/src/harry/generators/Collections.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import harry.ddl.ColumnSpec;
+
+// TODO: collections are currently not deflatable and/or checkable with a model
+public class Collections
+{
/**
 * Builds a CQL {@code map<K, V>} column type whose generator produces maps of up to
 * {@code maxSize} entries from a single long descriptor.
 */
public static <K, V> ColumnSpec.DataType<Map<K, V>> mapColumn(ColumnSpec.DataType<K> k,
                                                              ColumnSpec.DataType<V> v,
                                                              int maxSize)
{
    return new ColumnSpec.DataType<Map<K, V>>(String.format("map<%s,%s>", k.toString(), v.toString()))
    {
        private final Bijections.Bijection<Map<K, V>> gen = mapGen(k.generator(), v.generator(), maxSize);

        public Bijections.Bijection<Map<K, V>> generator()
        {
            return gen;
        }

        // Entropy consumed from the descriptor, not the CQL collection size.
        public int maxSize()
        {
            return Long.BYTES;
        }
    };
}
+
+ public static <V> ColumnSpec.DataType<List<V>> listColumn(ColumnSpec.DataType<V> v,
+ int maxSize)
+ {
+ return new ColumnSpec.DataType<List<V>>(String.format("set<%s>", v.toString()))
+ {
+ private final Bijections.Bijection<List<V>> gen = listGen(v.generator(), maxSize);
+
+ public Bijections.Bijection<List<V>> generator()
+ {
+ return gen;
+ }
+
+ public int maxSize()
+ {
+ return Long.BYTES;
+ }
+ };
+ }
+
+
/**
 * Builds a CQL {@code set<V>} column type whose generator produces sets of up to
 * {@code maxSize} elements from a single long descriptor.
 */
public static <V> ColumnSpec.DataType<Set<V>> setColumn(ColumnSpec.DataType<V> v,
                                                        int maxSize)
{
    return new ColumnSpec.DataType<Set<V>>(String.format("set<%s>", v.toString()))
    {
        private final Bijections.Bijection<Set<V>> gen = setGen(v.generator(), maxSize);

        public Bijections.Bijection<Set<V>> generator()
        {
            return gen;
        }

        // Entropy consumed from the descriptor, not the CQL collection size.
        public int maxSize()
        {
            return Long.BYTES;
        }
    };
}
+
+
/** Bijection producing maps of at most {@code maxSize} entries from a descriptor. */
public static <K, V> Bijections.Bijection<Map<K, V>> mapGen(Bijections.Bijection<K> keyGen,
                                                            Bijections.Bijection<V> valueGen,
                                                            int maxSize)
{
    return new MapGenerator<>(keyGen, valueGen, maxSize);
}

/** Bijection producing lists of at most {@code maxSize} elements from a descriptor. */
public static <V> Bijections.Bijection<List<V>> listGen(Bijections.Bijection<V> valueGen,
                                                        int maxSize)
{
    return new ListGenerator<>(valueGen, maxSize);
}

/** Bijection producing sets of at most {@code maxSize} elements from a descriptor. */
public static <V> Bijections.Bijection<Set<V>> setGen(Bijections.Bijection<V> valueGen,
                                                      int maxSize)
{
    return new SetGenerator<>(valueGen, maxSize);
}
+
/**
 * One-way "bijection" for maps: inflate() derives a deterministic chain of RNG values from
 * the descriptor (size, then alternating key/value seeds). deflate() and compare() are
 * unsupported — collections are currently not deflatable (see class-level TODO).
 */
public static class MapGenerator<K, V> implements Bijections.Bijection<Map<K, V>>
{
    public final Bijections.Bijection<K> keyGen;
    public final Bijections.Bijection<V> valueGen;
    public int maxSize;

    public MapGenerator(Bijections.Bijection<K> keyGen,
                        Bijections.Bijection<V> valueGen,
                        int maxSize)
    {
        this.keyGen = keyGen;
        this.valueGen = valueGen;
        this.maxSize = maxSize;
    }

    public Map<K, V> inflate(long descriptor)
    {
        // the RNG chain order below is load-bearing: same descriptor => same map
        long rnd = RngUtils.next(descriptor);
        int count = RngUtils.asInt(rnd, 0, maxSize);
        Map<K, V> m = new HashMap<>();
        for (int i = 0; i < count; i++)
        {
            rnd = RngUtils.next(rnd);
            K key = keyGen.inflate(rnd);
            rnd = RngUtils.next(rnd);
            V value = valueGen.inflate(rnd);
            m.put(key, value);
        }

        return m;
    }

    // At least for non-frozen ones
    public long deflate(Map<K, V> value)
    {
        throw new UnsupportedOperationException();
    }

    public int byteSize()
    {
        return Long.BYTES;
    }

    public int compare(long l, long r)
    {
        throw new UnsupportedOperationException();
    }
}
+
+ public static class ListGenerator<V> implements Bijections.Bijection<List<V>>
+ {
+ public final Bijections.Bijection<V> valueGen;
+ public int maxSize;
+
+ public ListGenerator(Bijections.Bijection<V> valueGen,
+ int maxSize)
+ {
+ this.valueGen = valueGen;
+ this.maxSize = maxSize;
+ }
+
+ public List<V> inflate(long descriptor)
+ {
+ long rnd = RngUtils.next(descriptor);
+ int count = RngUtils.asInt(rnd, 0, maxSize);
+ List<V> m = new ArrayList<>();
+ for (int i = 0; i < count; i++)
+ {
+ rnd = RngUtils.next(rnd);
+ V value = valueGen.inflate(rnd);
+ m.add(value);
+ }
+
+ return m;
+ }
+
+ // At least for non-frozen ones
+ public long deflate(List<V> value)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+
+ public int compare(long l, long r)
+ {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ public static class SetGenerator<V> implements Bijections.Bijection<Set<V>>
+ {
+ public final Bijections.Bijection<V> valueGen;
+ public int maxSize;
+
+ public SetGenerator(Bijections.Bijection<V> valueGen,
+ int maxSize)
+ {
+ this.valueGen = valueGen;
+ this.maxSize = maxSize;
+ }
+
+ public Set<V> inflate(long descriptor)
+ {
+ long rnd = RngUtils.next(descriptor);
+ int count = RngUtils.asInt(rnd, 0, maxSize);
+ Set<V> m = new HashSet<>();
+ for (int i = 0; i < count; i++)
+ {
+ rnd = RngUtils.next(rnd);
+ V value = valueGen.inflate(rnd);
+ m.add(value);
+ }
+
+ return m;
+ }
+
+ // At least for non-frozen ones
+ public long deflate(Set<V> value)
+ {
+ throw new UnsupportedOperationException();
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+
+ public int compare(long l, long r)
+ {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
diff --git a/harry-core/src/harry/generators/DataGenerators.java b/harry-core/src/harry/generators/DataGenerators.java
new file mode 100644
index 0000000..9acd957
--- /dev/null
+++ b/harry-core/src/harry/generators/DataGenerators.java
@@ -0,0 +1,486 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import harry.ddl.ColumnSpec;
+
+public class DataGenerators
+{
+ public static final Object UNSET_VALUE = new Object();
+ public static long UNSET_DESCR = 0;
+ public static long NIL_DESCR = -1;
+
+ public static Object[] inflateData(List<ColumnSpec<?>> columns, long[] descriptors)
+ {
+ // This can be not true depending on how we implement subselections
+ assert columns.size() == descriptors.length;
+ Object[] data = new Object[descriptors.length];
+ for (int i = 0; i < descriptors.length; i++)
+ {
+ ColumnSpec columnSpec = columns.get(i);
+ if (descriptors[i] == UNSET_DESCR)
+ data[i] = UNSET_VALUE;
+ else
+ data[i] = columnSpec.inflate(descriptors[i]);
+ }
+ return data;
+ }
+
/**
 * Deflates one value per column back into its descriptor. A null value maps to
 * {@link #NIL_DESCR}.
 */
public static long[] deflateData(List<ColumnSpec<?>> columns, Object[] data)
{
    // This can be not true depending on how we implement subselections
    assert columns.size() == data.length;
    long[] descriptors = new long[data.length];
    for (int i = 0; i < descriptors.length; i++)
    {
        // raw type is presumably deliberate here: deflate(T) cannot accept an Object
        // through a wildcard capture -- confirm before "fixing" to ColumnSpec<?>
        ColumnSpec columnSpec = columns.get(i);
        if (data[i] == null)
            descriptors[i] = NIL_DESCR;
        else
            descriptors[i] = columnSpec.deflate(data[i]);
    }
    return descriptors;
}
+
/**
 * Distributes the bytes of a single long descriptor across (up to the first four) key
 * columns, according to how many bytes of entropy each column type can consume.
 * Returns, per column index, the number of descriptor bytes allotted to it.
 */
public static int[] requiredBytes(List<ColumnSpec<?>> columns)
{
    switch (columns.size())
    {
        case 0:
            throw new RuntimeException("Can't inflate empty data column set as it is not inversible");
        case 1:
            // NOTE(review): Long.SIZE is 64 *bits* while maxSize() values elsewhere are
            // byte counts (e.g. Long.BYTES); this min() is therefore a no-op -- confirm
            // whether Long.BYTES was intended. (This branch is unused by
            // MultiPartKeyGenerator, which requires > 1 column.)
            return new int[]{ Math.min(columns.get(0).type.maxSize(), Long.SIZE) };
        default:
            // local (column index, max byte size) tuple
            class Pair
            {
                final int idx, maxSize;

                Pair(int idx, int maxSize)
                {
                    this.idx = idx;
                    this.maxSize = maxSize;
                }
            }
            // only the first four columns share the descriptor's entropy
            int[] bytes = new int[Math.min(4, columns.size())];
            // NOTE(review): despite the name, this array is never sorted -- columns are
            // visited in declaration order; confirm whether sorting by maxSize was intended.
            Pair[] sorted = new Pair[bytes.length];
            for (int i = 0; i < sorted.length; i++)
                sorted[i] = new Pair(i, columns.get(i).type.maxSize());

            int remainingBytes = Long.BYTES;
            int slotSize = remainingBytes / bytes.length;
            // first pass: give it at most a slot number of bytes
            for (int i = 0; i < sorted.length; i++)
            {
                int size = sorted[i].maxSize;
                int allotedSize = Math.min(size, slotSize);
                remainingBytes -= allotedSize;
                bytes[sorted[i].idx] = allotedSize;
            }

            // sliced evenly
            if (remainingBytes == 0)
                return bytes;

            // second pass: try to occupy remaining bytes
            // it is possible to improve the second pass and separate additional bytes evenly, but it is
            // questionable how much it'll bring since it does not change the total amount of entropy.
            for (int i = 0; i < sorted.length; i++)
            {
                if (remainingBytes == 0)
                    break;
                Pair p = sorted[i];
                if (bytes[p.idx] < p.maxSize)
                {
                    int allotedSize = Math.min(p.maxSize - bytes[p.idx], remainingBytes);
                    remainingBytes -= allotedSize;
                    bytes[p.idx] += allotedSize;
                }
            }

            return bytes;
    }
}
/**
 * Inflates a composite key: the first {@code slices.length} columns come from the sliced
 * descriptor; any remaining columns are filled deterministically from an RNG chain seeded
 * by the descriptor (the prefix is fixed, the tail is pseudo-random but reproducible).
 */
public static Object[] inflateKey(List<ColumnSpec<?>> columns, long descriptor, long[] slices)
{
    assert columns.size() >= slices.length : String.format("Columns: %s. Slices: %s", columns, Arrays.toString(slices));
    assert columns.size() > 0 : "Can't deflate from empty columnset";

    Object[] res = new Object[columns.size()];
    for (int i = 0; i < slices.length; i++)
    {
        ColumnSpec spec = columns.get(i);
        res[i] = spec.inflate(slices[i]);
    }

    // The rest can be random, since prefix is always fixed
    long current = descriptor;
    for (int i = slices.length; i < columns.size(); i++)
    {
        current = RngUtils.next(current);
        res[i] = columns.get(i).inflate(current);
    }

    return res;
}
+
+ public static long[] deflateKey(List<ColumnSpec<?>> columns, Object[] values)
+ {
+ assert columns.size() == values.length : String.format("%s != %s", columns.size(), values.length);
+ assert columns.size() > 0 : "Can't deflate from empty columnset";
+
+ int fixedPart = Math.min(4, columns.size());
+
+ long[] slices = new long[fixedPart];
+ for (int i = 0; i < fixedPart; i++)
+ {
+ ColumnSpec spec = columns.get(i);
+ slices[i] = spec.deflate(values[i]);
+ }
+ return slices;
+ }
+
+ public static KeyGenerator createKeyGenerator(List<ColumnSpec<?>> columns)
+ {
+ switch (columns.size())
+ {
+ case 0:
+ return EMPTY_KEY_GEN;
+ case 1:
+ return new SinglePartKeyGenerator(columns);
+ default:
+ return new MultiPartKeyGenerator(columns);
+ }
+ }
+
// Degenerate generator for an empty column set: every operation collapses to a constant.
private static final KeyGenerator EMPTY_KEY_GEN = new KeyGenerator(Collections.emptyList())
{
    private final long[] EMPTY_SLICED = new long[0];
    private final Object[] EMPTY_INFLATED = new Object[0];

    public long[] slice(long descriptor)
    {
        return EMPTY_SLICED;
    }

    public long stitch(long[] parts)
    {
        return 0;
    }

    protected long minValueInternal(int idx)
    {
        return 0;
    }

    protected long maxValueInternal(int idx)
    {
        return 0;
    }

    @Override
    public Object[] inflate(long descriptor)
    {
        return EMPTY_INFLATED;
    }

    @Override
    public long deflate(Object[] value)
    {
        return 0;
    }

    public long adjustEntropyDomain(long descriptor)
    {
        return 0;
    }

    public int byteSize()
    {
        return 0;
    }

    public int compare(long l, long r)
    {
        return 0;
    }
};
+
/**
 * Bijection over composite keys: a single long descriptor is sliced into per-column parts
 * and stitched back. min/max helpers flip when the relevant column is reversed (DESC), so
 * that descriptor order always follows clustering order.
 */
public static abstract class KeyGenerator implements Bijections.Bijection<Object[]>
{
    @VisibleForTesting
    final List<ColumnSpec<?>> columns;

    KeyGenerator(List<ColumnSpec<?>> columns)
    {
        this.columns = columns;
    }

    /** Splits a descriptor into per-column slices. */
    public abstract long[] slice(long descriptor);

    /** Inverse of {@link #slice(long)}: recombines slices into one descriptor. */
    public abstract long stitch(long[] parts);

    // For a reversed first column, the whole descriptor order flips, so min and max swap.
    public long minValue()
    {
        return columns.get(0).isReversed() ? maxForSize(byteSize()) : minForSize(byteSize());
    }

    public long maxValue()
    {
        return columns.get(0).isReversed() ? minForSize(byteSize()) : maxForSize(byteSize());
    }

    // For a full 8-byte descriptor this is Long.MIN_VALUE; for smaller sizes the set bit
    // equals Bytes.signMaskFor(size), so the XOR cancels it and the minimum is 0.
    protected static long minForSize(int size)
    {
        long min = 1L << (size * Byte.SIZE - 1);

        if (size < Long.BYTES)
            min ^= Bytes.signMaskFor(size);

        return min;
    }

    // For a full 8-byte descriptor this is Long.MAX_VALUE; for smaller sizes XOR-ing the
    // sign mask back in yields the all-ones pattern for that size.
    protected long maxForSize(int size)
    {
        long max = Bytes.bytePatternFor(size) >>> 1;

        if (size < Long.BYTES)
            max ^= Bytes.signMaskFor(size);

        return max;
    }

    /**
     * Min value for a segment: 0, possibly with an inverted 0 sign for stitching.
     * Similar thing can be achieved by
     */
    public long minValue(int idx)
    {
        return columns.get(idx).isReversed() ? maxValueInternal(idx) : minValueInternal(idx);
    }

    public long maxValue(int idx)
    {
        return columns.get(idx).isReversed() ? minValueInternal(idx) : maxValueInternal(idx);
    }

    protected abstract long minValueInternal(int idx);

    protected abstract long maxValueInternal(int idx);
}
+
/**
 * Key generator for a one-column key: the whole descriptor belongs to that column, so
 * slicing is (almost) the identity and stitching just unwraps the single slice.
 */
static class SinglePartKeyGenerator extends KeyGenerator
{
    // raw type: inflate/deflate are invoked on values of the column's own type
    private final Bijections.Bijection keyGen;
    private final int totalSize;

    SinglePartKeyGenerator(List<ColumnSpec<?>> columns)
    {
        super(columns);
        assert columns.size() == 1;
        this.keyGen = columns.get(0).generator();
        this.totalSize = keyGen.byteSize();
    }

    public long[] slice(long descriptor)
    {
        long adjusted = adjustEntropyDomain(descriptor);
        long[] res = new long[]{ adjusted };
        // slicing must round-trip through stitch()
        assert adjusted == stitch(res);
        return res;
    }

    public long stitch(long[] parts)
    {
        return parts[0];
    }

    public long minValueInternal(int idx)
    {
        return minForSize(totalSize);
    }

    public long maxValueInternal(int idx)
    {
        return maxForSize(totalSize);
    }

    public Object[] inflate(long descriptor)
    {
        return new Object[]{ keyGen.inflate(descriptor) };
    }

    public long deflate(Object[] value)
    {
        long descriptor = keyGen.deflate(value[0]);
        // keep only the bytes this key actually occupies
        descriptor &= Bytes.bytePatternFor(totalSize);
        return descriptor;
    }

    public int byteSize()
    {
        return totalSize;
    }

    public int compare(long l, long r)
    {
        return Long.compare(l, r);
    }

    public long adjustEntropyDomain(long descriptor)
    {
        // drop the top bit of the occupied range, then let the column type trim further
        descriptor &= (Bytes.bytePatternFor(totalSize) >> 1);
        descriptor = keyGen.adjustEntropyDomain(descriptor);
        return descriptor;
    }
}
+
/**
 * Key generator for multi-column keys: {@link DataGenerators#requiredBytes} assigns each
 * column a byte-range of the long descriptor; {@link #slice(long)} extracts the ranges
 * (most significant bytes first) and {@link #stitch(long[])} packs them back. Sign bits
 * are selectively inverted so signed-long order of the stitched descriptor matches the
 * lexicographic order of the sliced parts.
 */
public static class MultiPartKeyGenerator extends KeyGenerator
{
    @VisibleForTesting
    public final int[] sizes;
    protected final int totalSize;

    MultiPartKeyGenerator(List<ColumnSpec<?>> columns)
    {
        super(columns);
        assert columns.size() > 1 : "It makes sense to use a multipart generator if you have more than one column, but you have " + columns.size();

        this.sizes = requiredBytes(columns);
        int total = 0;
        for (int size : sizes)
            total += size;

        this.totalSize = total;
    }

    public long deflate(Object[] values)
    {
        return stitch(DataGenerators.deflateKey(columns, values));
    }

    public Object[] inflate(long descriptor)
    {
        return DataGenerators.inflateKey(columns, descriptor, slice(descriptor));
    }

    public long adjustEntropyDomain(long descriptor)
    {
        // We can't simply trim the value here, mostly because of the values like
        // long and double that can change the value during normalization in addition
        // to trimming it.
        return stitch(slice(descriptor));
    }

    // Checks whether we need to invert a slice sign to preserve order of the sliced descriptor
    public boolean shouldInvertSign(int idx)
    {
        int maxSliceSize = columns.get(idx).generator().byteSize();
        int actualSliceSize = sizes[idx];

        if (idx == 0)
        {
            // Signed representation of the first value would follow the sorting of the
            // long value itself, which means that we have invert sign of the first piece if:
            // * not all entropy bytes are consumed
            // * we do not have enough entropy bytes to set the sign bit of the value
            // TODO: I think maxPieceSize != pieceSize is not right here; it should be <= ???
            return totalSize != Long.BYTES || maxSliceSize != actualSliceSize;
        }

        // We invert sign of all subsequent chunks if their signs match
        return maxSliceSize == actualSliceSize;
    }

    public long[] slice(long descriptor)
    {
        long[] pieces = new long[sizes.length];
        long pos = totalSize;
        // walk the descriptor from its most significant occupied byte downwards
        for (int i = 0; i < sizes.length; i++)
        {
            final int size = sizes[i];
            long piece = descriptor >> ((pos - size) * Byte.SIZE);

            piece &= Bytes.bytePatternFor(size);

            if (shouldInvertSign(i))
                piece ^= Bytes.signMaskFor(size);

            piece = columns.get(i).adjustEntropyDomain(piece);

            pieces[i] = piece;
            pos -= size;
        }
        return pieces;
    }

    public long stitch(long[] parts)
    {
        long stitched = 0;
        int consumed = 0;
        // pack from the least significant slice upwards; inverse of slice()
        for (int i = sizes.length - 1; i >= 0; i--)
        {
            int size = sizes[i];
            long piece = parts[i];

            if (shouldInvertSign(i))
                piece ^= Bytes.signMaskFor(size);

            piece &= Bytes.bytePatternFor(size);
            stitched |= piece << (consumed * Byte.SIZE);
            consumed += size;
        }
        return stitched;
    }

    // Per-slice minimum: zero, with the sign bit pre-inverted where stitching expects it.
    protected long minValueInternal(int idx)
    {
        int size = sizes[idx];
        long res = 0;
        if (shouldInvertSign(idx))
            res ^= Bytes.signMaskFor(size);
        return res;
    }

    // Per-slice maximum: the all-ones pattern, with the sign bit inverted where needed.
    protected long maxValueInternal(int idx)
    {
        int size = sizes[idx];
        long res = Bytes.bytePatternFor(size);
        if (shouldInvertSign(idx))
            res ^= Bytes.signMaskFor(size);
        return res;
    }

    public int byteSize()
    {
        return totalSize;
    }

    public int compare(long l, long r)
    {
        return Long.compare(l, r);
    }
}
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/Generator.java b/harry-core/src/harry/generators/Generator.java
new file mode 100644
index 0000000..d787ce4
--- /dev/null
+++ b/harry-core/src/harry/generators/Generator.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+/**
+ * A composable random-value generator: draws entropy from the supplied
+ * {@link RandomGenerator} and produces a value of type T. Default methods
+ * provide the usual combinators (map, flatMap, zip) for assembling compound
+ * generators from simpler ones.
+ */
+public interface Generator<T>
+{
+ // It might be better if every generator has its own independent rng (or even implementation-dependent rng).
+ // This way we can have simpler interface: generate a value from the long and invertible iterator does the opposite.
+ // More things can be invertible this way. For example, entire primary keys can be invertible.
+ T generate(RandomGenerator rng);
+
+ // Generates n values by drawing from the rng n times.
+ default List<T> generate(RandomGenerator rng, int n)
+ {
+ List<T> res = new ArrayList<>(n);
+ for (int i = 0; i < n; i++)
+ res.add(generate(rng));
+ return res;
+ }
+
+ // Adapts this generator into a Surjection: each descriptor deterministically
+ // seeds a fresh PCG instance, so equal descriptors always yield equal values.
+ default Surjections.Surjection<T> toSurjection(long streamId)
+ {
+ return (current) -> {
+ RandomGenerator rng = new PcgRSUFast(current, streamId);
+ return generate(rng);
+ };
+ }
+
+ // TODO: this is only applicable to surjections, it seems
+ // A generated value paired with the descriptor it was inflated from.
+ public static class Value<T>
+ {
+ public final long descriptor;
+ public final T value;
+
+ public Value(long descriptor, T value)
+ {
+ this.descriptor = descriptor;
+ this.value = value;
+ }
+
+ public String toString()
+ {
+ // NOTE(review): the (Object[]) cast throws for primitive arrays —
+ // confirm Value is only ever used with reference-typed arrays.
+ return "Value{" +
+ "descriptor=" + descriptor +
+ ", value=" + (value.getClass().isArray() ? Arrays.toString((Object[]) value) : value) + '}';
+ }
+ }
+
+ // Fixes the rng, turning this generator into a stateful supplier.
+ default Supplier<T> bind(RandomGenerator rand)
+ {
+ return () -> generate(rand);
+ }
+
+ default <T1> Generator<T1> map(Function<T, T1> map)
+ {
+ return (rand) -> map.apply(generate(rand));
+ }
+
+ default <T1> Generator<T1> flatMap(Function<T, Generator<T1>> fmap)
+ {
+ return (rand) -> fmap.apply(generate(rand)).generate(rand);
+ }
+
+ // zip combinators: run this and the given generators against the same rng,
+ // in argument order, and combine the results with the supplied function.
+ default <T2, T3> Generator<T3> zip(Generator<T2> g1, BiFunction<T, T2, T3> map)
+ {
+ return (rand) -> map.apply(Generator.this.generate(rand), g1.generate(rand));
+ }
+
+ default <T2, T3, T4> Generator<T4> zip(Generator<T2> g1, Generator<T3> g2, TriFunction<T, T2, T3, T4> map)
+ {
+ return (rand) -> map.apply(Generator.this.generate(rand), g1.generate(rand), g2.generate(rand));
+ }
+
+ default <T2, T3, T4, T5> Generator<T5> zip(Generator<T2> g1, Generator<T3> g2, Generator<T4> g3, QuatFunction<T, T2, T3, T4, T5> map)
+ {
+ return (rand) -> map.apply(Generator.this.generate(rand), g1.generate(rand), g2.generate(rand), g3.generate(rand));
+ }
+
+ default <T2, T3, T4, T5, T6> Generator<T6> zip(Generator<T2> g1, Generator<T3> g2, Generator<T4> g3, Generator<T5> g4, QuinFunction<T, T2, T3, T4, T5, T6> map)
+ {
+ return (rand) -> map.apply(Generator.this.generate(rand), g1.generate(rand), g2.generate(rand), g3.generate(rand), g4.generate(rand));
+ }
+
+ default <T2, T3, T4, T5, T6, T7> Generator<T7> zip(Generator<T2> g1, Generator<T3> g2, Generator<T4> g3, Generator<T5> g4, Generator<T6> g5, SechFunction<T, T2, T3, T4, T5, T6, T7> map)
+ {
+ return (rand) -> map.apply(Generator.this.generate(rand), g1.generate(rand), g2.generate(rand), g3.generate(rand), g4.generate(rand), g5.generate(rand));
+ }
+
+
+ // Functional interfaces for the 3..6-argument combining functions used by zip.
+ public interface TriFunction<T1, T2, T3, RES>
+ {
+ RES apply(T1 t1, T2 t2, T3 t3);
+ }
+
+ public interface QuatFunction<T1, T2, T3, T4, RES>
+ {
+ RES apply(T1 t1, T2 t2, T3 t3, T4 t4);
+ }
+
+ public interface QuinFunction<T1, T2, T3, T4, T5, RES>
+ {
+ RES apply(T1 t1, T2 t2, T3 t3, T4 t4, T5 t5);
+ }
+
+ public interface SechFunction<T1, T2, T3, T4, T5, T6, RES>
+ {
+ RES apply(T1 t1, T2 t2, T3 t3, T4 t4, T5 t5, T6 t6);
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/Generators.java b/harry-core/src/harry/generators/Generators.java
new file mode 100644
index 0000000..397c1bc
--- /dev/null
+++ b/harry-core/src/harry/generators/Generators.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Supplier;
+
+/**
+ * Static factories for common {@link Generator} implementations.
+ */
+public class Generators
+{
+ // TODO: test
+ // Picks a uniformly random element; nextInt bounds are inclusive.
+ public static <T> Generator<T> pick(List<T> ts)
+ {
+ return (rng) -> {
+ return ts.get(rng.nextInt(0, ts.size() - 1));
+ };
+ }
+
+ public static <T> Generator<T> pick(T... ts)
+ {
+ return pick(Arrays.asList(ts));
+ }
+
+ // TODO: test
+ public static <T> Generator<List<T>> subsetGenerator(List<T> list)
+ {
+ return subsetGenerator(list, 0, list.size() - 1);
+ }
+
+ // TODO: test
+ // Generates a random subset of `list`. minSize/maxSize bound the number of
+ // *attempted* picks; duplicates collapse in the set, so the result may hold
+ // fewer than minSize elements. Result order is unspecified (HashSet).
+ public static <T> Generator<List<T>> subsetGenerator(List<T> list, int minSize, int maxSize)
+ {
+ return (rng) -> {
+ int count = rng.nextInt(minSize, maxSize);
+ Set<T> set = new HashSet<>();
+ for (int i = 0; i < count; i++)
+ // Index over the whole list. Previously this indexed with
+ // rng.nextInt(minSize, maxSize), conflating sizes with indices:
+ // elements below index minSize could never be picked, and it went
+ // out of bounds whenever maxSize >= list.size().
+ set.add(list.get(rng.nextInt(0, list.size() - 1)));
+
+ return (List<T>) new ArrayList<>(set);
+ };
+ }
+
+ // TODO: test
+ public static <T extends Enum<T>> Generator<T> enumValues(Class<T> e)
+ {
+ return pick(Arrays.asList(e.getEnumConstants()));
+ }
+
+ // TODO: test
+ public static <T> Generator<List<T>> list(Generator<T> of, int maxSize)
+ {
+ return list(of, 0, maxSize);
+ }
+
+ // TODO: test
+ // List of random size in [minSize, maxSize], elements drawn from `of`.
+ public static <T> Generator<List<T>> list(Generator<T> of, int minSize, int maxSize)
+ {
+ return (rng) -> {
+ int count = rng.nextInt(minSize, maxSize);
+ return of.generate(rng, count);
+ };
+ }
+
+ public static <T> Generator<T> constant(T constant)
+ {
+ return (random) -> constant;
+ }
+
+ // Re-evaluates the supplier on every generation.
+ public static <T> Generator<T> constant(Supplier<T> constant)
+ {
+ return (random) -> constant.get();
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/PCGFastPure.java b/harry-core/src/harry/generators/PCGFastPure.java
new file mode 100644
index 0000000..48bfae3
--- /dev/null
+++ b/harry-core/src/harry/generators/PCGFastPure.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+/**
+ * Immutable / pure implementation of PCG.
+ * <p>
+ * Base algorithm translated from C/C++ code:
+ * https://github.com/imneme/pcg-c
+ * https://github.com/imneme/pcg-cpp
+ * <p>
+ * Original library developed by Melissa O'Neill <on...@pcg-random.org>
+ */
+public class PCGFastPure
+{
+ // LCG state-transition multiplier from the PCG reference implementation.
+ private static final long NEXT_MULTIPLIER = 6364136223846793005L;
+
+ // Multiplier used by the output shuffle, and the constant unshuffle()
+ // multiplies by to undo it (its modular inverse mod 2^64).
+ private static final long XORSHIFT_MULTIPLIER = Long.parseUnsignedLong("12605985483714917081");
+ private static final long XORSHIFT_UNMULTIPLIER = Long.parseUnsignedLong("15009553638781119849");
+
+ // Advances a previously *generated* value by `steps`: undoes the output
+ // shuffle, jumps the underlying state, then re-applies the shuffle.
+ public static long advance(long generated, long steps, long stream)
+ {
+ return shuffle(advanceState(unshuffle(generated), steps, stream));
+ }
+
+ // PCG selects streams via the LCG increment, which must be odd.
+ public static long streamIncrement(long stream)
+ {
+ return (stream << 1) | 1;
+ }
+
+ // Jump-ahead: advances the raw LCG state by an arbitrary (unsigned) number
+ // of steps in O(log steps), by repeated squaring of multiplier/increment.
+ public static long advanceState(long state, long steps, long stream)
+ {
+ long acc_mult = 1;
+ long acc_plus = 0;
+
+ long cur_plus = streamIncrement(stream);
+ long cur_mult = NEXT_MULTIPLIER;
+
+ while (Long.compareUnsigned(steps, 0) > 0)
+ {
+ if ((steps & 1) == 1)
+ {
+ acc_mult *= cur_mult;
+ acc_plus = acc_plus * cur_mult + cur_plus;
+ }
+ cur_plus *= (cur_mult + 1);
+ cur_mult *= cur_mult;
+ steps = Long.divideUnsigned(steps, 2);
+ }
+ return (acc_mult * state) + acc_plus;
+ }
+
+ // Next *generated* value: unshuffle, step the LCG once, shuffle again.
+ public static long next(long state, long stream)
+ {
+ return shuffle(nextState(unshuffle(state), stream));
+ }
+
+ // Previous generated value: -1 step (wraps around the full period).
+ public static long previous(long state, long stream)
+ {
+ return advance(state, -1, stream);
+ }
+
+ // Single raw LCG step.
+ public static long nextState(long state, long stream)
+ {
+ return (state * NEXT_MULTIPLIER) + streamIncrement(stream);
+ }
+
+ // Number of LCG steps from curState to newState (raw states, not shuffled
+ // outputs), recovered one bit at a time.
+ // NOTE(review): returns the computed distance minus one — appears tailored
+ // to how PcgRSUFast.distance() calls it; confirm before reusing elsewhere.
+ public static long distance(long curState, long newState, long stream)
+ {
+ if (curState == newState)
+ return 0;
+
+ long curPlus = streamIncrement(stream);
+ long curMult = NEXT_MULTIPLIER;
+
+ long bit = 1L;
+ long distance = 0;
+
+ while (curState != newState)
+ {
+ if ((curState & bit) != (newState & bit))
+ {
+ curState = curState * curMult + curPlus;
+ distance |= bit;
+ }
+ assert ((curState & bit) == (newState & bit));
+ bit <<= 1;
+ curPlus = (curMult + 1) * curPlus;
+ curMult *= curMult;
+ }
+
+ return distance - 1;
+ }
+
+ // Output permutation: state-dependent xorshift, multiply, fixed xorshift.
+ public static long shuffle(long state)
+ {
+ long word = ((state >>> ((state >>> 59) + 5)) ^ state) * XORSHIFT_MULTIPLIER;
+ return (word >>> 43) ^ word;
+ }
+
+ // Exact inverse of shuffle().
+ public static long unshuffle(long shuffled)
+ {
+ long word = shuffled;
+ word = unxorshift(word, 43);
+ word *= XORSHIFT_UNMULTIPLIER;
+ word = unxorshift(word, (int) (word >>> 59) + 5);
+ return word;
+ }
+
+ public static long unxorshift(long x, int shift)
+ {
+ return unxorshift(x, 64, shift);
+ }
+
+ // Inverts `x ^= x >>> shift` over the top `bits` bits, recursively
+ // recovering progressively lower bit ranges.
+ public static long unxorshift(long x, int bits, int shift)
+ {
+ if (2 * shift >= bits)
+ return x ^ (x >>> shift);
+
+ long lowmask1 = (1L << (bits - shift * 2)) - 1L;
+ long bottomBits = x & lowmask1;
+ long topBits = (x ^ (x >>> shift)) & ~lowmask1;
+ x = topBits | bottomBits;
+
+ long lowmask2 = (1L << (bits - shift)) - 1L;
+ bottomBits = unxorshift(x & lowmask2, bits - shift, shift) & lowmask1;
+ return topBits | bottomBits;
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/PcgRSUFast.java b/harry-core/src/harry/generators/PcgRSUFast.java
new file mode 100644
index 0000000..34ffb62
--- /dev/null
+++ b/harry-core/src/harry/generators/PcgRSUFast.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+/**
+ * Stateful PCG-based {@link RandomGenerator} built on {@link PCGFastPure}.
+ * Tracks the number of steps taken so the stream can be seeked to an absolute
+ * position. Not thread-safe.
+ */
+public class PcgRSUFast implements RandomGenerator
+{
+ private long state;
+ // Steps taken since construction.
+ // NOTE(review): not reset by seed(), so seek() after a re-seed is relative
+ // to the original construction — confirm this is intended.
+ private long step;
+
+ /**
+ * Stream number of the rng.
+ */
+ private final long stream;
+
+ public PcgRSUFast(long seed, long streamNumber)
+ {
+ this.stream = (streamNumber << 1) | 1; // 2* + 1
+ seed(seed);
+ }
+
+ // Scrambles the raw seed through xorshift64* before mixing in the stream.
+ public void seed(long seed)
+ {
+ state = RngUtils.xorshift64star(seed) + stream;
+ }
+
+ // Jumps `steps` positions forward (or backward, if negative).
+ public void advance(long steps)
+ {
+ this.step += steps;
+ this.state = PCGFastPure.advanceState(state, steps, stream);
+ }
+
+ protected void nextStep()
+ {
+ state = PCGFastPure.nextState(state, stream);
+ step++;
+ }
+
+ // Positions the generator at the given absolute step.
+ public void seek(long step)
+ {
+ advance(step - this.step);
+ }
+
+ public long next()
+ {
+ nextStep();
+ return PCGFastPure.shuffle(state);
+ }
+
+ // Distance from the current state to the state behind `generated`.
+ public long distance(long generated)
+ {
+ return PCGFastPure.distance(state, PCGFastPure.unshuffle(generated), stream);
+ }
+
+ public long distance(PcgRSUFast other)
+ {
+ // Check if they are the same stream...
+ if (stream != other.stream)
+ {
+ throw new IllegalArgumentException("Can not compare generators with different " +
+ "streams. Those generators will never converge");
+ }
+
+ return PCGFastPure.distance(state, other.state, stream);
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/generators/RandomGenerator.java b/harry-core/src/harry/generators/RandomGenerator.java
new file mode 100644
index 0000000..869f60e
--- /dev/null
+++ b/harry-core/src/harry/generators/RandomGenerator.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import com.google.common.annotations.VisibleForTesting;
+
+
+/**
+ * Random generator interface that offers:
+ * * Settable seed
+ * * Ability to generate multiple "next" random seeds
+ * * Ability to generate multiple "dependent" seeds, from which we can retrace the base seed with subtraction
+ */
+public interface RandomGenerator
+{
+
+ long next();
+
+ // Draws the next n values in sequence.
+ default long[] next(int n)
+ {
+ long[] next = new long[n];
+ for (int i = 0; i < n; i++)
+ next[i] = next();
+ return next;
+ }
+
+ void seed(long seed);
+
+ // Positions the generator at the given absolute step.
+ void seek(long step);
+
+ default int nextInt()
+ {
+ return RngUtils.asInt(next());
+ }
+
+ // Value in [0, max] — the bound is inclusive (see RngUtils.asInt).
+ default int nextInt(int max)
+ {
+ return RngUtils.asInt(next(), max);
+ }
+
+ // Value in [min, max] — both bounds inclusive (see RngUtils.asInt).
+ default int nextInt(int min, int max)
+ {
+ return RngUtils.asInt(next(), min, max);
+ }
+
+ default boolean nextBoolean()
+ {
+ return RngUtils.asBoolean(next());
+ }
+
+
+ // Seeks to `step`, then draws the value at that position.
+ default long nextAt(long step)
+ {
+ seek(step);
+ return next();
+ }
+
+ @VisibleForTesting
+ public static RandomGenerator forTests()
+ {
+ return forTests(System.currentTimeMillis());
+ }
+
+ // Prints the seed so a failing run can be reproduced.
+ @VisibleForTesting
+ public static RandomGenerator forTests(long seed)
+ {
+ System.out.println("Seed: " + seed);
+ return new PcgRSUFast(seed, 1);
+ }
+}
+
diff --git a/harry-core/src/harry/generators/RngUtils.java b/harry-core/src/harry/generators/RngUtils.java
new file mode 100644
index 0000000..92b97ee
--- /dev/null
+++ b/harry-core/src/harry/generators/RngUtils.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+/**
+ * Pure helpers for deriving typed values from raw 64-bit random state.
+ */
+public class RngUtils
+{
+ public static long next(long input)
+ {
+ return xorshift64star(input);
+ }
+
+ // One xorshift64* step: scrambles the input to decorrelate successive values.
+ public static long xorshift64star(long input)
+ {
+ input ^= input >> 12;
+ input ^= input << 25; // b
+ input ^= input >> 27; // c
+ return input * 0x2545F4914F6CDD1DL;
+ }
+
+ // Produces n successive values by repeatedly applying next() to the seed.
+ public static long[] next(long current, int n)
+ {
+ long[] next = new long[n];
+ for (int i = 0; i < n; i++)
+ {
+ current = next(current);
+ next[i] = current;
+ }
+ return next;
+ }
+
+ // Splits the long into its 8 bytes, least significant byte first.
+ public static byte[] asBytes(long current)
+ {
+ byte[] bytes = new byte[Long.BYTES];
+ for (int i = 0; i < Long.BYTES; i++)
+ {
+ bytes[i] = (byte) (current & 0xFF);
+ // Was `current >>= current`, which shifts by the value itself (mod 64)
+ // instead of advancing one byte, corrupting every byte but the first.
+ current >>>= Byte.SIZE;
+ }
+ return bytes;
+ }
+
+ public static byte asByte(long current)
+ {
+ return (byte) current;
+ }
+
+ public static int asInt(long current)
+ {
+ return (int) current;
+ }
+
+ // TODO: this needs some improvement
+ // Value in [0, max]; plain modulo introduces a slight bias toward smaller
+ // values for large max.
+ public static int asInt(long current, int max)
+ {
+ return Math.abs((int) current % (max + 1));
+ }
+
+ // Generate a value in [min, max] range: from min _inclusive_ to max _inclusive_.
+ public static int asInt(long current, int min, int max)
+ {
+ if (min == max)
+ return min;
+ return min + asInt(current, max - min);
+ }
+
+ public static boolean asBoolean(long current)
+ {
+ return (current & 1) == 1;
+ }
+
+ public static float asFloat(long current)
+ {
+ return Float.intBitsToFloat((int) current);
+ }
+
+ public static double asDouble(long current)
+ {
+ return Double.longBitsToDouble(current);
+ }
+}
diff --git a/harry-core/src/harry/generators/StringBijection.java b/harry-core/src/harry/generators/StringBijection.java
new file mode 100644
index 0000000..7a7b8c2
--- /dev/null
+++ b/harry-core/src/harry/generators/StringBijection.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * Order-preserving bijection between 64-bit descriptors and strings. The first
+ * 8 nibbles (one per descriptor byte, drawn from a lexicographically sorted
+ * 256-entry alphabet) encode the descriptor; the top byte's sign bit is
+ * flipped (see getByte) so that signed long comparison matches string order.
+ * A deterministic pseudo-random suffix is appended, which is safe because the
+ * 8-nibble prefixes are already unique.
+ */
+public class StringBijection implements Bijections.Bijection<String>
+{
+ public static final int NIBBLES_SIZE = 256;
+ // Sorted alphabet: one nibble string per possible byte value.
+ private final String[] nibbles;
+ // Reverse lookup: nibble string -> byte value, used by deflate().
+ private final Map<String, Integer> inverse;
+ private final int nibbleSize;
+ private final int maxRandomNibbles;
+
+ public StringBijection()
+ {
+ this(alphabetNibbles(8), 8, 10);
+ }
+
+ public StringBijection(int nibbleSize, int maxRandomNibbles)
+ {
+ this(alphabetNibbles(nibbleSize), nibbleSize, maxRandomNibbles);
+ }
+
+ public StringBijection(String[] nibbles, int nibbleSize, int maxRandomNibbles)
+ {
+ assert nibbles.length == NIBBLES_SIZE;
+ this.nibbles = nibbles;
+ this.inverse = new HashMap<>();
+ this.nibbleSize = nibbleSize;
+
+ for (int i = 0; i < nibbles.length; i++)
+ {
+ assert nibbles[i].length() == nibbleSize;
+ inverse.put(nibbles[i], i);
+ }
+
+ this.maxRandomNibbles = maxRandomNibbles;
+ }
+
+ // Descriptor -> string: one nibble per descriptor byte, then a random suffix.
+ public String inflate(long descriptor)
+ {
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < Long.BYTES; i++)
+ {
+ int idx = getByte(descriptor, i);
+ builder.append(nibbles[idx]);
+ }
+
+ appendRandomNibbles(builder, descriptor);
+
+ // everything after this point can be just random, since strings are guaranteed
+ // to have unique prefixes
+ return builder.toString();
+ }
+
+ // Extracts byte idx (0 = most significant), flipping the sign bit of the top
+ // byte so unsigned byte-wise order agrees with signed long order.
+ public static int getByte(long l, int idx)
+ {
+ int b = (int) ((l >> (Long.BYTES - idx - 1) * Byte.SIZE) & 0xff);
+ if (idx == 0)
+ b ^= 0x80;
+ return b;
+ }
+
+
+ // TODO: should we switch to PCG here, too?
+ // Appends [0, maxRandomNibbles] nibbles derived deterministically from the
+ // descriptor, so inflate() is a pure function.
+ private void appendRandomNibbles(StringBuilder builder, long descriptor)
+ {
+ long rnd = RngUtils.next(descriptor);
+ int count = RngUtils.asInt(rnd, 0, maxRandomNibbles);
+
+ for (int i = 0; i < count; i++)
+ {
+ rnd = RngUtils.next(rnd);
+ builder.append(nibbles[RngUtils.asByte(rnd) & 0xff]);
+ }
+ }
+
+ // String -> descriptor: reads only the first 8 nibbles (the unique prefix);
+ // the random suffix is ignored.
+ public long deflate(String descriptor)
+ {
+ long res = 0;
+ for (int i = 0; i < Long.BYTES; i++)
+ {
+ String nibble = descriptor.substring(nibbleSize * i, nibbleSize * (i + 1));
+ assert inverse.containsKey(nibble) : String.format("Bad string: %s, %s", nibble, descriptor);
+ long idx = inverse.get(nibble);
+ if (i == 0)
+ idx ^= 0x80;
+ res |= idx << (Long.BYTES - i - 1) * Byte.SIZE;
+ }
+ return res;
+ }
+
+ // Byte-wise unsigned comparison (with the sign-bit flip from getByte), which
+ // matches the lexicographic order of the inflated strings.
+ public int compare(long l, long r)
+ {
+ for (int i = 0; i < Long.BYTES; i++)
+ {
+ int cmp = Integer.compare(getByte(l, i),
+ getByte(r, i));
+ if (cmp != 0)
+ return cmp;
+ }
+ return 0;
+ }
+
+ public int byteSize()
+ {
+ return Long.BYTES;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ascii(" +
+ "nibbleSize=" + nibbleSize +
+ ", maxRandomNibbles=" + maxRandomNibbles +
+ ')';
+ }
+
+ // TODO: create a method that would generate random nibbles, and return them sorted
+ private final static char[] characters = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };
+
+ // We don't really care much about algorithmic complexity of this method since it's called only once during startup
+ // Builds 256 unique nibble strings of the given length; the fixed seed makes
+ // the alphabet reproducible across runs, and the TreeSet keeps it sorted.
+ public static String[] alphabetNibbles(int nibbleSize)
+ {
+ Random rnd = new Random(1);
+
+ // We need to generate 256 _unique_ strings; ideally we don't want to re-sort them
+ Set<String> strings = new TreeSet<String>();
+ while (strings.size() < NIBBLES_SIZE)
+ {
+ char[] chars = new char[nibbleSize];
+ for (int j = 0; j < nibbleSize; j++)
+ chars[j] = characters[rnd.nextInt(characters.length)];
+
+ strings.add(new String(chars));
+ }
+
+ assert strings.size() == NIBBLES_SIZE : strings.size();
+ String[] nibbles = new String[NIBBLES_SIZE];
+ Iterator<String> iter = strings.iterator();
+ for (int i = 0; i < NIBBLES_SIZE; i++)
+ nibbles[i] = iter.next();
+
+ return nibbles;
+ }
+}
diff --git a/harry-core/src/harry/generators/Surjections.java b/harry-core/src/harry/generators/Surjections.java
new file mode 100644
index 0000000..1caf71d
--- /dev/null
+++ b/harry-core/src/harry/generators/Surjections.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.function.Function;
+import java.util.function.LongFunction;
+import java.util.function.Supplier;
+
+/**
+ * Factories for {@link Surjection}: a deterministic mapping from a long
+ * descriptor onto a value, where many descriptors may map to the same value.
+ */
+public class Surjections
+{
+ // Re-evaluates the supplier for every descriptor.
+ public static <T> Surjection<T> constant(Supplier<T> constant)
+ {
+ return (current) -> constant.get();
+ }
+
+ public static <T> Surjection<T> constant(T constant)
+ {
+ return (current) -> constant;
+ }
+
+ // Picks a list element from the descriptor; asInt bounds are inclusive.
+ public static <T> Surjection<T> pick(List<T> ts)
+ {
+ return new Surjection<T>()
+ {
+ public T inflate(long current)
+ {
+ return ts.get(RngUtils.asInt(current, 0, ts.size() - 1));
+ }
+
+ @Override
+ public String toString()
+ {
+ return String.format("Surjection#pick{from=%s}", ts);
+ }
+ };
+ }
+
+ // Packs percentage weights into longs as (weight << 32 | original index);
+ // weights must sum to 100.
+ public static long[] weights(int... weights)
+ {
+ long[] res = new long[weights.length];
+ int sum = 0;
+ for (int i = 0; i < weights.length; i++)
+ {
+ long w = weights[i];
+ sum += w;
+ res[i] = w << 32 | i;
+ }
+ assert sum == 100;
+
+ return res;
+ }
+
+ public static <T> Surjection<T> weighted(int[] weights, T... items)
+ {
+ return weighted(weights(weights), items);
+ }
+
+ public static <T> Surjection<T> weighted(long[] weights, T... items)
+ {
+ assert weights.length == items.length;
+
+ // Sorting orders entries by weight; the low 32 bits retain the item index.
+ Arrays.sort(weights);
+ // Maps the lower bound of each item's percentage range to the item.
+ TreeMap<Integer, T> weightMap = new TreeMap<Integer, T>();
+ int prev = 0;
+ for (int i = 0; i < weights.length; i++)
+ {
+ long orig = weights[i];
+ int weight = (int) (orig >> 32);
+ int idx = (int) orig;
+ weightMap.put(prev, items[idx]);
+ prev += weight;
+ }
+
+ return (i) -> {
+ int weight = RngUtils.asInt(i, 0, 100);
+ return weightMap.floorEntry(weight).getValue();
+ };
+ }
+
+ // NOTE(review): this overload selects with ceilingEntry over cumulative sums
+ // while the array overload uses floorEntry over lower bounds; boundary
+ // behavior differs slightly between the two — confirm which is intended.
+ public static <T> Surjection<T> weighted(Map<T, Integer> weights)
+ {
+ TreeMap<Integer, T> weightMap = new TreeMap<Integer, T>();
+ int sum = 0;
+ for (Map.Entry<T, Integer> entry : weights.entrySet())
+ {
+ sum += entry.getValue();
+ weightMap.put(sum, entry.getKey());
+ }
+
+ assert sum == 100;
+ return (i) -> {
+ int weight = RngUtils.asInt(i, 0, 100);
+ return weightMap.ceilingEntry(weight).getValue();
+ };
+ }
+
+ public static <T> Surjection<T> pick(T... ts)
+ {
+ return pick(Arrays.asList(ts));
+ }
+
+ public static <T extends Enum<T>> Surjection<T> enumValues(Class<T> e)
+ {
+ return pick(Arrays.asList(e.getEnumConstants()));
+ }
+
+ // A deterministic function from a long descriptor to a value; unlike
+ // Bijection, it need not be invertible.
+ public interface Surjection<T>
+ {
+ T inflate(long descriptor);
+
+ default <T1> Surjection<T1> map(Function<T, T1> map)
+ {
+ return (current) -> map.apply(inflate(current));
+ }
+
+ default LongFunction<T> toFn()
+ {
+ return new LongFunction<T>()
+ {
+ public T apply(long value)
+ {
+ return inflate(value);
+ }
+ };
+ }
+
+ // Draws a descriptor from the rng for every generated value.
+ default Generator<T> toGenerator()
+ {
+ return new Generator<T>()
+ {
+ public T generate(RandomGenerator rng)
+ {
+ return inflate(rng.next());
+ }
+ };
+ }
+ }
+}
diff --git a/harry-core/src/harry/generators/distribution/Distribution.java b/harry-core/src/harry/generators/distribution/Distribution.java
new file mode 100644
index 0000000..a89b547
--- /dev/null
+++ b/harry-core/src/harry/generators/distribution/Distribution.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.generators.distribution;
+
+import harry.core.Configuration;
+
+public interface Distribution
+{
+ public Configuration.DistributionConfig toConfig();
+
+ public interface DistributionFactory
+ {
+ public Distribution make();
+ }
+
+ public long skew(long i);
+
+ public class IdentityDistribution implements Distribution
+ {
+ public Configuration.DistributionConfig toConfig()
+ {
+ return new Configuration.IdentityDistributionConfig();
+ }
+
+ public long skew(long i)
+ {
+ return i;
+ }
+ }
+
+ public class ConstantDistribution implements Distribution
+ {
+ public final long constant;
+
+ public ConstantDistribution(long constant)
+ {
+ this.constant = constant;
+ }
+
+ public Configuration.DistributionConfig toConfig()
+ {
+ return new Configuration.ConstantDistributionConfig(constant);
+ }
+
+ public long skew(long i)
+ {
+ return constant;
+ }
+
+ public String toString()
+ {
+ return "ConstantDistribution{" +
+ "constant=" + constant +
+ '}';
+ }
+ }
+
+ public class ScaledDistribution implements Distribution
+ {
+ private final long min;
+ private final long max;
+
+ public ScaledDistribution(long min, long max)
+ {
+ this.min = min;
+ this.max = max;
+ }
+
+ public Configuration.DistributionConfig toConfig()
+ {
+ return new Configuration.ScaledDistributionConfig(min, max);
+ }
+
+ public long skew(long i)
+ {
+ return scale(i, min, max);
+ }
+
+ public static long scale(long value, long min, long max)
+ {
+ if (value == 0)
+ return (max - min) / 2;
+
+ double nomalized = (1.0 * Math.abs(value)) / Long.MAX_VALUE;
+ double diff = 0.5 * (max - min);
+ if (value > 0)
+ return (long) (min + nomalized * diff);
+ else
+ return (long) (max - nomalized * diff);
+ }
+
+ public String toString()
+ {
+ return "ScaledDistribution{" +
+ "min=" + min +
+ ", max=" + max +
+ '}';
+ }
+ }
+
+ // TODO: test and graph!
+ public class NormalDistribution implements Distribution
+ {
+ private final org.apache.commons.math3.distribution.NormalDistribution delegate;
+
+ public NormalDistribution()
+ {
+ delegate = new org.apache.commons.math3.distribution.NormalDistribution();
+ }
+
+ public Configuration.DistributionConfig toConfig()
+ {
+ return new Configuration.NormalDistributionConfig();
+ }
+
+ public long skew(long i)
+ {
+ return (long) delegate.cumulativeProbability((double) i);
+ }
+ }
+}
diff --git a/harry-core/src/harry/model/DataTracker.java b/harry-core/src/harry/model/DataTracker.java
new file mode 100644
index 0000000..78815d5
--- /dev/null
+++ b/harry-core/src/harry/model/DataTracker.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.PriorityBlockingQueue;
+import java.util.concurrent.atomic.AtomicLong;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DataTracker
+{
+ private static final Logger logger = LoggerFactory.getLogger(DataTracker.class);
+
+ private final AtomicLong maxSeenLts;
+ // TODO: This is a trivial implementation that can be significantly improved upon
+ // for example, we could use a bitmap that records `1`s for all lts that are after
+ // the consective, and "collapse" the bitmap state into the long as soon as we see
+ // consecutive `1` on the left side.
+ private final AtomicLong maxCompleteLts;
+ private final PriorityBlockingQueue<Long> reorderBuffer;
+
+ DataTracker()
+ {
+ this.maxSeenLts = new AtomicLong(-1);
+ this.maxCompleteLts = new AtomicLong(-1);
+ this.reorderBuffer = new PriorityBlockingQueue<>(100);
+ }
+
+ // TODO: there's also some room for improvement in terms of concurrency
+ // TODO: remove pd?
+ public void recordEvent(long lts, boolean quorumAchieved)
+ {
+ // all seen LTS are allowed to be "in-flight"
+ maxSeenLts.getAndUpdate((old) -> Math.max(lts, old));
+
+ if (!quorumAchieved)
+ return;
+
+ long maxAchievedConsecutive = drainReorderQueue();
+
+ if (maxAchievedConsecutive + 1 == lts)
+ maxCompleteLts.compareAndSet(maxAchievedConsecutive, lts);
+ else
+ reorderBuffer.offer(lts);
+ }
+
+ public long drainReorderQueue()
+ {
+ long expected = maxCompleteLts.get();
+ long maxAchievedConsecutive = expected;
+ if (reorderBuffer.isEmpty())
+ return maxAchievedConsecutive;
+
+ boolean catchingUp = false;
+
+ Long smallest = reorderBuffer.poll();
+ while (smallest != null && smallest == maxAchievedConsecutive + 1)
+ {
+ maxAchievedConsecutive++;
+ catchingUp = true;
+ smallest = reorderBuffer.poll();
+ }
+
+ // put back
+ if (smallest != null)
+ reorderBuffer.offer(smallest);
+
+ if (catchingUp)
+ maxCompleteLts.compareAndSet(expected, maxAchievedConsecutive);
+
+ int bufferSize = reorderBuffer.size();
+ if (bufferSize > 100)
+ logger.warn("Reorder buffer size has grown up to " + reorderBuffer.size());
+ return maxAchievedConsecutive;
+ }
+
+ public long maxSeenLts()
+ {
+ return maxSeenLts.get();
+ }
+
+ public long maxCompleteLts()
+ {
+ return maxCompleteLts.get();
+ }
+
+ @VisibleForTesting
+ void forceLts(long maxSeen, long maxComplete)
+ {
+ this.maxSeenLts.set(maxSeen);
+ this.maxCompleteLts.set(maxComplete);
+ }
+
+ public String toString()
+ {
+ List<Long> buf = new ArrayList<>(reorderBuffer);
+ return "DataTracker{" +
+ "maxSeenLts=" + maxSeenLts +
+ ", maxCompleteLts=" + maxCompleteLts +
+ ", reorderBuffer=" + buf +
+ '}';
+ }
+}
diff --git a/harry-core/src/harry/model/DescriptorSelectorBuilder.java b/harry-core/src/harry/model/DescriptorSelectorBuilder.java
new file mode 100644
index 0000000..f34484f
--- /dev/null
+++ b/harry-core/src/harry/model/DescriptorSelectorBuilder.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.Objects;
+import java.util.function.Function;
+
+import harry.core.Configuration;
+import harry.ddl.SchemaSpec;
+import harry.generators.Surjections;
+import harry.generators.distribution.Distribution;
+import harry.util.BitSet;
+
+import static harry.model.OpSelectors.DefaultDescriptorSelector.DEFAULT_OP_TYPE_SELECTOR;
+
+public class DescriptorSelectorBuilder implements Configuration.CDSelectorConfiguration
+{
+ private Function<SchemaSpec, Function<OpSelectors.OperationKind, Surjections.Surjection<BitSet>>> columnMaskSelector;
+ private Surjections.Surjection<OpSelectors.OperationKind> operationTypeSelector = DEFAULT_OP_TYPE_SELECTOR;
+ private Distribution numberOfRowsDistribution = new Distribution.ScaledDistribution(2, 30);
+ private Distribution numberOfModificationsDistribution = new Distribution.ScaledDistribution(1, 3);
+ private int maxPartitionSize = Integer.MAX_VALUE;
+ private Function<SchemaSpec, int[]> fractionsSupplier = null;
+
+ public DescriptorSelectorBuilder setFractions(int[] fractions)
+ {
+ this.fractionsSupplier = (schema) -> fractions;
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setFractions(Function<SchemaSpec, int[]> fractions)
+ {
+ this.fractionsSupplier = fractions;
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setColumnMaskSelector(Surjections.Surjection<BitSet> selector)
+ {
+ this.columnMaskSelector = (schemaSpec) -> new OpSelectors.ColumnSelectorBuilder().forAll(selector).build();
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setColumnMaskSelector(Function<SchemaSpec, Function<OpSelectors.OperationKind, Surjections.Surjection<BitSet>>> columnMaskSelector)
+ {
+ this.columnMaskSelector = Objects.requireNonNull(columnMaskSelector, "mask");
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setOperationTypeSelector(Surjections.Surjection<OpSelectors.OperationKind> operationTypeSelector)
+ {
+ this.operationTypeSelector = Objects.requireNonNull(operationTypeSelector, "type");
+ return this;
+ }
+
+ /**
+ * In a given modification, we are only able to visit as many rows as there are rows in the partition, so
+ * we'll always be limited by this.
+ **/
+ public DescriptorSelectorBuilder setRowsPerModificationDistribution(Distribution numberOfRowsDistribution)
+ {
+ this.numberOfRowsDistribution = Objects.requireNonNull(numberOfRowsDistribution, "distribution");
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setNumberOfModificationsDistribution(Distribution numberOfModificationsDistribution)
+ {
+ this.numberOfModificationsDistribution = Objects.requireNonNull(numberOfModificationsDistribution, "distribution");
+ return this;
+ }
+
+ public DescriptorSelectorBuilder setMaxPartitionSize(int maxPartitionSize)
+ {
+ if (maxPartitionSize <= 0)
+ throw new IllegalArgumentException("Max partition size should be positive");
+ this.maxPartitionSize = maxPartitionSize;
+ return this;
+ }
+
+ public OpSelectors.DescriptorSelector make(OpSelectors.Rng rng, SchemaSpec schemaSpec)
+ {
+ return new OpSelectors.DefaultDescriptorSelector(rng,
+ columnMaskSelector.apply(schemaSpec),
+ operationTypeSelector,
+ numberOfModificationsDistribution,
+ numberOfRowsDistribution,
+ maxPartitionSize);
+ }
+}
diff --git a/harry-core/src/harry/model/DoNothingModel.java b/harry-core/src/harry/model/DoNothingModel.java
new file mode 100644
index 0000000..c21392d
--- /dev/null
+++ b/harry-core/src/harry/model/DoNothingModel.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import harry.core.Configuration;
+import harry.runner.Query;
+
+public class DoNothingModel implements Model
+{
+ public void recordEvent(long lts, boolean quorumAchieved)
+ {
+ }
+
+ public void validatePartitionState(long verificationLts, Query query)
+ {
+ }
+
+ public Configuration.ModelConfiguration toConfig()
+ {
+ throw new RuntimeException("not implemented");
+ }
+}
diff --git a/harry-core/src/harry/model/ExhaustiveChecker.java b/harry-core/src/harry/model/ExhaustiveChecker.java
new file mode 100644
index 0000000..935da45
--- /dev/null
+++ b/harry-core/src/harry/model/ExhaustiveChecker.java
@@ -0,0 +1,647 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Objects;
+import java.util.TreeMap;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.PeekingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import harry.core.Configuration;
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.AbstractPartitionVisitor;
+import harry.runner.PartitionVisitor;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+import harry.util.BitSet;
+import harry.util.Ranges;
+
+import static harry.generators.DataGenerators.NIL_DESCR;
+import static harry.generators.DataGenerators.UNSET_DESCR;
+
/**
 * A {@link Model} that validates query results by exhaustively replaying every operation
 * the deterministic generators would have produced for the queried partition.
 */
public class ExhaustiveChecker implements Model
{
    private static final Logger logger = LoggerFactory.getLogger(ExhaustiveChecker.class);

    // Orderings for walking clustering descriptors; REVERSE is used for reversed queries.
    public static LongComparator FORWARD_COMPARATOR = Long::compare;
    public static LongComparator REVERSE_COMPARATOR = (a, b) -> Long.compare(b, a);

    protected final OpSelectors.DescriptorSelector descriptorSelector; // derives cds, values and operation kinds per lts
    protected final OpSelectors.PdSelector pdSelector;                 // maps lts to partition descriptors
    protected final OpSelectors.MonotonicClock clock;
    protected final SystemUnderTest sut;
    protected final QuerySelector querySelector;

    // Tracks which lts are still in flight vs. guaranteed visible; see DataTracker.
    protected final DataTracker tracker;

    private final SchemaSpec schema;

    public ExhaustiveChecker(SchemaSpec schema,
                             OpSelectors.PdSelector pdSelector,
                             OpSelectors.DescriptorSelector descriptorSelector,
                             OpSelectors.MonotonicClock clock,
                             QuerySelector querySelector,
                             SystemUnderTest sut)
    {
        this.descriptorSelector = descriptorSelector;
        this.pdSelector = pdSelector;
        this.tracker = new DataTracker();
        this.schema = schema;
        this.sut = sut;
        this.clock = clock;
        this.querySelector = querySelector;
    }
+
    /** Forwards an operation's start / quorum acknowledgement to the internal tracker. */
    public void recordEvent(long lts, boolean quorumAchieved)
    {
        tracker.recordEvent(lts, quorumAchieved);
    }

    /** Exposes the underlying tracker (e.g. for tests and debugging). */
    public DataTracker tracker()
    {
        return tracker;
    }
+
    /**
     * Validates the partition state for the given query, retrying the SELECT against the
     * system under test until it succeeds or the thread is interrupted.
     */
    public void validatePartitionState(long validationLts, Query query)
    {
        validatePartitionState(validationLts,
                               query,
                               () -> {
                                   while (!Thread.currentThread().isInterrupted())
                                   {
                                       try
                                       {
                                           return SelectHelper.execute(sut, clock, query);
                                       }
                                       catch (Throwable t)
                                       {
                                           // NOTE(review): retries immediately with no backoff; a persistent
                                           // failure will spin and spam the log — confirm this is intended.
                                           logger.error(String.format("Caught error while trying execute query %s", query),
                                                        t);
                                       }
                                   }
                                   throw new RuntimeException("Interrupted");
                               });
    }
+
    /**
     * Core validation: replays the generated modification history for the queried
     * partition and checks that (a) every returned row is fully explained by visible
     * operations and not shadowed by a visible range tombstone, and (b) every clustering
     * with no returned row is explained by deletions or not-yet-propagated writes.
     */
    void validatePartitionState(long validationLts, Query query, Supplier<List<ResultSetRow>> rowsSupplier)
    {
        // Before we execute SELECT, we know what was the lts of operation that is guaranteed to be visible
        long visibleLtsBound = tracker.maxCompleteLts();

        // TODO: when we implement a reorder buffer through a bitmap, we can just grab a bitmap _before_,
        // and know that a combination of `consecutive` + `bitmap` gives us _all possible guaranteed-to-be-seen_ values
        List<ResultSetRow> rows = rowsSupplier.get();

        // by the time SELECT is done, we grab max "possible" lts
        long inFlightLtsBound = tracker.maxSeenLts();
        PartitionState partitionState = inflatePartitionState(validationLts, inFlightLtsBound, query);
        NavigableMap<Long, List<Operation>> operations = partitionState.operations;
        LongComparator cmp = FORWARD_COMPARATOR;
        if (query.reverse)
        {
            // Reversed queries return rows in descending clustering order; walk the model the same way.
            operations = partitionState.operations.descendingMap();
            cmp = REVERSE_COMPARATOR;
        }

        if (!rows.isEmpty() && operations.isEmpty())
        {
            throw new ValidationException(String.format("Returned rows are not empty, but there were no records in the event log.\nRows: %s\nMax seen LTS: %s\nQuery: %s",
                                                        rows, inFlightLtsBound, query));
        }

        PeekingIterator<ResultSetRow> rowIter = Iterators.peekingIterator(rows.iterator());

        // TODO: these two are here only for debugging/logging purposes
        List<ResultSetRow> validatedRows = new ArrayList<>();
        List<Long> validatedNoRows = new ArrayList<>();
        try
        {
            for (Map.Entry<Long, List<Operation>> entry : operations.entrySet())
            {
                long cd = entry.getKey();
                Iterator<Operation> modificationIter = entry.getValue().iterator();

                // Found a row that is present both in the model and in the resultset
                if (rowIter.hasNext() && rowIter.peek().cd == cd)
                {
                    ResultSetRow row = rowIter.next();
                    RowValidationState validationState = new RowValidationState(row, visibleLtsBound, inFlightLtsBound, partitionState.rangeTombstones);

                    // TODO: We only need to go for as long as we explain every column. In fact, we make state _less_ precise by allowing
                    // to continue moving back in time. So far this hasn't proven to be a source of any issues, but we should fix that.
                    // One of the examples is a deletion followed by no writes, or a row write that completely overwrites all columns.
                    while (modificationIter.hasNext())
                        validationState.process(modificationIter.next());

                    // Smallest non-sentinel column timestamp: the row can only be shadowed
                    // by range tombstones at least as new as this.
                    long minLts = Long.MAX_VALUE;
                    for (int col = 0; col < validationState.causingOperations.length; col++)
                    {
                        long colLts = row.lts[col];
                        if (colLts != NO_TIMESTAMP && colLts < minLts)
                            minLts = colLts;

                        long rowValueDescr = row.vds[col];
                        // Cross-check the per-column verdict against what the row actually contains.
                        switch (validationState.columnStates[col])
                        {
                            case REMOVED:
                                if (colLts != NO_TIMESTAMP || rowValueDescr != NIL_DESCR)
                                    throw new ValidationException("Inconsistency found: value of the column %d was supposed to be removed", col);
                                break;
                            case OBSERVED:
                                if (colLts == NO_TIMESTAMP || rowValueDescr == NIL_DESCR)
                                    throw new ValidationException("Inconsistency found: value of the column %d was supposed to be observed", col);
                                break;
                            case UNOBSERVED:
                                if (colLts != NO_TIMESTAMP || rowValueDescr != NIL_DESCR)
                                    throw new ValidationException("Inconsistency found: value of the column %d was never written. " +
                                                                  "Row timestamp: %d. " +
                                                                  "Row descriptor: %d",
                                                                  col, colLts, rowValueDescr);
                        }
                    }

                    // for any visible row, we have to make sure it is not shadowed by any range tombstones
                    for (Ranges.Range rt : partitionState.rangeTombstones.shadowedBy(cd, minLts))
                    {
                        if (rt.timestamp <= visibleLtsBound)
                        {
                            throw new ValidationException("Row was supposed to be shadowed by the range tombstone." +
                                                          "\nRow: %s" +
                                                          "\nRange tombstone: %s" +
                                                          "\nMin LTS: %d" +
                                                          "\nVisible LTS Bound: %d",
                                                          row,
                                                          rt,
                                                          minLts,
                                                          visibleLtsBound);
                        }
                    }

                    validatedRows.add(row);
                }
                // Modifications for this clustering are not visible
                else
                {
                    validateNoRow(cd, visibleLtsBound, modificationIter, partitionState.rangeTombstones);
                    validatedNoRows.add(cd);

                    // Row is not present in the resultset, and we currently look at modifications with a clustering past it
                    if (rowIter.hasNext() && cmp.compare(rowIter.peek().cd, cd) < 0)
                        throw new ValidationException("Couldn't find a corresponding explanation for the row %s in the model. %s",
                                                      rowIter.next(),
                                                      partitionState.operations.get(cd));
                }
            }

            if (rowIter.hasNext())
                throw new ValidationException(String.format("Observed unvalidated rows : %s", Iterators.toString(rowIter)));
        }
        catch (Throwable t)
        {
            // Wrap with enough context (including copy-pasteable repro calls) to diagnose the failure.
            throw new ValidationException(String.format("Caught exception while validating the resultset %s." +
                                                        "\nchecker.tracker().forceLts(%dL, %dL)" +
                                                        "\nrun.validator.validatePartition(%dL)" +
                                                        "\nRow Iter Peek: %s" +
                                                        "\nValidated no rows:\n%s" +
                                                        "\nValidated rows:\n%s" +
                                                        "\nRows:\n%s",
                                                        query,
                                                        inFlightLtsBound, visibleLtsBound, validationLts,
                                                        rowIter.hasNext() ? rowIter.peek() : null,
                                                        validatedNoRows,
                                                        validatedRows.stream().map(Object::toString).collect(Collectors.joining(",\n")),
                                                        rows.stream().map(Object::toString).collect(Collectors.joining(",\n"))),
                                          t);
        }
    }
+
    // there seems to be some issue here, when validating too many in-flight rows
    /**
     * Checks that the absence of a row for clustering {@code cd} is explainable: either
     * the row was never written, or every visible write is shadowed by a row delete,
     * a full-column delete, or a range tombstone. Operations are iterated newest-first.
     */
    public void validateNoRow(long cd, long visibleLtsBound, Iterator<Operation> ops, Ranges rangeTombstones)
    {
        // Row was never written
        if (!ops.hasNext())
            return;

        // There should have been at least one removal followed by no live updates
        List<Operation> visibleWrites = new ArrayList<>();
        while (ops.hasNext())
        {
            Operation op = ops.next();
            boolean isVisible = op.lts <= visibleLtsBound;

            switch (descriptorSelector.operationType(op.pd, op.lts, op.opId))
            {
                // we are going from the newest operation to the oldest one;
                // eventually, we're getting to some write. if it should have propagated, we save it to overwrites
                // if we find a row delete
                case WRITE:
                    if (isVisible)
                        visibleWrites.add(op);

                    break;
                case DELETE_COLUMN:
                    // TODO: continue only in case of non-compact storage. In case of compact storage, deletion of all cells results into row deletion
                    if (!descriptorSelector.columnMask(op.pd, op.lts, op.opId).allUnset())
                        continue; // partial column deletion can't explain a fully absent row; keep scanning older ops
                    // otherwise, fall through, since we can't distinguish this from row delete
                case DELETE_ROW:
                    // row delete, followed by any number of non-propagated writes explains why descriptor is not visible
                    if (!visibleWrites.isEmpty())
                    {
                        long newestVisibleLts = visibleWrites.get(0).lts;
                        if (rangeTombstones.isShadowed(cd, newestVisibleLts))
                            return;

                        // if we have at least one write not shadowed by delete, we have an error
                        throw new ValidationException("While validating %d, expected row not to be visible: a deletion, followed by no overwrites or all incomplete writes, but found %s." +
                                                      "\nRange tombstones: %s",
                                                      cd,
                                                      visibleWrites,
                                                      rangeTombstones.newerThan(newestVisibleLts));
                    }
                    return;
            }
        }

        // Reached the oldest operation without finding a deletion: any visible writes must
        // be shadowed by a range tombstone, otherwise the missing row is an error.
        if (!visibleWrites.isEmpty())
        {
            long newestVisibleLts = visibleWrites.get(0).lts;
            if (rangeTombstones.isShadowed(cd, newestVisibleLts))
                return;

            throw new ValidationException("While validating %d, expected row not to be visible: a deletion, followed by no overwrites or all incomplete writes, but found %s." +
                                          "\nRange tombstones: %s",
                                          cd,
                                          visibleWrites,
                                          rangeTombstones.newerThan(newestVisibleLts));
        }
    }
+
+ public static class PartitionState
+ {
+ public final NavigableMap<Long, List<Operation>> operations;
+ public final Ranges rangeTombstones;
+
+ public PartitionState(NavigableMap<Long, List<Operation>> operations, Ranges rangeTombstones)
+ {
+ this.operations = operations;
+ this.rangeTombstones = rangeTombstones;
+ }
+ }
+
    /**
     * Rebuilds the expected state of the queried partition by replaying, newest to
     * oldest, every modification the generators produced for it up to {@code maxLts}:
     * range deletes are collected as range tombstones, all other operations are grouped
     * per clustering descriptor (skipping clusterings outside the query's range).
     */
    public PartitionState inflatePartitionState(long validationLts, long maxLts, Query query)
    {
        long currentLts = pdSelector.maxLts(validationLts);
        long pd = pdSelector.pd(currentLts, schema);

        // Sanity check: moving to the partition's newest lts must not change the pd.
        if (pd != pdSelector.pd(validationLts, schema))
            throw new ValidationException("Partition descriptor %d doesn't match partition descriptor %d for LTS %d",
                                          pd, pdSelector.pd(validationLts, schema), validationLts);
        NavigableMap<Long, List<Operation>> operations = new TreeMap<>();
        List<Ranges.Range> ranges = new ArrayList<>();

        PartitionVisitor partitionVisitor = new AbstractPartitionVisitor(pdSelector, descriptorSelector, schema)
        {
            public void operation(long lts, long pd, long cd, long m, long opId)
            {
                OpSelectors.OperationKind opType = descriptorSelector.operationType(pd, lts, opId);
                if (opType == OpSelectors.OperationKind.DELETE_RANGE)
                {
                    ranges.add(maybeWrap(lts, opId, querySelector.inflate(lts, opId).toRange(lts)));
                }
                else if (query.match(cd)) // skip descriptors that are out of range
                {
                    operations.computeIfAbsent(cd, (cd_) -> new ArrayList<>());
                    Operation operation = new Operation(pd, cd, lts, opId, opType);
                    operations.get(cd).add(operation);
                }
            }
        };

        // Walk this partition's lts chain backwards from the newest one.
        while (currentLts >= 0)
        {
            if (currentLts <= maxLts)
                partitionVisitor.visitPartition(currentLts);

            currentLts = pdSelector.prevLts(currentLts);
        }
        return new PartitionState(operations, new Ranges(ranges));
    }
+
+ private static Ranges.Range maybeWrap(long lts, long opId, Ranges.Range range)
+ {
+ if (logger.isDebugEnabled())
+ return new DebugRange(lts, opId, range.minBound, range.maxBound, range.minInclusive, range.maxInclusive, range.timestamp);
+
+ return range;
+ }
+
+ private static class DebugRange extends Ranges.Range
+ {
+ private final long lts;
+ private final long opId;
+
+ public DebugRange(long lts, long opId,
+ long minBound, long maxBound, boolean minInclusive, boolean maxInclusive, long timestamp)
+ {
+ super(minBound, maxBound, minInclusive, maxInclusive, timestamp);
+ this.lts = lts;
+ this.opId = opId;
+ }
+
+ public String toString()
+ {
+ return super.toString() +
+ "(lts=" + lts +
+ ", opId=" + opId +
+ ')';
+ }
+ }
+
+ public Configuration.ExhaustiveCheckerConfig toConfig()
+ {
+ return new Configuration.ExhaustiveCheckerConfig(tracker.maxSeenLts(), tracker.maxCompleteLts());
+ }
+
+ public String toString()
+ {
+ return "ExhaustiveChecker{" + tracker.toString() + '}';
+ }
+
+ public class RowValidationState
+ {
+ private final ColumnState[] columnStates;
+ private final Operation[] causingOperations;
+ private final PeekingIterator<Long> ltsIterator;
+ private final Ranges rangeTombstones;
+ private final ResultSetRow row;
+ private final long visibleLtsBound;
+ private final long inFlightLtsBound;
+
+ public RowValidationState(ResultSetRow row, long visibleLtsBound, long inFlightLtsBound, Ranges rangeTombstones)
+ {
+ this.row = row;
+ this.visibleLtsBound = visibleLtsBound;
+ this.inFlightLtsBound = inFlightLtsBound;
+
+ this.columnStates = new ColumnState[row.vds.length];
+ Arrays.fill(columnStates, ColumnState.UNOBSERVED);
+
+ this.causingOperations = new Operation[columnStates.length];
+ long[] ltsVector = new long[row.lts.length];
+ System.arraycopy(row.lts, 0, ltsVector, 0, ltsVector.length);
+ this.ltsIterator = Iterators.peekingIterator(ltsIterator(ltsVector).iterator());
+ this.rangeTombstones = rangeTombstones;
+ }
+
+ public void process(Operation op)
+ {
+ if (ltsIterator.hasNext() && op.lts > ltsIterator.peek())
+ ltsIterator.next();
+
+ assert row.pd == op.pd : String.format("Row and operation descriptors do not match: %d != %d", row.pd, op.pd);
+
+ switch (descriptorSelector.operationType(op.pd, op.lts, op.opId))
+ {
+ case WRITE:
+ processInsert(op);
+ break;
+ case DELETE_ROW:
+ // In case of a visible row, deletion that was followed with a write can be considered equivalent
+ // to a deletion of all column values.
+ processDelete(op, schema.ALL_COLUMNS_BITSET);
+ break;
+ case DELETE_COLUMN:
+ BitSet mask = descriptorSelector.columnMask(row.pd, op.lts, op.opId);
+ if (mask.allUnset())
+ throw new IllegalArgumentException("Can't have a delete column query with no columns set. Column mask: " + mask);
+
+ processDelete(op, mask);
+ break;
+ }
+ }
+
+ private void transitionState(int idx, ColumnState newState, Operation modification)
+ {
+ ColumnState oldState = columnStates[idx];
+ switch (newState)
+ {
+ case UNOBSERVED:
+ throw new IllegalArgumentException("Can not transition to UNOBSERVED state");
+ case REMOVED:
+ if (!(oldState == ColumnState.UNOBSERVED || oldState == ColumnState.REMOVED))
+ throw new ValidationException("Can not transition from %s to %s.", oldState, newState);
+ break;
+ case OBSERVED:
+ if (!(oldState == ColumnState.OBSERVED || oldState == ColumnState.UNOBSERVED))
+ throw new ValidationException("Can not transition from %s to %s.", oldState, newState);
+ break;
+ }
+ columnStates[idx] = newState;
+ causingOperations[idx] = modification;
+ }
+
+ private void processInsert(Operation op)
+ {
+ if (op.lts > inFlightLtsBound)
+ throw new IllegalStateException(String.format("Observed LTS not yet recorded by this model: %s. Max seen LTS: %s",
+ op.lts, inFlightLtsBound));
+
+ boolean isVisible = op.lts <= visibleLtsBound;
+
+ long[] inflatedDescriptors = descriptorSelector.vds(op.pd, op.cd, op.lts, op.opId, schema);
+ for (int col = 0; col < row.lts.length; col++)
+ {
+ final long valueDescriptor = inflatedDescriptors[col];
+
+ // Write is visible
+ if (row.vds[col] == valueDescriptor && row.lts[col] == op.lts)
+ {
+ transitionState(col, ColumnState.OBSERVED, op);
+ continue;
+ }
+
+ if (!isVisible // write has never propagated
+ || valueDescriptor == UNSET_DESCR // this modification did not make this write
+ || (columnStates[col] == ColumnState.REMOVED && causingOperations[col].lts >= op.lts) // confirmed that this column was removed later
+ || (columnStates[col] == ColumnState.OBSERVED && causingOperations[col].lts >= op.lts) // we could confirm the overwrite earlier
+ // TODO: that won't work. To reproduce this, take testDetectsRemovedColumn with range tombstones. Removed column will have a timestamp of min long,
+ // and _any_ range tombstone is going to be able to shadow it.
+ || rangeTombstones.isShadowed(row.cd, row.lts[col])) // if this row's lts is shadowed, we can be certain that whole row is shadowed
+ continue;
+
+ throw new ValidationException("Error caught while validating column %d. " +
+ "Expected value: %d. " +
+ "Modification should have been visible but was not." +
+ "\nOperation: %s" +
+ "\nRow: %s" +
+ "\nRow ID: %d" +
+ "\nColumn States: %s " +
+ "\nRange tombstones: %s",
+ col,
+ valueDescriptor,
+ op,
+ row,
+ descriptorSelector.rowId(row.pd, row.lts[col], row.cd),
+ Arrays.toString(columnStates),
+ rangeTombstones);
+ }
+ }
+
+ public void processDelete(Operation op, BitSet mask)
+ {
+ boolean isVisible = op.lts <= visibleLtsBound;
+
+ for (int col = 0; col < columnStates.length; col++)
+ {
+ // Deletion must have propagated
+ if (mask.isSet(col) && row.lts[col] == NO_TIMESTAMP)
+ {
+ transitionState(col, ColumnState.REMOVED, op);
+ continue;
+ }
+
+ if (!isVisible
+ || mask.isSet(col)
+ || columnStates[col] != ColumnState.OBSERVED
+ || op.lts < causingOperations[col].lts)
+ continue;
+
+ throw new ValidationException("Error caught wile validating column %d. " +
+ "Delete operation with lts %s should have been visible or shadowed by the later update, but was not. " +
+ "\nOperation: %s" +
+ "\nRow: %s" +
+ "\nColumn States: %s",
+ col, op.lts,
+ op, row, Arrays.toString(columnStates));
+ }
+ }
+
+ public String toString()
+ {
+ return String.format("Validated: %s." +
+ "\nObserved timestamps: %s",
+ Arrays.toString(columnStates),
+ row);
+ }
+ }
+
    @VisibleForTesting
    public void forceLts(long maxSeen, long maxComplete)
    {
        // Test hook: pins the underlying tracker's watermarks to the given values.
        tracker.forceLts(maxSeen, maxComplete);
    }
+
+ public static class Operation implements Comparable<Operation>
+ {
+ public final long pd;
+ public final long lts;
+ public final long cd;
+ public final long opId;
+ public final OpSelectors.OperationKind op;
+
+ public Operation(long pd, long cd, long lts, long opId, OpSelectors.OperationKind op)
+ {
+ this.pd = pd;
+ this.lts = lts;
+ this.cd = cd;
+ this.opId = opId;
+ this.op = op;
+ }
+
+ private static final Comparator<Operation> comparator = Comparator.comparingLong((Operation a) -> a.lts);
+
+ public int compareTo(Operation other)
+ {
+ // reverse order
+ return comparator.compare(other, this);
+ }
+
+ public boolean equals(Object o)
+ {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ Operation operation = (Operation) o;
+ return pd == operation.pd &&
+ lts == operation.lts &&
+ cd == operation.cd &&
+ opId == operation.opId;
+ }
+
+ public int hashCode()
+ {
+ return Objects.hash(pd, lts, cd, opId);
+ }
+
+ public String toString()
+ {
+ return "Operation{" +
+ "pd=" + pd +
+ ", cd=" + cd +
+ ", lts=" + lts +
+ ", opId=" + opId +
+ ", op=" + op +
+ '}';
+ }
+ }
+
    /**
     * Per-column verdict accumulated by {@link RowValidationState} while replaying
     * modifications against an observed row.
     */
    public enum ColumnState
    {
        UNOBSERVED, // no visible operation has explained this column yet
        REMOVED,    // a visible deletion explains the column's absence
        OBSERVED    // a visible write explains the column's value
    }
+
+ public static interface LongComparator
+ {
+ int compare(long o1, long o2);
+ }
+
+ public static List<Long> ltsIterator(long[] lts)
+ {
+ long[] sorted = Arrays.copyOf(lts, lts.length);
+ Arrays.sort(sorted);
+ List<Long> deduplicated = new ArrayList<>(lts.length);
+ for (int i = 0; i < sorted.length; i++)
+ {
+ if (sorted[i] <= 0)
+ continue;
+
+ if (deduplicated.size() == 0 || deduplicated.get(deduplicated.size() - 1) != sorted[i])
+ deduplicated.add(sorted[i]);
+ }
+
+ return deduplicated;
+ }
+}
diff --git a/harry-core/src/harry/model/Model.java b/harry-core/src/harry/model/Model.java
new file mode 100644
index 0000000..04fe6f1
--- /dev/null
+++ b/harry-core/src/harry/model/Model.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import harry.core.Configuration;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+
/**
 * A model validates the state of the system under test against the state that can be
 * reconstructed deterministically from the generators.
 */
public interface Model
{
    // Sentinel used in place of a write timestamp when the column was never written (null writetime).
    long NO_TIMESTAMP = Long.MIN_VALUE;

    /**
     * Records the outcome of the operation performed at the given logical timestamp.
     *
     * @param lts            logical timestamp of the operation
     * @param quorumAchieved whether the operation was acknowledged by a quorum
     */
    void recordEvent(long lts, boolean quorumAchieved);

    /**
     * Validates that the partition state selected by {@code query} matches this model's
     * expectation; implementations throw {@link ValidationException} on divergence.
     */
    void validatePartitionState(long verificationLts, Query query);

    /** @return a configuration snapshot from which an equivalent model can be re-created */
    Configuration.ModelConfiguration toConfig();

    /** Factory wiring a model to the selectors, clock, and system under test. */
    interface ModelFactory
    {
        Model create(SchemaSpec schema,
                     OpSelectors.PdSelector pdSelector,
                     OpSelectors.DescriptorSelector descriptorSelector,
                     OpSelectors.MonotonicClock clock,
                     QuerySelector querySelector,
                     SystemUnderTest sut);
    }

    /** Thrown when the observed database state diverges from the model's expectation. */
    class ValidationException extends RuntimeException
    {
        public ValidationException()
        {
            super();
        }

        public ValidationException(String message)
        {
            super(message);
        }

        // Convenience: message is built via String.format from the given format and args.
        public ValidationException(String format, Object... objects)
        {
            super(String.format(format, objects));
        }

        public ValidationException(String message, Throwable cause)
        {
            super(message, cause);
        }
    }
}
\ No newline at end of file
diff --git a/harry-core/src/harry/model/OpSelectors.java b/harry-core/src/harry/model/OpSelectors.java
new file mode 100644
index 0000000..d86d436
--- /dev/null
+++ b/harry-core/src/harry/model/OpSelectors.java
@@ -0,0 +1,623 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.EnumMap;
+import java.util.Map;
+import java.util.function.Function;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import harry.core.Configuration;
+import harry.ddl.SchemaSpec;
+import harry.generators.PCGFastPure;
+import harry.generators.RngUtils;
+import harry.generators.Surjections;
+import harry.generators.distribution.Distribution;
+import harry.util.BitSet;
+
+import static harry.generators.DataGenerators.UNSET_DESCR;
+
+/**
+ * Row (uninflated) data selectors. Not calling them generators, since their output is entirely
+ * deterministic, and for each input they are able to produce a single output.
+ * <p>
+ * This is more or less a direct translation of the formalization.
+ * <p>
+ * All functions implemented by this interface have to _always_ produce same outputs for same inputs.
+ * Most of the functions, with the exception of real-time clock translations, should be pure.
+ * <p>
+ * Functions that are the reverse of their counterparts are prefixed with "un"
+ */
+public interface OpSelectors
+{
/**
 * A pure, invertible random number generator abstraction: for a fixed seed, every
 * function deterministically maps its inputs to the same output.
 * (Dropped the redundant {@code public static} modifiers on this nested interface.)
 */
interface Rng
{
    /** @return the i-th random number of the given stream */
    long randomNumber(long i, long stream);

    /** Inverse of {@link #randomNumber}: recovers the sequence index that produced r. */
    long sequenceNumber(long r, long stream);

    default long next(long r)
    {
        return next(r, 0);
    }

    /** @return the random number that follows r in the given stream */
    long next(long r, long stream);

    /** @return the random number that precedes r in the given stream; inverse of {@link #next} */
    long prev(long r, long stream);

    default long prev(long r)
    {
        // Bug fix: this default previously delegated to next(r, 0), silently stepping
        // the generator forwards instead of backwards.
        return prev(r, 0);
    }
}
+
+ /**
+ * Clock is a component responsible for mapping _logical_ timestamps to _real-time_ ones.
+ * When reproducing test failures, and for validation purposes, a snapshot of such clock can
+ * be taken to map a real-time timestamp from the value retrieved from the database in order
+ * to map it back to the logical timestamp of the operation that wrote this value.
+ */
+ public static interface MonotonicClock
+ {
+ long rts(long lts);
+
+ long lts(long rts);
+
+ long nextLts();
+
+ long maxLts();
+
+ public Configuration.ClockConfiguration toConfig();
+ }
+
+ public static interface MonotonicClockFactory
+ {
+ public MonotonicClock make();
+ }
+
// TODO: move to DescriptorSelector, makes no sense to split them

/**
 * *Partition descriptor selector* controls how partitions are selected based on the current
 * logical timestamp. The default implementation is a sliding window of partition descriptors
 * that visits one partition after the other in the window `slide_after_repeats` times;
 * after that, it retires one partition descriptor and picks a new one to replace it.
 */
public abstract class PdSelector
{
    /** Raw partition descriptor for the given lts, before entropy adjustment. */
    @VisibleForTesting
    protected abstract long pd(long lts);

    /** Partition descriptor for lts, with entropy clamped to the schema's pk domain. */
    public long pd(long lts, SchemaSpec schema)
    {
        return schema.adjustPdEntropy(pd(lts));
    }

    // Next / previous logical timestamps that yield the same pd as the given lts.
    public abstract long nextLts(long lts);

    public abstract long prevLts(long lts);

    /** Highest lts that visits the same partition as the given lts. */
    public abstract long maxLts(long lts);

    /** Smallest lts that visits the partition at the given window position. */
    public abstract long minLtsAt(long position);

    /** Smallest lts that visits the partition with the given descriptor. */
    public abstract long minLtsFor(long pd);

    /** Window position of the partition visited at the given lts. */
    public abstract long positionFor(long lts);
}
+
+ public static interface PdSelectorFactory
+ {
+ public PdSelector make(Rng rng);
+ }
+
+ public static interface DescriptorSelectorFactory
+ {
+ public DescriptorSelector make(OpSelectors.Rng rng, SchemaSpec schemaSpec);
+ }
+
/**
 * DescriptorSelector controls how clustering descriptors are picked within the partition:
 * how many rows there can be in a partition, how many rows will be visited for a logical
 * timestamp, how many operations there will be in a batch, what kind of operations there
 * will be, and how often each kind of operation is going to occur.
 */
public abstract class DescriptorSelector
{
    /** Number of modification batches performed at the given logical timestamp. */
    public abstract int numberOfModifications(long lts);

    /** Number of operations in each modification batch at the given lts. */
    public abstract int opsPerModification(long lts);

    /** Upper bound on the number of distinct rows in any partition. */
    public abstract int maxPartitionSize();

    /** Whether the row with clustering descriptor cd is visited in partition pd at lts. */
    public abstract boolean isCdVisitedBy(long pd, long lts, long cd);

    // clustering descriptor is calculated using operation id and not modification id, since
    // value descriptors are calculated using modification ids.
    public long cd(long pd, long lts, long opId, SchemaSpec schema)
    {
        return schema.adjustCdEntropy(cd(pd, lts, opId));
    }

    /**
     * Currently, we do not allow visiting the same row more than once per lts, which means that:
     * <p>
     * * `max(opId)` returned `cds` have to be unique for any `lts/pd` pair
     * * {@code max(opId) < maxPartitionSize}
     */
    @VisibleForTesting
    protected abstract long cd(long pd, long lts, long opId);

    /** Random clustering descriptor within pd, entropy-adjusted for the schema. */
    public long randomCd(long pd, long entropy, SchemaSpec schema)
    {
        return schema.adjustCdEntropy(randomCd(pd, entropy));
    }

    public abstract long randomCd(long pd, long entropy);

    /** Raw value descriptor for column col of row (pd, cd) at (lts, opId). */
    @VisibleForTesting
    protected abstract long vd(long pd, long cd, long lts, long opId, int col);

    /**
     * Value descriptors for every regular column of row (pd, cd) at (lts, opId); columns
     * not set by this operation (per the column mask) get UNSET_DESCR.
     */
    public long[] vds(long pd, long cd, long lts, long opId, SchemaSpec schema)
    {
        long[] vds = new long[schema.regularColumns.size()];
        // NOTE(review): columnMask declares (pd, lts, opId) but is called here with cd in
        // the lts slot. This stays self-consistent as long as every caller does the same —
        // confirm intent against the mutation-side call sites.
        BitSet mask = columnMask(pd, cd, opId);

        for (int col = 0; col < vds.length; col++)
        {
            if (mask.isSet(col))
            {
                long vd = vd(pd, cd, lts, opId, col);
                vds[col] = schema.regularColumns.get(col).adjustEntropyDomain(vd);
            }
            else
            {
                vds[col] = UNSET_DESCR;
            }
        }
        return vds;
    }

    /** Kind of operation (write / delete variant) performed at (pd, lts, opId). */
    public abstract OperationKind operationType(long pd, long lts, long opId);

    /** Mask of regular columns affected by the operation at (pd, lts, opId). */
    public abstract BitSet columnMask(long pd, long lts, long opId);

    // TODO: why is this one unused?
    public abstract long rowId(long pd, long lts, long cd);

    /** Inverse of vd: recovers the modification id that wrote the value descriptor vd. */
    public abstract long modificationId(long pd, long cd, long lts, long vd, int col);
}
+
/**
 * {@link Rng} implementation backed by the pure, invertible PCG-based functions in
 * {@code PCGFastPure}: every method is a deterministic function of seed, input and stream.
 */
public static class PCGFast implements OpSelectors.Rng
{
    private final long seed;

    public PCGFast(long seed)
    {
        this.seed = seed;
    }

    // i-th number of the stream: advance the state i steps from the seed, then shuffle.
    public long randomNumber(long i, long stream)
    {
        return PCGFastPure.shuffle(PCGFastPure.advanceState(seed, i, stream));
    }

    // Inverse of randomNumber: unshuffle, then measure the distance from the seed.
    public long sequenceNumber(long r, long stream)
    {
        return PCGFastPure.distance(seed, PCGFastPure.unshuffle(r), stream);
    }

    public long next(long r, long stream)
    {
        return PCGFastPure.next(r, stream);
    }

    public long prev(long r, long stream)
    {
        return PCGFastPure.previous(r, stream);
    }
}
+
/**
 * Generates partition descriptors, based on LTS as if we had a sliding window.
 * <p>
 * Every {@code switchAfter (= windowSize * slideAfterRepeats)} steps, we move the window
 * by one, effectively expiring one partition descriptor and adding one partition
 * descriptor to the window.
 * <p>
 * For any LTS, we can calculate the previous and next LTS at which the same partition
 * will be visited.
 */
public static class DefaultPdSelector extends OpSelectors.PdSelector
{
    // Dedicated rng stream for partition descriptors, kept apart from other selectors' streams.
    public final static long PARTITION_DESCRIPTOR_STREAM_ID = 0x706b;

    private final OpSelectors.Rng rng;
    private final long slideAfterRepeats;
    // Number of lts steps in one full window cycle: windowSize * slideAfterRepeats.
    private final long switchAfter;
    private final long windowSize;

    public DefaultPdSelector(OpSelectors.Rng rng, long windowSize, long slideAfterRepeats)
    {
        this.rng = rng;
        this.slideAfterRepeats = slideAfterRepeats;
        this.windowSize = windowSize;
        this.switchAfter = windowSize * slideAfterRepeats;
    }

    // The pd is fully determined by the window position the lts maps to.
    protected long pd(long lts)
    {
        return rng.randomNumber(positionFor(lts), PARTITION_DESCRIPTOR_STREAM_ID);
    }

    /** Smallest lts at which the partition at the given window position is first visited. */
    public long minLtsAt(long position)
    {
        // The first windowSize positions are all present in the very first window.
        if (position < windowSize)
            return position;

        long windowStart = (position - (windowSize - 1)) * slideAfterRepeats * windowSize;
        return windowStart + windowSize - 1;
    }

    /** Smallest lts that visits the given pd; sequenceNumber inverts randomNumber. */
    public long minLtsFor(long pd)
    {
        return minLtsAt(rng.sequenceNumber(pd, PARTITION_DESCRIPTOR_STREAM_ID));
    }

    public long positionFor(long lts)
    {
        long windowStart = lts / switchAfter;
        return windowStart + lts % windowSize;
    }

    /** Next lts visiting the same partition, or -1 when this was the last visit. */
    public long nextLts(long lts)
    {
        long slideCount = lts / switchAfter;
        long positionInCycle = lts - slideCount * switchAfter;
        long nextRepeat = positionInCycle / windowSize + 1;

        if (nextRepeat > slideAfterRepeats ||
            (nextRepeat == slideAfterRepeats && (positionInCycle % windowSize) == 0))
            return -1;

        // last cycle before window slides; next window will have shifted by one
        if (nextRepeat == slideAfterRepeats)
            positionInCycle -= 1;

        return slideCount * switchAfter + windowSize + positionInCycle;
    }

    /** Previous lts visiting the same partition, or -1 when this was the first visit. */
    public long prevLts(long lts)
    {
        long slideCount = lts / switchAfter;
        long positionInCycle = lts - slideCount * switchAfter;
        long prevRepeat = positionInCycle / windowSize - 1;

        if (lts < windowSize ||
            prevRepeat < -1 ||
            (prevRepeat == -1 && (positionInCycle % windowSize) == (windowSize - 1)))
            return -1;

        // first cycle after the window slid; previous window was shifted back by one
        if (prevRepeat == -1)
            positionInCycle += 1;

        return slideCount * switchAfter - windowSize + positionInCycle;
    }

    /** Largest lts that visits the same partition as the given lts. */
    public long maxLts(long lts)
    {
        long windowStart = lts / switchAfter;
        long position = windowStart + lts % windowSize;

        return position * switchAfter + (slideAfterRepeats - 1) * windowSize;
    }

    public String toString()
    {
        return "DefaultPdSelector{" +
               "slideAfterRepeats=" + slideAfterRepeats +
               ", windowSize=" + windowSize +
               '}';
    }
}
+
/** @return a fresh builder mapping each operation kind to a column-mask surjection */
public static ColumnSelectorBuilder columnSelectorBuilder()
{
    return new ColumnSelectorBuilder();
}
+
+ // TODO: add weights/probabilities to this
+ // TODO: this looks like a hierarchical surjection
+ public static class ColumnSelectorBuilder
+ {
+ private Map<OperationKind, Surjections.Surjection<BitSet>> m;
+
+ public ColumnSelectorBuilder()
+ {
+ this.m = new EnumMap<>(OperationKind.class);
+ }
+
+ public ColumnSelectorBuilder forAll(int regularColumnsCount)
+ {
+ return forAll(BitSet.surjection(regularColumnsCount));
+ }
+
+ public ColumnSelectorBuilder forAll(Surjections.Surjection<BitSet> orig)
+ {
+ for (OperationKind type : OperationKind.values())
+ {
+ Surjections.Surjection<BitSet> gen = orig;
+ if (type == OperationKind.DELETE_COLUMN)
+ {
+ gen = (descriptor) -> {
+ while (true)
+ {
+ BitSet bitSet = orig.inflate(descriptor);
+ if (!bitSet.allUnset())
+ return bitSet;
+
+ descriptor = RngUtils.next(descriptor);
+ }
+ };
+ }
+ this.m.put(type, gen);
+ }
+ return this;
+ }
+
+ public ColumnSelectorBuilder forAll(BitSet... pickFrom)
+ {
+ return forAll(Surjections.pick(pickFrom));
+ }
+
+ public ColumnSelectorBuilder forWrite(Surjections.Surjection<BitSet> gen)
+ {
+ m.put(OperationKind.WRITE, gen);
+ return this;
+ }
+
+ public ColumnSelectorBuilder forWrite(BitSet pickFrom)
+ {
+ return forWrite(Surjections.pick(pickFrom));
+ }
+
+ public ColumnSelectorBuilder forDelete(Surjections.Surjection<BitSet> gen)
+ {
+ m.put(OperationKind.DELETE_ROW, gen);
+ return this;
+ }
+
+ public ColumnSelectorBuilder forDelete(BitSet pickFrom)
+ {
+ return forDelete(Surjections.pick(pickFrom));
+ }
+
+ public ColumnSelectorBuilder forColumnDelete(Surjections.Surjection<BitSet> gen)
+ {
+ m.put(OperationKind.DELETE_COLUMN, gen);
+ return this;
+ }
+
+ public ColumnSelectorBuilder forColumnDelete(BitSet pickFrom)
+ {
+ return forColumnDelete(Surjections.pick(pickFrom));
+ }
+
+ public Function<OperationKind, Surjections.Surjection<BitSet>> build()
+ {
+ return m::get;
+ }
+ }
+
// TODO: this can actually be further improved upon. Maybe not generation-wise, this part seems to be ok,
// but in the way it is hooked up with the rest of the system
/**
 * Descriptor selector that composes clustering descriptors hierarchically: each clustering
 * component is drawn from its own rng sub-stream and the parts are stitched together by the
 * schema's clustering-key generator.
 */
public static class HierarchicalDescriptorSelector extends DefaultDescriptorSelector
{
    // How many parts (at most) each subsequent clustering "level" should contain.
    private final int[] fractions;

    public HierarchicalDescriptorSelector(Rng rng,
                                          // how many parts (at most) each subsequent "level" should contain
                                          int[] fractions,
                                          Function<OperationKind, Surjections.Surjection<BitSet>> columnMaskSelector,
                                          Surjections.Surjection<OperationKind> operationTypeSelector,
                                          Distribution modificationsPerLtsDistribution,
                                          Distribution rowsPerModificationsDistribution,
                                          int maxPartitionSize)
    {
        super(rng, columnMaskSelector, operationTypeSelector, modificationsPerLtsDistribution, rowsPerModificationsDistribution, maxPartitionSize);
        this.fractions = fractions;
    }

    @Override
    public long cd(long pd, long lts, long opId, SchemaSpec schema)
    {
        // With zero or one clustering column there is no hierarchy: fall back to flat selection.
        if (schema.clusteringKeys.size() <= 1)
            return schema.adjustCdEntropy(super.cd(pd, lts, opId));

        int partitionSize = maxPartitionSize();
        int clusteringOffset = clusteringOffset(lts);
        long res;
        if (clusteringOffset == 0)
        {
            res = rng.prev(opId, pd);
        }
        else
        {
            int positionInPartition = (int) ((clusteringOffset + opId) % partitionSize);
            res = cd(positionInPartition, fractions, schema, rng, pd);
        }
        return schema.adjustCdEntropy(res);
    }

    /**
     * Builds one descriptor slice per clustering component and stitches them together.
     * NOTE(review): when fractions[i] == 1 the modulo divisor below is zero
     * (ArithmeticException); presumably fractions must be >= 2 — confirm this is
     * validated at configuration time.
     */
    @VisibleForTesting
    public static long cd(int positionInPartition, int[] fractions, SchemaSpec schema, Rng rng, long pd)
    {
        long[] slices = new long[schema.clusteringKeys.size()];
        for (int i = 0; i < slices.length; i++)
        {
            int idx = i < fractions.length ? (positionInPartition % (fractions[i] - 1)) : positionInPartition;
            slices[i] = rng.prev(idx, rng.next(pd, i + 1));
        }

        return schema.ckGenerator.stitch(slices);
    }

    // Flat cd computation is unsupported here; the schema-aware override above must be used.
    protected long cd(long pd, long lts, long opId)
    {
        throw new RuntimeException("Shouldn't be called");
    }
}
+
+ // TODO: add a way to limit partition size alltogether; current "number of rows" notion is a bit misleading
+ public static class DefaultDescriptorSelector extends DescriptorSelector
+ {
+ protected final static long NUMBER_OF_MODIFICATIONS_STREAM = 0xf490c5272baL;
+ protected final static long ROWS_PER_OPERATION_STREAM = 0x5e03812e293L;
+ protected final static long BITSET_IDX_STREAM = 0x92eb607bef1L;
+
+ public static Surjections.Surjection<OperationKind> DEFAULT_OP_TYPE_SELECTOR = Surjections.enumValues(OperationKind.class);
+
+ protected final OpSelectors.Rng rng;
+ protected final Surjections.Surjection<OperationKind> operationTypeSelector;
+ protected final Function<OperationKind, Surjections.Surjection<BitSet>> columnMaskSelector;
+ protected final Distribution modificationsPerLtsDistribution;
+ protected final Distribution rowsPerModificationsDistribution;
+ protected final int maxPartitionSize;
+
+ public DefaultDescriptorSelector(OpSelectors.Rng rng,
+ Function<OperationKind, Surjections.Surjection<BitSet>> columnMaskSelector,
+ Surjections.Surjection<OperationKind> operationTypeSelector,
+ Distribution modificationsPerLtsDistribution,
+ Distribution rowsPerModificationsDistribution,
+ int maxPartitionSize)
+ {
+ this.rng = rng;
+
+ this.operationTypeSelector = operationTypeSelector;
+ this.columnMaskSelector = columnMaskSelector;
+
+ this.modificationsPerLtsDistribution = modificationsPerLtsDistribution;
+ this.rowsPerModificationsDistribution = rowsPerModificationsDistribution;
+ this.maxPartitionSize = maxPartitionSize;
+ }
+
+ public int numberOfModifications(long lts)
+ {
+ return (int) modificationsPerLtsDistribution.skew(rng.randomNumber(lts, NUMBER_OF_MODIFICATIONS_STREAM));
+ }
+
+ public int opsPerModification(long lts)
+ {
+ return (int) rowsPerModificationsDistribution.skew(rng.randomNumber(lts, ROWS_PER_OPERATION_STREAM));
+ }
+
+ // TODO: this is not the best way to calculate a clustering offset; potentially we'd like to use
+ // some sort of expiration mechanism slimilar to PDs.
+ public int maxPartitionSize()
+ {
+ return maxPartitionSize;
+ }
+
+ protected int clusteringOffset(long lts)
+ {
+ return RngUtils.asInt(lts, 0, maxPartitionSize() - 1);
+ }
+
+ // TODO: this won't work for entropy-adjusted CDs, at least the way they're implemented now
+ public boolean isCdVisitedBy(long pd, long lts, long cd)
+ {
+ return rowId(pd, lts, cd) < (numberOfModifications(lts) * opsPerModification(lts));
+ }
+
+ public long randomCd(long pd, long entropy)
+ {
+ long positionInPartition = Math.abs(rng.prev(entropy)) % maxPartitionSize();
+ return rng.prev(positionInPartition, pd);
+ }
+
+ protected long cd(long pd, long lts, long opId)
+ {
+ assert opId <= maxPartitionSize;
+ int partitionSize = maxPartitionSize();
+ int clusteringOffset = clusteringOffset(lts);
+ if (clusteringOffset == 0)
+ return rng.prev(opId, pd);
+
+ // TODO: partition size can't be larger than cardinality of the value.
+ // So if we have 10 modifications per lts and 10 rows per modification,
+ // we'll visit the same row twice per lts.
+ int positionInPartition = (int) ((clusteringOffset + opId) % partitionSize);
+
+ return rng.prev(positionInPartition, pd);
+ }
+
+ public long rowId(long pd, long lts, long cd)
+ {
+ int partitionSize = maxPartitionSize();
+ int clusteringOffset = clusteringOffset(lts);
+ int positionInPartition = (int) rng.next(cd, pd);
+
+ if (clusteringOffset == 0)
+ return positionInPartition;
+
+ if (positionInPartition == 0)
+ return partitionSize - clusteringOffset;
+ if (positionInPartition == clusteringOffset)
+ return 0;
+ else if (positionInPartition < clusteringOffset)
+ return partitionSize - clusteringOffset + positionInPartition;
+ else
+ return positionInPartition - clusteringOffset;
+ }
+
+ public OperationKind operationType(long pd, long lts, long opId)
+ {
+ return operationTypeSelector.inflate(pd ^ lts ^ opId);
+ }
+
+ public BitSet columnMask(long pd, long lts, long opId)
+ {
+ Surjections.Surjection<BitSet> gen = columnMaskSelector.apply(operationType(pd, lts, opId));
+ if (gen == null)
+ throw new IllegalArgumentException("Can't find a selector for " + gen);
+ return gen.inflate(rng.randomNumber(pd ^ lts ^ opId, BITSET_IDX_STREAM));
+ }
+
+ public long vd(long pd, long cd, long lts, long opId, int col)
+ {
+ // change randomNumber / sequenceNumber to prev/Next
+ return rng.randomNumber(opId + 1, pd ^ cd ^ lts ^ col);
+ }
+
+ public long modificationId(long pd, long cd, long lts, long vd, int col)
+ {
+ return rng.sequenceNumber(vd, pd ^ cd ^ lts ^ col);
+ }
+ }
+
/**
 * Kinds of mutations the selectors can pick. NOTE(review): DELETE_RANGE is declared but
 * no range-specific column-mask handling is visible in this file — confirm it is handled
 * at the visitor level.
 */
public enum OperationKind
{
    WRITE,
    DELETE_ROW,
    DELETE_COLUMN,
    DELETE_RANGE
}
+}
diff --git a/harry-core/src/harry/model/QuiescentChecker.java b/harry-core/src/harry/model/QuiescentChecker.java
new file mode 100644
index 0000000..94a4b37
--- /dev/null
+++ b/harry-core/src/harry/model/QuiescentChecker.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import harry.core.Configuration;
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.reconciler.Reconciler;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+
+public class QuiescentChecker implements Model
+{
+ private final OpSelectors.MonotonicClock clock;
+
+ private final DataTracker tracker;
+ private final SystemUnderTest sut;
+ private final Reconciler reconciler;
+
+ public QuiescentChecker(SchemaSpec schema,
+ OpSelectors.PdSelector pdSelector,
+ OpSelectors.DescriptorSelector descriptorSelector,
+ OpSelectors.MonotonicClock clock,
+ QuerySelector querySelector,
+
+ SystemUnderTest sut)
+ {
+ this.clock = clock;
+ this.sut = sut;
+
+ this.reconciler = new Reconciler(schema, pdSelector, descriptorSelector, querySelector);
+ this.tracker = new DataTracker();
+ }
+
+ public void recordEvent(long lts, boolean quorumAchieved)
+ {
+ tracker.recordEvent(lts, quorumAchieved);
+ }
+
+ public void validatePartitionState(long verificationLts, Query query)
+ {
+ long maxCompeteLts = tracker.maxCompleteLts();
+ long maxSeenLts = tracker.maxSeenLts();
+
+ assert maxCompeteLts == maxSeenLts : "Runner hasn't settled down yet. " +
+ "Quiescent model can't be reliably used in such cases.";
+
+ Iterator<ResultSetRow> actual = SelectHelper.execute(sut, clock, query).iterator();
+ Iterator<Reconciler.RowState> expected = reconciler.inflatePartitionState(query.pd, maxSeenLts, query).iterator(query.reverse);
+
+ while (actual.hasNext() && expected.hasNext())
+ {
+ ResultSetRow actualRowState = actual.next();
+ Reconciler.RowState expectedRowState = expected.next();
+ // TODO: this is not necessarily true. It can also be that ordering is incorrect.
+ if (actualRowState.cd != expectedRowState.cd)
+ throw new ValidationException("Found a row in the model that is not present in the resultset:\nExpected: %s\nActual: %s",
+ expectedRowState, actualRowState);
+
+ if (!Arrays.equals(actualRowState.vds, expectedRowState.vds))
+ throw new ValidationException("Returned row state doesn't match the one predicted by the model:\nExpected: %s (%s)\nActual: %s (%s).",
+ Arrays.toString(expectedRowState.vds), expectedRowState,
+ Arrays.toString(actualRowState.vds), actualRowState);
+
+ if (!Arrays.equals(actualRowState.lts, expectedRowState.lts))
+ throw new ValidationException("Timestamps in the row state don't match ones predicted by the model:\nExpected: %s (%s)\nActual: %s (%s).",
+ Arrays.toString(expectedRowState.lts), expectedRowState,
+ Arrays.toString(actualRowState.lts), actualRowState);
+ }
+
+ if (actual.hasNext() || expected.hasNext())
+ {
+ throw new ValidationException("Expected results to have the same number of results, but %s result iterator has more results",
+ actual.hasNext() ? "actual" : "expected");
+ }
+ }
+
+ @VisibleForTesting
+ public void forceLts(long maxSeen, long maxComplete)
+ {
+ tracker.forceLts(maxSeen, maxComplete);
+ }
+
+ public Configuration.ModelConfiguration toConfig()
+ {
+ return new Configuration.QuiescentCheckerConfig(tracker.maxSeenLts(), tracker.maxCompleteLts());
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/model/SelectHelper.java b/harry-core/src/harry/model/SelectHelper.java
new file mode 100644
index 0000000..8cf9d80
--- /dev/null
+++ b/harry-core/src/harry/model/SelectHelper.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import com.datastax.driver.core.querybuilder.Ordering;
+import com.datastax.driver.core.querybuilder.QueryBuilder;
+import com.datastax.driver.core.querybuilder.Select;
+import harry.data.ResultSetRow;
+import harry.ddl.ColumnSpec;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.operations.CompiledStatement;
+import harry.operations.Relation;
+import harry.runner.Query;
+
/**
 * Helpers for building and executing SELECT statements whose projection order matches the
 * layout expected by {@link #resultSetToRow}: all columns first, then (optionally) one
 * writetime per regular column.
 */
public class SelectHelper
{
    /** Full-partition select (no clustering restrictions), natural order, with writetimes. */
    public static CompiledStatement select(SchemaSpec schema, long pd)
    {
        return select(schema, pd, Collections.emptyList(), false, true);
    }

    /**
     * Here, {@code reverse} should be understood not in ASC/DESC sense, but rather in terms
     * of how we're going to iterate through this partition (in other words, if first clustering component order
     * is DESC, we'll iterate in ASC order)
     */
    public static CompiledStatement select(SchemaSpec schema, long pd, List<Relation> relations, boolean reverse, boolean includeWriteTime)
    {
        Select.Selection select = QueryBuilder.select();
        // Column order here is a contract: resultSetToRow indexes the result positionally.
        for (ColumnSpec<?> column : schema.allColumns)
            select.column(column.name);

        if (includeWriteTime)
        {
            for (ColumnSpec<?> column : schema.regularColumns)
                select.writeTime(column.name);
        }

        Select.Where where = select.from(schema.keyspace, schema.table).where();
        List<Object> bindings = new ArrayList<>();

        addRelations(schema, where, bindings, pd, relations);
        addOrderBy(schema, where, reverse);

        Object[] bindingsArr = bindings.toArray(new Object[bindings.size()]);
        return new CompiledStatement(where.toString(), bindingsArr);
    }

    // Inflates pd and the clustering relations into WHERE clauses, collecting bind values in order.
    private static void addRelations(SchemaSpec schema, Select.Where where, List<Object> bindings, long pd, List<Relation> relations)
    {
        schema.inflateRelations(pd,
                                relations,
                                (spec, kind, value) -> {
                                    where.and(kind.getClause(spec));
                                    bindings.add(value);
                                });
    }

    // "reverse" flips each clustering column's declared order (see the select(...) javadoc).
    private static void addOrderBy(SchemaSpec schema, Select.Where whereClause, boolean reverse)
    {
        if (reverse && schema.clusteringKeys.size() > 0)
        {
            Ordering[] ordering = new Ordering[schema.clusteringKeys.size()];
            for (int i = 0; i < schema.clusteringKeys.size(); i++)
            {
                ColumnSpec<?> c = schema.clusteringKeys.get(i);
                ordering[i] = c.isReversed() ? QueryBuilder.asc(c.name) : QueryBuilder.desc(c.name);
            }
            whereClause.orderBy(ordering);
        }
    }

    /**
     * Deflates a raw driver row (laid out as produced by {@link #select}) back into
     * descriptors, translating writetimes to logical timestamps via the clock.
     * A null writetime becomes {@link Model#NO_TIMESTAMP}.
     */
    public static ResultSetRow resultSetToRow(SchemaSpec schema, OpSelectors.MonotonicClock clock, Object[] result)
    {
        Object[] partitionKey = new Object[schema.partitionKeys.size()];
        Object[] clusteringKey = new Object[schema.clusteringKeys.size()];
        Object[] regularColumns = new Object[schema.regularColumns.size()];

        System.arraycopy(result, 0, partitionKey, 0, partitionKey.length);
        System.arraycopy(result, partitionKey.length, clusteringKey, 0, clusteringKey.length);
        System.arraycopy(result, partitionKey.length + clusteringKey.length, regularColumns, 0, regularColumns.length);

        long[] lts = new long[schema.regularColumns.size()];
        for (int i = 0; i < lts.length; i++)
        {
            // Writetime columns trail all data columns in the projection.
            Object v = result[schema.allColumns.size() + i];
            lts[i] = v == null ? Model.NO_TIMESTAMP : clock.lts((long) v);
        }

        return new ResultSetRow(schema.deflatePartitionKey(partitionKey),
                                schema.deflateClusteringKey(clusteringKey),
                                schema.deflateRegularColumns(regularColumns),
                                lts);
    }

    /** Runs the query against the system under test and deflates every returned row. */
    public static List<ResultSetRow> execute(SystemUnderTest sut, OpSelectors.MonotonicClock clock, Query query)
    {
        CompiledStatement compiled = query.toSelectStatement();
        Object[][] objects = sut.execute(compiled.cql(), compiled.bindings());
        List<ResultSetRow> result = new ArrayList<>();
        for (Object[] obj : objects)
            result.add(resultSetToRow(query.schemaSpec, clock, obj));

        return result;
    }
}
diff --git a/harry-core/src/harry/model/StatelessVisibleRowsChecker.java b/harry-core/src/harry/model/StatelessVisibleRowsChecker.java
new file mode 100644
index 0000000..01f0381
--- /dev/null
+++ b/harry-core/src/harry/model/StatelessVisibleRowsChecker.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.List;
+import java.util.function.Supplier;
+
+import harry.core.Configuration;
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+
+import static harry.model.VisibleRowsChecker.descendingIterator;
+
+/**
+ * A simple model to check whether or not the rows reported as visible by the database are reflected in
+ * the model.
+ */
+public class StatelessVisibleRowsChecker implements Model
+{
+ protected final OpSelectors.PdSelector pdSelector;
+ protected final OpSelectors.DescriptorSelector descriptorSelector;
+ protected final OpSelectors.MonotonicClock clock;
+ protected final SystemUnderTest sut;
+
+ protected final SchemaSpec schema;
+
+ // NOTE(review): querySelector is accepted but never stored or used by this checker —
+ // presumably kept for constructor-signature parity with VisibleRowsChecker; confirm.
+ public StatelessVisibleRowsChecker(SchemaSpec schema,
+ OpSelectors.PdSelector pdSelector,
+ OpSelectors.DescriptorSelector descriptorSelector,
+ OpSelectors.MonotonicClock clock,
+ QuerySelector querySelector,
+ SystemUnderTest sut)
+ {
+ this.pdSelector = pdSelector;
+ this.descriptorSelector = descriptorSelector;
+ this.schema = schema;
+ this.clock = clock;
+ this.sut = sut;
+ }
+
+ // Stateless checker: validation re-derives everything from the deterministic
+ // selectors, so there is no event log to update.
+ public void recordEvent(long lts, boolean quorumAchieved)
+ {
+ //no-op
+ }
+
+ public void validatePartitionState(long validationLts, Query query)
+ {
+ validatePartitionState(validationLts,
+ query,
+ () -> SelectHelper.execute(sut, clock, query));
+ }
+
+ public Configuration.ModelConfiguration toConfig()
+ {
+ throw new RuntimeException("not implemented");
+ }
+
+ // Validates that every returned row — and every column timestamp within it — can be
+ // reproduced from the deterministic selectors (pd/cd/lts -> value descriptor).
+ void validatePartitionState(long validationLts, Query query, Supplier<List<ResultSetRow>> rowsSupplier)
+ {
+ // we ignore Query here, since our criteria for checking in this model is presence of the row in the resultset
+ long pd = pdSelector.pd(validationLts, schema);
+
+ List<ResultSetRow> rows = rowsSupplier.get();
+
+ for (ResultSetRow row : rows)
+ {
+ // Walk the row's column timestamps (descending) in lock-step with the model's
+ // visit LTSs for this partition (also descending).
+ VisibleRowsChecker.LongIterator rowLtsIter = descendingIterator(row.lts);
+ VisibleRowsChecker.LongIterator modelLtsIter = descendingIterator(pdSelector, validationLts);
+
+ outer:
+ while (rowLtsIter.hasNext())
+ {
+ long rowLts = rowLtsIter.nextLong();
+
+ // NO_TIMESTAMP marks a column that was never written — nothing to verify
+ if (rowLts == NO_TIMESTAMP)
+ continue;
+
+ if (!modelLtsIter.hasNext())
+ throw new ValidationException(String.format("Model iterator is exhausted, could not verify %d lts for the row: \n%s %s",
+ rowLts, row, query));
+
+ while (modelLtsIter.hasNext())
+ {
+ long modelLts = modelLtsIter.nextLong();
+ // Model LTSs newer than the row timestamp are skipped...
+ if (modelLts > rowLts)
+ continue;
+ // ...but overshooting means the row's timestamp never happened per the model
+ if (modelLts < rowLts)
+ throw new RuntimeException("Can't find a corresponding event id in the model for: " + rowLts + " " + modelLts);
+ for (int col = 0; col < row.lts.length; col++)
+ {
+ if (row.lts[col] != rowLts)
+ continue;
+
+ // Recompute the value descriptor the model would have written for this column
+ long m = descriptorSelector.modificationId(pd, row.cd, rowLts, row.vds[col], col);
+ long vd = descriptorSelector.vd(pd, row.cd, rowLts, m, col);
+
+ if (vd != row.vds[col])
+ throw new RuntimeException("Can't verify the row");
+ }
+ continue outer;
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/model/VisibleRowsChecker.java b/harry-core/src/harry/model/VisibleRowsChecker.java
new file mode 100644
index 0000000..ddda8b8
--- /dev/null
+++ b/harry-core/src/harry/model/VisibleRowsChecker.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Supplier;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.PeekingIterator;
+
+import harry.core.Configuration;
+import harry.data.ResultSetRow;
+import harry.ddl.SchemaSpec;
+import harry.model.sut.SystemUnderTest;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+
+/**
+ * A simple model to check whether or not the rows reported as visible by the database are reflected in
+ * the model.
+ */
+public class VisibleRowsChecker implements Model
+{
+ // Per-partition log of executed operations: partition descriptor -> (lts -> event)
+ protected final Map<Long, TreeMap<Long, Event>> eventLog;
+ protected final OpSelectors.DescriptorSelector descriptorSelector;
+ protected final OpSelectors.PdSelector pdSelector;
+ protected final OpSelectors.MonotonicClock clock;
+ protected final SystemUnderTest sut;
+ protected final AtomicLong maxLts;
+ protected final SchemaSpec schema;
+
+ public VisibleRowsChecker(SchemaSpec schema,
+ OpSelectors.PdSelector pdSelector,
+ OpSelectors.DescriptorSelector descriptorSelector,
+ OpSelectors.MonotonicClock clock,
+ QuerySelector querySelector,
+ SystemUnderTest sut)
+ {
+ this.pdSelector = pdSelector;
+ this.descriptorSelector = descriptorSelector;
+ this.eventLog = new HashMap<>();
+ this.maxLts = new AtomicLong();
+ this.schema = schema;
+ this.clock = clock;
+ this.sut = sut;
+ }
+
+
+ // Records an executed operation; an operation may first be recorded as partially
+ // visible (quorumAchieved == false) and later upgraded to fully visible.
+ public synchronized void recordEvent(long lts, boolean quorumAchieved)
+ {
+ maxLts.updateAndGet((old) -> Math.max(old, lts));
+ long pd = pdSelector.pd(lts);
+
+ // TODO: This is definitely not optimal; we could probably use a better, potentially off-heap sorted structure for that anyways
+ TreeMap<Long, Event> events = eventLog.get(pd);
+ if (events == null)
+ {
+ events = new TreeMap<>();
+ eventLog.put(pd, events);
+ }
+
+ Event event = events.get(lts);
+ assert event == null || !event.quorumAchieved : "Operation should be partially visible before it is fully visible";
+ events.put(lts, new Event(lts, quorumAchieved));
+ }
+
+
+ public void validatePartitionState(long validationLts, Query query)
+ {
+ validatePartitionState(validationLts,
+ query,
+ () -> SelectHelper.execute(sut, clock, query));
+ }
+
+ public Configuration.ModelConfiguration toConfig()
+ {
+ throw new RuntimeException("not implemented");
+ }
+
+ // Checks that every returned row — and every column timestamp within it — matches an
+ // operation recorded in the event log, and that its value descriptor is reproducible.
+ synchronized void validatePartitionState(long validationLts, Query query, Supplier<List<ResultSetRow>> rowsSupplier)
+ {
+ // TODO: Right now, we ignore Query here!
+ long pd = pdSelector.pd(validationLts, schema);
+ List<ResultSetRow> rows = rowsSupplier.get();
+ TreeMap<Long, Event> events = eventLog.get(pd);
+ // If rows is non-empty, this guard guarantees `events` is non-null below
+ if (!rows.isEmpty() && (events == null || events.isEmpty()))
+ {
+ throw new ValidationException(String.format("Returned rows are not empty, but there were no records in the event log.\nRows: %s\nSeen pds: %s",
+ rows, eventLog.keySet()));
+ }
+
+ for (ResultSetRow row : rows)
+ {
+ LongIterator rowLtsIter = descendingIterator(row.lts);
+ PeekingIterator<Event> modelLtsIter = Iterators.peekingIterator(events.subMap(0L, true, maxLts.get(), true)
+ .descendingMap()
+ .values()
+ .iterator());
+ outer:
+ while (rowLtsIter.hasNext())
+ {
+ long rowLts = rowLtsIter.nextLong();
+
+ // NO_TIMESTAMP marks a column that was never written — nothing to verify
+ if (rowLts == NO_TIMESTAMP)
+ continue;
+
+ if (!modelLtsIter.hasNext())
+ throw new ValidationException(String.format("Model iterator is exhausted, could not verify %d lts for the row: \n%s %s",
+ rowLts, row, query));
+
+ while (modelLtsIter.hasNext())
+ {
+ Event event = modelLtsIter.next();
+ // Events newer than the row timestamp are skipped; overshooting means the
+ // row's timestamp has no corresponding recorded operation.
+ if (event.lts > rowLts)
+ continue;
+ if (event.lts < rowLts)
+ throw new RuntimeException("Can't find a corresponding event id in the model for: " + rowLts + " " + event);
+ for (int col = 0; col < row.lts.length; col++)
+ {
+ if (row.lts[col] != rowLts)
+ continue;
+ // Recompute the value descriptor the model would have written for this column
+ long m = descriptorSelector.modificationId(pd, row.cd, rowLts, row.vds[col], col);
+ long vd = descriptorSelector.vd(pd, row.cd, rowLts, m, col);
+ if (vd != row.vds[col])
+ throw new RuntimeException("Can't verify the row");
+ }
+ continue outer;
+ }
+ }
+ }
+ }
+
+ public interface LongIterator extends Iterator<Long>
+ {
+ long nextLong();
+ }
+
+
+ // Iterates, in descending order, the LTSs at which the partition under validation was
+ // visited, starting from the latest visit at or before verificationLts.
+ public static LongIterator descendingIterator(OpSelectors.PdSelector pdSelector, long verificationLts)
+ {
+ return new VisibleRowsChecker.LongIterator()
+ {
+ long next = pdSelector.maxLts(verificationLts);
+
+ public long nextLong()
+ {
+ long ret = next;
+ next = pdSelector.prevLts(next);
+ return ret;
+ }
+
+ public boolean hasNext()
+ {
+ return next >= 0;
+ }
+
+ public Long next()
+ {
+ // FIX: previously returned null, violating the Iterator contract; delegate to nextLong
+ return nextLong();
+ }
+ };
+ }
+
+ // Returns the distinct values of ltss in descending order.
+ // FIX: the previous implementation advanced the iterator from hasNext() (so calling
+ // hasNext() twice in a row skipped a value) and threw ArrayIndexOutOfBoundsException
+ // when called on an empty array. hasNext() is now idempotent and side-effect free.
+ public static LongIterator descendingIterator(long[] ltss)
+ {
+ long[] sorted = Arrays.copyOf(ltss, ltss.length);
+ Arrays.sort(sorted);
+
+ return new LongIterator()
+ {
+ // Index of the next distinct value to return (scanning from the largest); -1 when exhausted
+ private int idx = sorted.length - 1;
+
+ public long nextLong()
+ {
+ if (idx < 0)
+ throw new RuntimeException("No elements left");
+ long ret = sorted[idx];
+ // Skip duplicates so each distinct value is returned exactly once
+ while (idx >= 0 && sorted[idx] == ret)
+ idx--;
+ return ret;
+ }
+
+ public boolean hasNext()
+ {
+ return idx >= 0;
+ }
+
+ public Long next()
+ {
+ return nextLong();
+ }
+ };
+ }
+
+ protected static class Event
+ {
+ final long lts;
+ volatile boolean quorumAchieved;
+
+ public Event(long lts, boolean quorumAchieved)
+ {
+ this.lts = lts;
+ this.quorumAchieved = quorumAchieved;
+ }
+
+ public String toString()
+ {
+ return "Event{" +
+ "lts=" + lts +
+ ", quorumAchieved=" + quorumAchieved +
+ '}';
+ }
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/model/clock/ApproximateMonotonicClock.java b/harry-core/src/harry/model/clock/ApproximateMonotonicClock.java
new file mode 100644
index 0000000..59dd47e
--- /dev/null
+++ b/harry-core/src/harry/model/clock/ApproximateMonotonicClock.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model.clock;
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicLongArray;
+import java.util.concurrent.locks.LockSupport;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import harry.core.Configuration;
+import harry.model.OpSelectors;
+
+/**
+ * Monotonic clock, that guarantees that any LTS can be converted to a unique RTS, given
+ * the number of LTS does not exceed the number of RTS (e.g., we do not draw LTS more frequently
+ * than once per microsecond).
+ * <p>
+ * This conversion works as follows:
+ * <p>
+ * * every `period`, we record the current timestamp and maximum seen LTS, and keep history of up to
+ * `historySize` LTS/timestamp combinations
+ * * when queried to retrieve the LTS, we find a timestamp, relative to which we can calculate RTS.
+ * After that, we calculate a difference between the largest LTS that is still smaller than the converted
+ * one, and add this difference to the timestamp.
+ * <p>
+ * This way, later LTS can only be mapped to later RTS, and any LTS that was drawn previously, will be
+ * uniquely mapped relative to some timestamp, with the order matching the LTS order.
+ */
+// TODO: shut down
+public class ApproximateMonotonicClock implements OpSelectors.MonotonicClock
+{
+ // Sentinel states stored in `lts`; declared final — they are constants and must not be reassignable
+ private static final long START_VALUE = 0;
+ private static final long DEFUNCT = Long.MIN_VALUE;
+ private static final long REBASE_IN_PROGRESS = Long.MIN_VALUE + 1;
+
+ // TODO: there's a theoretical possibility of a bug; when we have several consecutive epochs without
+ // change in LTS, current implementation will return the latest epoch instead of the earliest one.
+ // This is not a big deal in terms of monotonicity but can cause some problems when validating TTL.
+ // The simplest fix would be to find the smallest matching epoch.
+ private final ScheduledExecutorService executor;
+ private final int historySize;
+ private final AtomicLongArray ltsHistory;
+ private final long startTimeMicros;
+ private volatile int idx;
+ private final AtomicLong lts;
+
+ private final long periodMicros;
+
+ private final long epoch;
+ private final TimeUnit epochTimeUnit;
+
+ public ApproximateMonotonicClock(long period, TimeUnit timeUnit)
+ {
+ this(10000, period, timeUnit);
+ }
+
+ public ApproximateMonotonicClock(int historySize, long epoch, TimeUnit epochTimeUnit)
+ {
+ this(TimeUnit.MILLISECONDS.toMicros(System.currentTimeMillis()),
+ historySize, new AtomicLongArray(historySize), START_VALUE, 0, epoch, epochTimeUnit);
+ rebase();
+ }
+
+ ApproximateMonotonicClock(long startTimeMicros,
+ int historySize,
+ AtomicLongArray history,
+ long lts,
+ int idx,
+ long epoch,
+ TimeUnit epochTimeUnit)
+ {
+ this.startTimeMicros = startTimeMicros;
+ this.historySize = historySize;
+ this.ltsHistory = history;
+ this.lts = new AtomicLong(lts);
+ this.idx = idx;
+ this.periodMicros = epochTimeUnit.toMicros(epoch);
+ this.executor = Executors.newSingleThreadScheduledExecutor(r -> {
+ Thread t = new Thread(r);
+ t.setName("ApproximateMonotonicClock-ScheduledTasks");
+ t.setDaemon(true);
+ return t;
+ });
+ this.executor.scheduleAtFixedRate(this::rebase, epoch, epoch, epochTimeUnit);
+ this.epoch = epoch;
+ this.epochTimeUnit = epochTimeUnit;
+ }
+
+ @VisibleForTesting
+ public static ApproximateMonotonicClock forDebug(long startTimeMicros, int historySize, long lts, int idx, long period, TimeUnit timeUnit, long... values)
+ {
+ AtomicLongArray history = new AtomicLongArray(historySize);
+ for (int i = 0; i < values.length; i++)
+ history.set(i, values[i]);
+
+ assert values.length == idx; // sanity check
+ return new ApproximateMonotonicClock(startTimeMicros, historySize, history, lts, idx, period, timeUnit);
+ }
+
+ // Returns the first LTS recorded for the given epoch index; history is a ring
+ // buffer, so epoch indices wrap modulo historySize.
+ public long get(long idx)
+ {
+ return ltsHistory.get((int) (idx % historySize));
+ }
+
+ // Snapshots the current LTS at an epoch boundary. While the snapshot is taken,
+ // `lts` holds REBASE_IN_PROGRESS, which makes concurrent nextLts/maxLts calls spin.
+ private void rebase()
+ {
+ int arrayIdx = idx % historySize;
+ long rebaseLts = lts.get();
+ if (rebaseLts == DEFUNCT)
+ throw new IllegalStateException();
+
+ while (!lts.compareAndSet(rebaseLts, REBASE_IN_PROGRESS))
+ rebaseLts = lts.get();
+
+ ltsHistory.set(arrayIdx, rebaseLts == START_VALUE ? START_VALUE : (rebaseLts + 1));
+
+ // If we happen to exhaust counter, we just need to make operations "wider".
+ // It is unsafe to proceed, so we defunct the clock.
+ //
+ // We could make a clock implementation that would sleep on `get`, but it will
+ // be more expensive, since we'll have to check for overflow each time before
+ // returning anything.
+ if (idx > 1 && get(idx) - get(idx - 1) > periodMicros)
+ {
+ lts.set(DEFUNCT);
+ executor.shutdown();
+ throwCounterExhaustedException();
+ }
+
+ idx = idx + 1;
+ if (!lts.compareAndSet(REBASE_IN_PROGRESS, rebaseLts))
+ throw new IllegalStateException("No thread should have changed LTS during rebase. " + lts.get());
+ }
+
+ // Draws the next logical timestamp; spins while a rebase is in progress and
+ // fails permanently once the clock is defunct.
+ public long nextLts()
+ {
+ long current = lts.get();
+ while (true)
+ {
+ if (current >= 0)
+ {
+ if (lts.compareAndSet(current, current + 1))
+ return current;
+
+ current = lts.get();
+ continue;
+ }
+
+ if (current == REBASE_IN_PROGRESS)
+ {
+ LockSupport.parkNanos(1);
+ current = lts.get();
+ continue;
+ }
+
+ if (current == DEFUNCT)
+ throwCounterExhaustedException();
+
+ throw new IllegalStateException("This should have been unreachable: " + current);
+ }
+ }
+
+ public long maxLts()
+ {
+ while (true)
+ {
+ long ret = lts.get();
+
+ if (ret == REBASE_IN_PROGRESS)
+ {
+ LockSupport.parkNanos(1);
+ continue;
+ }
+
+ if (ret == DEFUNCT)
+ throwCounterExhaustedException();
+
+ return ret;
+ }
+ }
+
+ // NOTE(review): when idx exceeds historySize this copies the ring buffer starting at
+ // slot 0, which is not necessarily the oldest epoch — confirm intended for debug dumps.
+ public Configuration.ClockConfiguration toConfig()
+ {
+ int idx = this.idx;
+ long[] history = new long[Math.min(idx, historySize)];
+ for (int i = 0; i < history.length; i++)
+ history[i] = ltsHistory.get(i);
+ return new Configuration.DebugApproximateMonotonicClockConfiguration(startTimeMicros,
+ ltsHistory.length(),
+ history,
+ lts.get(),
+ idx,
+ epoch,
+ epochTimeUnit);
+ }
+
+ // Converts a real timestamp (micros) to the LTS issued around that time, by locating
+ // the epoch the RTS falls into and offsetting from that epoch's first LTS.
+ public long lts(final long rts)
+ {
+ final int historyIdx = idx - 1;
+ for (int i = 0; i < historySize - 1 && historyIdx - i >= 0; i++)
+ {
+ long periodStartRts = startTimeMicros + periodMicros * (historyIdx - i);
+ if (rts >= periodStartRts)
+ {
+ long periodStartLts = get(historyIdx - i);
+ return periodStartLts + rts - periodStartRts;
+ }
+ }
+ throw new IllegalStateException("RTS is too old to convert to LTS: " + rts + "\n " + ltsHistory);
+ }
+
+ // TODO: binary search instead
+ // Inverse of lts(rts): maps an already-issued LTS back to a unique real timestamp.
+ public long rts(final long lts)
+ {
+ assert lts <= maxLts() : String.format("Queried for LTS we haven't yet issued %d. Max is %d.", lts, maxLts());
+
+ final int historyIdx = idx - 1;
+ for (int i = 0; i < historySize - 1 && historyIdx - i >= 0; i++)
+ {
+ long periodStartLts = get(historyIdx - i);
+ if (lts >= periodStartLts)
+ {
+ long periodStartRts = startTimeMicros + periodMicros * (historyIdx - i);
+ return periodStartRts + lts - periodStartLts;
+ }
+ }
+
+ throw new IllegalStateException("LTS is too old to convert to RTS: " + lts + "\n " + dumpHistory());
+ }
+
+ private String dumpHistory()
+ {
+ String s = "";
+ int idx = this.idx;
+ for (int i = 0; i < Math.min(idx, historySize); i++)
+ {
+ s += ltsHistory.get(i) + ",";
+ }
+ return s.substring(0, Math.max(0, s.length() - 1));
+ }
+
+ public String toString()
+ {
+ return String.format("withDebugClock(%dL,\n\t%d,\n\t%d,\n\t%d,\n\t%d,\n\t%s,\n\t%s)",
+ startTimeMicros,
+ historySize,
+ lts.get(),
+ idx,
+ epoch,
+ epochTimeUnit,
+ dumpHistory());
+ }
+
+ private void throwCounterExhaustedException()
+ {
+ long diff = get(idx) - get(idx - 1);
+ throw new RuntimeException(String.format("Counter was exhausted. Drawn %d out of %d lts during the period.",
+ diff, periodMicros));
+ }
+}
diff --git a/harry-core/src/harry/model/clock/OffsetClock.java b/harry-core/src/harry/model/clock/OffsetClock.java
new file mode 100644
index 0000000..9f40a64
--- /dev/null
+++ b/harry-core/src/harry/model/clock/OffsetClock.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model.clock;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+import harry.core.Configuration;
+import harry.model.OpSelectors;
+
+public class OffsetClock implements OpSelectors.MonotonicClock
+{
+ final AtomicLong lts = new AtomicLong(0);
+
+ private final long base;
+
+ public OffsetClock(long base)
+ {
+ this.base = base;
+ }
+
+ // Trivial clock: LTS n maps to RTS base + n, so the two conversions below are exact inverses.
+ public long rts(long lts)
+ {
+ return lts + base;
+ }
+
+ public long lts(long rts)
+ {
+ return rts - base;
+ }
+
+ // Atomically draws the next logical timestamp, starting from 0
+ public long nextLts()
+ {
+ return lts.getAndIncrement();
+ }
+
+ public long maxLts()
+ {
+ return lts.get();
+ }
+
+ public Configuration.ClockConfiguration toConfig()
+ {
+ throw new RuntimeException("not implemented");
+ }
+}
diff --git a/harry-core/src/harry/model/sut/NoOpSut.java b/harry-core/src/harry/model/sut/NoOpSut.java
new file mode 100644
index 0000000..c09490f
--- /dev/null
+++ b/harry-core/src/harry/model/sut/NoOpSut.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model.sut;
+
+import java.util.concurrent.CompletableFuture;
+
+public class NoOpSut implements SystemUnderTest
+{
+ // Holds no resources, so it never reports itself as shut down
+ public boolean isShutdown()
+ {
+ return false;
+ }
+
+ public void shutdown()
+ {
+ // nothing to release
+ }
+
+ // Swallows every statement and reports an empty result set
+ public Object[][] execute(String statement, Object... bindings)
+ {
+ return new Object[0][];
+ }
+
+ // Runs synchronously on the calling thread, wrapped in an already-completed future
+ public CompletableFuture<Object[][]> executeAsync(String statement, Object... bindings)
+ {
+ return CompletableFuture.supplyAsync(() -> execute(statement, bindings),
+ Runnable::run);
+ }
+}
diff --git a/harry-core/src/harry/model/sut/PrintlnSut.java b/harry-core/src/harry/model/sut/PrintlnSut.java
new file mode 100644
index 0000000..463d780
--- /dev/null
+++ b/harry-core/src/harry/model/sut/PrintlnSut.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model.sut;
+
+import java.util.Arrays;
+import java.util.concurrent.CompletableFuture;
+
+public class PrintlnSut implements SystemUnderTest
+{
+ // Holds no resources, so it never reports itself as shut down
+ public boolean isShutdown()
+ {
+ return false;
+ }
+
+ public void shutdown()
+ {
+ // nothing to release
+ }
+
+ // Logs the statement and its bindings to stdout and reports an empty result set
+ public Object[][] execute(String statement, Object... bindings)
+ {
+ System.out.printf("%s | %s%n",
+ statement,
+ Arrays.toString(bindings));
+ return new Object[0][];
+ }
+
+ // Runs synchronously on the calling thread, wrapped in an already-completed future
+ public CompletableFuture<Object[][]> executeAsync(String statement, Object... bindings)
+ {
+ return CompletableFuture.supplyAsync(() -> execute(statement, bindings),
+ Runnable::run);
+ }
+}
diff --git a/harry-core/src/harry/model/sut/SystemUnderTest.java b/harry-core/src/harry/model/sut/SystemUnderTest.java
new file mode 100644
index 0000000..9e5e0a9
--- /dev/null
+++ b/harry-core/src/harry/model/sut/SystemUnderTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.model.sut;
+
+import java.util.concurrent.CompletableFuture;
+
+import harry.operations.CompiledStatement;
+
+public interface SystemUnderTest
+{
+ interface SUTFactory
+ {
+ SystemUnderTest make();
+ }
+
+ boolean isShutdown();
+
+ void shutdown();
+
+ // DDL statements carry no bind markers
+ default void schemaChange(String statement)
+ {
+ execute(statement, new Object[]{});
+ }
+
+ default Object[][] execute(CompiledStatement statement)
+ {
+ return execute(statement.cql(), statement.bindings());
+ }
+
+ Object[][] execute(String statement, Object... bindings);
+
+ CompletableFuture<Object[][]> executeAsync(String statement, Object... bindings);
+
+ // NOTE(review): duplicates SUTFactory above — kept for compatibility; consider consolidating
+ interface SystemUnderTestFactory
+ {
+ SystemUnderTest create();
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/operations/CompiledStatement.java b/harry-core/src/harry/operations/CompiledStatement.java
new file mode 100644
index 0000000..21261f0
--- /dev/null
+++ b/harry-core/src/harry/operations/CompiledStatement.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.operations;
+
+import java.util.Arrays;
+
+// A CQL statement paired with the values for its bind markers.
+public class CompiledStatement
+{
+ private final String cql;
+ private final Object[] bindings;
+
+ public CompiledStatement(String cql, Object... bindings)
+ {
+ this.cql = cql;
+ this.bindings = bindings;
+ }
+
+ public String cql()
+ {
+ return cql;
+ }
+
+ public Object[] bindings()
+ {
+ return bindings;
+ }
+
+ public static CompiledStatement create(String cql, Object... bindings)
+ {
+ return new CompiledStatement(cql, bindings);
+ }
+
+ public String toString()
+ {
+ return String.format("CompiledStatement{cql='%s', bindings=%s}",
+ cql,
+ Arrays.toString(bindings));
+ }
+}
diff --git a/harry-core/src/harry/operations/DeleteHelper.java b/harry-core/src/harry/operations/DeleteHelper.java
new file mode 100644
index 0000000..41a5d43
--- /dev/null
+++ b/harry-core/src/harry/operations/DeleteHelper.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.operations;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.IntConsumer;
+
+import com.datastax.driver.core.querybuilder.Delete;
+import com.datastax.driver.core.querybuilder.QueryBuilder;
+import harry.ddl.ColumnSpec;
+import harry.ddl.SchemaSpec;
+import harry.util.BitSet;
+
+public class DeleteHelper
+{
+ // Builds a DELETE of specific regular columns for the single row (pd, cd) at timestamp rts
+ public static CompiledStatement deleteColumn(SchemaSpec schema,
+ long pd,
+ long cd,
+ BitSet columnsToDelete,
+ long rts)
+ {
+ if (columnsToDelete == null || columnsToDelete.allUnset())
+ throw new IllegalArgumentException("Can't have a delete column query with no columns set. Column mask: " + columnsToDelete);
+
+ return delete(schema, pd, cd, columnsToDelete, rts);
+ }
+
+ // Builds a whole-row DELETE for (pd, cd) at timestamp rts (null mask = all columns)
+ public static CompiledStatement deleteRow(SchemaSpec schema,
+ long pd,
+ long cd,
+ long rts)
+ {
+ return delete(schema, pd, cd, null, rts);
+ }
+
+ // Builds a DELETE constrained by arbitrary clustering relations (e.g. slices)
+ public static CompiledStatement delete(SchemaSpec schema,
+ long pd,
+ List<Relation> relations,
+ BitSet columnsToDelete,
+ long rts)
+ {
+ return compile(schema,
+ pd,
+ relations,
+ columnsToDelete,
+ rts);
+ }
+
+ private static CompiledStatement delete(SchemaSpec schema,
+ long pd,
+ long cd,
+ BitSet mask,
+ long rts)
+ {
+ // Slice the clustering descriptor into per-column values and turn them into EQ relations
+ return compile(schema,
+ pd,
+ Relation.eqRelations(schema.ckGenerator.slice(cd),
+ schema.clusteringKeys),
+ mask,
+ rts);
+ }
+
+ private static CompiledStatement compile(SchemaSpec schema,
+ long pd,
+ List<Relation> relations,
+ BitSet columnsToDelete,
+ long ts)
+ {
+ // A null column mask means "delete the whole row"; otherwise delete only the masked columns
+ Delete delete;
+ if (columnsToDelete == null)
+ delete = QueryBuilder.delete().from(schema.keyspace, schema.table);
+ else
+ delete = QueryBuilder.delete(columnNames(schema.regularColumns, columnsToDelete))
+ .from(schema.keyspace, schema.table);
+
+ Delete.Where where = delete.where();
+ List<Object> bindings = new ArrayList<>();
+
+ addRelations(schema, where, bindings, pd, relations);
+ // NOTE(review): assumes using() mutates the statement in place, so the returned
+ // Options can be ignored here — confirm against the driver version in use
+ delete.using(QueryBuilder.timestamp(ts));
+
+ Object[] bindingsArr = bindings.toArray(new Object[bindings.size()]);
+ return new CompiledStatement(delete.toString(), bindingsArr);
+ }
+
+ // Adds WHERE clauses (with bind markers) for the partition key plus the given relations,
+ // collecting the inflated bound values into `bindings` in clause order
+ private static void addRelations(SchemaSpec schema, Delete.Where where, List<Object> bindings, long pd, List<Relation> relations)
+ {
+ schema.inflateRelations(pd,
+ relations,
+ (spec, kind, value) -> {
+ where.and(kind.getClause(spec));
+ bindings.add(value);
+ });
+ }
+
+ // Maps the set bits of `mask` to the names of the corresponding regular columns
+ private static String[] columnNames(List<ColumnSpec<?>> columns, BitSet mask)
+ {
+ String[] columnNames = new String[mask.setCount()];
+ mask.eachSetBit(new IntConsumer()
+ {
+ int i = 0;
+
+ public void accept(int idx)
+ {
+ columnNames[i++] = columns.get(idx).name;
+ }
+ });
+ return columnNames;
+ }
+}
diff --git a/harry-core/src/harry/operations/Relation.java b/harry-core/src/harry/operations/Relation.java
new file mode 100644
index 0000000..1218837
--- /dev/null
+++ b/harry-core/src/harry/operations/Relation.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.operations;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.datastax.driver.core.querybuilder.Clause;
+import harry.ddl.ColumnSpec;
+import harry.model.ExhaustiveChecker;
+
+import static com.datastax.driver.core.querybuilder.QueryBuilder.bindMarker;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.eq;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.gt;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.gte;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.lt;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.lte;
+
+public class Relation
+{
+ public final RelationKind kind;
+ public final ColumnSpec<?> columnSpec;
+ // Theoretically, in model, we'll just be able to compare stuff according to relation, and pass it to DB
+ public long descriptor;
+
+ Relation(RelationKind kind,
+ ColumnSpec<?> columnSpec,
+ long descriptor)
+ {
+ this.kind = kind;
+ this.columnSpec = columnSpec;
+ this.descriptor = descriptor;
+ }
+
+ public boolean match(long l)
+ {
+ return kind.match(columnSpec.type.generator()::compare, l, descriptor);
+ }
+
+ public Object value()
+ {
+ return columnSpec.inflate(descriptor);
+ }
+
+ public String column()
+ {
+ return columnSpec.name;
+ }
+
+ public Clause toClause()
+ {
+ return kind.getClause(column(), bindMarker());
+ }
+
+ public String toString()
+ {
+ return "Relation{" +
+ "kind=" + kind +
+ ", columnSpec=" + columnSpec +
+ ", descriptor=" + descriptor + " (" + Long.toHexString(descriptor) + ")" +
+ '}';
+ }
+
+ public static Relation relation(RelationKind kind, ColumnSpec<?> columnSpec, long descriptor)
+ {
+ return new Relation(kind, columnSpec, descriptor);
+ }
+
+ public static Relation eqRelation(ColumnSpec<?> columnSpec, long descriptor)
+ {
+ return new Relation(RelationKind.EQ, columnSpec, descriptor);
+ }
+
+ public static List<Relation> eqRelations(long[] key, List<ColumnSpec<?>> columnSpecs)
+ {
+ List<Relation> relations = new ArrayList<>(key.length);
+ addEqRelation(key, columnSpecs, relations);
+ return relations;
+ }
+
+ public static void addEqRelation(long[] key, List<ColumnSpec<?>> columnSpecs, List<Relation> relations)
+ {
+ addRelation(key, columnSpecs, relations, RelationKind.EQ);
+ }
+
+ public static void addRelation(long[] key, List<ColumnSpec<?>> columnSpecs, List<Relation> relations, RelationKind kind)
+ {
+ assert key.length == columnSpecs.size() :
+ String.format("Key size (%d) should equal to column spec size (%d)", key.length, columnSpecs.size());
+ for (int i = 0; i < key.length; i++)
+ {
+ ColumnSpec<?> spec = columnSpecs.get(i);
+ relations.add(relation(kind, spec, key[i]));
+ }
+ }
+
+ public enum RelationKind
+ {
+ LT
+ {
+ @Override
+ public Clause getClause(String name, Object obj)
+ {
+ return lt(name, obj);
+ }
+
+ public Clause getClause(List<String> name, List<Object> obj)
+ {
+ return lt(name, obj);
+ }
+
+ public boolean isNegatable()
+ {
+ return true;
+ }
+
+ public boolean isInclusive()
+ {
+ return false;
+ }
+
+ public RelationKind negate()
+ {
+ return GT;
+ }
+
+ public long nextMatch(long n)
+ {
+ return Math.subtractExact(n, 1);
+ }
+
+ public String toString()
+ {
+ return "<";
+ }
+
+ public boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r)
+ {
+ return comparator.compare(l, r) < 0;
+ }
+ },
+ GT
+ {
+ @Override
+ public Clause getClause(String name, Object obj)
+ {
+ return gt(name, obj);
+ }
+
+ public Clause getClause(List<String> name, List<Object> obj)
+ {
+ return gt(name, obj);
+ }
+
+ public boolean isNegatable()
+ {
+ return true;
+ }
+
+ public boolean isInclusive()
+ {
+ return false;
+ }
+
+ public RelationKind negate()
+ {
+ return LT;
+ }
+
+ public String toString()
+ {
+ return ">";
+ }
+
+ public boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r)
+ {
+ return comparator.compare(l, r) > 0;
+ }
+
+ public long nextMatch(long n)
+ {
+ return Math.addExact(n, 1);
+ }
+ },
+ LTE
+ {
+ @Override
+ public Clause getClause(String name, Object obj)
+ {
+ return lte(name, obj);
+ }
+
+ public Clause getClause(List<String> name, List<Object> obj)
+ {
+ return lt(name, obj);
+ }
+
+ public boolean isNegatable()
+ {
+ return true;
+ }
+
+ public boolean isInclusive()
+ {
+ return true;
+ }
+
+ public RelationKind negate()
+ {
+ return GTE;
+ }
+
+ public String toString()
+ {
+ return "<=";
+ }
+
+ public boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r)
+ {
+ return comparator.compare(l, r) <= 0;
+ }
+
+ public long nextMatch(long n)
+ {
+ return Math.subtractExact(n, 1);
+ }
+ },
+ GTE
+ {
+ @Override
+ public Clause getClause(String name, Object obj)
+ {
+ return gte(name, obj);
+ }
+
+ public Clause getClause(List<String> name, List<Object> obj)
+ {
+ return gte(name, obj);
+ }
+
+ public boolean isNegatable()
+ {
+ return true;
+ }
+
+ public boolean isInclusive()
+ {
+ return true;
+ }
+
+ public RelationKind negate()
+ {
+ return LTE;
+ }
+
+ public String toString()
+ {
+ return ">=";
+ }
+
+ public boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r)
+ {
+ return comparator.compare(l, r) >= 0;
+ }
+
+ public long nextMatch(long n)
+ {
+ return Math.addExact(n, 1);
+ }
+ },
+ EQ
+ {
+ @Override
+ public Clause getClause(String name, Object obj)
+ {
+ return eq(name, obj);
+ }
+
+ public Clause getClause(List<String> name, List<Object> obj)
+ {
+ return eq(name, obj);
+ }
+
+ public boolean isNegatable()
+ {
+ return false;
+ }
+
+ public boolean isInclusive()
+ {
+ return true;
+ }
+
+ public RelationKind negate()
+ {
+ throw new IllegalArgumentException("Cannot negate EQ");
+ }
+
+ public long nextMatch(long n)
+ {
+ return n;
+ }
+
+ public String toString()
+ {
+ return "=";
+ }
+
+ public boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r)
+ {
+ return comparator.compare(l, r) == 0;
+ }
+ };
+
+ public abstract boolean match(ExhaustiveChecker.LongComparator comparator, long l, long r);
+
+ public abstract Clause getClause(String name, Object obj);
+
+ public Clause getClause(ColumnSpec<?> spec)
+ {
+ return getClause(spec.name, bindMarker());
+ }
+
+ public abstract Clause getClause(List<String> name, List<Object> obj);
+
+ public abstract boolean isNegatable();
+
+ public abstract boolean isInclusive();
+
+ public abstract RelationKind negate();
+
+ public abstract long nextMatch(long n);
+ }
+}
diff --git a/harry-core/src/harry/operations/WriteHelper.java b/harry-core/src/harry/operations/WriteHelper.java
new file mode 100644
index 0000000..c1f9829
--- /dev/null
+++ b/harry-core/src/harry/operations/WriteHelper.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.operations;
+
+import java.util.List;
+
+import harry.ddl.ColumnSpec;
+import harry.ddl.SchemaSpec;
+import harry.generators.DataGenerators;
+
+import static com.datastax.driver.core.querybuilder.QueryBuilder.bindMarker;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.eq;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.insertInto;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.set;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.timestamp;
+import static com.datastax.driver.core.querybuilder.QueryBuilder.update;
+
+public class WriteHelper
+{
+ public static CompiledStatement inflateInsert(SchemaSpec schema,
+ long pd,
+ long cd,
+ long[] vds,
+ long timestamp)
+ {
+ Object[] partitionKey = schema.inflatePartitionKey(pd);
+ Object[] clusteringKey = schema.inflateClusteringKey(cd);
+ Object[] regularColumns = schema.inflateRegularColumns(vds);
+
+ Object[] bindings = new Object[schema.allColumns.size()];
+ int bindingsCount = 0;
+ com.datastax.driver.core.querybuilder.Insert insert = insertInto(schema.keyspace,
+ schema.table);
+
+ bindingsCount += addValue(insert, bindings, schema.partitionKeys, partitionKey, bindingsCount);
+ bindingsCount += addValue(insert, bindings, schema.clusteringKeys, clusteringKey, bindingsCount);
+ bindingsCount += addValue(insert, bindings, schema.regularColumns, regularColumns, bindingsCount);
+
+ insert.using(timestamp(timestamp));
+
+ // Some of the values were unset
+ if (bindingsCount != bindings.length)
+ {
+ Object[] tmp = new Object[bindingsCount];
+ System.arraycopy(bindings, 0, tmp, 0, bindingsCount);
+ bindings = tmp;
+ }
+ return CompiledStatement.create(insert.toString(), bindings);
+ }
+
+ private static int addValue(com.datastax.driver.core.querybuilder.Insert insert,
+ Object[] bindings,
+ List<ColumnSpec<?>> columns,
+ Object[] data,
+ int bound)
+ {
+ assert data.length == columns.size();
+
+ int bindingsCount = 0;
+ for (int i = 0; i < data.length; i++)
+ {
+ if (data[i] == DataGenerators.UNSET_VALUE)
+ continue;
+
+ insert.value(columns.get(i).name, bindMarker());
+ bindings[bound + bindingsCount] = data[i];
+ bindingsCount++;
+ }
+
+ return bindingsCount;
+ }
+
+ public static CompiledStatement inflateUpdate(SchemaSpec schema,
+ long pd,
+ long cd,
+ long[] vds,
+ long timestamp)
+ {
+ Object[] partitionKey = schema.inflatePartitionKey(pd);
+ Object[] clusteringKey = schema.inflateClusteringKey(cd);
+ Object[] regularColumns = schema.inflateRegularColumns(vds);
+
+ Object[] bindings = new Object[schema.allColumns.size()];
+ int bindingsCount = 0;
+ com.datastax.driver.core.querybuilder.Update update = update(schema.keyspace,
+ schema.table);
+
+ bindingsCount += addWith(update, bindings, schema.regularColumns, regularColumns, bindingsCount);
+ bindingsCount += addWhere(update, bindings, schema.partitionKeys, partitionKey, bindingsCount);
+ bindingsCount += addWhere(update, bindings, schema.clusteringKeys, clusteringKey, bindingsCount);
+
+ update.using(timestamp(timestamp));
+ // TODO: TTL
+ // ttl.ifPresent(ts -> update.using(ttl(ts)));
+
+ return CompiledStatement.create(update.toString(), bindings);
+ }
+
+ private static int addWith(com.datastax.driver.core.querybuilder.Update update,
+ Object[] bindings,
+ List<ColumnSpec<?>> columns,
+ Object[] data,
+ int bound)
+ {
+ assert data.length == columns.size();
+
+ for (int i = 0; i < data.length; i++)
+ {
+ update.with(set(columns.get(i).name, bindMarker()));
+ bindings[bound + i] = data[i];
+ }
+
+ return data.length;
+ }
+
+ private static int addWhere(com.datastax.driver.core.querybuilder.Update update,
+ Object[] bindings,
+ List<ColumnSpec<?>> columns,
+ Object[] data,
+ int bound)
+ {
+ assert data.length == columns.size();
+
+ for (int i = 0; i < data.length; i++)
+ {
+ update.where().and(eq(columns.get(i).name, bindMarker()));
+ bindings[bound + i] = data[i];
+ }
+
+ return data.length;
+ }
+}
\ No newline at end of file
diff --git a/harry-core/src/harry/reconciler/Reconciler.java b/harry-core/src/harry/reconciler/Reconciler.java
new file mode 100644
index 0000000..061cec3
--- /dev/null
+++ b/harry-core/src/harry/reconciler/Reconciler.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.reconciler;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.TreeMap;
+
+import harry.ddl.SchemaSpec;
+import harry.model.OpSelectors;
+import harry.runner.AbstractPartitionVisitor;
+import harry.runner.PartitionVisitor;
+import harry.runner.Query;
+import harry.runner.QuerySelector;
+import harry.util.BitSet;
+import harry.util.Ranges;
+
+import static harry.generators.DataGenerators.NIL_DESCR;
+import static harry.generators.DataGenerators.UNSET_DESCR;
+import static harry.model.Model.NO_TIMESTAMP;
+
+/**
+ * A simple Cassandra-style reconciler for operations against model state.
+ * <p>
+ * It is useful both as a testing/debugging tool (to avoid starting Cassandra
+ * cluster to get a result set), and as a quiescent model checker.
+ */
public class Reconciler
{
    private final OpSelectors.DescriptorSelector descriptorSelector;
    private final OpSelectors.PdSelector pdSelector;
    private final QuerySelector querySelector;
    private final SchemaSpec schema;

    public Reconciler(SchemaSpec schema,
                      OpSelectors.PdSelector pdSelector,
                      OpSelectors.DescriptorSelector descriptorSelector,
                      QuerySelector querySelector)
    {
        this.descriptorSelector = descriptorSelector;
        this.pdSelector = pdSelector;
        this.schema = schema;
        this.querySelector = querySelector;
    }

    /**
     * Replays every operation issued against partition {@code pd} up to and
     * including logical timestamp {@code maxLts}, returning the rows the model
     * expects to match {@code query}.
     *
     * Two passes over the partition's operation history:
     *  1. collect all row/range deletions into {@link Ranges};
     *  2. apply writes/column deletions, skipping anything shadowed by pass 1.
     */
    public PartitionState inflatePartitionState(long pd, long maxLts, Query query)
    {
        List<Ranges.Range> ranges = new ArrayList<>();
        // TODO: we should think of a single-pass algorithm that would allow us to inflate all deletes and range deletes for a partition
        // Pass 1: gather row- and range-deletions (each remembered with the lts it took effect at).
        PartitionVisitor partitionVisitor = new AbstractPartitionVisitor(pdSelector, descriptorSelector, schema)
        {
            public void operation(long lts, long pd, long cd, long m, long opId)
            {
                // Only operations whose clustering falls inside the queried slice matter.
                if (!query.match(cd))
                    return;

                OpSelectors.OperationKind opType = descriptorSelector.operationType(pd, lts, opId);
                if (opType == OpSelectors.OperationKind.DELETE_RANGE)
                    ranges.add(querySelector.inflate(lts, opId).toRange(lts));
                else if (opType == OpSelectors.OperationKind.DELETE_ROW)
                    // A single-row delete is modelled as a [cd, cd] closed range.
                    ranges.add(new Ranges.Range(cd, cd, true, true, lts));
            }
        };

        // Walk the chain of lts values that touched this partition, in order.
        long currentLts = pdSelector.minLtsFor(pd);
        while (currentLts <= maxLts && currentLts >= 0)
        {
            partitionVisitor.visitPartition(currentLts);
            currentLts = pdSelector.nextLts(currentLts);
        }

        // We have to do two passes to avoid inflating deleted items
        Ranges rts = new Ranges(ranges);

        // Pass 2: apply writes and column deletions that survive the collected deletions.
        PartitionState partitionState = new PartitionState();
        partitionVisitor = new AbstractPartitionVisitor(pdSelector, descriptorSelector, schema)
        {
            public void operation(long lts, long pd, long cd, long m, long opId)
            {
                // Skip operations outside the query, or shadowed by a later-or-equal delete.
                if (!query.match(cd) || rts.isShadowed(cd, lts))
                    return;

                OpSelectors.OperationKind opType = descriptorSelector.operationType(pd, lts, opId);

                if (opType == OpSelectors.OperationKind.WRITE)
                {
                    partitionState.add(cd,
                                       descriptorSelector.vds(pd, cd, lts, opId, schema),
                                       lts);
                }
                else if (opType == OpSelectors.OperationKind.DELETE_COLUMN)
                {
                    partitionState.deleteColumns(cd,
                                                 descriptorSelector.columnMask(pd, lts, opId));
                }
                else
                {
                    // Row/range deletes were consumed in pass 1; anything else is a modelling error.
                    throw new AssertionError();
                }
            }
        };

        // Same lts walk as pass 1 — PartitionState.add relies on this ascending order.
        currentLts = pdSelector.minLtsFor(pd);
        while (currentLts <= maxLts && currentLts >= 0)
        {
            partitionVisitor.visitPartition(currentLts);
            currentLts = pdSelector.nextLts(currentLts);
        }

        return partitionState;
    }

    /**
     * Expected rows of a single partition, keyed and iterated by clustering descriptor.
     */
    public static class PartitionState implements Iterable<RowState>
    {
        // cd -> row state; TreeMap keeps clustering order for forward/reverse iteration.
        private NavigableMap<Long, RowState> rows;

        private PartitionState()
        {
            rows = new TreeMap<>();
        }

        /**
         * Applies a write of value descriptors {@code vds} at {@code lts} to row {@code cd},
         * creating the row if absent. UNSET positions leave existing values untouched
         * (or NIL/NO_TIMESTAMP for a fresh row). Must be called in ascending lts order.
         */
        private void add(long cd,
                         long[] vds,
                         long lts)
        {
            RowState state = rows.get(cd);

            if (state == null)
            {
                long[] ltss = new long[vds.length];
                long[] vdsCopy = new long[vds.length];
                for (int i = 0; i < vds.length; i++)
                {
                    if (vds[i] != UNSET_DESCR)
                    {
                        ltss[i] = lts;
                        vdsCopy[i] = vds[i];
                    }
                    else
                    {
                        ltss[i] = NO_TIMESTAMP;
                        vdsCopy[i] = NIL_DESCR;
                    }
                }


                state = new RowState(cd, vdsCopy, ltss);
                rows.put(cd, state);
            }
            else
            {
                for (int i = 0; i < vds.length; i++)
                {
                    if (vds[i] != UNSET_DESCR)
                    {
                        state.vds[i] = vds[i];
                        assert lts > state.lts[i]; // sanity check; we're iterating in lts order
                        state.lts[i] = lts;
                    }
                }
            }
        }

        /**
         * Nulls out (NIL/NO_TIMESTAMP) the columns selected by {@code mask} in row
         * {@code cd}; a no-op if the row does not exist.
         */
        private void deleteColumns(long cd, BitSet mask)
        {
            RowState state = rows.get(cd);
            if (state == null)
                return;

            for (int i = 0; i < mask.size(); i++)
            {
                if (mask.isSet(i))
                {
                    state.vds[i] = NIL_DESCR;
                    state.lts[i] = NO_TIMESTAMP;
                }
            }
        }

        public Iterator<RowState> iterator()
        {
            return iterator(false);
        }

        // reverse == true iterates rows in descending clustering order.
        public Iterator<RowState> iterator(boolean reverse)
        {
            if (reverse)
                return rows.descendingMap().values().iterator();

            return rows.values().iterator();
        }
    }

    /**
     * Expected state of one row: its clustering descriptor, per-column value
     * descriptors, and the lts each column was last written at.
     */
    public static class RowState
    {
        public final long cd;
        public final long[] vds;
        public final long[] lts;

        public RowState(long cd,
                        long[] vds,
                        long[] lts)
        {
            this.cd = cd;
            this.vds = vds;
            this.lts = lts;
        }

        public String toString()
        {
            return "RowState{" +
                   "cd=" + cd +
                   ", vds=" + Arrays.toString(vds) +
                   ", lts=" + Arrays.toString(lts) +
                   '}';
        }
    }
}
\ No newline at end of file
diff --git a/harry-core/src/harry/runner/AbstractPartitionVisitor.java b/harry-core/src/harry/runner/AbstractPartitionVisitor.java
new file mode 100644
index 0000000..aefa79f
--- /dev/null
+++ b/harry-core/src/harry/runner/AbstractPartitionVisitor.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package harry.runner;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import harry.ddl.SchemaSpec;
+import harry.model.Model;
+import harry.model.OpSelectors;
+
+public abstract class AbstractPartitionVisitor implements PartitionVisitor
+{
+ protected final OpSelectors.PdSelector pdSelector;
+ protected final OpSelectors.DescriptorSelector descriptorSelector;
+ protected final SchemaSpec schema;
+
/**
 * @param pdSelector         picks the partition descriptor visited at each lts
 * @param descriptorSelector picks modifications/operations within the partition
 * @param schema             schema the visited partitions belong to
 */
public AbstractPartitionVisitor(OpSelectors.PdSelector pdSelector,
                                OpSelectors.DescriptorSelector descriptorSelector,
                                SchemaSpec schema)
{
    this.schema = schema;
    this.pdSelector = pdSelector;
    this.descriptorSelector = descriptorSelector;
}
+
+ public void visitPartition(long lts)
+ {
+ visitPartition(lts, pdSelector.pd(lts, schema));
+ }
+
+ private void visitPartition(long lts, long pd)
+ {
+ beforeLts(lts, pd);
+
+ int modificationsCount = descriptorSelector.numberOfModifications(lts);
+ int opsPerModification = descriptorSelector.opsPerModification(lts);
+ int maxPartitionSize = descriptorSelector.maxPartitionSize();
+ assert opsPerModification * modificationsCount <= maxPartitionSize : "Number of operations exceeds partition width";
+
+ for (int m = 0; m < modificationsCount; m++)
+ {
+ Set<Long> visitedCds = new HashSet<>(); // for debug purposes
... 6319 lines suppressed ...
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org