You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2016/01/24 04:36:16 UTC

[14/14] cassandra git commit: Integrate SASI index into Cassandra

Integrate SASI index into Cassandra

patch by xedin; reviewed by beobal for CASSANDRA-10661


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/72790dc8
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/72790dc8
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/72790dc8

Branch: refs/heads/trunk
Commit: 72790dc8e34826b39ac696b03025ae6b7b6beb2b
Parents: 11c8ca6
Author: Pavel Yaskevich <xe...@apache.org>
Authored: Wed Dec 2 19:23:54 2015 -0800
Committer: Pavel Yaskevich <xe...@apache.org>
Committed: Sat Jan 23 19:35:29 2016 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |     1 +
 build.xml                                       |    22 +-
 doc/SASI.md                                     |   768 +
 lib/concurrent-trees-2.4.0.jar                  |   Bin 0 -> 118696 bytes
 lib/hppc-0.5.4.jar                              |   Bin 0 -> 1305173 bytes
 lib/jflex-1.6.0.jar                             |   Bin 0 -> 1048690 bytes
 lib/licenses/concurrent-trees-2.4.0.txt         |   201 +
 lib/licenses/hppc-0.5.4.txt                     |   202 +
 lib/licenses/jflex-1.6.0.txt                    |   201 +
 lib/licenses/primitive-1.0.txt                  |   201 +
 lib/licenses/snowball-stemmer-1.3.0.581.1.txt   |   201 +
 lib/primitive-1.0.jar                           |   Bin 0 -> 52589 bytes
 lib/snowball-stemmer-1.3.0.581.1.jar            |   Bin 0 -> 93019 bytes
 .../cassandra/config/DatabaseDescriptor.java    |     7 +-
 .../org/apache/cassandra/db/ColumnIndex.java    |     6 +-
 .../apache/cassandra/db/filter/RowFilter.java   |    15 +-
 .../cassandra/index/SecondaryIndexManager.java  |    11 +
 .../apache/cassandra/index/sasi/SASIIndex.java  |   288 +
 .../cassandra/index/sasi/SASIIndexBuilder.java  |   128 +
 .../cassandra/index/sasi/SSTableIndex.java      |   187 +
 .../org/apache/cassandra/index/sasi/Term.java   |    65 +
 .../cassandra/index/sasi/TermIterator.java      |   208 +
 .../index/sasi/analyzer/AbstractAnalyzer.java   |    51 +
 .../index/sasi/analyzer/NoOpAnalyzer.java       |    54 +
 .../sasi/analyzer/NonTokenizingAnalyzer.java    |   126 +
 .../sasi/analyzer/NonTokenizingOptions.java     |   147 +
 .../sasi/analyzer/SUPPLEMENTARY.jflex-macro     |   143 +
 .../index/sasi/analyzer/StandardAnalyzer.java   |   194 +
 .../sasi/analyzer/StandardTokenizerImpl.jflex   |   220 +
 .../analyzer/StandardTokenizerInterface.java    |    65 +
 .../sasi/analyzer/StandardTokenizerOptions.java |   272 +
 .../analyzer/filter/BasicResultFilters.java     |    76 +
 .../analyzer/filter/FilterPipelineBuilder.java  |    51 +
 .../analyzer/filter/FilterPipelineExecutor.java |    53 +
 .../analyzer/filter/FilterPipelineTask.java     |    52 +
 .../sasi/analyzer/filter/StemmerFactory.java    |   101 +
 .../sasi/analyzer/filter/StemmingFilters.java   |    46 +
 .../sasi/analyzer/filter/StopWordFactory.java   |   100 +
 .../sasi/analyzer/filter/StopWordFilters.java   |    42 +
 .../cassandra/index/sasi/conf/ColumnIndex.java  |   193 +
 .../cassandra/index/sasi/conf/DataTracker.java  |   162 +
 .../cassandra/index/sasi/conf/IndexMode.java    |   169 +
 .../index/sasi/conf/view/PrefixTermTree.java    |   194 +
 .../index/sasi/conf/view/RangeTermTree.java     |    77 +
 .../index/sasi/conf/view/TermTree.java          |    58 +
 .../cassandra/index/sasi/conf/view/View.java    |   104 +
 .../cassandra/index/sasi/disk/Descriptor.java   |    51 +
 .../cassandra/index/sasi/disk/OnDiskBlock.java  |   142 +
 .../cassandra/index/sasi/disk/OnDiskIndex.java  |   773 ++
 .../index/sasi/disk/OnDiskIndexBuilder.java     |   627 +
 .../index/sasi/disk/PerSSTableIndexWriter.java  |   361 +
 .../apache/cassandra/index/sasi/disk/Token.java |    42 +
 .../cassandra/index/sasi/disk/TokenTree.java    |   519 +
 .../index/sasi/disk/TokenTreeBuilder.java       |   839 ++
 .../exceptions/TimeQuotaExceededException.java  |    21 +
 .../index/sasi/memory/IndexMemtable.java        |    71 +
 .../index/sasi/memory/KeyRangeIterator.java     |   118 +
 .../cassandra/index/sasi/memory/MemIndex.java   |    51 +
 .../index/sasi/memory/SkipListMemIndex.java     |    97 +
 .../index/sasi/memory/TrieMemIndex.java         |   254 +
 .../cassandra/index/sasi/plan/Expression.java   |   340 +
 .../cassandra/index/sasi/plan/Operation.java    |   477 +
 .../index/sasi/plan/QueryController.java        |   261 +
 .../cassandra/index/sasi/plan/QueryPlan.java    |   170 +
 .../cassandra/index/sasi/sa/ByteTerm.java       |    51 +
 .../cassandra/index/sasi/sa/CharTerm.java       |    54 +
 .../cassandra/index/sasi/sa/IntegralSA.java     |    84 +
 .../org/apache/cassandra/index/sasi/sa/SA.java  |    58 +
 .../cassandra/index/sasi/sa/SuffixSA.java       |   143 +
 .../apache/cassandra/index/sasi/sa/Term.java    |    58 +
 .../cassandra/index/sasi/sa/TermIterator.java   |    31 +
 .../index/sasi/utils/AbstractIterator.java      |   155 +
 .../index/sasi/utils/CombinedTerm.java          |   103 +
 .../index/sasi/utils/CombinedTermIterator.java  |    87 +
 .../index/sasi/utils/CombinedValue.java         |    25 +
 .../index/sasi/utils/MappedBuffer.java          |   253 +
 .../index/sasi/utils/OnDiskIndexIterator.java   |    64 +
 .../sasi/utils/RangeIntersectionIterator.java   |   281 +
 .../index/sasi/utils/RangeIterator.java         |   279 +
 .../index/sasi/utils/RangeUnionIterator.java    |   158 +
 .../cassandra/index/sasi/utils/TypeUtil.java    |    84 +
 .../sasi/utils/trie/AbstractPatriciaTrie.java   |  1151 ++
 .../index/sasi/utils/trie/AbstractTrie.java     |   230 +
 .../cassandra/index/sasi/utils/trie/Cursor.java |    83 +
 .../index/sasi/utils/trie/KeyAnalyzer.java      |    73 +
 .../index/sasi/utils/trie/PatriciaTrie.java     |  1261 ++
 .../cassandra/index/sasi/utils/trie/Trie.java   |   152 +
 .../cassandra/index/sasi/utils/trie/Tries.java  |    95 +
 .../apache/cassandra/io/sstable/Component.java  |    13 +-
 .../cassandra/io/sstable/KeyIterator.java       |     7 +
 .../io/sstable/format/SSTableFlushObserver.java |    12 +-
 .../io/sstable/format/SSTableReader.java        |    20 +
 .../io/sstable/format/SSTableWriter.java        |     9 +-
 .../io/util/DataOutputBufferFixed.java          |     5 +
 .../cassandra/io/util/SequentialWriter.java     |     7 +
 .../apache/cassandra/utils/ByteBufferUtil.java  |    41 +
 .../org/apache/cassandra/utils/FBUtilities.java |     5 +
 .../cassandra/utils/memory/MemoryUtil.java      |     5 +-
 .../index/sasi/analyzer/filter/ar_ST.txt        |   163 +
 .../index/sasi/analyzer/filter/bg_ST.txt        |   260 +
 .../index/sasi/analyzer/filter/cs_ST.txt        |   257 +
 .../index/sasi/analyzer/filter/de_ST.txt        |   604 +
 .../index/sasi/analyzer/filter/en_ST.txt        |   572 +
 .../index/sasi/analyzer/filter/es_ST.txt        |   308 +
 .../index/sasi/analyzer/filter/fi_ST.txt        |   748 +
 .../index/sasi/analyzer/filter/fr_ST.txt        |   464 +
 .../index/sasi/analyzer/filter/hi_ST.txt        |   164 +
 .../index/sasi/analyzer/filter/hu_ST.txt        |   738 +
 .../index/sasi/analyzer/filter/it_ST.txt        |   400 +
 .../index/sasi/analyzer/filter/pl_ST.txt        |   139 +
 .../index/sasi/analyzer/filter/pt_ST.txt        |   357 +
 .../index/sasi/analyzer/filter/ro_ST.txt        |   283 +
 .../index/sasi/analyzer/filter/ru_ST.txt        |   423 +
 .../index/sasi/analyzer/filter/sv_ST.txt        |   387 +
 test/conf/cassandra-murmur.yaml                 |    43 +
 ...dventures_of_huckleberry_finn_mark_twain.txt | 12361 +++++++++++++++++
 .../tokenization/apache_license_header.txt      |    16 +
 test/resources/tokenization/ja_jp_1.txt         |     1 +
 test/resources/tokenization/ja_jp_2.txt         |     2 +
 test/resources/tokenization/lorem_ipsum.txt     |     1 +
 test/resources/tokenization/ru_ru_1.txt         |    19 +
 .../tokenization/top_visited_domains.txt        |     3 +
 test/resources/tokenization/zn_tw_1.txt         |    19 +
 .../unit/org/apache/cassandra/SchemaLoader.java |   139 +
 .../apache/cassandra/cql3/KeyCacheCqlTest.java  |    14 +-
 .../cassandra/index/sasi/SASIIndexTest.java     |  1852 +++
 .../analyzer/NonTokenizingAnalyzerTest.java     |    78 +
 .../sasi/analyzer/StandardAnalyzerTest.java     |   196 +
 .../index/sasi/disk/OnDiskIndexTest.java        |   856 ++
 .../sasi/disk/PerSSTableIndexWriterTest.java    |   161 +
 .../index/sasi/disk/TokenTreeTest.java          |   535 +
 .../index/sasi/plan/OperationTest.java          |   645 +
 .../index/sasi/utils/LongIterator.java          |   106 +
 .../index/sasi/utils/MappedBufferTest.java      |   540 +
 .../utils/RangeIntersectionIteratorTest.java    |   387 +
 .../sasi/utils/RangeUnionIteratorTest.java      |   306 +
 .../format/SSTableFlushObserverTest.java        |     5 +-
 137 files changed, 40339 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 650bf5f..9309ef4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.4
+ * Integrate SASI index into Cassandra (CASSANDRA-10661)
  * Add --skip-flush option to nodetool snapshot
  * Skip values for non-queried columns (CASSANDRA-10657)
  * Add support for secondary indexes on static columns (CASSANDRA-8103)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index f9f42f7..035628a 100644
--- a/build.xml
+++ b/build.xml
@@ -244,6 +244,15 @@
     </target>
 
     <!--
+        Generates Java sources for tokenization support from jflex
+        grammar files
+    -->
+    <target name="generate-jflex-java" description="Generate Java from jflex grammar">
+        <taskdef classname="jflex.anttask.JFlexTask" classpath="${build.lib}/jflex-1.6.0.jar" name="jflex" />
+        <jflex file="${build.src.java}/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex" destdir="${build.src.gen-java}/" />
+    </target>
+
+    <!--
        Fetch Maven Ant Tasks and Cassandra's dependencies
        These targets are intentionally free of dependencies so that they
        can be run stand-alone from a binary release artifact.
@@ -405,6 +414,11 @@
           	<exclusion groupId="log4j" artifactId="log4j"/>
           </dependency>
           <dependency groupId="joda-time" artifactId="joda-time" version="2.4" />
+          <dependency groupId="com.carrotsearch" artifactId="hppc" version="0.5.4" />
+          <dependency groupId="de.jflex" artifactId="jflex" version="1.6.0" />
+          <dependency groupId="net.mintern" artifactId="primitive" version="1.0" />
+          <dependency groupId="com.github.rholder" artifactId="snowball-stemmer" version="1.3.0.581.1" />
+          <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees" version="2.4.0" />
 
         </dependencyManagement>
         <developer id="alakshman" name="Avinash Lakshman"/>
@@ -568,6 +582,12 @@
         <dependency groupId="org.slf4j" artifactId="log4j-over-slf4j"/>
         <dependency groupId="org.slf4j" artifactId="jcl-over-slf4j"/>
         <dependency groupId="org.apache.thrift" artifactId="libthrift"/>
+        <dependency groupId="com.carrotsearch" artifactId="hppc" version="0.5.4" />
+        <dependency groupId="de.jflex" artifactId="jflex" version="1.6.0" />
+        <dependency groupId="net.mintern" artifactId="primitive" version="1.0" />
+        <dependency groupId="com.github.rholder" artifactId="snowball-stemmer" version="1.3.0.581.1" />
+        <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees" version="2.4.0" />
+
       </artifact:pom>
       <artifact:pom id="clientutil-pom"
                     artifactId="cassandra-clientutil"
@@ -734,7 +754,7 @@
         depends="maven-ant-tasks-retrieve-build,build-project" description="Compile Cassandra classes"/>
     <target name="codecoverage" depends="jacoco-run,jacoco-report" description="Create code coverage report"/>
 
-    <target depends="init,gen-cql3-grammar,generate-cql-html"
+    <target depends="init,gen-cql3-grammar,generate-cql-html,generate-jflex-java"
             name="build-project">
         <echo message="${ant.project.name}: ${ant.file}"/>
         <!-- Order matters! -->

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/doc/SASI.md
----------------------------------------------------------------------
diff --git a/doc/SASI.md b/doc/SASI.md
new file mode 100644
index 0000000..64573b8
--- /dev/null
+++ b/doc/SASI.md
@@ -0,0 +1,768 @@
+# SASIIndex
+
+[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java),
+or "SASI" for short, is an implementation of Cassandra's
+`Index` interface that can be used as an alternative to the
+existing implementations. SASI's indexing and querying improves on
+existing implementations by tailoring it specifically to Cassandra's
+needs. SASI has superior performance in cases where queries would
+previously require filtering. In achieving this performance, SASI aims
+to be significantly less resource intensive than existing
+implementations, in memory, disk, and CPU usage. In addition, SASI
+supports prefix and contains queries on strings (similar to SQL's
+`LIKE = "foo*"` or `LIKE = "*foo*"'`).
+
+The following goes on describe how to get up and running with SASI,
+demonstrates usage with examples, and provides some details on its
+implementation.
+
+## Using SASI
+
+The examples below walk through creating a table and indexes on its
+columns, and performing queries on some inserted data. The patchset in
+this repository includes support for the Thrift and CQL3 interfaces.
+
+The examples below assume the `demo` keyspace has been created and is
+in use.
+
+```
+cqlsh> CREATE KEYSPACE demo WITH replication = {
+   ... 'class': 'SimpleStrategy',
+   ... 'replication_factor': '1'
+   ... };
+cqlsh> USE demo;
+```
+
+All examples are performed on the `sasi` table:
+
+```
+cqlsh:demo> CREATE TABLE sasi (id uuid, first_name text, last_name text,
+        ... age int, height int, created_at bigint, primary key (id));
+```
+
+#### Creating Indexes
+
+To create SASI indexes use CQLs `CREATE CUSTOM INDEX` statement:
+
+```
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+        ... WITH OPTIONS = {
+        ... 'analyzer_class':
+        ...   'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
+        ... 'case_sensitive': 'false'
+        ... };
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+        ... WITH OPTIONS = {'mode': 'CONTAINS'};
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex';
+
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+        ...  WITH OPTIONS = {'mode': 'SPARSE'};
+```
+
+The indexes created have some options specified that customize their
+behaviour and potentially performance. The index on `first_name` is
+case-insensitive. The analyzers are discussed more in a subsequent
+example. The `NonTokenizingAnalyzer` performs no analysis on the
+text. Each index has a mode: `PREFIX`, `CONTAINS`, or `SPARSE`, the
+first being the default. The `last_name` index is created with the
+mode `CONTAINS` which matches terms on suffixes instead of prefix
+only. Examples of this are available below and more detail can be
+found in the section on
+[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder).The
+`created_at` column is created with its mode set to `SPARSE`, which is
+meant to improve performance of querying large, dense number ranges
+like timestamps for data inserted every millisecond. Details of the
+`SPARSE` implementation can also be found in the section on the
+[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder). The `age`
+index is created with the default `PREFIX` mode and no
+case-sensitivity or text analysis options are specified since the
+field is numeric.
+
+After inserting the following data and performing a `nodetool flush`,
+SASI performing index flushes to disk can be seen in Cassandra's logs
+-- although the direct call to flush is not required (see
+[IndexMemtable](#indexmemtable) for more details).
+
+```
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023);
+
+cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at)
+        ... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024);
+
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi;
+
+ first_name | last_name     | age | height | created_at
+------------+---------------+-----+--------+---------------
+    Michael |      Kjellman |  26 |    180 | 1442959315021
+    Mikhail |       Stepura |  36 |    173 | 1442959315020
+      Jason |         Brown |  40 |    182 | 1442959315023
+      Pavel |     Yaskevich |  27 |    181 | 1442959315018
+      Vijay | Parthasarathy |  34 |    183 | 1442959315024
+     Jordan |          West |  26 |    173 | 1442959315019
+     Johnny |         Zhang |  32 |    175 | 1442959315022
+
+(7 rows)
+```
+
+#### Equality & Prefix Queries
+
+SASI supports simple queries already supported by CQL, however, for
+text fields like `first_name` equals queries perform prefix searches
+-- this is similar to `first_name LIKE 'M*'` in SQL (excluding case
+sensitivity, which is dependent on the index configuration). The
+semantics of CQL's `=` were modified instead of making further
+modifications of the grammar with the introduction of a `LIKE`
+operator. Ideally, CQL would be modified to include such an operator,
+supporting both prefix and suffix searches.
+
+```
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+        ... WHERE first_name = 'M';
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+    Michael |  Kjellman |  26 |    180 | 1442959315021
+    Mikhail |   Stepura |  36 |    173 | 1442959315020
+
+(2 rows)
+```
+
+Of course, the case of the query does not matter for the `first_name`
+column because of the options provided at index creation time.
+
+```
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+        ... WHERE first_name = 'm';
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+    Michael |  Kjellman |  26 |    180 | 1442959315021
+    Mikhail |   Stepura |  36 |    173 | 1442959315020
+
+(2 rows)
+```
+
+#### Compound Queries
+
+SASI supports queries with multiple predicates, however, due to the
+nature of the default indexing implementation, CQL requires the user
+to specify `ALLOW FILTERING` to opt-in to the potential performance
+pitfalls of such a query. With SASI, while the requirement to include
+`ALLOW FILTERING` remains, to reduce modifications to the grammar, the
+performance pitfalls do not exist because filtering is not
+performed. Details on how SASI joins data from multiple predicates is
+available below in the
+[Implementation Details](https://github.com/xedin/sasi#implementation-details)
+section.
+
+```
+cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi
+        ... WHERE first_name = 'M' and age < 30 ALLOW FILTERING;
+
+ first_name | last_name | age | height | created_at
+------------+-----------+-----+--------+---------------
+    Michael |  Kjellman |  26 |    180 | 1442959315021
+
+(1 rows)
+```
+
+#### Suffix Queries
+
+The next example demonstrates `CONTAINS` mode on the `last_name`
+column. By using this mode predicates can search for any strings
+containing the search string as a sub-string. In this case the strings
+containing "a" or "an".
+
+```
+cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a';
+
+ id                                   | age | created_at    | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 |  26 | 1442959315021 |    Michael |    180 |      Kjellman
+ 96053844-45c3-4f15-b1b7-b02c441d3ee1 |  36 | 1442959315020 |    Mikhail |    173 |       Stepura
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | 1442959315018 |      Pavel |    181 |     Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 |  34 | 1442959315024 |      Vijay |    183 | Parthasarathy
+ 2970da43-e070-41a8-8bcb-35df7a0e608a |  32 | 1442959315022 |     Johnny |    175 |         Zhang
+
+(5 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'an';
+
+ id                                   | age | created_at    | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+-----------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 |  26 | 1442959315021 |    Michael |    180 |  Kjellman
+ 2970da43-e070-41a8-8bcb-35df7a0e608a |  32 | 1442959315022 |     Johnny |    175 |     Zhang
+
+(2 rows)
+```
+
+#### Expressions on Non-Indexed Columns
+
+SASI also supports filtering on non-indexed columns like `height`. The
+expression can only narrow down an existing query using `AND`.
+
+```
+cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a' AND height >= 175 ALLOW FILTERING;
+
+ id                                   | age | created_at    | first_name | height | last_name
+--------------------------------------+-----+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 |  26 | 1442959315021 |    Michael |    180 |      Kjellman
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | 1442959315018 |      Pavel |    181 |     Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 |  34 | 1442959315024 |      Vijay |    183 | Parthasarathy
+ 2970da43-e070-41a8-8bcb-35df7a0e608a |  32 | 1442959315022 |     Johnny |    175 |         Zhang
+
+(4 rows)
+```
+
+#### Text Analysis (Tokenization and Stemming)
+
+Lastly, to demonstrate text analysis an additional column is needed on
+the table. Its definition, index, and statements to update rows are shown below.
+
+```
+cqlsh:demo> ALTER TABLE sasi ADD bio text;
+cqlsh:demo> CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex'
+        ... WITH OPTIONS = {
+        ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer',
+        ... 'tokenization_enable_stemming': 'true',
+        ... 'analyzed': 'true',
+        ... 'tokenization_normalize_lowercase': 'true',
+        ... 'tokenization_locale': 'en'
+        ... };
+cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217;
+cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500;
+cqlsh:demo> SELECT * FROM sasi;
+
+ id                                   | age | bio                                                                              | created_at    | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+---------------
+ f5dfcabe-de96-4148-9b80-a1c41ed276b4 |  26 |                                                                             null | 1442959315021 |    Michael |    180 |      Kjellman
+ 96053844-45c3-4f15-b1b7-b02c441d3ee1 |  36 |                                                                             null | 1442959315020 |    Mikhail |    173 |       Stepura
+ 6b757016-631d-4fdb-ac62-40b127ccfbc7 |  40 |                                                                             null | 1442959315023 |      Jason |    182 |         Brown
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 |      Pavel |    181 |     Yaskevich
+ 8f909e8a-008e-49dd-8d43-1b0df348ed44 |  34 |                                                                             null | 1442959315024 |      Vijay |    183 | Parthasarathy
+ 5770382a-c56f-4f3f-b755-450e24d55217 |  26 |          Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 |     Jordan |    173 |          West
+ 2970da43-e070-41a8-8bcb-35df7a0e608a |  32 |                                                                             null | 1442959315022 |     Johnny |    175 |         Zhang
+
+(7 rows)
+```
+
+Index terms and query search strings are stemmed for the `bio` column
+because it was configured to use the
+[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java)
+and `analyzed` is set to `true`. The
+`tokenization_normalize_lowercase` is similar to the `case_sensitive`
+property but for the
+[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). These
+query demonstrates the stemming applied by [`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java).
+
+```
+cqlsh:demo> SELECT * FROM sasi WHERE bio = 'distributing';
+
+ id                                   | age | bio                                                                              | created_at    | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 |      Pavel |    181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 |  26 |          Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 |     Jordan |    173 |      West
+
+(2 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio = 'they argued';
+
+ id                                   | age | bio                                                                              | created_at    | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 |      Pavel |    181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 |  26 |          Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 |     Jordan |    173 |      West
+
+(2 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio = 'working at the company';
+
+ id                                   | age | bio                                                                              | created_at    | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 |      Pavel |    181 | Yaskevich
+
+(1 rows)
+
+cqlsh:demo> SELECT * FROM sasi WHERE bio = 'soft eng';
+
+ id                                   | age | bio                                                                              | created_at    | first_name | height | last_name
+--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+-----------
+ 556ebd54-cbe5-4b75-9aae-bf2a31a24500 |  27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 |      Pavel |    181 | Yaskevich
+ 5770382a-c56f-4f3f-b755-450e24d55217 |  26 |          Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 |     Jordan |    173 |      West
+
+(2 rows)
+```
+
+## Implementation Details
+
+While SASI, at the surface, is simply an implementation of the
+`Index` interface, at its core there are several data
+structures and algorithms used to satisfy it. These are described
+here. Additionally, the changes internal to Cassandra to support SASIs
+integration are described.
+
+The `Index` interface divides responsibility of the
+implementer into two parts: Indexing and Querying. Further, Cassandra
+makes it possible to divide those responsibilities into the memory and
+disk components. SASI takes advantage of Cassandra's write-once,
+immutable, ordered data model to build indexes along with the flushing
+of the memtable to disk -- this is the origin of the name "SSTable
+Attached Secondary Index".
+
+The SASI index data structures are built in memory as the SSTable is
+being written and they are flushed to disk before the writing of the
+SSTable completes. The writing of each index file only requires
+sequential writes to disk. In some cases, partial flushes are
+performed, and later stitched back together, to reduce memory
+usage. These data structures are optimized for this use case.
+
+Taking advantage of Cassandra's ordered data model, at query time,
+candidate indexes are narrowed down for searching minimize the amount
+of work done. Searching is then performed using an efficient method
+that streams data off disk as needed.
+
+### Indexing
+
+Per SSTable, SASI writes an index file for each indexed column. The
+data for these files is built in memory using the
+[`OnDiskIndexBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java). Once
+flushed to disk, the data is read using the
+[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java)
+class. These are composed of bytes representing indexed terms,
+organized for efficient writing or searching respectively. The keys
+and values they hold represent tokens and positions in an SSTable and
+these are stored per-indexed term in
+[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java)s
+for writing, and
+[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s
+for querying. These index files are memory mapped after being written
+to disk, for quicker access. For indexing data in the memtable SASI
+uses its
+[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java)
+class.
+
+#### OnDiskIndex(Builder)
+
+Each
+[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java)
+is an instance of a modified
+[Suffix Array](https://en.wikipedia.org/wiki/Suffix_array) data
+structure. The
+[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java)
+is comprised of page-size blocks of sorted terms and pointers to the
+terms' associated data, as well as the data itself, stored also in one
+or more page-sized blocks. The
+[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java)
+is structured as a tree of arrays, where each level describes the
+terms in the level below, the final level being the terms
+themselves. The `PointerLevel`s and their `PointerBlock`s contain
+terms and pointers to other blocks that *end* with those terms. The
+`DataLevel`, the final level, and its `DataBlock`s contain terms and
+point to the data itself, contained in [`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s.
+
+The terms written to the
+[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java)
+vary depending on its "mode": either `PREFIX`, `CONTAINS`, or
+`SPARSE`. In the `PREFIX` and `SPARSE` cases terms exact values are
+written exactly once per `OnDiskIndex`. For example, a `PREFIX` index
+with terms `Jason`, `Jordan`, `Pavel`, all three will be included in
+the index. A `CONTAINS` index writes additional terms for each suffix of
+each term recursively. Continuing with the example, a `CONTAINS` index
+storing the previous terms would also store `ason`, `ordan`, `avel`,
+`son`, `rdan`, `vel`, etc. This allows for queries on the suffix of
+strings. The `SPARSE` mode differs from `PREFIX` in that for every 64
+blocks of terms a
+[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)
+is built merging all the
+[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s
+for each term into a single one. This copy of the data is used for
+efficient iteration of large ranges of e.g. timestamps. The index
+"mode" is configurable per column at index creation time.
+
+#### TokenTree(Builder)
+
+The
+[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)
+is an implementation of the well-known
+[B+-tree](https://en.wikipedia.org/wiki/B%2B_tree) that has been
+modified to optimize for its use-case. In particular, it has been
+optimized to associate tokens, longs, with a set of positions in an
+SSTable, also longs. Allowing the set of long values accommodates
+the possibility of a hash collision in the token, but the data
+structure is optimized for the unlikely possibility of such a
+collision.
+
+To optimize for its write-once environment the
+[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java)
+completely loads its interior nodes as the tree is built and it uses
+the well-known algorithm optimized for bulk-loading the data
+structure.
+
+[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s provide the means to iterate a tokens, and file
+positions, that match a given term, and to skip forward in that
+iteration, an operation used heavily at query time.
+
+#### IndexMemtable
+
+The
+[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java)
+handles indexing the in-memory data held in the memtable. The
+[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java)
+in turn manages either a
+[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java)
+or a
+[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java)
+per-column. The choice of which index type is used is data
+dependent. The
+[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java)
+is used for literal types. `AsciiType` and `UTF8Type` are literal
+types by defualt but any column can be configured as a literal type
+using the `is_literal` option at index creation time. For non-literal
+types the
+[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java)
+is used. The
+[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java)
+is an implementation that can efficiently support prefix queries on
+character-like data. The
+[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java),
+conversely, is better suited for Cassandra other data types like
+numbers.
+
+The
+[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java)
+is built using either the `ConcurrentRadixTree` or
+`ConcurrentSuffixTree` from the `com.goooglecode.concurrenttrees`
+package. The choice between the two is made based on the indexing
+mode, `PREFIX` or other modes, and `CONTAINS` mode, respectively.
+
+The
+[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java)
+is built on top of `java.util.concurrent.ConcurrentSkipListSet`.
+
+### Querying
+
+Responsible for converting the internal `IndexExpression`
+representation into SASI's
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)
+and
+[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)
+tree, optimizing the tree to reduce the amount of work done, and
+driving the query itself the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+is the work horse of SASI's querying implementation. To efficiently
+perform union and intersection operations SASI provides several
+iterators similar to Cassandra's `MergeIterator` but tailored
+specifically for SASIs use, and with more features. The
+[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java),
+like its name suggests, performs set union over sets of tokens/keys
+matching the query, only reading as much data as it needs from each
+set to satisfy the query. The
+[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java),
+similar to its counterpart, performs set intersection over its data.
+
+#### QueryPlan
+
+The
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+instantiated per search query is at the core of SASIs querying
+implementation. Its work can be divided in two stages: analysis and
+execution.
+
+During the analysis phase,
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+converts from Cassandra's internal representation of
+`IndexExpression`s, which has also been modified to support encoding
+queries that contain ORs and groupings of expressions using
+parentheses (see the
+[Cassandra Internal Changes](https://github.com/xedin/sasi#cassandra-internal-changes)
+section below for more details). This process produces a tree of
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s, which in turn may contain [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s, all of which
+provide an alternative, more efficient, representation of the query.
+
+During execution the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+uses the `DecoratedKey`-generating iterator created from the
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. These keys are read from disk and a final check to
+ensure they satisfy the query is made, once again using the
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. At the point the desired amount of matching data has
+been found, or there is no more matching data, the result set is
+returned to the coordinator through the existing internal components.
+
+The number of queries (total/failed/timed-out), and their latencies,
+are maintined per-table/column family.
+
+SASI also supports concurrently iterating terms for the same index
+accross SSTables. The concurrency factor is controlled by the
+`cassandra.search_concurrency_factor` system property. The default is
+`1`.
+
+##### QueryController
+
+Each
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+references a
+[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)
+used throughout the execution phase. The
+[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)
+has two responsibilities: to manage and ensure the proper cleanup of
+resources (indexes), and to strictly enforce the time bound for query,
+specified by the user via the range slice timeout. All indexes are
+accessed via the
+[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)
+so that they can be safely released by it later. The
+[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)'s
+`checkpoint` function is called in specific places in the execution
+path to ensure the time-bound is enforced.
+
+##### QueryPlan Optimizations
+
+While in the analysis phase, the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+performs several potential optimizations to the query. The goal of
+these optimizations is to reduce the amount of work performed during
+the execution phase.
+
+The simplest optimization performed is compacting multiple expressions
+joined by logical intersection (`AND`) into a single [`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) with
+three or more [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s. For example, the query `WHERE age < 100 AND
+fname = 'p*' AND first_name != 'pa*' AND age > 21` would,
+without modification, have the following tree:
+
+                          ┌───────┐
+                 ┌────────│  AND  │──────┐
+                 │        └───────┘      │
+                 ▼                       ▼
+              ┌───────┐             ┌──────────┐
+        ┌─────│  AND  │─────┐       │age < 100 │
+        │     └───────┘     │       └──────────┘
+        ▼                   ▼
+    ┌──────────┐          ┌───────┐
+    │ fname=p* │        ┌─│  AND  │───┐
+    └──────────┘        │ └───────┘   │
+                        ▼             ▼
+                    ┌──────────┐  ┌──────────┐
+                    │fname!=pa*│  │ age > 21 │
+                    └──────────┘  └──────────┘
+
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+will remove the redundant right branch whose root is the final `AND`
+and has leaves `fname != pa*` and `age > 21`. These [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s will
+be compacted into the parent `AND`, a safe operation due to `AND`
+being associative and commutative. The resulting tree looks like the
+following:
+
+                                  ┌───────┐
+                         ┌────────│  AND  │──────┐
+                         │        └───────┘      │
+                         ▼                       ▼
+                      ┌───────┐             ┌──────────┐
+          ┌───────────│  AND  │────────┐    │age < 100 │
+          │           └───────┘        │    └──────────┘
+          ▼               │            ▼
+    ┌──────────┐          │      ┌──────────┐
+    │ fname=p* │          ▼      │ age > 21 │
+    └──────────┘    ┌──────────┐ └──────────┘
+                    │fname!=pa*│
+                    └──────────┘
+
+When excluding results from the result set, using `!=`, the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+determines the best method for handling it. For range queries, for
+example, it may be optimal to divide the range into multiple parts
+with a hole for the exclusion. For string queries, such as this one,
+it is more optimal, however, to simply note which data to skip, or
+exclude, while scanning the index. Following this optimization the
+tree looks like this:
+
+                                   ┌───────┐
+                          ┌────────│  AND  │──────┐
+                          │        └───────┘      │
+                          ▼                       ▼
+                       ┌───────┐             ┌──────────┐
+               ┌───────│  AND  │────────┐    │age < 100 │
+               │       └───────┘        │    └──────────┘
+               ▼                        ▼
+        ┌──────────────────┐         ┌──────────┐
+        │     fname=p*     │         │ age > 21 │
+        │ exclusions=[pa*] │         └──────────┘
+        └──────────────────┘
+
+The last type of optimization applied, for this query, is to merge
+range expressions across branches of the tree -- without modifying the
+meaning of the query, of course. In this case, because the query
+contains all `AND`s the `age` expressions can be collapsed. Along with
+this optimization, the initial collapsing of unneeded `AND`s can also
+be applied once more to result in this final tree using to execute the
+query:
+
+                            ┌───────┐
+                     ┌──────│  AND  │───────┐
+                     │      └───────┘       │
+                     ▼                      ▼
+           ┌──────────────────┐    ┌────────────────┐
+           │     fname=p*     │    │ 21 < age < 100 │
+           │ exclusions=[pa*] │    └────────────────┘
+           └──────────────────┘
+
+#### Operations and Expressions
+
+As discussed, the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+optimizes a tree represented by
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s
+as interior nodes, and
+[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s
+as leaves. The
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)
+class, more specifically, can have zero, one, or two
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s
+as children and an unlimited number of expressions. The iterators used
+to perform the queries, discussed below in the
+"Range(Union|Intersection)Iterator" section, implement the necessary
+logic to merge results transparently regardless of the
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s
+children.
+
+Besides participating in the optimizations performed by the
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java),
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)
+is also responsible for taking a row that has been returned by the
+query and making a final validation that it in fact does match. This
+`satisfiesBy` operation is performed recursively from the root of the
+[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)
+tree for a given query. These checks are performed directly on the
+data in a given row. For more details on how `satisfiesBy` works see
+the documentation
+[in the code](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123).
+
+#### Range(Union|Intersection)Iterator
+
+The abstract `RangeIterator` class provides a unified interface over
+the two main operations performed by SASI at various layers in the
+execution path: set intersection and union. These operations are
+performed in a iterated, or "streaming", fashion to prevent unneeded
+reads of elements from either set. In both the intersection and union
+cases the algorithms take advantage of the data being pre-sorted using
+the same sort order, e.g. term or token order.
+
+The
+[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java)
+performs the "Merge-Join" portion of the
+[Sort-Merge-Join](https://en.wikipedia.org/wiki/Sort-merge_join)
+algorithm, with the properties of an outer-join, or union. It is
+implemented with several optimizations to improve its performance over
+a large number of iterators -- sets to union. Specifically, the
+iterator exploits the likely case of the data having many sub-groups
+of overlapping ranges and the unlikely case that all ranges will
+overlap each other. For more details see the
+[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21).
+
+The
+[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java)
+itself is not a subclass of `RangeIterator`. It is a container for
+several classes, one of which, `AbstractIntersectionIterator`,
+sub-classes `RangeIterator`. SASI supports two methods of performing
+the intersection operation, and the ability to be adaptive in choosing
+between them based on some properties of the data.
+
+`BounceIntersectionIterator`, and the `BOUNCE` strategy, works like
+the
+[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java)
+in that it performs a "Merge-Join", however, its nature is similar to
+a inner-join, where like values are merged by a data-specific merge
+function (e.g. merging two tokens in a list to lookup in a SSTable
+later). See the
+[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101)
+for more details on its implementation.
+
+`LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a
+different operation, more similar to a lookup in an associative data
+structure, or "hash lookup" in database terminology. Once again,
+details on the implementation can be found in the
+[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208).
+
+The choice between the two iterators, or the `ADAPTIVE` strategy, is
+based upon the ratio of data set sizes of the minimum and maximum
+range of the sets being intersected. If the number of the elements in
+minimum range divided by the number of elements is the maximum range
+is less than or equal to `0.01`, then the `ADAPTIVE` strategy chooses
+the `LookupIntersectionIterator`, otherwise the
+`BounceIntersectionIterator` is chosen.
+
+### The SASIIndex Class
+
+The above components are glued together by the
+[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasiIndex.java)
+class which implements `Index`, and is instantiated
+per-table containing SASI indexes. It manages all indexes for a table
+via the
+[`sasi.conf.DataTracker`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java)
+and
+[`sasi.conf.view.View`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java)
+components, controls writing of all indexes for an SSTable via its
+[`PerSSTableIndexWriter`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java), and initiates searches with
+`Indexer`. These classes glue the previously
+mentioned indexing components together with Cassandra's SSTable
+life-cycle ensuring indexes are not only written when Memtable's flush
+but also as SSTable's are compacted. For querying, the
+`Indexer` does little but defer to
+[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java)
+and update e.g. latency metrics exposed by SASI.
+
+### Cassandra Internal Changes
+
+To support the above changes and integrate them into Cassandra a few
+minor internal changes were made to Cassandra itself. These are
+described here.
+
+#### SSTable Write Life-cycle Notifications
+
+The `SSTableFlushObserver` is an observer pattern-like interface,
+whose sub-classes can register to be notified about events in the
+life-cycle of writing out a SSTable. Sub-classes can be notified when a
+flush begins and ends, as well as when each next row is about to be
+written, and each next column. SASI's `PerSSTableIndexWriter`,
+discussed above, is the only current subclass.
+
+### Limitations and Caveats
+
+The following are items that can be addressed in future updates but are not
+available in this repository or are not currently implemented.
+
+* The cluster must be configured to use a partitioner that produces
+  `LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners which
+  don't produce LongToken e.g. `ByteOrderedPartitioner` and `RandomPartitioner`
+  will not work with SASI.
+* `ALLOW FILTERING`, the requirement of at least one indexes `=`
+  expression, and lack of `LIKE` limit SASIs
+  feature-set. Modifications to the grammar to allow `Index`
+  implementations to enumerate its supported features would allow SASI
+  to expose more features without need to support them in other
+  implementations.
+* Not Equals and OR support have been removed in this release while
+  changes are made to Cassandra itself to support them.
+
+### Contributors
+
+* [Pavel Yaskevich](https://github.com/xedin)
+* [Jordan West](https://github.com/jrwest)
+* [Michael Kjellman](https://github.com/mkjellman)
+* [Jason Brown](https://github.com/jasobrown)
+* [Mikhail Stepura](https://github.com/mishail)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/concurrent-trees-2.4.0.jar
----------------------------------------------------------------------
diff --git a/lib/concurrent-trees-2.4.0.jar b/lib/concurrent-trees-2.4.0.jar
new file mode 100644
index 0000000..9c488fe
Binary files /dev/null and b/lib/concurrent-trees-2.4.0.jar differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/hppc-0.5.4.jar
----------------------------------------------------------------------
diff --git a/lib/hppc-0.5.4.jar b/lib/hppc-0.5.4.jar
new file mode 100644
index 0000000..d84b83b
Binary files /dev/null and b/lib/hppc-0.5.4.jar differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/jflex-1.6.0.jar
----------------------------------------------------------------------
diff --git a/lib/jflex-1.6.0.jar b/lib/jflex-1.6.0.jar
new file mode 100644
index 0000000..550e446
Binary files /dev/null and b/lib/jflex-1.6.0.jar differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/licenses/concurrent-trees-2.4.0.txt
----------------------------------------------------------------------
diff --git a/lib/licenses/concurrent-trees-2.4.0.txt b/lib/licenses/concurrent-trees-2.4.0.txt
new file mode 100644
index 0000000..50086f8
--- /dev/null
+++ b/lib/licenses/concurrent-trees-2.4.0.txt
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/licenses/hppc-0.5.4.txt
----------------------------------------------------------------------
diff --git a/lib/licenses/hppc-0.5.4.txt b/lib/licenses/hppc-0.5.4.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lib/licenses/hppc-0.5.4.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.