You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by im...@apache.org on 2018/10/12 21:48:03 UTC
[09/17] asterixdb-site git commit: fix missing docs and url

http://git-wip-us.apache.org/repos/asf/asterixdb-site/blob/b2f1d3e6/content/docs/0.9.4/sqlpp/primer-sqlpp.html
----------------------------------------------------------------------
diff --git a/content/docs/0.9.4/sqlpp/primer-sqlpp.html b/content/docs/0.9.4/sqlpp/primer-sqlpp.html
index d0f6ca5..1a935a1 100644
--- a/content/docs/0.9.4/sqlpp/primer-sqlpp.html
+++ b/content/docs/0.9.4/sqlpp/primer-sqlpp.html
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/primer-sqlpp.md at 2018-10-02
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/primer-sqlpp.md at 2018-10-12
  | Rendered using Apache Maven Fluido Skin 1.7
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20181002" />
+    <meta name="Date-Revision-yyyymmdd" content="20181012" />
     <meta http-equiv="Content-Language" content="en" />
     <title>AsterixDB &#x2013; AsterixDB 101: An ADM and SQL++ Primer</title>
     <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
@@ -26,7 +26,7 @@
 
       <div id="breadcrumbs">
         <ul class="breadcrumb">
-        <li id="publishDate">Last Published: 2018-10-02</li>
+        <li id="publishDate">Last Published: 2018-10-12</li>
       <li id="projectVersion" class="pull-right">Version: 0.9.4</li>
       <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
         </ul>
@@ -40,26 +40,26 @@
     <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
     <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
       <li class="nav-header">AsterixDB Primer</li>
-    <li class="active"><a href="#"><span class="none"></span>Option 1: using SQL++</a></li>
-    <li><a href="../aql/primer.html" title="Option 2: using AQL"><span class="none"></span>Option 2: using AQL</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Using SQL++</a></li>
       <li class="nav-header">Data Model</li>
     <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
-      <li class="nav-header">Queries - SQL++</li>
+      <li class="nav-header">Queries</li>
     <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
     <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
-      <li class="nav-header">Queries - AQL</li>
-    <li><a href="../aql/manual.html" title="The Asterix Query Language (AQL)"><span class="none"></span>The Asterix Query Language (AQL)</a></li>
-    <li><a href="../aql/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
       <li class="nav-header">API/SDK</li>
     <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
     <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
       <li class="nav-header">Advanced Features</li>
-    <li><a href="../aql/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
     <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
-    <li><a href="../feeds/tutorial.html" title="Support for Data Ingestion"><span class="none"></span>Support for Data Ingestion</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
     <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
-    <li><a href="../aql/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
-    <li><a href="../aql/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
 </ul>
           <hr />
           <div id="poweredBy">

http://git-wip-us.apache.org/repos/asf/asterixdb-site/blob/b2f1d3e6/content/docs/0.9.4/sqlpp/similarity.html
----------------------------------------------------------------------
diff --git a/content/docs/0.9.4/sqlpp/similarity.html b/content/docs/0.9.4/sqlpp/similarity.html
new file mode 100644
index 0000000..6f100ae
--- /dev/null
+++ b/content/docs/0.9.4/sqlpp/similarity.html
@@ -0,0 +1,310 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/similarity.md at 2018-10-12
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181012" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; AsterixDB  Support of Similarity Queries</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2018-10-12</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.4</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Support of Similarity Queries</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>AsterixDB  Support of Similarity Queries</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Motivation">Motivation</a></li>
+<li><a href="#DataTypesAndSimilarityFunctions">Data Types and Similarity Functions</a></li>
+<li><a href="#SimilaritySelectionQueries">Similarity Selection Queries</a></li>
+<li><a href="#SimilarityJoinQueries">Similarity Join Queries</a></li>
+<li><a href="#UsingIndexesToSupportSimilarityQueries">Using Indexes to Support Similarity Queries</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Motivation_.5BBack_to_TOC.5D"></a><a name="Motivation" id="Motivation">Motivation</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Similarity queries are widely used in applications where users need to find objects that satisfy a similarity predicate, while exact matching is not sufficient. These queries are especially important for social and Web applications, where errors, abbreviations, and inconsistencies are common.  As an example, we may want to find all the movies starring Schwarzenegger, while we don&#x2019;t know the exact spelling of his last name (despite his popularity in both the movie industry and politics :-)). As another example, we want to find all the Facebook users who have similar friends. To meet this type of need, AsterixDB supports similarity queries using efficient indexes and algorithms.</p></div>
+<div class="section">
+<h2><a name="Data_Types_and_Similarity_Functions_.5BBack_to_TOC.5D"></a><a name="DataTypesAndSimilarityFunctions" id="DataTypesAndSimilarityFunctions">Data Types and Similarity Functions</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB supports <a class="externalLink" href="http://en.wikipedia.org/wiki/Levenshtein_distance">edit distance</a> (on strings) and <a class="externalLink" href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard</a> (on sets).  For instance, in our <a href="primer.html#ADM:_Modeling_Semistructed_Data_in_AsterixDB">TinySocial</a> example, the <tt>friend-ids</tt> of a Facebook user forms a set of friends, and we can define a similarity between the sets of friends of two users. We can also convert a string to a set of grams of a length &#x201c;n&#x201d; (called &#x201c;n-grams&#x201d;) and define the Jaccard similarity between the two gram sets of the two strings. Formally, the &#x201c;n-grams&#x201d; of a string are its substrings of length &#x201c;n&#x201d;. For instance, the 3-grams of the string <tt>schwarzenegger</tt> are <tt>sch</tt>, <tt>chw</tt>, <tt>hwa</tt>, &#x2026;, <tt>ger</tt>.</p>
+<p>AsterixDB provides <a href="functions.html#Tokenizing_Functions">tokenization functions</a> to convert strings to sets, and the <a href="functions.html#Similarity_Functions">similarity functions</a>.</p></div>
+<div class="section">
+<h2><a name="Similarity_Selection_Queries_.5BBack_to_TOC.5D"></a><a name="SimilaritySelectionQueries" id="SimilaritySelectionQueries">Similarity Selection Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>The following query asks for all the Facebook users whose name is similar to <tt>Suzanna Tilson</tt>, i.e., their edit distance is at most 2.</p>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $ed := edit-distance($user.name, &quot;Suzanna Tilson&quot;)
+    where $ed &lt;= 2
+    return $user
+</pre></div></div>
+
+<p>The following query asks for all the Facebook users whose set of friend ids is similar to <tt>[1,5,9,10]</tt>, i.e., their Jaccard similarity is at least 0.6.</p>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $sim := similarity-jaccard($user.friend-ids, [1,5,9,10])
+    where $sim &gt;= 0.6f
+    return $user
+</pre></div></div>
+
+<p>AsterixDB allows a user to use a similarity operator <tt>~=</tt> to express a condition by defining the similarity function and threshold using &#x201c;set&#x201d; statements earlier. For instance, the above query can be equivalently written as:</p>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    set simfunction &quot;jaccard&quot;;
+    set simthreshold &quot;0.6f&quot;;
+
+    for $user in dataset('FacebookUsers')
+    where $user.friend-ids ~= [1,5,9,10]
+    return $user
+</pre></div></div>
+
+<p>In this query, we first declare Jaccard as the similarity function using <tt>simfunction</tt> and then specify the threshold <tt>0.6f</tt> using <tt>simthreshold</tt>.</p></div>
+<div class="section">
+<h2><a name="Similarity_Join_Queries_.5BBack_to_TOC.5D"></a><a name="SimilarityJoinQueries" id="SimilarityJoinQueries">Similarity Join Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB supports fuzzy joins between two sets. The following <a href="primer.html#Query_5_-_Fuzzy_Join">query</a> finds, for each Facebook user, all Twitter users with names similar to their name based on the edit distance.</p>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    set simfunction &quot;edit-distance&quot;;
+    set simthreshold &quot;3&quot;;
+
+    for $fbu in dataset FacebookUsers
+    return {
+        &quot;id&quot;: $fbu.id,
+        &quot;name&quot;: $fbu.name,
+        &quot;similar-users&quot;: for $t in dataset TweetMessages
+                            let $tu := $t.user
+                            where $tu.name ~= $fbu.name
+                            return {
+                            &quot;twitter-screenname&quot;: $tu.screen-name,
+                            &quot;twitter-name&quot;: $tu.name
+                            }
+    };
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Using_Indexes_to_Support_Similarity_Queries_.5BBack_to_TOC.5D"></a><a name="UsingIndexesToSupportSimilarityQueries" id="UsingIndexesToSupportSimilarityQueries">Using Indexes to Support Similarity Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB uses two types of indexes to support similarity queries, namely &#x201c;ngram index&#x201d; and &#x201c;keyword index&#x201d;.</p>
+<div class="section">
+<h3><a name="NGram_Index"></a>NGram Index</h3>
+<p>An &#x201c;ngram index&#x201d; is constructed on a set of strings.  We generate n-grams for each string, and build an inverted list for each n-gram that includes the ids of the strings with this gram.  A similarity query can be answered efficiently by accessing the inverted lists of the grams in the query and counting the number of occurrences of the string ids on these inverted lists.  A similar idea can be used to answer queries with Jaccard similarity.  A detailed description of these techniques is available in this <a class="externalLink" href="http://www.ics.uci.edu/~chenli/pub/icde2009-memreducer.pdf">paper</a>.</p>
+<p>For instance, the following DDL statements create an ngram index on the <tt>FacebookUsers.name</tt> attribute using an inverted index of 3-grams.</p>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    create index fbUserIdx on FacebookUsers(name) type ngram(3);
+</pre></div></div>
+
+<p>The number &#x201c;3&#x201d; in &#x201c;ngram(3)&#x201d; is the length &#x201c;n&#x201d; in the grams. This index can be used to optimize similarity queries on this attribute using <a href="functions.html#edit-distance">edit-distance</a>, <a href="functions.html#edit-distance-check">edit-distance-check</a>, <a href="functions.html#similarity-jaccard">similarity-jaccard</a>, or <a href="functions.html#similarity-jaccard-check">similarity-jaccard-check</a>, where the similarity is defined on sets of 3-grams.  This index can also be used to optimize queries with the &#x201c;<a href="functions.html#contains">contains()</a>&#x201d; predicate (i.e., substring matching) since it can also be solved by counting on the inverted lists of the grams in the query string.</p>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_edit-distance"></a>NGram Index usage case - <a href="functions.html#edit-distance">edit-distance</a></h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $ed := edit-distance($user.name, &quot;Suzanna Tilson&quot;)
+    where $ed &lt;= 2
+    return $user
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_edit-distance-check"></a>NGram Index usage case - <a href="functions.html#edit-distance-check">edit-distance-check</a></h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $ed := edit-distance-check($user.name, &quot;Suzanna Tilson&quot;, 2)
+    where $ed[0]
+    return $ed[1]
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_similarity-jaccard"></a>NGram Index usage case - <a href="functions.html#similarity-jaccard">similarity-jaccard</a></h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $sim := similarity-jaccard($user.friend-ids, [1,5,9,10])
+    where $sim &gt;= 0.6f
+    return $user
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_similarity-jaccard-check"></a>NGram Index usage case - <a href="functions.html#similarity-jaccard-check">similarity-jaccard-check</a></h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $user in dataset('FacebookUsers')
+    let $sim := similarity-jaccard-check($user.friend-ids, [1,5,9,10], 0.6f)
+    where $sim[0]
+    return $user
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_contains.28.29"></a>NGram Index usage case - <a href="functions.html#contains">contains()</a></h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    for $i in dataset('FacebookMessages')
+    where contains($i.message, &quot;phone&quot;)
+    return {&quot;mid&quot;: $i.message-id, &quot;message&quot;: $i.message}
+</pre></div></div>
+</div></div>
+<div class="section">
+<h3><a name="Keyword_Index"></a>Keyword Index</h3>
+<p>A &#x201c;keyword index&#x201d; is constructed on a set of strings or sets (e.g., OrderedList, UnorderedList). Instead of generating grams as in an ngram index, we generate tokens (e.g., words) and for each token, construct an inverted list that includes the ids of the objects with this token.  The following two examples show how to create a keyword index on two different types:</p>
+<div class="section">
+<h4><a name="Keyword_Index_on_String_Type"></a>Keyword Index on String Type</h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    drop index FacebookMessages.fbMessageIdx if exists;
+    create index fbMessageIdx on FacebookMessages(message) type keyword;
+
+    for $o in dataset('FacebookMessages')
+    let $jacc := similarity-jaccard-check(word-tokens($o.message), word-tokens(&quot;love like ccast&quot;), 0.2f)
+    where $jacc[0]
+    return $o
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Keyword_Index_on_UnorderedList_Type"></a>Keyword Index on UnorderedList Type</h4>
+
+<div>
+<div>
+<pre class="source">    use dataverse TinySocial;
+
+    create index fbUserIdx_fids on FacebookUsers(friend-ids) type keyword;
+
+    for $c in dataset('FacebookUsers')
+    let $jacc := similarity-jaccard-check($c.friend-ids, {{3,10}}, 0.5f)
+    where $jacc[0]
+    return $c
+</pre></div></div>
+
+<p>As shown above, a keyword index can be used to optimize queries with token-based similarity predicates, including <a href="functions.html#similarity-jaccard">similarity-jaccard</a> and <a href="functions.html#similarity-jaccard-check">similarity-jaccard-check</a>.</p></div></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/asterixdb-site/blob/b2f1d3e6/content/docs/0.9.4/udf.html
----------------------------------------------------------------------
diff --git a/content/docs/0.9.4/udf.html b/content/docs/0.9.4/udf.html
index 83f7114..65bc7bb 100644
--- a/content/docs/0.9.4/udf.html
+++ b/content/docs/0.9.4/udf.html
@@ -1,15 +1,15 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/udf.md at 2018-10-02
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from target/generated-site/markdown/udf.md at 2018-10-12
  | Rendered using Apache Maven Fluido Skin 1.7
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20181002" />
+    <meta name="Date-Revision-yyyymmdd" content="20181012" />
     <meta http-equiv="Content-Language" content="en" />
-    <title>AsterixDB &#x2013; Support for User Defined Functions in AsterixDB</title>
+    <title>AsterixDB &#x2013; User-defined Functions</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.7.min.css" />
     <link rel="stylesheet" href="./css/site.css" />
     <link rel="stylesheet" href="./css/print.css" media="print" />
@@ -26,7 +26,7 @@
 
       <div id="breadcrumbs">
         <ul class="breadcrumb">
-        <li id="publishDate">Last Published: 2018-10-02</li>
+        <li id="publishDate">Last Published: 2018-10-12</li>
       <li id="projectVersion" class="pull-right">Version: 0.9.4</li>
       <li class="pull-right"><a href="index.html" title="Documentation Home">Documentation Home</a></li>
         </ul>
@@ -40,26 +40,26 @@
     <li><a href="ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
     <li><a href="aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
       <li class="nav-header">AsterixDB Primer</li>
-    <li><a href="sqlpp/primer-sqlpp.html" title="Option 1: using SQL++"><span class="none"></span>Option 1: using SQL++</a></li>
-    <li><a href="aql/primer.html" title="Option 2: using AQL"><span class="none"></span>Option 2: using AQL</a></li>
+    <li><a href="sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
       <li class="nav-header">Data Model</li>
     <li><a href="datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
-      <li class="nav-header">Queries - SQL++</li>
+      <li class="nav-header">Queries</li>
     <li><a href="sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
     <li><a href="sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
-      <li class="nav-header">Queries - AQL</li>
-    <li><a href="aql/manual.html" title="The Asterix Query Language (AQL)"><span class="none"></span>The Asterix Query Language (AQL)</a></li>
-    <li><a href="aql/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
       <li class="nav-header">API/SDK</li>
     <li><a href="api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
     <li><a href="csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
       <li class="nav-header">Advanced Features</li>
-    <li><a href="aql/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
     <li><a href="aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
-    <li><a href="feeds/tutorial.html" title="Support for Data Ingestion"><span class="none"></span>Support for Data Ingestion</a></li>
+    <li><a href="feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
     <li class="active"><a href="#"><span class="none"></span>User Defined Functions</a></li>
-    <li><a href="aql/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
-    <li><a href="aql/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
 </ul>
           <hr />
           <div id="poweredBy">
@@ -90,160 +90,173 @@
  ! specific language governing permissions and limitations
  ! under the License.
  !-->
-<h1>Support for User Defined Functions in AsterixDB</h1>
+<h1>User-defined Functions</h1>
 <div class="section">
 <h2><a name="Table_of_Contents"></a><a name="atoc" id="#toc">Table of Contents</a></h2>
 <ul>
 
-<li><a href="#PreprocessingCollectedData">Using UDF to preprocess feed-collected data</a></li>
-<li><a href="#WritingAnExternalUDF">Writing an External UDF</a></li>
-<li><a href="#CreatingAnAsterixDBLibrary">Creating an AsterixDB Library</a></li>
-<li><a href="#installingUDF">Installing an AsterixDB Library</a></li>
-</ul>
-<p>In this document, we describe the support for implementing, using, and installing user-defined functions (UDF) in AsterixDB. We will explain how we can use UDFs to preprocess, e.g., data collected using feeds (see the <a href="feeds/tutorial.html">feeds tutorial</a>).</p>
+<li><a href="#introduction">Introduction</a></li>
+<li><a href="#installingUDF">Installing a UDF Library</a></li>
+<li><a href="#UDFOnFeeds">Attaching a UDF on Data Feeds</a></li>
+<li><a href="#udfConfiguration">A quick look of the UDF configuration</a></li>
+<li><a href="#uninstall">Uninstalling a UDF Library</a><!--
+! Licensed to the Apache Software Foundation (ASF) under one
+! or more contributor license agreements.  See the NOTICE file
+! distributed with this work for additional information
+! regarding copyright ownership.  The ASF licenses this file
+! to you under the Apache License, Version 2.0 (the
+! "License"); you may not use this file except in compliance
+! with the License.  You may obtain a copy of the License at
+!
+!   http://www.apache.org/licenses/LICENSE-2.0
+!
+! Unless required by applicable law or agreed to in writing,
+! software distributed under the License is distributed on an
+! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+! KIND, either express or implied.  See the License for the
+! specific language governing permissions and limitations
+! under the License.
+!--></li>
+</ul></div>
+<div class="section">
+<h2><a name="Introduction"></a><a name="introduction">Introduction</a></h2>
+<p>Apache AsterixDB supports two languages for writing user-defined functions (UDFs): SQL++ and Java. A user can encapsulate data processing logic into a UDF and invoke it later repeatedly. For SQL++ functions, a user can refer to <a href="sqlpp/manual.html#Functions">SQL++ Functions</a> for their usages. In this document, we focus on how to install/invoke/uninstall a Java function library using the Ansible script that we provide.</p></div>
 <div class="section">
-<h3><a name="Installing_an_AsterixDB_Library"></a><a name="installingUDF">Installing an AsterixDB Library</a></h3>
-<p>We assume you have followed the <a href="../install.html">installation instructions</a> to set up a running AsterixDB instance. Let us refer your AsterixDB instance by the name &#x201c;my_asterix&#x201d;.</p>
+<h2><a name="Installing_an_UDF_Library"></a><a name="installingUDF">Installing a UDF Library</a></h2>
+<p>UDFs have to be installed offline. This section describes the process assuming that you have followed the preceding <a href="ansible.html">ansible installation instructions</a> to deploy an AsterixDB instance on your local machine or cluster. Here are the instructions to install a UDF library:</p>
 <ul>
 
 <li>
 
-<p>Step 1: Stop the AsterixDB instance if it is in the ACTIVE state.</p>
+<p>Step 1: Stop the AsterixDB instance if it is ACTIVE.</p>
 
 <div>
 <div>
-<pre class="source">$ managix stop -n my_asterix
+<pre class="source">$ bin/stop.sh
 </pre></div></div>
 </li>
 <li>
 
-<p>Step 2: Install the library using Managix install command. Just to illustrate, we use the help command to look up the syntax</p>
+<p>Step 2: Deploy the UDF package.</p>
 
 <div>
 <div>
-<pre class="source">$ managix help  -cmd install
-Installs a library to an asterix instance.
-Options
-n  Name of Asterix Instance
-d  Name of the dataverse under which the library will be installed
-l  Name of the library
-p  Path to library zip bundle
+<pre class="source">$ bin/udf.sh -m i -d DATAVERSE_NAME -l LIBRARY_NAME -p UDF_PACKAGE_PATH
 </pre></div></div>
 </li>
-</ul>
-<p>Above is a sample output and explains the usage and the required parameters. Each library has a name and is installed under a dataverse. Recall that we had created a dataverse by the name - &#x201c;feeds&#x201d; prior to  creating our datatypes and dataset. We shall name our library - &#x201c;testlib&#x201d;.</p>
-<p>We assume you have a library zip bundle that needs to be installed. To install the library, use the Managix install command. An example is shown below.</p>
+<li>
+
+<p>Step 3: Start AsterixDB</p>
 
 <div>
 <div>
-<pre class="source">    $ managix install -n my_asterix -d feeds -l testlib -p extlibs/asterix-external-data-0.8.7-binary-assembly.zip
+<pre class="source">$ bin/start.sh
 </pre></div></div>
-
-<p>You should see the following message:</p>
+</li>
+</ul>
+<p>After AsterixDB starts, you can use the following query to check whether your UDFs have been successfully registered with the system.</p>
 
 <div>
 <div>
-<pre class="source">    INFO: Installed library testlib
+<pre class="source">    SELECT * FROM Metadata.`Function`;
 </pre></div></div>
 
-<p>We shall next start our AsterixDB instance using the start command as shown below.</p>
+<p>In the AsterixDB source release, we provide several sample UDFs that you can try out. You need to build the AsterixDB source to get the compiled UDF package. It can be found under the <tt>asterixdb-external</tt> sub-project. Assuming that these UDFs have been installed into the <tt>udfs</tt> dataverse and <tt>testlib</tt> library, here is an example that uses the sample UDF <tt>mysum</tt> to compute the sum of two input integers.</p>
 
 <div>
 <div>
-<pre class="source">    $ managix start -n my_asterix
-</pre></div></div>
+<pre class="source">    use udfs;
 
-<p>You may now use the AsterixDB library in AQL statements and queries. To look at the installed artifacts, you may execute the following query at the AsterixDB web-console.</p>
+    testlib#mysum(3,4);
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Attaching_a_UDF_on_Data_Feeds"></a><a name="UDFOnFeeds" id="UDFOnFeeds">Attaching a UDF on Data Feeds</a></h2>
+<p>In <a href="feeds.html">Data Ingestion using feeds</a>, we introduced an efficient way for users to get data into AsterixDB. In some use cases, users may want to pre-process the incoming data before storing it into the dataset. To meet this need, AsterixDB allows the user to attach a UDF onto the ingestion pipeline. Following the example in <a href="feeds.html">Data Ingestion</a>, here we show an example of how to attach a UDF that extracts the user names mentioned from the incoming Tweet text, storing the processed Tweets into a dataset.</p>
+<p>We start by creating the datatype and dataset that will be used for the feed and UDF. One thing to keep in mind is that data flows from the feed to the UDF and then to the dataset. This means that the feed&#x2019;s datatype should be the same as the input type of the UDF, and the output datatype of the UDF should be the same as the dataset&#x2019;s datatype. Thus, users should make sure that their datatypes are consistent in the UDF configuration. Users can also take advantage of open datatypes in AsterixDB by creating a minimum description of the data for simplicity. Here we use open datatypes:</p>
 
 <div>
 <div>
-<pre class="source">    for $x in dataset Metadata.Function
-    return $x
+<pre class="source">    use udfs;
+
+    create type TweetType if not exists as open {
+        id: int64
+    };
 
-    for $x in dataset Metadata.Library
-    return $x
+    create dataset ProcessedTweets(TweetType) primary key id;
 </pre></div></div>
 
-<p>Our library is now installed and is ready to be used.</p></div></div>
-<div class="section">
-<h2><a name="Preprocessing_Collected_Data"></a><a name="PreprocessingCollectedData" id="PreprocessingCollectedData">Preprocessing Collected Data</a></h2>
-<p>In the following we assume that you already created the <tt>TwitterFeed</tt> and its corresponding data types and dataset following the instruction explained in the <a href="feeds/tutorial.html">feeds tutorial</a>.</p>
-<p>A feed definition may optionally include the specification of a user-defined function that is to be applied to each feed object prior to persistence. Examples of pre-processing might include adding attributes, filtering out objects, sampling, sentiment analysis, feature extraction, etc. We can express a UDF, which can be defined in AQL or in a programming language such as Java, to perform such pre-processing. An AQL UDF is a good fit when pre-processing a object requires the result of a query (join or aggregate) over data contained in AsterixDB datasets. More sophisticated processing such as sentiment analysis of text is better handled by providing a Java UDF. A Java UDF has an initialization phase that allows the UDF to access any resources it may need to initialize itself prior to being used in a data flow. It is assumed by the AsterixDB compiler to be stateless and thus usable as an embarrassingly parallel black box. In contrast, the AsterixDB compiler can reason about an AQL 
 UDF and involve the use of indexes during its invocation.</p>
-<p>We consider an example transformation of a raw tweet into its lightweight version called <tt>ProcessedTweet</tt>, which is defined next.</p>
+<p>As the <tt>TweetType</tt> is an open datatype, processed Tweets can be stored into the dataset after they are annotated with an extra attribute. Given the datatype and dataset above, we can create a Twitter Feed with the same datatype. Please refer to section <a href="feeds.html">Data Ingestion</a> if you have any trouble in creating feeds.</p>
 
 <div>
 <div>
-<pre class="source">    use dataverse feeds;
-
-    create type ProcessedTweet if not exists as open {
-        id: string,
-        user_name:string,
-        location:point,
-        created_at:string,
-        message_text:string,
-        country: string,
-        topics: {{string}}
+<pre class="source">    use udfs;
+
+    create feed TwitterFeed with {
+      &quot;adapter-name&quot;: &quot;push_twitter&quot;,
+      &quot;type-name&quot;: &quot;TweetType&quot;,
+      &quot;format&quot;: &quot;twitter-status&quot;,
+      &quot;consumer.key&quot;: &quot;************&quot;,
+      &quot;consumer.secret&quot;: &quot;************&quot;,
+      &quot;access.token&quot;: &quot;**********&quot;,
+      &quot;access.token.secret&quot;: &quot;*************&quot;
     };
-
-    create dataset ProcessedTweets(ProcessedTweet)
-    primary key id;
 </pre></div></div>
 
-<p>The processing required in transforming a collected tweet to its lighter version of type <tt>ProcessedTweet</tt> involves extracting the topics or hash-tags (if any) in a tweet and collecting them in the referred &#x201c;topics&#x201d; attribute for the tweet. Additionally, the latitude and longitude values (doubles) are combined into the spatial point type. Note that spatial data types are considered as first-class citizens that come with the support for creating indexes. Next we show a revised version of our example TwitterFeed that involves the use of a UDF. We assume that the UDF that contains the transformation logic into a &#x201c;ProcessedTweet&#x201d; is available as a Java UDF inside an AsterixDB library named &#x2018;testlib&#x2019;. We defer the writing of a Java UDF and its installation as part of an AsterixDB library to a later section of this document.</p>
+<p>After creating the feed, we attach the UDF onto the feed pipeline and start the feed with the following statements:</p>
 
 <div>
 <div>
-<pre class="source">    use dataverse feeds;
+<pre class="source">    use udfs;
 
-    create feed ProcessedTwitterFeed if not exists
-    using &quot;push_twitter&quot;
-    ((&quot;type-name&quot;=&quot;Tweet&quot;),
-    (&quot;consumer.key&quot;=&quot;************&quot;),
-    (&quot;consumer.secret&quot;=&quot;**************&quot;),
-    (&quot;access.token&quot;=&quot;**********&quot;),
-    (&quot;access.token.secret&quot;=&quot;*************&quot;))
+    connect feed TwitterFeed to dataset ProcessedTweets apply function udfs#addMentionedUsers;
 
-    apply function testlib#addHashTagsInPlace;
+    start feed TwitterFeed;
 </pre></div></div>
 
-<p>Note that a feed adaptor and a UDF act as pluggable components. These contribute towards providing a generic &#x201c;plug-and-play&#x201d; model where custom implementations can be provided to cater to specific requirements.</p>
-<div class="section">
-<div class="section">
-<h4><a name="Building_a_Cascade_Network_of_Feeds"></a>Building a Cascade Network of Feeds</h4>
-<p>Multiple high-level applications may wish to consume the data ingested from a data feed. Each such application might perceive the feed in a different way and require the arriving data to be processed and/or persisted differently. Building a separate flow of data from the external source for each application is wasteful of resources as the pre-processing or transformations required by each application might overlap and could be done together in an incremental fashion to avoid redundancy. A single flow of data from the external source could provide data for multiple applications. To achieve this, we introduce the notion of primary and secondary feeds in AsterixDB.</p>
-<p>A feed in AsterixDB is considered to be a primary feed if it gets its data from an external data source. The objects contained in a feed (subsequent to any pre-processing) are directed to a designated AsterixDB dataset. Alternatively or additionally, these objects can be used to derive other feeds known as secondary feeds. A secondary feed is similar to its parent feed in every other aspect; it can have an associated UDF to allow for any subsequent processing, can be persisted into a dataset, and/or can be made to derive other secondary feeds to form a cascade network. A primary feed and a dependent secondary feed form a hierarchy. As an example, we next show an example AQL statement that redefines the previous feed &#x201c;ProcessedTwitterFeed&#x201d; in terms of their respective parent feed (TwitterFeed).</p>
+<p>You can check the annotated Tweets by querying the <tt>ProcessedTweets</tt> dataset:</p>
 
 <div>
 <div>
-<pre class="source">    use dataverse feeds;
-
-    drop feed ProcessedTwitterFeed if exists;
-
-    create secondary feed ProcessedTwitterFeed from feed TwitterFeed
-    apply function testlib#addHashTags;
+<pre class="source">    SELECT * FROM ProcessedTweets LIMIT 10;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="A_quick_look_of_the_UDF_configuration"></a><a name="udfConfiguration">A quick look of the UDF configuration</a></h2>
+<p>AsterixDB uses an XML configuration file to describe the UDFs. A user can use it to define and reuse their compiled UDFs for different purposes. Here is a snippet of the configuration used in our <a href="#UDFOnFeeds">previous example</a>:</p>
 
-    connect feed ProcessedTwitterFeed to dataset ProcessedTweets;
+<div>
+<div>
+<pre class="source">    &lt;libraryFunction&gt;
+      &lt;name&gt;addMentionedUsers&lt;/name&gt;
+      &lt;function_type&gt;SCALAR&lt;/function_type&gt;
+      &lt;argument_type&gt;TweetType&lt;/argument_type&gt;
+      &lt;return_type&gt;TweetType&lt;/return_type&gt;
+      &lt;definition&gt;org.apache.asterix.external.library.AddMentionedUsersFactory&lt;/definition&gt;
+      &lt;parameters&gt;text&lt;/parameters&gt;
+    &lt;/libraryFunction&gt;
 </pre></div></div>
 
-<p>The <tt>addHashTags</tt> function is already provided in the example UDF.To see what objects are being inserted into the dataset, we can perform a simple dataset scan after allowing a few moments for the feed to start ingesting data:</p>
+<p>Here are the explanations of the fields in the configuration file:</p>
 
 <div>
 <div>
-<pre class="source">    use dataverse feeds;
-
-    for $i in dataset ProcessedTweets limit 10 return $i;
+<pre class="source">   name: The proper name that is used to invoke the function.
+   function_type: The type of the function.
+   argument_type: The datatype of the arguments passed in. If there is more than one parameter, separate them with comma(s), e.g., `AINT32,AINT32`.
+   return_type: The datatype of the returning value.
+   definition: A reference to the function factory.
+   parameters: The parameters passed into the function.
 </pre></div></div>
 
-<p>For an example of how to write a Java UDF from scratch, the source for the example UDF that has been used in this tutorial is available [here] (<a class="externalLink" href="https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library">https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library</a>)</p></div></div></div>
+<p>In our feeds example, we passed in <tt>&quot;text&quot;</tt> as a parameter to the function so it knows which field to look at to get the Tweet text. If the Twitter API were to change its field names in the future, we can accommodate that change by simply modifying the configuration file instead of recompiling the whole UDF package. This feature can be further utilized in use cases where a user has a Machine Learning algorithm with different trained model files. If you are interested, you can find more examples <a class="externalLink" href="https://github.com/apache/asterixdb/tree/master/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library">here</a>.</p></div>
 <div class="section">
-<h2><a name="Unstalling_an_AsterixDB_Library"></a><a name="installingUDF">Unstalling an AsterixDB Library</a></h2>
-<p>To uninstall a library, use the Managix uninstall command as follows:</p>
+<h2><a name="Unstalling_an_UDF_Library"></a><a name="uninstall">Uninstalling a UDF Library</a></h2>
+<p>If you want to uninstall the UDF library, put AsterixDB into <tt>INACTIVE</tt> mode and run the following command:</p>
 
 <div>
 <div>
-<pre class="source">    $ managix stop -n my_asterix
-
-    $ managix uninstall -n my_asterix -d feeds -l testlib
+<pre class="source">    $ bin/udf.sh -m u -d DATAVERSE_NAME -l LIBRARY_NAME
 </pre></div></div></div>
         </div>
       </div>

http://git-wip-us.apache.org/repos/asf/asterixdb-site/blob/b2f1d3e6/docs/0.9.4/ansible.html
----------------------------------------------------------------------
diff --git a/docs/0.9.4/ansible.html b/docs/0.9.4/ansible.html
new file mode 100644
index 0000000..4722841
--- /dev/null
+++ b/docs/0.9.4/ansible.html
@@ -0,0 +1,293 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from target/generated-site/markdown/ansible.md at 2018-10-12
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181012" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; Installation using Ansible</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+    <script type="text/javascript" src="./js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href="./" id="bannerLeft"><img src="images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2018-10-12</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.4</li>
+      <li class="pull-right"><a href="index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href="./" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>Installation using Ansible</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="atoc" id="#toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Introduction">Introduction</a></li>
+<li><a href="#Prerequisites">Prerequisites</a></li>
+<li><a href="#config">Cluster Configuration</a></li>
+<li><a href="#lifecycle">Cluster Lifecycle Management</a></li>
+</ul><!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+</div>
+<div class="section">
+<h2><a name="Introduction" id="Introduction">Introduction</a></h2>
+<p>This installation option provides several wrapped <a class="externalLink" href="https://www.ansible.com/">Ansible</a>-based scripts to deploy, start, stop, and erase an AsterixDB instance on a multi-node cluster without requiring users to interact with each individual node in the cluster.</p></div>
+<div class="section">
+<h2><a name="Prerequisites" id="Prerequisites">Prerequisites</a></h2>
+<ul>
+
+<li>
+
+<p>Supported operating systems: <b>Linux</b> and <b>MacOS</b></p>
+</li>
+<li>
+
+<p>Install pip on your client machine:</p>
+<p>CentOS</p>
+
+<div>
+<div>
+<pre class="source"> $ sudo yum install python-pip
+</pre></div></div>
+
+<p>Ubuntu</p>
+
+<div>
+<div>
+<pre class="source"> $ sudo apt-get install python-pip
+</pre></div></div>
+
+<p>macOS</p>
+
+<div>
+<div>
+<pre class="source"> $ brew install pip
+</pre></div></div>
+</li>
+<li>
+
+<p>Install Ansible, boto, and boto3 on your client machine:</p>
+
+<div>
+<div>
+<pre class="source"> $ pip install ansible
+ $ pip install boto
+ $ pip install boto3
+</pre></div></div>
+
+<p>Note that you might need <tt>sudo</tt> depending on your system configuration.</p>
+<p><b>Make sure that the version of Ansible is no less than 2.2.1.0</b>:</p>
+
+<div>
+<div>
+<pre class="source"> $ ansible --version
+ ansible 2.2.1.0
+</pre></div></div>
+</li>
+<li>
+
+<p>Download the AsterixDB distribution package, unzip it, and navigate to <tt>opt/ansible/</tt></p>
+
+<div>
+<div>
+<pre class="source"> $ cd opt/ansible
+</pre></div></div>
+
+<p>The following files and directories are in the directory <tt>opt/ansible</tt>:</p>
+
+<div>
+<div>
+<pre class="source"> README  bin  conf  yaml
+</pre></div></div>
+
+<p><tt>bin</tt> contains scripts that deploy, start, stop and erase a multi-node AsterixDB cluster, according to the configuration specified in files under <tt>conf</tt>, and <tt>yaml</tt> contains internal Ansible scripts that the shell scripts in <tt>bin</tt> use.</p>
+</li>
+</ul></div>
+<div class="section">
+<h2><a name="Cluster_Configuration"></a><a name="config" id="config">Cluster Configuration</a></h2>
+<ul>
+
+<li>
+
+<p><b>Nodes and account</b>. Edit the inventory file <tt>conf/inventory</tt> when necessary. You mostly only need to specify the node DNS names (or IPs) for the cluster controller, i.e., the master node, in the <b>[cc]</b> section, and node controllers, i.e., slave nodes, in the <b>[ncs]</b> section. The following example configures a cluster with two slave nodes (172.0.1.11 and 172.0.1.12) and one master node (172.0.1.10).</p>
+
+<div>
+<div>
+<pre class="source"> [cc]
+ 172.0.1.10
+
+ [ncs]
+ 172.0.1.11
+ 172.0.1.12
+</pre></div></div>
+
+<p><b>Configure passwordless ssh from your current client that runs the scripts to all nodes listed in <tt>conf/inventory</tt> as well as <tt>localhost</tt>.</b> If the ssh user account for target machines is different from your current username, please uncomment and edit the following two lines:</p>
+
+<div>
+<div>
+<pre class="source"> ;[all:vars]
+ ;ansible_ssh_user=&lt;fill with your ssh account username&gt;
+</pre></div></div>
+
+<p>If you want to specify advanced Ansible builtin variables, please refer to the <a class="externalLink" href="http://docs.ansible.com/ansible/intro_inventory.html">Ansible documentation</a>.</p>
+</li>
+<li>
+
+<p><b>Remote working directories</b>. Edit <tt>conf/instance_settings.yml</tt> to change the remote binary directory (the variable &#x201c;binarydir&#x201d;) when necessary. By default, the binary directory will be under the home directory (as the value of Ansible builtin variable ansible_env.HOME) of the ssh user account on each node.</p>
+</li>
+</ul></div>
+<div class="section">
+<h2><a name="Cluster_Lifecycle_Management"></a><a name="lifecycle" id="lifecycle">Cluster Lifecycle Management</a></h2>
+<ul>
+
+<li>
+
+<p>Deploy the binary to all nodes:</p>
+
+<div>
+<div>
+<pre class="source"> $ bin/deploy.sh
+</pre></div></div>
+</li>
+<li>
+
+<p>Every time before starting the AsterixDB cluster, you can edit the instance configuration file <tt>conf/instance/cc.conf</tt>, except that IP addresses/DNS names are generated and cannot be changed. All available parameters and their usage can be found <a href="ncservice.html#Parameters">here</a>.</p>
+</li>
+<li>
+
+<p>Launch your AsterixDB cluster:</p>
+
+<div>
+<div>
+<pre class="source"> $ bin/start.sh
+</pre></div></div>
+
+<p>Now you can use the multi-node AsterixDB cluster by opening the master node listed in <tt>conf/inventory</tt> at port <tt>19001</tt> (which can be customized in <tt>conf/instance/cc.conf</tt>) in your browser.</p>
+</li>
+<li>
+
+<p>If you want to stop the multi-node AsterixDB cluster, run the following script:</p>
+
+<div>
+<div>
+<pre class="source"> $ bin/stop.sh
+</pre></div></div>
+</li>
+<li>
+
+<p>If you want to remove the binary on all nodes, run the following script:</p>
+
+<div>
+<div>
+<pre class="source"> $ bin/erase.sh
+</pre></div></div>
+</li>
+</ul></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/asterixdb-site/blob/b2f1d3e6/docs/0.9.4/api.html
----------------------------------------------------------------------
diff --git a/docs/0.9.4/api.html b/docs/0.9.4/api.html
index f1d98be..2de7ec2 100644
--- a/docs/0.9.4/api.html
+++ b/docs/0.9.4/api.html
@@ -1,13 +1,13 @@
 <!DOCTYPE html>
 <!--
- | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/api.md at 2018-10-02
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/api.md at 2018-10-11
  | Rendered using Apache Maven Fluido Skin 1.7
 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <meta name="Date-Revision-yyyymmdd" content="20181002" />
+    <meta name="Date-Revision-yyyymmdd" content="20181011" />
     <meta http-equiv="Content-Language" content="en" />
     <title>AsterixDB &#x2013; HTTP API to AsterixDB</title>
     <link rel="stylesheet" href="./css/apache-maven-fluido-1.7.min.css" />
@@ -26,8 +26,8 @@
 
       <div id="breadcrumbs">
         <ul class="breadcrumb">
-        <li id="publishDate">Last Published: 2018-10-02</li>
-      <li id="projectVersion" class="pull-right">Version: 0.9.4</li>
+        <li id="publishDate">Last Published: 2018-10-11</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.5-SNAPSHOT</li>
       <li class="pull-right"><a href="index.html" title="Documentation Home">Documentation Home</a></li>
         </ul>
       </div>
@@ -40,26 +40,26 @@
     <li><a href="ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
     <li><a href="aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
       <li class="nav-header">AsterixDB Primer</li>
-    <li><a href="sqlpp/primer-sqlpp.html" title="Option 1: using SQL++"><span class="none"></span>Option 1: using SQL++</a></li>
-    <li><a href="aql/primer.html" title="Option 2: using AQL"><span class="none"></span>Option 2: using AQL</a></li>
+    <li><a href="sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
       <li class="nav-header">Data Model</li>
     <li><a href="datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
-      <li class="nav-header">Queries - SQL++</li>
+      <li class="nav-header">Queries</li>
     <li><a href="sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
     <li><a href="sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
-      <li class="nav-header">Queries - AQL</li>
-    <li><a href="aql/manual.html" title="The Asterix Query Language (AQL)"><span class="none"></span>The Asterix Query Language (AQL)</a></li>
-    <li><a href="aql/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
       <li class="nav-header">API/SDK</li>
     <li class="active"><a href="#"><span class="none"></span>HTTP API</a></li>
     <li><a href="csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
       <li class="nav-header">Advanced Features</li>
-    <li><a href="aql/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
     <li><a href="aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
-    <li><a href="feeds/tutorial.html" title="Support for Data Ingestion"><span class="none"></span>Support for Data Ingestion</a></li>
+    <li><a href="feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
     <li><a href="udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
-    <li><a href="aql/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
-    <li><a href="aql/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
 </ul>
           <hr />
           <div id="poweredBy">