Posted to commits@calcite.apache.org by el...@apache.org on 2016/03/07 19:27:58 UTC

[20/59] [partial] calcite git commit: [CALCITE-1078] Detach avatica from the core calcite Maven project

http://git-wip-us.apache.org/repos/asf/calcite/blob/5cee486f/avatica/site/_docs/reference.md
----------------------------------------------------------------------
diff --git a/avatica/site/_docs/reference.md b/avatica/site/_docs/reference.md
new file mode 100644
index 0000000..7bc9bc3
--- /dev/null
+++ b/avatica/site/_docs/reference.md
@@ -0,0 +1,1248 @@
+---
+layout: docs
+title: SQL language
+permalink: /docs/reference.html
+---
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% endcomment %}
+-->
+
+This page describes the SQL dialect recognized by Calcite's default SQL parser.
+
+## Grammar
+
+SQL grammar in [BNF](http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form)-like
+form.
+
+{% highlight sql %}
+statement:
+      setStatement
+  |   resetStatement
+  |   explain
+  |   insert
+  |   update
+  |   merge
+  |   delete
+  |   query
+
+setStatement:
+      [ ALTER ( SYSTEM | SESSION ) ] SET identifier '=' expression
+
+resetStatement:
+      [ ALTER ( SYSTEM | SESSION ) ] RESET identifier
+  |   [ ALTER ( SYSTEM | SESSION ) ] RESET ALL
+
+explain:
+      EXPLAIN PLAN
+      [ WITH TYPE | WITH IMPLEMENTATION | WITHOUT IMPLEMENTATION ]
+      [ EXCLUDING ATTRIBUTES | INCLUDING [ ALL ] ATTRIBUTES ]
+      FOR ( insert | update | merge | delete | query )
+
+insert:
+      ( INSERT | UPSERT ) INTO tablePrimary
+      [ '(' column [, column ]* ')' ]
+      query
+
+update:
+      UPDATE tablePrimary
+      SET assign [, assign ]*
+      [ WHERE booleanExpression ]
+
+assign:
+      identifier '=' expression
+
+merge:
+      MERGE INTO tablePrimary [ [ AS ] alias ]
+      USING tablePrimary
+      ON booleanExpression
+      [ WHEN MATCHED THEN UPDATE SET assign [, assign ]* ]
+      [ WHEN NOT MATCHED THEN INSERT VALUES '(' value [ , value ]* ')' ]
+
+delete:
+      DELETE FROM tablePrimary [ [ AS ] alias ]
+      [ WHERE booleanExpression ]
+
+query:
+      WITH withItem [ , withItem ]* query
+  |   {
+          select
+      |   query UNION [ ALL ] query
+      |   query EXCEPT query
+      |   query INTERSECT query
+      }
+      [ ORDER BY orderItem [, orderItem ]* ]
+      [ LIMIT { count | ALL } ]
+      [ OFFSET start { ROW | ROWS } ]
+      [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ]
+
+withItem:
+      name
+      [ '(' column [, column ]* ')' ]
+      AS '(' query ')'
+
+orderItem:
+      expression [ ASC | DESC ] [ NULLS FIRST | NULLS LAST ]
+
+select:
+      SELECT [ STREAM ] [ ALL | DISTINCT ]
+          { * | projectItem [, projectItem ]* }
+      FROM tableExpression
+      [ WHERE booleanExpression ]
+      [ GROUP BY { groupItem [, groupItem ]* } ]
+      [ HAVING booleanExpression ]
+      [ WINDOW windowName AS windowSpec [, windowName AS windowSpec ]* ]
+
+projectItem:
+      expression [ [ AS ] columnAlias ]
+  |   tableAlias . *
+
+tableExpression:
+      tableReference [, tableReference ]*
+  |   tableExpression [ NATURAL ] [ LEFT | RIGHT | FULL ] JOIN tableExpression [ joinCondition ]
+
+joinCondition:
+      ON booleanExpression
+  |   USING '(' column [, column ]* ')'
+
+tableReference:
+      [ LATERAL ]
+      tablePrimary
+      [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ]
+
+tablePrimary:
+      [ TABLE ] [ [ catalogName . ] schemaName . ] tableName
+  |   '(' query ')'
+  |   values
+  |   UNNEST '(' expression ')' [ WITH ORDINALITY ]
+  |   TABLE '(' [ SPECIFIC ] functionName '(' expression [, expression ]* ')' ')'
+
+values:
+      VALUES expression [, expression ]*
+
+groupItem:
+      expression
+  |   '(' ')'
+  |   '(' expression [, expression ]* ')'
+  |   CUBE '(' expression [, expression ]* ')'
+  |   ROLLUP '(' expression [, expression ]* ')'
+  |   GROUPING SETS '(' groupItem [, groupItem ]* ')'
+
+windowRef:
+      windowName
+  |   windowSpec
+
+windowSpec:
+      [ windowName ]
+      '('
+      [ ORDER BY orderItem [, orderItem ]* ]
+      [ PARTITION BY expression [, expression ]* ]
+      [
+          RANGE numericOrIntervalExpression { PRECEDING | FOLLOWING }
+      |   ROWS numericExpression { PRECEDING | FOLLOWING }
+      ]
+      ')'
+{% endhighlight %}
+
+In *merge*, at least one of the WHEN MATCHED and WHEN NOT MATCHED clauses must
+be present.
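+
+For example, the following sketch (assuming hypothetical tables `emp` and
+`new_emps`) updates the salary of employees that already exist and inserts
+those that do not:
+
+{% highlight sql %}
+MERGE INTO emp AS e
+USING new_emps
+ON e.empno = new_emps.empno
+WHEN MATCHED THEN UPDATE SET sal = new_emps.sal
+WHEN NOT MATCHED THEN INSERT VALUES (new_emps.empno, new_emps.name, new_emps.sal);
+{% endhighlight %}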
+
+In *orderItem*, if *expression* is a positive integer *n*, it denotes
+the <em>n</em>th item in the SELECT clause.
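+
+For example, this sketch (assuming a hypothetical table `emp`) sorts by the
+second item in the SELECT clause, `name`:
+
+{% highlight sql %}
+SELECT empno, name
+FROM emp
+ORDER BY 2;
+{% endhighlight %}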
+
+An aggregate query is a query that contains a GROUP BY or a HAVING
+clause, or aggregate functions in the SELECT clause. In the SELECT,
+HAVING and ORDER BY clauses of an aggregate query, all expressions
+must be constant within the current group (that is, grouping constants
+as defined by the GROUP BY clause, or constants), or aggregate
+functions, or a combination of constants and aggregate
+functions. Aggregate and grouping functions may only appear in an
+aggregate query, and only in a SELECT, HAVING or ORDER BY clause.
+
+A scalar sub-query is a sub-query used as an expression.
+If the sub-query returns no rows, the value is NULL; if it
+returns more than one row, it is an error.
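+
+For example, the following sketch (assuming a hypothetical table `emp`) uses
+a scalar sub-query as an ordinary expression in the SELECT clause:
+
+{% highlight sql %}
+SELECT name,
+  sal - (SELECT AVG(sal) FROM emp) AS diff
+FROM emp;
+{% endhighlight %}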
+
+IN, EXISTS and scalar sub-queries can occur
+in any place where an expression can occur (such as the SELECT clause,
+WHERE clause, ON clause of a JOIN, or as an argument to an aggregate
+function).
+
+An IN, EXISTS or scalar sub-query may be correlated; that is, it
+may refer to tables in the FROM clause of an enclosing query.
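+
+For example, the following sketch (assuming hypothetical tables `emp` and
+`dept`) uses a correlated EXISTS sub-query; the inner query refers to
+`dept.deptno` from the enclosing query:
+
+{% highlight sql %}
+SELECT name
+FROM dept
+WHERE EXISTS (
+  SELECT 1
+  FROM emp
+  WHERE emp.deptno = dept.deptno);
+{% endhighlight %}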
+
+## Keywords
+
+The following is a list of SQL keywords.
+Reserved keywords are **bold**.
+
+{% comment %} start {% endcomment %}
+A,
+**ABS**,
+ABSOLUTE,
+ACTION,
+ADA,
+ADD,
+ADMIN,
+AFTER,
+**ALL**,
+**ALLOCATE**,
+**ALLOW**,
+**ALTER**,
+ALWAYS,
+**AND**,
+**ANY**,
+**ARE**,
+**ARRAY**,
+**AS**,
+ASC,
+**ASENSITIVE**,
+ASSERTION,
+ASSIGNMENT,
+**ASYMMETRIC**,
+**AT**,
+**ATOMIC**,
+ATTRIBUTE,
+ATTRIBUTES,
+**AUTHORIZATION**,
+**AVG**,
+BEFORE,
+**BEGIN**,
+BERNOULLI,
+**BETWEEN**,
+**BIGINT**,
+**BINARY**,
+**BIT**,
+**BLOB**,
+**BOOLEAN**,
+**BOTH**,
+BREADTH,
+**BY**,
+C,
+**CALL**,
+**CALLED**,
+**CARDINALITY**,
+CASCADE,
+**CASCADED**,
+**CASE**,
+**CAST**,
+CATALOG,
+CATALOG_NAME,
+**CEIL**,
+**CEILING**,
+CHAIN,
+**CHAR**,
+**CHARACTER**,
+CHARACTERISTICS,
+CHARACTERS,
+**CHARACTER_LENGTH**,
+CHARACTER_SET_CATALOG,
+CHARACTER_SET_NAME,
+CHARACTER_SET_SCHEMA,
+**CHAR_LENGTH**,
+**CHECK**,
+CLASS_ORIGIN,
+**CLOB**,
+**CLOSE**,
+**COALESCE**,
+COBOL,
+**COLLATE**,
+COLLATION,
+COLLATION_CATALOG,
+COLLATION_NAME,
+COLLATION_SCHEMA,
+**COLLECT**,
+**COLUMN**,
+COLUMN_NAME,
+COMMAND_FUNCTION,
+COMMAND_FUNCTION_CODE,
+**COMMIT**,
+COMMITTED,
+**CONDITION**,
+CONDITION_NUMBER,
+**CONNECT**,
+CONNECTION,
+CONNECTION_NAME,
+**CONSTRAINT**,
+CONSTRAINTS,
+CONSTRAINT_CATALOG,
+CONSTRAINT_NAME,
+CONSTRAINT_SCHEMA,
+CONSTRUCTOR,
+CONTAINS,
+CONTINUE,
+**CONVERT**,
+**CORR**,
+**CORRESPONDING**,
+**COUNT**,
+**COVAR_POP**,
+**COVAR_SAMP**,
+**CREATE**,
+**CROSS**,
+**CUBE**,
+**CUME_DIST**,
+**CURRENT**,
+**CURRENT_CATALOG**,
+**CURRENT_DATE**,
+**CURRENT_DEFAULT_TRANSFORM_GROUP**,
+**CURRENT_PATH**,
+**CURRENT_ROLE**,
+**CURRENT_SCHEMA**,
+**CURRENT_TIME**,
+**CURRENT_TIMESTAMP**,
+**CURRENT_TRANSFORM_GROUP_FOR_TYPE**,
+**CURRENT_USER**,
+**CURSOR**,
+CURSOR_NAME,
+**CYCLE**,
+DATA,
+**DATE**,
+DATETIME_INTERVAL_CODE,
+DATETIME_INTERVAL_PRECISION,
+**DAY**,
+**DEALLOCATE**,
+**DEC**,
+**DECIMAL**,
+**DECLARE**,
+**DEFAULT**,
+DEFAULTS,
+DEFERRABLE,
+DEFERRED,
+DEFINED,
+DEFINER,
+DEGREE,
+**DELETE**,
+**DENSE_RANK**,
+DEPTH,
+**DEREF**,
+DERIVED,
+DESC,
+**DESCRIBE**,
+DESCRIPTION,
+DESCRIPTOR,
+**DETERMINISTIC**,
+DIAGNOSTICS,
+**DISALLOW**,
+**DISCONNECT**,
+DISPATCH,
+**DISTINCT**,
+DOMAIN,
+**DOUBLE**,
+**DROP**,
+**DYNAMIC**,
+DYNAMIC_FUNCTION,
+DYNAMIC_FUNCTION_CODE,
+**EACH**,
+**ELEMENT**,
+**ELSE**,
+**END**,
+**END-EXEC**,
+EQUALS,
+**ESCAPE**,
+**EVERY**,
+**EXCEPT**,
+EXCEPTION,
+EXCLUDE,
+EXCLUDING,
+**EXEC**,
+**EXECUTE**,
+**EXISTS**,
+**EXP**,
+**EXPLAIN**,
+**EXTEND**,
+**EXTERNAL**,
+**EXTRACT**,
+**FALSE**,
+**FETCH**,
+**FILTER**,
+FINAL,
+FIRST,
+**FIRST_VALUE**,
+**FLOAT**,
+**FLOOR**,
+FOLLOWING,
+**FOR**,
+**FOREIGN**,
+FORTRAN,
+FOUND,
+**FREE**,
+**FROM**,
+**FULL**,
+**FUNCTION**,
+**FUSION**,
+G,
+GENERAL,
+GENERATED,
+**GET**,
+**GLOBAL**,
+GO,
+GOTO,
+**GRANT**,
+GRANTED,
+**GROUP**,
+**GROUPING**,
+**HAVING**,
+HIERARCHY,
+**HOLD**,
+**HOUR**,
+**IDENTITY**,
+IMMEDIATE,
+IMPLEMENTATION,
+**IMPORT**,
+**IN**,
+INCLUDING,
+INCREMENT,
+**INDICATOR**,
+INITIALLY,
+**INNER**,
+**INOUT**,
+INPUT,
+**INSENSITIVE**,
+**INSERT**,
+INSTANCE,
+INSTANTIABLE,
+**INT**,
+**INTEGER**,
+**INTERSECT**,
+**INTERSECTION**,
+**INTERVAL**,
+**INTO**,
+INVOKER,
+**IS**,
+ISOLATION,
+JAVA,
+**JOIN**,
+K,
+KEY,
+KEY_MEMBER,
+KEY_TYPE,
+LABEL,
+**LANGUAGE**,
+**LARGE**,
+LAST,
+**LAST_VALUE**,
+**LATERAL**,
+**LEADING**,
+**LEFT**,
+LENGTH,
+LEVEL,
+LIBRARY,
+**LIKE**,
+**LIMIT**,
+**LN**,
+**LOCAL**,
+**LOCALTIME**,
+**LOCALTIMESTAMP**,
+LOCATOR,
+**LOWER**,
+M,
+MAP,
+**MATCH**,
+MATCHED,
+**MAX**,
+MAXVALUE,
+**MEMBER**,
+**MERGE**,
+MESSAGE_LENGTH,
+MESSAGE_OCTET_LENGTH,
+MESSAGE_TEXT,
+**METHOD**,
+**MIN**,
+**MINUTE**,
+MINVALUE,
+**MOD**,
+**MODIFIES**,
+**MODULE**,
+**MONTH**,
+MORE,
+**MULTISET**,
+MUMPS,
+NAME,
+NAMES,
+**NATIONAL**,
+**NATURAL**,
+**NCHAR**,
+**NCLOB**,
+NESTING,
+**NEW**,
+**NEXT**,
+**NO**,
+**NONE**,
+**NORMALIZE**,
+NORMALIZED,
+**NOT**,
+**NULL**,
+NULLABLE,
+**NULLIF**,
+NULLS,
+NUMBER,
+**NUMERIC**,
+OBJECT,
+OCTETS,
+**OCTET_LENGTH**,
+**OF**,
+**OFFSET**,
+**OLD**,
+**ON**,
+**ONLY**,
+**OPEN**,
+OPTION,
+OPTIONS,
+**OR**,
+**ORDER**,
+ORDERING,
+ORDINALITY,
+OTHERS,
+**OUT**,
+**OUTER**,
+OUTPUT,
+**OVER**,
+**OVERLAPS**,
+**OVERLAY**,
+OVERRIDING,
+PAD,
+**PARAMETER**,
+PARAMETER_MODE,
+PARAMETER_NAME,
+PARAMETER_ORDINAL_POSITION,
+PARAMETER_SPECIFIC_CATALOG,
+PARAMETER_SPECIFIC_NAME,
+PARAMETER_SPECIFIC_SCHEMA,
+PARTIAL,
+**PARTITION**,
+PASCAL,
+PASSTHROUGH,
+PATH,
+**PERCENTILE_CONT**,
+**PERCENTILE_DISC**,
+**PERCENT_RANK**,
+PLACING,
+PLAN,
+PLI,
+**POSITION**,
+**POWER**,
+PRECEDING,
+**PRECISION**,
+**PREPARE**,
+PRESERVE,
+**PRIMARY**,
+PRIOR,
+PRIVILEGES,
+**PROCEDURE**,
+PUBLIC,
+**RANGE**,
+**RANK**,
+READ,
+**READS**,
+**REAL**,
+**RECURSIVE**,
+**REF**,
+**REFERENCES**,
+**REFERENCING**,
+**REGR_AVGX**,
+**REGR_AVGY**,
+**REGR_COUNT**,
+**REGR_INTERCEPT**,
+**REGR_R2**,
+**REGR_SLOPE**,
+**REGR_SXX**,
+**REGR_SXY**,
+**REGR_SYY**,
+RELATIVE,
+**RELEASE**,
+REPEATABLE,
+**RESET**,
+RESTART,
+RESTRICT,
+**RESULT**,
+**RETURN**,
+RETURNED_CARDINALITY,
+RETURNED_LENGTH,
+RETURNED_OCTET_LENGTH,
+RETURNED_SQLSTATE,
+**RETURNS**,
+**REVOKE**,
+**RIGHT**,
+ROLE,
+**ROLLBACK**,
+**ROLLUP**,
+ROUTINE,
+ROUTINE_CATALOG,
+ROUTINE_NAME,
+ROUTINE_SCHEMA,
+**ROW**,
+**ROWS**,
+ROW_COUNT,
+**ROW_NUMBER**,
+**SAVEPOINT**,
+SCALE,
+SCHEMA,
+SCHEMA_NAME,
+**SCOPE**,
+SCOPE_CATALOGS,
+SCOPE_NAME,
+SCOPE_SCHEMA,
+**SCROLL**,
+**SEARCH**,
+**SECOND**,
+SECTION,
+SECURITY,
+**SELECT**,
+SELF,
+**SENSITIVE**,
+SEQUENCE,
+SERIALIZABLE,
+SERVER,
+SERVER_NAME,
+SESSION,
+**SESSION_USER**,
+**SET**,
+SETS,
+**SIMILAR**,
+SIMPLE,
+SIZE,
+**SMALLINT**,
+**SOME**,
+SOURCE,
+SPACE,
+**SPECIFIC**,
+**SPECIFICTYPE**,
+SPECIFIC_NAME,
+**SQL**,
+**SQLEXCEPTION**,
+**SQLSTATE**,
+**SQLWARNING**,
+**SQRT**,
+**START**,
+STATE,
+STATEMENT,
+**STATIC**,
+**STDDEV_POP**,
+**STDDEV_SAMP**,
+**STREAM**,
+STRUCTURE,
+STYLE,
+SUBCLASS_ORIGIN,
+**SUBMULTISET**,
+SUBSTITUTE,
+**SUBSTRING**,
+**SUM**,
+**SYMMETRIC**,
+**SYSTEM**,
+**SYSTEM_USER**,
+**TABLE**,
+**TABLESAMPLE**,
+TABLE_NAME,
+TEMPORARY,
+**THEN**,
+TIES,
+**TIME**,
+**TIMESTAMP**,
+**TIMEZONE_HOUR**,
+**TIMEZONE_MINUTE**,
+**TINYINT**,
+**TO**,
+TOP_LEVEL_COUNT,
+**TRAILING**,
+TRANSACTION,
+TRANSACTIONS_ACTIVE,
+TRANSACTIONS_COMMITTED,
+TRANSACTIONS_ROLLED_BACK,
+TRANSFORM,
+TRANSFORMS,
+**TRANSLATE**,
+**TRANSLATION**,
+**TREAT**,
+**TRIGGER**,
+TRIGGER_CATALOG,
+TRIGGER_NAME,
+TRIGGER_SCHEMA,
+**TRIM**,
+**TRUE**,
+TYPE,
+**UESCAPE**,
+UNBOUNDED,
+UNCOMMITTED,
+UNDER,
+**UNION**,
+**UNIQUE**,
+**UNKNOWN**,
+UNNAMED,
+**UNNEST**,
+**UPDATE**,
+**UPPER**,
+**UPSERT**,
+USAGE,
+**USER**,
+USER_DEFINED_TYPE_CATALOG,
+USER_DEFINED_TYPE_CODE,
+USER_DEFINED_TYPE_NAME,
+USER_DEFINED_TYPE_SCHEMA,
+**USING**,
+**VALUE**,
+**VALUES**,
+**VARBINARY**,
+**VARCHAR**,
+**VARYING**,
+**VAR_POP**,
+**VAR_SAMP**,
+VERSION,
+VIEW,
+**WHEN**,
+**WHENEVER**,
+**WHERE**,
+**WIDTH_BUCKET**,
+**WINDOW**,
+**WITH**,
+**WITHIN**,
+**WITHOUT**,
+WORK,
+WRAPPER,
+WRITE,
+XML,
+**YEAR**,
+ZONE.
+{% comment %} end {% endcomment %}
+
+## Identifiers
+
+Identifiers are the names of tables, columns and other metadata
+elements used in a SQL query.
+
+Unquoted identifiers, such as emp, must start with a letter and can
+only contain letters, digits, and underscores. They are implicitly
+converted to upper case.
+
+Quoted identifiers, such as `"Employee Name"`, start and end with
+double quotes.  They may contain virtually any character, including
+spaces and other punctuation.  If you wish to include a double quote
+in an identifier, use another double quote to escape it, like this:
+`"An employee called ""Fred""."`.
+
+In Calcite, matching identifiers to the name of the referenced object is
+case-sensitive.  But remember that unquoted identifiers are implicitly
+converted to upper case before matching, and if the object it refers
+to was created using an unquoted identifier for its name, then its
+name will have been converted to upper case also.
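+
+For example, in the following sketch (assuming a table created with the
+unquoted name `emp`, and therefore stored as `EMP`), the first two queries
+match the table but the third does not:
+
+{% highlight sql %}
+SELECT * FROM emp;     -- unquoted; converted to EMP, matches
+SELECT * FROM "EMP";   -- quoted; matches EMP exactly
+SELECT * FROM "emp";   -- quoted; looks for a table named emp, fails
+{% endhighlight %}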
+
+## Data types
+
+### Scalar types
+
+| Data type   | Description               | Range and examples   |
+|:----------- |:------------------------- |:---------------------|
+| BOOLEAN     | Logical values            | Values: TRUE, FALSE, UNKNOWN
+| TINYINT     | 1 byte signed integer     | Range is -128 to 127
+| SMALLINT    | 2 byte signed integer     | Range is -32768 to 32767
+| INTEGER, INT | 4 byte signed integer    | Range is -2147483648 to 2147483647
+| BIGINT      | 8 byte signed integer     | Range is -9223372036854775808 to 9223372036854775807
+| DECIMAL(p, s) | Fixed point             | Example: 123.45 is a DECIMAL(5, 2) value.
+| NUMERIC     | Fixed point               |
+| REAL, FLOAT | 4 byte floating point     | 6 decimal digits precision
+| DOUBLE      | 8 byte floating point     | 15 decimal digits precision
+| CHAR(n), CHARACTER(n) | Fixed-width character string | 'Hello', '' (empty string), _latin1'Hello', n'Hello', _UTF16'Hello', 'Hello' 'there' (literal split into multiple parts)
+| VARCHAR(n), CHARACTER VARYING(n) | Variable-length character string | As CHAR(n)
+| BINARY(n)   | Fixed-width binary string | x'45F0AB', x'' (empty binary string), x'AB' 'CD' (multi-part binary string literal)
+| VARBINARY(n), BINARY VARYING(n) | Variable-length binary string | As BINARY(n)
+| DATE        | Date                      | Example: DATE '1969-07-20'
+| TIME        | Time of day               | Example: TIME '20:17:40'
+| TIMESTAMP [ WITHOUT TIME ZONE ] | Date and time | Example: TIMESTAMP '1969-07-20 20:17:40'
+| TIMESTAMP WITH TIME ZONE | Date and time with time zone | Example: TIMESTAMP '1969-07-20 20:17:40 America/Los_Angeles'
+| INTERVAL timeUnit [ TO timeUnit ] | Date time interval | Examples: INTERVAL '1-5' YEAR TO MONTH, INTERVAL '45' DAY
+| Anchored interval | Date time interval  | Example: (DATE '1969-07-20', DATE '1972-08-29')
+
+Where:
+
+{% highlight sql %}
+timeUnit:
+  YEAR | MONTH | DAY | HOUR | MINUTE | SECOND
+{% endhighlight %}
+
+Note:
+
+* DATE, TIME and TIMESTAMP have no time zone. There is not even an implicit
+  time zone, such as UTC (as in Java) or the local time zone. It is left to
+  the user or application to supply a time zone.
+
+### Non-scalar types
+
+| Type     | Description
+|:-------- |:-----------------------------------------------------------
+| ANY      | A value of an unknown type
+| ROW      | Row with 1 or more columns
+| MAP      | Collection of keys mapped to values
+| MULTISET | Unordered collection that may contain duplicates
+| ARRAY    | Ordered, contiguous collection that may contain duplicates
+| CURSOR   | Cursor over the result of executing a query
+
+## Operators and functions
+
+### Comparison operators
+
+| Operator syntax                                   | Description
+|:------------------------------------------------- |:-----------
+| value1 = value2                                   | Equals
+| value1 <> value2                                  | Not equal
+| value1 > value2                                   | Greater than
+| value1 >= value2                                  | Greater than or equal
+| value1 < value2                                   | Less than
+| value1 <= value2                                  | Less than or equal
+| value IS NULL                                     | Whether *value* is null
+| value IS NOT NULL                                 | Whether *value* is not null
+| value1 IS DISTINCT FROM value2                    | Whether two values are not equal, treating null values as the same
+| value1 IS NOT DISTINCT FROM value2                | Whether two values are equal, treating null values as the same
+| value1 BETWEEN value2 AND value3                  | Whether *value1* is greater than or equal to *value2* and less than or equal to *value3*
+| value1 NOT BETWEEN value2 AND value3              | Whether *value1* is less than *value2* or greater than *value3*
+| string1 LIKE string2 [ ESCAPE string3 ]           | Whether *string1* matches pattern *string2*
+| string1 NOT LIKE string2 [ ESCAPE string3 ]       | Whether *string1* does not match pattern *string2*
+| string1 SIMILAR TO string2 [ ESCAPE string3 ]     | Whether *string1* matches regular expression *string2*
+| string1 NOT SIMILAR TO string2 [ ESCAPE string3 ] | Whether *string1* does not match regular expression *string2*
+| value IN (value [, value]* )                      | Whether *value* is equal to a value in a list
+| value NOT IN (value [, value]* )                  | Whether *value* is not equal to every value in a list
+| value IN (sub-query)                              | Whether *value* is equal to a row returned by *sub-query*
+| value NOT IN (sub-query)                          | Whether *value* is not equal to every row returned by *sub-query*
+| EXISTS (sub-query)                                | Whether *sub-query* returns at least one row
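+
+For example, the following sketch (assuming a hypothetical table `emp` with a
+nullable `comm` column) uses a null-safe comparison; rows where `comm` is null
+are returned, because null is treated as distinct from 0:
+
+{% highlight sql %}
+SELECT name
+FROM emp
+WHERE comm IS DISTINCT FROM 0;
+{% endhighlight %}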
+
+### Logical operators
+
+| Operator syntax        | Description
+|:---------------------- |:-----------
+| boolean1 OR boolean2   | Whether *boolean1* is TRUE or *boolean2* is TRUE
+| boolean1 AND boolean2  | Whether *boolean1* and *boolean2* are both TRUE
+| NOT boolean            | Whether *boolean* is not TRUE; returns UNKNOWN if *boolean* is UNKNOWN
+| boolean IS FALSE       | Whether *boolean* is FALSE; returns FALSE if *boolean* is UNKNOWN
+| boolean IS NOT FALSE   | Whether *boolean* is not FALSE; returns TRUE if *boolean* is UNKNOWN
+| boolean IS TRUE        | Whether *boolean* is TRUE; returns FALSE if *boolean* is UNKNOWN
+| boolean IS NOT TRUE    | Whether *boolean* is not TRUE; returns TRUE if *boolean* is UNKNOWN
+| boolean IS UNKNOWN     | Whether *boolean* is UNKNOWN
+| boolean IS NOT UNKNOWN | Whether *boolean* is not UNKNOWN
+
+### Arithmetic operators and functions
+
+| Operator syntax           | Description
+|:------------------------- |:-----------
+| + numeric                 | Returns *numeric*
+| - numeric                 | Returns negative *numeric*
+| numeric1 + numeric2       | Returns *numeric1* plus *numeric2*
+| numeric1 - numeric2       | Returns *numeric1* minus *numeric2*
+| numeric1 * numeric2       | Returns *numeric1* multiplied by *numeric2*
+| numeric1 / numeric2       | Returns *numeric1* divided by *numeric2*
+| POWER(numeric1, numeric2) | Returns *numeric1* raised to the power of *numeric2*
+| ABS(numeric)              | Returns the absolute value of *numeric*
+| MOD(numeric1, numeric2)   | Returns the remainder (modulus) of *numeric1* divided by *numeric2*. The result is negative only if *numeric1* is negative
+| SQRT(numeric)             | Returns the square root of *numeric*
+| LN(numeric)               | Returns the natural logarithm (base *e*) of *numeric*
+| LOG10(numeric)            | Returns the base 10 logarithm of *numeric*
+| EXP(numeric)              | Returns *e* raised to the power of *numeric*
+| CEIL(numeric)             | Rounds *numeric* up, and returns the smallest number that is greater than or equal to *numeric*
+| FLOOR(numeric)            | Rounds *numeric* down, and returns the largest number that is less than or equal to *numeric*
+
+### Character string operators and functions
+
+| Operator syntax            | Description
+|:-------------------------- |:-----------
+| string &#124;&#124; string | Concatenates two character strings.
+| CHAR_LENGTH(string)        | Returns the number of characters in a character string
+| CHARACTER_LENGTH(string)   | As CHAR_LENGTH(*string*)
+| UPPER(string)              | Returns a character string converted to upper case
+| LOWER(string)              | Returns a character string converted to lower case
+| POSITION(string1 IN string2) | Returns the position of the first occurrence of *string1* in *string2*
+| TRIM( { BOTH &#124; LEADING &#124; TRAILING } string1 FROM string2) | Removes the longest string containing only the characters in *string1* from the start/end/both ends of *string2*
+| OVERLAY(string1 PLACING string2 FROM integer [ FOR integer2 ]) | Replaces a substring of *string1* with *string2*
+| SUBSTRING(string FROM integer)  | Returns a substring of a character string starting at a given point.
+| SUBSTRING(string FROM integer FOR integer) | Returns a substring of a character string starting at a given point with a given length.
+| INITCAP(string)            | Returns *string* with the first letter of each word converted to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
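+
+A few of these functions in action (a sketch; the single-row VALUES sub-query
+merely provides a row to select from):
+
+{% highlight sql %}
+SELECT UPPER('Hello') AS u,               -- 'HELLO'
+  CHAR_LENGTH('Hello') AS len,            -- 5
+  POSITION('ll' IN 'Hello') AS pos,       -- 3
+  SUBSTRING('Hello' FROM 2 FOR 3) AS sub  -- 'ell'
+FROM (VALUES (1));
+{% endhighlight %}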
+
+Not implemented:
+
+* SUBSTRING(string FROM regexp FOR regexp)
+
+### Binary string operators and functions
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| binary &#124;&#124; binary | Concatenates two binary strings.
+| POSITION(binary1 IN binary2) | Returns the position of the first occurrence of *binary1* in *binary2*
+| OVERLAY(binary1 PLACING binary2 FROM integer [ FOR integer2 ]) | Replaces a substring of *binary1* with *binary2*
+| SUBSTRING(binary FROM integer) | Returns a substring of *binary* starting at a given point
+| SUBSTRING(binary FROM integer FOR integer) | Returns a substring of *binary* starting at a given point with a given length
+
+### Date/time functions
+
+| Operator syntax           | Description
+|:------------------------- |:-----------
+| LOCALTIME                 | Returns the current time in the session time zone in a value of datatype TIME
+| LOCALTIME(precision)      | Returns the current time in the session time zone in a value of datatype TIME, with *precision* digits of precision
+| LOCALTIMESTAMP            | Returns the current date and time in the session time zone in a value of datatype TIMESTAMP
+| LOCALTIMESTAMP(precision) | Returns the current date and time in the session time zone in a value of datatype TIMESTAMP, with *precision* digits of precision
+| CURRENT_TIME              | Returns the current time in the session time zone, in a value of datatype TIMESTAMP WITH TIME ZONE
+| CURRENT_DATE              | Returns the current date in the session time zone, in a value of datatype DATE
+| CURRENT_TIMESTAMP         | Returns the current date and time in the session time zone, in a value of datatype TIMESTAMP WITH TIME ZONE
+| EXTRACT(timeUnit FROM datetime) | Extracts and returns the value of a specified datetime field from a datetime value expression
+| FLOOR(datetime TO timeUnit) | Rounds *datetime* down to *timeUnit*
+| CEIL(datetime TO timeUnit) | Rounds *datetime* up to *timeUnit*
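+
+For example (a sketch using literal dates and times):
+
+{% highlight sql %}
+SELECT EXTRACT(YEAR FROM DATE '1969-07-20') AS y,       -- 1969
+  FLOOR(TIMESTAMP '1969-07-20 20:17:40' TO HOUR) AS f,  -- 1969-07-20 20:00:00
+  CEIL(TIMESTAMP '1969-07-20 20:17:40' TO HOUR) AS c    -- 1969-07-20 21:00:00
+FROM (VALUES (1));
+{% endhighlight %}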
+
+Not implemented:
+
+* EXTRACT(timeUnit FROM interval)
+* CEIL(interval)
+* FLOOR(interval)
+* datetime - datetime timeUnit [ TO timeUnit ]
+* interval OVERLAPS interval
+* \+ interval
+* \- interval
+* interval + interval
+* interval - interval
+* interval / interval
+* datetime + interval
+* datetime - interval
+
+### System functions
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| USER            | Equivalent to CURRENT_USER
+| CURRENT_USER    | User name of current execution context
+| SESSION_USER    | Session user name
+| SYSTEM_USER     | Returns the name of the current data store user as identified by the operating system
+| CURRENT_PATH    | Returns a character string representing the current lookup scope for references to user-defined routines and types
+| CURRENT_ROLE    | Returns the current active role
+
+### Conditional functions and operators
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| CASE value<br/>WHEN value1 [, value11 ]* THEN result1<br/>[ WHEN valueN [, valueN1 ]* THEN resultN ]*<br/>[ ELSE resultZ ]<br/> END | Simple case
+| CASE<br/>WHEN condition1 THEN result1<br/>[ WHEN conditionN THEN resultN ]*<br/>[ ELSE resultZ ]<br/>END | Searched case
+| NULLIF(value, value) | Returns NULL if the values are the same.<br/><br/>For example, <code>NULLIF(5, 5)</code> returns NULL; <code>NULLIF(5, 0)</code> returns 5.
+| COALESCE(value, value [, value ]* ) | Provides a value if the first value is null.<br/><br/>For example, <code>COALESCE(NULL, 5)</code> returns 5.
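+
+For example (a sketch, assuming a hypothetical table `emp`):
+
+{% highlight sql %}
+SELECT name,
+  CASE deptno WHEN 10 THEN 'Accounts' ELSE 'Other' END AS dept_kind,
+  NULLIF(comm, 0) AS comm_or_null,    -- null when comm is 0
+  COALESCE(comm, 0) AS comm_or_zero   -- 0 when comm is null
+FROM emp;
+{% endhighlight %}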
+
+### Type conversion
+
+| Operator syntax | Description
+|:--------------- | :----------
+| CAST(value AS type) | Converts a value to a given type.
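+
+For example (a sketch, assuming a hypothetical table `emp`):
+
+{% highlight sql %}
+SELECT CAST(empno AS VARCHAR(10)) AS empno_str,
+  CAST('1969-07-20' AS DATE) AS d
+FROM emp;
+{% endhighlight %}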
+
+### Value constructors
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| ROW (value [, value]* ) | Creates a row from a list of values.
+| (value [, value]* )     | Creates a row from a list of values.
+| map '[' key ']'     | Returns the element of a map with a particular key.
+| array '[' index ']' | Returns the element at a particular location in an array.
+| ARRAY '[' value [, value ]* ']' | Creates an array from a list of values.
+| MAP '[' key, value [, key, value ]* ']' | Creates a map from a list of key-value pairs.
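+
+For example (a sketch; array element access is 1-based):
+
+{% highlight sql %}
+SELECT ARRAY [10, 20, 30] [2] AS second_element,    -- 20
+  MAP ['deptno', 10, 'empno', 100] ['empno'] AS e   -- 100
+FROM (VALUES (1));
+{% endhighlight %}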
+
+### Collection functions
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| ELEMENT(value)  | Returns the sole element of an array or multiset; null if the collection is empty; throws if it has more than one element.
+| CARDINALITY(value) | Returns the number of elements in an array or multiset.
+
+See also: the UNNEST relational operator, which converts a collection to a relation.
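+
+For example (a sketch):
+
+{% highlight sql %}
+SELECT CARDINALITY(ARRAY [10, 20, 30]) AS c,  -- 3
+  ELEMENT(ARRAY [42]) AS e                    -- 42
+FROM (VALUES (1));
+
+SELECT * FROM UNNEST(ARRAY ['a', 'b', 'c']);
+{% endhighlight %}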
+
+### JDBC function escape
+
+#### Numeric
+
+| Operator syntax                | Description
+|:------------------------------ |:-----------
+| {fn LOG10(numeric)}            | Returns the base-10 logarithm of *numeric*
+| {fn POWER(numeric1, numeric2)} | Returns *numeric1* raised to the power of *numeric2*
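+
+For example (a sketch):
+
+{% highlight sql %}
+SELECT {fn POWER(2, 10)} AS p,  -- 1024
+  {fn LOG10(100)} AS l          -- 2
+FROM (VALUES (1));
+{% endhighlight %}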
+
+Not implemented:
+
+* {fn ABS(numeric)} - Returns the absolute value of *numeric*
+* {fn ACOS(numeric)} - Returns the arc cosine of *numeric*
+* {fn ASIN(numeric)} - Returns the arc sine of *numeric*
+* {fn ATAN(numeric)} - Returns the arc tangent of *numeric*
+* {fn ATAN2(numeric, numeric)}
+* {fn CEILING(numeric)} - Rounds *numeric* up, and returns the smallest number that is greater than or equal to *numeric*
+* {fn COS(numeric)} - Returns the cosine of *numeric*
+* {fn COT(numeric)}
+* {fn DEGREES(numeric)} - Converts *numeric* from radians to degrees
+* {fn EXP(numeric)} - Returns *e* raised to the power of *numeric*
+* {fn FLOOR(numeric)} - Rounds *numeric* down, and returns the largest number that is less than or equal to *numeric*
+* {fn LOG(numeric)} - Returns the natural logarithm (base *e*) of *numeric*
+* {fn MOD(numeric1, numeric2)} - Returns the remainder (modulus) of *numeric1* divided by *numeric2*. The result is negative only if *numeric1* is negative
+* {fn PI()} - Returns a value that is closer than any other value to *pi*
+* {fn RADIANS(numeric)} - Converts *numeric* from degrees to radians
+* {fn RAND(numeric)}
+* {fn ROUND(numeric, numeric)}
+* {fn SIGN(numeric)}
+* {fn SIN(numeric)} - Returns the sine of *numeric*
+* {fn SQRT(numeric)} - Returns the square root of *numeric*
+* {fn TAN(numeric)} - Returns the tangent of *numeric*
+* {fn TRUNCATE(numeric, numeric)}
+
+#### String
+
+| Operator syntax | Description
+|:--------------- |:-----------
+| {fn LOCATE(string1, string2)} | Returns the position in *string2* of the first occurrence of *string1*. Searches from the beginning of *string2*.
+| {fn INSERT(string1, start, length, string2)} | Inserts *string2* into a slot in *string1*
+| {fn LCASE(string)}            | Returns a string in which all alphabetic characters in *string* have been converted to lower case
+
+Not implemented:
+
+* {fn ASCII(string)} - Converts a single-character string to the corresponding ASCII code, an integer between 0 and 255
+* {fn CHAR(string)}
+* {fn CONCAT(character, character)} - Returns the concatenation of character strings
+* {fn DIFFERENCE(string, string)}
+* {fn LEFT(string, integer)}
+* {fn LENGTH(string)}
+* {fn LOCATE(string1, string2 [, integer])} - Returns the position in *string2* of the first occurrence of *string1*. Searches from the beginning of *string2*, unless *integer* is specified.
+* {fn LTRIM(string)}
+* {fn REPEAT(string, integer)}
+* {fn REPLACE(string, string, string)}
+* {fn RIGHT(string, integer)}
+* {fn RTRIM(string)}
+* {fn SOUNDEX(string)}
+* {fn SPACE(integer)}
+* {fn SUBSTRING(string, integer, integer)}
+* {fn UCASE(string)} - Returns a string in which all alphabetic characters in *string* have been converted to upper case
+
+#### Date/time
+
+Not implemented:
+
+* {fn CURDATE()}
+* {fn CURTIME()}
+* {fn DAYNAME(date)}
+* {fn DAYOFMONTH(date)}
+* {fn DAYOFWEEK(date)}
+* {fn DAYOFYEAR(date)}
+* {fn HOUR(time)}
+* {fn MINUTE(time)}
+* {fn MONTH(date)}
+* {fn MONTHNAME(date)}
+* {fn NOW()}
+* {fn QUARTER(date)}
+* {fn SECOND(time)}
+* {fn TIMESTAMPADD(interval, count, timestamp)}
+* {fn TIMESTAMPDIFF(interval, timestamp, timestamp)}
+* {fn WEEK(date)}
+* {fn YEAR(date)}
+
+#### System
+
+Not implemented:
+
+* {fn DATABASE()}
+* {fn IFNULL(value, value)}
+* {fn USER()}
+* {fn CONVERT(value, type)}
+
+### Aggregate functions
+
+Syntax:
+
+{% highlight sql %}
+aggregateCall:
+        agg( [ DISTINCT ] value [, value]* ) [ FILTER ( WHERE condition ) ]
+    |   agg(*) [ FILTER ( WHERE condition ) ]
+{% endhighlight %}
+
+If `FILTER` is present, the aggregate function only considers rows for which
+*condition* evaluates to TRUE.
+
+If `DISTINCT` is present, duplicate argument values are eliminated before being
+passed to the aggregate function.
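+
+For example, the following sketch (assuming a hypothetical table `emp`)
+combines a plain count, a filtered count and a DISTINCT aggregate:
+
+{% highlight sql %}
+SELECT deptno,
+  COUNT(*) AS c,
+  COUNT(*) FILTER (WHERE sal > 1000) AS high_paid,
+  SUM(DISTINCT sal) AS sum_distinct_sal
+FROM emp
+GROUP BY deptno;
+{% endhighlight %}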
+
+| Operator syntax                    | Description
+|:---------------------------------- |:-----------
+| COLLECT( [ DISTINCT ] value)       | Returns a multiset of the values
+| COUNT( [ DISTINCT ] value [, value]* ) | Returns the number of input rows for which *value* is not null (wholly not null if *value* is composite)
+| COUNT(*)                           | Returns the number of input rows
+| AVG( [ DISTINCT ] numeric)         | Returns the average (arithmetic mean) of *numeric* across all input values
+| SUM( [ DISTINCT ] numeric)         | Returns the sum of *numeric* across all input values
+| MAX( [ DISTINCT ] value)           | Returns the maximum value of *value* across all input values
+| MIN( [ DISTINCT ] value)           | Returns the minimum value of *value* across all input values
+| STDDEV_POP( [ DISTINCT ] numeric)  | Returns the population standard deviation of *numeric* across all input values
+| STDDEV_SAMP( [ DISTINCT ] numeric) | Returns the sample standard deviation of *numeric* across all input values
+| VAR_POP( [ DISTINCT ] numeric)     | Returns the population variance (square of the population standard deviation) of *numeric* across all input values
+| VAR_SAMP( [ DISTINCT ] numeric)    | Returns the sample variance (square of the sample standard deviation) of *numeric* across all input values
+| COVAR_POP(numeric1, numeric2)      | Returns the population covariance of the pair (*numeric1*, *numeric2*) across all input values
+| COVAR_SAMP(numeric1, numeric2)     | Returns the sample covariance of the pair (*numeric1*, *numeric2*) across all input values
+| REGR_SXX(numeric1, numeric2)       | Returns the sum of squares of the independent expression in a linear regression model
+| REGR_SYY(numeric1, numeric2)       | Returns the sum of squares of the dependent expression in a linear regression model
+
+Not implemented:
+
+* REGR_AVGX(numeric1, numeric2)
+* REGR_AVGY(numeric1, numeric2)
+* REGR_COUNT(numeric1, numeric2)
+* REGR_INTERCEPT(numeric1, numeric2)
+* REGR_R2(numeric1, numeric2)
+* REGR_SLOPE(numeric1, numeric2)
+* REGR_SXY(numeric1, numeric2)
+
+### Window functions
+
+| Operator syntax                           | Description
+|:----------------------------------------- |:-----------
+| COUNT(value [, value ]* ) OVER window     | Returns the number of rows in *window* for which *value* is not null (wholly not null if *value* is composite)
+| COUNT(*) OVER window                      | Returns the number of rows in *window*
+| AVG(numeric) OVER window                  | Returns the average (arithmetic mean) of *numeric* across all values in *window*
+| SUM(numeric) OVER window                  | Returns the sum of *numeric* across all values in *window*
+| MAX(value) OVER window                    | Returns the maximum value of *value* across all values in *window*
+| MIN(value) OVER window                    | Returns the minimum value of *value* across all values in *window*
+| RANK() OVER window                        | Returns the rank of the current row with gaps; same as ROW_NUMBER of its first peer
+| DENSE_RANK() OVER window                  | Returns the rank of the current row without gaps; this function counts peer groups
+| ROW_NUMBER() OVER window                  | Returns the number of the current row within its partition, counting from 1
+| FIRST_VALUE(value) OVER window            | Returns *value* evaluated at the row that is the first row of the window frame
+| LAST_VALUE(value) OVER window             | Returns *value* evaluated at the row that is the last row of the window frame
+| LEAD(value, offset, default) OVER window  | Returns *value* evaluated at the row that is *offset* rows after the current row within the partition; if there is no such row, instead returns *default*. Both *offset* and *default* are evaluated with respect to the current row. If omitted, *offset* defaults to 1 and *default* to NULL
+| LAG(value, offset, default) OVER window   | Returns *value* evaluated at the row that is *offset* rows before the current row within the partition; if there is no such row, instead returns *default*. Both *offset* and *default* are evaluated with respect to the current row. If omitted, *offset* defaults to 1 and *default* to NULL
+| NTILE(value) OVER window                  | Returns an integer ranging from 1 to *value*, dividing the partition as equally as possible
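+
+For example, the following sketch (assuming a hypothetical table `emp`)
+computes a moving average over a frame of the current row and the two
+preceding rows, and a rank within each department:
+
+{% highlight sql %}
+SELECT empno, deptno, sal,
+  AVG(sal) OVER (PARTITION BY deptno ORDER BY empno ROWS 2 PRECEDING) AS avg_sal,
+  RANK() OVER (PARTITION BY deptno ORDER BY sal DESC) AS sal_rank
+FROM emp;
+{% endhighlight %}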
+
+Not implemented:
+
+* COUNT(DISTINCT value) OVER window
+* FIRST_VALUE(value) IGNORE NULLS OVER window
+* LAST_VALUE(value) IGNORE NULLS OVER window
+* PERCENT_RANK(value) OVER window
+* CUME_DIST(value) OVER window
+* NTH_VALUE(value, nth) OVER window
+
+### Grouping functions
+
+| Operator syntax      | Description
+|:-------------------- |:-----------
+| GROUPING(expression) | Returns 1 if expression is rolled up in the current row's grouping set, 0 otherwise
+| GROUP_ID()           | Returns an integer that uniquely identifies the combination of grouping keys
+| GROUPING_ID(expression [, expression ] * ) | Returns a bit vector of the given grouping expressions
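+
+For example, the following sketch (assuming a hypothetical table `emp`)
+distinguishes detail rows from roll-up rows:
+
+{% highlight sql %}
+SELECT deptno, job,
+  GROUPING(deptno) AS g_deptno,   -- 0 in detail rows, 1 when deptno is rolled up
+  GROUPING_ID(deptno, job) AS gid,
+  COUNT(*) AS c
+FROM emp
+GROUP BY GROUPING SETS ((deptno, job), (deptno), ());
+{% endhighlight %}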
+
+### User-defined functions
+
+Calcite is extensible. You can define each kind of function using user code.
+For each kind of function there are often several ways to define a function,
+varying from convenient to efficient.
+
+To implement a *scalar function*, there are 3 options:
+
+* Create a class with a public static `eval` method,
+  and register the class;
+* Create a class with a public non-static `eval` method,
+  and a public constructor with no arguments,
+  and register the class;
+* Create a class with one or more public static methods,
+  and register each class/method combination.
+
+To implement an *aggregate function*, there are 2 options:
+
+* Create a class with public static `init`, `add` and `result` methods,
+  and register the class;
+* Create a class with public non-static `init`, `add` and `result` methods,
+  and a public constructor with no arguments,
+  and register the class.
+
+Optionally, add a public `merge` method to the class; this allows Calcite to
+generate code that merges sub-totals.
+
+Optionally, make your class implement the
+[SqlSplittableAggFunction]({{ site.apiRoot }}/org/apache/calcite/sql/SqlSplittableAggFunction.html)
+interface; this allows Calcite to decompose the function across several stages
+of aggregation, roll up from summary tables, and push it through joins.
+
+To implement a *table function*, there are 3 options:
+
+* Create a class with a static `eval` method that returns
+  [ScannableTable]({{ site.apiRoot }}/org/apache/calcite/schema/ScannableTable.html)
+  or
+  [QueryableTable]({{ site.apiRoot }}/org/apache/calcite/schema/QueryableTable.html),
+  and register the class;
+* Create a class with a non-static `eval` method that returns
+  [ScannableTable]({{ site.apiRoot }}/org/apache/calcite/schema/ScannableTable.html)
+  or
+  [QueryableTable]({{ site.apiRoot }}/org/apache/calcite/schema/QueryableTable.html),
+  and register the class;
+* Create a class with one or more public static methods that return
+  [ScannableTable]({{ site.apiRoot }}/org/apache/calcite/schema/ScannableTable.html)
+  or
+  [QueryableTable]({{ site.apiRoot }}/org/apache/calcite/schema/QueryableTable.html),
+  and register each class/method combination.
+
+To implement a *table macro*, there are 3 options:
+
+* Create a class with a static `eval` method that returns
+  [TranslatableTable]({{ site.apiRoot }}/org/apache/calcite/schema/TranslatableTable.html),
+  and register the class;
+* Create a class with a non-static `eval` method that returns
+  [TranslatableTable]({{ site.apiRoot }}/org/apache/calcite/schema/TranslatableTable.html),
+  and register the class;
+* Create a class with one or more public static methods that return
+  [TranslatableTable]({{ site.apiRoot }}/org/apache/calcite/schema/TranslatableTable.html),
+  and register each class/method combination.
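+
+Once registered, a table function or table macro is invoked from SQL (a
+sketch, assuming a function registered under the name `ramp`):
+
+{% highlight sql %}
+SELECT * FROM TABLE(ramp(5));
+{% endhighlight %}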
+
+Calcite deduces the parameter types and result type of a function from the
+parameter and return types of the Java method that implements it. Further, you
+can specify the name and optionality of each parameter using the
+[Parameter]({{ site.apiRoot }}/org/apache/calcite/linq4j/function/Parameter.html)
+annotation.
+
+### Calling functions with named and optional parameters
+
+Usually when you call a function, you need to specify all of its parameters,
+in order. But that can be a problem if a function has a lot of parameters,
+and especially if you want to add more parameters over time.
+
+To solve this problem, the SQL standard allows you to pass parameters by name,
+and to define parameters which are optional (that is, have a default value
+that is used if they are not specified).
+
+Suppose you have a function `f`, declared as in the following pseudo syntax:
+
+{% highlight sql %}
+FUNCTION f(
+  INTEGER a,
+  INTEGER b DEFAULT NULL,
+  INTEGER c,
+  INTEGER d DEFAULT NULL,
+  INTEGER e DEFAULT NULL) RETURNS INTEGER
+{% endhighlight %}
+
+All of the function's parameters have names, and parameters `b`, `d` and `e`
+have a default value of `NULL` and are therefore optional.
+(In Calcite, `NULL` is the only allowable default value for optional parameters;
+this may change
+[in future](https://issues.apache.org/jira/browse/CALCITE-947).)
+
+When calling a function with optional parameters,
+you can omit optional arguments at the end of the list, or use the `DEFAULT`
+keyword for any optional arguments.
+Here are some examples:
+
+* `f(1, 2, 3, 4, 5)` provides a value to each parameter, in order;
+* `f(1, 2, 3, 4)` omits `e`, which gets its default value, `NULL`;
+* `f(1, DEFAULT, 3)` omits `d` and `e`,
+  and uses the default value of `b`;
+* `f(1, DEFAULT, 3, DEFAULT, DEFAULT)` has the same effect as the previous
+  example;
+* `f(1, 2)` is not legal, because `c` is not optional;
+* `f(1, 2, DEFAULT, 4)` is not legal, because `c` is not optional.
+
+You can specify arguments by name using the `=>` syntax.
+If one argument is named, they all must be.
+Arguments may be in any order, but you must not specify any argument more than
+once, and you need to provide a value for every parameter which is not optional.
+Here are some examples:
+
+* `f(c => 3, d => 1, a => 0)` is equivalent to `f(0, NULL, 3, 1, NULL)`;
+* `f(c => 3, d => 1)` is not legal, because you have not specified a value for
+  `a` and `a` is not optional.
+

http://git-wip-us.apache.org/repos/asf/calcite/blob/5cee486f/avatica/site/_docs/stream.md
----------------------------------------------------------------------
diff --git a/avatica/site/_docs/stream.md b/avatica/site/_docs/stream.md
new file mode 100644
index 0000000..f66fbce
--- /dev/null
+++ b/avatica/site/_docs/stream.md
@@ -0,0 +1,1023 @@
+---
+layout: docs
+title: Streaming
+permalink: /docs/stream.html
+---
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% endcomment %}
+-->
+
+Calcite has extended SQL and relational algebra in order to support
+streaming queries.
+
+* TOC
+{:toc}
+
+## Introduction
+
+Streams are collections of records that flow continuously, and forever.
+Unlike tables, they are not typically stored on disk, but flow over the
+network and are held for short periods of time in memory.
+
+Streams complement tables because they represent what is happening in the
+present and future of the enterprise whereas tables represent the past.
+It is very common for a stream to be archived into a table.
+
+Like tables, you often want to query streams in a high-level language
+based on relational algebra, validated according to a schema, and optimized
+to take advantage of available resources and algorithms.
+
+Calcite's SQL is an extension to standard SQL, not another 'SQL-like' language.
+The distinction is important, for several reasons:
+
+* Streaming SQL is easy to learn for anyone who knows regular SQL.
+* The semantics are clear, because we aim to produce the same results on a
+  stream as if the same data were in a table.
+* You can write queries that combine streams and tables (or the history of
+  a stream, which is basically an in-memory table).
+* Lots of existing tools can generate standard SQL.
+
+If you don't use the `STREAM` keyword, you are back in regular
+standard SQL.
+
+## An example schema
+
+Our streaming SQL examples use the following schema:
+
+* `Orders (rowtime, productId, orderId, units)` - a stream and a table
+* `Products (rowtime, productId, name)` - a table
+* `Shipments (rowtime, orderId)` - a stream
+
+## A simple query
+
+Let's start with the simplest streaming query:
+
+{% highlight sql %}
+SELECT STREAM *
+FROM Orders;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 10:17:00 |        30 |       5 |     4
+ 10:17:05 |        10 |       6 |     1
+ 10:18:05 |        20 |       7 |     2
+ 10:18:07 |        30 |       8 |    20
+ 11:02:00 |        10 |       9 |     6
+ 11:04:00 |        10 |      10 |     1
+ 11:09:30 |        40 |      11 |    12
+ 11:24:11 |        10 |      12 |     4
+{% endhighlight %}
+
+This query reads all columns and rows from the `Orders` stream.
+Like any streaming query, it never terminates. It outputs a record whenever
+a record arrives in `Orders`.
+
+Type `Control-C` to terminate the query.
+
+The `STREAM` keyword is the main extension in streaming SQL. It tells the
+system that you are interested in incoming orders, not existing ones. The query
+
+{% highlight sql %}
+SELECT *
+FROM Orders;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 08:30:00 |        10 |       1 |     3
+ 08:45:10 |        20 |       2 |     1
+ 09:12:21 |        10 |       3 |    10
+ 09:27:44 |        30 |       4 |     2
+
+4 records returned.
+{% endhighlight %}
+
+is also valid, but will print out all existing orders and then terminate. We
+call it a *relational* query, as opposed to *streaming*. It has traditional
+SQL semantics.
+
+`Orders` is special, in that it has both a stream and a table. If you try to run
+a streaming query on a table, or a relational query on a stream, Calcite gives
+an error:
+
+{% highlight sql %}
+SELECT * FROM Shipments;
+
+ERROR: Cannot convert stream 'SHIPMENTS' to a table
+
+SELECT STREAM * FROM Products;
+
+ERROR: Cannot convert table 'PRODUCTS' to a stream
+{% endhighlight %}
+
+## Filtering rows
+
+Just as in regular SQL, you use a `WHERE` clause to filter rows:
+
+{% highlight sql %}
+SELECT STREAM *
+FROM Orders
+WHERE units > 3;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 10:17:00 |        30 |       5 |     4
+ 10:18:07 |        30 |       8 |    20
+ 11:02:00 |        10 |       9 |     6
+ 11:09:30 |        40 |      11 |    12
+ 11:24:11 |        10 |      12 |     4
+{% endhighlight %}
+
+## Projecting expressions
+
+Use expressions in the `SELECT` clause to choose which columns to return or
+compute expressions:
+
+{% highlight sql %}
+SELECT STREAM rowtime,
+  'An order for ' || units || ' '
+    || CASE units WHEN 1 THEN 'unit' ELSE 'units' END
+    || ' of product #' || productId AS description
+FROM Orders;
+
+  rowtime | description
+----------+---------------------------------------
+ 10:17:00 | An order for 4 units of product #30
+ 10:17:05 | An order for 1 unit of product #10
+ 10:18:05 | An order for 2 units of product #20
+ 10:18:07 | An order for 20 units of product #30
+ 11:02:00 | An order for 6 units of product #10
+ 11:04:00 | An order for 1 unit of product #10
+ 11:09:30 | An order for 12 units of product #40
+ 11:24:11 | An order for 4 units of product #10
+{% endhighlight %}
+
+We recommend that you always include the `rowtime` column in the `SELECT`
+clause. Having a sorted timestamp in each stream and streaming query makes it
+possible to do advanced calculations later, such as `GROUP BY` and `JOIN`.
+
+## Tumbling windows
+
+There are several ways to compute aggregate functions on streams. The
+differences are:
+
+* How many rows come out for each row that goes in?
+* Does each incoming value appear in one total, or more?
+* What defines the "window", the set of rows that contribute to a given output row?
+* Is the result a stream or a relation?
+
+There are various window types:
+
+* tumbling window (GROUP BY)
+* hopping window (multi GROUP BY)
+* sliding window (window functions)
+* cascading window (window functions)
+
+and the following diagram shows the kinds of query in which to use them:
+
+![Window types]({{ site.baseurl }}/img/window-types.png)
+
+First we'll look at a *tumbling window*, which is defined by a streaming
+`GROUP BY`. Here is an example:
+
+{% highlight sql %}
+SELECT STREAM CEIL(rowtime TO HOUR) AS rowtime,
+  productId,
+  COUNT(*) AS c,
+  SUM(units) AS units
+FROM Orders
+GROUP BY CEIL(rowtime TO HOUR), productId;
+
+  rowtime | productId |       c | units
+----------+-----------+---------+-------
+ 11:00:00 |        30 |       2 |    24
+ 11:00:00 |        10 |       1 |     1
+ 11:00:00 |        20 |       1 |     2
+ 12:00:00 |        10 |       3 |    11
+ 12:00:00 |        40 |       1 |    12
+{% endhighlight %}
+
+The result is a stream. At 11 o'clock, Calcite emits a sub-total for every
+`productId` that had an order since 10 o'clock, timestamped 11 o'clock.
+At 12 o'clock, it will emit
+the orders that occurred between 11:00 and 12:00. Each input row contributes to
+only one output row.
+
+How did Calcite know that the sub-totals for the hour starting at 10:00:00
+were complete at 11:00:00, so that it could emit them? It knows that `rowtime`
+is increasing, and it knows that `CEIL(rowtime TO HOUR)` is also increasing.
+So, once it has seen a row at or after 11:00:00, it will never see a row that
+will contribute to that hour's total.
+
+A column or expression that is increasing or decreasing is said to be
+*monotonic*.
+
+If a column or expression has values that are slightly out of order,
+and the stream has a mechanism (such as punctuation or watermarks)
+to declare that a particular value will never be seen again, then
+the column or expression is said to be *quasi-monotonic*.
+
+Without a monotonic or quasi-monotonic expression in the `GROUP BY` clause,
+Calcite is
+not able to make progress, and it will not allow the query:
+
+{% highlight sql %}
+SELECT STREAM productId,
+  COUNT(*) AS c,
+  SUM(units) AS units
+FROM Orders
+GROUP BY productId;
+
+ERROR: Streaming aggregation requires at least one monotonic expression in GROUP BY clause
+{% endhighlight %}
+
+Monotonic and quasi-monotonic columns need to be declared in the schema.
+The monotonicity is
+enforced when records enter the stream and assumed by queries that read from
+that stream. We recommend that you give each stream a timestamp column called
+`rowtime`, but you can declare other columns, such as `orderId`, to be monotonic.
+
+We discuss punctuation, watermarks, and other ways of making progress
+<a href="#punctuation">below</a>.
+
+## Tumbling windows, improved
+
+The previous example of tumbling windows was easy to write because the window
+was one hour. For intervals that are not a whole time unit, say 2 hours or
+2 hours and 17 minutes, you cannot use `CEIL`, and the expression gets more
+complicated.
+
+Calcite supports an alternative syntax for tumbling windows:
+
+{% highlight sql %}
+SELECT STREAM TUMBLE_END(rowtime, INTERVAL '1' HOUR) AS rowtime,
+  productId,
+  COUNT(*) AS c,
+  SUM(units) AS units
+FROM Orders
+GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR), productId;
+
+  rowtime | productId |       c | units
+----------+-----------+---------+-------
+ 11:00:00 |        30 |       2 |    24
+ 11:00:00 |        10 |       1 |     1
+ 11:00:00 |        20 |       1 |     2
+ 12:00:00 |        10 |       3 |    11
+ 12:00:00 |        40 |       1 |    12
+{% endhighlight %}
+
+As you can see, it returns the same results as the previous query. The `TUMBLE`
+function returns a grouping key that is the same for all the rows that will end
+up in a given summary row; the `TUMBLE_END` function takes the same arguments
+and returns the time at which that window ends;
+there is also a `TUMBLE_START` function.
+
+`TUMBLE` has an optional parameter to align the window.
+In the following example,
+we use a 30 minute interval and 0:12 as the alignment time,
+so the query emits summaries at 12 and 42 minutes past each hour:
+
+{% highlight sql %}
+SELECT STREAM
+  TUMBLE_END(rowtime, INTERVAL '30' MINUTE, TIME '0:12') AS rowtime,
+  productId,
+  COUNT(*) AS c,
+  SUM(units) AS units
+FROM Orders
+GROUP BY TUMBLE(rowtime, INTERVAL '30' MINUTE, TIME '0:12'),
+  productId;
+
+  rowtime | productId |       c | units
+----------+-----------+---------+-------
+ 10:42:00 |        30 |       2 |    24
+ 10:42:00 |        10 |       1 |     1
+ 10:42:00 |        20 |       1 |     2
+ 11:12:00 |        10 |       2 |     7
+ 11:12:00 |        40 |       1 |    12
+ 11:42:00 |        10 |       1 |     4
+{% endhighlight %}
+
+## Hopping windows
+
+Hopping windows are a generalization of tumbling windows that allow data to
+be kept in a window for longer than the emit interval.
+
+For example, the following query emits a row timestamped 11:00 containing data
+from 08:00 to 11:00 (or 10:59.9 if we're being pedantic),
+and a row timestamped 12:00 containing data from 09:00
+to 12:00.
+
+{% highlight sql %}
+SELECT STREAM
+  HOP_END(rowtime, INTERVAL '1' HOUR, INTERVAL '3' HOUR) AS rowtime,
+  COUNT(*) AS c,
+  SUM(units) AS units
+FROM Orders
+GROUP BY HOP(rowtime, INTERVAL '1' HOUR, INTERVAL '3' HOUR);
+
+  rowtime |        c | units
+----------+----------+-------
+ 11:00:00 |        4 |    27
+ 12:00:00 |        8 |    50
+{% endhighlight %}
+
+In this query, because the retain period is 3 times the emit period, every input
+row contributes to exactly 3 output rows. Imagine that the `HOP` function
+generates a collection of group keys for an incoming row, and places its values
+in the accumulators of each of those group keys. For example,
+`HOP(10:18:00, INTERVAL '1' HOUR, INTERVAL '3' HOUR)` generates 3 periods:
+
+```
+[08:00, 09:00)
+[09:00, 10:00)
+[10:00, 11:00)
+```
+
+This raises the possibility of allowing user-defined partitioning functions
+for users who are not happy with the built-in functions `HOP` and `TUMBLE`.
+
+We can build complex expressions such as an exponentially decaying
+moving average:
+
+{% highlight sql %}
+SELECT STREAM HOP_END(rowtime),
+  productId,
+  SUM(unitPrice * EXP((rowtime - HOP_START(rowtime)) SECOND / INTERVAL '1' HOUR))
+   / SUM(EXP((rowtime - HOP_START(rowtime)) SECOND / INTERVAL '1' HOUR))
+FROM Orders
+GROUP BY HOP(rowtime, INTERVAL '1' SECOND, INTERVAL '1' HOUR),
+  productId
+{% endhighlight %}
+
+Emits:
+
+* a row at `11:00:00` containing rows in `[10:00:00, 11:00:00)`;
+* a row at `11:00:01` containing rows in `[10:00:01, 11:00:01)`.
+
+The expression weighs recent orders more heavily than older orders.
+Extending the window from 1 hour to 2 hours or 1 year would have
+virtually no effect on the accuracy of the result (but use more memory
+and compute).
+
+Note that we use `HOP_START` inside an aggregate function (`SUM`); this is
+legitimate because its value is constant for all rows within a sub-total.
+An expression that depended on the window but was not constant within it
+could not be used in this way.
+
+If you are familiar with `GROUPING SETS`, you may notice that partitioning
+functions can be seen as a generalization of `GROUPING SETS`, in that they
+allow an input row to contribute to multiple sub-totals.
+The auxiliary functions for `GROUPING SETS`,
+such as `GROUPING()` and `GROUP_ID`,
+can be used inside aggregate functions, so it is not surprising that
+`HOP_START` and `HOP_END` can be used in the same way.
+
+## GROUPING SETS
+
+`GROUPING SETS` is valid for a streaming query provided that every
+grouping set contains a monotonic or quasi-monotonic expression.
+
+`CUBE` and `ROLLUP` are not valid for a streaming query, because they will
+produce at least one grouping set that aggregates everything (like
+`GROUP BY ()`).
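+
+For example, the following sketch is valid because both grouping sets contain
+the monotonic `TUMBLE` expression:
+
+{% highlight sql %}
+SELECT STREAM TUMBLE_END(rowtime, INTERVAL '1' HOUR) AS rowtime,
+  productId,
+  COUNT(*) AS c
+FROM Orders
+GROUP BY GROUPING SETS (
+  (TUMBLE(rowtime, INTERVAL '1' HOUR), productId),
+  (TUMBLE(rowtime, INTERVAL '1' HOUR)));
+{% endhighlight %}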
+
+## Filtering after aggregation
+
+As in standard SQL, you can apply a `HAVING` clause to filter rows emitted by
+a streaming `GROUP BY`:
+
+{% highlight sql %}
+SELECT STREAM TUMBLE_END(rowtime, INTERVAL '1' HOUR) AS rowtime,
+  productId
+FROM Orders
+GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR), productId
+HAVING COUNT(*) > 2 OR SUM(units) > 10;
+
+  rowtime | productId
+----------+-----------
+ 11:00:00 |        30
+ 12:00:00 |        10
+ 12:00:00 |        40
+{% endhighlight %}
+
+# Sub-queries, views and SQL's closure property
+
+The previous `HAVING` query can be expressed using a `WHERE` clause on a
+sub-query:
+
+{% highlight sql %}
+SELECT STREAM rowtime, productId
+FROM (
+  SELECT TUMBLE_END(rowtime, INTERVAL '1' HOUR) AS rowtime,
+    productId,
+    COUNT(*) AS c,
+    SUM(units) AS su
+  FROM Orders
+  GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR), productId)
+WHERE c > 2 OR su > 10;
+
+  rowtime | productId
+----------+-----------
+ 11:00:00 |        30
+ 12:00:00 |        10
+ 12:00:00 |        40
+{% endhighlight %}
+
+`HAVING` was introduced in the early days of SQL, when a way was needed to
+perform a filter *after* aggregation. (Recall that `WHERE` filters rows before
+they enter the `GROUP BY` clause.)
+
+Since then, SQL has become a mathematically closed language, which means that
+any operation you can perform on a table can also be performed on a query.
+
+The *closure property* of SQL is extremely powerful. Not only does it render
+`HAVING` obsolete (or, at least, reduce it to syntactic sugar), it makes views
+possible:
+
+{% highlight sql %}
+CREATE VIEW HourlyOrderTotals (rowtime, productId, c, su) AS
+  SELECT TUMBLE_END(rowtime, INTERVAL '1' HOUR),
+    productId,
+    COUNT(*),
+    SUM(units)
+  FROM Orders
+  GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR), productId;
+
+SELECT STREAM rowtime, productId
+FROM HourlyOrderTotals
+WHERE c > 2 OR su > 10;
+
+  rowtime | productId
+----------+-----------
+ 11:00:00 |        30
+ 12:00:00 |        10
+ 12:00:00 |        40
+{% endhighlight %}
+
+Sub-queries in the `FROM` clause are sometimes referred to as "inline views",
+but really, they are more fundamental than views. Views are just a convenient
+way to carve your SQL into manageable chunks by giving the pieces names and
+storing them in the metadata repository.
+
+Many people find that nested queries and views are even more useful on streams
+than they are on relations. Streaming queries are pipelines of
+operators all running continuously, and often those pipelines get quite long.
+Nested queries and views help to express and manage those pipelines.
+
+And, by the way, a `WITH` clause can accomplish the same as a sub-query or
+a view:
+
+{% highlight sql %}
+WITH HourlyOrderTotals (rowtime, productId, c, su) AS (
+  SELECT TUMBLE_END(rowtime, INTERVAL '1' HOUR),
+    productId,
+    COUNT(*),
+    SUM(units)
+  FROM Orders
+  GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR), productId)
+SELECT STREAM rowtime, productId
+FROM HourlyOrderTotals
+WHERE c > 2 OR su > 10;
+
+  rowtime | productId
+----------+-----------
+ 11:00:00 |        30
+ 12:00:00 |        10
+ 12:00:00 |        40
+{% endhighlight %}
+
+# Converting between streams and relations
+
+Look back at the definition of the `HourlyOrderTotals` view.
+Is the view a stream or a relation?
+
+It does not contain the `STREAM` keyword, so it is a relation.
+However, it is a relation that can be converted into a stream.
+
+You can use it in both relational and streaming queries:
+
+{% highlight sql %}
+-- A relation; will query the historic Orders table.
+-- Returns the largest number of product #10 ever sold in one hour.
+SELECT max(su)
+FROM HourlyOrderTotals
+WHERE productId = 10;
+
+-- A stream; will query the Orders stream.
+-- Returns every hour in which at least one product #10 was sold.
+SELECT STREAM rowtime
+FROM HourlyOrderTotals
+WHERE productId = 10;
+{% endhighlight %}
+
+This approach is not limited to views and sub-queries.
+Following the approach set out in CQL [<a href="#ref1">1</a>], every query
+in streaming SQL is defined as a relational query and converted to a stream
+using the `STREAM` keyword in the top-most `SELECT`.
+
+If the `STREAM` keyword is present in sub-queries or view definitions, it has no
+effect.
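+
+For example, in the following sketch the inner `STREAM` keyword is
+redundant; the query means the same with or without it:
+
+{% highlight sql %}
+SELECT STREAM rowtime, productId
+FROM (SELECT STREAM * FROM Orders) AS o
+WHERE units > 10;
+{% endhighlight %}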
+
+At query preparation time, Calcite figures out whether the relations referenced
+in the query can be converted to streams or historical relations.
+
+Sometimes a stream makes available some of its history (say the last 24 hours of
+data in an Apache Kafka [<a href="#ref2">2</a>] topic)
+but not all. At run time, Calcite figures out whether there is sufficient
+history to run the query, and if not, gives an error.
+
+# The "pie chart" problem: Relational queries on streams
+
+One particular case where you need to convert a stream to a relation
+occurs in what I call the "pie chart problem". Imagine that you need to
+write a web page with a chart, like the following, that summarizes the
+number of orders for each product over the last hour.
+
+![Pie chart]({{ site.baseurl }}/img/pie-chart.png)
+
+But the `Orders` stream only contains a few records, not an hour's summary.
+We need to run a relational query on the history of the stream:
+
+{% highlight sql %}
+SELECT productId, count(*)
+FROM Orders
+WHERE rowtime BETWEEN current_timestamp - INTERVAL '1' HOUR
+              AND current_timestamp;
+{% endhighlight %}
+
+If the history of the `Orders` stream is being spooled to the `Orders` table,
+we can answer the query, albeit at a high cost. Better still would be to
+tell the system to materialize a one-hour summary in a table,
+maintain it continuously as the stream flows,
+and automatically rewrite queries to use that table.
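+
+Here is a hypothetical sketch of that materialization. It assumes a
+`CREATE MATERIALIZED VIEW` statement and a system that both maintains
+`OrdersLastHour` as the stream flows and rewrites the pie-chart query to
+read from it; the statement and the table name are illustrative, not a
+feature Calcite currently provides:
+
+{% highlight sql %}
+CREATE MATERIALIZED VIEW OrdersLastHour AS
+SELECT productId, COUNT(*) AS c
+FROM Orders
+WHERE rowtime BETWEEN current_timestamp - INTERVAL '1' HOUR
+              AND current_timestamp;
+{% endhighlight %}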
+
+# Sorting
+
+The story for `ORDER BY` is similar to `GROUP BY`.
+The syntax looks like regular SQL, but Calcite must be sure that it can deliver
+timely results. It therefore requires a monotonic expression on the leading edge
+of your `ORDER BY` key.
+
+{% highlight sql %}
+SELECT STREAM CEIL(rowtime TO hour) AS rowtime, productId, orderId, units
+FROM Orders
+ORDER BY CEIL(rowtime TO hour) ASC, units DESC;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 11:00:00 |        30 |       8 |    20
+ 11:00:00 |        30 |       5 |     4
+ 11:00:00 |        20 |       7 |     2
+ 11:00:00 |        10 |       6 |     1
+ 12:00:00 |        40 |      11 |    12
+ 12:00:00 |        10 |       9 |     6
+ 12:00:00 |        10 |      12 |     4
+ 12:00:00 |        10 |      10 |     1
+{% endhighlight %}
+
+Most queries will return results in the order that they were inserted,
+because the engine is using streaming algorithms, but you should not rely on it.
+For example, consider this:
+
+{% highlight sql %}
+SELECT STREAM *
+FROM Orders
+WHERE productId = 10
+UNION ALL
+SELECT STREAM *
+FROM Orders
+WHERE productId = 30;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 10:17:05 |        10 |       6 |     1
+ 10:17:00 |        30 |       5 |     4
+ 10:18:07 |        30 |       8 |    20
+ 11:02:00 |        10 |       9 |     6
+ 11:04:00 |        10 |      10 |     1
+ 11:24:11 |        10 |      12 |     4
+{% endhighlight %}
+
+The rows with `productId` = 30 are apparently out of order, probably because
+the `Orders` stream was partitioned on `productId` and the partitioned streams
+sent their data at different times.
+
+If you require a particular ordering, add an explicit `ORDER BY`:
+
+{% highlight sql %}
+SELECT STREAM *
+FROM Orders
+WHERE productId = 10
+UNION ALL
+SELECT STREAM *
+FROM Orders
+WHERE productId = 30
+ORDER BY rowtime;
+
+  rowtime | productId | orderId | units
+----------+-----------+---------+-------
+ 10:17:00 |        30 |       5 |     4
+ 10:17:05 |        10 |       6 |     1
+ 10:18:07 |        30 |       8 |    20
+ 11:02:00 |        10 |       9 |     6
+ 11:04:00 |        10 |      10 |     1
+ 11:24:11 |        10 |      12 |     4
+{% endhighlight %}
+
+Calcite will probably implement the `UNION ALL` by merging using `rowtime`,
+which is only slightly less efficient.
+
+You only need to add an `ORDER BY` to the outermost query. If you need to,
+say, perform `GROUP BY` after a `UNION ALL`, Calcite will add an `ORDER BY`
+implicitly, in order to make the GROUP BY algorithm possible.
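+
+For example, in the following sketch Calcite would merge the two input
+streams on `rowtime` (the implicit `ORDER BY`) so that the streaming
+`GROUP BY` can emit each hour's total once that hour is complete:
+
+{% highlight sql %}
+SELECT STREAM FLOOR(rowtime TO HOUR) AS rowtime, COUNT(*) AS c
+FROM (
+  SELECT * FROM Orders WHERE productId = 10
+  UNION ALL
+  SELECT * FROM Orders WHERE productId = 30) AS u
+GROUP BY FLOOR(rowtime TO HOUR);
+{% endhighlight %}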
+
+# Table constructor
+
+The `VALUES` clause creates an inline table with a given set of rows.
+
+Streaming is disallowed. The set of rows never changes, and therefore a stream
+would never return any rows.
+
+{% highlight sql %}
+> SELECT STREAM * FROM (VALUES (1, 'abc'));
+
+ERROR: Cannot stream VALUES
+{% endhighlight %}
+
+# Sliding windows
+
+Standard SQL features so-called "analytic functions" that can be used in the
+`SELECT` clause. Unlike `GROUP BY`, these do not collapse records. For each
+record that goes in, one record comes out. But the aggregate function is based
+on a window of many rows.
+
+Let's look at an example.
+
+{% highlight sql %}
+SELECT STREAM rowtime,
+  productId,
+  units,
+  SUM(units) OVER (ORDER BY rowtime RANGE INTERVAL '1' HOUR PRECEDING) unitsLastHour
+FROM Orders;
+{% endhighlight %}
+
+The feature packs a lot of power with little effort. You can have multiple
+functions in the `SELECT` clause, based on multiple window specifications.
+
+The following example returns orders whose average order size over the last
+10 minutes is greater than the average order size for the last week.
+
+{% highlight sql %}
+SELECT STREAM *
+FROM (
+  SELECT STREAM rowtime,
+    productId,
+    units,
+    AVG(units) OVER product (RANGE INTERVAL '10' MINUTE PRECEDING) AS m10,
+    AVG(units) OVER product (RANGE INTERVAL '7' DAY PRECEDING) AS d7
+  FROM Orders
+  WINDOW product AS (
+    PARTITION BY productId
+    ORDER BY rowtime))
+WHERE m10 > d7;
+{% endhighlight %}
+
+For conciseness, here we use a syntax where you partially define a window
+using a `WINDOW` clause and then refine the window in each `OVER` clause.
+You could also define all windows in the `WINDOW` clause, or all windows inline,
+if you wish.
+
+But the real power goes beyond syntax. Behind the scenes, this query is
+maintaining two tables, and adding and removing values from sub-totals using
+FIFO queues. But you can access those tables without introducing a join
+into the query.
+
+Some other features of the windowed aggregation syntax:
+
+* You can define windows based on row count (see the sketch below).
+* The window can reference rows that have not yet arrived.
+  (The stream will wait until they have arrived).
+* You can compute order-dependent functions such as `RANK` and median.
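+
+As a sketch of the first of these, here is a window defined by row count
+rather than by time (the `avgLast10` alias is ours):
+
+{% highlight sql %}
+SELECT STREAM rowtime,
+  productId,
+  units,
+  AVG(units) OVER (PARTITION BY productId
+                   ORDER BY rowtime
+                   ROWS BETWEEN 9 PRECEDING AND CURRENT ROW) AS avgLast10
+FROM Orders;
+{% endhighlight %}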
+
+# Cascading windows
+
+What if we want a query that returns a result for every record, like a
+sliding window, but resets totals on a fixed time period, like a
+tumbling window? Such a pattern is called a *cascading window*. Here
+is an example:
+
+{% highlight sql %}
+SELECT STREAM rowtime,
+  productId,
+  units,
+  SUM(units) OVER (PARTITION BY FLOOR(rowtime TO HOUR)) AS unitsSinceTopOfHour
+FROM Orders;
+{% endhighlight %}
+
+It looks similar to a sliding window query, but the monotonic
+expression occurs within the `PARTITION BY` clause of the window. As
+the rowtime moves from 10:59:59 to 11:00:00,
+`FLOOR(rowtime TO HOUR)` changes from 10:00:00 to 11:00:00,
+and therefore a new partition starts.
+The first row to arrive in the new hour will start a
+new total; the second row will have a total that consists of two rows,
+and so on.
+
+Calcite knows that the old partition will never be used again, so it
+removes all sub-totals for that partition from its internal storage.
+
+Analytic functions using cascading and sliding windows can be
+combined in the same query.
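+
+For example, the following sketch combines the two, computing a sliding
+one-hour total alongside a cascading since-the-top-of-the-hour total for
+each incoming row:
+
+{% highlight sql %}
+SELECT STREAM rowtime,
+  productId,
+  units,
+  SUM(units) OVER (ORDER BY rowtime
+                   RANGE INTERVAL '1' HOUR PRECEDING) AS unitsLastHour,
+  SUM(units) OVER (PARTITION BY FLOOR(rowtime TO HOUR)) AS unitsSinceTopOfHour
+FROM Orders;
+{% endhighlight %}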
+
+# Joining streams to tables
+
+There are two kinds of join where streams are concerned: stream-to-table
+join and stream-to-stream join.
+
+A stream-to-table join is straightforward if the contents of the table
+are not changing. This query enriches a stream of orders with
+each product's list price:
+
+{% highlight sql %}
+SELECT STREAM o.rowtime, o.productId, o.orderId, o.units,
+  p.name, p.unitPrice
+FROM Orders AS o
+JOIN Products AS p
+  ON o.productId = p.productId;
+
+  rowtime | productId | orderId | units | name   | unitPrice
+----------+-----------+---------+-------+--------+-----------
+ 10:17:00 |        30 |       5 |     4 | Cheese |        17
+ 10:17:05 |        10 |       6 |     1 | Beer   |      0.25
+ 10:18:05 |        20 |       7 |     2 | Wine   |         6
+ 10:18:07 |        30 |       8 |    20 | Cheese |        17
+ 11:02:00 |        10 |       9 |     6 | Beer   |      0.25
+ 11:04:00 |        10 |      10 |     1 | Beer   |      0.25
+ 11:09:30 |        40 |      11 |    12 | Bread  |       100
+ 11:24:11 |        10 |      12 |     4 | Beer   |      0.25
+{% endhighlight %}
+
+What should happen if the table is changing? For example,
+suppose the unit price of product 10 is increased to 0.35 at 11:00.
+Orders placed before 11:00 should have the old price, and orders
+placed after 11:00 should reflect the new price.
+
+One way to implement this is to have a table that keeps every version
+with a start and end effective date, `ProductVersions` in the following
+example:
+
+{% highlight sql %}
+SELECT STREAM *
+FROM Orders AS o
+JOIN ProductVersions AS p
+  ON o.productId = p.productId
+  AND o.rowtime BETWEEN p.startDate AND p.endDate;
+
+  rowtime | productId | orderId | units | productId1 |   name | unitPrice
+----------+-----------+---------+-------+------------+--------+-----------
+ 10:17:00 |        30 |       5 |     4 |         30 | Cheese |        17
+ 10:17:05 |        10 |       6 |     1 |         10 | Beer   |      0.25
+ 10:18:05 |        20 |       7 |     2 |         20 | Wine   |         6
+ 10:18:07 |        30 |       8 |    20 |         30 | Cheese |        17
+ 11:02:00 |        10 |       9 |     6 |         10 | Beer   |      0.35
+ 11:04:00 |        10 |      10 |     1 |         10 | Beer   |      0.35
+ 11:09:30 |        40 |      11 |    12 |         40 | Bread  |       100
+ 11:24:11 |        10 |      12 |     4 |         10 | Beer   |      0.35
+{% endhighlight %}
+
+The other way to implement this is to use a database with temporal support
+(the ability to find the contents of the database as it was at any moment
+in the past), and the system needs to know that the `rowtime` column of
+the `Orders` stream corresponds to the transaction timestamp of the
+`Products` table.
+
+For many applications, it is not worth the cost and effort of temporal
+support or a versioned table. It is acceptable to the application that
+the query gives different results when replayed: in this example, on replay,
+all orders of product 10 are assigned the later unit price, 0.35.
+
+# Joining streams to streams
+
+It makes sense to join two streams if the join condition somehow forces
+them to remain a finite distance from one another. In the following query,
+the ship date is within one hour of the order date:
+
+{% highlight sql %}
+SELECT STREAM o.rowtime, o.productId, o.orderId, s.rowtime AS shipTime
+FROM Orders AS o
+JOIN Shipments AS s
+  ON o.orderId = s.orderId
+  AND s.rowtime BETWEEN o.rowtime AND o.rowtime + INTERVAL '1' HOUR;
+
+  rowtime | productId | orderId | shipTime
+----------+-----------+---------+----------
+ 10:17:00 |        30 |       5 | 10:55:00
+ 10:17:05 |        10 |       6 | 10:20:00
+ 11:02:00 |        10 |       9 | 11:58:00
+ 11:24:11 |        10 |      12 | 11:44:00
+{% endhighlight %}
+
+Note that quite a few orders do not appear, because they did not ship
+within an hour. By the time the system receives order 12, timestamped 11:24:11,
+it has already removed orders up to and including order 8, timestamped 10:18:07,
+from its hash table.
+
+As you can see, the "lock step", tying together monotonic or quasi-monotonic
+columns of the two streams, is necessary for the system to make progress.
+It will refuse to execute a query if it cannot deduce a lock step.
+
+# DML
+
+It's not only queries that make sense against streams;
+it also makes sense to run DML statements (`INSERT`, `UPDATE`, `DELETE`,
+and also their rarer cousins `UPSERT` and `REPLACE`) against streams.
+
+DML is useful because it allows you to materialize streams
+or tables based on streams,
+and therefore save effort when values are used often.
+
+Consider how streaming applications often consist of pipelines of queries,
+each query transforming input stream(s) to output stream(s).
+A component of a pipeline can be a view:
+
+{% highlight sql %}
+CREATE VIEW LargeOrders AS
+SELECT STREAM * FROM Orders WHERE units > 1000;
+{% endhighlight %}
+
+or a standing `INSERT` statement:
+
+{% highlight sql %}
+INSERT INTO LargeOrders
+SELECT STREAM * FROM Orders WHERE units > 1000;
+{% endhighlight %}
+
+These look similar, and in both cases the next step(s) in the pipeline
+can read from `LargeOrders` without worrying how it was populated.
+There is a difference in efficiency: the `INSERT` statement does the 
+same work no matter how many consumers there are; the view does work
+proportional to the number of consumers, and in particular, does no 
+work if there are no consumers.
+
+Other forms of DML make sense for streams. For example, the following
+standing `UPSERT` statement maintains a table that materializes a summary
+of the last hour of orders:
+
+{% highlight sql %}
+UPSERT INTO OrdersSummary
+SELECT STREAM productId,
+  COUNT(*) OVER lastHour AS c
+FROM Orders
+WINDOW lastHour AS (
+  PARTITION BY productId
+  ORDER BY rowtime
+  RANGE INTERVAL '1' HOUR PRECEDING)
+{% endhighlight %}
+
+# Punctuation
+
+Punctuation[<a href="#ref5">5</a>] allows a stream query to make progress
+even if there are not enough values in a monotonic key to push the results out.
+
+(I prefer the term "rowtime bounds",
+and watermarks[<a href="#ref6">6</a>] are a related concept,
+but for these purposes, punctuation will suffice.)
+
+If a stream has punctuation enabled then it may not be sorted but is
+nevertheless sortable. So, for the purposes of semantics, it is sufficient
+to work in terms of sorted streams.
+
+By the way, an out-of-order stream is also sortable if it is *t-sorted*
+(i.e. every record is guaranteed to arrive within *t* seconds of its
+timestamp) or *k-sorted* (i.e. every record is guaranteed to be no more
+than *k* positions out of order). So queries on these streams can be
+planned similarly to queries on streams with punctuation.
+
+And, we often want to aggregate over attributes that are not
+time-based but are nevertheless monotonic. "The number of times a team
+has shifted between winning-state and losing-state" is one such
+monotonic attribute. The system needs to figure out for itself that it
+is safe to aggregate over such an attribute; punctuation does not add
+any extra information.
+
+I have in mind some metadata (cost metrics) for the planner:
+
+1. Is this stream sorted on a given attribute (or attributes)?
+2. Is it possible to sort the stream on a given attribute? (For finite
+   relations, the answer is always "yes"; for streams it depends on the
+   existence of punctuation, or linkage between the attributes and the
+   sort key.)
+3. What latency do we need to introduce in order to perform that sort?
+4. What is the cost (in CPU, memory etc.) of performing that sort?
+
+We already have (1), in [BuiltInMetadata.Collation]({{ site.apiRoot }}/org/apache/calcite/rel/metadata/BuiltInMetadata.Collation.html).
+For (2), the answer is always "true" for finite relations.
+But we'll need to implement (2), (3) and (4) for streams.
+
+# State of the stream
+
+Not all concepts in this article have been implemented in Calcite.
+And others may be implemented in Calcite but not in a particular adapter
+such as SamzaSQL [<a href="#ref3">3</a>] [<a href="#ref4">4</a>].
+
+## Implemented
+
+* Streaming `SELECT`, `WHERE`, `GROUP BY`, `HAVING`, `UNION ALL`, `ORDER BY`
+* `FLOOR` and `CEIL` functions
+* Monotonicity
+* Streaming `VALUES` is disallowed
+
+## Not implemented
+
+The following features are presented in this document as if Calcite
+supports them, but in fact it does not (yet). Full support means
+that the reference implementation supports the feature (including
+negative cases) and the TCK tests it.
+
+* Stream-to-stream `JOIN`
+* Stream-to-table `JOIN`
+* Stream on view
+* Streaming `UNION ALL` with `ORDER BY` (merge)
+* Relational query on stream
+* Streaming windowed aggregation (sliding and cascading windows)
+* Check that `STREAM` in sub-queries and views is ignored
+* Check that streaming `ORDER BY` cannot have `OFFSET` or `LIMIT`
+* Limited history; at run time, check that there is sufficient history
+  to run the query.
+* [Quasi-monotonicity](https://issues.apache.org/jira/browse/CALCITE-1096)
+* `HOP` and `TUMBLE` (and auxiliary `HOP_START`, `HOP_END`,
+  `TUMBLE_START`, `TUMBLE_END`) functions
+
+## To do in this document
+
+* Re-visit whether you can stream `VALUES`
+* `OVER` clause to define window on stream
+* Consider whether to allow `CUBE` and `ROLLUP` in streaming queries,
+  with an understanding that some levels of aggregation will never complete
+  (because they have no monotonic expressions) and thus will never be emitted.
+* Fix the `UPSERT` example to remove records for products that have not
+  occurred in the last hour.
+* DML that outputs to multiple streams; perhaps an extension to the standard
+  `REPLACE` statement. 
+
+# Functions
+
+The following functions are not present in standard SQL
+but are defined in streaming SQL.
+
+Scalar functions:
+
+* `FLOOR(dateTime TO intervalType)` rounds a date, time or timestamp value
+  down to a given interval type
+* `CEIL(dateTime TO intervalType)` rounds a date, time or timestamp value
+  up to a given interval type
+
+Partitioning functions:
+
+* `HOP(t, emit, retain)` returns a collection of group keys for a row
+  to be part of a hopping window
+* `HOP(t, emit, retain, align)` returns a collection of group keys for a row
+  to be part of a hopping window with a given alignment
+* `TUMBLE(t, emit)` returns a group key for a row
+  to be part of a tumbling window
+* `TUMBLE(t, emit, align)` returns a group key for a row
+  to be part of a tumbling window with a given alignment
+
+`TUMBLE(t, e)` is equivalent to `TUMBLE(t, e, TIME '00:00:00')`.
+
+`TUMBLE(t, e, a)` is equivalent to `HOP(t, e, e, a)`.
+
+`HOP(t, e, r)` is equivalent to `HOP(t, e, r, TIME '00:00:00')`.
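+
+For example, assuming that `TUMBLE_END` accepts the same alignment
+argument as `TUMBLE` (and recalling that these functions are not yet
+implemented), the following sketch emits hourly windows that begin at
+half past each hour:
+
+{% highlight sql %}
+SELECT STREAM
+  TUMBLE_END(rowtime, INTERVAL '1' HOUR, TIME '00:30:00') AS rowtime,
+  COUNT(*) AS c
+FROM Orders
+GROUP BY TUMBLE(rowtime, INTERVAL '1' HOUR, TIME '00:30:00');
+{% endhighlight %}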
+
+# References
+
+* [<a name="ref1">1</a>]
+  <a href="http://ilpubs.stanford.edu:8090/758/">Arvind Arasu, Shivnath Babu,
+  and Jennifer Widom (2003) The CQL Continuous Query
+  Language: Semantic Foundations and Query Execution</a>.
+* [<a name="ref2">2</a>]
+  <a href="http://kafka.apache.org/documentation.html">Apache Kafka</a>.
+* [<a name="ref3">3</a>] <a href="http://samza.apache.org">Apache Samza</a>.
+* [<a name="ref4">4</a>] <a href="https://github.com/milinda/samza-sql">SamzaSQL</a>.
+* [<a name="ref5">5</a>]
+  <a href="http://www.whitworth.edu/academic/department/mathcomputerscience/faculty/tuckerpeter/pdf/117896_final.pdf">Peter
+  A. Tucker, David Maier, Tim Sheard, and Leonidas Fegaras (2003) Exploiting
+  Punctuation Semantics in Continuous Data Streams</a>.
+* [<a name="ref6">6</a>]
+  <a href="http://research.google.com/pubs/pub41378.html">Tyler Akidau,
+  Alex Balikov, Kaya Bekiroglu, Slava Chernyak, Josh Haberman, Reuven Lax,
+  Sam McVeety, Daniel Mills, Paul Nordstrom, and Sam Whittle (2013)
+  MillWheel: Fault-Tolerant Stream Processing at Internet Scale</a>.

http://git-wip-us.apache.org/repos/asf/calcite/blob/5cee486f/avatica/site/_docs/testapi.md
----------------------------------------------------------------------
diff --git a/avatica/site/_docs/testapi.md b/avatica/site/_docs/testapi.md
new file mode 100644
index 0000000..661f374
--- /dev/null
+++ b/avatica/site/_docs/testapi.md
@@ -0,0 +1,28 @@
+---
+title: Test API
+layout: external
+external_url: /testapidocs
+---
+{% comment %}
+Ideally, we want to use {{ site.apiRoot }} instead of hardcoding
+the above external_url value, but I don't believe there's a way to do that
+{% endcomment %}
+
+<!--
+{% comment %}
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to you under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+{% endcomment %}
+-->