Posted to commits@lucene.apache.org by jb...@apache.org on 2018/03/26 19:14:59 UTC

[1/3] lucene-solr:master: SOLR-11947: Squashed commit of the following ref guide changes:

Repository: lucene-solr
Updated Branches:
  refs/heads/master dc2ad7022 -> e69c614cf


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/regression.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/regression.adoc b/solr/solr-ref-guide/src/regression.adoc
new file mode 100644
index 0000000..b57c62b
--- /dev/null
+++ b/solr/solr-ref-guide/src/regression.adoc
@@ -0,0 +1,439 @@
+= Linear Regression
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+This section of the math expressions user guide covers simple and multivariate linear regression.
+
+
+== Simple Linear Regression
+
+The `regress` function is used to build a linear regression model
+between two random variables. Sample observations are provided with two
+numeric arrays. The first numeric array is the *independent variable* and
+the second array is the *dependent variable*.
+
+In the example below the `random` function selects 5000 random samples each containing
+the fields *filesize_d* and *response_d*. The two fields are vectorized
+and stored in variables *b* and *c*. Then the `regress` function performs a regression
+analysis on the two numeric arrays.
+
+The `regress` function returns a single tuple with the results of the regression
+analysis.
+
+Note that in this regression analysis the value of *RSquared* is *.75*. This means that changes in
+*filesize_d* explain 75% of the variability of the *response_d* variable.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, response_d),
+    d=regress(b, c))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": {
+          "significance": 0,
+          "totalSumSquares": 10564812.895147054,
+          "R": 0.8674822407146515,
+          "RSquared": 0.7525254379553127,
+          "meanSquareError": 523.1137343558588,
+          "intercept": -49.528134913099095,
+          "slopeConfidenceInterval": 0.0003171801710329995,
+          "regressionSumSquares": 7950290.450836472,
+          "slope": 0.019945557923159506,
+          "interceptStdErr": 6.489732340389941,
+          "N": 5000
+        }
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 98
+      }
+    ]
+  }
+}
+----
+
+=== Prediction
+
+The `predict` function uses the regression model to make predictions.
+Using the example above, the regression model can be used to predict the value
+of *response_d* given a value for *filesize_d*.
+
+In the example below the `predict` function uses the regression analysis to predict
+the value of *response_d* for a *filesize_d* value of 40000. The prediction is
+computed from the model as `(slope * filesize_d) + intercept`.
+
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, response_d),
+    d=regress(b, c),
+    e=predict(d, 40000))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": 748.079241022975
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 95
+      }
+    ]
+  }
+}
+----
+
+The `predict` function can also make predictions for an array of values. In this
+case it returns an array of predictions.
+
+In the example below the `predict` function uses the regression analysis to
+predict values for each of the 5000 samples of `filesize_d` used to generate the model.
+In this case 5000 predictions are returned.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, response_d),
+    d=regress(b, c),
+    e=predict(d, b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          742.2525322514165,
+          709.6972488729955,
+          687.8382568904871,
+          820.2511324266264,
+          720.4006432289061,
+          761.1578181053039,
+          759.1304101159126,
+          699.5597256337142,
+          742.4738911248204,
+          769.0342605881644,
+          746.6740473150268,
+          ...
+          ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 113
+      }
+    ]
+  }
+}
+----
+
+=== Residuals
+
+The difference between the observed value and the predicted value is known as the
+residual. There isn't a specific function to calculate the residuals but vector
+math can be used to perform the calculation.
+
+In the example below the predictions are stored in variable *e*. The `ebeSubtract`
+function is then used to subtract the predictions
+from the actual *response_d* values stored in variable *c*. Variable *f* contains
+the array of residuals.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, response_d),
+    d=regress(b, c),
+    e=predict(d, b),
+    f=ebeSubtract(c, e))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          31.30678554491226,
+          -30.292830927953446,
+          -30.49508862647258,
+          -30.499884780783532,
+          -9.696458959319784,
+          -30.521563961535094,
+          -30.28380938033081,
+          -9.890289849359306,
+          30.819723560583157,
+          -30.213178859683012,
+          -30.609943619066826,
+          10.527700442607625,
+          10.68046928406568,
+          ...
+          ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 113
+      }
+    ]
+  }
+}
+----
+
+== Multivariate Linear Regression
+
+The `olsRegress` function performs a multivariate linear regression analysis. Multivariate linear
+regression models the linear relationship between two or more *independent* variables and a *dependent* variable.
+
+The example below extends the simple linear regression example by introducing a new independent variable
+called *service_d*. The *service_d* variable is the service level of the request and it can range from 1 to 4
+in the data-set. The higher the service level, the higher the bandwidth available for the request.
+
+Notice that the two independent variables *filesize_d* and *service_d* are vectorized and stored
+in the variables *b* and *c*. The variables *b* and *c* are then added as rows to a `matrix`. The matrix is
+then transposed so that each row in the matrix represents one observation with *filesize_d* and *service_d*.
+The `olsRegress` function then performs the multivariate regression analysis using the observation matrix as the
+independent variables and the *response_d* values, stored in variable *d*, as the dependent variable.
+
+Notice that the RSquared of the regression analysis is 1. This means that the linear relationship between
+*filesize_d* and *service_d* describes 100% of the variability of the *response_d* variable. Also note that the
+*regressionParameters* in the output are the intercept followed by the coefficients of the independent variables,
+so the fitted model here is approximately `response_d = .02*filesize_d - 20.5*service_d`.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="30000", fl="filesize_d, service_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, service_d),
+    d=col(a, response_d),
+    e=transpose(matrix(b, c)),
+    f=olsRegress(e, d))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "f": {
+          "regressionParametersStandardErrors": [
+            2.0660690430026933e-13,
+            5.1212982077663434e-18,
+            9.10920932555875e-15
+          ],
+          "RSquared": 1,
+          "regressionParameters": [
+            6.553210695971329e-12,
+            0.019999999999999858,
+            -20.49999999999968
+          ],
+          "regressandVariance": 2124.130825172683,
+          "regressionParametersVariance": [
+            [
+              0.013660174897582315,
+              -3.361258014840509e-7,
+              -0.00006893737578369605
+            ],
+            [
+              -3.361258014840509e-7,
+              8.393183709503206e-12,
+              6.430253229589981e-11
+            ],
+            [
+              -0.00006893737578369605,
+              6.430253229589981e-11,
+              0.000026553878455570856
+            ]
+          ],
+          "adjustedRSquared": 1,
+          "residualSumSquares": 9.373703759269822e-20
+        }
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 690
+      }
+    ]
+  }
+}
+----
+
+=== Prediction
+
+The `predict` function can also be used to make predictions for multivariate linear regression. Below is an example
+of a single prediction using the multivariate linear regression model and a single observation. The observation
+is an array that matches the structure of the observation matrix used to build the model. In this case
+the first value represents a *filesize_d* of 40000 and the second value represents a *service_d* of 4.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, service_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, service_d),
+    d=col(a, response_d),
+    e=transpose(matrix(b, c)),
+    f=olsRegress(e, d),
+    g=predict(f, array(40000, 4)))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "g": 718.0000000000005
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 117
+      }
+    ]
+  }
+}
+----
+
+The `predict` function can also make predictions for more than one multivariate observation. In this scenario
+an observation matrix is used. In the example below the observation matrix used to build the multivariate regression model
+is passed to the `predict` function and it returns an array of predictions.
+
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, service_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, service_d),
+    d=col(a, response_d),
+    e=transpose(matrix(b, c)),
+    f=olsRegress(e, d),
+    g=predict(f, e))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          685.498283591961,
+          801.2175699959365,
+          776.7638245911025,
+          610.3559852681935,
+          751.0925865965207,
+          787.2914663381897,
+          744.3632053810668,
+          688.3729301599697,
+          765.367783417171,
+          724.9309687628346,
+          834.4350712384264,
+          ...
+          ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 113
+      }
+    ]
+  }
+}
+----
+
+=== Residuals
+
+Once the predictions are generated the residuals can be calculated using the same approach used with
+simple linear regression.
+
+Below is an example of the residuals calculation following a multivariate linear regression. In the example
+the predictions stored in variable *g* are subtracted from the observed values stored in variable *d*.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, service_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, service_d),
+    d=col(a, response_d),
+    e=transpose(matrix(b, c)),
+    f=olsRegress(e, d),
+    g=predict(f, e),
+    h=ebeSubtract(d, g))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+         1.1368683772161603e-13,
+         1.1368683772161603e-13,
+         0,
+         1.1368683772161603e-13,
+         0,
+         1.1368683772161603e-13,
+         0,
+         2.2737367544323206e-13,
+         1.1368683772161603e-13,
+         2.2737367544323206e-13,
+         1.1368683772161603e-13,
+          ...
+          ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 113
+      }
+    ]
+  }
+}
+----
+
+
+
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/scalar-math.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/scalar-math.adoc b/solr/solr-ref-guide/src/scalar-math.adoc
new file mode 100644
index 0000000..07b1eb5
--- /dev/null
+++ b/solr/solr-ref-guide/src/scalar-math.adoc
@@ -0,0 +1,137 @@
+= Scalar Math
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+The most basic math expressions are scalar expressions. Scalar expressions
+perform mathematical operations on numbers.
+
+For example the expression below adds two numbers together:
+
+[source,text]
+----
+add(1, 1)
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 2
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 2
+      }
+    ]
+  }
+}
+----
+
+Math expressions can be nested. For example in the expression
+below the output of the `add` function is the second parameter
+of the `pow` function:
+
+[source,text]
+----
+pow(10, add(1,1))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 100
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Streaming Scalar Math
+
+Scalar math expressions can also be applied to each tuple in a stream
+through use of the `select` stream decorator. The `select` function wraps a
+stream of tuples and selects fields to include in each tuple.
+The `select` function can also use math expressions to compute
+new values and add them to the outgoing tuples.
+
+In the example below the `select` expression is wrapping a search
+expression. The `select` function is selecting the *price_f* field
+and computing a new field called *newPrice* using the `mult` math
+expression.
+
+The first parameter of the `mult` expression is the *price_f* field.
+The second parameter is the scalar value 10. This multiplies the value
+of the *price_f* field in each tuple by 10.
+
+[source,text]
+----
+select(search(collection2, q="*:*", fl="price_f", sort="price_f desc", rows="3"),
+       price_f,
+       mult(price_f, 10) as newPrice)
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "price_f": 0.99999994,
+        "newPrice": 9.9999994
+      },
+      {
+        "price_f": 0.99999994,
+        "newPrice": 9.9999994
+      },
+      {
+        "price_f": 0.9999992,
+        "newPrice": 9.999992
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 3
+      }
+    ]
+  }
+}
+----
+
+== More Scalar Math Functions
+
+The following scalar math functions are available in the math expressions library:
+
+`abs`, `add`, `div`, `mult`, `sub`, `log`,
+`pow`, `mod`, `ceil`, `floor`, `sin`, `asin`,
+`sinh`, `cos`, `acos`, `cosh`, `tan`, `atan`,
+`tanh`, `round`, `precision`, `sqrt`, `cbrt`
+
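+As a quick illustration, these functions compose like any other math expression.
+The minimal sketch below evaluates to 4, since `sqrt(16)` is 4 and `mod(10, 3)` is 1:
+
+[source,text]
+----
+mult(sqrt(16), mod(10, 3))
+----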

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/statistics.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/statistics.adoc b/solr/solr-ref-guide/src/statistics.adoc
new file mode 100644
index 0000000..74da76b
--- /dev/null
+++ b/solr/solr-ref-guide/src/statistics.adoc
@@ -0,0 +1,575 @@
+= Statistics
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+This section of the user guide covers the core statistical functions
+available in math expressions.
+
+== Descriptive Statistics
+
+The `describe` function can be used to return descriptive statistics about a
+numeric array. The `describe` function returns a single *tuple* with name/value
+pairs containing descriptive statistics.
+
+Below is a simple example that selects a random sample of documents,
+vectorizes the *price_f* field in the result set and uses the `describe` function to
+return descriptive statistics about the vector:
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+    b=col(a, price_f),
+    c=describe(b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": {
+          "sumsq": 4999.041975263254,
+          "max": 0.99995726,
+          "var": 0.08344429493940454,
+          "geometricMean": 0.36696588922559575,
+          "sum": 7497.460565552007,
+          "kurtosis": -1.2000739963006035,
+          "N": 15000,
+          "min": 0.00012338161,
+          "mean": 0.49983070437013266,
+          "popVar": 0.08343873198640858,
+          "skewness": -0.001735537500095477,
+          "stdev": 0.28886726179926403
+        }
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 305
+      }
+    ]
+  }
+}
+----
+
+== Histograms and Frequency Tables
+
+Histograms and frequency tables are tools for understanding the distribution
+of a random variable.
+
+The `hist` function creates a histogram designed for use with continuous data. The
+`freqTable` function creates a frequency table for use with discrete data.
+
+=== Histograms
+
+Below is an example that selects a random sample, creates a vector from the
+result set and uses the `hist` function to return a histogram with 5 bins.
+The `hist` function returns a list of tuples with summary statistics for each bin.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+    b=col(a, price_f),
+    c=hist(b, 5))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          {
+            "prob": 0.2057939717603699,
+            "min": 0.000010371208,
+            "max": 0.19996578,
+            "mean": 0.10010319358402578,
+            "var": 0.003366805016271609,
+            "cumProb": 0.10293732468049072,
+            "sum": 309.0185585938884,
+            "stdev": 0.058024176136086666,
+            "N": 3087
+          },
+          {
+            "prob": 0.19381868629885585,
+            "min": 0.20007741,
+            "max": 0.3999073,
+            "mean": 0.2993590803885827,
+            "var": 0.003401644034068929,
+            "cumProb": 0.3025295802728267,
+            "sum": 870.5362057700005,
+            "stdev": 0.0583236147205309,
+            "N": 2908
+          },
+          {
+            "prob": 0.20565789836690007,
+            "min": 0.39995712,
+            "max": 0.5999038,
+            "mean": 0.4993620963792545,
+            "var": 0.0033158364923609046,
+            "cumProb": 0.5023006239697967,
+            "sum": 1540.5320673300018,
+            "stdev": 0.05758330046429177,
+            "N": 3085
+          },
+          {
+            "prob": 0.19437108496008693,
+            "min": 0.6000449,
+            "max": 0.79973197,
+            "mean": 0.7001752711861512,
+            "var": 0.0033895105082360185,
+            "cumProb": 0.7026537198687285,
+            "sum": 2042.4112660500066,
+            "stdev": 0.058219502816805456,
+            "N": 2917
+          },
+          {
+            "prob": 0.20019582213899467,
+            "min": 0.7999126,
+            "max": 0.99987316,
+            "mean": 0.8985428275824184,
+            "var": 0.003312360017780078,
+            "cumProb": 0.899450457219298,
+            "sum": 2698.3241112299997,
+            "stdev": 0.05755310606544253,
+            "N": 3003
+          }
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 322
+      }
+    ]
+  }
+}
+----
+
+The `col` function can be used to *vectorize* a column of data from the list of tuples
+returned by the `hist` function.
+
+In the example below, the *N* field,
+which is the number of observations in each bin, is returned as a vector.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+     b=col(a, price_f),
+     c=hist(b, 11),
+     d=col(c, N))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          1387,
+          1396,
+          1391,
+          1357,
+          1384,
+          1360,
+          1367,
+          1375,
+          1307,
+          1310,
+          1366
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 307
+      }
+    ]
+  }
+}
+----
+
+=== Frequency Tables
+
+The `freqTable` function returns a frequency distribution for a discrete data set.
+The `freqTable` function doesn't create bins like the histogram. Instead it counts
+the occurrence of each discrete data value and returns a list of tuples with the
+frequency statistics for each value. Fields from a frequency table can be vectorized
+using the `col` function in the same manner as a histogram (see the sketch following
+the example below).
+
+Below is a simple example of a frequency table built from a random sample of
+a discrete variable.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="day_i"),
+     b=col(a, day_i),
+     c=freqTable(b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          {
+            "pct": 0.0318,
+            "count": 477,
+            "cumFreq": 477,
+            "cumPct": 0.0318,
+            "value": 0
+          },
+          {
+            "pct": 0.033133333333333334,
+            "count": 497,
+            "cumFreq": 974,
+            "cumPct": 0.06493333333333333,
+            "value": 1
+          },
+          {
+            "pct": 0.03426666666666667,
+            "count": 514,
+            "cumFreq": 1488,
+            "cumPct": 0.0992,
+            "value": 2
+          },
+          {
+            "pct": 0.0346,
+            "count": 519,
+            "cumFreq": 2007,
+            "cumPct": 0.1338,
+            "value": 3
+          },
+          {
+            "pct": 0.03133333333333333,
+            "count": 470,
+            "cumFreq": 2477,
+            "cumPct": 0.16513333333333333,
+            "value": 4
+          },
+          {
+            "pct": 0.03333333333333333,
+            "count": 500,
+            "cumFreq": 2977,
+            "cumPct": 0.19846666666666668,
+            "value": 5
+          }
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 281
+      }
+    ]
+  }
+}
+----
+
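+As noted above, fields from the frequency table can be vectorized with the `col`
+function. Below is a sketch pulling the *count* column into an array (output omitted):
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="day_i"),
+    b=col(a, day_i),
+    c=freqTable(b),
+    d=col(c, count))
+----
+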
+== Percentiles
+
+The `percentile` function returns the estimated value for a specific percentile in
+a sample set. The example below returns the estimate for the 95th percentile
+of the *price_f* field.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+     b=col(a, price_f),
+     c=percentile(b, 95))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+ {
+   "result-set": {
+     "docs": [
+       {
+         "c": 312.94
+       },
+       {
+         "EOF": true,
+         "RESPONSE_TIME": 286
+       }
+     ]
+   }
+ }
+----
+
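+Several percentiles can be estimated in one request by assigning each to a variable
+and echoing them. Below is a sketch using the *echo* parameter, which is covered in
+the Variables section (output omitted):
+
+[source,text]
+----
+let(echo="c,d,e",
+    a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+    b=col(a, price_f),
+    c=percentile(b, 50),
+    d=percentile(b, 75),
+    e=percentile(b, 95))
+----
+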
+== Covariance and Correlation
+
+Covariance and Correlation measure how random variables move
+together.
+
+=== Covariance and Covariance Matrices
+
+The `cov` function calculates the covariance of two sample sets of data.
+
+In the example below covariance is calculated for two numeric
+arrays created by the `array` function. It's important to note that
+vectorized data from SolrCloud collections can be used with any function that
+operates on arrays.
+
+[source,text]
+----
+let(a=array(1, 2, 3, 4, 5),
+    b=array(100, 200, 300, 400, 500),
+    c=cov(a, b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+ {
+   "result-set": {
+     "docs": [
+       {
+         "c": 0.9484775349999998
+       },
+       {
+         "EOF": true,
+         "RESPONSE_TIME": 286
+       }
+     ]
+   }
+ }
+----
+
+If a matrix is passed to the `cov` function it will automatically compute a covariance
+matrix for the columns of the matrix.
+
+Notice in the example below that three numeric arrays are added as rows
+to a matrix. The matrix is then transposed to turn the rows into
+columns, and the covariance matrix is computed for the columns of the
+matrix.
+
+[source,text]
+----
+let(a=array(1, 2, 3, 4, 5),
+     b=array(100, 200, 300, 400, 500),
+     c=array(30, 40, 80, 90, 110),
+     d=transpose(matrix(a, b, c)),
+     e=cov(d))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+ {
+   "result-set": {
+     "docs": [
+       {
+         "e": [
+           [
+             2.5,
+             250,
+             52.5
+           ],
+           [
+             250,
+             25000,
+             5250
+           ],
+           [
+             52.5,
+             5250,
+             1150
+           ]
+         ]
+       },
+       {
+         "EOF": true,
+         "RESPONSE_TIME": 2
+       }
+     ]
+   }
+ }
+----
+
+=== Correlation and Correlation Matrices
+
+Correlation is a measure of covariance that has been scaled between
+-1 and 1.
+
+Three correlation types are supported:
+
+* *pearsons* (default)
+* *kendalls*
+* *spearmans*
+
+The type of correlation is specified by adding the *type* named parameter in the
+function call. The example below demonstrates the use of the *type*
+named parameter.
+
+[source,text]
+----
+let(a=array(1, 2, 3, 4, 5),
+    b=array(100, 200, 300, 400, 5000),
+    c=corr(a, b, type=spearmans))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+ {
+   "result-set": {
+     "docs": [
+       {
+         "c": 0.7432941462471664
+       },
+       {
+         "EOF": true,
+         "RESPONSE_TIME": 0
+       }
+     ]
+   }
+ }
+----
+
+Like the `cov` function, the `corr` function automatically builds a correlation matrix
+if a matrix is passed as a parameter. The correlation matrix is built by correlating the columns
+of the matrix passed in.
+
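+For example, the covariance matrix example above can be adapted directly; a sketch
+(output omitted):
+
+[source,text]
+----
+let(a=array(1, 2, 3, 4, 5),
+    b=array(100, 200, 300, 400, 500),
+    c=array(30, 40, 80, 90, 110),
+    d=transpose(matrix(a, b, c)),
+    e=corr(d))
+----
+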
+== Statistical Inference Tests
+
+Statistical inference tests test a hypothesis on *random samples* and return p-values which
+can be used to infer the reliability of the test for the entire population.
+
+The following statistical inference tests are available:
+
+* `anova`: One-Way ANOVA tests if there is a statistically significant difference in the
+means of two or more random samples.
+
+* `ttest`: The T-test tests if there is a statistically significant difference in the means of two
+random samples.
+
+* `pairedTtest`: The paired t-test tests if there is a statistically significant difference
+in the means of two random samples with paired data.
+
+* `gTestDataSet`: The G-test tests if two samples of binned discrete data were drawn
+from the same population.
+
+* `chiSquareDataSet`: The Chi-Squared test tests if two samples of binned discrete data were
+drawn from the same population.
+
+* `mannWhitney`: The Mann-Whitney test is a non-parametric test that tests if two
+samples of continuous data were drawn from the same population. The Mann-Whitney test
+is often used instead of the T-test when the underlying assumptions of the T-test are
+not met.
+
+* `ks`: The Kolmogorov-Smirnov test tests if two samples of continuous data were drawn from
+the same distribution.
+
+Below is a simple example of a T-test performed on two random samples.
+The returned p-value of .93 means we cannot reject the null hypothesis
+that there is no statistically significant difference in the means of the two samples.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    b=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    c=col(a, price_f),
+    d=col(b, price_f),
+    e=ttest(c, d))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": {
+          "p-value": 0.9350135639249795,
+          "t-statistic": 0.081545541074817
+        }
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 48
+      }
+    ]
+  }
+}
+----
+
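+The other two-sample tests listed above follow the same calling pattern. As an
+illustrative sketch, assuming the `mannWhitney` function accepts two numeric arrays
+in the same way as `ttest` (output omitted):
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    b=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    c=col(a, price_f),
+    d=col(b, price_f),
+    e=mannWhitney(c, d))
+----
+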
+== Transformations
+
+In statistical analysis it's often useful to transform data sets before performing
+statistical calculations. The statistical function library includes the following
+commonly used transformations:
+
+* `rank`: Returns a numeric array with the rank-transformed value of each element of the original
+array (see the sketch following this list).
+
+* `log`: Returns a numeric array with the natural log of each element of the original array.
+
+* `sqrt`: Returns a numeric array with the square root of each element of the original array.
+
+* `cbrt`: Returns a numeric array with the cube root of each element of the original array.
+
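+As a minimal sketch, the `rank` transform mentioned above applied to a small array
+(the values here are arbitrary; output omitted):
+
+[source,text]
+----
+let(a=array(3, 1, 55, 2),
+    b=rank(a))
+----
+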
+Below is an example of a t-test performed on log-transformed data sets:
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    b=random(collection1, q="*:*", rows="1500", fl="price_f"),
+    c=log(col(a, price_f)),
+    d=log(col(b, price_f)),
+    e=ttest(c, d))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": {
+          "p-value": 0.9655110070265056,
+          "t-statistic": -0.04324265449471238
+        }
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 58
+      }
+    ]
+  }
+}
+----

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/streaming-expressions.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/streaming-expressions.adoc b/solr/solr-ref-guide/src/streaming-expressions.adoc
index ed37ce1..ccf3bf3 100644
--- a/solr/solr-ref-guide/src/streaming-expressions.adoc
+++ b/solr/solr-ref-guide/src/streaming-expressions.adoc
@@ -1,5 +1,5 @@
 = Streaming Expressions
-:page-children: stream-source-reference, stream-decorator-reference, stream-evaluator-reference, statistical-programming, graph-traversal
+:page-children: stream-source-reference, stream-decorator-reference, stream-evaluator-reference, statistical-programming, math-expressions, graph-traversal
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/term-vectors.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/term-vectors.adoc b/solr/solr-ref-guide/src/term-vectors.adoc
new file mode 100644
index 0000000..cbd21a0
--- /dev/null
+++ b/solr/solr-ref-guide/src/term-vectors.adoc
@@ -0,0 +1,237 @@
+= Text Analysis and Term Vectors
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+TF-IDF term vectors are often used to represent text documents when performing text mining
+and machine learning operations. This section of the user guide describes how to
+use math expressions to perform text analysis and create TF-IDF term vectors.
+
+== Text Analysis
+
+The `analyze` function applies a Solr analyzer to a text field and returns the tokens
+emitted by the analyzer in an array. Any analyzer chain that is attached to a field in Solr's
+schema can be used with the `analyze` function.
+
+In the example below, the text "hello world" is analyzed using the analyzer chain attached to the *subject* field in
+the schema. The *subject* field is defined with the field type *text_general*, so the text is analyzed using the
+analysis chain configured for the *text_general* field type.
+
+[source,text]
+----
+analyze("hello world", subject)
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          "hello",
+          "world"
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+=== Annotating Documents
+
+The `analyze` function can be used inside of a `select` function to annotate documents with the tokens
+generated by the analysis.
+
+The example below is performing a `search` in collection1. Each tuple returned by the `search`
+contains an *id* and *subject*. For each tuple, the
+`select` function is selecting the *id* field and calling the `analyze` function on the *subject* field.
+The analyzer chain specified by the *subject_bigram* field is configured to perform a bigram analysis.
+The tokens generated by the `analyze` function are added to each tuple in a field called `terms`.
+
+Notice in the output that an array of bigram terms has been added to the tuples.
+
+[source,text]
+----
+select(search(collection1, q="*:*", fl="id, subject", sort="id asc"),
+       id,
+       analyze(subject, subject_bigram) as terms)
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "terms": [
+          "text analysis",
+          "analysis example"
+        ],
+        "id": "1"
+      },
+      {
+        "terms": [
+          "example number",
+          "number two"
+        ],
+        "id": "2"
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 4
+      }
+    ]
+  }
+}
+----
+
+== Term Vectors
+
+The `termVectors` function can be used to build *TF-IDF*
+term vectors from the terms generated by the `analyze` function.
+
+The `termVectors` function operates over a list of tuples that contain a field
+called *id* and a field called *terms*. Notice
+that this is the exact output structure of the *document annotation* example above.
+
+The `termVectors` function builds a *matrix* from the list of tuples. There is a *row* in the
+matrix for each tuple in the list and a *column* in the matrix for each term in the *terms*
+field.
+
+The example below builds on the *document annotation* example.
+The list of tuples is stored in variable *a*. The `termVectors` function
+operates over variable *a* and builds a matrix with *2 rows* and *4 columns*.
+
+The `termVectors` function also sets the *row* and *column* labels of the term vectors matrix.
+The row labels are the document ids and the
+column labels are the terms.
+
+In the example below, the `getRowLabels` and `getColumnLabels` functions return
+the row and column labels which are then stored in variables *c* and *d*.
+The *echo* parameter is echoing variables *c* and *d*, so the output includes
+the row and column labels.
+
+[source,text]
+----
+let(echo="c, d",
+    a=select(search(collection3, q="*:*", fl="id, subject", sort="id asc"),
+             id,
+             analyze(subject, subject_bigram) as terms),
+    b=termVectors(a, minTermLength=4, minDocFreq=0, maxDocFreq=1),
+    c=getRowLabels(b),
+    d=getColumnLabels(b))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          "1",
+          "2"
+        ],
+        "d": [
+          "analysis example",
+          "example number",
+          "number two",
+          "text analysis"
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 5
+      }
+    ]
+  }
+}
+----
+
+=== TF-IDF Values
+
+The values within the term vectors matrix are the TF-IDF values for each term in each document. The
+example below shows the values of the matrix.
+
+[source,text]
+----
+let(a=select(search(collection3, q="*:*", fl="id, subject", sort="id asc"),
+             id,
+             analyze(subject, subject_bigram) as terms),
+    b=termVectors(a, minTermLength=4, minDocFreq=0, maxDocFreq=1))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          [
+            1.4054651081081644,
+            0,
+            0,
+            1.4054651081081644
+          ],
+          [
+            0,
+            1.4054651081081644,
+            1.4054651081081644,
+            0
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 5
+      }
+    ]
+  }
+}
+----
+
+=== Limiting the Noise
+
+One of the key challenges when working with term vectors is that text often has a significant amount of noise
+which can obscure the important terms in the data. The `termVectors` function has several parameters
+designed to filter out the less meaningful terms. This is also important because eliminating
+the noisy terms helps keep the term vector matrix small enough to fit comfortably in memory.
+
+There are four parameters designed to filter noisy terms from the term vector matrix:
+
+* *minTermLength*: The minimum term length required to include the term in the matrix.
+* *minDocFreq*: The minimum *percentage* (0 to 1) of documents the term must appear in to be included in the matrix.
+* *maxDocFreq*: The maximum *percentage* (0 to 1) of documents the term can appear in to be included in the matrix.
+* *exclude*: A comma-delimited list of strings used to exclude terms. If a term contains any of the exclude strings that
+term will be excluded from the term vector (see the sketch following this list).
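+
+For illustration, a sketch adding the *exclude* parameter to the earlier example;
+the exclude string here is hypothetical (output omitted):
+
+[source,text]
+----
+let(a=select(search(collection3, q="*:*", fl="id, subject", sort="id asc"),
+             id,
+             analyze(subject, subject_bigram) as terms),
+    b=termVectors(a, minTermLength=4, minDocFreq=0, maxDocFreq=1, exclude="number"))
+----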

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/time-series.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/time-series.adoc b/solr/solr-ref-guide/src/time-series.adoc
new file mode 100644
index 0000000..e765270
--- /dev/null
+++ b/solr/solr-ref-guide/src/time-series.adoc
@@ -0,0 +1,431 @@
+= Time Series
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the user guide provides an overview of time series *aggregation*,
+*smoothing* and *differencing*.
+
+== Time Series Aggregation
+
+The `timeseries` function performs fast, distributed time
+series aggregation leveraging Solr's built-in faceting and date math capabilities.
+
+The example below performs a monthly time series aggregation:
+
+[source,text]
+----
+timeseries(collection1,
+           q=*:*,
+           field="recdate_dt",
+           start="2012-01-20T17:33:18Z",
+           end="2012-12-20T17:33:18Z",
+           gap="+1MONTH",
+           format="YYYY-MM",
+           count(*))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "recdate_dt": "2012-01",
+        "count(*)": 8703
+      },
+      {
+        "recdate_dt": "2012-02",
+        "count(*)": 8648
+      },
+      {
+        "recdate_dt": "2012-03",
+        "count(*)": 8621
+      },
+      {
+        "recdate_dt": "2012-04",
+        "count(*)": 8533
+      },
+      {
+        "recdate_dt": "2012-05",
+        "count(*)": 8792
+      },
+      {
+        "recdate_dt": "2012-06",
+        "count(*)": 8598
+      },
+      {
+        "recdate_dt": "2012-07",
+        "count(*)": 8679
+      },
+      {
+        "recdate_dt": "2012-08",
+        "count(*)": 8469
+      },
+      {
+        "recdate_dt": "2012-09",
+        "count(*)": 8637
+      },
+      {
+        "recdate_dt": "2012-10",
+        "count(*)": 8536
+      },
+      {
+        "recdate_dt": "2012-11",
+        "count(*)": 8785
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 16
+      }
+    ]
+  }
+}
+----
+
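+The *gap* parameter accepts Solr date math, so the same aggregation can be run at
+other granularities. Below is a sketch of a weekly aggregation (the format pattern
+here is illustrative; output omitted):
+
+[source,text]
+----
+timeseries(collection1,
+           q=*:*,
+           field="recdate_dt",
+           start="2012-01-20T17:33:18Z",
+           end="2012-12-20T17:33:18Z",
+           gap="+7DAY",
+           format="YYYY-MM-dd",
+           count(*))
+----
+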
+== Vectorizing the Time Series
+
+Before a time series result can be operated on by math expressions
+the data will need to be vectorized. Specifically,
+in the example above, the aggregation field count(*) will need to be moved into an array.
+As described in the Streams and Vectorization section of the user guide, the `col` function can be used
+to copy a numeric column from a list of tuples into an array.
+
+The expression below demonstrates the vectorization of the count(*) field.
+
+[source,text]
+----
+let(a=timeseries(collection1,
+                 q=*:*,
+                 field="test_dt",
+                 start="2012-01-20T17:33:18Z",
+                 end="2012-12-20T17:33:18Z",
+                 gap="+1MONTH",
+                 format="YYYY-MM",
+                 count(*)),
+    b=col(a, count(*)))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          8703,
+          8648,
+          8621,
+          8533,
+          8792,
+          8598,
+          8679,
+          8469,
+          8637,
+          8536,
+          8785
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 5
+      }
+    ]
+  }
+}
+----
+
+== Smoothing
+
+Time series smoothing is often used to remove the noise from a time series and help
+spot the underlying trends.
+The math expressions library has three *sliding window* approaches
+for time series smoothing. The *sliding window* approaches use a summary value
+from a sliding window of the data to calculate a new set of smoothed data points.
+
+The three *sliding window* functions are lagging indicators, which means
+they don't start to move in the direction of the trend until the trend affects
+the summary value of the sliding window. Because of this lagging quality these smoothing
+functions are often used to confirm the direction of the trend.
+
+=== Moving Average
+
+The `movingAvg` function computes a simple moving average over a sliding window of data.
+The example below generates a time series, vectorizes the count(*) field and computes the
+moving average with a window size of 3.
+
+The moving average function returns an array that is shorter
+than the original data set. This is because results are generated only when a full window of data
+is available for computing the average. With a window size of three the moving average will
+begin generating results at the 3rd value. The prior values are not included in the result.
+
+This is true for all the sliding window functions.
+
+[source,text]
+----
+let(a=timeseries(collection1,
+                 q=*:*,
+                 field="test_dt",
+                 start="2012-01-20T17:33:18Z",
+                 end="2012-12-20T17:33:18Z",
+                 gap="+1MONTH",
+                 format="YYYY-MM",
+                 count(*)),
+    b=col(a, count(*)),
+    c=movingAvg(b, 3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          8657.333333333334,
+          8600.666666666666,
+          8648.666666666666,
+          8641,
+          8689.666666666666,
+          8582,
+          8595,
+          8547.333333333334,
+          8652.666666666666
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 7
+      }
+    ]
+  }
+}
+----
+
+=== Exponential Moving Average
+
+The `expMovingAvg` function uses a different formula for computing the moving average that
+responds faster to changes in the underlying data. This means that it is
+less of a lagging indicator than the simple moving average.
+
+Below is an example that computes an exponential moving average:
+
+[source,text]
+----
+let(a=timeseries(collection1, q=*:*,
+                 field="test_dt",
+                 start="2012-01-20T17:33:18Z",
+                 end="2012-12-20T17:33:18Z",
+                 gap="+1MONTH",
+                 format="YYYY-MM",
+                 count(*)),
+    b=col(a, count(*)),
+    c=expMovingAvg(b, 3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          8657.333333333334,
+          8595.166666666668,
+          8693.583333333334,
+          8645.791666666668,
+          8662.395833333334,
+          8565.697916666668,
+          8601.348958333334,
+          8568.674479166668,
+          8676.837239583334
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 5
+      }
+    ]
+  }
+}
+----
+
+=== Moving Median
+
+The `movingMedian` function uses the median of the sliding window rather than the average.
+In many cases the moving median will be more *robust* to outliers than the moving average.
+
+Below is an example computing the moving median:
+
+[source,text]
+----
+let(a=timeseries(collection1,
+                 q=*:*,
+                 field="test_dt",
+                 start="2012-01-20T17:33:18Z",
+                 end="2012-12-20T17:33:18Z",
+                 gap="+1MONTH",
+                 format="YYYY-MM",
+                 count(*)),
+    b=col(a, count(*)),
+    c=movingMedian(b, 3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          8648,
+          8621,
+          8621,
+          8598,
+          8679,
+          8598,
+          8637,
+          8536,
+          8637
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 7
+      }
+    ]
+  }
+}
+----
+
+== Differencing
+
+Differencing is often used to remove the
+trend or seasonality from a time series. This is known as making a time series
+*stationary*.
+
+=== First Difference
+
+The actual technique of differencing is to use the difference between values rather than the
+original values. The *first difference* takes the difference between a value and the value
+that came directly before it. The first difference is often used to remove the trend
+from a time series.
+
+In the example below, the `diff` function computes the first difference of a time series.
+The result array is one value shorter than the original array.
+This is because the `diff` function only returns a result for values
+where the prior value has been subtracted.
+
+[source,text]
+----
+let(a=timeseries(collection1,
+                 q=*:*,
+                 field="test_dt",
+                 start="2012-01-20T17:33:18Z",
+                 end="2012-12-20T17:33:18Z",
+                 gap="+1MONTH",
+                 format="YYYY-MM",
+                 count(*)),
+    b=col(a, count(*)),
+    c=diff(b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          -55,
+          -27,
+          -88,
+          259,
+          -194,
+          81,
+          -210,
+          168,
+          -101,
+          249
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 11
+      }
+    ]
+  }
+}
+----
+
+=== Lagged Differences
+
+The `diff` function has an optional second parameter to specify a lag in the difference.
+If a lag is specified the difference is taken between a value and the value at a specified
+lag in the past. Lagged differences are often used to remove seasonality from a time series.
+
+The simple example below demonstrates how lagged differencing works.
+Notice that the array in the example follows a simple repeated pattern. This type of pattern
+is often seen with seasonal data. In this example we can remove this pattern using
+the `diff` function with a lag of 4. This will subtract the value lagging four indexes
+behind the current index. Notice that the result size is the original array size minus the lag.
+This is because the `diff` function only returns results for values where the lag of 4
+is possible to compute.
+
+[source,text]
+----
+let(a=array(1,2,5,2,1,2,5,2,1,2,5),
+     b=diff(a, 4))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          0,
+          0,
+          0,
+          0,
+          0,
+          0,
+          0
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/variables.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/variables.adoc b/solr/solr-ref-guide/src/variables.adoc
new file mode 100644
index 0000000..7e12e75
--- /dev/null
+++ b/solr/solr-ref-guide/src/variables.adoc
@@ -0,0 +1,147 @@
+= Variables
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+== The Let Expression
+
+The `let` expression sets variables and returns
+the value of the last variable by default. The output of any streaming expression
+or math expression can be set to a variable.
+
+Below is a simple example setting three variables *a*, *b*
+and *c*. Variables *a* and *b* are set to arrays. The variable *c* is set
+to the output of the `ebeAdd` function which performs element-by-element
+addition of the two arrays.
+
+Notice that the last variable, *c*, is returned.
+
+[source,text]
+----
+let(a=array(1, 2, 3),
+    b=array(10, 20, 30),
+    c=ebeAdd(a, b))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          11,
+          22,
+          33
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 4
+      }
+    ]
+  }
+}
+----
+
+== Echoing Variables
+
+All variables can be output by setting the *echo* parameter to *true*.
+
+[source,text]
+----
+let(echo=true,
+    a=array(1, 2, 3),
+    b=array(10, 20, 30),
+    c=ebeAdd(a, b))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "a": [
+          1,
+          2,
+          3
+        ],
+        "b": [
+          10,
+          20,
+          30
+        ],
+        "c": [
+          11,
+          22,
+          33
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+A specific set of variables can be echoed by providing a comma delimited
+list of variables to the echo parameter.
+
+[source,text]
+----
+let(echo="a,b",
+    a=array(1, 2, 3),
+    b=array(10, 20, 30),
+    c=ebeAdd(a, b))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "a": [
+          1,
+          2,
+          3
+        ],
+        "b": [
+          10,
+          20,
+          30
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/vector-math.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/vector-math.adoc b/solr/solr-ref-guide/src/vector-math.adoc
new file mode 100644
index 0000000..22d610f
--- /dev/null
+++ b/solr/solr-ref-guide/src/vector-math.adoc
@@ -0,0 +1,343 @@
+= Vector Math
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the user guide covers vector math and
+vector manipulation functions.
+
+== Arrays
+
+Arrays can be created with the `array` function.
+
+For example, the expression below creates a numeric array with
+three elements:
+
+[source,text]
+----
+array(1, 2, 3)
+----
+
+When this expression is sent to the /stream handler it responds with
+a JSON array:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          1,
+          2,
+          3
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Array Operations
+
+Arrays can be passed as parameters to functions that operate on arrays.
+
+For example, an array can be reversed with the `rev` function:
+
+[source,text]
+----
+rev(array(1, 2, 3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          3,
+          2,
+          1
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+Another example is the `length` function,
+which returns the length of an array:
+
+[source,text]
+----
+length(array(1, 2, 3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 3
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+A slice of an array can be taken with the `copyOfRange` function, which
+copies a range of elements from an array. The range is specified by a start
+index (inclusive) and an end index (exclusive), both zero-based.
+
+[source,text]
+----
+copyOfRange(array(1,2,3,4,5,6), 1, 4)
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          2,
+          3,
+          4
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Vector Summarizations and Norms
+
+There is a set of functions that perform
+summarizations and return norms of arrays. These functions
+operate over an array and return a single
+value. The following vector summarization and norm functions are available:
+`mult`, `add`, `sumSq`, `mean`, `l1norm`, `l2norm`, `linfnorm`.
+
+The example below uses the `mult` function,
+which multiplies together all the values of an array.
+
+[source,text]
+----
+mult(array(2,4,8))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 64
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+The vector norm functions provide different formulas for calculating vector magnitude.
+
+The example below calculates the *l2norm* of an array.
+
+[source,text]
+----
+l2norm(array(2,4,8))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 9.16515138991168
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
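+
+By contrast, the `l1norm` function sums the absolute values of the vector.
+A minimal sketch using the same array is shown below; since the l1 norm of
+(2, 4, 8) is 2 + 4 + 8, it would return 14.
+
+[source,text]
+----
+l1norm(array(2,4,8))
+----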
+
+== Scalar Vector Math
+
+Scalar vector math functions add, subtract, multiply or divide a scalar value with every value in a vector.
+The following functions perform these operations: `scalarAdd`, `scalarSubtract`, `scalarMultiply`
+and `scalarDivide`.
+
+
+Below is an example of the `scalarMultiply` function, which multiplies the scalar value 3 with
+every value of an array.
+
+[source,text]
+----
+scalarMultiply(3, array(1,2,3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          3,
+          6,
+          9
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
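+
+The other scalar functions follow the same form. For example, the sketch below
+uses `scalarAdd` to add the scalar value 10 to every value of an array, which
+would return the array (11, 12, 13):
+
+[source,text]
+----
+scalarAdd(10, array(1,2,3))
+----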
+
+== Element-By-Element Vector Math
+
+Two vectors can be added, subtracted, multiplied and divided using element-by-element
+vector math functions. The following element-by-element vector math functions are available:
+`ebeAdd`, `ebeSubtract`, `ebeMultiply`, `ebeDivide`.
+
+The expression below performs the element-by-element subtraction of two arrays.
+
+[source,text]
+----
+ebeSubtract(array(10, 15, 20), array(1,2,3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          9,
+          13,
+          17
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 5
+      }
+    ]
+  }
+}
+----
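+
+The other element-by-element functions work the same way. For example, the
+sketch below uses `ebeMultiply` with the same two arrays, which would return
+the array (10, 30, 60):
+
+[source,text]
+----
+ebeMultiply(array(10, 15, 20), array(1,2,3))
+----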
+
+== Dot Product and Cosine Similarity
+
+The `dotProduct` and `cosineSimilarity` functions are often used as similarity measures between two
+sparse vectors. The `dotProduct` function is a measure of both angle and magnitude, while `cosineSimilarity`
+is a measure of angle only.
+
+Below is an example of the `dotProduct` function:
+
+[source,text]
+----
+dotProduct(array(2,3,0,0,0,1), array(2,0,1,0,0,3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 7
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 15
+      }
+    ]
+  }
+}
+----
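+
+The result can be verified by hand: the only positions where both vectors are
+non-zero are the first and last, so the dot product is (2 x 2) + (1 x 3) = 7.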
+
+Below is an example of the `cosineSimilarity` function:
+
+[source,text]
+----
+cosineSimilarity(array(2,3,0,0,0,1), array(2,0,1,0,0,3))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": 0.5
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 7
+      }
+    ]
+  }
+}
+----
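+
+Here both vectors have a magnitude of sqrt(14), so the cosine similarity is the
+dot product divided by the product of the magnitudes: 7 / 14 = 0.5.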
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/vectorization.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/vectorization.adoc b/solr/solr-ref-guide/src/vectorization.adoc
new file mode 100644
index 0000000..b01dcc8
--- /dev/null
+++ b/solr/solr-ref-guide/src/vectorization.adoc
@@ -0,0 +1,243 @@
+= Streams and Vectorization
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the user guide explores techniques
+for retrieving streams of data from Solr and vectorizing the
+*numeric* fields.
+
+The next chapter of the user guide covers
+Text Analysis and Term Vectors, which describes how to
+vectorize *text* fields.
+
+== Streams
+
+The Streaming Expression library has a wide range of stream sources that can be used to
+retrieve data from Solr Cloud collections. Math expressions can be used
+to vectorize and analyze the result sets.
+
+Below are some of the key stream sources:
+
+* *random*: Random sampling is widely used in statistics, probability and machine learning.
+The `random` function returns a random sample of search results that match a
+query. The random samples can be vectorized and operated on by math expressions and the results
+can be used to describe and make inferences about the entire population.
+
+* *timeseries*: The `timeseries`
+expression provides fast distributed time series aggregations, which can be
+vectorized and analyzed with math expressions.
+
+* *knnSearch*: K-nearest neighbor is a core machine learning algorithm. The `knnSearch`
+function is a specialized knn algorithm optimized to find the k-nearest neighbors of a document in
+a distributed index. Once the nearest neighbors are retrieved they can be vectorized
+and operated on by machine learning and text mining algorithms.
+
+* *sql*: SQL is the primary query language used by data scientists. The `sql` function supports
+data retrieval using a subset of SQL which includes both full text search and
+fast distributed aggregations. The result sets can then be vectorized and operated
+on by math expressions.
+
+* *jdbc*: The `jdbc` function allows data from any JDBC compliant data source to be combined with
+streams originating from Solr. Result sets from outside data sources can be vectorized and operated
+on by math expressions in the same manner as result sets originating from Solr.
+
+* *topic*: Messaging is an important foundational technology for large scale computing. The `topic`
+function provides publish/subscribe messaging capabilities by treating
+Solr Cloud as a distributed message queue. Topics are extremely powerful
+because they allow subscription by query. Topics can be used to support a broad set of
+use cases including bulk text mining operations and AI alerting.
+
+* *nodes*: Graph queries are frequently used by recommendation engines and are an important
+machine learning tool. The `nodes` function provides fast, distributed, breadth-first
+graph traversal over documents in a Solr Cloud collection. The node sets collected
+by the `nodes` function can be operated on by statistical and machine learning expressions to
+gain more insight into the graph.
+
+* *search*: Ranked search results are a powerful tool for finding the most relevant
+documents from a large document corpus. The `search` expression
+returns the top N ranked search results that match any
+Solr query, including geo-spatial queries. The smaller set of relevant
+documents can then be explored with statistical, machine learning and
+text mining expressions to gather insights about the data set, as shown in
+the sketch following this list.
+
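+Below is a sketch of a `search` expression; the collection name, query,
+field list and sort are illustrative placeholders rather than values from a
+real index:
+
+[source,text]
+----
+search(collection1,
+       q="body:oil",
+       fl="id, price_f",
+       sort="price_f desc")
+----
+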
+== Assigning Streams to Variables
+
+The output of any streaming expression can be set to a variable.
+Below is a very simple example using the `random` function to fetch
+three random samples from collection1. The random samples are returned
+as *tuples*, which contain name/value pairs.
+
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="3", fl="price_f"))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "a": [
+          {
+            "price_f": 0.7927976
+          },
+          {
+            "price_f": 0.060795486
+          },
+          {
+            "price_f": 0.55128294
+          }
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 11
+      }
+    ]
+  }
+}
+----
+
+== Creating a Vector with the *col* Function
+
+The `col` function iterates over a list of tuples and copies the values
+from a specific column into an *array*.
+
+The output of the `col` function is a numeric array that can be set to a
+variable and operated on by math expressions.
+
+Below is an example of the `col` function:
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="3", fl="price_f"),
+    b=col(a, price_f))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          0.42105234,
+          0.85237443,
+          0.7566981
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 9
+      }
+    ]
+  }
+}
+----
+
+== Applying Math Expressions to the Vector
+
+Once a vector has been created, any math expression that operates on vectors
+can be applied. In the example below the `mean` function is applied to
+the vector assigned to variable *b*.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+    b=col(a, price_f),
+    c=mean(b))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": 0.5016035594638814
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 306
+      }
+    ]
+  }
+}
+----
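+
+Multiple results can be computed from the same vector in a single request by
+combining the `echo` parameter, covered earlier in this guide, with additional
+variables. The sketch below would return both the mean and the L2 norm of the
+*price_f* vector:
+
+[source,text]
+----
+let(echo="c, d",
+    a=random(collection1, q="*:*", rows="15000", fl="price_f"),
+    b=col(a, price_f),
+    c=mean(b),
+    d=l2norm(b))
+----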
+
+== Creating Matrices
+
+Matrices can be created by vectorizing multiple numeric fields
+and adding them to a matrix. The matrices can then be operated on by
+any math expression that operates on matrices.
+
+Note that this section deals with the creation of matrices
+from numeric data. The next chapter of the user guide covers
+Text Analysis and Term Vectors, which describes how to build TF-IDF
+term vector matrices from text fields.
+
+Below is a simple example where four random samples are taken
+from different sub-populations in the data. The *price_f* field of
+each random sample is
+vectorized and the vectors are added as rows to a matrix.
+Then the `sumRows`
+function is applied to the matrix to return a vector containing
+the sum of each row.
+
+[source,text]
+----
+let(a=random(collection1, q="market:A", rows="5000", fl="price_f"),
+    b=random(collection1, q="market:B", rows="5000", fl="price_f"),
+    c=random(collection1, q="market:C", rows="5000", fl="price_f"),
+    d=random(collection1, q="market:D", rows="5000", fl="price_f"),
+    e=col(a, price_f),
+    f=col(b, price_f),
+    g=col(c, price_f),
+    h=col(d, price_f),
+    i=matrix(e, f, g, h),
+    j=sumRows(i))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "j": [
+          154390.1293375,
+          167434.89453,
+          159293.258493,
+          149773.42769
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 9
+      }
+    ]
+  }
+}
+----
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
index fac4274..a12a74e 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
@@ -31,10 +31,12 @@ public class FieldValueEvaluator extends SourceEvaluator {
   private static final long serialVersionUID = 1L;
   
   private String fieldName;
+  private boolean literal;
   
   public FieldValueEvaluator(String fieldName) {
-    if(fieldName.startsWith("'") && fieldName.endsWith("'") && fieldName.length() > 1){
+    if(fieldName.startsWith("\"") && fieldName.endsWith("\"") && fieldName.length() > 1){
       fieldName = fieldName.substring(1, fieldName.length() - 1);
+      literal = true;
     }
     
     this.fieldName = fieldName;
@@ -42,6 +44,10 @@ public class FieldValueEvaluator extends SourceEvaluator {
   
   @Override
   public Object evaluate(Tuple tuple) throws IOException {
+    if(literal) {
+      return fieldName;
+    }
+
     Object value = tuple.get(fieldName);
     
     // This is somewhat radical.
@@ -84,10 +90,6 @@ public class FieldValueEvaluator extends SourceEvaluator {
       }
     }
 
-    if(value == null) {
-      return fieldName;
-    }
-
     return value;
   }
   


[2/3] lucene-solr:master: SOLR-11947: Squashed commit of the following ref guide changes:

Posted by jb...@apache.org.
SOLR-11947: Squashed commit of the following ref guide changes:

commit 61053f2fe373bff0b451f549e063550f08ecdac1
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 26 12:44:12 2018 -0400

    SOLR-11947: Fix orphaned files

commit 42302073bf61fde134caeff71b6db3978e113b4d
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 26 12:27:26 2018 -0400

    SOLR-11947: small change

commit b16b1453c2e7d5083f588b4b874c918d521e9fe5
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 26 12:23:17 2018 -0400

    SOLR-11947: proofing

commit 57265ce4659a427c179e206b79d8fe05b01a5f93
Author: Joel Bernstein <jb...@apache.org>
Date:   Sat Mar 24 14:41:48 2018 -0400

    SOLR-11947: monte carlo WIP

commit 04e8381f6b5b329c5fa17c1f31c2d848fe9cec2a
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 23 16:24:10 2018 -0400

    SOLR-11947: probabiity WIP

commit 4298a6d514e7e431e322a4f62c22c336430a89f1
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 23 13:07:05 2018 -0400

    SOLR-11947: time series WIP

commit 1a7654f9225948cd4adb3056bc2192cc0d24b3ee
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 23 11:32:53 2018 -0400

    SOLR-11947: machine learning WIP

commit fae0c3aa46e6f26fecb59077207982b2f584ec86
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 22 22:14:15 2018 -0400

    SOLR-11947: machine learning WIP

commit fb6a96b2bdc4bbc4c2b5b62b6e69cd561ef9e31b
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 22 14:36:08 2018 -0400

    SOLR-11947: numerical analysis WIP

commit a648ba939c90caf5db2a5b88023bd580d4d1e8af
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 22 12:27:33 2018 -0400

    SOLR-11947: numerical analysis WIP

commit ce8f1b710d414d8e3ff3c8676f64fc3017316a15
Author: Joel Bernstein <jb...@apache.org>
Date:   Wed Mar 21 19:56:10 2018 -0400

    SOLR-11947: numerical analysis WIP

commit 5e25a4884341cdd84988e13250f255eb23d7fd50
Author: Joel Bernstein <jb...@apache.org>
Date:   Tue Mar 20 22:01:59 2018 -0400

    SOLR-11947: Curve fitting WIP

commit f381414dc44ecfa781988c5ca75bfb1c80de6674
Author: Joel Bernstein <jb...@apache.org>
Date:   Tue Mar 20 21:49:39 2018 -0400

    SOLR-11947: Curve fitting WIP

commit 4be725132215ed44cc84587bb0d11be216360b74
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 19 19:55:10 2018 -0400

    SOLR-11947: Monte Carlo WIP

commit d330b412e46be0ebf8d75e99295e3fe9f978c02c
Author: Joel Bernstein <jb...@apache.org>
Date:   Sun Mar 18 22:00:55 2018 -0400

    SOLR-11947: Probability WIP

commit e3d6160c1fa650e054b9694c57d34b3950c80175
Author: Joel Bernstein <jb...@apache.org>
Date:   Sat Mar 17 21:18:43 2018 -0400

    SOLR-11947: More WIP

commit 8484b0283f79825dee8eaee82604120d04511de4
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 16 15:03:06 2018 -0400

    SOLR-11947: machine learning WIP

commit 77ecfdc71d79ca8eded0355669310c6025c70d96
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 15 21:33:09 2018 -0400

    SOLR-11947: machine learning WIP

commit 7488caf5e54436a0e5fe85c0dda4ea31d8357600
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 15 19:08:50 2018 -0400

    SOLR-11947: machine learning WIP

commit 102ee2e1857e7d7f45d7f3195a0a4e91eacb766d
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 15 15:18:31 2018 -0400

    SOLR-11947: machine learning WIP

commit 0d5cd2b4a4fd012fe6d640a86733280702cf8673
Author: Joel Bernstein <jb...@apache.org>
Date:   Wed Mar 14 21:49:15 2018 -0400

    SOLR-11947: numerical analysis WIP

commit 31eec30576479a9023c7b0e6ccb2d9f685e128a1
Author: Joel Bernstein <jb...@apache.org>
Date:   Wed Mar 14 14:41:06 2018 -0400

    SOLR-11947: numerical analysis WIP

commit c6e324ac56ca6e9f229d6acb39fdcf60c3356230
Author: Joel Bernstein <jb...@apache.org>
Date:   Tue Mar 13 15:16:26 2018 -0400

    SOLR-11947: term vectors WIP

commit 8c843999eabdb82665641caa9c21f07e95b70a86
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 12 18:03:53 2018 -0400

    SOLR-11947: Add curve fitting to TOC

commit 09be026f6ad400d965fd373403d7a2eb2fae0c90
Author: Joel Bernstein <jb...@apache.org>
Date:   Mon Mar 12 15:36:05 2018 -0400

    SOLR-11947: Text analysis WIP

commit e48b4d69abadb603a90c052aa1e36dd60ae7fd33
Author: Joel Bernstein <jb...@apache.org>
Date:   Sun Mar 11 18:29:20 2018 -0400

    SOLR-11947: TOC changes

commit f71ebc079713e16492ba45cedafc3b9512f6bae2
Author: Joel Bernstein <jb...@apache.org>
Date:   Sat Mar 10 17:54:04 2018 -0500

    SOLR-11947: WIP term vectors

commit ebc6b3943a27454adaf1a2309b6720bb2ba63c8c
Author: Joel Bernstein <jb...@apache.org>
Date:   Sat Mar 10 13:34:19 2018 -0500

    SOLR-11947: WIP regression

commit 44752b2d34f46bc7f5693839e42ab3cef9edc47c
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 9 22:40:40 2018 -0500

    SOLR-11947: WIP for vectorization.adoc

commit 43254fcb05386264a6d591b1fa2c2573dcc2d2a3
Author: Joel Bernstein <jb...@apache.org>
Date:   Fri Mar 9 19:42:26 2018 -0500

    SOLR-11947: Test local links

commit b60df2000978f70720eb0a36543752fd3bf07d2c
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 8 21:41:17 2018 -0500

    SOLR-11947: Update math-expressions TOC

commit de068c3af8557d60de37cb29f3ed7da3f5442772
Author: Joel Bernstein <jb...@apache.org>
Date:   Thu Mar 8 21:24:46 2018 -0500

    SOLR-11947: Continued work on math expressions documentation.

commit fe445f2c997ea825d1ae9b9912406521249befc0
Author: Joel Bernstein <jb...@apache.org>
Date:   Sun Mar 4 20:22:33 2018 -0500

    SOLR-12054: ebeAdd and ebeSubtract should support matrix operations

commit 1f3ae745cc26453a34a64a4327ceac7cc91d23f5
Author: Joel Bernstein <jb...@apache.org>
Date:   Sun Mar 4 13:24:54 2018 -0500

    SOLR-11947: Initial commit for new math expression docs WIP


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1ed4e226
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1ed4e226
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1ed4e226

Branch: refs/heads/master
Commit: 1ed4e226ac66078a775c869a375c8c816220edec
Parents: dc2ad70
Author: Joel Bernstein <jb...@apache.org>
Authored: Mon Mar 26 12:48:33 2018 -0400
Committer: Joel Bernstein <jb...@apache.org>
Committed: Mon Mar 26 15:05:06 2018 -0400

----------------------------------------------------------------------
 solr/solr-ref-guide/src/curve-fitting.adoc      | 182 +++++
 solr/solr-ref-guide/src/machine-learning.adoc   | 680 +++++++++++++++++++
 solr/solr-ref-guide/src/math-expressions.adoc   |  59 ++
 solr/solr-ref-guide/src/matrix-math.adoc        | 443 ++++++++++++
 solr/solr-ref-guide/src/montecarlo.adoc         | 213 ++++++
 solr/solr-ref-guide/src/numerical-analysis.adoc | 430 ++++++++++++
 solr/solr-ref-guide/src/probability.adoc        | 415 +++++++++++
 solr/solr-ref-guide/src/regression.adoc         | 439 ++++++++++++
 solr/solr-ref-guide/src/scalar-math.adoc        | 137 ++++
 solr/solr-ref-guide/src/statistics.adoc         | 575 ++++++++++++++++
 .../src/streaming-expressions.adoc              |   2 +-
 solr/solr-ref-guide/src/term-vectors.adoc       | 237 +++++++
 solr/solr-ref-guide/src/time-series.adoc        | 431 ++++++++++++
 solr/solr-ref-guide/src/variables.adoc          | 147 ++++
 solr/solr-ref-guide/src/vector-math.adoc        | 343 ++++++++++
 solr/solr-ref-guide/src/vectorization.adoc      | 243 +++++++
 .../solrj/io/eval/FieldValueEvaluator.java      |  12 +-
 17 files changed, 4982 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/curve-fitting.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/curve-fitting.adoc b/solr/solr-ref-guide/src/curve-fitting.adoc
new file mode 100644
index 0000000..057cc23
--- /dev/null
+++ b/solr/solr-ref-guide/src/curve-fitting.adoc
@@ -0,0 +1,182 @@
+= Curve Fitting
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+== Polynomial Curve Fitting
+
+
+The `polyfit` function is a general purpose curve fitter used to model
+the *non-linear* relationship between two random variables.
+
+The `polyfit` function is passed the *x* and *y* axes and fits a smooth curve to the data.
+If only a single array is provided it is treated as the *y* axis and a sequence is generated
+for the *x* axis.
+
+The `polyfit` function also has a parameter that specifies the degree of the polynomial. The higher
+the degree, the more curvature can be modeled.
+
+The example below uses the `polyfit` function to fit a curve to an array using
+a third degree polynomial. The fitted curve is then subtracted from the original curve. The output
+shows the error between the fitted curve and the original curve, known as the residuals.
+The output also includes the sum-of-squares of the residuals, which provides a measure
+of how large the error is.
+
+[source,text]
+----
+let(echo="residuals, sumSqError",
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=polyfit(y, 3),
+    residuals=ebeSubtract(y, curve),
+    sumSqError=sumSq(residuals))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "residuals": [
+          0.5886274509803899,
+          -0.0746078431372561,
+          -0.49492135315664765,
+          -0.6689571213100631,
+          -0.5933591898297781,
+          0.4352283990519288,
+          0.32016160310277897,
+          1.1647963800904968,
+          0.272488687782805,
+          -0.3534055160525744,
+          0.2904697263520779,
+          -0.7925296272355089,
+          -0.5990476190476182,
+          -0.12572829131652274,
+          0.6307843137254909
+        ],
+        "sumSqError": 4.7294282482223595
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+In the next example the curve is fit using a fifth degree polynomial. Notice that the curve
+is fit closer, as shown by the smaller residuals and the lower value for the sum-of-squares of the
+residuals. This is because the higher degree polynomial has more freedom to follow the data.
+
+[source,text]
+----
+let(echo="residuals, sumSqError",
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=polyfit(y, 5),
+    residuals=ebeSubtract(y, curve),
+    sumSqError=sumSq(residuals))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "residuals": [
+          -0.12337461300309674,
+          0.22708978328173413,
+          0.12266015718028167,
+          -0.16502738747320755,
+          -0.41142804563857105,
+          0.2603044014808713,
+          -0.12128970101106162,
+          0.6234168308471704,
+          -0.1754692675745293,
+          -0.5379689969473249,
+          0.4651616185671843,
+          -0.288175756132409,
+          0.027970945463215102,
+          0.18699690402476687,
+          -0.09086687306501587
+        ],
+        "sumSqError": 1.413089480179252
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+
+== Prediction, Derivatives and Integrals
+
+The `polyfit` function returns an array which contains the *y* value data points
+of the fitted curve.
+
+In order to predict values along the curve an interpolation function must be created
+for the curve. Once an interpolation function has been created the `predict`,
+`derivative` and `integral` functions can be applied to the curve.
+
+In the example below the x axis is included for clarity.
+The `polyfit` function returns an array with the fitted curve.
+A linear interpolation function is then created for the curve with the `lerp` function.
+The `predict` function is then used to predict a value along the curve; in this
+case the prediction is made for the *x* value of .5.
+
+[source,text]
+----
+let(x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14),
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=polyfit(x, y, 5),
+    interp=lerp(x, curve),
+    p=predict(interp, .5))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "p": 0.4481424148606813
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+
+
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/machine-learning.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/machine-learning.adoc b/solr/solr-ref-guide/src/machine-learning.adoc
new file mode 100644
index 0000000..cbb3e05
--- /dev/null
+++ b/solr/solr-ref-guide/src/machine-learning.adoc
@@ -0,0 +1,680 @@
+= Machine Learning
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+This section of the math expressions user guide covers machine learning
+functions.
+
+== Feature Scaling
+
+Before performing machine learning operations it's often necessary to
+scale the feature vectors so they can be compared at the same scale.
+
+All the scaling functions operate on vectors and matrices.
+When operating on a matrix the *rows* of the matrix are scaled.
+
+=== Min/Max Scaling
+
+The `minMaxScale` function scales a vector or matrix between a min and
+max value. By default it will scale between 0 and 1 if min/max values
+are not provided.
+
+Below is a simple example of min/max scaling between 0 and 1.
+Notice that once brought into the same scale the vectors are the same.
+
+[source,text]
+----
+let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=minMaxScale(c))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          [
+            0,
+            0.3333333333333333,
+            0.6666666666666666,
+            1
+          ],
+          [
+            0,
+            0.3333333333333333,
+            0.6666666666666666,
+            1
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
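+The scaling range can also be set explicitly. The sketch below assumes the min
+and max can be passed as the second and third parameters, scaling the same
+matrix between 0 and 10:
+
+[source,text]
+----
+let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=minMaxScale(c, 0, 10))
+----
+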
+=== Standardization
+
+The `standardize` function scales a vector so that it has a
+mean of 0 and a standard deviation of 1. Standardization can be
+used with machine learning algorithms, such as SVM, that
+perform better when the data has a normal distribution.
+
+[source,text]
+----
+let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=standardize(c))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          [
+            -1.161895003862225,
+            -0.3872983346207417,
+            0.3872983346207417,
+            1.161895003862225
+          ],
+          [
+            -1.1618950038622249,
+            -0.38729833462074165,
+            0.38729833462074165,
+            1.1618950038622249
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 17
+      }
+    ]
+  }
+}
+----
+
+=== Unitize
+
+The `unitize` function scales vectors to a magnitude of 1. A vector with a
+magnitude of 1 is known as a unit vector.  Unit vectors are
+preferred when the vector math deals
+with vector direction rather than magnitude.
+
+[source,text]
+----
+let(a=array(20, 30, 40, 50),
+    b=array(200, 300, 400, 500),
+    c=matrix(a, b),
+    d=unitize(c))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          [
+            0.2721655269759087,
+            0.40824829046386296,
+            0.5443310539518174,
+            0.6804138174397716
+          ],
+          [
+            0.2721655269759087,
+            0.4082482904638631,
+            0.5443310539518174,
+            0.6804138174397717
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 6
+      }
+    ]
+  }
+}
+----
+
+== Distance
+
+The `distance` function computes a distance measure for two
+numeric arrays or a *distance matrix* for the columns of a matrix.
+
+There are four distance measures currently supported:
+
+* euclidean (default)
+* manhattan
+* canberra
+* earthMovers
+
+Below is an example for computing euclidean distance for
+two numeric arrays:
+
+
+[source,text]
+----
+let(a=array(20, 30, 40, 50),
+    b=array(21, 29, 41, 49),
+    c=distance(a, b))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": 2
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
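+
+The result can be checked by hand: each pair of elements differs by 1, so the
+euclidean distance is sqrt(1 + 1 + 1 + 1) = 2.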
+
+Below is an example for computing a distance matrix for columns
+of a matrix:
+
+[source,text]
+----
+let(a=array(20, 30, 40),
+    b=array(21, 29, 41),
+    c=array(31, 40, 50),
+    d=matrix(a, b, c),
+    e=distance(d))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          [
+            0,
+            15.652475842498529,
+            34.07345007480164
+          ],
+          [
+            15.652475842498529,
+            0,
+            18.547236990991408
+          ],
+          [
+            34.07345007480164,
+            18.547236990991408,
+            0
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 24
+      }
+    ]
+  }
+}
+----
+
+== K-means Clustering
+
+The `kmeans` function performs k-means clustering of the rows of a matrix.
+Once the clustering has been completed there are a number of useful functions available
+for examining the *clusters* and *centroids*.
+
+The examples below are clustering *term vectors*.
+The chapter on link:term-vectors.adoc[Text Analysis and Term Vectors] should be
+consulted for a full explanation of these features.
+
+=== Centroid Features
+
+In the example below the `kmeans` function is used to cluster a result set from the Enron email data-set
+and then the top features are extracted from the cluster centroids.
+
+Let's look at what data is assigned to each variable:
+
+* *a*: The `random` function returns a sample of 500 documents from the *enron*
+collection that match the query *body:oil*. The `select` function selects the *id* field
+and annotates each tuple with the analyzed bigram terms from the body field.
+
+* *b*: The `termVectors` function creates a TF-IDF term vector matrix from the
+tuples stored in variable *a*. Each row in the matrix represents a document. The columns of the matrix
+are the bigram terms that were attached to each tuple.
+* *c*: The `kmeans` function clusters the rows of the matrix into 5 clusters. The k-means clustering is performed using the
+*Euclidean distance* measure.
+* *d*: The `getCentroids` function returns a matrix of cluster centroids. Each row in the matrix is a centroid
+from one of the 5 clusters. The columns of the matrix are the same bigram terms as the term vector matrix.
+* *e*: The `topFeatures` function returns the column labels for the top 5 features of each centroid in the matrix.
+This returns the top 5 bigram terms for each centroid.
+
+[source,text]
+----
+let(a=select(random(enron, q="body:oil", rows="500", fl="id, body"),
+                    id,
+                    analyze(body, body_bigram) as terms),
+    b=termVectors(a, maxDocFreq=.10, minDocFreq=.05, minTermLength=14, exclude="_,copyright"),
+    c=kmeans(b, 5),
+    d=getCentroids(c),
+    e=topFeatures(d, 5))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          [
+            "enron enronxgate",
+            "north american",
+            "energy services",
+            "conference call",
+            "power generation"
+          ],
+          [
+            "financial times",
+            "chief financial",
+            "financial officer",
+            "exchange commission",
+            "houston chronicle"
+          ],
+          [
+            "southern california",
+            "california edison",
+            "public utilities",
+            "utilities commission",
+            "rate increases"
+          ],
+          [
+            "rolling blackouts",
+            "public utilities",
+            "electricity prices",
+            "federal energy",
+            "price controls"
+          ],
+          [
+            "california edison",
+            "regulatory commission",
+            "southern california",
+            "federal energy",
+            "power generators"
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 982
+      }
+    ]
+  }
+}
+----
+
+=== Cluster Features
+
+The example below examines the top features of a specific cluster. This example uses the same techniques
+as the centroids example, but the top features are extracted from a cluster rather than the centroids.
+
+The `getCluster` function returns a cluster by its index. Each cluster is a matrix containing term vectors
+that have been clustered together based on their features.
+
+In the example below the `topFeatures` function is used to extract the top 4 features from each term vector
+in the cluster.
+
+[source,text]
+----
+let(a=select(random(collection3, q="body:oil", rows="500", fl="id, body"),
+                    id,
+                    analyze(body, body_bigram) as terms),
+    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
+    c=kmeans(b, 25),
+    d=getCluster(c, 0),
+    e=topFeatures(d, 4))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          [
+            "electricity board",
+            "maharashtra state",
+            "power purchase",
+            "state electricity",
+            "reserved enron"
+          ],
+          [
+            "electricity board",
+            "maharashtra state",
+            "state electricity",
+            "purchase agreement",
+            "independent power"
+          ],
+          [
+            "maharashtra state",
+            "reserved enron",
+            "federal government",
+            "state government",
+            "dabhol project"
+          ],
+          [
+            "purchase agreement",
+            "power purchase",
+            "electricity board",
+            "maharashtra state",
+            "state government"
+          ],
+          [
+            "investment grade",
+            "portland general",
+            "general electric",
+            "holding company",
+            "transmission lines"
+          ],
+          [
+            "state government",
+            "state electricity",
+            "purchase agreement",
+            "electricity board",
+            "maharashtra state"
+          ],
+          [
+            "electricity board",
+            "state electricity",
+            "energy management",
+            "maharashtra state",
+            "energy markets"
+          ],
+          [
+            "electricity board",
+            "maharashtra state",
+            "state electricity",
+            "state government",
+            "second quarter"
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 978
+      }
+    ]
+  }
+}
+----
+
+== Multi K-means Clustering
+
+K-means clustering will produce different results depending on
+the initial placement of the centroids. K-means is fast enough
+that multiple trials can be performed and the best outcome selected.
+The `multiKmeans` function runs the k-means
+clustering algorithm for a given number of trials and selects the
+best result based on which trial produces the lowest intra-cluster
+variance.
+
+The example below is identical to the centroids example except that
+it uses `multiKmeans` with 100 trials, rather than a single
+trial of the `kmeans` function.
+
+[source,text]
+----
+let(a=select(random(collection3, q="body:oil", rows="500", fl="id, body"),
+                    id,
+                    analyze(body, body_bigram) as terms),
+    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
+    c=multiKmeans(b, 5, 100),
+    d=getCentroids(c),
+    e=topFeatures(d, 5))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "e": [
+          [
+            "enron enronxgate",
+            "energy trading",
+            "energy markets",
+            "energy services",
+            "unleaded gasoline"
+          ],
+          [
+            "maharashtra state",
+            "electricity board",
+            "state electricity",
+            "energy trading",
+            "chief financial"
+          ],
+          [
+            "price controls",
+            "electricity prices",
+            "francisco chronicle",
+            "wholesale electricity",
+            "power generators"
+          ],
+          [
+            "southern california",
+            "california edison",
+            "public utilities",
+            "francisco chronicle",
+            "utilities commission"
+          ],
+          [
+            "california edison",
+            "power purchases",
+            "system operator",
+            "term contracts",
+            "independent system"
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 1182
+      }
+    ]
+  }
+}
+----
+
+== Fuzzy K-means Clustering
+
+The `fuzzyKmeans` function is a soft clustering algorithm which
+allows vectors to be assigned to more than one cluster. The *fuzziness* parameter
+is a value between 1 and 2 that determines how fuzzy to make the cluster assignment.
+
+After the clustering has been performed the `getMembershipMatrix` function can be called
+on the clustering result to return a matrix describing which clusters each vector belongs to.
+There is a row in the matrix for each vector that was clustered and a column
+for each cluster. The values in the columns are the probability that the vector
+belongs to that specific cluster.
+
+A simple example will make this more clear. In the example below 300 documents are analyzed and
+then turned into a term vector matrix. Then the `fuzzyKmeans` function clusters the
+term vectors into 12 clusters with a fuzziness factor of 1.25.
+
+The `getMembershipMatrix` function is used to return the membership matrix and the first row
+of membership matrix is retrieved with the `rowAt` function. The `precision` function is then applied to the first row
+of the matrix to make it easier to read.
+
+The output shows a single vector representing the cluster membership probabilities for the first
+term vector. Notice that the term vector has the highest association with the 12th cluster,
+but also has significant associations with the 3rd, 5th, 6th and 7th clusters.
+
+[source,text]
+----
+let(a=select(random(collection3, q="body:oil", rows="300", fl="id, body"),
+                    id,
+                    analyze(body, body_bigram) as terms),
+    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
+    c=fuzzyKmeans(b, 12, fuzziness=1.25),
+    d=getMembershipMatrix(c),
+    e=rowAt(d, 0),
+    f=precision(e, 5))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "f": [
+          0,
+          0,
+          0.178,
+          0,
+          0.17707,
+          0.17775,
+          0.16214,
+          0,
+          0,
+          0,
+          0,
+          0.30504
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 2157
+      }
+    ]
+  }
+}
+----
+
+== K-nearest Neighbor
+
+The `knn` function searches the rows of a matrix for the
+K-nearest neighbors of a search vector. The `knn` function
+returns a *matrix* of the K-nearest neighbors. The `knn` function
+has a *named parameter* called *distance* which specifies the distance measure.
+There are four distance measures currently supported:
+
+* euclidean (default)
+* manhattan
+* canberra
+* earthMovers
+
+The example below builds on the clustering examples to demonstrate
+the `knn` function.
+
+In the example, the centroids matrix is set to variable *d*. The first
+centroid vector is selected from the matrix with the `rowAt` function.
+Then the `knn` function is used to find the 3 nearest neighbors
+to the centroid vector in the term vector matrix (variable *b*).
+
+The `knn` function returns a matrix with the 3 nearest neighbors based on the
+default distance measure which is euclidean. Finally, the top 4 features
+of the term vectors in the nearest neighbor matrix are returned.
+
+[source,text]
+----
+let(a=select(random(collection3, q="body:oil", rows="500", fl="id, body"),
+                    id,
+                    analyze(body, body_bigram) as terms),
+    b=termVectors(a, maxDocFreq=.09, minDocFreq=.03, minTermLength=14, exclude="_,copyright"),
+    c=multiKmeans(b, 5, 100),
+    d=getCentroids(c),
+    e=rowAt(d, 0),
+    g=knn(b, e, 3),
+    h=topFeatures(g, 4))
+----
+
+This expression returns the following response:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "h": [
+          [
+            "california power",
+            "electricity supply",
+            "concerned about",
+            "companies like"
+          ],
+          [
+            "maharashtra state",
+            "california power",
+            "electricity board",
+            "alternative energy"
+          ],
+          [
+            "electricity board",
+            "maharashtra state",
+            "state electricity",
+            "houston chronicle"
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 1243
+      }
+    ]
+  }
+}
+----
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/math-expressions.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/math-expressions.adoc b/solr/solr-ref-guide/src/math-expressions.adoc
new file mode 100644
index 0000000..e2ed438
--- /dev/null
+++ b/solr/solr-ref-guide/src/math-expressions.adoc
@@ -0,0 +1,59 @@
+= Math Expressions
+:page-children: scalar-math, vector-math, variables, matrix-math, vectorization, term-vectors, statistics, probability, montecarlo, time-series, regression, numerical-analysis, curve-fitting, machine-learning
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+The Streaming Expression library includes a powerful
+mathematical programming syntax with many of the features of a
+functional programming language. The syntax includes variables,
+data structures and a growing set of mathematical functions.
+
+This user guide provides an overview of the different areas of
+mathematical coverage starting with basic scalar math and
+ending with machine learning. Along the way the guide covers variables
+and data structures and techniques for combining Solr's
+powerful streams with mathematical functions to make every
+record in your Solr Cloud cluster computable.
+
+== link:scalar-math.adoc[Scalar Math]
+
+== link:vector-math.adoc[Vector Math]
+
+== link:variables.adoc[Variables]
+
+== link:matrix-math.adoc[Matrix Math]
+
+== link:vectorization.adoc[Streams and Vectorization]
+
+== link:term-vectors.adoc[Text Analysis and Term Vectors]
+
+== link:statistics.adoc[Statistics]
+
+== link:probability.adoc[Probability]
+
+== link:montecarlo.adoc[Monte Carlo Simulations]
+
+== link:time-series.adoc[Time Series]
+
+== link:regression.adoc[Linear Regression]
+
+== link:numerical-analysis.adoc[Interpolation, Derivatives and Integrals]
+
+== link:curve-fitting.adoc[Curve Fitting]
+
+== link:machine-learning.adoc[Machine Learning]

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/matrix-math.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/matrix-math.adoc b/solr/solr-ref-guide/src/matrix-math.adoc
new file mode 100644
index 0000000..ba45cca
--- /dev/null
+++ b/solr/solr-ref-guide/src/matrix-math.adoc
@@ -0,0 +1,443 @@
+= Matrices and Matrix Math
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the user guide covers the
+basics of matrix creation, manipulation and matrix math. Other sections
+of the user guide demonstrate how matrices are used by the statistics,
+probability and machine learning functions.
+
+== Matrix Creation
+
+A matrix can be created with the `matrix` function.
+The `matrix` function is passed a list of arrays with
+each array representing a *row* in the matrix.
+
+The example below creates a two-by-two matrix.
+
+[source,text]
+----
+matrix(array(1, 2),
+       array(4, 5))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "return-value": [
+          [
+            1,
+            2
+          ],
+          [
+            4,
+            5
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Accessing Rows and Columns
+
+The rows and columns of a matrix can be accessed using the `rowAt`
+and `colAt` functions.
+
+The example below creates a two-by-two matrix and returns the second column of the matrix
+(column indexes are zero-based). Notice that in this example the matrix is passed variables
+rather than a list of arrays directly.
+
+[source,text]
+----
+let(a=array(1, 2),
+    b=array(4, 5),
+    c=matrix(a, b),
+    d=colAt(c, 1))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          2,
+          5
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Row and Column Labels
+
+A matrix can have row and column labels. The functions
+`setRowLabels`, `setColumnLabels`, `getRowLabels` and `getColumnLabels`
+can be used to set and get the labels. The label values
+are set using string arrays.
+
+In other sections of the user guide, examples are shown where
+functions return matrices with the labels already set.
+
+Below is a simple example of setting and getting row and column labels
+on a matrix.
+
+[source,text]
+----
+let(echo="d, e",
+    a=matrix(array(1, 2),
+             array(4, 5)),
+    b=setRowLabels(a, array("row0", "row1")),
+    c=setColumnLabels(b, array("col0", "col1")),
+    d=getRowLabels(c),
+    e=getColumnLabels(c))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": [
+          "row0",
+          "row1"
+        ],
+        "e": [
+          "col0",
+          "col1"
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Matrix Attributes
+
+A matrix can also have an arbitrary set of named attributes associated
+with it. Certain functions, such as the `termVectors` function,
+return matrices that contain attributes that describe data in the matrix.
+
+Attributes can be retrieved by name using the `getAttribute` function and
+the entire attribute map can be returned using the `getAttributes`
+function.
+
+== Matrix Dimensions
+
+The dimensions of a matrix can be determined using the
+`rowCount` and `columnCount` functions.
+
+The example below retrieves the dimensions of a matrix.
+
+[source,text]
+----
+let(echo="b,c",
+    a=matrix(array(1, 2, 3),
+             array(4, 5, 6)),
+    b=rowCount(a),
+    c=columnCount(a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": 2,
+        "c": 3
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Matrix Transposition
+
+A matrix can be https://en.wikipedia.org/wiki/Transpose[transposed]
+using the `transpose` function.
+
+An example of matrix transposition is shown below:
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=transpose(a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          [
+            1,
+            4
+          ],
+          [
+            2,
+            5
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 24
+      }
+    ]
+  }
+}
+----
+
+== Matrix Summations
+
+The rows and columns of a matrix can be summed with the `sumRows` and `sumColumns` functions.
+Below is an example of the `sumRows` function which returns an
+array with the sum of each row.
+
+[source,text]
+----
+let(a=matrix(array(1, 2, 3),
+             array(4, 5, 6)),
+    b=sumRows(a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          6,
+          15
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 2
+      }
+    ]
+  }
+}
+----
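+
+The `sumColumns` function works the same way, returning an array with the sum of
+each column. For the matrix above the column sums would be 5, 7 and 9:
+
+[source,text]
+----
+let(a=matrix(array(1, 2, 3),
+             array(4, 5, 6)),
+    b=sumColumns(a))
+----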
+
+The `grandSum` function returns the sum of all values in the matrix.
+Below is an example of the `grandSum` function:
+
+[source,text]
+----
+let(a=matrix(array(1, 2, 3),
+             array(4, 5, 6)),
+    b=grandSum(a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": 21
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Scalar Matrix Math
+
+The same scalar math functions that apply to vectors can also be applied to matrices: `scalarAdd`, `scalarSubtract`,
+`scalarMultiply`, `scalarDivide`. Below is an example of the `scalarAdd` function
+which adds a scalar value to each element in a matrix.
+
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=scalarAdd(10, a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          [
+            11,
+            12
+          ],
+          [
+            14,
+            15
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
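+
+The other scalar functions follow the same pattern. For example, `scalarMultiply`
+multiplies each element in a matrix by a scalar value, so scaling the matrix
+above by 2 would yield rows of 2, 4 and 8, 10:
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=scalarMultiply(2, a))
+----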
+
+== Matrix Addition and Subtraction
+
+Two matrices can be added and subtracted using the `ebeAdd` and `ebeSubtract` functions,
+which perform element-by-element addition
+and subtraction of matrices.
+
+Below is a simple example of an element-by-element addition of a matrix by itself:
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=ebeAdd(a, a))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          [
+            2,
+            4
+          ],
+          [
+            8,
+            10
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
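+
+The `ebeSubtract` function is called the same way. Subtracting a matrix from
+itself, for example, would yield a matrix of zeros:
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=ebeSubtract(a, a))
+----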
+
+== Matrix Multiplication
+
+Matrix multiplication can be accomplished using the `matrixMult` function. Below is a simple
+example of matrix multiplication:
+
+[source,text]
+----
+let(a=matrix(array(1, 2),
+             array(4, 5)),
+    b=matrix(array(11, 12),
+             array(14, 15)),
+    c=matrixMult(a, b))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "c": [
+          [
+            39,
+            42
+          ],
+          [
+            114,
+            123
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
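+
+Note that matrix multiplication requires the column count of the first matrix to
+match the row count of the second. As a brief sketch, multiplying a 2x3 matrix by
+a 3x2 matrix produces a 2x2 matrix, in this case with rows of 58, 64 and 139, 154:
+
+[source,text]
+----
+let(a=matrix(array(1, 2, 3),
+             array(4, 5, 6)),
+    b=matrix(array(7, 8),
+             array(9, 10),
+             array(11, 12)),
+    c=matrixMult(a, b))
+----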
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/montecarlo.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/montecarlo.adoc b/solr/solr-ref-guide/src/montecarlo.adoc
new file mode 100644
index 0000000..814110f
--- /dev/null
+++ b/solr/solr-ref-guide/src/montecarlo.adoc
@@ -0,0 +1,213 @@
+= Monte Carlo Simulations
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+Monte Carlo simulations are commonly used to model the behavior of
+stochastic systems. This section of the user guide describes
+how to perform both *uncorrelated* and *correlated* Monte Carlo simulations
+using the *sampling* capabilities of the probability distribution framework.
+
+== Uncorrelated Simulations
+
+Uncorrelated Monte Carlo simulations model stochastic systems with the assumption
+that the underlying random variables move independently of each other.
+A simple example of a Monte Carlo simulation using two independently changing random variables
+is described below.
+
+In this example a Monte Carlo simulation is used to determine the probability that a simple hinge assembly will
+fall within a required length specification.
+
+The hinge has two components *A* and *B*. The combined length of the two components must be less than 5 centimeters
+to fall within specification.
+
+A random sampling of lengths for component *A* has shown that its length conforms to a
+normal distribution with a mean of 2.2 centimeters and a standard deviation of .0195
+centimeters.
+
+A random sampling of lengths for component *B* has shown that its length conforms
+to a normal distribution with a mean of 2.71 centimeters and a standard deviation of .0198 centimeters.
+
+The Monte Carlo simulation below performs the following steps:
+
+* A normal distribution with a mean of 2.2 and a standard deviation of .0195 is created to model the length of componentA.
+* A normal distribution with a mean of 2.71 and a standard deviation of .0198 is created to model the length of componentB.
+* The `monteCarlo` function is used to simulate component pairs. The `monteCarlo` function
+  calls the *add(sample(componentA), sample(componentB))* function 100000 times. Each
+  time the function is called a random sample is drawn from the componentA
+  and componentB length distributions, and the `add` function adds the two samples to calculate the combined length.
+  The result of each function call is collected in an array and assigned to the *simresults* variable.
+* An `empiricalDistribution` function is then created from the *simresults* array to model the distribution of the
+  simulation results.
+* Finally, the `cumulativeProbability` function is called on the *simmodel* to determine the cumulative probability
+  that the combined length of the components is 5 or less.
+* Based on the simulation there is a .9994371944629039 probability that the combined length of a component pair will
+be 5 or less.
+
+[source,text]
+----
+let(componentA=normalDistribution(2.2,  .0195),
+    componentB=normalDistribution(2.71, .0198),
+    simresults=monteCarlo(add(sample(componentA), sample(componentB)), 100000),
+    simmodel=empiricalDistribution(simresults),
+    prob=cumulativeProbability(simmodel,  5))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "prob": 0.9994371944629039
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 660
+      }
+    ]
+  }
+}
+----
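+
+As a quick sanity check on the simulation (a sketch, not part of the example above),
+the `describe` function can be used to summarize the simulated lengths. The mean
+should approach 2.2 + 2.71 = 4.91 and, because the components vary independently,
+their variances add, so the standard deviation should approach the square root of
+the summed variances, roughly .0278:
+
+[source,text]
+----
+let(componentA=normalDistribution(2.2,  .0195),
+    componentB=normalDistribution(2.71, .0198),
+    simresults=monteCarlo(add(sample(componentA), sample(componentB)), 100000),
+    stats=describe(simresults))
+----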
+
+== Correlated Simulations
+
+The simulation above assumes that the lengths of *componentA* and *componentB* vary independently.
+What would happen to the probability model if there were a correlation between the lengths of
+*componentA* and *componentB*?
+
+In the example below a database containing assembled pairs of components is used to determine
+if there is a correlation between the lengths of the components, and how the correlation affects the model.
+
+Before performing a simulation of the effects of correlation on the probability model it's
+useful to understand what the correlation is between the lengths of *componentA* and *componentB*.
+
+In the example below 5000 random samples are selected from a collection
+of assembled hinges. Each sample contains
+lengths of the components in the fields *componentA_d* and *componentB_d*.
+
+Both fields are then vectorized. The *componentA_d* vector is stored in
+variable *b* and the *componentB_d* variable is stored in variable *c*.
+
+Then the correlation of the two vectors is calculated using the `corr` function. Note that the outcome
+from `corr` is 0.9996931313216989. This means that *componentA_d* and *componentB_d* are almost
+perfectly correlated.
+
+[source,text]
+----
+let(a=random(collection5, q="*:*", rows="5000", fl="componentA_d, componentB_d"),
+    b=col(a, componentA_d),
+    c=col(a, componentB_d),
+    d=corr(b, c))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": 0.9996931313216989
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 309
+      }
+    ]
+  }
+}
+----
+
+How does correlation affect the probability model?
+
+The example below explores how to use a *multivariate normal distribution* function
+to model how correlation affects the probability of hinge defects.
+
+In this example 5000 random samples are selected from a collection
+containing length data for assembled hinges. Each sample contains
+the fields *componentA_d* and *componentB_d*.
+
+Both fields are then vectorized. The *componentA_d* vector is stored in
+variable *b* and the *componentB_d* variable is stored in variable *c*.
+
+An array is created that contains the *means* of the two vectorized fields.
+
+Then both vectors are added to a matrix which is transposed. This creates
+an *observation* matrix where each row contains one observation of
+*componentA_d* and *componentB_d*. A covariance matrix is then created from the columns of
+the observation matrix with the
+`cov` function. The covariance matrix describes the covariance between
+*componentA_d* and *componentB_d*.
+
+The `multivariateNormalDistribution` function is then called with the
+array of means for the two fields and the covariance matrix. The model
+for the multivariate normal distribution is stored in variable *g*.
+
+The `monteCarlo` function then calls the function *add(sample(g))* 50000 times
+and collects the results in a vector. Each time the function is called a single sample
+is drawn from the multivariate normal distribution. Each sample is a vector containing
+one *componentA* and *componentB* pair. The `add` function adds the values in the vector to
+calculate the length of the pair. Over the long term the samples drawn from the
+multivariate normal distribution will conform to the covariance matrix used to construct it.
+
+Just as in the non-correlated example an empirical distribution is used to model probabilities
+of the simulation vector and the `cumulativeProbability` function is used to compute the cumulative
+probability that the combined component length will be 5 centimeters or less.
+
+Notice that the probability of a hinge meeting specification has dropped to 0.9889517439980468.
+This is because the strong correlation
+between the lengths of the components means that their lengths rise and fall together, causing more hinges to
+fall outside the 5 centimeter specification.
+
+[source,text]
+----
+let(a=random(hinges, q="*:*", rows="5000", fl="componentA_d, componentB_d"),
+    b=col(a, componentA_d),
+    c=col(a, componentB_d),
+    cor=corr(b,c),
+    d=array(mean(b), mean(c)),
+    e=transpose(matrix(b, c)),
+    f=cov(e),
+    g=multiVariateNormalDistribution(d, f),
+    h=monteCarlo(add(sample(g)), 50000),
+    i=empiricalDistribution(h),
+    j=cumulativeProbability(i, 5))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "j": 0.9889517439980468
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 599
+      }
+    ]
+  }
+}
+----
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/numerical-analysis.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/numerical-analysis.adoc b/solr/solr-ref-guide/src/numerical-analysis.adoc
new file mode 100644
index 0000000..cb2bc2e
--- /dev/null
+++ b/solr/solr-ref-guide/src/numerical-analysis.adoc
@@ -0,0 +1,430 @@
+= Interpolation, Derivatives and Integrals
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the math expressions user guide covers *interpolation*, *derivatives* and *integrals*.
+These three interrelated topics are part of the field of mathematics called *numerical analysis*.
+
+== Interpolation
+
+Interpolation is used to construct new data points between a set of known control points.
+The ability to *predict* new data points allows for *sampling* along the curve defined by the
+control points.
+
+The interpolation functions described below all return an *interpolation model*
+that can be passed to other functions which make use of the sampling capability.
+
+If returned directly the interpolation model returns an array containing predictions for each of the
+control points. This is useful in the case of `loess` interpolation, which first smooths the control points
+and then interpolates the smoothed points. All other interpolation functions simply return the original
+control points, because interpolation predicts a curve that passes through the original control points.
+
+There are different algorithms for interpolation that will result in different predictions
+along the curve. The math expressions library currently supports the following
+interpolation functions:
+
+* `lerp`: Linear interpolation predicts points that pass through each control point and
+  form straight lines between control points.
+* `spline`: Spline interpolation predicts points that pass through each control point
+and form a smooth curve between control points (see the sketch following this list).
+* `akima`: Akima spline interpolation is similar to spline interpolation, but is more stable in the presence of outliers.
+* `loess`: Loess interpolation first performs a non-linear local regression to smooth the original
+control points. Then a spline is used to interpolate the smoothed control points.
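+
+Below is a brief, hypothetical sketch of spline interpolation, using the same
+calling convention as the `lerp` example in the next section. The `spline`
+function builds the interpolation model and the `predict` function samples a
+point between the control points:
+
+[source,text]
+----
+let(x=array(0, 2, 4, 6, 8, 10),
+    y=array(5, 10, 60, 190, 100, 130),
+    s=spline(x, y),
+    p=predict(s, 3))
+----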
+
+=== Upsampling
+
+Interpolation can be used to increase the sampling rate along a curve. One example
+of this would be to take a time series with samples every minute and create a data set with
+samples every second. In order to do this the data points between the minutes must be created.
+
+The `predict` function can be used to predict values anywhere within the bounds of the interpolation
+range. The example below demonstrates a very simple upsampling.
+
+In the example linear interpolation is performed on the arrays in variables *x* and *y*. The *x* variable,
+which is the x axis, is a sequence from 0 to 20 with a stride of 2. The *y* variable defines the curve
+along the x axis.
+
+The `lerp` function performs the interpolation and returns the interpolation model.
+
+The *u* variable is an array from 0 to 20 with a stride of 1. This fills in the gaps of the original x axis.
+The `predict` function then uses the interpolation model in variable *l* to predict values for
+every point in the array assigned to variable *u*.
+
+The variable *p* is the array of predictions, which is the upsampled set of y values.
+
+[source,text]
+----
+let(x=array(0, 2,  4,  6,  8,   10, 12,  14, 16, 18, 20),
+    y=array(5, 10, 60, 190, 100, 130, 100, 20, 30, 10, 5),
+    l=lerp(x, y),
+    u=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
+    p=predict(l, u))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "p": [
+          5,
+          7.5,
+          10,
+          35,
+          60,
+          125,
+          190,
+          145,
+          100,
+          115,
+          130,
+          115,
+          100,
+          60,
+          20,
+          25,
+          30,
+          20,
+          10,
+          7.5,
+          5
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+=== Smoothing Interpolation
+
+The `loess` function is a smoothing interpolator, which means it doesn't derive
+a function that passes through the original control points. Instead the `loess` function
+returns a function that smooths the original control points.
+
+A technique known as local regression is used to compute the smoothed curve.  The size of the
+neighborhood of the local regression can be adjusted
+to control how close the new curve conforms to the original control points.
+
+The `loess` function is passed *x* and *y* axes and fits a smooth curve to the data.
+If only a single array is provided it is treated as the *y* axis and a sequence is generated
+for the *x* axis.
+
+The example below uses the `loess` function to fit a curve to a set of *y* values in an array.
+The bandwidth parameter defines the percent of the data points to use for each local
+regression. The lower the percent, the smaller the neighborhood used for the local
+regression and the closer the curve will be to the original data.
+
+In the example the fitted curve is subtracted from the original curve using the
+`ebeSubtract` function. The output shows the error between the
+fitted curve and the original curve, known as the residuals. The output also includes
+the sum-of-squares of the residuals which provides a measure
+of how large the error is.
+
+[source,text]
+----
+let(echo="residuals, sumSqError",
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(y, bandwidth=.3),
+    residuals=ebeSubtract(y, curve),
+    sumSqError=sumSq(residuals))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "residuals": [
+          0,
+          0,
+          0,
+          -0.040524802275866634,
+          -0.10531988096456502,
+          0.5906115002526198,
+          0.004215074334896762,
+          0.4201374330912433,
+          0.09618315578013803,
+          0.012107948556718817,
+          -0.9892939034492398,
+          0.012014364143757561,
+          0.1093830927709325,
+          0.523166271893805,
+          0.09658362075164639,
+          -0.011433819306139625,
+          0.9899403519886416,
+          -0.011707983372932773,
+          -0.004223284004140737,
+          -0.00021462867928434548,
+          0.0018723112875456138
+        ],
+        "sumSqError": 2.8016013870800616
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+In the next example the curve is fit using a bandwidth of .25. Notice that the curve
+is a closer fit, shown by the smaller residuals and lower value for the sum-of-squares of the
+residuals.
+
+[source,text]
+----
+let(echo="residuals, sumSqError",
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(y, bandwidth=.25),
+    residuals=ebeSubtract(y, curve),
+    sumSqError=sumSq(residuals))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "residuals": [
+          0,
+          0,
+          0,
+          0,
+          -0.19117650587715396,
+          0.442863451538809,
+          -0.18553845993358564,
+          0.29990769020356645,
+          0,
+          0.23761890236245709,
+          -0.7344358765888117,
+          0.2376189023624491,
+          0,
+          0.30373119215254984,
+          -3.552713678800501e-15,
+          -0.23761890236245264,
+          0.7344358765888046,
+          -0.2376189023625095,
+          0,
+          2.842170943040401e-14,
+          -2.4868995751603507e-14
+        ],
+        "sumSqError": 1.7539413576337557
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+== Derivatives
+
+The derivative of a function measures the rate of change of the *y* value with respect to the
+rate of change of the *x* value.
+
+The `derivative` function can compute the derivative of any *interpolation* function.
+The `derivative` function can also compute the derivative of a derivative.
+
+The example below computes the derivative for a `loess` interpolation function.
+
+[source,text]
+----
+let(x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(x, y, bandwidth=.3),
+    derivative=derivative(curve))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "derivative": [
+          1.0022002675659012,
+          0.9955994648681976,
+          1.0154018729613081,
+          1.0643674501141696,
+          1.0430879694757085,
+          0.9698717643975381,
+          0.7488201070357539,
+          0.44627000894357516,
+          0.19019561285422165,
+          0.01703599324311178,
+          -0.001908408138535126,
+          -0.009121607450087499,
+          -0.2576361507216319,
+          -0.49378951291352746,
+          -0.7288073815664,
+          -0.9871806872210384,
+          -1.0025400632604322,
+          -1.001836567536853,
+          -1.0076227586138085,
+          -1.0021524620888589,
+          -1.0020541789058157
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
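+
+Because the `derivative` function can also take the derivative of a derivative,
+a second derivative can be computed by chaining two calls. Below is a minimal
+sketch, assuming a spline fit to the curve y = x*x; the first derivative should
+approximate 2x and the second derivative should approximate the constant 2 at
+interior points:
+
+[source,text]
+----
+let(echo="d1, d2",
+    x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
+    y=array(0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100),
+    curve=spline(x, y),
+    d1=derivative(curve),
+    d2=derivative(d1))
+----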
+
+== Integrals
+
+An integral is a measure of the area under a curve.
+The `integrate` function computes an integral over a specific
+range of an interpolated curve.
+
+In the example below the `integrate` function computes an
+integral for the entire range of the curve, 0 through 20.
+
+[source,text]
+----
+let(x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(x, y, bandwidth=.3),
+    integral=integrate(curve,  0, 20))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "integral": 90.17446104846645
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+In the next example an integral is computed for the range of 0 through 10.
+
+[source,text]
+----
+let(x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(x, y, bandwidth=.3),
+    integral=integrate(curve,  0, 10))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "integral": 45.300912584519914
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
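+
+Integrals over adjacent ranges are additive, so the integral over the range 10
+through 20 should be approximately the difference of the two results above:
+90.174 - 45.301, or roughly 44.874. Below is a sketch of that calculation:
+
+[source,text]
+----
+let(x=array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
+    y=array(0, 1, 2, 3, 4, 5.7, 6, 7, 7, 7, 6, 7, 7, 7, 6, 5, 5, 3, 2, 1, 0),
+    curve=loess(x, y, bandwidth=.3),
+    integral=integrate(curve, 10, 20))
+----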
+
+== Bicubic Spline
+
+The `bicubicSpline` function can be used to interpolate and predict values
+anywhere within a grid of data.
+
+A simple example will make this clearer.
+
+In the example below a bicubic spline is used to interpolate a matrix of real estate data.
+Each row of the matrix represents a specific *year*. Each column of the matrix
+represents a *floor* of the building. The grid of numbers is the average selling price of
+an apartment for each year and floor. For example in 2002 the average selling price for
+the 9th floor was 415000 (row 3, column 3).
+
+The `bicubicSpline` function is then used to
+interpolate the grid, and the `predict` function is used to predict a value for year 2003, floor 8.
+Notice that the matrix does not include a data point for year 2003, floor 8. The `bicubicSpline`
+function creates that data point based on the surrounding data in the matrix.
+
+[source,text]
+----
+let(years=array(1998, 2000, 2002, 2004, 2006),
+    floors=array(1, 5, 9, 13, 17, 19),
+    prices = matrix(array(300000, 320000, 330000, 350000, 360000, 370000),
+                    array(320000, 330000, 340000, 350000, 365000, 380000),
+                    array(400000, 410000, 415000, 425000, 430000, 440000),
+                    array(410000, 420000, 425000, 435000, 445000, 450000),
+                    array(420000, 430000, 435000, 445000, 450000, 470000)),
+    bspline=bicubicSpline(years, floors, prices),
+    prediction=predict(bspline, 2003, 8))
+----
+
+When this expression is sent to the /stream handler it
+responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "prediction": 418279.5009328358
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1ed4e226/solr/solr-ref-guide/src/probability.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/probability.adoc b/solr/solr-ref-guide/src/probability.adoc
new file mode 100644
index 0000000..9c46d08
--- /dev/null
+++ b/solr/solr-ref-guide/src/probability.adoc
@@ -0,0 +1,415 @@
+= Probability Distributions
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+This section of the user guide covers the
+*probability distribution
+framework* included in the math expressions library.
+
+== Probability Distributions
+
+The probability distribution framework includes
+many commonly used *real* and *discrete* probability
+distributions, including support for *empirical* and
+*enumerated* distributions that model real world data.
+
+The probability distribution framework also includes a set
+of functions that use the probability distributions
+to support probability calculations and sampling.
+
+=== Real Distributions
+
+The probability distribution framework has the following functions
+which support well known real probability distributions:
+
+* `normalDistribution`: Creates a normal distribution function.
+
+* `logNormalDistribution`: Creates a log normal distribution function.
+
+* `gammaDistribution`: Creates a gamma distribution function.
+
+* `betaDistribution`: Creates a beta distribution function.
+
+* `uniformDistribution`: Creates a uniform real distribution function.
+
+* `weibullDistribution`: Creates a Weibull distribution function.
+
+* `triangularDistribution`: Creates a triangular distribution function.
+
+* `constantDistribution`: Creates a constant real distribution function.
+
+=== Empirical Distribution
+
+The `empiricalDistribution` function creates a real probability
+distribution from actual data. An empirical distribution
+can be used interchangeably with any of the theoretical
+real distributions.
+
+=== Discrete Distributions
+
+The probability distribution framework has the following functions
+which support well known discrete probability distributions:
+
+* `poissonDistribution`: Creates a Poisson distribution function.
+
+* `binomialDistribution`: Creates a binomial distribution function.
+
+* `uniformIntegerDistribution`: Creates a uniform integer distribution function.
+
+* `geometricDistribution`: Creates a geometric distribution function.
+
+* `zipFDistribution`: Creates a Zipf distribution function.
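+
+Below is a brief, hedged sketch of drawing samples from one of these
+distributions. It assumes the `zipFDistribution` function takes a size and an
+exponent as its parameters:
+
+[source,text]
+----
+let(a=zipFDistribution(1000, 1.1),
+    b=sample(a, 10))
+----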
+
+=== Enumerated Distributions
+
+The `enumeratedDistribution` function creates a discrete
+distribution function from a data set of discrete values,
+or from an enumerated list of values and probabilities.
+
+Enumerated distribution functions can be used interchangeably
+with any of the theoretical discrete distributions.
+
+=== Cumulative Probability
+
+The `cumulativeProbability` function can be used with all
+probability distributions to calculate the probability
+of encountering a value at or below a specific
+value within the distribution.
+
+Below is an example of calculating the cumulative probability
+of a random variable within a normal distribution.
+
+In the example a normal distribution function is created
+with a mean of 10 and a standard deviation of 5. Then
+the cumulative probability of the value 12 is calculated for this
+specific distribution.
+
+[source,text]
+----
+let(a=normalDistribution(10, 5),
+    b=cumulativeProbability(a, 12))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": 0.6554217416103242
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+Below is an example of a cumulative probability calculation
+using an empirical distribution.
+
+In the example an empirical distribution is created from a random
+sample taken from the *price_f* field.
+
+The cumulative probability of the value .75 is then calculated.
+The *price_f* field in this example was generated using a
+uniform real distribution between 0 and 1, so the output of the
+`cumulativeProbability` function is very close to .75.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="30000", fl="price_f"),
+    b=col(a, price_f),
+    c=empiricalDistribution(b),
+    d=cumulativeProbability(c, .75))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": 0.7554217416103242
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+=== Probability
+
+The `probability` function can be used with any discrete
+distribution function to compute the probability of a
+discrete value.
+
+Below is an example which calculates the probability
+of a discrete value within a Poisson distribution.
+
+In the example a Poisson distribution function is created
+with a mean of 100. Then the
+probability of encountering the discrete value 101 is calculated for this
+specific distribution.
+
+[source,text]
+----
+let(a=poissonDistribution(100),
+    b=probability(a, 101))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": 0.039466333474403106
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+Below is an example of a probability calculation
+using an enumerated distribution.
+
+In the example an enumerated distribution is created from a random
+sample taken from the *day_i* field, which was created
+using a uniform integer distribution between 0 and 30.
+
+The probability of the discrete value 10 is then calculated.
+
+[source,text]
+----
+let(a=random(collection1, q="*:*", rows="30000", fl="day_i"),
+    b=col(a, day_i),
+    c=enumeratedDistribution(b),
+    d=probability(c, 10))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "d": 0.03356666666666666
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 488
+      }
+    ]
+  }
+}
+----
+
+=== Sampling
+
+All probability distributions support sampling. The `sample`
+function returns one or more random samples from a probability
+distribution.
+
+Below is an example drawing a single sample from
+a normal distribution.
+
+[source,text]
+----
+let(a=normalDistribution(10, 5),
+    b=sample(a))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": 11.24578055004963
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 0
+      }
+    ]
+  }
+}
+----
+
+Below is an example drawing 10 samples from a normal
+distribution.
+
+[source,text]
+----
+let(a=normalDistribution(10, 5),
+    b=sample(a, 10))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "b": [
+          10.18444709339441,
+          9.466947971749377,
+          1.2420697166234458,
+          11.074501226984806,
+          7.659629052136225,
+          0.4440887839190708,
+          13.710925254778786,
+          2.089566359480239,
+          0.7907293097654424,
+          2.8184587681006734
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 3
+      }
+    ]
+  }
+}
+----
+
+=== Multivariate Normal Distribution
+
+The multivariate normal distribution is a generalization of the
+univariate normal distribution to higher dimensions.
+
+The multivariate normal distribution models two or more random
+variables that are normally distributed. The relationship between
+the variables is defined by a covariance matrix.
+
+==== Sampling
+
+The `sample` function can be used to draw samples
+from a multivariate normal distribution in much the same
+way as a univariate normal distribution.
+The difference is that each sample will be an array containing a sample
+drawn from each of the underlying normal distributions.
+If multiple samples are drawn, the `sample` function returns a matrix with a
+sample in each row. Over the long term the columns of the sample
+matrix will conform to the covariance matrix used to parametrize the
+multivariate normal distribution.
+
+The example below demonstrates how to initialize and draw samples
+from a multivariate normal distribution.
+
+In this example 5000 random samples are selected from a collection
+of log records. Each sample contains
+the fields *filesize_d* and *response_d*. The values of both fields conform
+to a normal distribution.
+
+Both fields are then vectorized. The *filesize_d* vector is stored in
+variable *b* and the *response_d* variable is stored in variable *c*.
+
+An array is created that contains the *means* of the two vectorized fields.
+
+Then both vectors are added to a matrix which is transposed. This creates
+an *observation* matrix where each row contains one observation of
+*filesize_d* and *response_d*. A covariance matrix is then created from the columns of
+the observation matrix with the
+`cov` function. The covariance matrix describes the covariance between
+*filesize_d* and *response_d*.
+
+The `multivariateNormalDistribution` function is then called with the
+array of means for the two fields and the covariance matrix. The model for the
+multivariate normal distribution is assigned to variable *g*.
+
+Finally five samples are drawn from the multivariate normal distribution. The samples
+are returned as a matrix, with each row representing one sample. There are two
+columns in the matrix. The first column contains samples for *filesize_d* and the second
+column contains samples for *response_d*. Over the long term the covariance between
+the columns will conform to the covariance matrix used to instantiate the
+multivariate normal distribution.
+
+[source,text]
+----
+let(a=random(collection2, q="*:*", rows="5000", fl="filesize_d, response_d"),
+    b=col(a, filesize_d),
+    c=col(a, response_d),
+    d=array(mean(b), mean(c)),
+    e=transpose(matrix(b, c)),
+    f=cov(e),
+    g=multiVariateNormalDistribution(d, f),
+    h=sample(g, 5))
+----
+
+When this expression is sent to the /stream handler it responds with:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "h": [
+          [
+            41974.85669321393,
+            779.4097049705296
+          ],
+          [
+            42869.19876441414,
+            834.2599296790783
+          ],
+          [
+            38556.30444839889,
+            720.3683470060988
+          ],
+          [
+            37689.31290928216,
+            686.5549428100018
+          ],
+          [
+            40564.74398214547,
+            769.9328090774
+          ]
+        ]
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 162
+      }
+    ]
+  }
+}
+----
+


[3/3] lucene-solr:master: SOLR-11947: Rollback inadvertent code change during documentation updates.

Posted by jb...@apache.org.
SOLR-11947: Rollback inadvertent code change during documentation updates.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e69c614c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e69c614c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e69c614c

Branch: refs/heads/master
Commit: e69c614cf6419673d159d0efbad3ba4675f1772b
Parents: 1ed4e22
Author: Joel Bernstein <jb...@apache.org>
Authored: Mon Mar 26 14:48:33 2018 -0400
Committer: Joel Bernstein <jb...@apache.org>
Committed: Mon Mar 26 15:05:07 2018 -0400

----------------------------------------------------------------------
 .../solrj/io/eval/FieldValueEvaluator.java      | 36 +++++++++-----------
 1 file changed, 17 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e69c614c/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
index a12a74e..3086fb4 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FieldValueEvaluator.java
@@ -29,27 +29,21 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
 
 public class FieldValueEvaluator extends SourceEvaluator {
   private static final long serialVersionUID = 1L;
-  
+
   private String fieldName;
-  private boolean literal;
-  
+
   public FieldValueEvaluator(String fieldName) {
-    if(fieldName.startsWith("\"") && fieldName.endsWith("\"") && fieldName.length() > 1){
+    if(fieldName.startsWith("'") && fieldName.endsWith("'") && fieldName.length() > 1){
       fieldName = fieldName.substring(1, fieldName.length() - 1);
-      literal = true;
     }
-    
+
     this.fieldName = fieldName;
   }
-  
+
   @Override
   public Object evaluate(Tuple tuple) throws IOException {
-    if(literal) {
-      return fieldName;
-    }
-
     Object value = tuple.get(fieldName);
-    
+
     // This is somewhat radical.
     // Here, we allow for the use of the context to provide alternative values
     // when they are not available in the provided tuple. This means that all
@@ -57,14 +51,14 @@ public class FieldValueEvaluator extends SourceEvaluator {
     // can even evaluate over fields from both of them in the same evaluation
     if(null == value && null != getStreamContext()){
       value = getStreamContext().getLets().get(fieldName);
-      
+
       // If what's contained in the context is itself an evaluator then
       // we need to evaluate it
       if(value instanceof StreamEvaluator){
         value = ((StreamEvaluator)value).evaluate(tuple);
       }
     }
-    
+
     // if we have an array then convert to an ArrayList
     // if we have an iterable that is not a list then convert to ArrayList
     // lists are good to go
@@ -90,9 +84,13 @@ public class FieldValueEvaluator extends SourceEvaluator {
       }
     }
 
+    if(value == null) {
+      return fieldName;
+    }
+
     return value;
   }
-  
+
   @Override
   public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
     return new StreamExpressionValue(fieldName);
@@ -101,9 +99,9 @@ public class FieldValueEvaluator extends SourceEvaluator {
   @Override
   public Explanation toExplanation(StreamFactory factory) throws IOException {
     return new Explanation(nodeId.toString())
-      .withExpressionType(ExpressionType.EVALUATOR)
-      .withImplementingClass(getClass().getName())
-      .withExpression(toExpression(factory).toString());
+        .withExpressionType(ExpressionType.EVALUATOR)
+        .withImplementingClass(getClass().getName())
+        .withExpression(toExpression(factory).toString());
   }
 
-}
+}
\ No newline at end of file