Posted to commits@lucene.apache.org by ct...@apache.org on 2017/05/12 14:35:31 UTC

[21/37] lucene-solr:branch_6_6: squash merge jira/solr-10290 into master

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.ttf b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.ttf
new file mode 100755
index 0000000..1413fc6
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff
new file mode 100755
index 0000000..9e61285
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff2
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff2 b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff2
new file mode 100755
index 0000000..64539b5
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/glyphicons/glyphicons-halflings-regular.woff2 differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold-ascii.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold-ascii.ttf b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold-ascii.ttf
new file mode 100644
index 0000000..726bcc4
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold-ascii.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold_italic-ascii.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold_italic-ascii.ttf b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold_italic-ascii.ttf
new file mode 100644
index 0000000..c91d944
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-bold_italic-ascii.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-italic-ascii.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-italic-ascii.ttf b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-italic-ascii.ttf
new file mode 100644
index 0000000..77c1684
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-italic-ascii.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-regular-ascii-conums.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-regular-ascii-conums.ttf b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-regular-ascii-conums.ttf
new file mode 100644
index 0000000..5645bbe
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/mplus1mn/mplus1mn-regular-ascii-conums.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/fonts/mplus1p-regular-fallback.ttf
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/fonts/mplus1p-regular-fallback.ttf b/solr/solr-ref-guide/src/fonts/mplus1p-regular-fallback.ttf
new file mode 100644
index 0000000..5251e5c
Binary files /dev/null and b/solr/solr-ref-guide/src/fonts/mplus1p-regular-fallback.ttf differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/format-of-solr-xml.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/format-of-solr-xml.adoc b/solr/solr-ref-guide/src/format-of-solr-xml.adoc
new file mode 100644
index 0000000..6483ce1
--- /dev/null
+++ b/solr/solr-ref-guide/src/format-of-solr-xml.adoc
@@ -0,0 +1,151 @@
+= Format of solr.xml
+:page-shortname: format-of-solr-xml
+:page-permalink: format-of-solr-xml.html
+
+The `solr.xml` file defines some global configuration options that apply to all or many cores.
+
+This section will describe the default `solr.xml` file included with Solr and how to modify it for your needs. For details on how to configure `core.properties`, see the section <<defining-core-properties.adoc#defining-core-properties,Defining core.properties>>.
+
+[[Formatofsolr.xml-Definingsolr.xml]]
+== Defining solr.xml
+
+You can find `solr.xml` in your Solr Home directory or in ZooKeeper. The default `solr.xml` file looks like this:
+
+[source,xml]
+----
+<solr>
+
+  <solrcloud>
+    <str name="host">${host:}</str>
+    <int name="hostPort">${jetty.port:8983}</int>
+    <str name="hostContext">${hostContext:solr}</str>
+    <int name="zkClientTimeout">${zkClientTimeout:15000}</int>
+    <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
+  </solrcloud>
+
+  <shardHandlerFactory name="shardHandlerFactory"
+    class="HttpShardHandlerFactory">
+    <int name="socketTimeout">${socketTimeout:0}</int>
+    <int name="connTimeout">${connTimeout:0}</int>
+  </shardHandlerFactory>
+
+</solr>
+----
+
+As you can see, the discovery-based Solr configuration is "SolrCloud friendly". However, the presence of the `<solrcloud>` element does _not_ mean that the Solr instance is running in SolrCloud mode. Unless `-DzkHost` or `-DzkRun` is specified at startup time, this section is ignored.
+
+[[Formatofsolr.xml-Solr.xmlParameters]]
+=== Solr.xml Parameters
+
+==== The `<solr>` Element
+
+There are no attributes that you can specify in the `<solr>` tag, which is the root element of `solr.xml`. The tables below list the child nodes of each XML element in `solr.xml`.
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="30,70",options="header"]
+|===
+|Node |Description
+|`adminHandler` |If used, this attribute should be set to the FQN (Fully qualified name) of a class that inherits from CoreAdminHandler. For example, `<str name="adminHandler">com.myorg.MyAdminHandler</str>` would configure the custom admin handler (MyAdminHandler) to handle admin requests. If this attribute isn't set, Solr uses the default admin handler, `org.apache.solr.handler.admin.CoreAdminHandler`. For more information on this parameter, see the Solr Wiki at http://wiki.apache.org/solr/CoreAdmin#cores.
+|`collectionsHandler` |As above, for custom CollectionsHandler implementations.
+| `infoHandler` |As above, for custom InfoHandler implementations.
+|`coreLoadThreads` |Specifies the number of threads that will be assigned to load cores in parallel.
+|`coreRootDirectory` |The root of the core discovery tree, defaults to `SOLR_HOME`.
+|`managementPath` |Currently non-operational.
+|`sharedLib` |Specifies the path to a common library directory that will be shared across all cores. Any JAR files in this directory will be added to the search path for Solr plugins. This path is relative to the top-level container's Solr Home. Custom handlers may be placed in this directory.
+|`shareSchema` |This attribute, when set to true, ensures that multiple cores pointing to the same schema resource file refer to the same IndexSchema object. Sharing the IndexSchema object makes loading the core faster. If you use this feature, make sure that no core-specific property is used in your schema file.
+|`transientCacheSize` |Defines how many cores with `transient=true` can be loaded before swapping the least recently used core for a new core.
+|`configSetBaseDir` |The directory under which configsets for Solr cores can be found. Defaults to `SOLR_HOME/configsets`.
+|===
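+
+For example, a `solr.xml` that combines several of the options above might look like the following sketch (the `com.myorg.MyAdminHandler` class and the values shown are hypothetical illustrations, not defaults):
+
+[source,xml]
+----
+<solr>
+  <str name="adminHandler">com.myorg.MyAdminHandler</str>
+  <str name="sharedLib">lib</str>
+  <int name="transientCacheSize">32</int>
+  <bool name="shareSchema">${shareSchema:false}</bool>
+</solr>
+----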
+
+==== The `<solrcloud>` element
+
+This element defines several parameters that relate to SolrCloud. This section is ignored unless the Solr instance is started with either `-DzkRun` or `-DzkHost`.
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="30,70",options="header"]
+|===
+|Node |Description
+|`distribUpdateConnTimeout` |Used to set the underlying "connTimeout" for intra-cluster updates.
+|`distribUpdateSoTimeout` |Used to set the underlying "socketTimeout" for intra-cluster updates.
+|`host` |The hostname Solr uses to access cores.
+|`hostContext` |The URL context path.
+|`hostPort` |The port Solr uses to access cores. In the default `solr.xml` file, this is set to `${jetty.port:8983}`, which will use the Solr port defined in Jetty, and otherwise fall back to 8983.
+|`leaderVoteWait` |When SolrCloud is starting up, how long each Solr node will wait for all known replicas for that shard to be found before assuming that any nodes that haven't reported are down.
+|`leaderConflictResolveWait` |When trying to elect a leader for a shard, this property sets the maximum time a replica will wait to see conflicting state information to be resolved; temporary conflicts in state information can occur when doing rolling restarts, especially when the node hosting the Overseer is restarted. Typically, the default value of 180000 (ms) is sufficient for conflicts to be resolved; you may need to increase this value if you have hundreds or thousands of small collections in SolrCloud.
+|`zkClientTimeout` |A timeout for connection to a ZooKeeper server. It is used with SolrCloud.
+|`zkHost` |In SolrCloud mode, the URL of the ZooKeeper host that Solr should use for cluster state information.
+|`genericCoreNodeNames` |If `TRUE`, node names are not based on the address of the node, but on a generic name that identifies the core. When a different machine takes over serving that core, things will be much easier to understand.
+|`zkCredentialsProvider` & `zkACLProvider` |Optional parameters that can be specified if you are using <<zookeeper-access-control.adoc#zookeeper-access-control,ZooKeeper Access Control>>.
+|===
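+
+For example, a `<solrcloud>` section that sets explicit intra-cluster update timeouts might look like this (a sketch; the timeout values are illustrative only):
+
+[source,xml]
+----
+<solrcloud>
+  <str name="host">${host:}</str>
+  <int name="hostPort">${jetty.port:8983}</int>
+  <int name="zkClientTimeout">${zkClientTimeout:15000}</int>
+  <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:600000}</int>
+  <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:60000}</int>
+</solrcloud>
+----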
+
+==== The `<logging>` element
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="30,70",options="header"]
+|===
+|Node |Description
+|`class` |The class to use for logging. The corresponding JAR file must be available to Solr, perhaps through a `<lib>` directive in `solrconfig.xml`.
+|`enabled` |true/false - whether to enable logging or not.
+|===
+
+===== The `<logging><watcher>` element
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="30,70",options="header"]
+|===
+|Node |Description
+|`size` |The number of log events that are buffered.
+|`threshold` |The logging level above which your particular logging implementation will record. For example, when using Log4j, one might specify DEBUG, WARN, INFO, etc.
+|===
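+
+Putting the two tables together, a `<logging>` section might look like the following sketch (the watcher class name is hypothetical; substitute the implementation appropriate for your logging framework):
+
+[source,xml]
+----
+<logging>
+  <str name="class">com.example.MyLogWatcher</str>
+  <bool name="enabled">true</bool>
+  <watcher>
+    <int name="size">1024</int>
+    <str name="threshold">WARN</str>
+  </watcher>
+</logging>
+----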
+
+==== The `<shardHandlerFactory>` element
+
+If you wish to create a custom shard handler, it can be defined in `solr.xml`.
+
+[source,xml]
+----
+<shardHandlerFactory name="ShardHandlerFactory" class="qualified.class.name">
+----
+
+Since this is a custom shard handler, sub-elements are specific to the implementation. The default and only shard handler provided by Solr is the `HttpShardHandlerFactory`, for which the following sub-elements can be specified:
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="30,70",options="header"]
+|===
+|Node |Description
+|`socketTimeout` |The read timeout for intra-cluster query and administrative requests. The default is the same as the `distribUpdateSoTimeout` specified in the `<solrcloud>` section.
+|`connTimeout` |The connection timeout for intra-cluster query and administrative requests. Defaults to the `distribUpdateConnTimeout` specified in the `<solrcloud>` section.
+|`urlScheme` |The URL scheme to be used in distributed search.
+|`maxConnectionsPerHost` |Maximum connections allowed per host. Defaults to 20.
+|`maxConnections` |Maximum total connections allowed. Defaults to 10000.
+|`corePoolSize` |The initial core size of the threadpool servicing requests. Default is 0.
+|`maximumPoolSize` |The maximum size of the threadpool servicing requests. Default is unlimited.
+|`maxThreadIdleTime` |The amount of time in seconds that idle threads persist in the queue before being killed. Default is 5 seconds.
+|`sizeOfQueue` |The maximum size of the backing queue for the threadpool, if one is used. The default is to use direct handoff with a SynchronousQueue.
+|`fairnessPolicy` |A boolean to configure whether the threadpool favors fairness over throughput. Default is false, to favor throughput.
+|===
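+
+For example, tuning the default `HttpShardHandlerFactory` with a few of the parameters above might look like this (a sketch; the values shown are illustrative only):
+
+[source,xml]
+----
+<shardHandlerFactory name="shardHandlerFactory"
+                     class="HttpShardHandlerFactory">
+  <int name="socketTimeout">${socketTimeout:600000}</int>
+  <int name="connTimeout">${connTimeout:60000}</int>
+  <int name="maxConnectionsPerHost">20</int>
+  <int name="maximumPoolSize">64</int>
+</shardHandlerFactory>
+----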
+
+[[Formatofsolr.xml-SubstitutingJVMSystemPropertiesinsolr.xml]]
+== Substituting JVM System Properties in solr.xml
+
+Solr supports variable substitution of JVM system property values in `solr.xml`, which allows runtime specification of various configuration options. The syntax is `${propertyname[:optional default value]}`. This allows defining a default that can be overridden when Solr is launched. If a default value is not specified, then the property must be specified at runtime or the `solr.xml` file will generate an error when parsed.
+
+Any JVM system property, usually specified using the `-D` flag when starting the JVM, can be used as a variable in the `solr.xml` file.
+
+For example, in the `solr.xml` file shown below, the `socketTimeout` and `connTimeout` values are each set to "0". However, if you start Solr using `bin/solr -DsocketTimeout=1000`, the `socketTimeout` option of the `HttpShardHandlerFactory` will be overridden with a value of 1000ms, while the `connTimeout` option will continue to use the default property value of "0".
+
+[source,xml]
+----
+<solr>
+  <shardHandlerFactory name="shardHandlerFactory"
+                       class="HttpShardHandlerFactory">
+    <int name="socketTimeout">${socketTimeout:0}</int>
+    <int name="connTimeout">${connTimeout:0}</int>
+  </shardHandlerFactory>
+</solr>
+----
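+
+For example, you could launch Solr with both properties overridden on the command line (a sketch; the property names correspond to the `solr.xml` shown above):
+
+[source,bash]
+----
+bin/solr start -DsocketTimeout=1000 -DconnTimeout=2000
+----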

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/function-queries.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/function-queries.adoc b/solr/solr-ref-guide/src/function-queries.adoc
new file mode 100644
index 0000000..99a704a
--- /dev/null
+++ b/solr/solr-ref-guide/src/function-queries.adoc
@@ -0,0 +1,252 @@
+= Function Queries
+:page-shortname: function-queries
+:page-permalink: function-queries.html
+
+Function queries enable you to generate a relevancy score using the actual value of one or more numeric fields.
+
+Function queries are supported by the <<the-dismax-query-parser.adoc#the-dismax-query-parser,DisMax>>, <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,Extended DisMax>>, and <<the-standard-query-parser.adoc#the-standard-query-parser,standard>> query parsers.
+
+Function queries use _functions_. The functions can be a constant (numeric or string literal), a field, another function, or a parameter substitution argument. You can use these functions to modify the ranking of results for users, for example changing the ranking of results based on a user's location or some other calculation.
+
+[[FunctionQueries-UsingFunctionQuery]]
+== Using Function Query
+
+Functions must be expressed as function calls (for example, `sum(a,b)` instead of simply `a+b`).
+
+There are several ways of using function queries in a Solr query:
+
+* Via an explicit QParser that expects function arguments, such as <<other-parsers.adoc#OtherParsers-FunctionQueryParser,`func`>> or <<other-parsers.adoc#OtherParsers-FunctionRangeQueryParser,`frange`>>. For example:
++
+[source,text]
+----
+q={!func}div(popularity,price)&fq={!frange l=1000}customer_ratings
+----
+* In a Sort expression. For example:
++
+[source,text]
+----
+sort=div(popularity,price) desc, score desc
+----
+* Add the results of functions as pseudo-fields to documents in query results. For instance, for:
++
+[source,text]
+----
+&fl=sum(x, y),id,a,b,c,score
+----
++
+the output would be:
++
+[source,xml]
+----
+...
+<str name="id">foo</str>
+<float name="sum(x,y)">40</float>
+<float name="score">0.343</float>
+...
+----
+* Use in a parameter that is explicitly for specifying functions, such as the EDisMax query parser's <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,`boost`>> param, or DisMax query parser's <<the-dismax-query-parser.adoc#TheDisMaxQueryParser-Thebf_BoostFunctions_Parameter,`bf` (boost function) parameter>>. (Note that the `bf` parameter actually takes a list of function queries separated by white space and each with an optional boost. Make sure you eliminate any internal white space in single function queries when using `bf`). For example:
++
+[source,text]
+----
+q=dismax&bf="ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3"
+----
+* Introduce a function query inline in the Lucene QParser with the `\_val_` keyword. For example:
++
+[source,text]
+----
+q=_val_:mynumericfield _val_:"recip(rord(myfield),1,2,3)"
+----
+
+Only functions with fast random access are recommended.
+
+[[FunctionQueries-AvailableFunctions]]
+== Available Functions
+
+The table below summarizes the functions available for function queries.
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="20,40,40",options="header"]
+|===
+|Function |Description |Syntax Examples
+|abs |Returns the absolute value of the specified value or function. |`abs(x)` `abs(-5)`
+|"constant" |Specifies a floating point constant. |`1.5`
+|def |`def` is short for default. Returns the value of the specified field, or if the field does not exist, returns the default value specified. Yields the first value where `exists()==true`. |`def(rating,5)`: This `def()` function returns the rating, or if no rating is specified in the doc, returns 5. `def(myfield, 1.0)`: equivalent to `if(exists(myfield),myfield,1.0)`
+|div |Divides one value or function by another. `div(x,y)` divides x by y. |`div(1,y)` `div(sum(x,100),max(y,1))`
+|dist |Return the distance between two vectors (points) in an n-dimensional space. Takes in the power, plus two or more ValueSource instances and calculates the distances between the two vectors. Each ValueSource must be a number. There must be an even number of ValueSource instances passed in and the method assumes that the first half represent the first vector and the second half represent the second vector. |`dist(2, x, y, 0, 0):` calculates the Euclidean distance between (0,0) and (x,y) for each document `dist(1, x, y, 0, 0)`: calculates the Manhattan (taxicab) distance between (0,0) and (x,y) for each document `dist(2, x,y,z,0,0,0):` Euclidean distance between (0,0,0) and (x,y,z) for each document. `dist(1,x,y,z,e,f,g)`: Manhattan distance between (x,y,z) and (e,f,g) where each letter is a field name
+|docfreq(field,val) |Returns the number of documents that contain the term in the field. This is a constant (the same value for all documents in the index). You can quote the term if it's more complex, or do parameter substitution for the term value. |`docfreq(text,'solr')` `...&defType=func` `&q=docfreq(text,$myterm)&myterm=solr`
+|field[[FunctionQueries-field]] a|
+Returns the numeric docValues or indexed value of the field with the specified name. In its simplest (single argument) form, this function can only be used on single valued fields, and can be called using the name of the field as a string; for most conventional field names you can simply use the field name by itself, without the `field(...)` syntax.
+
+When using docValues, an optional second argument can be specified to select the `min` or `max` value of multivalued fields.
+
+0 is returned for documents without a value in the field.
+
+ a|
+These 3 examples are all equivalent:
+
+* `myFloatFieldName`
+* `field(myFloatFieldName)`
+* `field("myFloatFieldName")`
+
+The last form is convenient when your field name is atypical:
+
+* `field("my complex float fieldName")`
+
+For multivalued docValues fields:
+
+* `field(myMultiValuedFloatField,min)`
+* `field(myMultiValuedFloatField,max)`
+
+|hsin |The Haversine distance calculates the distance between two points on a sphere when traveling along the sphere. The values must be in radians. `hsin` also takes a Boolean argument to specify whether the function should convert its output to radians. |`hsin(2, true, x, y, 0, 0)`
+|idf |Inverse document frequency; a measure of whether the term is common or rare across all documents. Obtained by dividing the total number of documents by the number of documents containing the term, and then taking the logarithm of that quotient. See also `tf`. |`idf(fieldName,'solr')`: measures the inverse of the frequency of the occurrence of the term `'solr'` in `fieldName`.
+|if a|
+Enables conditional function queries. In `if(test,value1,value2)`:
+
+* `test` is or refers to a logical value or expression that returns a logical value (TRUE or FALSE).
+* `value1` is the value that is returned by the function if `test` yields TRUE.
+* `value2` is the value that is returned by the function if `test` yields FALSE.
+
+The test expression can be any function that outputs boolean values. It can also be a function returning numeric values, in which case a value of 0 is interpreted as false, or a function returning strings, in which case the empty string is interpreted as false.
+
+ |`if(termfreq(cat,'electronics'),popularity,42)`: This function checks each document to see if it contains the term "electronics" in the `cat` field. If it does, then the value of the `popularity` field is returned; otherwise the value `42` is returned.
+|linear |Implements `m*x+c` where `m` and `c` are constants and `x` is an arbitrary function. This is equivalent to `sum(product(m,x),c)`, but slightly more efficient as it is implemented as a single function. |`linear(x,m,c)` `linear(x,2,4)` returns `2*x+4`
+|log |Returns the log base 10 of the specified function. a|
+`log(x)`
+
+`log(sum(x,100))`
+
+|map |Maps any values of an input function x that fall within min and max inclusive to the specified target. The arguments min and max must be constants. The arguments `target` and `default` can be constants or functions. If the value of x does not fall between min and max, then either the value of x is returned, or a default value is returned if specified as a 5th argument. a|
+`map(x,min,max,target)` `map(x,0,0,1)` - changes any values of 0 to 1. This can be useful in handling default 0 values.
+
+`map(x,min,max,target,default)` `map(x,0,100,1,-1)` - changes any values between `0` and `100` to `1`, and all other values to `-1`.
+
+`map(x,0,100,sum(x,599),docfreq(text,solr))` - changes any values between `0` and `100` to x+599, and all other values to the frequency of the term 'solr' in the field text.
+
+|max a|
+Returns the maximum numeric value of multiple nested functions or constants, which are specified as arguments: `max(x,y,...)`. The max function can also be useful for "bottoming out" another function or field at some specified constant.
+
+(Use the `field(myfield,max)` syntax for <<FunctionQueries-field,selecting the maximum value of a single multivalued field>>)
+
+ |`max(myfield,myotherfield,0)`
+|maxdoc |Returns the number of documents in the index, including those that are marked as deleted but have not yet been purged. This is a constant (the same value for all documents in the index). |`maxdoc()`
+|min a|
+Returns the minimum numeric value of multiple nested functions or constants, which are specified as arguments: `min(x,y,...)`. The min function can also be useful for providing an "upper bound" on a function using a constant.
+
+(Use the `field(myfield,min)` <<FunctionQueries-field,syntax for selecting the minimum value of a single multivalued field>>)
+
+ |`min(myfield,myotherfield,0)`
+|ms a|
+Returns milliseconds of difference between its arguments. Dates are relative to the Unix or POSIX time epoch, midnight, January 1, 1970 UTC. Arguments may be the name of an indexed `TrieDateField`, or date math based on a <<working-with-dates.adoc#working-with-dates,constant date or `NOW`>>.
+
+* `ms()`: Equivalent to `ms(NOW)`, number of milliseconds since the epoch.
+* `ms(a)`: Returns the number of milliseconds since the epoch that the argument represents.
+* `ms(a,b)`: Returns the number of milliseconds that b occurs before a (that is, a - b).
+
+ |`ms(NOW/DAY)` `ms(2000-01-01T00:00:00Z)` `ms(mydatefield)` `ms(NOW,mydatefield)` `ms(mydatefield,2000-01-01T00:00:00Z)` `ms(datefield1,datefield2)`
+|norm(_field_) |Returns the "norm" stored in the index for the specified field. This is the product of the index time boost and the length normalization factor, according to the {lucene-javadocs}/core/org/apache/lucene/search/similarities/Similarity.html[Similarity] for the field. |`norm(fieldName)`
+|numdocs |Returns the number of documents in the index, not including those that are marked as deleted but have not yet been purged. This is a constant (the same value for all documents in the index). |`numdocs()`
+|ord a|
+Returns the ordinal of the indexed field value within the indexed list of terms for that field in Lucene index order (lexicographically ordered by unicode value), starting at 1. In other words, for a given field, all values are ordered lexicographically; this function then returns the offset of a particular value in that ordering. The field must have a maximum of one value per document (not multi-valued). 0 is returned for documents without a value in the field.
+
+[IMPORTANT]
+====
+`ord()` depends on the position in an index and can change when other documents are inserted or deleted.
+====
+
+See also `rord` below.
+
+ |`ord(myIndexedField)` Example: If there were only three values ("apple","banana","pear") for a particular field X, then `ord(X)` would be 1 for documents containing "apple", 2 for documents containing "banana", etc.
+|payload a|
+Returns the float value computed from the decoded payloads of the term specified. The return value is computed using the `min`, `max`, or `average` of the decoded payloads. A special `first` function can be used instead of the others, to short-circuit term enumeration and return only the decoded payload of the first term. The field specified must have float or integer payload encoding capability (via `DelimitedPayloadTokenFilter` or `NumericPayloadTokenFilter`). If no payload is found for the term, the default value is returned.
+
+* `payload(field_name,term)`: default value is 0.0, `average` function is used.
+* `payload(field_name,term,default_value)`: default value can be a constant, field name, or another float returning function. `average` function used.
+* `payload(field_name,term,default_value,function)`: function values can be `min`, `max`, `average`, or `first`. |`payload(payloaded_field_dpf,term,0.0,first)`
+
+|pow |Raises the specified base to the specified power. `pow(x,y)` raises x to the power of y. |`pow(x,y)` `pow(x,log(y))` `pow(x,0.5):` the same as `sqrt`
+|product |Returns the product of multiple values or functions, which are specified in a comma-separated list. `mul(...)` may also be used as an alias for this function. |`product(x,y,...)` `product(x,2)` `product(x,y)` `mul(x,y)`
+|query |Returns the score for the given subquery, or the default value for documents not matching the query. Any type of subquery is supported through either parameter de-referencing `$otherparam` or direct specification of the query string in the <<local-parameters-in-queries.adoc#local-parameters-in-queries,Local Parameters>> through the `v` key. |`query(subquery, default)` `q=product(popularity,query({!dismax v='solr rocks'}))`: returns the product of the popularity and the score of the DisMax query. `q=product(popularity,query($qq))&qq={!dismax}solr rocks`: equivalent to the previous query, using parameter de-referencing. `q=product(popularity,query($qq,0.1))&qq={!dismax}solr rocks`: specifies a default score of 0.1 for documents that don't match the DisMax query.
+|recip a|
+Performs a reciprocal function with `recip(x,m,a,b)` implementing `a/(m*x+b)` where `m,a,b` are constants, and `x` is any arbitrarily complex function.
+
+When a and b are equal, and x>=0, this function has a maximum value of 1 that drops as x increases. Increasing the value of a and b together results in a movement of the entire function to a flatter part of the curve. These properties can make this an ideal function for boosting more recent documents when x is `rord(datefield)`.
+
+ |`recip(myfield,m,a,b)` `recip(rord(creationDate),1,1000,1000)`
+|rord |Returns the reverse ordering of that returned by `ord`. |`rord(myDateField)`
+|scale a|
+Scales values of the function x such that they fall between the specified `minTarget` and `maxTarget` inclusive. The current implementation traverses all of the function values to obtain the min and max, so it can pick the correct scale.
+
+The current implementation cannot distinguish when documents have been deleted or documents that have no value. It uses 0.0 values for these cases. This means that if values are normally all greater than 0.0, one can still end up with 0.0 as the min value to map from. In these cases, an appropriate `map()` function could be used as a workaround to change 0.0 to a value in the real range, as shown here: `scale(map(x,0,0,5),1,2)`
+
+ |`scale(x,minTarget,maxTarget)` `scale(x,1,2)`: scales the values of x such that all values will be between 1 and 2 inclusive.
+|sqedist |The Square Euclidean distance calculates the 2-norm (Euclidean distance) but does not take the square root, thus saving a fairly expensive operation. It is often the case that applications that care about Euclidean distance do not need the actual distance, but instead can use the square of the distance. There must be an even number of ValueSource instances passed in and the method assumes that the first half represent the first vector and the second half represent the second vector. |`sqedist(x_td, y_td, 0, 0)`
+|sqrt |Returns the square root of the specified value or function. |`sqrt(x)` `sqrt(100)` `sqrt(sum(x,100))`
+|strdist |Calculate the distance between two strings. Uses the Lucene spell checker `StringDistance` interface and supports all of the implementations available in that package, plus allows applications to plug in their own via Solr's resource loading capabilities. `strdist` takes (string1, string2, distance measure). Possible values for distance measure are: `jw` (Jaro-Winkler), `edit` (Levenshtein or edit distance), `ngram` (the NGramDistance; if specified, you can optionally pass in the ngram size too, default is 2), or the fully qualified class name for an implementation of the `StringDistance` interface, which must have a no-arg constructor. |`strdist("SOLR",id,edit)`
+|sub |Returns x-y from sub(x,y). |`sub(myfield,myfield2)` `sub(100,` `sqrt(myfield))`
+|sum |Returns the sum of multiple values or functions, which are specified in a comma-separated list. `add(...)` may be used as an alias for this function. |`sum(x,y,...)` `sum(x,1)` `sum(x,y)` `sum(sqrt(x),log(y),z,0.5)` `add(x,y)`
+|sumtotaltermfreq |Returns the sum of `totaltermfreq` values for all terms in the field in the entire index (i.e., the number of indexed tokens for that field). (Aliases `sumtotaltermfreq` to `sttf`.) |If doc1:(fieldX:A B C) and doc2:(fieldX:A A A A): `docFreq(fieldX:A)` = 2 (A appears in 2 docs) `freq(doc2, fieldX:A)` = 4 (A appears 4 times in doc2) `totalTermFreq(fieldX:A)` = 5 (A appears 5 times across all docs) `sumTotalTermFreq(fieldX)` = 7 (in `fieldX`, there are 5 As, 1 B, 1 C)
+|termfreq |Returns the number of times the term appears in the field for that document. |`termfreq(text,'memory')`
+|tf |Term frequency; returns the term frequency factor for the given term, using the {lucene-javadocs}/core/org/apache/lucene/search/similarities/Similarity.html[Similarity] for the field. The `tf-idf` value increases proportionally to the number of times a word appears in the document, but is offset by the frequency of the word in the corpus, which helps to control for the fact that some words are generally more common than others. See also `idf`. |`tf(text,'solr')`
+|top a|
+Causes the function query argument to derive its values from the top-level IndexReader containing all parts of an index. For example, the ordinal of a value in a single segment will be different from the ordinal of that same value in the complete index.
+
+The `ord()` and `rord()` functions implicitly use `top()`, and hence `ord(foo)` is equivalent to `top(ord(foo))`.
+
+ |
+|totaltermfreq |Returns the number of times the term appears in the field in the entire index. (Aliases `totaltermfreq` to `ttf`.) |`ttf(text,'memory')`
+|===
+
+The following functions are boolean – they return true or false. They are mostly useful as the first argument of the `if` function, and some of these can be combined. If used elsewhere, they will yield a '1' or '0'.
+
+[width="100%",options="header",]
+|===
+|Function |Description |Syntax Examples
+|and |Returns a value of true if and only if all of its operands evaluate to true. |`and(not(exists(popularity)),exists(price))`: returns `true` for any document which has a value in the `price` field, but does not have a value in the `popularity` field.
+|or |A logical disjunction. |`or(value1,value2):` TRUE if either `value1` or `value2` is true.
+|xor |Logical exclusive disjunction: one or the other but not both. |`xor(field1,field2)` returns TRUE if either `field1` or `field2` is true; FALSE if both are true or both are false.
+|not |The logically negated value of the wrapped function. |`not(exists(author))`: TRUE only when `exists(author)` is false.
+|exists |Returns TRUE if any member of the field exists. |`exists(author)` returns TRUE for any document that has a value in the "author" field. `exists(query(price:5.00))` returns TRUE if "price" matches "5.00".
+|gt, gte, lt, lte, eq |5 comparison functions: Greater Than, Greater Than or Equal, Less Than, Less Than or Equal, Equal |`if(lt(ms(mydatefield),315569259747),0.8,1)` translates to this pseudocode: `if mydatefield < 315569259747 then 0.8 else 1`
+|===
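+
+For example, several of the boolean functions above can be combined as the test argument of `if` (a sketch using the hypothetical `popularity` and `price` fields from the examples above):
+
+[source,text]
+----
+if(and(exists(popularity),gt(price,0)),div(popularity,price),0)
+----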
+
+[[FunctionQueries-ExampleFunctionQueries]]
+== Example Function Queries
+
+To give you a better understanding of how function queries can be used in Solr, suppose an index stores the dimensions in meters x,y,z of some hypothetical boxes with arbitrary names stored in the field `boxname`. Suppose we want to search for a box matching the name `findbox`, but rank the results according to the volumes of the boxes. The query parameters would be:
+
+`q=boxname:findbox _val_:"product(x,y,z)"`
+
+This query will rank the results based on volumes. In order to get the computed volume, you will need to request the `score`, which will contain the resultant volume:
+
+`&fl=*, score`
+
+Suppose that you also have a field storing the weight of the box as `weight`. To sort by the density of the box and return the value of the density in score, you would submit the following query:
+
+[source,text]
+----
+http://localhost:8983/solr/collection_name/select?q=boxname:findbox _val_:"div(weight,product(x,y,z))"&fl=boxname x y z weight score
+----
+
+[[FunctionQueries-SortByFunction]]
+== Sort By Function
+
+You can sort your query results by the output of a function. For example, to sort results by distance, you could enter:
+
+[source,text]
+----
+http://localhost:8983/solr/collection_name/select?q=*:*&sort=dist(2, point1, point2) desc
+----
+
+Sort by function also supports pseudo-fields: fields can be generated dynamically and returned in results as though they were normal fields in the index. For example,
+
+`&fl=id,sum(x, y),score`
+
+would return:
+
+[source,xml]
+----
+<str name="id">foo</str>
+<float name="sum(x,y)">40</float>
+<float name="score">0.343</float>
+----

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/further-assistance.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/further-assistance.adoc b/solr/solr-ref-guide/src/further-assistance.adoc
new file mode 100644
index 0000000..8cbeedc
--- /dev/null
+++ b/solr/solr-ref-guide/src/further-assistance.adoc
@@ -0,0 +1,7 @@
+= Further Assistance
+:page-shortname: further-assistance
+:page-permalink: further-assistance.html
+
+There is a very active user community around Solr and Lucene. The solr-user mailing list and #solr IRC channel are both great resources for asking questions.
+
+To view the mailing list archives, subscribe to the list, or join the IRC channel, please see https://lucene.apache.org/solr/community.html.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/getting-assistance.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/getting-assistance.adoc b/solr/solr-ref-guide/src/getting-assistance.adoc
new file mode 100644
index 0000000..85f9cec
--- /dev/null
+++ b/solr/solr-ref-guide/src/getting-assistance.adoc
@@ -0,0 +1,25 @@
+= Getting Assistance
+:page-shortname: getting-assistance
+:page-permalink: getting-assistance.html
+
+At the bottom of each screen of the Admin UI is a set of links that can be used to get more assistance with configuring and using Solr.
+
+.Assistance icons
+image::images/getting-assistance/Assistance.png[image]
+
+
+These icons include the following links.
+
+// TODO: Change column width to %autowidth.spread when https://github.com/asciidoctor/asciidoctor-pdf/issues/599 is fixed
+
+[cols="25,75",options="header"]
+|===
+|Link |Description
+|Documentation |Navigates to the Apache Solr documentation hosted on https://lucene.apache.org/solr/.
+|Issue Tracker |Navigates to the JIRA issue tracking server for the Apache Solr project. This server resides at https://issues.apache.org/jira/browse/SOLR.
+|IRC Channel |Navigates to Solr's http://en.wikipedia.org/wiki/Internet_Relay_Chat[IRC] live-chat room: http://webchat.freenode.net/?channels=#solr.
+|Community forum |Navigates to the Apache Wiki page which has further information about ways to engage in the Solr User community mailing lists: https://wiki.apache.org/solr/UsingMailingLists.
+|Solr Query Syntax |Navigates to the section <<query-syntax-and-parsing.adoc#query-syntax-and-parsing,Query Syntax and Parsing>> in this Reference Guide.
+|===
+
+These links cannot be modified without editing the `index.html` in the `server/solr/solr-webapp` directory that contains the Admin UI files.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc b/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc
new file mode 100644
index 0000000..3f7eb65
--- /dev/null
+++ b/solr/solr-ref-guide/src/getting-started-with-solrcloud.adoc
@@ -0,0 +1,159 @@
+= Getting Started with SolrCloud
+:page-shortname: getting-started-with-solrcloud
+:page-permalink: getting-started-with-solrcloud.html
+
+SolrCloud is designed to provide a highly available, fault tolerant environment for distributing your indexed content and query requests across multiple servers.
+
+It's a system in which data is organized into multiple pieces, or shards, that can be hosted on multiple machines, with replicas providing redundancy for both scalability and fault tolerance, and a ZooKeeper server that helps manage the overall structure so that both indexing and search requests can be routed properly.
+
+This section explains SolrCloud and its inner workings in detail, but before you dive in, it's best to have an idea of what it is you're trying to accomplish.
+
+This page provides a simple tutorial to start Solr in SolrCloud mode, so you can begin to get a sense for how shards interact with each other during indexing and when serving queries. To that end, we'll use simple examples of configuring SolrCloud on a single machine, which is obviously not a real production environment; a real production environment would include several servers or virtual machines. In a real production environment, you'll also use real machine names instead of the "localhost" we've used here.
+
+In this section you will learn how to start a SolrCloud cluster using startup scripts and a specific configset.
+
+[TIP]
+====
+This tutorial assumes that you're already familiar with the basics of using Solr. If you need a refresher, please see the <<getting-started.adoc#getting-started,Getting Started section>> to get a grounding in Solr concepts. If you load documents as part of that exercise, you should start over with a fresh Solr installation for these SolrCloud tutorials.
+====
+
+[[GettingStartedwithSolrCloud-SolrCloudExample]]
+== SolrCloud Example
+
+[[GettingStartedwithSolrCloud-InteractiveStartup]]
+=== Interactive Startup
+
+The `bin/solr` script makes it easy to get started with SolrCloud as it walks you through the process of launching Solr nodes in cloud mode and adding a collection. To get started, simply do:
+
+[source,bash]
+----
+bin/solr -e cloud
+----
+
+This starts an interactive session to walk you through the steps of setting up a simple SolrCloud cluster with embedded ZooKeeper.
+
+The script starts by asking you how many Solr nodes you want to run in your local cluster, with the default being 2.
+
+[source,plain]
+----
+Welcome to the SolrCloud example!
+
+This interactive session will help you launch a SolrCloud cluster on your local workstation.
+To begin, how many Solr nodes would you like to run in your local cluster? (specify 1-4 nodes) [2]
+----
+
+The script supports starting up to 4 nodes, but we recommend using the default of 2 when starting out. These nodes will each exist on a single machine, but will use different ports to mimic operation on different servers.
+
+Next, the script will prompt you for the port to bind each of the Solr nodes to, such as:
+
+[source,plain]
+----
+ Please enter the port for node1 [8983]
+----
+
+Choose any available port for each node; the default for the first node is 8983 and 7574 for the second node. The script will start each node in order and show you the command it uses to start the server, such as:
+
+[source,bash]
+----
+solr start -cloud -s example/cloud/node1/solr -p 8983
+----
+
+The first node will also start an embedded ZooKeeper server bound to port 9983. The Solr home for the first node is in `example/cloud/node1/solr` as indicated by the `-s` option.
+
+After starting up all nodes in the cluster, the script prompts you for the name of the collection to create:
+
+[source,plain]
+----
+ Please provide a name for your new collection: [gettingstarted]
+----
+
+The suggested default is "gettingstarted" but you might want to choose a name more appropriate for your specific search application.
+
+Next, the script prompts you for the number of shards to distribute the collection across. <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Sharding>> is covered in more detail later on, so if you're unsure, we suggest using the default of 2 so that you can see how a collection is distributed across multiple nodes in a SolrCloud cluster.
+
+Next, the script will prompt you for the number of replicas to create for each shard.  <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Replication>> is covered in more detail later in the guide, so if you're unsure, then use the default of 2 so that you can see how replication is handled in SolrCloud.
+
+Lastly, the script will prompt you for the name of a configuration directory for your collection. You can choose *basic_configs*, *data_driven_schema_configs*, or *sample_techproducts_configs*. The configuration directories are pulled from `server/solr/configsets/` so you can review them beforehand if you wish. The *data_driven_schema_configs* configuration (the default) is useful when you're still designing a schema for your documents and need some flexibility as you experiment with Solr.
+
+At this point, you should have a new collection created in your local SolrCloud cluster. To verify this, you can run the status command:
+
+[source,bash]
+----
+bin/solr status
+----
+
+If you encounter any errors during this process, check the Solr log files in `example/cloud/node1/logs` and `example/cloud/node2/logs`.
+
+You can see how your collection is deployed across the cluster by visiting the cloud panel in the Solr Admin UI: http://localhost:8983/solr/#/~cloud. Solr also provides a way to perform basic diagnostics for a collection using the healthcheck command:
+
+[source,bash]
+----
+bin/solr healthcheck -c gettingstarted
+----
+
+The healthcheck command gathers basic information about each replica in a collection, such as the number of docs, current status (active, down, etc.), and address (where the replica lives in the cluster).
+
+Documents can now be added to SolrCloud using the <<post-tool.adoc#post-tool,Post Tool>>.
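+
+For example, assuming you created the default `gettingstarted` collection, you could index the sample documents that ship with Solr (a sketch; adjust the collection name if you chose a different one):
+
+[source,bash]
+----
+bin/post -c gettingstarted example/exampledocs/*.xml
+----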
+
+To stop Solr in SolrCloud mode, you would use the `bin/solr` script and issue the `stop` command, as in:
+
+[source,bash]
+----
+bin/solr stop -all
+----
+
+[[GettingStartedwithSolrCloud-Startingwith-noprompt]]
+=== Starting with -noprompt
+
+You can also get SolrCloud started with all the defaults instead of the interactive session using the following command:
+
+[source,bash]
+----
+bin/solr -e cloud -noprompt
+----
+
+[[GettingStartedwithSolrCloud-RestartingNodes]]
+=== Restarting Nodes
+
+You can restart your SolrCloud nodes using the `bin/solr` script. For instance, to restart node1 running on port 8983 (with an embedded ZooKeeper server), you would do:
+
+[source,bash]
+----
+bin/solr restart -c -p 8983 -s example/cloud/node1/solr
+----
+
+To restart node2 running on port 7574, you can do:
+
+[source,bash]
+----
+bin/solr restart -c -p 7574 -z localhost:9983 -s example/cloud/node2/solr
+----
+
+Notice that you need to specify the ZooKeeper address (`-z localhost:9983`) when starting node2 so that it can join the cluster with node1.
+
+[[GettingStartedwithSolrCloud-Addinganodetoacluster]]
+=== Adding a node to a cluster
+
+Adding a node to an existing cluster is a bit advanced and involves a little more understanding of Solr. Once you start up a SolrCloud cluster using the startup scripts, you can add a new node to it by:
+
+[source,bash]
+----
+mkdir <solr.home for new solr node>
+cp <existing solr.xml path> <new solr.home>
+bin/solr start -cloud -s <new solr.home> -p <port num> -z <zk hosts string>
+----
+
+Notice that the above requires you to create a Solr home directory. You either need to copy `solr.xml` into the new Solr home directory, or keep it centrally in ZooKeeper at `/solr.xml`.
+
+Here is an example (with directory structure) that adds a node to an example started with `bin/solr -e cloud`:
+
+[source,bash]
+----
+mkdir -p example/cloud/node3/solr
+cp server/solr/solr.xml example/cloud/node3/solr
+bin/solr start -cloud -s example/cloud/node3/solr -p 8987 -z localhost:9983
+----
+
+The previous command will start another Solr node on port 8987 with Solr home set to `example/cloud/node3/solr`. The new node will write its log files to `example/cloud/node3/logs`.
+
+Once you're comfortable with how the SolrCloud example works, we recommend using the process described in <<taking-solr-to-production.adoc#taking-solr-to-production,Taking Solr to Production>> for setting up SolrCloud nodes in production.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/getting-started.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/getting-started.adoc b/solr/solr-ref-guide/src/getting-started.adoc
new file mode 100644
index 0000000..3fd7c76
--- /dev/null
+++ b/solr/solr-ref-guide/src/getting-started.adoc
@@ -0,0 +1,27 @@
+= Getting Started
+:page-shortname: getting-started
+:page-permalink: getting-started.html
+:page-children: installing-solr, running-solr, a-quick-overview, a-step-closer, solr-control-script-reference
+
+Solr makes it easy for programmers to develop sophisticated, high-performance search applications with advanced features such as faceting (arranging search results in columns with numerical counts of key terms).
+
+Solr builds on another open source search technology: Lucene, a Java library that provides indexing and search technology, as well as spellchecking, hit highlighting and advanced analysis/tokenization capabilities. Both Solr and Lucene are managed by the Apache Software Foundation (http://www.apache.org/[www.apache.org]).
+
+The Lucene search library currently ranks among the top 15 open source projects and is one of the top 5 Apache projects, with installations at over 4,000 companies. Lucene/Solr downloads have grown nearly ten times over the past three years, with a current run-rate of over 6,000 downloads a day. The Solr search server, which provides application builders a ready-to-use search platform on top of the Lucene search library, is the fastest growing Lucene sub-project. Apache Lucene/Solr offers an attractive alternative to the proprietary licensed search and discovery software vendors.
+
+This section helps you get Solr up and running quickly, and introduces you to the basic Solr architecture and features. It covers the following topics:
+
+<<installing-solr.adoc#installing-solr,Installing Solr>>: A walkthrough of the Solr installation process.
+
+<<running-solr.adoc#running-solr,Running Solr>>: An introduction to running Solr. Includes information on starting up the servers, adding documents, and running queries.
+
+<<a-quick-overview.adoc#a-quick-overview,A Quick Overview>>: A high-level overview of how Solr works.
+
+<<a-step-closer.adoc#a-step-closer,A Step Closer>>: An introduction to Solr's home directory and configuration options.
+
+<<solr-control-script-reference.adoc#solr-control-script-reference,Solr Control Script Reference>>: A complete reference of all of the commands and options available with the `bin/solr` script.
+
+[TIP]
+====
+Solr includes a Quick Start tutorial which will be helpful if you are just starting out with Solr. You can find it online at http://lucene.apache.org/solr/quickstart.html, or in your Solr installation at `$SOLR_INSTALL_DIR/docs/quickstart.html`.
+====

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c8c2aab8/solr/solr-ref-guide/src/graph-traversal.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/graph-traversal.adoc b/solr/solr-ref-guide/src/graph-traversal.adoc
new file mode 100644
index 0000000..66e55d2
--- /dev/null
+++ b/solr/solr-ref-guide/src/graph-traversal.adoc
@@ -0,0 +1,532 @@
+= Graph Traversal
+:page-shortname: graph-traversal
+:page-permalink: graph-traversal.html
+
+Graph traversal with streaming expressions uses the `gatherNodes` function to perform a breadth-first graph traversal.
+
+The `gatherNodes` function can be combined with the `scoreNodes` function to provide recommendations. `gatherNodes` can also be combined with the wider streaming expression library to perform complex operations on gathered node sets.
+
+`gatherNodes` traversals are distributed within a SolrCloud collection and can span collections.
+
+`gatherNodes` is designed for use cases that involve zooming into a neighborhood in the graph and performing precise traversals to gather node sets and aggregations. In these types of use cases `gatherNodes` will often provide sub-second performance. Some sample use cases are provided later in the document.
+
+[IMPORTANT]
+====
+This document assumes a basic understanding of graph terminology and streaming expressions. You can begin exploring graph traversal concepts with this https://en.wikipedia.org/wiki/Graph_traversal[Wikipedia article]. More details about streaming expressions are available in this Guide, in the section <<streaming-expressions.adoc#streaming-expressions,Streaming Expressions>>.
+====
+
+[[GraphTraversal-BasicSyntax]]
+== Basic Syntax
+
+We'll start with the most basic syntax and slowly build up more complexity. The most basic syntax for `gatherNodes` is:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org->from",
+            gather="to")
+----
+
+Let's break down this simple expression.
+
+The first parameter, `emails`, is the collection being traversed. The second parameter, `walk`, maps a hard-coded node ID ("\johndoe@apache.org") to a field in the index (`from`). This will return all the *edges* in the index that have `johndoe@apache.org` in the `from` field.
+
+The `gather` parameter tells the function to gather the values in the `to` field. The values that are gathered are the node IDs emitted by the function.
+
+In the example above the nodes emitted will be all of the people that "johndoe@apache.org" has emailed.
+
+The walk parameter also accepts a list of root node IDs:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org, janesmith@apache.org->from",
+            gather="to")
+----
+
+The `gatherNodes` function above finds all the edges with "johndoe@apache.org" or "janesmith@apache.org" in the `from` field and gathers the `to` field.
+
+Like all <<streaming-expressions.adoc#streaming-expressions,Streaming Expressions>>, you can execute a `gatherNodes` expression by sending it to the `/stream` handler. For example:
+
+[source,bash]
+----
+curl --data-urlencode 'expr=gatherNodes(emails,
+                                        walk="johndoe@apache.org, janesmith@apache.org->from",
+                                        gather="to")' http://localhost:8983/solr/emails/stream
+----
+
+The output of this expression would look like this:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "node": "slist@campbell.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "node": "catherine.pernot@enron.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "node": "airam.arteaga@enron.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 44
+      }
+    ]
+  }
+}
+----
+
+All of the tuples returned have the `node` field. The `node` field contains the node IDs gathered by the function. The `collection`, `field`, and `level` of the traversal are also included in the output.
+
+Notice that the level is "1" for each tuple in the example. The root nodes are level 0 (in the example above, the root nodes are "johndoe@apache.org" and "janesmith@apache.org"). By default the `gatherNodes` function emits only the _*leaf nodes*_ of the traversal, which is the outer-most node set. To emit the root nodes you can specify the `scatter` parameter:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org->from",
+            gather="to",
+            scatter="branches, leaves")
+----
+
+The `scatter` parameter controls whether to emit the _branches_ with the _leaves_. The root nodes are considered "branches" because they are not the outer-most level of the traversal.
+
+When scattering both branches and leaves, the output would look like this:
+
+[source,json]
+----
+{
+  "result-set": {
+    "docs": [
+      {
+        "node": "johndoe@apache.org",
+        "collection": "emails",
+        "field": "node",
+        "level": 0
+      },
+      {
+        "node": "slist@campbell.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "node": "catherine.pernot@enron.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "node": "airam.arteaga@enron.com",
+        "collection": "emails",
+        "field": "to",
+        "level": 1
+      },
+      {
+        "EOF": true,
+        "RESPONSE_TIME": 44
+      }
+    ]
+  }
+}
+----
+
+Now the level 0 root node is included in the output.
+
+[[GraphTraversal-Aggregations]]
+== Aggregations
+
+`gatherNodes` also supports aggregations. For example:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org, janesmith@apache.org->from",
+            gather="to",
+            count(*))
+----
+
+The expression above finds the edges with "\johndoe@apache.org" or "\janesmith@apache.org" in the `from` field and gathers the values from the `to` field. It also aggregates the count for each node ID gathered.
+
+A gathered node could have a count of 2 if both "\johndoe@apache.org" and "\janesmith@apache.org" have emailed the same person. Node sets contain a unique set of nodes, so the same person won't appear twice in the node set, but the count will reflect that it appeared twice during the traversal.
+
+Edges are uniqued as part of the traversal so the count will *not* reflect the number of times "\johndoe@apache.org" emailed the same person. For example, personA might have emailed personB 100 times. These edges would get uniqued and only be counted once. But if personC also emailed personB, this would increment the count for personB.
+
+The aggregation functions supported are `count(*)`, `sum(field)`, `min(field)`, `max(field)`, and `avg(field)`. The fields being aggregated should be present in the edges collected during the traversal. Later examples (below) will show how aggregations can be a powerful tool for providing recommendations and limiting the scope of traversals.
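+
+For example, here is a sketch that also averages a hypothetical numeric `size` field, assuming such a field exists on the email edges in the index:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org->from",
+            gather="to",
+            count(*),
+            avg(size))
+----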
+
+[[GraphTraversal-NestinggatherNodesfunctions]]
+== Nesting gatherNodes functions
+
+The `gatherNodes` function can be nested to traverse deeper into the graph. For example:
+
+[source,plain]
+----
+gatherNodes(emails,
+            gatherNodes(emails,
+                        walk="johndoe@apache.org->from",
+                        gather="to"),
+            walk="node->from",
+            gather="to")
+----
+
+In the example above the outer `gatherNodes` function operates on the node set collected from the inner `gatherNodes` function.
+
+Notice that the inner `gatherNodes` function behaves exactly as in the examples already discussed. But the `walk` parameter of the outer `gatherNodes` function behaves differently.
+
+In the outer `gatherNodes` function the `walk` parameter works with tuples coming from an internal streaming expression. In this scenario the `walk` parameter maps the `node` field to the `from` field. Remember that the node IDs collected from the inner `gatherNodes` expression are placed in the `node` field.
+
+Put more simply, the inner expression gathers all the people that "\johndoe@apache.org" has emailed. We can call this group the "friends of \johndoe@apache.org". The outer expression gathers all the people that the "friends of \johndoe@apache.org" have emailed. This is a basic friends-of-friends traversal.
+
+This construct of nesting `gatherNodes` functions is the basic technique for doing a controlled traversal through the graph.
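+
+As with any streaming expression, the nested traversal can be sent to the `/stream` handler. For example:
+
+[source,bash]
+----
+curl --data-urlencode 'expr=gatherNodes(emails,
+                                        gatherNodes(emails,
+                                                    walk="johndoe@apache.org->from",
+                                                    gather="to"),
+                                        walk="node->from",
+                                        gather="to")' http://localhost:8983/solr/emails/stream
+----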
+
+[[GraphTraversal-CycleDetection]]
+== Cycle Detection
+
+The `gatherNodes` function performs cycle detection across the entire traversal. This ensures that nodes that have already been visited are not traversed again. Cycle detection is important for both limiting the size of traversals and gathering accurate aggregations. Without cycle detection the size of the traversal could grow exponentially with each hop in the traversal. With cycle detection only new nodes encountered are traversed.
+
+Cycle detection *does not* cross collection boundaries. This is because internally the collection name is part of the node ID. For example, the node ID "\johndoe@apache.org" is really `emails/johndoe@apache.org`. When traversing to another collection, "\johndoe@apache.org" will be traversed again.
+
+[[GraphTraversal-FilteringtheTraversal]]
+== Filtering the Traversal
+
+Each level in the traversal can be filtered with a filter query. For example:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org->from",
+            fq="body:(solr rocks)",
+            gather="to")
+----
+
+In the example above only emails that match the filter query will be included in the traversal. Any Solr query can be included here. So you can do fun things like <<spatial-search.adoc#spatial-search,geospatial queries>>, apply any of the available <<query-syntax-and-parsing.adoc#query-syntax-and-parsing,query parsers>>, or even write custom query parsers to limit the traversal.
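+
+As a sketch, assuming the collection had a spatial `location` field, the traversal could be limited to emails sent from within 50 kilometers of a point using the `geofilt` query parser:
+
+[source,plain]
+----
+gatherNodes(emails,
+            walk="johndoe@apache.org->from",
+            fq="{!geofilt sfield=location pt=37.77,-122.41 d=50}",
+            gather="to")
+----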
+
+[[GraphTraversal-RootStreams]]
+== Root Streams
+
+Any streaming expression can be used to provide the root nodes for a traversal. For example:
+
+[source,plain]
+----
+gatherNodes(emails,
+            search(emails, q="body:(solr rocks)", fl="to", sort="score desc", rows="20"),
+            walk="to->from",
+            gather="to")
+----
+
+The example above provides the root nodes through a search expression. You can also provide arbitrarily complex, nested streaming expressions with joins, etc., to specify the root nodes.
+
+Notice that the `walk` parameter maps a field from the tuples generated by the inner stream. In this case it maps the `to` field from the inner stream to the `from` field.
+
+[[GraphTraversal-SkippingHighFrequencyNodes]]
+== Skipping High Frequency Nodes
+
+It's often desirable to skip traversing high frequency nodes in the graph. This is similar in nature to a search term stop list. The best way to describe this is through an example use case.
+
+Let's say that you want to recommend content for a user based on a collaborative filter. Below is one approach for a simple collaborative filter:
+
+. Find all content userA has read.
+. Find users whose reading list is closest to userA. These are users with similar tastes as userA.
+. Recommend content based on what the users in step 2 have read, that userA has not yet read.
+
+Look closely at step 2. In large graphs, step 2 can lead to a very large traversal. This is because userA may have viewed content that has been viewed by millions of other people. We may want to skip these high frequency nodes for two reasons:
+
+. A large traversal that visits millions of unique nodes is slow and takes a lot of memory because cycle detection is tracked in memory.
+. High frequency nodes are also not useful in determining users with similar tastes. The content that fewer people have viewed provides a more precise recommendation.
+
+The `gatherNodes` function has the `maxDocFreq` param to allow for filtering out high frequency nodes. The sample code below shows steps 1 and 2 of the recommendation:
+
+[source,plain]
+----
+gatherNodes(logs,
+            search(logs, q="userID:user1", fl="articleID", sort="articleID asc", fq="action:view", qt="/export"),
+            walk="articleID->articleID",
+            gather="userID",
+            fq="action:view",
+            maxDocFreq="10000",
+            count(*))
+----
+
+In the example above, the inner search expression searches the `logs` collection and returns all the articles viewed by "user1". The outer `gatherNodes` expression takes all the articles emitted from the inner search expression and finds all the records in the logs collection for those articles. It then gathers and aggregates the users that have read the articles. The `maxDocFreq` parameter limits the articles returned to those that appear in no more than 10,000 log records (per shard). This guards against returning articles that have been viewed by millions of users.
+
+[[GraphTraversal-TrackingtheTraversal]]
+== Tracking the Traversal
+
+By default the `gatherNodes` function only tracks enough information to do cycle detection. This provides enough information to output the nodes and aggregations in the graph.
+
+For some use cases, such as graph visualization, we also need to output the edges. Setting `trackTraversal="true"` tells `gatherNodes` to track the connections between nodes, so the edges can be constructed. When `trackTraversal` is enabled a new `ancestors` property will appear with each node. The `ancestors` property contains a list of node IDs that pointed to the node.
+
+Below is a sample `gatherNodes` expression with `trackTraversal` set to true:
+
+[source,plain]
+----
+gatherNodes(emails,
+            gatherNodes(emails,
+                        walk="johndoe@apache.org->from",
+                        gather="to",
+                        trackTraversal="true"),
+            walk="node->from",
+            trackTraversal="true",
+            gather="to")
+----
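+
+With tracking enabled, each emitted node carries the IDs of the nodes that pointed to it. An illustrative tuple (values are examples only):
+
+[source,json]
+----
+{
+  "node": "slist@campbell.com",
+  "ancestors": ["johndoe@apache.org"],
+  "collection": "emails",
+  "field": "to",
+  "level": 1
+}
+----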
+
+[[GraphTraversal-Cross-CollectionTraversals]]
+== Cross-Collection Traversals
+
+Nested `gatherNodes` functions can operate on different SolrCloud collections. This allows traversals to "walk" from one collection to another to gather nodes. Cycle detection does not cross collection boundaries, so nodes collected in one collection will be traversed in a different collection. This was done deliberately to support cross-collection traversals. Note that the output from a cross-collection traversal will likely contain duplicate nodes with different collection attributes.
+
+Below is a sample `gatherNodes` expression that traverses from the "emails" collection to the "logs" collection:
+
+[source,plain]
+----
+gatherNodes(logs,
+            gatherNodes(emails,
+                        search(emails, q="body:(solr rocks)", fl="from", sort="score desc", rows="20"),
+                        walk="from->from",
+                        gather="to",
+                        scatter="leaves, branches"),
+            walk="node->user",
+            fq="action:edit",
+            gather="contentID")
+----
+
+The example above finds all people who sent emails with a body that contains "solr rocks". It then finds all the people these people have emailed. Then it traverses to the logs collection and gathers all the content IDs that these people have edited.
+
+[[GraphTraversal-CombininggatherNodesWithOtherStreamingExpressions]]
+== Combining gatherNodes With Other Streaming Expressions
+
+The `gatherNodes` function can act as both a stream source and a stream decorator. The connection with the wider stream expression library provides tremendous power and flexibility when performing graph traversals. Here is an example of using the streaming expression library to intersect two friend networks:
+
+[source,plain]
+----
+intersect(on="node",
+          sort(by="node asc",
+               gatherNodes(emails,
+                           gatherNodes(emails,
+                                       walk="johndoe@apache.org->from",
+                                       gather="to"),
+                           walk="node->from",
+                           gather="to",
+                           scatter="branches,leaves")),
+          sort(by="node asc",
+               gatherNodes(emails,
+                           gatherNodes(emails,
+                                       walk="janedoe@apache.org->from",
+                                       gather="to"),
+                           walk="node->from",
+                           gather="to",
+                           scatter="branches,leaves")))
+----
+
+The example above gathers two separate friend networks, one rooted with "\johndoe@apache.org" and another rooted with "\janedoe@apache.org". The friend networks are then sorted by the `node` field, and intersected. The resulting node set will be the intersection of the two friend networks.
+
+[[GraphTraversal-SampleUseCases]]
+== Sample Use Cases
+
+[[GraphTraversal-CalculateMarketBasketCo-occurrence]]
+=== Calculate Market Basket Co-occurrence
+
+It is often useful to know which products are most frequently purchased with a particular product. This example uses a simple market basket table (indexed in Solr) to store past shopping baskets. The schema for the table is very simple with each row containing a `basketID` and a `productID`. This can be seen as a graph with each row in the table representing an edge. And it can be traversed very quickly to calculate basket co-occurrence, even when the graph contains billions of edges.
+
+Here is the sample syntax:
+
+[source,plain]
+----
+top(n="5",
+    sort="count(*) desc",
+    gatherNodes(baskets,
+                random(baskets, q="productID:ABC", fl="basketID", rows="500"),
+                walk="basketID->basketID",
+                fq="-productID:ABC",
+                gather="productID",
+                count(*)))
+----
+
+Let's break down exactly what this traversal is doing.
+
+. The first expression evaluated is the inner `random` expression, which returns 500 random basketIDs from the `baskets` collection that have the `productID` "ABC". The `random` expression is very useful for recommendations because it limits the traversal to a fixed set of baskets, and because it adds an element of surprise to the recommendation. Using the `random` function you can provide fast sample sets from very large graphs.
+. The outer `gatherNodes` expression finds all the records in the `baskets` collection for the basketIDs generated in step 1. It also filters out `productID` "ABC" so it doesn't show up in the results. It then gathers and counts the productIDs across these baskets.
+. The outer `top` expression ranks the productIDs emitted in step 2 by the count and selects the top 5.
+
+In a nutshell this expression finds the products that most frequently co-occur with product "ABC" in past shopping baskets.
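+
+As with the earlier examples, this expression can be sent to the `/stream` handler. For example:
+
+[source,bash]
+----
+curl --data-urlencode 'expr=top(n="5",
+                                sort="count(*) desc",
+                                gatherNodes(baskets,
+                                            random(baskets, q="productID:ABC", fl="basketID", rows="500"),
+                                            walk="basketID->basketID",
+                                            fq="-productID:ABC",
+                                            gather="productID",
+                                            count(*)))' http://localhost:8983/solr/baskets/stream
+----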
+
+[[GraphTraversal-UsingthescoreNodesFunctiontoMakeaRecommendation]]
+=== Using the scoreNodes Function to Make a Recommendation
+
+This use case builds on the market basket example <<GraphTraversal-CalculateMarketBasketCo-occurrence,above>> that calculates which products co-occur most frequently with productID:ABC. The ranked co-occurrence counts provide candidates for a recommendation. The `scoreNodes` function can be used to score the candidates to find the best recommendation.
+
+Before diving into the syntax of the `scoreNodes` function it's useful to understand why the raw co-occurrence counts may not produce the best recommendation. The reason is that raw co-occurrence counts favor items that occur frequently across all baskets. A better recommendation would find the product that has the most significant relationship with productID ABC. The `scoreNodes` function uses a term frequency-inverse document frequency (TF-IDF) algorithm to find the most significant relationship.
+
+[[GraphTraversal-HowItWorks]]
+==== *How It Works*
+
+The `scoreNodes` function assigns a score to each node emitted by the `gatherNodes` expression. By default the `scoreNodes` function uses the `count(*)` aggregation, which is the co-occurrence count, as the TF value. The IDF value for each node is fetched from the collection where the node was gathered. Each node is then scored using the TF*IDF formula, which provides a boost to nodes with a lower frequency across all market baskets.
+
+Combining the co-occurrence count with the IDF provides a score that shows how important the relationship is between productID ABC and the recommendation candidates.
+
+The `scoreNodes` function adds the score to each node in the `nodeScore` field.
+
+[[GraphTraversal-ExampleSyntax]]
+==== *Example Syntax*
+
+[source,plain]
+----
+top(n="1",
+    sort="nodeScore desc",
+    scoreNodes(top(n="50",
+                   sort="count(*) desc",
+                   gatherNodes(baskets,
+                               random(baskets, q="productID:ABC", fl="basketID", rows="500"),
+                               walk="basketID->basketID",
+                               fq="-productID:ABC",
+                               gather="productID",
+                               count(*)))))
+----
+
+This example builds on the earlier example "Calculate market basket co-occurrence".
+
+. Notice that the inner-most `top` function is taking the top 50 products that co-occur most frequently with productID ABC. This provides 50 candidate recommendations.
+. The `scoreNodes` function then assigns a score to the candidates based on the TF*IDF of each node.
+. The outer `top` expression selects the highest scoring node. This is the recommendation.
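+
+The winning tuple carries the `nodeScore` field added by the `scoreNodes` function, alongside the usual node fields. An illustrative result (field values are examples only, for a hypothetical productID "XYZ"):
+
+[source,json]
+----
+{
+  "node": "XYZ",
+  "nodeScore": 12.4,
+  "collection": "baskets",
+  "field": "productID",
+  "count(*)": 37
+}
+----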
+
+[[GraphTraversal-RecommendContentBasedonCollaborativeFilter]]
+=== Recommend Content Based on Collaborative Filter
+
+In this example we'll recommend content for a user based on a collaborative filter. This recommendation is made using log records that contain the `userID` and `articleID` and the action performed. In this scenario each log record can be viewed as an edge in a graph. The userID and articleID are the nodes and the action is an edge property used to filter the traversal.
+
+Here is the sample syntax:
+
+[source,plain]
+----
+top(n="5",
+    sort="count(*) desc",
+    gatherNodes(logs,
+                top(n="30",
+                    sort="count(*) desc",
+                    gatherNodes(logs,
+                                search(logs, q="userID:user1", fl="articleID", sort="articleID asc", fq="action:read", qt="/export"),
+                                walk="articleID->articleID",
+                                gather="userID",
+                                fq="action:read",
+                                maxDocFreq="10000",
+                                count(*))),
+                walk="node->userID",
+                gather="articleID",
+                fq="action:read",
+                count(*)))
+----
+
+Let's break down the expression above step-by-step.
+
+. The first expression evaluated is the inner `search` expression. This expression searches the `logs` collection for all records matching "user1". This is the user we are making the recommendation for.
++
+There is a filter applied to pull back only records where the action is "read". It returns the `articleID` for each record found. In other words, this expression returns all the articles "user1" has read.
+. The inner `gatherNodes` expression operates over the articleIDs returned from step 1. It takes each `articleID` found and searches it against the `articleID` field.
++
+Note that it skips high frequency nodes using the `maxDocFreq` param to filter out articles that appear over 10,000 times in the logs. It gathers userIDs and aggregates the counts for each user. This step finds the users that have read the same articles that "user1" has read and counts how many of the same articles they have read.
+. The inner `top` expression ranks the users emitted from step 2. It will emit the top 30 users who have the most overlap with user1's reading list.
+. The outer `gatherNodes` expression gathers the reading list for the users emitted from step 3. It counts the articleIDs that are gathered.
++
+Any article selected in step 1 (user1's reading list) will not appear in this step due to cycle detection. So this step returns the articles read by the users with the most similar reading habits to "user1" that "user1" has not read yet. It also counts the number of times each article has been read across this user group.
+. The outer `top` expression takes the top articles emitted from step 4. This is the recommendation.
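+
+An illustrative tuple from the final recommendation, for a hypothetical article ID (field values are examples only):
+
+[source,json]
+----
+{
+  "node": "article1045",
+  "collection": "logs",
+  "field": "articleID",
+  "count(*)": 12
+}
+----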
+
+[[GraphTraversal-ProteinPathwayTraversal]]
+=== Protein Pathway Traversal
+
+In recent years, scientists have become increasingly able to rationally design drugs that target the mutated proteins, called oncogenes, responsible for some cancers. Proteins typically act through long chains of chemical interactions between multiple proteins, called pathways. While the oncogene in the pathway may not have a corresponding drug, another protein in the pathway may. Graph traversal on a protein collection that records protein interactions and drugs may yield possible candidates. (Thanks to Lewis Geer of the NCBI for providing this example.)
+
+The example below illustrates a protein pathway traversal:
+
+[source,plain]
+----
+gatherNodes(proteins,
+            gatherNodes(proteins,
+                        walk="NRAS->name",
+                        gather="interacts"),
+            walk="node->name",
+            gather="drug")
+----
+
+Let's break down exactly what this traversal is doing.
+
+. The inner `gatherNodes` expression traverses in the `proteins` collection. It finds all the edges in the graph where the name of the protein is "NRAS". Then it gathers the proteins in the `interacts` field. This gathers all the proteins that "NRAS" interacts with.
+. The outer `gatherNodes` expression also works with the `proteins` collection. It gathers all the drugs that correspond to proteins emitted from step 1.
+. Using this stepwise approach you can gather the drugs along the pathway of interactions any number of steps away from the root protein, as sketched below.
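+
+As a sketch, the pathway can be extended one more interaction away from the root protein by adding another level of nesting:
+
+[source,plain]
+----
+gatherNodes(proteins,
+            gatherNodes(proteins,
+                        gatherNodes(proteins,
+                                    walk="NRAS->name",
+                                    gather="interacts"),
+                        walk="node->name",
+                        gather="interacts"),
+            walk="node->name",
+            gather="drug")
+----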
+
+[[GraphTraversal-ExportingGraphMLtoSupportGraphVisualization]]
+== Exporting GraphML to Support Graph Visualization
+
+In the examples above, the `gatherNodes` expression was sent to Solr's `/stream` handler like any other streaming expression. This approach outputs the nodes in the same JSON tuple format as other streaming expressions, so the results can be consumed like any other stream. You can use the `/stream` handler when you need to operate directly on the tuples, such as in the recommendation use cases above.
+
+There are other graph traversal use cases that involve graph visualization. Solr supports these use cases with the introduction of the `/graph` request handler, which takes a `gatherNodes` expression and outputs the results in GraphML.
+
+http://graphml.graphdrawing.org/[GraphML] is an XML format supported by graph visualization tools such as https://gephi.org/[Gephi], which is a sophisticated open source tool for statistically analyzing and visualizing graphs. Using a `gatherNodes` expression, parts of a larger graph can be exported in GraphML and then imported into tools like Gephi.
+
+There are a few things to keep in mind when exporting a graph in GraphML:
+
+. The `/graph` handler can export both the nodes and edges in the graph. By default, it only exports the nodes. To export the edges you must set `trackTraversal="true"` in the `gatherNodes` expression.
+. The `/graph` handler currently accepts an arbitrarily complex streaming expression which includes a `gatherNodes` expression. If the streaming expression doesn't include a `gatherNodes` expression, the `/graph` handler will not properly output GraphML.
+. The `/graph` handler currently accepts a single arbitrarily complex, nested `gatherNodes` expression per request. This means you cannot send in a streaming expression that joins or intersects the node sets from multiple `gatherNodes` expressions, as you can with the `/stream` handler. The `/graph` handler does, however, support any level of nesting within a single `gatherNodes` expression.
+
+[[GraphTraversal-SampleRequest]]
+=== Sample Request
+
+[source,bash]
+----
+curl --data-urlencode 'expr=gatherNodes(enron_emails,
+                                        gatherNodes(enron_emails,
+                                                    walk="kayne.coulter@enron.com->from",
+                                                    trackTraversal="true",
+                                                    gather="to"),
+                                        walk="node->from",
+                                        scatter="leaves,branches",
+                                        trackTraversal="true",
+                                        gather="to")' http://localhost:8983/solr/enron_emails/graph
+----
+
+[[GraphTraversal-SampleGraphMLOutput]]
+=== Sample GraphML Output
+
+[source,xml]
+----
+<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
+<graph id="G" edgedefault="directed">
+     <node id="kayne.coulter@enron.com">
+           <data key="field">node</data>
+           <data key="level">0</data>
+           <data key="count(*)">0.0</data>
+     </node>
+     <node id="don.baughman@enron.com">
+           <data key="field">to</data>
+           <data key="level">1</data>
+           <data key="count(*)">1.0</data>
+     </node>
+     <edge id="1"  source="kayne.coulter@enron.com"  target="don.baughman@enron.com"/>
+     <node id="john.kinser@enron.com">
+           <data key="field">to</data>
+           <data key="level">1</data>
+           <data key="count(*)">1.0</data>
+    </node>
+    <edge id="2"  source="kayne.coulter@enron.com"  target="john.kinser@enron.com"/>
+    <node id="jay.wills@enron.com">
+          <data key="field">to</data>
+          <data key="level">1</data>
+          <data key="count(*)">1.0</data>
+    </node>
+    <edge id="3"  source="kayne.coulter@enron.com"  target="jay.wills@enron.com"/>
+</graph></graphml>
+----