Posted to commits@atlas.apache.org by sh...@apache.org on 2015/08/12 07:45:12 UTC
[1/2] incubator-atlas git commit: ATLAS-91 Add solr configuration and
documentation (suma.shivaprasad via shwethags)
Repository: incubator-atlas
Updated Branches:
refs/heads/master 147242e17 -> 48343db99
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/schema.xml
----------------------------------------------------------------------
diff --git a/src/conf/solr/schema.xml b/src/conf/solr/schema.xml
new file mode 100644
index 0000000..1d445b1
--- /dev/null
+++ b/src/conf/solr/schema.xml
@@ -0,0 +1,534 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+-->
+
+<schema name="titan-schema" version="1.5">
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
+ version="x.y" is Solr's version number for the schema syntax and
+ semantics. It should not normally be changed by applications.
+
+ 1.0: multiValued attribute did not exist, all fields are multiValued
+ by nature
+ 1.1: multiValued attribute introduced, false by default
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
+ except for text fields.
+ 1.3: removed optional field compress feature
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
+ behavior when a single string produces multiple tokens. Defaults
+ to off for version >= 1.4
+ 1.5: omitNorms defaults to true for primitive field types
+ (int, float, boolean, string...)
+ -->
+
+
+ <!-- Valid attributes for fields:
+ name: mandatory - the name for the field
+ type: mandatory - the name of a field type from the
+ <types> fieldType section
+ indexed: true if this field should be indexed (searchable or sortable)
+ stored: true if this field should be retrievable
+ docValues: true if this field should have doc values. Doc values are
+ useful for faceting, grouping, sorting and function queries. Although not
+ required, doc values will make the index faster to load, more
+ NRT-friendly and more memory-efficient. They however come with some
+ limitations: they are currently only supported by StrField, UUIDField
+ and all Trie*Fields, and depending on the field type, they might
+ require the field to be single-valued, be required or have a default
+ value (check the documentation of the field type you're interested in
+ for more information)
+ multiValued: true if this field may contain multiple values per document
+ omitNorms: (expert) set to true to omit the norms associated with
+ this field (this disables length normalization and index-time
+ boosting for the field, and saves some memory). Only full-text
+ fields or fields that need an index-time boost need norms.
+ Norms are omitted for primitive (non-analyzed) types by default.
+ termVectors: [false] set to true to store the term vector for a
+ given field.
+ When using MoreLikeThis, fields used for similarity should be
+ stored for best performance.
+ termPositions: Store position information with the term vector.
+ This will increase storage costs.
+ termOffsets: Store offset information with the term vector. This
+ will increase storage costs.
+ required: The field is required. It will throw an error if the
+ value does not exist
+ default: a value that should be used if no value is specified
+ when adding a document.
+ -->
+
+ <!-- field names should consist of alphanumeric or underscore characters only and
+ not start with a digit. This is not currently strictly enforced,
+ but other field names will not have first class support from all components
+ and back compatibility is not guaranteed. Names with both leading and
+ trailing underscores (e.g. _version_) are reserved.
+ -->
+
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
+ or Solr won't start. _version_ and update log are required for SolrCloud
+ -->
+ <field name="_version_" type="long" indexed="true" stored="true"/>
+
+ <!-- points to the root document of a block of nested documents. Required for nested
+ document support, may be removed otherwise
+ -->
+ <field name="_root_" type="string" indexed="true" stored="false"/>
+
+ <!-- Only remove the "id" field if you have a very good reason to. While not strictly
+ required, it is highly recommended. A <uniqueKey> is present in almost all Solr
+ installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
+ -->
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
+
+ <!-- Dynamic field definitions allow using convention over configuration
+ for fields via the specification of patterns to match field names.
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
+ RESTRICTION: the glob-like pattern in the name attribute must have
+ a "*" only at the start or the end. -->
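+
+   <!-- Illustrative example (not part of the stock schema): given the patterns
+        below, a document field named "count_i" would be indexed as an int, and
+        "tags_ss" as a multiValued string, with no explicit <field> declaration
+        needed for either. -->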
+
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
+ <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
+ <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+ <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
+ <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
+ <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
+
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
+
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
+
+ <!-- some trie-coded dynamic fields for faster range queries -->
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
+
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
+
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
+
+ <dynamicField name="random_*" type="random" />
+
+ <!-- uncomment the following to ignore any fields that don't already match an existing
+ field name or dynamic field, rather than reporting them as an error.
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
+ unknown fields indexed and/or stored by default -->
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+
+ <!-- Field to use to determine and enforce document uniqueness.
+ Unless this field is marked with required="false", it will be a required field
+ -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copyField commands copy one field to another at the time a document
+ is added to the index. It's used either to index the same field differently,
+ or to add multiple fields to the same field for easier/faster searching. -->
+
+ <!--
+ <copyField source="title" dest="text"/>
+ <copyField source="body" dest="text"/>
+ -->
+
+ <!-- field type definitions. The "name" attribute is
+ just a label to be used by field definitions. The "class"
+ attribute and any other attributes determine the real
+ behavior of the fieldType.
+ Class names starting with "solr" refer to java classes in a
+ standard package such as org.apache.solr.analysis
+ -->
+
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
+ It supports doc values but in that case the field needs to be
+ single-valued and either required or have a default value.
+ -->
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+
+ <!-- boolean type: "true" or "false" -->
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+
+ <!-- sortMissingLast and sortMissingFirst are optional attributes that are
+ currently supported on types that are sorted internally as strings
+ and on numeric types.
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
+ int, float, long, date, double, including the "Trie" variants.
+ - If sortMissingLast="true", then a sort on this field will cause documents
+ without the field to come after documents with the field,
+ regardless of the requested sort order (asc or desc).
+ - If sortMissingFirst="true", then a sort on this field will cause documents
+ without the field to come before documents with the field,
+ regardless of the requested sort order.
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+ then default lucene sorting will be used which places docs without the
+ field first in an ascending sort and last in a descending sort.
+ -->
+
+ <!--
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
+
+ These fields support doc values, but they require the field to be
+ single-valued and either be required or have a default value.
+ -->
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
+
+ <!--
+ Numeric field types that index each value at various levels of precision
+ to accelerate range queries when the number of values between the range
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
+ implementation details.
+
+ Smaller precisionStep values (specified in bits) will lead to more tokens
+ indexed per value, slightly larger index size, and faster range queries.
+ A precisionStep of 0 disables indexing at different precision levels.
+ -->
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
+
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+ is a more restricted form of the canonical representation of dateTime
+ http://www.w3.org/TR/xmlschema-2/#dateTime
+ The trailing "Z" designates UTC time and is mandatory.
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+ All other components are mandatory.
+
+ Expressions can also be used to denote calculations that should be
+ performed relative to "NOW" to determine the value, ie...
+
+ NOW/HOUR
+ ... Round to the start of the current hour
+ NOW-1DAY
+ ... Exactly 1 day prior to now
+ NOW/DAY+6MONTHS+3DAYS
+ ... 6 months and 3 days in the future from the start of
+ the current day
+
+ Consult the TrieDateField javadocs for more information.
+
+ Note: For faster range queries, consider the tdate type
+ -->
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
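+
+   <!-- Illustrative example (not part of the stock file): combined with the "*_dt"
+        dynamic field above, a range query such as
+         created_dt:[NOW-1DAY TO NOW]
+        matches documents whose "created_dt" value (a hypothetical field) falls
+        within the last day. -->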
+
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
+
+
+ <!--Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
+ <fieldType name="binary" class="solr.BinaryField"/>
+
+ <!-- The "RandomSortField" is not used to store or search any
+ data. You can declare fields of this type it in your schema
+ to generate pseudo-random orderings of your docs for sorting
+ or function purposes. The ordering is generated based on the field
+ name and the version of the index. As long as the index version
+ remains unchanged, and the same field name is reused,
+ the ordering of the docs will be consistent.
+ If you want different pseudo-random orderings of documents,
+ for the same version of the index, use a dynamicField and
+ change the field name in the request.
+ -->
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+ <!-- solr.TextField allows the specification of custom text analyzers
+ specified as a tokenizer and a list of token filters. Different
+ analyzers may be specified for indexing and querying.
+
+ The optional positionIncrementGap puts space between multiple fields of
+ this type on the same document, with the purpose of preventing false phrase
+ matching across fields.
+
+ For more info on customizing your analyzer chain, please see
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+ -->
+
+ <!-- One can also specify an existing Analyzer class that has a
+ default constructor via the class attribute on the analyzer element.
+ Example:
+ <fieldType name="text_greek" class="solr.TextField">
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+ </fieldType>
+ -->
+
+ <!-- A text field that only splits on whitespace for exact matching of words -->
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- A general text field that has reasonable, generic
+ cross-language defaults: it tokenizes with StandardTokenizer,
+ removes stop words from case-insensitive "stopwords.txt"
+ (empty by default), and down cases. At query time only, it
+ also applies synonyms. -->
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
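+
+   <!-- Illustrative example (not part of the stock file): because synonyms are
+        applied only at query time with expand="true", a query for "TV" against a
+        "*_txt" field also matches documents containing "Television", via the
+        "Television, Televisions, TV, TVs" group in synonyms.txt. -->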
+
+ <!-- A text field with defaults appropriate for English: it
+ tokenizes with StandardTokenizer, removes English stop words
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
+ finally applies Porter's stemming. The query time analyzer
+ also applies synonyms from synonyms.txt. -->
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!-- Case insensitive stop word removal.
+ -->
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- A text field with defaults appropriate for English, plus
+ aggressive word-splitting and autophrase features enabled.
+ This field is just like text_en, except it adds
+ WordDelimiterFilter to enable splitting and matching of
+ words on case-change, alpha numeric boundaries, and
+ non-alphanumeric chars. This means certain compound word
+ cases will work, for example query "wi fi" will match
+ document "WiFi" or "wi-fi".
+ -->
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!-- Case insensitive stop word removal.
+ -->
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Less flexible matching, but less false matches. Probably not ideal for product names,
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+ possible with WordDelimiterFilter in conjunction with stemming. -->
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Just like text_general except it reverses the characters of
+ each token, to enable more efficient leading wildcard queries. -->
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- This is an example of using the KeywordTokenizer along
+ with various TokenFilterFactories to produce a sortable field
+ that does not include some properties of the source text
+ -->
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+ <analyzer>
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
+ input string is preserved as a single token
+ -->
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <!-- The LowerCase TokenFilter does what you expect, which can be
+ useful when you want your sorting to be case insensitive
+ -->
+ <filter class="solr.LowerCaseFilterFactory" />
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
+ <filter class="solr.TrimFilterFactory" />
+ <!-- The PatternReplaceFilter gives you the flexibility to use
+ Java Regular expression to replace any sequence of characters
+ matching a pattern with an arbitrary replacement string,
+ which may include back references to portions of the original
+ string matched by the pattern.
+
+ See the Java Regular Expression documentation for more
+ information on pattern and replacement string syntax.
+
+ http://docs.oracle.com/javase/7/docs/api/java/util/regex/package-summary.html
+ -->
+ <filter class="solr.PatternReplaceFilterFactory"
+ pattern="([^a-z])" replacement="" replace="all"
+ />
+ </analyzer>
+ </fieldType>
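+
+   <!-- Illustrative walk-through (not part of the stock file): with this chain the
+        input "  Foo-Bar 42 " stays a single token, is lowercased, trimmed, and
+        stripped of everything outside a-z, so it sorts as "foobar". -->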
+
+ <!-- lowercases the entire field value, keeping it as a single token. -->
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
+ <!-- since fields of this type are by default not stored or indexed,
+ any data added to them will be ignored outright. -->
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+
+ <!-- This point type indexes the coordinates as separate fields (subFields)
+ If subFieldType is defined, it references a type, and a dynamic field
+ definition is created matching *___<typename>. Alternately, if
+ subFieldSuffix is defined, that is used to create the subFields.
+ Example: if subFieldType="double", then the coordinates would be
+ indexed in fields myloc_0___double,myloc_1___double.
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
+ in fields myloc_0_d,myloc_1_d
+ The subFields are an implementation detail of the fieldType, and end
+ users normally should not need to know about them.
+ -->
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
+
+ <!-- Spatial rectangle (bounding box) field. It supports most spatial predicates, and has
+ special relevancy modes: score=overlapRatio|area|area2D (local-param to the query). DocValues is recommended for
+ relevancy. -->
+ <fieldType name="bbox" class="solr.BBoxField"
+ geo="true" distanceUnits="kilometers" numberType="_bbox_coord" />
+ <fieldType name="_bbox_coord" class="solr.TrieDoubleField" precisionStep="8" docValues="true" stored="false"/>
+
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
+ Parameters:
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
+ providerClass: Lets you plug in other exchange provider backend:
+ solr.FileExchangeRateProvider is the default and takes one parameter:
+ currencyConfig: name of an xml file holding exchange rates
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
+ -->
+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
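+
+   <!-- Illustrative example (not part of the stock file): a "*_c" dynamic field
+        such as "price_c" (hypothetical) accepts values in "amount,currency" form,
+        e.g. "10.00,USD"; exchange rates for conversion are read from currency.xml. -->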
+
+ <!--Titan specific-->
+ <fieldType name="uuid"
+ class="solr.UUIDField"
+ indexed="true" />
+
+ <dynamicField name="*_uuid" type="uuid" indexed="true" stored="true"/>
+
+ <!-- TTL -->
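+  <!-- These fields back the document-expiration update chain configured in
+       solrconfig.xml: "timestamp" is stamped on each update, "ttl" carries a
+       per-document time-to-live, and "expire_at" holds the computed expiry
+       (see the updateRequestProcessorChain there). -->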
+ <field name="ttl" type="string" indexed="true" stored="true" />
+ <field name="expire_at" type="date" indexed="true" stored="true" />
+ <field name="timestamp" type="date" indexed="true" stored="true" />
+
+</schema>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/solrconfig.xml
----------------------------------------------------------------------
diff --git a/src/conf/solr/solrconfig.xml b/src/conf/solr/solrconfig.xml
new file mode 100644
index 0000000..ce2e20b
--- /dev/null
+++ b/src/conf/solr/solrconfig.xml
@@ -0,0 +1,625 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ For more details about configurations options that may appear in
+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
+-->
+<config>
+ <!-- In all configuration below, a prefix of "solr." for class names
+ is an alias that causes solr to search appropriate packages,
+ including org.apache.solr.(search|update|request|core|analysis)
+
+ You may also specify a fully qualified Java classname if you
+ have your own custom plugins.
+ -->
+
+ <!-- Controls what version of Lucene various components of Solr
+ adhere to. Generally, you want to use the latest version to
+ get all bug fixes and improvements. It is highly recommended
+ that you fully re-index after changing this setting as it can
+ affect both how text is indexed and queried.
+ -->
+ <luceneMatchVersion>5.0.0</luceneMatchVersion>
+
+ <!-- Data Directory
+
+ Used to specify an alternate directory to hold all index data
+ other than the default ./data under the Solr home. If
+ replication is in use, this should match the replication
+ configuration.
+ -->
+ <dataDir>${solr.data.dir:}</dataDir>
+
+
+ <!-- The DirectoryFactory to use for indexes.
+
+ solr.StandardDirectoryFactory is filesystem
+ based and tries to pick the best implementation for the current
+ JVM and platform. solr.NRTCachingDirectoryFactory, the default,
+ wraps solr.StandardDirectoryFactory and caches small files in memory
+ for better NRT performance.
+
+ One can force a particular implementation via solr.MMapDirectoryFactory,
+ solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
+
+ solr.RAMDirectoryFactory is memory based, not
+ persistent, and doesn't work with replication.
+ -->
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
+
+ <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
+ otherwise they will be ignored. If you don't plan on using hdfs,
+ you can safely remove this section. -->
+ <!-- The root directory that collection data should be written to. -->
+ <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
+ <!-- The hadoop configuration files to use for the hdfs client. -->
+ <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
+ <!-- Enable/Disable the hdfs cache. -->
+ <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
+ <!-- Enable/Disable using one global cache for all SolrCores.
+ The settings used will be from the first HdfsDirectoryFactory created. -->
+ <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
+
+ </directoryFactory>
+
+ <!-- The CodecFactory for defining the format of the inverted index.
+ The default implementation is SchemaCodecFactory, which is the official Lucene
+ index format, but hooks into the schema to provide per-field customization of
+ the postings lists and per-document values in the fieldType element
+ (postingsFormat/docValuesFormat). Note that most of the alternative implementations
+ are experimental, so if you choose to customize the index format, it's a good
+ idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
+ before upgrading to a newer version to avoid unnecessary reindexing.
+ -->
+ <codecFactory class="solr.SchemaCodecFactory"/>
+
+
+ <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
+
+ <schemaFactory class="ManagedIndexSchemaFactory">
+ <bool name="mutable">true</bool>
+ <str name="managedSchemaResourceName">managed-schema</str>
+ </schemaFactory>
+
+ When ManagedIndexSchemaFactory is specified, Solr will load the schema from
+ the resource named in 'managedSchemaResourceName', rather than from schema.xml.
+ Note that the managed schema resource CANNOT be named schema.xml. If the managed
+ schema does not exist, Solr will create it after reading schema.xml, then rename
+ 'schema.xml' to 'schema.xml.bak'.
+
+ Do NOT hand edit the managed schema - external modifications will be ignored and
+ overwritten as a result of schema modification REST API calls.
+
+ When ManagedIndexSchemaFactory is specified with mutable = true, schema
+ modification REST API calls will be allowed; otherwise, error responses will be
+ sent back for these requests.
+ -->
+
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Index Config - These settings control low-level behavior of indexing
+ Most example settings here show the default value, but are commented
+ out, to more easily see where customizations have been made.
+
+ Note: This replaces <indexDefaults> and <mainIndex> from older versions
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+ <indexConfig>
+
+ <!-- LockFactory
+
+ This option specifies which Lucene LockFactory implementation
+ to use.
+
+ single = SingleInstanceLockFactory - suggested for a
+ read-only index or when there is no possibility of
+ another process trying to modify the index.
+ native = NativeFSLockFactory - uses OS native file locking.
+ Do not use when multiple solr webapps in the same
+ JVM are attempting to share a single index.
+ simple = SimpleFSLockFactory - uses a plain file for locking
+
+ Defaults: 'native' is the default for Solr 3.6 and later; otherwise
+ 'simple' is the default
+
+ More details on the nuances of each LockFactory...
+ http://wiki.apache.org/lucene-java/AvailableLockFactories
+ -->
+ <lockType>${solr.lock.type:native}</lockType>
+
+ <!-- Lucene Infostream
+
+ To aid in advanced debugging, Lucene provides an "InfoStream"
+ of detailed information when indexing.
+
+ Setting the value to true will instruct the underlying Lucene
+ IndexWriter to write its info stream to solr's log. By default,
+ this is enabled here, and controlled through log4j.properties.
+ -->
+ <infoStream>true</infoStream>
+ </indexConfig>
+
+
+ <!-- JMX
+
+ This example enables JMX if and only if an existing MBeanServer
+ is found, use this if you want to configure JMX through JVM
+ parameters. Remove this to disable exposing Solr configuration
+ and statistics to JMX.
+
+ For more details see http://wiki.apache.org/solr/SolrJmx
+ -->
+ <jmx />
+ <!-- If you want to connect to a particular server, specify the
+ agentId
+ -->
+ <!-- <jmx agentId="myAgent" /> -->
+ <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
+ <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
+ -->
+
+ <!-- The default high-performance update handler -->
+ <updateHandler class="solr.DirectUpdateHandler2">
+
+ <!-- Enables a transaction log, used for real-time get, durability,
+ and solr cloud replica recovery. The log can grow as big as
+ uncommitted changes to the index, so use of a hard autoCommit
+ is recommended (see below).
+ "dir" - the target directory for transaction logs, defaults to the
+ solr data directory. -->
+ <updateLog>
+ <str name="dir">${solr.ulog.dir:}</str>
+ </updateLog>
+
+ <!-- AutoCommit
+
+ Perform a hard commit automatically under certain conditions.
+ Instead of enabling autoCommit, consider using "commitWithin"
+ when adding documents.
+
+ http://wiki.apache.org/solr/UpdateXmlMessages
+
+ maxDocs - Maximum number of documents to add since the last
+ commit before automatically triggering a new commit.
+
+ maxTime - Maximum amount of time in ms that is allowed to pass
+ since a document was added before automatically
+ triggering a new commit.
+ openSearcher - if false, the commit causes recent index changes
+ to be flushed to stable storage, but does not cause a new
+ searcher to be opened to make those changes visible.
+
+ If the updateLog is enabled, then it's highly recommended to
+ have some sort of hard autoCommit to limit the log size.
+ -->
+ <autoCommit>
+ <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
+
+ <!-- softAutoCommit is like autoCommit except it causes a
+ 'soft' commit which only ensures that changes are visible
+ but does not ensure that data is synced to disk. This is
+ faster and more near-realtime friendly than a hard commit.
+ -->
+ <autoSoftCommit>
+ <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
+ </autoSoftCommit>
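+
+    <!-- Illustrative note (not part of the stock file): with the defaults above, a
+         hard commit is forced at most every 15 seconds without opening a new
+         searcher, and soft commits are disabled (-1). Both can be overridden via
+         system properties at startup, e.g. -Dsolr.autoSoftCommit.maxTime=1000 to
+         make updates searchable within roughly a second. -->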
+
+ </updateHandler>
+
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Query section - these settings control query time things like caches
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+ <query>
+ <!-- Max Boolean Clauses
+
+ Maximum number of clauses in each BooleanQuery, an exception
+ is thrown if exceeded.
+
+ ** WARNING **
+
+ This option actually modifies a global Lucene property that
+ will affect all SolrCores. If multiple solrconfig.xml files
+ disagree on this property, the value at any given moment will
+ be based on the last SolrCore to be initialized.
+
+ -->
+ <maxBooleanClauses>1024</maxBooleanClauses>
+
+
+ <!-- Solr Internal Query Caches
+
+ There are two implementations of cache available for Solr,
+ LRUCache, based on a synchronized LinkedHashMap, and
+ FastLRUCache, based on a ConcurrentHashMap.
+
+ FastLRUCache has faster gets and slower puts in single
+ threaded operation and thus is generally faster than LRUCache
+ when the hit ratio of the cache is high (> 75%), and may be
+ faster under other scenarios on multi-cpu systems.
+ -->
+
+ <!-- Filter Cache
+
+ Cache used by SolrIndexSearcher for filters (DocSets),
+ unordered sets of *all* documents that match a query. When a
+ new searcher is opened, its caches may be prepopulated or
+ "autowarmed" using data from caches in the old searcher.
+ autowarmCount is the number of items to prepopulate. For
+ LRUCache, the autowarmed items will be the most recently
+ accessed items.
+
+ Parameters:
+ class - the SolrCache implementation (LRUCache or FastLRUCache)
+ size - the maximum number of entries in the cache
+ initialSize - the initial capacity (number of entries) of
+ the cache. (see java.util.HashMap)
+ autowarmCount - the number of entries to prepopulate from
+ an old cache.
+ -->
+ <filterCache class="solr.FastLRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
+
+ <!-- Query Result Cache
+
+ Caches results of searches - ordered lists of document ids
+ (DocList) based on a query, a sort, and the range of documents requested.
+ -->
+ <queryResultCache class="solr.LRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
+
+ <!-- Document Cache
+
+ Caches Lucene Document objects (the stored fields for each
+ document). Since Lucene internal document ids are transient,
+ this cache will not be autowarmed.
+ -->
+ <documentCache class="solr.LRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
+
+ <!-- custom cache currently used by block join -->
+ <cache name="perSegFilter"
+ class="solr.search.LRUCache"
+ size="10"
+ initialSize="0"
+ autowarmCount="10"
+ regenerator="solr.NoOpRegenerator" />
+
+ <!-- Lazy Field Loading
+
+ If true, stored fields that are not requested will be loaded
+ lazily. This can result in a significant speed improvement
+ if the usual case is to not load all stored fields,
+ especially if the skipped fields are large compressed text
+ fields.
+ -->
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+ <!-- Result Window Size
+
+ An optimization for use with the queryResultCache. When a search
+ is requested, a superset of the requested number of document ids
+ are collected. For example, if a search for a particular query
+ requests matching documents 10 through 19, and queryResultWindowSize is 50,
+ then documents 0 through 49 will be collected and cached. Any further
+ requests in that range can be satisfied via the cache.
+ -->
+ <queryResultWindowSize>20</queryResultWindowSize>
+
+ <!-- Maximum number of documents to cache for any entry in the
+ queryResultCache.
+ -->
+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+ <!-- Use Cold Searcher
+
+ If a search request comes in and there is no current
+ registered searcher, then immediately register the still
+ warming searcher and use it. If "false" then all requests
+ will block until the first searcher is done warming.
+ -->
+ <useColdSearcher>false</useColdSearcher>
+
+ <!-- Max Warming Searchers
+
+ Maximum number of searchers that may be warming in the
+ background concurrently. An error is returned if this limit
+ is exceeded.
+
+ Recommended values are 1-2 for read-only slaves, and higher for
+ masters without cache warming.
+ -->
+ <maxWarmingSearchers>2</maxWarmingSearchers>
+
+ </query>
+
+
+ <!-- Request Dispatcher
+
+ This section contains instructions for how the SolrDispatchFilter
+ should behave when processing requests for this SolrCore.
+
+ handleSelect is a legacy option that affects the behavior of requests
+ such as /select?qt=XXX
+
+ handleSelect="true" will cause the SolrDispatchFilter to process
+ the request and dispatch the query to a handler specified by the
+ "qt" param, assuming "/select" isn't already registered.
+
+ handleSelect="false" will cause the SolrDispatchFilter to
+ ignore "/select" requests, resulting in a 404 unless a handler
+ is explicitly registered with the name "/select"
+
+ handleSelect="true" is not recommended for new users, but is the default
+ for backwards compatibility
+ -->
+ <requestDispatcher handleSelect="false" >
+ <!-- Request Parsing
+
+ These settings indicate how Solr Requests may be parsed, and
+ what restrictions may be placed on the ContentStreams from
+ those requests
+
+ enableRemoteStreaming - enables use of the stream.file
+ and stream.url parameters for specifying remote streams.
+
+ multipartUploadLimitInKB - specifies the max size (in KiB) of
+ Multipart File Uploads that Solr will allow in a Request.
+
+ formdataUploadLimitInKB - specifies the max size (in KiB) of
+ form data (application/x-www-form-urlencoded) sent via
+ POST. You can use POST to pass request parameters not
+ fitting into the URL.
+
+ addHttpRequestToContext - if set to true, it will instruct
+ the requestParsers to include the original HttpServletRequest
+ object in the context map of the SolrQueryRequest under the
+ key "httpRequest". It will not be used by any of the existing
+ Solr components, but may be useful when developing custom
+ plugins.
+
+ *** WARNING ***
+ The settings below authorize Solr to fetch remote files. You
+ should make sure your system has some authentication before
+ using enableRemoteStreaming="true"
+
+ -->
+ <requestParsers enableRemoteStreaming="true"
+ multipartUploadLimitInKB="2048000"
+ formdataUploadLimitInKB="2048"
+ addHttpRequestToContext="false"/>
+
+ <!-- HTTP Caching
+
+ Set HTTP caching related parameters (for proxy caches and clients).
+
+ The options below instruct Solr not to output any HTTP Caching
+ related headers
+ -->
+ <httpCaching never304="true" />
+
+ </requestDispatcher>
+
+ <!-- Request Handlers
+
+ http://wiki.apache.org/solr/SolrRequestHandler
+
+ Incoming queries will be dispatched to a specific handler by name
+ based on the path specified in the request.
+
+ Legacy behavior: If the request path uses "/select" but no Request
+ Handler has that name, and if handleSelect="true" has been specified in
+ the requestDispatcher, then the Request Handler is dispatched based on
+ the qt parameter. Handlers without a leading '/' are accessed this way
+ like so: http://host/app/[core/]select?qt=name If no qt is
+ given, then the requestHandler that declares default="true" will be
+ used or the one named "standard".
+
+ If a Request Handler is declared with startup="lazy", then it will
+ not be initialized until the first request that uses it.
+
+ -->
+ <!-- SearchHandler
+
+ http://wiki.apache.org/solr/SearchHandler
+
+ For processing Search Queries, the primary Request Handler
+ provided with Solr is "SearchHandler". It delegates to a sequence
+ of SearchComponents (see below) and supports distributed
+ queries across multiple shards
+ -->
+ <requestHandler name="/select" class="solr.SearchHandler">
+ <!-- default values for query parameters can be specified, these
+ will be overridden by parameters in the request
+ -->
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <int name="rows">10</int>
+ </lst>
+
+ </requestHandler>
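+
+  <!-- Illustrative request (host and collection name are assumptions; "vertex_index"
+       comes from the Atlas installation docs):
+       http://localhost:8983/solr/vertex_index/select?q=*:*
+       returns the first 10 matching documents, per the "rows" default above. -->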
+
+ <!-- A request handler that returns indented JSON by default -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">text</str>
+ </lst>
+ </requestHandler>
+
+ <!--
+ The export request handler is used to export full sorted result sets.
+ Do not change these defaults.
+ -->
+ <requestHandler name="/export" class="solr.SearchHandler">
+ <lst name="invariants">
+ <str name="rq">{!xport}</str>
+ <str name="wt">xsort</str>
+ <str name="distrib">false</str>
+ </lst>
+
+ <arr name="components">
+ <str>query</str>
+ </arr>
+ </requestHandler>
+
+
+ <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell">
+ <lst name="defaults">
+ <str name="df">text</str>
+ </lst>
+ </initParams>
+
+ <!-- Field Analysis Request Handler
+
+ RequestHandler that provides much the same functionality as
+ analysis.jsp. Provides the ability to specify multiple field
+ types and field names in the same request and outputs
+ index-time and query-time analysis for each of them.
+
+ Request parameters are:
+ analysis.fieldname - field name whose analyzers are to be used
+
+ analysis.fieldtype - field type whose analyzers are to be used
+ analysis.fieldvalue - text for index-time analysis
+ q (or analysis.q) - text for query time analysis
+ analysis.showmatch (true|false) - When set to true and when
+ query analysis is performed, the produced tokens of the
+ field value analysis will be marked as "matched" for every
+ token that is produced by the query analysis
+ -->
+ <requestHandler name="/analysis/field"
+ startup="lazy"
+ class="solr.FieldAnalysisRequestHandler" />
+
+
+ <!-- Document Analysis Handler
+
+ http://wiki.apache.org/solr/AnalysisRequestHandler
+
+ An analysis handler that provides a breakdown of the analysis
+ process of provided documents. This handler expects a (single)
+ content stream with the following format:
+
+ <docs>
+ <doc>
+ <field name="id">1</field>
+ <field name="name">The Name</field>
+ <field name="text">The Text Value</field>
+ </doc>
+ <doc>...</doc>
+ <doc>...</doc>
+ ...
+ </docs>
+
+ Note: Each document must contain a field which serves as the
+ unique key. This key is used in the returned response to associate
+ an analysis breakdown to the analyzed document.
+
+ Like the FieldAnalysisRequestHandler, this handler also supports
+ query analysis by sending either an "analysis.query" or "q"
+ request parameter that holds the query text to be analyzed. It
+ also supports the "analysis.showmatch" parameter which, when set to
+ true, causes all field tokens that match the query tokens to be marked
+ as a "match".
+ -->
+ <requestHandler name="/analysis/document"
+ class="solr.DocumentAnalysisRequestHandler"
+ startup="lazy" />
+
+ <!-- Echo the request contents back to the client -->
+ <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="echoHandler">true</str>
+ </lst>
+ </requestHandler>
+
+
+
+ <!-- Search Components
+
+ Search components are registered to SolrCore and used by
+ instances of SearchHandler (which can access them by name)
+
+ By default, the following components are available:
+
+ <searchComponent name="query" class="solr.QueryComponent" />
+ <searchComponent name="facet" class="solr.FacetComponent" />
+ <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
+ <searchComponent name="highlight" class="solr.HighlightComponent" />
+ <searchComponent name="stats" class="solr.StatsComponent" />
+ <searchComponent name="debug" class="solr.DebugComponent" />
+
+ -->
+
+ <!-- Terms Component
+
+ http://wiki.apache.org/solr/TermsComponent
+
+ A component to return terms and document frequency of those
+ terms
+ -->
+ <searchComponent name="terms" class="solr.TermsComponent"/>
+
+ <!-- A request handler for demonstrating the terms component -->
+ <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+ <lst name="defaults">
+ <bool name="terms">true</bool>
+ <bool name="distrib">false</bool>
+ </lst>
+ <arr name="components">
+ <str>terms</str>
+ </arr>
+ </requestHandler>
+
+ <!-- Legacy config for the admin interface -->
+ <admin>
+ <defaultQuery>*:*</defaultQuery>
+ </admin>
+
+
+ <!--Titan specific-->
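+  <!-- Summary of the chain below: each update is stamped with "timestamp";
+       DocExpirationUpdateProcessorFactory derives "expire_at" from "ttl" and
+       deletes expired documents every 5 seconds; only the first value of
+       "expire_at_dt" is kept; the update is then logged and applied. -->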
+ <updateRequestProcessorChain default="true">
+ <processor class="solr.TimestampUpdateProcessorFactory">
+ <str name="fieldName">timestamp</str>
+ </processor>
+ <processor class="solr.processor.DocExpirationUpdateProcessorFactory">
+ <int name="autoDeletePeriodSeconds">5</int>
+ <str name="ttlFieldName">ttl</str>
+ <str name="expirationFieldName">expire_at</str>
+ </processor>
+ <processor class="solr.FirstFieldValueUpdateProcessorFactory">
+ <str name="fieldName">expire_at_dt</str>
+ </processor>
+ <processor class="solr.LogUpdateProcessorFactory"/>
+ <processor class="solr.RunUpdateProcessorFactory"/>
+ </updateRequestProcessorChain>
+</config>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/stopwords.txt
----------------------------------------------------------------------
diff --git a/src/conf/solr/stopwords.txt b/src/conf/solr/stopwords.txt
new file mode 100644
index 0000000..ae1e83e
--- /dev/null
+++ b/src/conf/solr/stopwords.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/synonyms.txt
----------------------------------------------------------------------
diff --git a/src/conf/solr/synonyms.txt b/src/conf/solr/synonyms.txt
new file mode 100644
index 0000000..7f72128
--- /dev/null
+++ b/src/conf/solr/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
[2/2] incubator-atlas git commit: ATLAS-91 Add solr configuration and
documentation (suma.shivaprasad via shwethags)
Posted by sh...@apache.org.
ATLAS-91 Add solr configuration and documentation (suma.shivaprasad via shwethags)
Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/48343db9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/48343db9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/48343db9
Branch: refs/heads/master
Commit: 48343db999b495458409644c8b9d2fd0bd9fa99d
Parents: 147242e
Author: Shwetha GS <ss...@hortonworks.com>
Authored: Wed Aug 12 11:15:02 2015 +0530
Committer: Shwetha GS <ss...@hortonworks.com>
Committed: Wed Aug 12 11:15:02 2015 +0530
----------------------------------------------------------------------
client/pom.xml | 6 +
docs/src/site/twiki/InstallationSteps.twiki | 40 +
pom.xml | 71 +-
release-log.txt | 1 +
repository/pom.xml | 10 +
.../titan/diskstorage/solr/Solr5Index.java | 962 +++++++++++++++++++
.../repository/graph/TitanGraphProvider.java | 36 +
src/conf/solr/currency.xml | 67 ++
src/conf/solr/lang/stopwords_en.txt | 54 ++
src/conf/solr/protwords.txt | 21 +
src/conf/solr/schema.xml | 534 ++++++++++
src/conf/solr/solrconfig.xml | 625 ++++++++++++
src/conf/solr/stopwords.txt | 14 +
src/conf/solr/synonyms.txt | 29 +
14 files changed, 2466 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/client/pom.xml
----------------------------------------------------------------------
diff --git a/client/pom.xml b/client/pom.xml
index d393b3a..2e27930 100755
--- a/client/pom.xml
+++ b/client/pom.xml
@@ -37,6 +37,12 @@
<artifactId>atlas-typesystem</artifactId>
</dependency>
+ <!-- supports simple auth handler -->
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ </dependency>
+
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/docs/src/site/twiki/InstallationSteps.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/InstallationSteps.twiki b/docs/src/site/twiki/InstallationSteps.twiki
index 0391c2d..e056d17 100644
--- a/docs/src/site/twiki/InstallationSteps.twiki
+++ b/docs/src/site/twiki/InstallationSteps.twiki
@@ -35,6 +35,15 @@ Tar is structured as follows
|- client.properties
|- atlas-env.sh
|- log4j.xml
+ |- solr
+ |- currency.xml
+ |- lang
+ |- stopwords_en.txt
+ |- protwords.txt
+ |- schema.xml
+ |- solrconfig.xml
+ |- stopwords.txt
+ |- synonyms.txt
|- docs
|- server
|- webapp
@@ -112,6 +121,37 @@ and change it to look as below
export METADATA_SERVER_OPTS="-Djava.awt.headless=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
</verbatim>
+* Configuring SOLR as the Indexing Backend for the Graph Repository
+
+By default, Atlas uses Titan as the graph repository; it is currently the only graph repository implementation available.
+To configure Titan to work with Solr, follow the instructions below.
+<verbatim>
+* Install Solr if it is not already running. The supported Solr versions are 4.8.1 and 5.2.1.
+
+* Start solr in cloud mode.
+ SolrCloud mode uses a ZooKeeper Service as a highly available, central location for cluster management.
+ For a small cluster, running with an existing ZooKeeper quorum should be fine. For larger clusters, you would want to run a separate ZooKeeper quorum with at least 3 servers.
+ Note: Atlas currently supports Solr in "cloud" mode only. "http" mode is not supported. For more information, refer to the Solr documentation - https://cwiki.apache.org/confluence/display/solr/SolrCloud
+
+* Run the following commands from SOLR_HOME directory to create collections in Solr corresponding to the indexes that Atlas uses
+ bin/solr create -c vertex_index -d ATLAS_HOME/conf/solr -shards #numShards -replicationFactor #replicationFactor
+ bin/solr create -c edge_index -d ATLAS_HOME/conf/solr -shards #numShards -replicationFactor #replicationFactor
+ bin/solr create -c fulltext_index -d ATLAS_HOME/conf/solr -shards #numShards -replicationFactor #replicationFactor
+
+ Note: If numShards and replicationFactor are not specified, they default to 1, which suffices if you are trying out Solr with Atlas on a single-node instance.
+ Otherwise, specify numShards according to the number of hosts in the Solr cluster and the maxShardsPerNode configuration.
+ The number of shards cannot exceed the total number of Solr nodes in your SolrCloud cluster.
+
+* Change the Atlas configuration to point to the Solr instance. Please make sure the following properties are set to the values below in ATLAS_HOME/conf/application.properties
+  atlas.graph.index.search.backend=<'solr' for Solr 4.8.1>/<'solr5' for Solr 5.2.1>
+  atlas.graph.index.search.solr.mode=cloud
+  atlas.graph.index.search.solr.zookeeper-url=<the ZooKeeper quorum set up for Solr, as a comma-separated value> e.g.: 10.1.6.4:2181,10.1.6.5:2181
+
+* Restart Atlas
+</verbatim>
+
+For more information on the Titan Solr configuration, please refer to http://s3.thinkaurelius.com/docs/titan/0.5.4/solr.htm
+
*Starting Atlas Server*
<verbatim>
bin/atlas_start.py [-port <port>]
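
Editor's note: the collection-creation step above can also be done programmatically through SolrJ, which is essentially what the Solr5Index class added by this patch does at register() time. The sketch below is illustrative only and not part of the patch; it assumes SolrJ 5.x on the classpath, a hypothetical ZooKeeper quorum at localhost:2181, and that a config set named after each collection (the contents of ATLAS_HOME/conf/solr) has already been uploaded to ZooKeeper.

    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.client.solrj.request.CollectionAdminRequest;
    import org.apache.solr.client.solrj.response.CollectionAdminResponse;

    public class CreateAtlasCollections {
        public static void main(String[] args) throws Exception {
            // Hypothetical ZK address; point this at your SolrCloud quorum.
            CloudSolrClient client = new CloudSolrClient("localhost:2181");
            client.connect();
            for (String collection : new String[]{"vertex_index", "edge_index", "fulltext_index"}) {
                CollectionAdminRequest.Create create = new CollectionAdminRequest.Create();
                create.setCollectionName(collection);
                create.setConfigName(collection); // config set must already exist in ZooKeeper
                create.setNumShards(1);           // increase for multi-node clusters
                create.setReplicationFactor(1);
                CollectionAdminResponse response = create.process(client);
                System.out.println(collection + " created: " + response.isSuccess());
            }
            client.close();
        }
    }
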
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index facd539..3d729b5 100755
--- a/pom.xml
+++ b/pom.xml
@@ -329,8 +329,8 @@
<titan.version>0.5.4</titan.version>
<hadoop.version>2.7.0</hadoop.version>
<hbase.version>0.98.9-hadoop2</hbase.version>
+ <solr.version>5.1.0</solr.version>
<kafka.version>0.8.2.0</kafka.version>
-
<!-- scala versions -->
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
@@ -397,8 +397,8 @@
</activation>
<properties>
<titan.storage.backend>hbase</titan.storage.backend>
- <titan.index.backend>solr</titan.index.backend>
- <solr.zk.address>localhost:9983</solr.zk.address>
+ <titan.index.backend>solr5</titan.index.backend>
+ <solr.zk.address>localhost:2181</solr.zk.address>
<titan.storage.hostname>localhost</titan.storage.hostname>
</properties>
</profile>
@@ -578,6 +578,10 @@
<groupId>org.htrace</groupId>
<artifactId>*</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>commons-httpclient</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
</exclusions>
</dependency>
@@ -903,11 +907,49 @@
<artifactId>*</artifactId>
<groupId>org.ow2.asm</groupId>
</exclusion>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.apache.solr</groupId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
+ <version>${solr.version}</version>
+ <exclusions>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.eclipse.jetty</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.eclipse.jetty.orbit</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.restlet.jee</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.ow2.asm</groupId>
+ </exclusion>
+ <exclusion>
+ <artifactId>*</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ <version>${solr.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>com.thinkaurelius.titan</groupId>
<artifactId>titan-lucene</artifactId>
<version>${titan.version}</version>
@@ -1084,7 +1126,19 @@
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
- <version>4.2.5</version>
+ <version>4.4.1</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore</artifactId>
+ <version>4.4.1</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpmime</artifactId>
+ <version>4.4.1</version>
</dependency>
<!--Test dependencies-->
@@ -1376,6 +1430,14 @@
</descriptors>
<finalName>apache-atlas-${project.version}</finalName>
</configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
</plugin>
<plugin>
@@ -1502,6 +1564,7 @@
<exclude>**/*.iml</exclude>
<exclude>**/*.json</exclude>
<exclude>**/target/**</exclude>
+ <exclude>**/target*/**</exclude>
<exclude>**/build/**</exclude>
<exclude>**/*.patch</exclude>
<exclude>derby.log</exclude>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 5d83baa..5de705f 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -8,6 +8,7 @@ ATLAS-54 Rename configs in hive hook (shwethags)
ATLAS-3 Mixed Index creation fails with Date types (suma.shivaprasad via shwethags)
ALL CHANGES:
+ATLAS-91 Add solr configuration and documentation (suma.shivaprasad via shwethags)
ATLAS-95 import-hive.sh reports illegal java parameters (shwethags)
ATLAS-74 Create notification framework (shwethags)
ATLAS-93 import-hive.sh reports FileNotFoundException (shwethags)
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/repository/pom.xml
----------------------------------------------------------------------
diff --git a/repository/pom.xml b/repository/pom.xml
index a2f8e08..8e4d0f3 100755
--- a/repository/pom.xml
+++ b/repository/pom.xml
@@ -95,6 +95,16 @@
</dependency>
<dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-solrj</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>com.thinkaurelius.titan</groupId>
<artifactId>titan-berkeleyje</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/repository/src/main/java/com/thinkaurelius/titan/diskstorage/solr/Solr5Index.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/com/thinkaurelius/titan/diskstorage/solr/Solr5Index.java b/repository/src/main/java/com/thinkaurelius/titan/diskstorage/solr/Solr5Index.java
new file mode 100644
index 0000000..e484c18
--- /dev/null
+++ b/repository/src/main/java/com/thinkaurelius/titan/diskstorage/solr/Solr5Index.java
@@ -0,0 +1,962 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.thinkaurelius.titan.diskstorage.solr;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+import com.thinkaurelius.titan.core.Order;
+import com.thinkaurelius.titan.core.TitanElement;
+import com.thinkaurelius.titan.core.attribute.Cmp;
+import com.thinkaurelius.titan.core.attribute.Geo;
+import com.thinkaurelius.titan.core.attribute.Geoshape;
+import com.thinkaurelius.titan.core.attribute.Text;
+import com.thinkaurelius.titan.core.schema.Mapping;
+import com.thinkaurelius.titan.diskstorage.BackendException;
+import com.thinkaurelius.titan.diskstorage.BaseTransaction;
+import com.thinkaurelius.titan.diskstorage.BaseTransactionConfig;
+import com.thinkaurelius.titan.diskstorage.BaseTransactionConfigurable;
+import com.thinkaurelius.titan.diskstorage.PermanentBackendException;
+import com.thinkaurelius.titan.diskstorage.TemporaryBackendException;
+import com.thinkaurelius.titan.diskstorage.configuration.ConfigNamespace;
+import com.thinkaurelius.titan.diskstorage.configuration.ConfigOption;
+import com.thinkaurelius.titan.diskstorage.configuration.Configuration;
+import com.thinkaurelius.titan.diskstorage.indexing.IndexEntry;
+import com.thinkaurelius.titan.diskstorage.indexing.IndexFeatures;
+import com.thinkaurelius.titan.diskstorage.indexing.IndexMutation;
+import com.thinkaurelius.titan.diskstorage.indexing.IndexProvider;
+import com.thinkaurelius.titan.diskstorage.indexing.IndexQuery;
+import com.thinkaurelius.titan.diskstorage.indexing.KeyInformation;
+import com.thinkaurelius.titan.diskstorage.indexing.RawQuery;
+import com.thinkaurelius.titan.diskstorage.solr.transform.GeoToWktConverter;
+import com.thinkaurelius.titan.diskstorage.util.DefaultTransaction;
+import com.thinkaurelius.titan.graphdb.configuration.PreInitializeConfigOptions;
+import com.thinkaurelius.titan.graphdb.database.serialize.AttributeUtil;
+import com.thinkaurelius.titan.graphdb.database.serialize.attribute.AbstractDecimal;
+import com.thinkaurelius.titan.graphdb.query.TitanPredicate;
+import com.thinkaurelius.titan.graphdb.query.condition.And;
+import com.thinkaurelius.titan.graphdb.query.condition.Condition;
+import com.thinkaurelius.titan.graphdb.query.condition.Not;
+import com.thinkaurelius.titan.graphdb.query.condition.Or;
+import com.thinkaurelius.titan.graphdb.query.condition.PredicateCondition;
+import com.thinkaurelius.titan.graphdb.types.ParameterType;
+import org.apache.commons.lang.StringUtils;
+import org.apache.http.client.HttpClient;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpClientUtil;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.client.solrj.util.ClientUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.UUID;
+
+import static com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration.INDEX_MAX_RESULT_SET_SIZE;
+import static com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration.INDEX_NS;
+
+/**
+ * NOTE: Copied from Titan to support Solr 5. Do not change.
+ */
+@PreInitializeConfigOptions
+public class Solr5Index implements IndexProvider {
+
+ private static final Logger logger = LoggerFactory.getLogger(Solr5Index.class);
+
+
+ private static final String DEFAULT_ID_FIELD = "id";
+
+ private enum Mode {
+ HTTP, CLOUD;
+
+ public static Mode parse(String mode) {
+ for (Mode m : Mode.values()) {
+ if (m.toString().equalsIgnoreCase(mode)) return m;
+ }
+ throw new IllegalArgumentException("Unrecognized mode: "+mode);
+ }
+ }
+
+ public static final ConfigNamespace SOLR_NS =
+ new ConfigNamespace(INDEX_NS, "solr", "Solr index configuration");
+
+ public static final ConfigOption<String> SOLR_MODE = new ConfigOption<String>(SOLR_NS,"mode",
+ "The operation mode for Solr which is either via HTTP (`http`) or using SolrCloud (`cloud`)",
+ ConfigOption.Type.GLOBAL_OFFLINE, "cloud");
+
+ public static final ConfigOption<Boolean> DYNAMIC_FIELDS = new ConfigOption<Boolean>(SOLR_NS,"dyn-fields",
+ "Whether to use dynamic fields (which appends the data type to the field name). If dynamic fields is disabled" +
+ "the user must map field names and define them explicitly in the schema.",
+ ConfigOption.Type.GLOBAL_OFFLINE, true);
+
+ public static final ConfigOption<String[]> KEY_FIELD_NAMES = new ConfigOption<String[]>(SOLR_NS,"key-field-names",
+ "Field name that uniquely identifies each document in Solr. Must be specified as a list of `collection=field`.",
+ ConfigOption.Type.GLOBAL, String[].class);
+
+ public static final ConfigOption<String> TTL_FIELD = new ConfigOption<String>(SOLR_NS,"ttl_field",
+ "Name of the TTL field for Solr collections.",
+ ConfigOption.Type.GLOBAL_OFFLINE, "ttl");
+
+ /** SolrCloud Configuration */
+
+ public static final ConfigOption<String> ZOOKEEPER_URL = new ConfigOption<String>(SOLR_NS,"zookeeper-url",
+ "URL of the Zookeeper instance coordinating the SolrCloud cluster",
+ ConfigOption.Type.MASKABLE, "localhost:2181");
+
+ public static final ConfigOption<Integer> NUM_SHARDS = new ConfigOption<Integer>(SOLR_NS,"num-shards",
+ "Number of shards for a collection. This applies when creating a new collection which is only supported under the SolrCloud operation mode.",
+ ConfigOption.Type.GLOBAL_OFFLINE, 1);
+
+ public static final ConfigOption<Integer> MAX_SHARDS_PER_NODE = new ConfigOption<Integer>(SOLR_NS,"max-shards-per-node",
+ "Maximum number of shards per node. This applies when creating a new collection which is only supported under the SolrCloud operation mode.",
+ ConfigOption.Type.GLOBAL_OFFLINE, 1);
+
+ public static final ConfigOption<Integer> REPLICATION_FACTOR = new ConfigOption<Integer>(SOLR_NS,"replication-factor",
+ "Replication factor for a collection. This applies when creating a new collection which is only supported under the SolrCloud operation mode.",
+ ConfigOption.Type.GLOBAL_OFFLINE, 1);
+
+
+ /** HTTP Configuration */
+
+ public static final ConfigOption<String[]> HTTP_URLS = new ConfigOption<String[]>(SOLR_NS,"http-urls",
+ "List of URLs to use to connect to Solr Servers (LBHttpSolrClient is used), don't add core or collection name to the URL.",
+ ConfigOption.Type.MASKABLE, new String[] { "http://localhost:8983/solr" });
+
+ public static final ConfigOption<Integer> HTTP_CONNECTION_TIMEOUT = new ConfigOption<Integer>(SOLR_NS,"http-connection-timeout",
+ "Solr HTTP connection timeout.",
+ ConfigOption.Type.MASKABLE, 5000);
+
+ public static final ConfigOption<Boolean> HTTP_ALLOW_COMPRESSION = new ConfigOption<Boolean>(SOLR_NS,"http-compression",
+ "Enable/disable compression on the HTTP connections made to Solr.",
+ ConfigOption.Type.MASKABLE, false);
+
+ public static final ConfigOption<Integer> HTTP_MAX_CONNECTIONS_PER_HOST = new ConfigOption<Integer>(SOLR_NS,"http-max-per-host",
+ "Maximum number of HTTP connections per Solr host.",
+ ConfigOption.Type.MASKABLE, 20);
+
+ public static final ConfigOption<Integer> HTTP_GLOBAL_MAX_CONNECTIONS = new ConfigOption<Integer>(SOLR_NS,"http-max",
+ "Maximum number of HTTP connections in total to all Solr servers.",
+ ConfigOption.Type.MASKABLE, 100);
+
+ public static final ConfigOption<Boolean> WAIT_SEARCHER = new ConfigOption<Boolean>(SOLR_NS, "wait-searcher",
+ "When mutating - wait for the index to reflect new mutations before returning. This can have a negative impact on performance.",
+ ConfigOption.Type.LOCAL, false);
+
+
+
+ private static final IndexFeatures SOLR_FEATURES = new IndexFeatures.Builder().supportsDocumentTTL()
+ .setDefaultStringMapping(Mapping.TEXT).supportedStringMappings(Mapping.TEXT, Mapping.STRING).build();
+
+ private final SolrClient solrClient;
+ private final Configuration configuration;
+ private final Mode mode;
+ private final boolean dynFields;
+ private final Map<String, String> keyFieldIds;
+ private final String ttlField;
+ private final int maxResults;
+ private final boolean waitSearcher;
+
+ public Solr5Index(final Configuration config) throws BackendException {
+ Preconditions.checkArgument(config!=null);
+ configuration = config;
+
+ mode = Mode.parse(config.get(SOLR_MODE));
+ dynFields = config.get(DYNAMIC_FIELDS);
+ keyFieldIds = parseKeyFieldsForCollections(config);
+ maxResults = config.get(INDEX_MAX_RESULT_SET_SIZE);
+ ttlField = config.get(TTL_FIELD);
+ waitSearcher = config.get(WAIT_SEARCHER);
+
+ if (mode==Mode.CLOUD) {
+ String zookeeperUrl = config.get(Solr5Index.ZOOKEEPER_URL);
+ CloudSolrClient cloudServer = new CloudSolrClient(zookeeperUrl, true);
+ cloudServer.connect();
+ solrClient = cloudServer;
+ } else if (mode==Mode.HTTP) {
+ HttpClient clientParams = HttpClientUtil.createClient(new ModifiableSolrParams() {{
+ add(HttpClientUtil.PROP_ALLOW_COMPRESSION, config.get(HTTP_ALLOW_COMPRESSION).toString());
+ add(HttpClientUtil.PROP_CONNECTION_TIMEOUT, config.get(HTTP_CONNECTION_TIMEOUT).toString());
+ add(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, config.get(HTTP_MAX_CONNECTIONS_PER_HOST).toString());
+ add(HttpClientUtil.PROP_MAX_CONNECTIONS, config.get(HTTP_GLOBAL_MAX_CONNECTIONS).toString());
+ }});
+
+ solrClient = new LBHttpSolrClient(clientParams, config.get(HTTP_URLS));
+
+
+ } else {
+ throw new IllegalArgumentException("Unsupported Solr operation mode: " + mode);
+ }
+ }
+
+ private Map<String, String> parseKeyFieldsForCollections(Configuration config) throws BackendException {
+ Map<String, String> keyFieldNames = new HashMap<String, String>();
+ String[] collectionFieldStatements = config.has(KEY_FIELD_NAMES)?config.get(KEY_FIELD_NAMES):new String[0];
+ for (String collectionFieldStatement : collectionFieldStatements) {
+ String[] parts = collectionFieldStatement.trim().split("=");
+ if (parts.length != 2) {
+ throw new PermanentBackendException("Unable to parse the collection name / key field name pair. It should be of the format collection=field");
+ }
+ String collectionName = parts[0];
+ String keyFieldName = parts[1];
+ keyFieldNames.put(collectionName, keyFieldName);
+ }
+ return keyFieldNames;
+ }
+
+ private String getKeyFieldId(String collection) {
+ String field = keyFieldIds.get(collection);
+ if (field==null) field = DEFAULT_ID_FIELD;
+ return field;
+ }
+
+ /**
+     * Unlike the ElasticSearch index, which is schema-free, Solr requires a schema to
+     * support searching. This means that you will need to modify the Solr schema with the
+     * appropriate field definitions for indexing to work properly. If you have a running instance
+ * of Solr and you modify its schema with new fields, don't forget to re-index!
+ * @param store Index store
+ * @param key New key to register
+ * @param information Datatype to register for the key
+ * @param tx enclosing transaction
+ * @throws com.thinkaurelius.titan.diskstorage.BackendException
+ */
+ @Override
+ public void register(String store, String key, KeyInformation information, BaseTransaction tx) throws BackendException {
+ if (mode==Mode.CLOUD) {
+ CloudSolrClient client = (CloudSolrClient) solrClient;
+ try {
+ createCollectionIfNotExists(client, configuration, store);
+ } catch (IOException e) {
+ throw new PermanentBackendException(e);
+ } catch (SolrServerException e) {
+ throw new PermanentBackendException(e);
+ } catch (InterruptedException e) {
+ throw new PermanentBackendException(e);
+ } catch (KeeperException e) {
+ throw new PermanentBackendException(e);
+ }
+ }
+ //Since all data types must be defined in the schema.xml, pre-registering a type does not work
+ }
+
+ @Override
+ public void mutate(Map<String, Map<String, IndexMutation>> mutations, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
+ logger.debug("Mutating SOLR");
+ try {
+ for (Map.Entry<String, Map<String, IndexMutation>> stores : mutations.entrySet()) {
+ String collectionName = stores.getKey();
+ String keyIdField = getKeyFieldId(collectionName);
+
+ List<String> deleteIds = new ArrayList<String>();
+ Collection<SolrInputDocument> changes = new ArrayList<SolrInputDocument>();
+
+ for (Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
+ String docId = entry.getKey();
+ IndexMutation mutation = entry.getValue();
+ Preconditions.checkArgument(!(mutation.isNew() && mutation.isDeleted()));
+ Preconditions.checkArgument(!mutation.isNew() || !mutation.hasDeletions());
+ Preconditions.checkArgument(!mutation.isDeleted() || !mutation.hasAdditions());
+
+ //Handle any deletions
+ if (mutation.hasDeletions()) {
+ if (mutation.isDeleted()) {
+ logger.trace("Deleting entire document {}", docId);
+ deleteIds.add(docId);
+ } else {
+ HashSet<IndexEntry> fieldDeletions = Sets.newHashSet(mutation.getDeletions());
+ if (mutation.hasAdditions()) {
+ for (IndexEntry indexEntry : mutation.getAdditions()) {
+ fieldDeletions.remove(indexEntry);
+ }
+ }
+ deleteIndividualFieldsFromIndex(collectionName, keyIdField, docId, fieldDeletions);
+ }
+ }
+
+ if (mutation.hasAdditions()) {
+ int ttl = mutation.determineTTL();
+
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.setField(keyIdField, docId);
+
+ boolean isNewDoc = mutation.isNew();
+
+ if (isNewDoc)
+ logger.trace("Adding new document {}", docId);
+
+ for (IndexEntry e : mutation.getAdditions()) {
+ final Object fieldValue = convertValue(e.value);
+ doc.setField(e.field, isNewDoc
+ ? fieldValue : new HashMap<String, Object>(1) {{ put("set", fieldValue); }});
+ }
+ if (ttl>0) {
+ Preconditions.checkArgument(isNewDoc,"Solr only supports TTL on new documents [%s]",docId);
+ doc.setField(ttlField, String.format("+%dSECONDS", ttl));
+ }
+ changes.add(doc);
+ }
+ }
+
+ commitDeletes(collectionName, deleteIds);
+ commitDocumentChanges(collectionName, changes);
+ }
+ } catch (Exception e) {
+ throw storageException(e);
+ }
+ }
+
+ private Object convertValue(Object value) throws BackendException {
+ if (value instanceof Geoshape)
+ return GeoToWktConverter.convertToWktString((Geoshape) value);
+ // in order to serialize/deserialize properly Solr will have to have an
+ // access to Titan source which has Decimal type, so for now we simply convert to
+ // double and let Solr do the same thing or fail.
+ if (value instanceof AbstractDecimal)
+ return ((AbstractDecimal) value).doubleValue();
+ if (value instanceof UUID)
+ return value.toString();
+ return value;
+ }
+
+ @Override
+ public void restore(Map<String, Map<String, List<IndexEntry>>> documents, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
+ try {
+ for (Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) {
+ final String collectionName = stores.getKey();
+
+ List<String> deleteIds = new ArrayList<String>();
+ List<SolrInputDocument> newDocuments = new ArrayList<SolrInputDocument>();
+
+ for (Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) {
+ final String docID = entry.getKey();
+ final List<IndexEntry> content = entry.getValue();
+
+ if (content == null || content.isEmpty()) {
+ if (logger.isTraceEnabled())
+ logger.trace("Deleting document [{}]", docID);
+
+ deleteIds.add(docID);
+ continue;
+ }
+
+ newDocuments.add(new SolrInputDocument() {{
+ setField(getKeyFieldId(collectionName), docID);
+
+ for (IndexEntry addition : content) {
+ Object fieldValue = addition.value;
+ setField(addition.field, convertValue(fieldValue));
+ }
+ }});
+ }
+
+ commitDeletes(collectionName, deleteIds);
+ commitDocumentChanges(collectionName, newDocuments);
+ }
+ } catch (Exception e) {
+ throw new TemporaryBackendException("Could not restore Solr index", e);
+ }
+ }
+
+ private void deleteIndividualFieldsFromIndex(String collectionName, String keyIdField, String docId, HashSet<IndexEntry> fieldDeletions) throws SolrServerException, IOException {
+ if (fieldDeletions.isEmpty()) return;
+
+ Map<String, String> fieldDeletes = new HashMap<String, String>(1) {{ put("set", null); }};
+
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField(keyIdField, docId);
+ StringBuilder sb = new StringBuilder();
+ for (IndexEntry fieldToDelete : fieldDeletions) {
+ doc.addField(fieldToDelete.field, fieldDeletes);
+ sb.append(fieldToDelete).append(",");
+ }
+
+ if (logger.isTraceEnabled())
+ logger.trace("Deleting individual fields [{}] for document {}", sb.toString(), docId);
+
+ UpdateRequest singleDocument = newUpdateRequest();
+ singleDocument.add(doc);
+ solrClient.request(singleDocument, collectionName);
+ }
+
+ private void commitDocumentChanges(String collectionName, Collection<SolrInputDocument> documents) throws SolrServerException, IOException {
+ if (documents.size() == 0) return;
+
+ try {
+ solrClient.request(newUpdateRequest().add(documents), collectionName);
+ } catch (HttpSolrClient.RemoteSolrException rse) {
+ logger.error("Unable to save documents to Solr as one of the shape objects stored were not compatible with Solr.", rse);
+ logger.error("Details in failed document batch: ");
+ for (SolrInputDocument d : documents) {
+ Collection<String> fieldNames = d.getFieldNames();
+ for (String name : fieldNames) {
+ logger.error(name + ":" + d.getFieldValue(name).toString());
+ }
+ }
+
+ throw rse;
+ }
+ }
+
+ private void commitDeletes(String collectionName, List<String> deleteIds) throws SolrServerException, IOException {
+ if (deleteIds.size() == 0) return;
+ solrClient.request(newUpdateRequest().deleteById(deleteIds), collectionName);
+ }
+
+ @Override
+ public List<String> query(IndexQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
+ List<String> result;
+ String collection = query.getStore();
+ String keyIdField = getKeyFieldId(collection);
+ SolrQuery solrQuery = new SolrQuery("*:*");
+ String queryFilter = buildQueryFilter(query.getCondition(), informations.get(collection));
+ solrQuery.addFilterQuery(queryFilter);
+ if (!query.getOrder().isEmpty()) {
+ List<IndexQuery.OrderEntry> orders = query.getOrder();
+ for (IndexQuery.OrderEntry order1 : orders) {
+ String item = order1.getKey();
+ SolrQuery.ORDER order = order1.getOrder() == Order.ASC ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc;
+ solrQuery.addSort(new SolrQuery.SortClause(item, order));
+ }
+ }
+ solrQuery.setStart(0);
+ if (query.hasLimit()) {
+ solrQuery.setRows(query.getLimit());
+ } else {
+ solrQuery.setRows(maxResults);
+ }
+ try {
+ QueryResponse response = solrClient.query(collection, solrQuery);
+
+ if (logger.isDebugEnabled())
+ logger.debug("Executed query [{}] in {} ms", query.getCondition(), response.getElapsedTime());
+
+ int totalHits = response.getResults().size();
+
+ if (!query.hasLimit() && totalHits >= maxResults)
+ logger.warn("Query result set truncated to first [{}] elements for query: {}", maxResults, query);
+
+ result = new ArrayList<String>(totalHits);
+ for (SolrDocument hit : response.getResults()) {
+ result.add(hit.getFieldValue(keyIdField).toString());
+ }
+ } catch (IOException e) {
+ logger.error("Query did not complete : ", e);
+ throw new PermanentBackendException(e);
+ } catch (SolrServerException e) {
+ logger.error("Unable to query Solr index.", e);
+ throw new PermanentBackendException(e);
+ }
+ return result;
+ }
+
+ @Override
+ public Iterable<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
+ List<RawQuery.Result<String>> result;
+ String collection = query.getStore();
+ String keyIdField = getKeyFieldId(collection);
+ SolrQuery solrQuery = new SolrQuery(query.getQuery())
+ .addField(keyIdField)
+ .setIncludeScore(true)
+ .setStart(query.getOffset())
+ .setRows(query.hasLimit() ? query.getLimit() : maxResults);
+
+ try {
+ QueryResponse response = solrClient.query(collection, solrQuery);
+ if (logger.isDebugEnabled())
+ logger.debug("Executed query [{}] in {} ms", query.getQuery(), response.getElapsedTime());
+
+ int totalHits = response.getResults().size();
+ if (!query.hasLimit() && totalHits >= maxResults) {
+ logger.warn("Query result set truncated to first [{}] elements for query: {}", maxResults, query);
+ }
+ result = new ArrayList<RawQuery.Result<String>>(totalHits);
+
+ for (SolrDocument hit : response.getResults()) {
+ double score = Double.parseDouble(hit.getFieldValue("score").toString());
+ result.add(new RawQuery.Result<String>(hit.getFieldValue(keyIdField).toString(), score));
+ }
+ } catch (IOException e) {
+ logger.error("Query did not complete : ", e);
+ throw new PermanentBackendException(e);
+ } catch (SolrServerException e) {
+ logger.error("Unable to query Solr index.", e);
+ throw new PermanentBackendException(e);
+ }
+ return result;
+ }
+
+ private static String escapeValue(Object value) {
+ return ClientUtils.escapeQueryChars(value.toString());
+ }
+
+ public String buildQueryFilter(Condition<TitanElement> condition, KeyInformation.StoreRetriever informations) {
+ if (condition instanceof PredicateCondition) {
+ PredicateCondition<String, TitanElement> atom = (PredicateCondition<String, TitanElement>) condition;
+ Object value = atom.getValue();
+ String key = atom.getKey();
+ TitanPredicate titanPredicate = atom.getPredicate();
+
+ if (value instanceof Number) {
+ String queryValue = escapeValue(value);
+ Preconditions.checkArgument(titanPredicate instanceof Cmp, "Relation not supported on numeric types: " + titanPredicate);
+ Cmp numRel = (Cmp) titanPredicate;
+ switch (numRel) {
+ case EQUAL:
+ return (key + ":" + queryValue);
+ case NOT_EQUAL:
+ return ("-" + key + ":" + queryValue);
+ case LESS_THAN:
+ //use right curly to mean up to but not including value
+ return (key + ":[* TO " + queryValue + "}");
+ case LESS_THAN_EQUAL:
+ return (key + ":[* TO " + queryValue + "]");
+ case GREATER_THAN:
+ //use left curly to mean greater than but not including value
+ return (key + ":{" + queryValue + " TO *]");
+ case GREATER_THAN_EQUAL:
+ return (key + ":[" + queryValue + " TO *]");
+ default: throw new IllegalArgumentException("Unexpected relation: " + numRel);
+ }
+ } else if (value instanceof String) {
+ Mapping map = getStringMapping(informations.get(key));
+ assert map==Mapping.TEXT || map==Mapping.STRING;
+ if (map==Mapping.TEXT && !titanPredicate.toString().startsWith("CONTAINS"))
+ throw new IllegalArgumentException("Text mapped string values only support CONTAINS queries and not: " + titanPredicate);
+ if (map==Mapping.STRING && titanPredicate.toString().startsWith("CONTAINS"))
+ throw new IllegalArgumentException("String mapped string values do not support CONTAINS queries: " + titanPredicate);
+
+ //Special case
+ if (titanPredicate == Text.CONTAINS) {
+                    //e.g. - if the terms "tomorrow" and "world" were supplied with fq=text:(tomorrow world),
+                    //a sample data set would return 2 documents: one where text = "Tomorrow is the World"
+                    //and another where text = "Hello World". Hence, we decompose the query string
+                    //and build an AND query explicitly because we need AND semantics.
+ value = ((String) value).toLowerCase();
+ List<String> terms = Text.tokenize((String) value);
+
+ if (terms.isEmpty()) {
+ return "";
+ } else if (terms.size() == 1) {
+ return (key + ":(" + escapeValue(terms.get(0)) + ")");
+ } else {
+ And<TitanElement> andTerms = new And<TitanElement>();
+ for (String term : terms) {
+ andTerms.add(new PredicateCondition<String, TitanElement>(key, titanPredicate, term));
+ }
+ return buildQueryFilter(andTerms, informations);
+ }
+ }
+ if (titanPredicate == Text.PREFIX || titanPredicate == Text.CONTAINS_PREFIX) {
+ return (key + ":" + escapeValue(value) + "*");
+ } else if (titanPredicate == Text.REGEX || titanPredicate == Text.CONTAINS_REGEX) {
+ return (key + ":/" + value + "/");
+ } else if (titanPredicate == Cmp.EQUAL) {
+ return (key + ":\"" + escapeValue(value) + "\"");
+ } else if (titanPredicate == Cmp.NOT_EQUAL) {
+ return ("-" + key + ":\"" + escapeValue(value) + "\"");
+ } else {
+ throw new IllegalArgumentException("Relation is not supported for string value: " + titanPredicate);
+ }
+ } else if (value instanceof Geoshape) {
+ Geoshape geo = (Geoshape)value;
+ if (geo.getType() == Geoshape.Type.CIRCLE) {
+ Geoshape.Point center = geo.getPoint();
+ return ("{!geofilt sfield=" + key +
+ " pt=" + center.getLatitude() + "," + center.getLongitude() +
+ " d=" + geo.getRadius() + "} distErrPct=0"); //distance in kilometers
+ } else if (geo.getType() == Geoshape.Type.BOX) {
+ Geoshape.Point southwest = geo.getPoint(0);
+ Geoshape.Point northeast = geo.getPoint(1);
+ return (key + ":[" + southwest.getLatitude() + "," + southwest.getLongitude() +
+ " TO " + northeast.getLatitude() + "," + northeast.getLongitude() + "]");
+ } else if (geo.getType() == Geoshape.Type.POLYGON) {
+ List<Geoshape.Point> coordinates = getPolygonPoints(geo);
+ StringBuilder poly = new StringBuilder(key + ":\"IsWithin(POLYGON((");
+ for (Geoshape.Point coordinate : coordinates) {
+ poly.append(coordinate.getLongitude()).append(" ").append(coordinate.getLatitude()).append(", ");
+ }
+ //close the polygon with the first coordinate
+ poly.append(coordinates.get(0).getLongitude()).append(" ").append(coordinates.get(0).getLatitude());
+ poly.append(")))\" distErrPct=0");
+ return (poly.toString());
+ }
+ } else if (value instanceof Date) {
+ String queryValue = escapeValue(toIsoDate((Date)value));
+ Preconditions.checkArgument(titanPredicate instanceof Cmp, "Relation not supported on date types: " + titanPredicate);
+ Cmp numRel = (Cmp) titanPredicate;
+
+ switch (numRel) {
+ case EQUAL:
+ return (key + ":" + queryValue);
+ case NOT_EQUAL:
+ return ("-" + key + ":" + queryValue);
+ case LESS_THAN:
+ //use right curly to mean up to but not including value
+ return (key + ":[* TO " + queryValue + "}");
+ case LESS_THAN_EQUAL:
+ return (key + ":[* TO " + queryValue + "]");
+ case GREATER_THAN:
+ //use left curly to mean greater than but not including value
+ return (key + ":{" + queryValue + " TO *]");
+ case GREATER_THAN_EQUAL:
+ return (key + ":[" + queryValue + " TO *]");
+ default: throw new IllegalArgumentException("Unexpected relation: " + numRel);
+ }
+ } else if (value instanceof Boolean) {
+ Cmp numRel = (Cmp) titanPredicate;
+ String queryValue = escapeValue(value);
+ switch (numRel) {
+ case EQUAL:
+ return (key + ":" + queryValue);
+ case NOT_EQUAL:
+ return ("-" + key + ":" + queryValue);
+ default:
+ throw new IllegalArgumentException("Boolean types only support EQUAL or NOT_EQUAL");
+ }
+ } else if (value instanceof UUID) {
+ if (titanPredicate == Cmp.EQUAL) {
+ return (key + ":\"" + escapeValue(value) + "\"");
+ } else if (titanPredicate == Cmp.NOT_EQUAL) {
+ return ("-" + key + ":\"" + escapeValue(value) + "\"");
+ } else {
+ throw new IllegalArgumentException("Relation is not supported for uuid value: " + titanPredicate);
+ }
+ } else throw new IllegalArgumentException("Unsupported type: " + value);
+ } else if (condition instanceof Not) {
+ String sub = buildQueryFilter(((Not)condition).getChild(),informations);
+ if (StringUtils.isNotBlank(sub)) return "-("+sub+")";
+ else return "";
+ } else if (condition instanceof And) {
+ int numChildren = ((And) condition).size();
+ StringBuilder sb = new StringBuilder();
+ for (Condition<TitanElement> c : condition.getChildren()) {
+ String sub = buildQueryFilter(c, informations);
+
+ if (StringUtils.isBlank(sub))
+ continue;
+
+                // we don't have to add "+" (which means AND) iff
+                // a. it's a NOT query, or
+                // b. the expression is the single statement in the AND.
+ if (!sub.startsWith("-") && numChildren > 1)
+ sb.append("+");
+
+ sb.append(sub).append(" ");
+ }
+ return sb.toString();
+ } else if (condition instanceof Or) {
+ StringBuilder sb = new StringBuilder();
+ int element=0;
+ for (Condition<TitanElement> c : condition.getChildren()) {
+ String sub = buildQueryFilter(c,informations);
+ if (StringUtils.isBlank(sub)) continue;
+ if (element==0) sb.append("(");
+ else sb.append(" OR ");
+ sb.append(sub);
+ element++;
+ }
+ if (element>0) sb.append(")");
+ return sb.toString();
+ } else {
+ throw new IllegalArgumentException("Invalid condition: " + condition);
+ }
+ return null;
+ }
+
+ private String toIsoDate(Date value) {
+ TimeZone tz = TimeZone.getTimeZone("UTC");
+ DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ df.setTimeZone(tz);
+ return df.format(value);
+ }
+
+ private List<Geoshape.Point> getPolygonPoints(Geoshape polygon) {
+ List<Geoshape.Point> locations = new ArrayList<Geoshape.Point>();
+
+ int index = 0;
+ boolean hasCoordinates = true;
+ while (hasCoordinates) {
+ try {
+                //increment the index so the loop advances through the points and terminates
+                locations.add(polygon.getPoint(index++));
+ } catch (ArrayIndexOutOfBoundsException ignore) {
+ //just means we asked for a point past the size of the list
+ //of known coordinates
+ hasCoordinates = false;
+ }
+ }
+
+ return locations;
+ }
+
+ /**
+ * Solr handles all transactions on the server side. That means a commit,
+ * optimize, or rollback applies to everything since the last commit/optimize/rollback.
+ * The Solr documentation recommends updating Solr from a single process to avoid
+ * race conditions.
+ *
+ * @return New Transaction Handle
+ * @throws com.thinkaurelius.titan.diskstorage.BackendException
+ */
+ @Override
+ public BaseTransactionConfigurable beginTransaction(BaseTransactionConfig config) throws BackendException {
+ return new DefaultTransaction(config);
+ }
+
+ @Override
+ public void close() throws BackendException {
+ logger.trace("Shutting down connection to Solr", solrClient);
+ try {
+ solrClient.close();
+ } catch (IOException e) {
+ throw new TemporaryBackendException(e);
+ }
+ }
+
+ @Override
+ public void clearStorage() throws BackendException {
+ try {
+ if (mode!=Mode.CLOUD) throw new UnsupportedOperationException("Operation only supported for SolrCloud");
+ logger.debug("Clearing storage from Solr: {}", solrClient);
+ ZkStateReader zkStateReader = ((CloudSolrClient) solrClient).getZkStateReader();
+ zkStateReader.updateClusterState(true);
+ ClusterState clusterState = zkStateReader.getClusterState();
+ for (String collection : clusterState.getCollections()) {
+ logger.debug("Clearing collection [{}] in Solr",collection);
+ UpdateRequest deleteAll = newUpdateRequest();
+ deleteAll.deleteByQuery("*:*");
+ solrClient.request(deleteAll, collection);
+ }
+
+ } catch (SolrServerException e) {
+ logger.error("Unable to clear storage from index due to server error on Solr.", e);
+ throw new PermanentBackendException(e);
+ } catch (IOException e) {
+ logger.error("Unable to clear storage from index due to low-level I/O error.", e);
+ throw new PermanentBackendException(e);
+ } catch (Exception e) {
+ logger.error("Unable to clear storage from index due to general error.", e);
+ throw new PermanentBackendException(e);
+ }
+ }
+
+ @Override
+ public boolean supports(KeyInformation information, TitanPredicate titanPredicate) {
+ Class<?> dataType = information.getDataType();
+ Mapping mapping = Mapping.getMapping(information);
+ if (mapping!=Mapping.DEFAULT && !AttributeUtil.isString(dataType)) return false;
+
+ if (Number.class.isAssignableFrom(dataType)) {
+ return titanPredicate instanceof Cmp;
+ } else if (dataType == Geoshape.class) {
+ return titanPredicate == Geo.WITHIN;
+ } else if (AttributeUtil.isString(dataType)) {
+ switch(mapping) {
+ case DEFAULT:
+ case TEXT:
+ return titanPredicate == Text.CONTAINS || titanPredicate == Text.CONTAINS_PREFIX || titanPredicate == Text.CONTAINS_REGEX;
+ case STRING:
+ return titanPredicate == Cmp.EQUAL || titanPredicate==Cmp.NOT_EQUAL || titanPredicate==Text.REGEX || titanPredicate==Text.PREFIX;
+ // case TEXTSTRING:
+ // return (titanPredicate instanceof Text) || titanPredicate == Cmp.EQUAL || titanPredicate==Cmp.NOT_EQUAL;
+ }
+ } else if (dataType == Date.class) {
+ if (titanPredicate instanceof Cmp) return true;
+ } else if (dataType == Boolean.class) {
+ return titanPredicate == Cmp.EQUAL || titanPredicate == Cmp.NOT_EQUAL;
+ } else if (dataType == UUID.class) {
+ return titanPredicate == Cmp.EQUAL || titanPredicate==Cmp.NOT_EQUAL;
+ }
+ return false;
+ }
+
+ @Override
+ public boolean supports(KeyInformation information) {
+ Class<?> dataType = information.getDataType();
+ Mapping mapping = Mapping.getMapping(information);
+ if (Number.class.isAssignableFrom(dataType) || dataType == Geoshape.class || dataType == Date.class || dataType == Boolean.class || dataType == UUID.class) {
+ if (mapping==Mapping.DEFAULT) return true;
+ } else if (AttributeUtil.isString(dataType)) {
+ if (mapping==Mapping.DEFAULT || mapping==Mapping.TEXT || mapping==Mapping.STRING) return true;
+ }
+ return false;
+ }
+
+ @Override
+ public String mapKey2Field(String key, KeyInformation keyInfo) {
+ Preconditions.checkArgument(!StringUtils.containsAny(key, new char[]{' '}),"Invalid key name provided: %s",key);
+ if (!dynFields) return key;
+ if (ParameterType.MAPPED_NAME.hasParameter(keyInfo.getParameters())) return key;
+ String postfix;
+ Class datatype = keyInfo.getDataType();
+ if (AttributeUtil.isString(datatype)) {
+ Mapping map = getStringMapping(keyInfo);
+ switch (map) {
+ case TEXT: postfix = "_t"; break;
+ case STRING: postfix = "_s"; break;
+ default: throw new IllegalArgumentException("Unsupported string mapping: " + map);
+ }
+ } else if (AttributeUtil.isWholeNumber(datatype)) {
+ if (datatype.equals(Long.class)) postfix = "_l";
+ else postfix = "_i";
+ } else if (AttributeUtil.isDecimal(datatype)) {
+ if (datatype.equals(Float.class)) postfix = "_f";
+ else postfix = "_d";
+ } else if (datatype.equals(Geoshape.class)) {
+ postfix = "_g";
+ } else if (datatype.equals(Date.class)) {
+ postfix = "_dt";
+ } else if (datatype.equals(Boolean.class)) {
+ postfix = "_b";
+ } else if (datatype.equals(UUID.class)) {
+ postfix = "_uuid";
+ } else throw new IllegalArgumentException("Unsupported data type ["+datatype+"] for field: " + key);
+ return key+postfix;
+ }
+
+ @Override
+ public IndexFeatures getFeatures() {
+ return SOLR_FEATURES;
+ }
+
+ /*
+ ################# UTILITY METHODS #######################
+ */
+
+ private static Mapping getStringMapping(KeyInformation information) {
+ assert AttributeUtil.isString(information.getDataType());
+ Mapping map = Mapping.getMapping(information);
+ if (map==Mapping.DEFAULT) map = Mapping.TEXT;
+ return map;
+ }
+
+ private UpdateRequest newUpdateRequest() {
+ UpdateRequest req = new UpdateRequest();
+        //always request a COMMIT that waits for the flush and for a new searcher to be opened
+        req.setAction(UpdateRequest.ACTION.COMMIT, true, true);
+ return req;
+ }
+
+ private BackendException storageException(Exception solrException) {
+ return new TemporaryBackendException("Unable to complete query on Solr.", solrException);
+ }
+
+ private static void createCollectionIfNotExists(CloudSolrClient client, Configuration config, String collection)
+ throws IOException, SolrServerException, KeeperException, InterruptedException {
+ if (!checkIfCollectionExists(client, collection)) {
+ Integer numShards = config.get(NUM_SHARDS);
+ Integer maxShardsPerNode = config.get(MAX_SHARDS_PER_NODE);
+ Integer replicationFactor = config.get(REPLICATION_FACTOR);
+
+ CollectionAdminRequest.Create createRequest = new CollectionAdminRequest.Create();
+
+ createRequest.setConfigName(collection);
+ createRequest.setCollectionName(collection);
+ createRequest.setNumShards(numShards);
+ createRequest.setMaxShardsPerNode(maxShardsPerNode);
+ createRequest.setReplicationFactor(replicationFactor);
+
+ CollectionAdminResponse createResponse = createRequest.process(client);
+ if (createResponse.isSuccess()) {
+ logger.trace("Collection {} successfully created.", collection);
+ } else {
+ throw new SolrServerException(Joiner.on("\n").join(createResponse.getErrorMessages()));
+ }
+ }
+
+ waitForRecoveriesToFinish(client, collection);
+ }
+
+ /**
+ * Checks if the collection has already been created in Solr.
+ */
+ private static boolean checkIfCollectionExists(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
+ ZkStateReader zkStateReader = server.getZkStateReader();
+ zkStateReader.updateClusterState(true);
+ ClusterState clusterState = zkStateReader.getClusterState();
+ return clusterState.getCollectionOrNull(collection) != null;
+ }
+
+ /**
+ * Wait for all the collection shards to be ready.
+ */
+ private static void waitForRecoveriesToFinish(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
+ ZkStateReader zkStateReader = server.getZkStateReader();
+ try {
+ boolean cont = true;
+
+ while (cont) {
+ boolean sawLiveRecovering = false;
+ zkStateReader.updateClusterState(true);
+ ClusterState clusterState = zkStateReader.getClusterState();
+ Map<String, Slice> slices = clusterState.getSlicesMap(collection);
+                Preconditions.checkNotNull(slices, "Could not find collection:" + collection);
+
+ for (Map.Entry<String, Slice> entry : slices.entrySet()) {
+ Map<String, Replica> shards = entry.getValue().getReplicasMap();
+ for (Map.Entry<String, Replica> shard : shards.entrySet()) {
+ String state = shard.getValue().getStr(ZkStateReader.STATE_PROP);
+ if ((state.equals(ZkStateReader.RECOVERING)
+ || state.equals(ZkStateReader.SYNC) || state
+ .equals(ZkStateReader.DOWN))
+ && clusterState.liveNodesContain(shard.getValue().getStr(
+ ZkStateReader.NODE_NAME_PROP))) {
+ sawLiveRecovering = true;
+ }
+ }
+ }
+ if (!sawLiveRecovering) {
+ cont = false;
+ } else {
+ Thread.sleep(1000);
+ }
+ }
+ } finally {
+ logger.info("Exiting solr wait");
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/repository/src/main/java/org/apache/atlas/repository/graph/TitanGraphProvider.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/repository/graph/TitanGraphProvider.java b/repository/src/main/java/org/apache/atlas/repository/graph/TitanGraphProvider.java
index 5e61b9a..6605ae7 100755
--- a/repository/src/main/java/org/apache/atlas/repository/graph/TitanGraphProvider.java
+++ b/repository/src/main/java/org/apache/atlas/repository/graph/TitanGraphProvider.java
@@ -18,9 +18,12 @@
package org.apache.atlas.repository.graph;
+import com.google.common.collect.ImmutableMap;
import com.google.inject.Provides;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
+import com.thinkaurelius.titan.diskstorage.StandardIndexProvider;
+import com.thinkaurelius.titan.diskstorage.solr.Solr5Index;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasException;
import org.apache.commons.configuration.Configuration;
@@ -28,6 +31,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Singleton;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.HashMap;
+import java.util.Map;
/**
* Default implementation for Graph Provider that doles out Titan Graph.
@@ -48,6 +55,35 @@ public class TitanGraphProvider implements GraphProvider<TitanGraph> {
return ApplicationProperties.getSubsetConfiguration(configProperties, GRAPH_PREFIX);
}
+ static {
+ addSolr5Index();
+ }
+
+ /**
+     * Titan maps index backend names to implementations using StandardIndexProvider.ALL_MANAGER_CLASSES.
+     * But StandardIndexProvider.ALL_MANAGER_CLASSES is a private static final ImmutableMap, so the only
+     * way to inject Solr5Index is to modify that field via (hacky) reflection.
+ */
+ private static void addSolr5Index() {
+ try {
+ Field field = StandardIndexProvider.class.getDeclaredField("ALL_MANAGER_CLASSES");
+ field.setAccessible(true);
+
+ Field modifiersField = Field.class.getDeclaredField("modifiers");
+ modifiersField.setAccessible(true);
+ modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
+
+            Map<String, String> customMap = new HashMap<String, String>(StandardIndexProvider.getAllProviderClasses());
+ customMap.put("solr5", Solr5Index.class.getName());
+ ImmutableMap<String, String> immap = ImmutableMap.copyOf(customMap);
+ field.set(null, immap);
+
+ LOG.debug("Injected solr5 index - {}", Solr5Index.class.getName());
+ } catch(Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
@Override
@Singleton
@Provides
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/currency.xml
----------------------------------------------------------------------
diff --git a/src/conf/solr/currency.xml b/src/conf/solr/currency.xml
new file mode 100644
index 0000000..3a9c58a
--- /dev/null
+++ b/src/conf/solr/currency.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
+
+<currencyConfig version="1.0">
+ <rates>
+ <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
+ <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
+ <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
+ <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
+ <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
+ <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
+ <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
+ <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
+ <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
+ <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
+ <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
+ <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
+ <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
+ <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
+ <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
+ <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
+ <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
+ <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
+ <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
+ <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
+ <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
+ <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
+ <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
+ <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
+ <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
+ <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
+ <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
+ <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
+ <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
+ <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
+ <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
+ <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
+ <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
+ <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
+ <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
+ <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
+ <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
+ <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
+
+ <!-- Cross-rates for some common currencies -->
+ <rate from="EUR" to="GBP" rate="0.869914" />
+ <rate from="EUR" to="NOK" rate="7.800095" />
+ <rate from="GBP" to="NOK" rate="8.966508" />
+ </rates>
+</currencyConfig>
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/src/conf/solr/lang/stopwords_en.txt b/src/conf/solr/lang/stopwords_en.txt
new file mode 100644
index 0000000..2c164c0
--- /dev/null
+++ b/src/conf/solr/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/48343db9/src/conf/solr/protwords.txt
----------------------------------------------------------------------
diff --git a/src/conf/solr/protwords.txt b/src/conf/solr/protwords.txt
new file mode 100644
index 0000000..1dfc0ab
--- /dev/null
+++ b/src/conf/solr/protwords.txt
@@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+