You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by sj...@apache.org on 2016/05/27 19:43:12 UTC
[2/2] vxquery git commit: VXQUERY-32 Integrate Apache Lucene
VXQUERY-32 Integrate Apache Lucene
Added two functions to VXQuery:
build-index-on-collection creates a lucene index for a collection
collection-from-index queries a lucene index to return a collection
Added tests
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/1f623b16
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/1f623b16
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/1f623b16
Branch: refs/heads/master
Commit: 1f623b1664fb5a86dae8b6be2497d822f80505d9
Parents: 2b59326
Author: Steven Glenn Jacobs <sj...@ucr.edu>
Authored: Fri May 27 12:42:47 2016 -0700
Committer: Steven Glenn Jacobs <sj...@ucr.edu>
Committed: Fri May 27 12:42:47 2016 -0700
----------------------------------------------------------------------
vxquery-core/pom.xml | 31 +-
.../vxquery/functions/builtin-functions.xml | 22 +
.../apache/vxquery/index/IndexAttributes.java | 287 ++++++
.../vxquery/index/IndexDocumentBuilder.java | 882 +++++++++++++++++++
.../org/apache/vxquery/index/IndexElement.java | 42 +
.../VXQueryCollectionOperatorDescriptor.java | 81 +-
.../functions/index/CaseSensitiveAnalyzer.java | 144 +++
.../index/CaseSensitiveQueryParser.java | 42 +
...ctionFromIndexUnnestingEvaluatorFactory.java | 327 +++++++
.../IndexConstructorScalarEvaluatorFactory.java | 70 ++
.../functions/index/IndexConstructorUtil.java | 144 +++
.../runtime/functions/util/FunctionHelper.java | 7 +-
.../vxquery/xmlparser/SAXContentHandler.java | 72 +-
.../org/apache/vxquery/xmlparser/XMLParser.java | 2 +-
.../org/apache/vxquery/xtest/VXQueryTest.java | 49 +-
.../Indexing/createIndex.txt | 0
.../ExpectedTestResults/Indexing/useIndex1.txt | 2 +
.../ExpectedTestResults/Indexing/useIndex2.txt | 1 +
.../ExpectedTestResults/Indexing/useIndex3.txt | 1 +
.../ExpectedTestResults/Indexing/useIndex4.txt | 1 +
.../ExpectedTestResults/Indexing/useIndex5.txt | 3 +
.../ExpectedTestResults/Indexing/useIndex6.txt | 2 +
.../ExpectedTestResults/Indexing/useIndex7.txt | 3 +
.../Queries/XQuery/Indexing/createIndex.xq | 20 +
.../Queries/XQuery/Indexing/useIndex1.xq | 25 +
.../Queries/XQuery/Indexing/useIndex2.xq | 24 +
.../Queries/XQuery/Indexing/useIndex3.xq | 27 +
.../Queries/XQuery/Indexing/useIndex4.xq | 24 +
.../Queries/XQuery/Indexing/useIndex5.xq | 23 +
.../Queries/XQuery/Indexing/useIndex6.xq | 23 +
.../Queries/XQuery/Indexing/useIndex7.xq | 27 +
.../src/test/resources/VXQueryCatalog.xml | 15 +
.../src/test/resources/cat/IndexingQueries.xml | 63 ++
33 files changed, 2380 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/pom.xml b/vxquery-core/pom.xml
index d8f5f53..56eb45f 100644
--- a/vxquery-core/pom.xml
+++ b/vxquery-core/pom.xml
@@ -266,7 +266,36 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
-
+ <!-- Apache Lucene 5.5.1 artifacts added for the index functions. NOTE(review): lucene-demo usually ships sample code; confirm it is really needed here. -->
+ <dependency>
+ <artifactId>lucene-core</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.5.1</version>
+ </dependency>
+ <dependency>
+ <artifactId>lucene-queryparser</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.5.1</version>
+ </dependency>
+ <dependency>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.5.1</version>
+ </dependency>
+ <dependency>
+ <artifactId>lucene-demo</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.5.1</version>
+ </dependency>
+ <dependency>
+ <artifactId>lucene-backward-codecs</artifactId>
+ <groupId>org.apache.lucene</groupId>
+ <type>jar</type>
+ <version>5.5.1</version>
+ </dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
index 3b9371d..8379ccf 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
+++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml
@@ -128,6 +128,28 @@
<!-- Collection operator is added during the rewrite rules phase. -->
</function>
+ <!-- fn:build-index-on-collection($collection-folder as xs:string?, $index-folder as xs:string?) as node()* -->
+ <function name="fn:build-index-on-collection">
+ <param name="collection-folder" type="xs:string?"/>
+ <param name="index-folder" type="xs:string?"/>
+ <return type="node()*"/>
+ <runtime type="scalar" class="org.apache.vxquery.runtime.functions.index.IndexConstructorScalarEvaluatorFactory"/>
+ </function>
+
+ <!-- fn:collection-from-index($index-folder as xs:string?, $element-path as xs:string?) as node()* -->
+ <function name="fn:collection-from-index">
+ <param name="index-folder" type="xs:string?"/>
+ <param name="element-path" type="xs:string?"/>
+ <return type="node()*"/>
+ <runtime type="unnesting" class="org.apache.vxquery.runtime.functions.index.CollectionFromIndexUnnestingEvaluatorFactory"/>
+ <property type="DocumentOrder" class="org.apache.vxquery.compiler.rewriter.rules.propagationpolicies.InputPropertyPropagationPolicy">
+ <argument value="0"/>
+ </property>
+ <property type="UniqueNodes" class="org.apache.vxquery.compiler.rewriter.rules.propagationpolicies.InputPropertyPropagationPolicy">
+ <argument value="0"/>
+ </property>
+ </function>
+
<!-- fn:collection-with-tag($arg1 as xs:string?, $arg2 as xs:string?) as node()* -->
<function name="fn:collection-with-tag">
<param name="arg1" type="xs:string?"/>
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java
new file mode 100644
index 0000000..cf8e3c0
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexAttributes.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.index;
+
+import java.util.List;
+
+import org.xml.sax.Attributes;
+
+/**
+ * {@link Attributes} implementation backed by parallel lists: position {@code i} of every list describes
+ * attribute {@code i}. The attribute count is taken from the size of the names list at construction time.
+ * <p>
+ * Index-based getters return {@code null} for an out-of-range index, as the SAX contract requires, instead
+ * of letting {@link IndexOutOfBoundsException} escape from the backing lists.
+ * </p>
+ */
+public class IndexAttributes implements Attributes {
+    int length;
+
+    List<String> names;
+    List<String> values;
+    List<String> uris;
+    List<String> localnames;
+    List<String> types;
+    List<String> qnames;
+
+    /**
+     * @param n
+     *            attribute names; its size defines {@link #getLength()}
+     * @param v
+     *            attribute values
+     * @param u
+     *            Namespace URIs
+     * @param l
+     *            local names
+     * @param t
+     *            attribute types
+     * @param q
+     *            qualified (prefixed) names
+     */
+    public IndexAttributes(List<String> n, List<String> v, List<String> u, List<String> l, List<String> t,
+            List<String> q) {
+        length = n.size();
+        names = n;
+        values = v;
+        uris = u;
+        localnames = l;
+        types = t;
+        qnames = q;
+    }
+
+    /**
+     * Bounds-checked list access.
+     *
+     * @return the entry at {@code index}, or {@code null} if the list is missing or the index is out of range
+     */
+    private static String entryAt(List<String> list, int index) {
+        if (list == null || index < 0 || index >= list.size()) {
+            return null;
+        }
+        return list.get(index);
+    }
+
+    /**
+     * Null-safe comparison of a stored name against a requested one; a missing stored entry never matches.
+     */
+    private static boolean sameName(String stored, String wanted) {
+        return stored != null && stored.equals(wanted);
+    }
+
+    /**
+     * @return The number of attributes in the list.
+     */
+    @Override
+    public int getLength() {
+        return length;
+    }
+
+    /**
+     * Look up an attribute's Namespace URI by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The Namespace URI, or null if the index is out of range.
+     */
+    @Override
+    public String getURI(int index) {
+        return entryAt(uris, index);
+    }
+
+    /**
+     * Look up an attribute's local name by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The local name, or null if the index is out of range.
+     */
+    @Override
+    public String getLocalName(int index) {
+        return entryAt(localnames, index);
+    }
+
+    /**
+     * Look up an attribute's XML qualified (prefixed) name by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The XML qualified name, or null if the index is out of range.
+     */
+    @Override
+    public String getQName(int index) {
+        return entryAt(qnames, index);
+    }
+
+    /**
+     * Look up an attribute's type by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The attribute's type as a string, or null if the index is out of range.
+     */
+    @Override
+    public String getType(int index) {
+        return entryAt(types, index);
+    }
+
+    /**
+     * Look up an attribute's value by index.
+     *
+     * @param index
+     *            The attribute index (zero-based).
+     * @return The attribute's value as a string, or null if the index is out of range.
+     */
+    @Override
+    public String getValue(int index) {
+        return entryAt(values, index);
+    }
+
+    ////////////////////////////////////////////////////////////////////
+    // Name-based query.
+    ////////////////////////////////////////////////////////////////////
+
+    /**
+     * Look up the index of an attribute by Namespace name.
+     *
+     * @param uri
+     *            The Namespace URI, or the empty string if the name has no Namespace URI.
+     * @param localName
+     *            The attribute's local name.
+     * @return The index of the first matching attribute, or -1 if it does not appear in the list.
+     */
+    @Override
+    public int getIndex(String uri, String localName) {
+        for (int i = 0; i < length; i++) {
+            if (sameName(entryAt(localnames, i), localName) && sameName(entryAt(uris, i), uri)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Look up the index of an attribute by XML qualified (prefixed) name.
+     *
+     * @param qName
+     *            The qualified (prefixed) name.
+     * @return The index of the first matching attribute, or -1 if it does not appear in the list.
+     */
+    @Override
+    public int getIndex(String qName) {
+        for (int i = 0; i < length; i++) {
+            if (sameName(entryAt(qnames, i), qName)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Look up an attribute's type by Namespace name.
+     *
+     * @param uri
+     *            The Namespace URI, or the empty String if the name has no Namespace URI.
+     * @param localName
+     *            The local name of the attribute.
+     * @return The attribute type as a string, or null if the attribute is not in the list.
+     */
+    @Override
+    public String getType(String uri, String localName) {
+        // getIndex yields -1 when nothing matches, which the indexed getter maps to null.
+        return getType(getIndex(uri, localName));
+    }
+
+    /**
+     * Look up an attribute's type by XML qualified (prefixed) name.
+     *
+     * @param qName
+     *            The XML qualified name.
+     * @return The attribute type as a string, or null if the attribute is not in the list.
+     */
+    @Override
+    public String getType(String qName) {
+        return getType(getIndex(qName));
+    }
+
+    /**
+     * Look up an attribute's value by Namespace name.
+     *
+     * @param uri
+     *            The Namespace URI, or the empty String if the name has no Namespace URI.
+     * @param localName
+     *            The local name of the attribute.
+     * @return The attribute value as a string, or null if the attribute is not in the list.
+     */
+    @Override
+    public String getValue(String uri, String localName) {
+        return getValue(getIndex(uri, localName));
+    }
+
+    /**
+     * Look up an attribute's value by XML qualified (prefixed) name.
+     *
+     * @param qName
+     *            The XML qualified name.
+     * @return The attribute value as a string, or null if the attribute is not in the list.
+     */
+    @Override
+    public String getValue(String qName) {
+        return getValue(getIndex(qName));
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
new file mode 100644
index 0000000..2884097
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexDocumentBuilder.java
@@ -0,0 +1,882 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.index;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.BooleanPointable;
+import org.apache.hyracks.data.std.primitive.BytePointable;
+import org.apache.hyracks.data.std.primitive.DoublePointable;
+import org.apache.hyracks.data.std.primitive.FloatPointable;
+import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.data.std.primitive.LongPointable;
+import org.apache.hyracks.data.std.primitive.ShortPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.vxquery.datamodel.accessors.PointablePool;
+import org.apache.vxquery.datamodel.accessors.PointablePoolFactory;
+import org.apache.vxquery.datamodel.accessors.SequencePointable;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.CodedQNamePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSBinaryPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDatePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDateTimePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDecimalPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSDurationPointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSQNamePointable;
+import org.apache.vxquery.datamodel.accessors.atomic.XSTimePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.AttributeNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.DocumentNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.ElementNodePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
+import org.apache.vxquery.datamodel.accessors.nodes.TextOrCommentNodePointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.runtime.functions.cast.CastToStringOperation;
+import org.apache.vxquery.serializer.XMLSerializer;
+
+public class IndexDocumentBuilder extends XMLSerializer {
+ private final IPointable treePointable;
+
+ private final PointablePool pp;
+ private NodeTreePointable ntp;
+
+ private final ArrayBackedValueStorage abvs = new ArrayBackedValueStorage();
+ private final DataOutput dOut = abvs.getDataOutput();
+ private final CastToStringOperation castToString = new CastToStringOperation();
+ private final Document doc;
+ private final List<ComplexItem> results;
+
+ private final byte[] bstart;
+ private final int sstart;
+ private final int lstart;
+ private final IndexWriter writer;
+
+ /**
+  * Pairs the Lucene field built for one index entry with the id of that entry.
+  * <p>
+  * Declared static because it never touches the enclosing builder, so instances do not need to hold a
+  * reference to it.
+  * </p>
+  */
+ static class ComplexItem {
+     public final StringField sf;
+     public final String id;
+
+     /**
+      * @param sfin
+      *            field to be added to the Lucene document
+      * @param idin
+      *            id of the index element the field was built from
+      */
+     public ComplexItem(StringField sfin, String idin) {
+         sf = sfin;
+         id = idin;
+     }
+ }
+
+ /**
+  * Creates a builder that collects the index entries of one serialized tree into a single Lucene document.
+  * <p>
+  * TODO: Handle Processing Instructions, PrefixedNames, and Namespace entries
+  * </p>
+  *
+  * @param tree
+  *            pointable over the bytes of the tree to index
+  * @param inWriter
+  *            writer that receives the document when {@link #printStart()} is called
+  */
+ public IndexDocumentBuilder(IPointable tree, IndexWriter inWriter) {
+     this.treePointable = tree;
+     writer = inWriter;
+
+     // Convert to a tagged value pointable. Use the pointable's own start offset rather than assuming
+     // the value begins at byte 0 of the backing array.
+     TaggedValuePointable tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+     tvp.set(treePointable.getByteArray(), treePointable.getStartOffset(), treePointable.getLength());
+
+     // Remember where the tree lives so printStart() can begin the traversal there.
+     bstart = tvp.getByteArray();
+     sstart = tvp.getStartOffset();
+     lstart = tvp.getLength();
+
+     doc = new Document();
+
+     results = new ArrayList<ComplexItem>();
+
+     pp = PointablePoolFactory.INSTANCE.createPointablePool();
+ }
+
+ /**
+  * Entry point for indexing, wrapping the traversal functions adapted from XMLSerializer: walks the
+  * tree given to the constructor, copies collected fields into the Lucene document and hands the
+  * document to the writer.
+  *
+  * @throws IOException
+  *             if the traversal or the index writer fails
+  */
+ public void printStart() throws IOException {
+     print(bstart, sstart, lstart, "0", "");
+
+     // NOTE(review): these bounds leave the first and the last collected item out of the document;
+     // confirm that this is intended.
+     final int end = results.size() - 1;
+     for (int idx = 1; idx < end; idx++) {
+         //TODO: Since each doc is a file,
+         //we can only handle files
+         //small enough to fit in memory
+         final ComplexItem item = results.get(idx);
+         doc.add(item.sf);
+     }
+
+     writer.addDocument(doc);
+ }
+
+ /**
+  * Adapted from XMLSerializer. Wraps the given byte range in a pooled TaggedValuePointable and indexes
+  * it; this and the following functions traverse the value and create the items for the Lucene index.
+  *
+  * @param b
+  *            backing byte array
+  * @param s
+  *            start offset of the value in {@code b}
+  * @param l
+  *            length of the value in bytes
+  * @param deweyId
+  *            dotted id assigned to this value
+  * @param epath
+  *            element path of the parent
+  * @throws IOException
+  *             if indexing the value fails
+  */
+ public void print(byte[] b, int s, int l, String deweyId, String epath) throws IOException {
+     final TaggedValuePointable tagged = pp.takeOne(TaggedValuePointable.class);
+     try {
+         tagged.set(b, s, l);
+         printTaggedValuePointable(tagged, deweyId, epath);
+     } finally {
+         // always return the pointable to the pool, even when indexing throws
+         pp.giveBack(tagged);
+     }
+ }
+
+ private void printTaggedValuePointable(TaggedValuePointable tvp, String deweyId, String epath) throws IOException {
+ byte tag = tvp.getTag();
+ String type = "text";
+ String[] result = { "", "" };
+ switch ((int) tag) {
+ case ValueTag.XS_ANY_URI_TAG:
+ result = printString(tvp, epath);
+ break;
+
+ case ValueTag.XS_BASE64_BINARY_TAG:
+ result = printBase64Binary(tvp, epath);
+ break;
+
+ case ValueTag.XS_BOOLEAN_TAG:
+ result = printBoolean(tvp, epath);
+ break;
+
+ case ValueTag.XS_DATE_TAG:
+ result = printDate(tvp, epath);
+ break;
+
+ case ValueTag.XS_DATETIME_TAG:
+ result = printDateTime(tvp, epath);
+ break;
+
+ case ValueTag.XS_DAY_TIME_DURATION_TAG:
+ result = printDTDuration(tvp, epath);
+ break;
+
+ case ValueTag.XS_BYTE_TAG:
+ result = printByte(tvp, epath);
+ break;
+
+ case ValueTag.XS_DECIMAL_TAG:
+ result = printDecimal(tvp, epath);
+ break;
+
+ case ValueTag.XS_DOUBLE_TAG:
+ result = printDouble(tvp, epath);
+ break;
+
+ case ValueTag.XS_DURATION_TAG:
+ result = printDuration(tvp, epath);
+ break;
+
+ case ValueTag.XS_FLOAT_TAG:
+ result = printFloat(tvp, epath);
+ break;
+
+ case ValueTag.XS_G_DAY_TAG:
+ result = printGDay(tvp, epath);
+ break;
+
+ case ValueTag.XS_G_MONTH_TAG:
+ result = printGMonth(tvp, epath);
+ break;
+
+ case ValueTag.XS_G_MONTH_DAY_TAG:
+ result = printGMonthDay(tvp, epath);
+ break;
+
+ case ValueTag.XS_G_YEAR_TAG:
+ result = printGYear(tvp, epath);
+ break;
+
+ case ValueTag.XS_G_YEAR_MONTH_TAG:
+ result = printGYearMonth(tvp, epath);
+ break;
+
+ case ValueTag.XS_HEX_BINARY_TAG:
+ result = printHexBinary(tvp, epath);
+ break;
+
+ case ValueTag.XS_INT_TAG:
+ case ValueTag.XS_UNSIGNED_SHORT_TAG:
+ result = printInt(tvp, epath);
+ break;
+
+ case ValueTag.XS_INTEGER_TAG:
+ case ValueTag.XS_LONG_TAG:
+ case ValueTag.XS_NEGATIVE_INTEGER_TAG:
+ case ValueTag.XS_NON_POSITIVE_INTEGER_TAG:
+ case ValueTag.XS_NON_NEGATIVE_INTEGER_TAG:
+ case ValueTag.XS_POSITIVE_INTEGER_TAG:
+ case ValueTag.XS_UNSIGNED_INT_TAG:
+ case ValueTag.XS_UNSIGNED_LONG_TAG:
+ result = printInteger(tvp, epath);
+ break;
+
+ case ValueTag.XS_NOTATION_TAG:
+ result = printString(tvp, epath);
+ break;
+
+ case ValueTag.XS_QNAME_TAG:
+ result = printQName(tvp, epath);
+ break;
+
+ case ValueTag.XS_SHORT_TAG:
+ case ValueTag.XS_UNSIGNED_BYTE_TAG:
+ result = printShort(tvp, epath);
+ break;
+
+ case ValueTag.XS_STRING_TAG:
+ case ValueTag.XS_NORMALIZED_STRING_TAG:
+ case ValueTag.XS_TOKEN_TAG:
+ case ValueTag.XS_LANGUAGE_TAG:
+ case ValueTag.XS_NMTOKEN_TAG:
+ case ValueTag.XS_NAME_TAG:
+ case ValueTag.XS_NCNAME_TAG:
+ case ValueTag.XS_ID_TAG:
+ case ValueTag.XS_IDREF_TAG:
+ case ValueTag.XS_ENTITY_TAG:
+ result = printString(tvp, epath);
+ break;
+
+ case ValueTag.XS_TIME_TAG:
+ result = printTime(tvp, epath);
+ break;
+
+ case ValueTag.XS_UNTYPED_ATOMIC_TAG:
+ result = printString(tvp, epath);
+ break;
+
+ case ValueTag.XS_YEAR_MONTH_DURATION_TAG:
+ result = printYMDuration(tvp, epath);
+ break;
+
+ case ValueTag.ATTRIBUTE_NODE_TAG:
+ type = "attribute";
+ printAttributeNode(tvp, deweyId, epath);
+ break;
+
+ case ValueTag.TEXT_NODE_TAG:
+ type = "textnode";
+ result = printTextNode(tvp, epath);
+ break;
+
+ case ValueTag.COMMENT_NODE_TAG:
+ type = "comment";
+ result = printCommentNode(tvp, epath);
+ break;
+
+ case ValueTag.SEQUENCE_TAG:
+ type = "sequence";
+ printSequence(tvp, deweyId, epath);
+ break;
+
+ case ValueTag.NODE_TREE_TAG:
+ type = "tree";
+ printNodeTree(tvp, deweyId, epath);
+ break;
+
+ case ValueTag.ELEMENT_NODE_TAG:
+ type = "element";
+ printElementNode(tvp, deweyId, epath);
+ break;
+
+ case ValueTag.DOCUMENT_NODE_TAG:
+ type = "doc";
+ buildIndexItem(deweyId, type, result, epath);
+ printDocumentNode(tvp, deweyId, epath);
+ break;
+
+ default:
+ throw new UnsupportedOperationException("Encountered tag: " + tvp.getTag());
+ }
+ if ((int) tag != ValueTag.DOCUMENT_NODE_TAG && (int) tag != ValueTag.SEQUENCE_TAG
+ && (int) tag != ValueTag.NODE_TREE_TAG && (int) tag != ValueTag.ELEMENT_NODE_TAG
+ && (int) tag != ValueTag.ATTRIBUTE_NODE_TAG) {
+ buildIndexItem(deweyId, type, result, epath);
+ }
+
+ }
+
+ private void buildIndexItem(String deweyId, String type, String[] result, String parentPath) {
+ //Create an Index element
+ IndexElement test = new IndexElement(deweyId, type, result[1]);
+
+ String path = test.epath();
+
+ path = StringUtils.replace(path, parentPath, "");
+ //Parser doesn't like / so paths are saved as name.name....
+ String luceneParentPath = parentPath.replaceAll("/", ".");
+
+ if (!type.equals("doc")) {
+ path = path.replaceFirst("/", ":");
+ } else {
+ luceneParentPath = "";
+ }
+ //Parser doesn't like / so paths are saved as name.name....
+ path = path.replaceAll("/", ".");
+ //Add this element to the array (they will be added in reverse order.
+ String fullItem = luceneParentPath + path + "." + test.type();
+
+ results.add(new ComplexItem(new StringField("item", fullItem, Field.Store.YES), test.id()));
+ }
+
+ private String[] printDecimal(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDecimalPointable dp = pp.takeOne(XSDecimalPointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertDecimal(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private void printNodeTree(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+ if (ntp != null) {
+ throw new IllegalStateException("Nested NodeTreePointable found");
+ }
+ ntp = pp.takeOne(NodeTreePointable.class);
+ TaggedValuePointable rootTVP = pp.takeOne(TaggedValuePointable.class);
+ try {
+ tvp.getValue(ntp);
+ ntp.getRootNode(rootTVP);
+ printTaggedValuePointable(rootTVP, deweyId, path);
+ } finally {
+ pp.giveBack(rootTVP);
+ pp.giveBack(ntp);
+ ntp = null;
+ }
+ }
+
+ private String[] printCommentNode(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ TextOrCommentNodePointable tcnp = pp.takeOne(TextOrCommentNodePointable.class);
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ try {
+ tvp.getValue(tcnp);
+ tcnp.getValue(ntp, utf8sp);
+
+ result = printString(utf8sp, path);
+
+ } finally {
+ pp.giveBack(tcnp);
+ pp.giveBack(utf8sp);
+ }
+ return result;
+ }
+
+ private String[] printTextNode(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ TextOrCommentNodePointable tcnp = pp.takeOne(TextOrCommentNodePointable.class);
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ try {
+ tvp.getValue(tcnp);
+ tcnp.getValue(ntp, utf8sp);
+ result = printString(utf8sp, path);
+ } finally {
+ pp.giveBack(tcnp);
+ pp.giveBack(utf8sp);
+ }
+ return result;
+ }
+
+ private void printAttributeNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+ String[] result = { "", path };
+ AttributeNodePointable anp = pp.takeOne(AttributeNodePointable.class);
+ CodedQNamePointable cqp = pp.takeOne(CodedQNamePointable.class);
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ TaggedValuePointable valueTVP = pp.takeOne(TaggedValuePointable.class);
+ try {
+ tvp.getValue(anp);
+ anp.getName(cqp);
+ result = printPrefixedQName(cqp, utf8sp, path);
+ buildIndexItem(deweyId, "attribute", result, path);
+
+ anp.getValue(ntp, valueTVP);
+
+ String attributeValueId = deweyId + ".0";
+ printTaggedValuePointable(valueTVP, attributeValueId, result[1]);
+
+ } finally {
+ pp.giveBack(valueTVP);
+ pp.giveBack(utf8sp);
+ pp.giveBack(anp);
+ pp.giveBack(cqp);
+ }
+ }
+
+ private void printElementNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+ String[] result = { "", path };
+ ElementNodePointable enp = pp.takeOne(ElementNodePointable.class);
+ CodedQNamePointable cqp = pp.takeOne(CodedQNamePointable.class);
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ SequencePointable seqp = pp.takeOne(SequencePointable.class);
+ try {
+ tvp.getValue(enp);
+ enp.getName(cqp);
+ result = printPrefixedQName(cqp, utf8sp, path);
+ buildIndexItem(deweyId, "element", result, path);
+
+ enp.getAttributeSequence(ntp, seqp);
+ int numattributes = 0;
+ if (seqp.getByteArray() != null && seqp.getEntryCount() > 0) {
+ printSequence(seqp, deweyId, 0, result[1]);
+ numattributes = seqp.getEntryCount();
+ }
+
+ enp.getChildrenSequence(ntp, seqp);
+ if (seqp.getByteArray() != null) {
+ printSequence(seqp, deweyId, numattributes, result[1]);
+ }
+
+ } finally {
+ pp.giveBack(seqp);
+ pp.giveBack(utf8sp);
+ pp.giveBack(cqp);
+ pp.giveBack(enp);
+ }
+ }
+
+ private String[] printPrefixedQName(CodedQNamePointable cqp, UTF8StringPointable utf8sp, String path) {
+ ntp.getString(cqp.getLocalCode(), utf8sp);
+ return printString(utf8sp, path);
+ }
+
+ private void printDocumentNode(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+ DocumentNodePointable dnp = pp.takeOne(DocumentNodePointable.class);
+ SequencePointable seqp = pp.takeOne(SequencePointable.class);
+ try {
+ tvp.getValue(dnp);
+ dnp.getContent(ntp, seqp);
+ printSequence(seqp, deweyId, 0, path);
+ } finally {
+ pp.giveBack(seqp);
+ pp.giveBack(dnp);
+ }
+ }
+
+ private void printSequence(TaggedValuePointable tvp, String deweyId, String path) throws IOException {
+ SequencePointable seqp = pp.takeOne(SequencePointable.class);
+ try {
+ tvp.getValue(seqp);
+ printSequence(seqp, deweyId, 0, path);
+ } finally {
+ pp.giveBack(seqp);
+ }
+ }
+
+ private void printSequence(SequencePointable seqp, String deweyId, int addon, String path) throws IOException {
+ VoidPointable vp = pp.takeOne(VoidPointable.class);
+ try {
+ int len = seqp.getEntryCount();
+ for (int i = 0; i < len; ++i) {
+ int location = i + addon;
+ String childID = deweyId + "." + Integer.toString(location);
+ seqp.getEntry(i, vp);
+ print(vp.getByteArray(), vp.getStartOffset(), vp.getLength(), childID, path);
+ }
+ } finally {
+ pp.giveBack(vp);
+ }
+ }
+
+ private String[] printBase64Binary(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSBinaryPointable bp = pp.takeOne(XSBinaryPointable.class);
+ try {
+ tvp.getValue(bp);
+ abvs.reset();
+ castToString.convertBase64Binary(bp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(bp);
+ }
+ return result;
+ }
+
+ private String[] printBoolean(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ BooleanPointable bp = pp.takeOne(BooleanPointable.class);
+ try {
+ tvp.getValue(bp);
+ result[0] = Boolean.toString(bp.getBoolean());
+ result[1] = path + "/" + result[0];
+ } finally {
+ pp.giveBack(bp);
+ }
+ return result;
+ }
+
+ private String[] printByte(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ BytePointable bp = pp.takeOne(BytePointable.class);
+ try {
+ tvp.getValue(bp);
+ result[0] = Byte.toString(bp.byteValue());
+ result[1] = path + "/" + result[0];
+ } finally {
+ pp.giveBack(bp);
+ }
+ return result;
+ }
+
+ private String[] printDouble(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ DoublePointable dp = pp.takeOne(DoublePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertDouble(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printDate(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertDate(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printDateTime(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDateTimePointable dtp = pp.takeOne(XSDateTimePointable.class);
+ try {
+ tvp.getValue(dtp);
+ abvs.reset();
+ castToString.convertDatetime(dtp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dtp);
+ }
+ return result;
+ }
+
+ private String[] printDTDuration(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ LongPointable lp = pp.takeOne(LongPointable.class);
+ try {
+ tvp.getValue(lp);
+ abvs.reset();
+ castToString.convertDTDuration(lp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(lp);
+ }
+ return result;
+ }
+
+ private String[] printDuration(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDurationPointable dp = pp.takeOne(XSDurationPointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertDuration(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printFloat(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ FloatPointable fp = pp.takeOne(FloatPointable.class);
+ try {
+ tvp.getValue(fp);
+ abvs.reset();
+ castToString.convertFloat(fp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(fp);
+ }
+ return result;
+ }
+
+ private String[] printGDay(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertGDay(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printGMonth(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertGMonth(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printGMonthDay(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertGMonthDay(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printGYear(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertGYear(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printGYearMonth(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSDatePointable dp = pp.takeOne(XSDatePointable.class);
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertGYearMonth(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printHexBinary(TaggedValuePointable tvp, String path) throws IOException {
+ String[] result = { "", path };
+ XSBinaryPointable bp = pp.takeOne(XSBinaryPointable.class);
+ try {
+ tvp.getValue(bp);
+ abvs.reset();
+ castToString.convertHexBinary(bp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(bp);
+ }
+ return result;
+ }
+
+ private String[] printInt(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ IntegerPointable ip = pp.takeOne(IntegerPointable.class);
+ try {
+ tvp.getValue(ip);
+ result[0] = Integer.toString(ip.intValue());
+ result[1] = path + "/" + result[0];
+ } finally {
+ pp.giveBack(ip);
+ }
+ return result;
+ }
+
+ private String[] printInteger(TaggedValuePointable tvp, String path) {
+ String[] result = { "", path };
+ LongPointable lp = pp.takeOne(LongPointable.class);
+ try {
+ tvp.getValue(lp);
+ result[0] = Long.toString(lp.longValue());
+ result[1] = path + "/" + result[0];
+ } finally {
+ pp.giveBack(lp);
+ }
+ return result;
+ }
+
+ private String[] printShort(TaggedValuePointable tvp, String path) {
+ ShortPointable sp = pp.takeOne(ShortPointable.class);
+ String[] result = { "", path };
+ try {
+ tvp.getValue(sp);
+ result[0] = Short.toString(sp.shortValue());
+ result[1] = path + "/" + result[0];
+ } finally {
+ pp.giveBack(sp);
+ }
+ return result;
+ }
+
+ private String[] printQName(TaggedValuePointable tvp, String path) throws IOException {
+ XSQNamePointable dp = pp.takeOne(XSQNamePointable.class);
+ String[] result = { "", path };
+ try {
+ tvp.getValue(dp);
+ abvs.reset();
+ castToString.convertQName(dp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(dp);
+ }
+ return result;
+ }
+
+ private String[] printStringAbvs(String path) {
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ String[] result = { "", path };
+ try {
+ utf8sp.set(abvs.getByteArray(), abvs.getStartOffset() + 1, abvs.getLength() - 1);
+ result = printString(utf8sp, path);
+ } finally {
+ pp.giveBack(utf8sp);
+ }
+ return result;
+ }
+
+ private String[] printString(TaggedValuePointable tvp, String path) {
+ UTF8StringPointable utf8sp = pp.takeOne(UTF8StringPointable.class);
+ String[] result = { "", path };
+ try {
+ tvp.getValue(utf8sp);
+ result = printString(utf8sp, path);
+ } finally {
+ pp.giveBack(utf8sp);
+ }
+ return result;
+ }
+
+ private String[] printString(UTF8StringPointable utf8sp, String path) {
+ int utfLen = utf8sp.getUTFLength();
+ int offset = 2;
+ String[] result = { "", path };
+ while (utfLen > 0) {
+ char c = utf8sp.charAt(offset);
+ switch (c) {
+ case '<':
+ result[0] += "<";
+ break;
+
+ case '>':
+ result[0] += ">";
+ break;
+
+ case '&':
+ result[0] += "&";
+ break;
+
+ case '"':
+ result[0] += """;
+ break;
+
+ case '\'':
+ result[0] += "'";
+ break;
+
+ default:
+ result[0] += Character.toString(c);
+ break;
+ }
+ int cLen = UTF8StringPointable.getModifiedUTF8Len(c);
+ offset += cLen;
+ utfLen -= cLen;
+
+ }
+ result[1] = path + "/" + result[0];
+ return result;
+ }
+
+ private String[] printTime(TaggedValuePointable tvp, String path) throws IOException {
+ XSTimePointable tp = pp.takeOne(XSTimePointable.class);
+ String[] result = { "", path };
+ try {
+ tvp.getValue(tp);
+ abvs.reset();
+ castToString.convertTime(tp, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(tp);
+ }
+ return result;
+ }
+
+ private String[] printYMDuration(TaggedValuePointable tvp, String path) throws IOException {
+ IntegerPointable ip = pp.takeOne(IntegerPointable.class);
+ String[] result = { "", path };
+ try {
+ tvp.getValue(ip);
+ abvs.reset();
+ castToString.convertYMDuration(ip, dOut);
+ result = printStringAbvs(path);
+ } catch (Exception e) {
+ throw new IOException(e);
+ } finally {
+ pp.giveBack(ip);
+ }
+ return result;
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java
new file mode 100644
index 0000000..d2487a5
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/index/IndexElement.java
@@ -0,0 +1,42 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.index;
+
+/**
+ * Immutable holder for three strings describing an indexed item: an id, a type and an
+ * element path. Values are stored exactly as given; no validation is performed.
+ */
+public class IndexElement {
+    private final String id;
+    private final String type;
+    private final String elementpath;
+
+    /**
+     * @param id identifier of the item
+     * @param type type of the item
+     * @param elementpath element path of the item
+     */
+    public IndexElement(String id, String type, String elementpath) {
+        this.id = id;
+        this.type = type;
+        this.elementpath = elementpath;
+    }
+
+    /** @return the id passed to the constructor */
+    public String id() {
+        return id;
+    }
+
+    /** @return the type passed to the constructor */
+    public String type() {
+        return type;
+    }
+
+    /** @return the element path passed to the constructor */
+    public String epath() {
+        return elementpath;
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
index b8dca63..ef51cee 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
@@ -18,8 +18,6 @@ package org.apache.vxquery.metadata;
import java.io.ByteArrayInputStream;
import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.nio.ByteBuffer;
@@ -31,8 +29,6 @@ import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
-import javax.xml.parsers.ParserConfigurationException;
-
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.lang.StringUtils;
@@ -67,7 +63,6 @@ import org.apache.vxquery.hdfs2.HDFSFunctions;
import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
import org.apache.vxquery.xmlparser.XMLParser;
-import org.xml.sax.SAXException;
public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
private static final long serialVersionUID = 1L;
@@ -179,50 +174,34 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
for (int i = 0; i < size; i++) {
//read split
context = ctxFactory.createContext(job.getConfiguration(), i);
- try {
- reader = inputFormat.createRecordReader(inputSplits.get(i), context);
- reader.initialize(inputSplits.get(i), context);
- while (reader.nextKeyValue()) {
- value = reader.getCurrentValue().toString();
- //Split value if it contains more than one item with the tag
- if (StringUtils.countMatches(value, tag) > 1) {
- String items[] = value.split(tag);
- for (String item : items) {
- if (item.length() > 0) {
- item = START_TAG + tag + item;
- stream = new ByteArrayInputStream(
- item.getBytes(StandardCharsets.UTF_8));
- parser.parseHDFSElements(stream, writer, fta, i);
- }
+
+ reader = inputFormat.createRecordReader(inputSplits.get(i), context);
+ reader.initialize(inputSplits.get(i), context);
+ while (reader.nextKeyValue()) {
+ value = reader.getCurrentValue().toString();
+ //Split value if it contains more than one item with the tag
+ if (StringUtils.countMatches(value, tag) > 1) {
+ String items[] = value.split(tag);
+ for (String item : items) {
+ if (item.length() > 0) {
+ item = START_TAG + tag + item;
+ stream = new ByteArrayInputStream(
+ item.getBytes(StandardCharsets.UTF_8));
+ parser.parseHDFSElements(stream, writer, fta, i);
}
- } else {
- value = START_TAG + value;
- //create an input stream to the file currently reading and send it to parser
- stream = new ByteArrayInputStream(
- value.getBytes(StandardCharsets.UTF_8));
- parser.parseHDFSElements(stream, writer, fta, i);
}
- }
-
- } catch (InterruptedException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
+ } else {
+ value = START_TAG + value;
+ //create an input stream to the file currently reading and send it to parser
+ stream = new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8));
+ parser.parseHDFSElements(stream, writer, fta, i);
}
}
- }
- } catch (IOException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
- }
- } catch (ParserConfigurationException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
- }
- } catch (SAXException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
}
+
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
}
} else {
try {
@@ -248,22 +227,14 @@ public class VXQueryCollectionOperatorDescriptor extends AbstractSingleActivityO
throw new HyracksDataException("Invalid HDFS directory parameter (" + nodeId + ":"
+ directory + ") passed to collection.");
}
- } catch (FileNotFoundException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
- }
- } catch (IOException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
- }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
}
}
try {
fs.close();
- } catch (IOException e) {
- if (LOGGER.isLoggable(Level.SEVERE)) {
- LOGGER.severe(e.getMessage());
- }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
}
}
}
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java
new file mode 100644
index 0000000..803aeee
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveAnalyzer.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.standard.ClassicAnalyzer;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.std40.StandardTokenizer40;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+/**
+ * Analyzer that runs a {@link StandardTokenizer} (or a {@link StandardTokenizer40} when
+ * {@code getVersion()} is before {@link Version#LUCENE_4_7_0}) through a
+ * {@link StandardFilter} and then a {@link StopFilter}.
+ * <p>
+ * No lower-casing filter is part of the chain, so tokens keep the case they had in the input.
+ */
+public final class CaseSensitiveAnalyzer extends StopwordAnalyzerBase {
+
+    /** Maximum token length used until {@link #setMaxTokenLength(int)} is called. */
+    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+    private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+    /**
+     * Stop words used by the no-argument constructor; this is
+     * {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}.
+     */
+    public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+    /**
+     * Creates an analyzer that removes the given stop words.
+     *
+     * @param stopWords
+     *            stop words
+     */
+    public CaseSensitiveAnalyzer(CharArraySet stopWords) {
+        super(stopWords);
+    }
+
+    /**
+     * Creates an analyzer that removes the words in {@link #STOP_WORDS_SET}.
+     */
+    public CaseSensitiveAnalyzer() {
+        this(STOP_WORDS_SET);
+    }
+
+    /**
+     * Creates an analyzer whose stop words are loaded from a reader.
+     *
+     * @see WordlistLoader#getWordSet(Reader)
+     * @param stopwords
+     *            Reader to read stop words from
+     */
+    public CaseSensitiveAnalyzer(Reader stopwords) throws IOException {
+        this(loadStopwordSet(stopwords));
+    }
+
+    /**
+     * Sets the maximum token length handed to the tokenizer. The value is applied when the
+     * components are created and again each time a reader is set on them.
+     */
+    public void setMaxTokenLength(int length) {
+        maxTokenLength = length;
+    }
+
+    /**
+     * @see #setMaxTokenLength
+     */
+    public int getMaxTokenLength() {
+        return maxTokenLength;
+    }
+
+    /** Applies {@code length} to whichever of the two tokenizer classes {@code tokenizer} is. */
+    private static void applyMaxTokenLength(Tokenizer tokenizer, int length) {
+        if (tokenizer instanceof StandardTokenizer) {
+            ((StandardTokenizer) tokenizer).setMaxTokenLength(length);
+        } else {
+            ((StandardTokenizer40) tokenizer).setMaxTokenLength(length);
+        }
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(final String fieldName) {
+        final boolean modern = getVersion().onOrAfter(Version.LUCENE_4_7_0);
+        final Tokenizer src = modern ? new StandardTokenizer() : new StandardTokenizer40();
+        applyMaxTokenLength(src, maxTokenLength);
+
+        final TokenStream filtered = new StopFilter(new StandardFilter(src), stopwords);
+        return new TokenStreamComponents(src, filtered) {
+            @Override
+            protected void setReader(final Reader reader) {
+                // Pick up any change made through setMaxTokenLength since creation.
+                applyMaxTokenLength(src, CaseSensitiveAnalyzer.this.maxTokenLength);
+                super.setReader(reader);
+            }
+        };
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java
new file mode 100644
index 0000000..7cb0a18
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CaseSensitiveQueryParser.java
@@ -0,0 +1,42 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.FastCharStream;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * A {@link QueryParser} whose prefix queries are built from the term text exactly as it was
+ * passed in; this override applies no case folding.
+ */
+public class CaseSensitiveQueryParser extends QueryParser {
+
+    /**
+     * @param f field name handed to {@code init}
+     * @param a analyzer handed to {@code init}
+     */
+    public CaseSensitiveQueryParser(String f, Analyzer a) {
+        super(new FastCharStream(new StringReader("")));
+        init(f, a);
+    }
+
+    /**
+     * Builds a prefix query on {@code field} for {@code termStr}.
+     *
+     * @throws ParseException if {@code termStr} starts with '*' and leading wildcards are not allowed
+     */
+    @Override
+    protected Query getPrefixQuery(String field, String termStr) throws ParseException {
+        final boolean leadingWildcardForbidden = !getAllowLeadingWildcard();
+        if (leadingWildcardForbidden && termStr.startsWith("*")) {
+            throw new ParseException("'*' not allowed as first character in PrefixQuery");
+        }
+        return newPrefixQuery(new Term(field, termStr));
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
new file mode 100644
index 0000000..cf0b203
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/CollectionFromIndexUnnestingEvaluatorFactory.java
@@ -0,0 +1,327 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.algebricks.runtime.base.IUnnestingEvaluator;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.values.ValueTag;
+import org.apache.vxquery.exceptions.ErrorCode;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.index.IndexAttributes;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluator;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentUnnestingEvaluatorFactory;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.SAXContentHandler;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * Creates the unnesting evaluator that reads a collection back out of a Lucene index.
+ * <p>
+ * The evaluator takes two xs:string arguments: the index directory and an element path.
+ * {@code init} opens the index and runs a prefix query on the "item" field; each call to
+ * {@code step} then rebuilds one matching stored document through a {@link SAXContentHandler}.
+ */
+public class CollectionFromIndexUnnestingEvaluatorFactory extends AbstractTaggedValueArgumentUnnestingEvaluatorFactory {
+    private static final long serialVersionUID = 1L;
+
+    public CollectionFromIndexUnnestingEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    @Override
+    protected IUnnestingEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args)
+            throws AlgebricksException {
+
+        return new AbstractTaggedValueArgumentUnnestingEvaluator(args) {
+
+            private ArrayBackedValueStorage nodeAbvs = new ArrayBackedValueStorage();
+
+            private int indexPlace;
+            private int indexLength;
+            private String elementPath;
+            private String indexName;
+
+            private UTF8StringPointable stringIndexFolder = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                    .createPointable();
+            private UTF8StringPointable stringElementPath = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                    .createPointable();
+            private ByteBufferInputStream bbis = new ByteBufferInputStream();
+            private DataInputStream di = new DataInputStream(bbis);
+
+            private IndexReader reader;
+            private IndexSearcher searcher;
+            private Analyzer analyzer;
+            private QueryParser parser;
+            private ScoreDoc[] hits;
+            private SAXContentHandler handler;
+            private Query query;
+            private Document doc;
+            private List<IndexableField> fields;
+
+            /**
+             * Produces the next matching document, one per call.
+             *
+             * @return true if {@code result} was set; false once every hit has been returned
+             */
+            @Override
+            public boolean step(IPointable result) throws AlgebricksException {
+                if (indexPlace >= indexLength) {
+                    // Every hit has been returned: release the index files.
+                    try {
+                        closeReader();
+                    } catch (IOException e) {
+                        throw new AlgebricksException(e);
+                    }
+                    return false;
+                }
+                nodeAbvs.reset();
+                try {
+                    //TODO: now we get back the entire document
+                    doc = searcher.doc(hits[indexPlace].doc);
+                    fields = doc.getFields();
+                    parse(nodeAbvs);
+                } catch (IOException e) {
+                    throw new AlgebricksException(e);
+                }
+                indexPlace += 1;
+                result.set(nodeAbvs.getByteArray(), nodeAbvs.getStartOffset(), nodeAbvs.getLength());
+                return true;
+            }
+
+            /**
+             * Reads the two string arguments, opens the index and runs the query whose hits
+             * {@code step} will walk.
+             *
+             * @throws SystemException FORG0006 if either argument is not an xs:string;
+             *             SYSE0001 if the index cannot be opened or the query fails
+             */
+            @Override
+            protected void init(TaggedValuePointable[] args) throws SystemException {
+
+                int partition = ctxview.getTaskAttemptId().getTaskId().getPartition();
+                ITreeNodeIdProvider nodeIdProvider = new TreeNodeIdProvider((short) partition);
+                handler = new SAXContentHandler(false, nodeIdProvider, true);
+
+                nodeAbvs.reset();
+                indexPlace = 0;
+                TaggedValuePointable tvp1 = args[0];
+                TaggedValuePointable tvp2 = args[1];
+
+                if (tvp1.getTag() != ValueTag.XS_STRING_TAG || tvp2.getTag() != ValueTag.XS_STRING_TAG) {
+                    throw new SystemException(ErrorCode.FORG0006);
+                }
+                tvp1.getValue(stringIndexFolder);
+                tvp2.getValue(stringElementPath);
+                try {
+                    // An earlier init() may have left a reader open if step() was not run to exhaustion.
+                    closeReader();
+
+                    // Decode the index folder argument.
+                    bbis.setByteBuffer(ByteBuffer.wrap(
+                            Arrays.copyOfRange(stringIndexFolder.getByteArray(), stringIndexFolder.getStartOffset(),
+                                    stringIndexFolder.getLength() + stringIndexFolder.getStartOffset())),
+                            0);
+                    indexName = di.readUTF();
+                    // Decode the element path argument.
+                    bbis.setByteBuffer(ByteBuffer.wrap(
+                            Arrays.copyOfRange(stringElementPath.getByteArray(), stringElementPath.getStartOffset(),
+                                    stringElementPath.getLength() + stringElementPath.getStartOffset())),
+                            0);
+                    elementPath = di.readUTF();
+
+                    //Create the index reader.
+                    reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName)));
+                } catch (IOException e) {
+                    throw new SystemException(ErrorCode.SYSE0001, e);
+                }
+
+                searcher = new IndexSearcher(reader);
+                analyzer = new CaseSensitiveAnalyzer();
+
+                parser = new CaseSensitiveQueryParser("item", analyzer);
+
+                // Prefix query: the path with '/' turned into '.', followed by '*'.
+                String queryString = "item:" + elementPath.replace("/", ".") + "*";
+
+                // Field value of the root element to rebuild: last '/' becomes ':', the others '.'.
+                int lastslash = elementPath.lastIndexOf("/");
+                elementPath = elementPath.substring(0, lastslash) + ":" + elementPath.substring(lastslash + 1);
+                elementPath = elementPath.replace("/", ".") + ".element";
+
+                TopDocs results;
+                try {
+                    query = parser.parse(queryString);
+
+                    //TODO: Right now it only returns 1000000 results
+                    results = searcher.search(query, 1000000);
+
+                } catch (Exception e) {
+                    // Keep the cause and report a real error code instead of a null one.
+                    throw new SystemException(ErrorCode.SYSE0001, e);
+                }
+
+                hits = results.scoreDocs;
+                indexPlace = 0;
+                indexLength = hits.length;
+
+            }
+
+            /** Closes the current index reader, if any, and forgets it. */
+            private void closeReader() throws IOException {
+                if (reader != null) {
+                    try {
+                        reader.close();
+                    } finally {
+                        reader = null;
+                    }
+                }
+            }
+
+            /**
+             * Rebuilds the current document: every field whose value equals the target element
+             * path is expanded into an element subtree, then the document is written out.
+             *
+             * @param abvsFileNode storage the finished document is written to
+             * @throws IOException wrapping any failure while building or writing
+             */
+            public void parse(ArrayBackedValueStorage abvsFileNode) throws IOException {
+                try {
+                    handler.startDocument();
+
+                    for (int i = 0; i < fields.size(); i++) {
+                        String fieldValue = fields.get(i).stringValue();
+                        if (fieldValue.equals(elementPath)) {
+                            buildElement(abvsFileNode, i);
+                        }
+                    }
+
+                    handler.endDocument();
+                    handler.writeDocument(abvsFileNode);
+                } catch (Exception e) {
+                    throw new IOException(e);
+                }
+            }
+
+            /**
+             * Emits SAX events for the field at {@code fieldNum} and, for elements, for the
+             * following fields that are its children.
+             *
+             * @return index of the last field consumed
+             */
+            private int buildElement(ArrayBackedValueStorage abvsFileNode, int fieldNum) throws SAXException {
+                int whereIFinish = fieldNum;
+                IndexableField field = fields.get(fieldNum);
+                String contents = field.stringValue();
+                String uri = "";
+
+                // Field values end in ".<type>"; the part after the first ':' is the name or text.
+                int firstColon = contents.indexOf(':');
+                int lastDot = contents.lastIndexOf('.');
+                String type = contents.substring(lastDot + 1);
+                String lastBit = contents.substring(firstColon + 1, lastDot);
+
+                if (type.equals("textnode")) {
+                    char[] charContents = lastBit.toCharArray();
+                    handler.characters(charContents, 0, charContents.length);
+
+                }
+                if (type.equals("element")) {
+                    List<String> names = new ArrayList<String>();
+                    List<String> values = new ArrayList<String>();
+                    List<String> uris = new ArrayList<String>();
+                    List<String> localNames = new ArrayList<String>();
+                    List<String> types = new ArrayList<String>();
+                    List<String> qNames = new ArrayList<String>();
+                    whereIFinish = findAttributeChildren(whereIFinish, names, values, uris, localNames, types, qNames);
+                    Attributes atts = new IndexAttributes(names, values, uris, localNames, types, qNames);
+
+                    handler.startElement(uri, lastBit, lastBit, atts);
+
+                    boolean noMoreChildren = false;
+
+                    while (whereIFinish + 1 < fields.size() && !noMoreChildren) {
+                        if (isChild(fields.get(whereIFinish + 1), field)) {
+                            whereIFinish = buildElement(abvsFileNode, whereIFinish + 1);
+                        } else {
+                            noMoreChildren = true;
+                        }
+                    }
+
+                    handler.endElement(uri, lastBit, lastBit);
+
+                }
+                return whereIFinish;
+            }
+
+            /**
+             * Collects the attribute children that directly follow an element field. Fields are
+             * read in pairs: the attribute field, then the field holding its value.
+             *
+             * @return index of the last field consumed, or {@code fieldnum} if no attribute was found
+             */
+            int findAttributeChildren(int fieldnum, List<String> n, List<String> v, List<String> u, List<String> l,
+                    List<String> t, List<String> q) {
+                int nextindex = fieldnum + 1;
+                boolean foundattributes = false;
+                if (nextindex < fields.size()) {
+                    IndexableField nextguy;
+
+                    while (nextindex < fields.size()) {
+                        nextguy = fields.get(nextindex);
+                        String contents = nextguy.stringValue();
+                        int firstcolon = contents.indexOf(':');
+                        int lastdot = contents.lastIndexOf('.');
+                        String lastbit = contents.substring(firstcolon + 1, lastdot);
+
+                        if (isDirectChildAttribute(nextguy, fields.get(fieldnum))) {
+                            foundattributes = true;
+                            n.add(lastbit);
+                            IndexableField nextnextguy = fields.get(nextindex + 1);
+                            contents = nextnextguy.stringValue();
+                            firstcolon = contents.indexOf(':');
+                            lastdot = contents.lastIndexOf('.');
+                            String nextlastbit = contents.substring(firstcolon + 1, lastdot);
+                            v.add(nextlastbit);
+                            u.add(lastbit);
+                            l.add(lastbit);
+                            t.add(lastbit);
+                            q.add(lastbit);
+                        } else {
+                            break;
+                        }
+                        nextindex += 2;
+                    }
+                }
+                if (foundattributes) {
+                    return nextindex - 1;
+
+                } else {
+                    return fieldnum;
+                }
+            }
+
+            /** True if the child's path (value minus the trailing type) lies under the adult's path. */
+            boolean isChild(IndexableField child, IndexableField adult) {
+                String childId = child.stringValue();
+                String adultId = adult.stringValue();
+
+                int lastDotChild = childId.lastIndexOf('.');
+                int lastDotAdult = adultId.lastIndexOf('.');
+
+                String childPath = childId.substring(0, lastDotChild);
+                String adultPath = adultId.substring(0, lastDotAdult);
+                adultPath = adultPath.replaceFirst(":", ".");
+
+                return (childPath.startsWith(adultPath + ":") || childPath.startsWith(adultPath + "."));
+            }
+
+            /** True if the child is an "attribute" field whose path starts with the adult's path plus ':'. */
+            boolean isDirectChildAttribute(IndexableField child, IndexableField adult) {
+                String childId = child.stringValue();
+                String adultId = adult.stringValue();
+
+                String childPath = childId.substring(0, childId.lastIndexOf('.'));
+                String adultPath = adultId.substring(0, adultId.lastIndexOf('.'));
+                adultPath = adultPath.replaceFirst(":", ".");
+                String[] childSegments = child.stringValue().split("\\.");
+
+                String childType = childSegments[childSegments.length - 1];
+
+                return (childPath.startsWith(adultPath + ":") && childType.equals("attribute"));
+            }
+
+        };
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/vxquery/blob/1f623b16/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java
new file mode 100644
index 0000000..c3776d9
--- /dev/null
+++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/index/IndexConstructorScalarEvaluatorFactory.java
@@ -0,0 +1,70 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.vxquery.runtime.functions.index;
+
+import java.io.DataInputStream;
+
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import org.apache.vxquery.datamodel.accessors.TaggedValuePointable;
+import org.apache.vxquery.datamodel.builders.sequence.SequenceBuilder;
+import org.apache.vxquery.exceptions.SystemException;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator;
+import org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluatorFactory;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+
+/**
+ * Scalar evaluator factory whose evaluators hand their arguments to
+ * {@link IndexConstructorUtil#evaluate}. Per the original author's note, this creates one
+ * Lucene doc per file.
+ */
+public class IndexConstructorScalarEvaluatorFactory extends AbstractTaggedValueArgumentScalarEvaluatorFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * @param args factories for the argument evaluators, passed straight to the superclass
+     */
+    public IndexConstructorScalarEvaluatorFactory(IScalarEvaluatorFactory[] args) {
+        super(args);
+    }
+
+    /**
+     * Builds an evaluator that owns one set of reusable buffers/pointables and forwards
+     * every evaluation to {@link IndexConstructorUtil#evaluate}.
+     *
+     * @param ctx  task context; supplies the partition number and the node id
+     * @param args argument evaluators handed to the created evaluator
+     * @return the evaluator delegating to {@code IndexConstructorUtil}
+     * @throws AlgebricksException declared by the overridden factory method
+     */
+    @Override
+    protected IScalarEvaluator createEvaluator(IHyracksTaskContext ctx, IScalarEvaluator[] args)
+            throws AlgebricksException {
+        // Values read from the task context.
+        final int partitionNumber = ctx.getTaskAttemptId().getTaskId().getPartition();
+        final String hyracksNodeId = ctx.getJobletContext().getApplicationContext().getNodeId();
+        final ITreeNodeIdProvider treeNodeIds = new TreeNodeIdProvider((short) partitionNumber);
+
+        // Working objects created once here and shared by every evaluate() call below.
+        final ArrayBackedValueStorage valueStorage = new ArrayBackedValueStorage();
+        final ArrayBackedValueStorage fileNodeStorage = new ArrayBackedValueStorage();
+        final UTF8StringPointable stringPointable = (UTF8StringPointable) UTF8StringPointable.FACTORY
+                .createPointable();
+        final TaggedValuePointable nodePointable = (TaggedValuePointable) TaggedValuePointable.FACTORY
+                .createPointable();
+        final ByteBufferInputStream bufferStream = new ByteBufferInputStream();
+        final DataInputStream dataIn = new DataInputStream(bufferStream);
+        final SequenceBuilder sequenceBuilder = new SequenceBuilder();
+
+        return new AbstractTaggedValueArgumentScalarEvaluator(args) {
+
+            @Override
+            protected void evaluate(TaggedValuePointable[] argValues, IPointable result) throws SystemException {
+                // NOTE(review): the meaning of the literal 'false' flag is defined by
+                // IndexConstructorUtil.evaluate -- confirm there before changing it.
+                IndexConstructorUtil.evaluate(argValues, result, stringPointable, bufferStream, dataIn,
+                        sequenceBuilder, valueStorage, treeNodeIds, fileNodeStorage, nodePointable, false,
+                        hyracksNodeId);
+            }
+
+        };
+    }
+}