You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/03/05 02:57:40 UTC
[14/14] git commit: Tracking both old and new parsers.
Tracking both old and new parsers.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/278c0db4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/278c0db4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/278c0db4
Branch: refs/heads/prestonc/parser
Commit: 278c0db437a41b2feb53fc133969457ee4dd0e17
Parents: b3aee30
Author: Preston Carman <pr...@apache.org>
Authored: Wed Feb 19 15:28:23 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Feb 27 14:24:55 2014 -0800
----------------------------------------------------------------------
.../vxquery/xmlparser/SAXContentHandler.java | 232 +++++++++++++++++--
1 file changed, 214 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/278c0db4/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 40a35b0..2b3d613 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -16,8 +16,11 @@ package org.apache.vxquery.xmlparser;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
+import org.apache.vxquery.datamodel.builders.nodes.AbstractNodeBuilder;
import org.apache.vxquery.datamodel.builders.nodes.AttributeNodeBuilder;
import org.apache.vxquery.datamodel.builders.nodes.CommentNodeBuilder;
import org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder;
@@ -39,6 +42,7 @@ import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
public class SAXContentHandler implements ContentHandler, LexicalHandler {
+ private final ArrayBackedValueStorage docABVS;
private final boolean createNodeIds;
@@ -46,6 +50,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
private final ITreeNodeIdProvider nodeIdProvider;
+ private final ArrayBackedValueStorage tempABVS;
+
private final DocumentNodeBuilder docb;
private final TextNodeBuilder tnb;
@@ -60,7 +66,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
private final StringBuilder buffer;
+ private final List<ElementNodeBuilder> enbStack;
+
+ private final List<ElementNodeBuilder> freeENBList;
+
private int nodeIdCounter;
+ private int copyOldCounter = 0;
+ private int copyNewCounter = 0;
private boolean pendingText;
@@ -70,7 +82,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
// Structure and data.
private final GrowableIntArray leavesKind;
-// private final GrowableIntArray leavesStart;
+ // private final GrowableIntArray leavesStart;
private final GrowableIntArray leavesEnd;
// private final GrowableIntArray leavesDepth;
// private final GrowableIntArray leavesParent;
@@ -96,17 +108,21 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
private final int LEAF_POST_NODE = 7;
public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) {
+ docABVS = new ArrayBackedValueStorage();
this.createNodeIds = nodeIdProvider != null;
this.attachTypes = attachTypes;
this.nodeIdProvider = nodeIdProvider;
- enb = new ElementNodeBuilder();
+ this.tempABVS = new ArrayBackedValueStorage();
docb = new DocumentNodeBuilder();
tnb = new TextNodeBuilder();
cnb = new CommentNodeBuilder();
pinb = new PINodeBuilder();
+ enb = new ElementNodeBuilder();
anb = new AttributeNodeBuilder();
db = new DictionaryBuilder();
buffer = new StringBuilder();
+ enbStack = new ArrayList<ElementNodeBuilder>();
+ freeENBList = new ArrayList<ElementNodeBuilder>();
pendingText = false;
leavesKind = new GrowableIntArray(600);
@@ -136,6 +152,10 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
public void endDocument() throws SAXException {
try {
flushText();
+ docb.endChildrenChunk();
+ copyOldCounter++;
+ docb.finish();
+
leafNodeStart(LEAF_POST_DOCUMENT);
leafNodeEnd();
textCurrentDepth--;
@@ -155,13 +175,18 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
// System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i]
// + " c=" + c[i]);
// }
-
}
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
try {
flushText();
+ ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
+ enb.endChildrenChunk();
+ copyOldCounter++;
+ endChildInParent(enb);
+ freeENB(enb);
+
leafNodeStart(LEAF_POST_NODE);
leafNodeEnd();
textCurrentDepth--;
@@ -183,6 +208,20 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
public void processingInstruction(String target, String data) throws SAXException {
try {
flushText();
+ startChildInParent(pinb);
+ tempABVS.reset();
+ tempABVS.getDataOutput().writeUTF(target);
+ if (createNodeIds) {
+ pinb.setLocalNodeId(nodeIdCounter);
+ }
+ pinb.setTarget(tempABVS);
+ copyOldCounter++;
+ tempABVS.reset();
+ tempABVS.getDataOutput().writeUTF(data);
+ pinb.setContent(tempABVS);
+ copyOldCounter++;
+ endChildInParent(pinb);
+
// Save to leavesABVS
leafNodeStart(LEAF_PI);
pinb.reset(leavesABVS);
@@ -208,6 +247,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
@Override
public void startDocument() throws SAXException {
+ copyOldCounter = 0;
+ copyNewCounter = 0;
leavesKind.clear();
//leavesStart.clear();
leavesABVS.reset();
@@ -227,11 +268,18 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
try {
nodeIdCounter = 0;
db.reset();
+ docABVS.reset();
+ docb.reset(docABVS);
+ if (createNodeIds) {
+ docb.setLocalNodeId(nodeIdCounter++);
+ }
+ docb.startChildrenChunk();
leafNodeStart(LEAF_PRE_DOCUMENT);
- docb.reset(leavesABVS);
+ DocumentNodeBuilder docb2 = new DocumentNodeBuilder();
+ docb2.reset(leavesABVS);
if (createNodeIds) {
- docb.setLocalNodeId(nodeIdCounter++);
+ docb2.setLocalNodeId(nodeIdCounter);
}
leafNodeEnd();
@@ -249,13 +297,11 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
flushText();
int idx = name.indexOf(':');
String prefix = idx < 0 ? "" : name.substring(0, idx);
+ ElementNodeBuilder enb = createENB();
+ startChildInParent(enb);
int uriCode = db.lookup(uri);
int localNameCode = db.lookup(localName);
int prefixCode = db.lookup(prefix);
-
- // Save to leavesABVS
- leafNodeStart(LEAF_PRE_NODE);
- enb.setMvs(leavesABVS);
enb.setName(uriCode, localNameCode, prefixCode);
if (attachTypes) {
int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
@@ -266,9 +312,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
if (createNodeIds) {
enb.setLocalNodeId(nodeIdCounter++);
}
+
+ // Save to leavesABVS
+ leafNodeStart(LEAF_PRE_NODE);
+ ElementNodeBuilder enb2 = createENB();
+ enb2.setMvs(leavesABVS);
+ enb2.setName(uriCode, localNameCode, prefixCode);
+ if (attachTypes) {
+ int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+ int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR);
+ int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+ enb2.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+ }
+ if (createNodeIds) {
+ enb2.setLocalNodeId(nodeIdCounter);
+ }
leafNodeEnd();
textCurrentDepth++;
+ enb.startAttributeChunk();
final int nAttrs = atts.getLength();
for (int i = 0; i < nAttrs; ++i) {
String aName = atts.getQName(i);
@@ -276,6 +338,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
int aPrefixCode = db.lookup(aIdx < 0 ? "" : aName.substring(0, aIdx));
int aLocalNameCode = db.lookup(atts.getLocalName(i));
int aUriCode = db.lookup(atts.getURI(i));
+ String aValue = atts.getValue(i);
+ tempABVS.reset();
+ DataOutput tempOut = tempABVS.getDataOutput();
+ tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG);
+ tempOut.writeUTF(aValue);
+ enb.startAttribute(anb);
+ anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
+ if (attachTypes) {
+ int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+ int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
+ int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+ anb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+ }
+ if (createNodeIds) {
+ anb.setLocalNodeId(nodeIdCounter++);
+ }
+ anb.setValue(tempABVS);
+ copyOldCounter++;
+ enb.endAttribute(anb);
// Save to leavesABVS
leafNodeStart(LEAF_ATTRIBUTE);
@@ -292,7 +373,12 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
}
anb.setValue(atts.getValue(i));
leafNodeEnd();
+
}
+ enb.endAttributeChunk();
+ copyOldCounter++;
+ enb.startChildrenChunk();
+ enbStack.add(enb);
} catch (IOException e) {
e.printStackTrace();
throw new SAXException(e);
@@ -307,7 +393,16 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
public void comment(char[] ch, int start, int length) throws SAXException {
try {
flushText();
+ startChildInParent(cnb);
buffer.append(ch, start, length);
+ tempABVS.reset();
+ tempABVS.getDataOutput().writeUTF(buffer.toString());
+ if (createNodeIds) {
+ cnb.setLocalNodeId(nodeIdCounter);
+ }
+ cnb.setValue(tempABVS);
+ copyOldCounter++;
+ endChildInParent(cnb);
// Save to leavesABVS
leafNodeStart(LEAF_COMMENT);
@@ -327,6 +422,16 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
private void flushText() throws IOException {
if (pendingText) {
+ peekENBStackTop().startChild(tnb);
+ tempABVS.reset();
+ tempABVS.getDataOutput().writeUTF(buffer.toString());
+ if (createNodeIds) {
+ tnb.setLocalNodeId(nodeIdCounter);
+ }
+ tnb.setValue(tempABVS);
+ copyOldCounter++;
+ peekENBStackTop().endChild(tnb);
+
// Save to leavesABVS
leafNodeStart(LEAF_TEXT);
tnb.reset(leavesABVS);
@@ -380,20 +485,51 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
out.writeInt(nodeIdProvider.getId());
}
db.write(abvs);
+ out.write(docABVS.getByteArray(), docABVS.getStartOffset(), docABVS.getLength());
+ copyOldCounter++;
+ System.err.println("copyCounter: " + copyOldCounter);
+ }
+
+ int currentOffset = 0;
+
+ public void writeOnce(ArrayBackedValueStorage abvs) throws IOException {
+ DataOutput out = abvs.getDataOutput();
+ out.write(ValueTag.NODE_TREE_TAG);
+ byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK;
+ if (attachTypes) {
+ header |= NodeTreePointable.HEADER_TYPE_EXISTS_MASK;
+ }
+ if (createNodeIds) {
+ header |= NodeTreePointable.HEADER_NODEID_EXISTS_MASK;
+ }
+ out.write(header);
+ if (createNodeIds) {
+ out.writeInt(nodeIdProvider.getId());
+ }
+ db.write(abvs);
+
+ copyNewCounter++;
// leavesStart.getArray()[i]
- int currentOffset = 0;
for (int i = 0; i < leavesKind.getSize(); ++i) {
if (leavesKind.getArray()[i] == LEAF_PRE_DOCUMENT) {
- out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
+ flushLeaveNodesUpTo(out, i);
+
+ // for (int x = currentOffset; x < leavesEnd.getArray()[i]; ++x) {
+ // System.err.println(i + "\t" + leavesKind.getArray()[i] + "\t" + leavesABVS.getByteArray()[x]);
+ // }
int children = leavesChildrenCount.getArray()[i];
+ System.err.println("children " + children);
if (children > 0) {
sequenceSlotStub(abvs, children);
}
// Continue with nodes.
+ childrenLength = 0;
} else if (leavesKind.getArray()[i] == LEAF_PRE_NODE) {
+ flushLeaveNodesUpTo(out, i - 1);
+
int nsCount = 0;
int attrCount = leavesAttributeCount.getArray()[i];
int childrenCount = leavesChildrenCount.getArray()[i];
@@ -401,37 +537,61 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
enb.setMvs(abvs);
enb.setTagHeader(nsCount, attrCount, childrenCount);
- out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
+ flushLeaveNodesUpTo(out, i);
if (attrCount > 0) {
sequenceSlotStub(abvs, attrCount);
+ int attributeLength = 0;
for (int s = 0; s < attrCount; ++s) {
++i;
- out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
- updateSequenceSlot(abvs);
+ attributeLength = leavesEnd.getArray()[i] - currentOffset;
+ updateSequenceSlot(abvs, attributeLength);
}
+ flushLeaveNodesUpTo(out, i);
}
if (childrenCount > 0) {
sequenceSlotStub(abvs, childrenCount);
}
// Continue with nodes.
+ startChildHunk(currentOffset);
} else if (leavesKind.getArray()[i] == LEAF_POST_DOCUMENT) {
+ flushLeaveNodesUpTo(out, i - 1);
+
// no action
} else if (leavesKind.getArray()[i] == LEAF_POST_NODE) {
+ flushLeaveNodesUpTo(out, i - 1);
+
updateSequenceSlot(abvs);
} else {
- out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
- updateSequenceSlot(abvs);
+ childrenLength = leavesEnd.getArray()[i] - currentOffset;
+ updateSequenceSlot(abvs, childrenLength);
}
+ }
+ System.err.println("copyNewCounter: " + copyNewCounter);
+ }
+
+ private void flushLeaveNodesUpTo(DataOutput out, int i) throws IOException {
+ if (currentOffset != leavesEnd.getArray()[i]) {
+ out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
currentOffset = leavesEnd.getArray()[i];
+ copyNewCounter++;
}
}
+ int childrenLength = 0;
+ int childrenOffset = 0;
+
+ /**
+ * Begins tracking a new run of children: remembers where the run starts and
+ * clears the accumulated length.
+ *
+ * @param currentOffset offset to record as the start of the run (this parameter
+ *        shadows the field of the same name)
+ */
+ private void startChildHunk(int currentOffset) {
+ this.childrenOffset = currentOffset;
+ this.childrenLength = 0;
+ }
+
private void sequenceSlotStub(ArrayBackedValueStorage abvs, int count) throws IOException {
DataOutput out = abvs.getDataOutput();
out.writeInt(count);
+ // System.err.println("Slot count " + count);
int offset = abvs.getLength();
for (int s = 0; s < count; ++s) {
out.writeInt(-1);
@@ -450,15 +610,51 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
childSlotCounter++;
}
- private void updateSequenceSlot(ArrayBackedValueStorage abvs) {
+ private void updateSequenceSlot(ArrayBackedValueStorage abvs, int length) {
// for (int i = 0; i < childSlotCounter; ++i) {
// System.err.println("\t" + i + " " + childStartOffset.getArray()[i] + " - " + childSlotOffset.getArray()[i]);
// }
childSlotCounter--;
- int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
+ // int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
IntegerPointable.setInteger(abvs.getByteArray(), childSlotOffset.getArray()[childSlotCounter], length);
}
+ /**
+ * Closes the most recently opened sequence slot, using as its length the number of
+ * bytes written to {@code abvs} since that slot's recorded start offset.
+ *
+ * The start offset is read at {@code childSlotCounter - 1} because the two-argument
+ * overload decrements {@code childSlotCounter} before patching the slot. Reading at
+ * {@code childSlotCounter} would pair the patched slot with the start offset of the
+ * entry one past it, which is not what the pre-refactoring code did.
+ *
+ * @param abvs storage that holds both the slot table and the node bytes
+ */
+ private void updateSequenceSlot(ArrayBackedValueStorage abvs) {
+ int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter - 1];
+ updateSequenceSlot(abvs, length);
+ }
+
+ /**
+ * Hands out an element builder, reusing the most recently freed one when the
+ * free list is non-empty and constructing a new one otherwise.
+ */
+ private ElementNodeBuilder createENB() {
+ final int lastFree = freeENBList.size() - 1;
+ return lastFree < 0 ? new ElementNodeBuilder() : freeENBList.remove(lastFree);
+ }
+
+ /**
+ * Returns an element builder to the free list so that createENB() can hand it
+ * out again. The builder is stored as is; nothing is reset here.
+ */
+ private void freeENB(ElementNodeBuilder enb) {
+ freeENBList.add(freeENBList.size(), enb);
+ }
+
+ /**
+ * Returns, without removing it, the builder of the innermost open element.
+ * Callers must ensure the stack is non-empty; an empty stack makes the
+ * underlying list lookup fail.
+ */
+ private ElementNodeBuilder peekENBStackTop() {
+ final int top = enbStack.size() - 1;
+ return enbStack.get(top);
+ }
+
+ /**
+ * Opens {@code anb} as a child of the current parent: the innermost open element
+ * when one exists, otherwise the document builder.
+ */
+ private void startChildInParent(AbstractNodeBuilder anb) throws IOException {
+ if (!enbStack.isEmpty()) {
+ peekENBStackTop().startChild(anb);
+ return;
+ }
+ docb.startChild(anb);
+ }
+
+ /**
+ * Closes {@code anb} in the current parent: the innermost open element when one
+ * exists, otherwise the document builder.
+ */
+ private void endChildInParent(AbstractNodeBuilder anb) throws IOException {
+ if (!enbStack.isEmpty()) {
+ peekENBStackTop().endChild(anb);
+ return;
+ }
+ docb.endChild(anb);
+ }
+
private void leafNodeStart(int kind) {
leavesKind.append(kind);
//leavesStart.append(leavesABVS.getLength());