You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@vxquery.apache.org by pr...@apache.org on 2014/03/05 02:57:40 UTC

[14/14] git commit: Tracking both old and new parsers.

Tracking both old and new parsers.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/278c0db4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/278c0db4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/278c0db4

Branch: refs/heads/prestonc/parser
Commit: 278c0db437a41b2feb53fc133969457ee4dd0e17
Parents: b3aee30
Author: Preston Carman <pr...@apache.org>
Authored: Wed Feb 19 15:28:23 2014 -0800
Committer: Preston Carman <pr...@apache.org>
Committed: Thu Feb 27 14:24:55 2014 -0800

----------------------------------------------------------------------
 .../vxquery/xmlparser/SAXContentHandler.java    | 232 +++++++++++++++++--
 1 file changed, 214 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/278c0db4/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 40a35b0..2b3d613 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -16,8 +16,11 @@ package org.apache.vxquery.xmlparser;
 
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
+import org.apache.vxquery.datamodel.builders.nodes.AbstractNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.AttributeNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.CommentNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder;
@@ -39,6 +42,7 @@ import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
 import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
 
 public class SAXContentHandler implements ContentHandler, LexicalHandler {
+    private final ArrayBackedValueStorage docABVS;
 
     private final boolean createNodeIds;
 
@@ -46,6 +50,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private final ITreeNodeIdProvider nodeIdProvider;
 
+    private final ArrayBackedValueStorage tempABVS;
+
     private final DocumentNodeBuilder docb;
 
     private final TextNodeBuilder tnb;
@@ -60,7 +66,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private final StringBuilder buffer;
 
+    private final List<ElementNodeBuilder> enbStack;
+
+    private final List<ElementNodeBuilder> freeENBList;
+
     private int nodeIdCounter;
+    private int copyOldCounter = 0;
+    private int copyNewCounter = 0;
 
     private boolean pendingText;
 
@@ -70,7 +82,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     // Structure and data.
     private final GrowableIntArray leavesKind;
-//    private final GrowableIntArray leavesStart;
+    //    private final GrowableIntArray leavesStart;
     private final GrowableIntArray leavesEnd;
     //    private final GrowableIntArray leavesDepth;
     //    private final GrowableIntArray leavesParent;
@@ -96,17 +108,21 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     private final int LEAF_POST_NODE = 7;
 
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) {
+        docABVS = new ArrayBackedValueStorage();
         this.createNodeIds = nodeIdProvider != null;
         this.attachTypes = attachTypes;
         this.nodeIdProvider = nodeIdProvider;
-        enb = new ElementNodeBuilder();
+        this.tempABVS = new ArrayBackedValueStorage();
         docb = new DocumentNodeBuilder();
         tnb = new TextNodeBuilder();
         cnb = new CommentNodeBuilder();
         pinb = new PINodeBuilder();
+        enb = new ElementNodeBuilder();
         anb = new AttributeNodeBuilder();
         db = new DictionaryBuilder();
         buffer = new StringBuilder();
+        enbStack = new ArrayList<ElementNodeBuilder>();
+        freeENBList = new ArrayList<ElementNodeBuilder>();
         pendingText = false;
 
         leavesKind = new GrowableIntArray(600);
@@ -136,6 +152,10 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void endDocument() throws SAXException {
         try {
             flushText();
+            docb.endChildrenChunk();
+            copyOldCounter++;
+            docb.finish();
+
             leafNodeStart(LEAF_POST_DOCUMENT);
             leafNodeEnd();
             textCurrentDepth--;
@@ -155,13 +175,18 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
         //            System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i]
         //                    + " c=" + c[i]);
         //        }
-
     }
 
     @Override
     public void endElement(String uri, String localName, String name) throws SAXException {
         try {
             flushText();
+            ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
+            enb.endChildrenChunk();
+            copyOldCounter++;
+            endChildInParent(enb);
+            freeENB(enb);
+
             leafNodeStart(LEAF_POST_NODE);
             leafNodeEnd();
             textCurrentDepth--;
@@ -183,6 +208,20 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void processingInstruction(String target, String data) throws SAXException {
         try {
             flushText();
+            startChildInParent(pinb);
+            tempABVS.reset();
+            tempABVS.getDataOutput().writeUTF(target);
+            if (createNodeIds) {
+                pinb.setLocalNodeId(nodeIdCounter);
+            }
+            pinb.setTarget(tempABVS);
+            copyOldCounter++;
+            tempABVS.reset();
+            tempABVS.getDataOutput().writeUTF(data);
+            pinb.setContent(tempABVS);
+            copyOldCounter++;
+            endChildInParent(pinb);
+
             // Save to leavesABVS
             leafNodeStart(LEAF_PI);
             pinb.reset(leavesABVS);
@@ -208,6 +247,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     @Override
     public void startDocument() throws SAXException {
+        copyOldCounter = 0;
+        copyNewCounter = 0;
         leavesKind.clear();
         //leavesStart.clear();
         leavesABVS.reset();
@@ -227,11 +268,18 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
         try {
             nodeIdCounter = 0;
             db.reset();
+            docABVS.reset();
+            docb.reset(docABVS);
+            if (createNodeIds) {
+                docb.setLocalNodeId(nodeIdCounter++);
+            }
+            docb.startChildrenChunk();
 
             leafNodeStart(LEAF_PRE_DOCUMENT);
-            docb.reset(leavesABVS);
+            DocumentNodeBuilder docb2 = new DocumentNodeBuilder();
+            docb2.reset(leavesABVS);
             if (createNodeIds) {
-                docb.setLocalNodeId(nodeIdCounter++);
+                docb2.setLocalNodeId(nodeIdCounter);
             }
             leafNodeEnd();
 
@@ -249,13 +297,11 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
             flushText();
             int idx = name.indexOf(':');
             String prefix = idx < 0 ? "" : name.substring(0, idx);
+            ElementNodeBuilder enb = createENB();
+            startChildInParent(enb);
             int uriCode = db.lookup(uri);
             int localNameCode = db.lookup(localName);
             int prefixCode = db.lookup(prefix);
-
-            // Save to leavesABVS
-            leafNodeStart(LEAF_PRE_NODE);
-            enb.setMvs(leavesABVS);
             enb.setName(uriCode, localNameCode, prefixCode);
             if (attachTypes) {
                 int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
@@ -266,9 +312,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
             if (createNodeIds) {
                 enb.setLocalNodeId(nodeIdCounter++);
             }
+
+            // Save to leavesABVS
+            leafNodeStart(LEAF_PRE_NODE);
+            ElementNodeBuilder enb2 = createENB();
+            enb2.setMvs(leavesABVS);
+            enb2.setName(uriCode, localNameCode, prefixCode);
+            if (attachTypes) {
+                int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+                int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR);
+                int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+                enb2.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+            }
+            if (createNodeIds) {
+                enb2.setLocalNodeId(nodeIdCounter);
+            }
             leafNodeEnd();
             textCurrentDepth++;
 
+            enb.startAttributeChunk();
             final int nAttrs = atts.getLength();
             for (int i = 0; i < nAttrs; ++i) {
                 String aName = atts.getQName(i);
@@ -276,6 +338,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 int aPrefixCode = db.lookup(aIdx < 0 ? "" : aName.substring(0, aIdx));
                 int aLocalNameCode = db.lookup(atts.getLocalName(i));
                 int aUriCode = db.lookup(atts.getURI(i));
+                String aValue = atts.getValue(i);
+                tempABVS.reset();
+                DataOutput tempOut = tempABVS.getDataOutput();
+                tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG);
+                tempOut.writeUTF(aValue);
+                enb.startAttribute(anb);
+                anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
+                if (attachTypes) {
+                    int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+                    int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
+                    int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+                    anb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+                }
+                if (createNodeIds) {
+                    anb.setLocalNodeId(nodeIdCounter++);
+                }
+                anb.setValue(tempABVS);
+                copyOldCounter++;
+                enb.endAttribute(anb);
 
                 // Save to leavesABVS
                 leafNodeStart(LEAF_ATTRIBUTE);
@@ -292,7 +373,12 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 }
                 anb.setValue(atts.getValue(i));
                 leafNodeEnd();
+
             }
+            enb.endAttributeChunk();
+            copyOldCounter++;
+            enb.startChildrenChunk();
+            enbStack.add(enb);
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -307,7 +393,16 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void comment(char[] ch, int start, int length) throws SAXException {
         try {
             flushText();
+            startChildInParent(cnb);
             buffer.append(ch, start, length);
+            tempABVS.reset();
+            tempABVS.getDataOutput().writeUTF(buffer.toString());
+            if (createNodeIds) {
+                cnb.setLocalNodeId(nodeIdCounter);
+            }
+            cnb.setValue(tempABVS);
+            copyOldCounter++;
+            endChildInParent(cnb);
 
             // Save to leavesABVS
             leafNodeStart(LEAF_COMMENT);
@@ -327,6 +422,16 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private void flushText() throws IOException {
         if (pendingText) {
+            peekENBStackTop().startChild(tnb);
+            tempABVS.reset();
+            tempABVS.getDataOutput().writeUTF(buffer.toString());
+            if (createNodeIds) {
+                tnb.setLocalNodeId(nodeIdCounter);
+            }
+            tnb.setValue(tempABVS);
+            copyOldCounter++;
+            peekENBStackTop().endChild(tnb);
+
             // Save to leavesABVS
             leafNodeStart(LEAF_TEXT);
             tnb.reset(leavesABVS);
@@ -380,20 +485,51 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
             out.writeInt(nodeIdProvider.getId());
         }
         db.write(abvs);
+        out.write(docABVS.getByteArray(), docABVS.getStartOffset(), docABVS.getLength());
+        copyOldCounter++;
+        System.err.println("copyCounter: " + copyOldCounter);
+    }
+
+    int currentOffset = 0;
+
+    public void writeOnce(ArrayBackedValueStorage abvs) throws IOException {
+        DataOutput out = abvs.getDataOutput();
+        out.write(ValueTag.NODE_TREE_TAG);
+        byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK;
+        if (attachTypes) {
+            header |= NodeTreePointable.HEADER_TYPE_EXISTS_MASK;
+        }
+        if (createNodeIds) {
+            header |= NodeTreePointable.HEADER_NODEID_EXISTS_MASK;
+        }
+        out.write(header);
+        if (createNodeIds) {
+            out.writeInt(nodeIdProvider.getId());
+        }
+        db.write(abvs);
+
+        copyNewCounter++;
 
         // leavesStart.getArray()[i]
-        int currentOffset = 0;
         for (int i = 0; i < leavesKind.getSize(); ++i) {
             if (leavesKind.getArray()[i] == LEAF_PRE_DOCUMENT) {
-                out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
+                flushLeaveNodesUpTo(out, i);
+
+                //                for (int x = currentOffset; x < leavesEnd.getArray()[i]; ++x) {
+                //                    System.err.println(i + "\t" + leavesKind.getArray()[i] + "\t" + leavesABVS.getByteArray()[x]);
+                //                }
 
                 int children = leavesChildrenCount.getArray()[i];
+                System.err.println("children " + children);
                 if (children > 0) {
                     sequenceSlotStub(abvs, children);
                 }
                 // Continue with nodes.
+                childrenLength = 0;
 
             } else if (leavesKind.getArray()[i] == LEAF_PRE_NODE) {
+                flushLeaveNodesUpTo(out, i - 1);
+
                 int nsCount = 0;
                 int attrCount = leavesAttributeCount.getArray()[i];
                 int childrenCount = leavesChildrenCount.getArray()[i];
@@ -401,37 +537,61 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 enb.setMvs(abvs);
                 enb.setTagHeader(nsCount, attrCount, childrenCount);
 
-                out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
+                flushLeaveNodesUpTo(out, i);
 
                 if (attrCount > 0) {
                     sequenceSlotStub(abvs, attrCount);
+                    int attributeLength = 0;
                     for (int s = 0; s < attrCount; ++s) {
                         ++i;
-                        out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
-                        updateSequenceSlot(abvs);
+                        attributeLength = leavesEnd.getArray()[i] - currentOffset;
+                        updateSequenceSlot(abvs, attributeLength);
                     }
+                    flushLeaveNodesUpTo(out, i);
                 }
 
                 if (childrenCount > 0) {
                     sequenceSlotStub(abvs, childrenCount);
                 }
                 // Continue with nodes.
+                startChildHunk(currentOffset);
 
             } else if (leavesKind.getArray()[i] == LEAF_POST_DOCUMENT) {
+                flushLeaveNodesUpTo(out, i - 1);
+
                 // no action
             } else if (leavesKind.getArray()[i] == LEAF_POST_NODE) {
+                flushLeaveNodesUpTo(out, i - 1);
+
                 updateSequenceSlot(abvs);
             } else {
-                out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
-                updateSequenceSlot(abvs);
+                childrenLength = leavesEnd.getArray()[i] - currentOffset;
+                updateSequenceSlot(abvs, childrenLength);
             }
+        }
+        System.err.println("copyNewCounter: " + copyNewCounter);
+    }
+
+    private void flushLeaveNodesUpTo(DataOutput out, int i) throws IOException {
+        if (currentOffset != leavesEnd.getArray()[i]) {
+            out.write(leavesABVS.getByteArray(), currentOffset, leavesEnd.getArray()[i] - currentOffset);
             currentOffset = leavesEnd.getArray()[i];
+            copyNewCounter++;
         }
     }
 
+    int childrenLength = 0;
+    int childrenOffset = 0;
+
+    private void startChildHunk(int currentOffset) {
+        childrenLength = 0;
+        childrenOffset = currentOffset;
+    }
+
     private void sequenceSlotStub(ArrayBackedValueStorage abvs, int count) throws IOException {
         DataOutput out = abvs.getDataOutput();
         out.writeInt(count);
+        //        System.err.println("Slot count " + count);
         int offset = abvs.getLength();
         for (int s = 0; s < count; ++s) {
             out.writeInt(-1);
@@ -450,15 +610,51 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
         childSlotCounter++;
     }
 
-    private void updateSequenceSlot(ArrayBackedValueStorage abvs) {
+    private void updateSequenceSlot(ArrayBackedValueStorage abvs, int length) {
         //        for (int i = 0; i < childSlotCounter; ++i) {
         //            System.err.println("\t" + i + " " + childStartOffset.getArray()[i] + " - " + childSlotOffset.getArray()[i]);
         //        }
         childSlotCounter--;
-        int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
+        //        int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
         IntegerPointable.setInteger(abvs.getByteArray(), childSlotOffset.getArray()[childSlotCounter], length);
     }
 
+    private void updateSequenceSlot(ArrayBackedValueStorage abvs) {
+        int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
+        updateSequenceSlot(abvs, length);
+    }
+
+    private ElementNodeBuilder createENB() {
+        if (freeENBList.isEmpty()) {
+            return new ElementNodeBuilder();
+        }
+        return freeENBList.remove(freeENBList.size() - 1);
+    }
+
+    private void freeENB(ElementNodeBuilder enb) {
+        freeENBList.add(enb);
+    }
+
+    private ElementNodeBuilder peekENBStackTop() {
+        return enbStack.get(enbStack.size() - 1);
+    }
+
+    private void startChildInParent(AbstractNodeBuilder anb) throws IOException {
+        if (enbStack.isEmpty()) {
+            docb.startChild(anb);
+        } else {
+            peekENBStackTop().startChild(anb);
+        }
+    }
+
+    private void endChildInParent(AbstractNodeBuilder anb) throws IOException {
+        if (enbStack.isEmpty()) {
+            docb.endChild(anb);
+        } else {
+            peekENBStackTop().endChild(anb);
+        }
+    }
+
     private void leafNodeStart(int kind) {
         leavesKind.append(kind);
         //leavesStart.append(leavesABVS.getLength());