You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by jk...@apache.org on 2001/05/09 02:35:28 UTC
cvs commit: xml-xalan/java/src/org/apache/xml/dtm DTMDocumentImpl.java
jkesselm 01/05/08 17:35:28
Modified: java/src/org/apache/xml/dtm Tag: DTM_EXP
DTMDocumentImpl.java
Log:
Merge updated code, in progress. Some errors fixed (ands
written where ors were intended), some concerns annotated,
empty glue routines dropped in pending reconciliation with
the DTMBuilder stub. I've got a bit more to do in the way of
fast-once-through sanity checking before attempting that.
(This is _not_ a full code review pass. For now, I'm assuming
that most of the routines work, or don't work, as documented.)
Revision Changes Path
No revision
No revision
1.1.2.6 +230 -11 xml-xalan/java/src/org/apache/xml/dtm/Attic/DTMDocumentImpl.java
Index: DTMDocumentImpl.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/dtm/Attic/DTMDocumentImpl.java,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -u -r1.1.2.5 -r1.1.2.6
--- DTMDocumentImpl.java 2001/05/08 16:45:19 1.1.2.5
+++ DTMDocumentImpl.java 2001/05/09 00:35:28 1.1.2.6
@@ -122,7 +122,7 @@
// nodes array: integer array blocks to hold the first level reference of the nodes,
// each reference slot is addressed by a nodeHandle index value.
- // Assumed indices are not larger than {@link NODEHANDLE_MASK}
+ // Assumes indices are not larger than {@link NODEHANDLE_MASK}
// ({@link DOCHANDLE_SHIFT} bits).
ChunkedIntArray nodes = new ChunkedIntArray(4);
@@ -152,6 +152,11 @@
/**
* Construct a DTM.
+ *
+ * %REVIEW% Do we really want to support a no-arguments constructor
+ * defaulting to document number 0? Or do we want to insist the
+ * document ID number always be supplied, and let the caller pass 0
+ * if that's really what they intend? The latter seems safer.
*/
public DTMDocumentImpl(){
initDocument(0); // clear nodes and document handle
@@ -172,6 +177,7 @@
private final int appendNode(int w0, int w1, int w2, int w3)
{
// A decent compiler will probably inline this.
+ // %REVIEW% jjk Do we want to rely on "a decent JIT compiler"?
int slotnumber = nodes.appendSlot(w0, w1, w2, w3);
if (DEBUG) System.out.println(slotnumber+": "+w0+" "+w1+" "+w2+" "+w3);
@@ -205,6 +211,8 @@
/**
* Set a reference pointer to the element name symbol table.
+ * %REVIEW% Should this really be Public? Changing it while
+ * DTM is in use would be a disaster.
*
* @param poolRef DTMStringPool reference to an instance of table.
*/
@@ -212,8 +220,19 @@
m_elementNames = poolRef;
}
+ /**
+ * Get a reference pointer to the element name symbol table.
+ *
+ * @return DTMStringPool reference to an instance of table.
+ */
+ public DTMStringPool getElementNameTable() {
+ return m_elementNames;
+ }
+
/**
* Set a reference pointer to the namespace URI symbol table.
+ * %REVIEW% Should this really be Public? Changing it while
+ * DTM is in use would be a disaster.
*
* @param poolRef DTMStringPool reference to an instance of table.
*/
@@ -221,8 +240,19 @@
m_nsNames = poolRef;
}
+ /**
+ * Get a reference pointer to the namespace URI symbol table.
+ *
+ * @return DTMStringPool reference to an instance of table.
+ */
+ public DTMStringPool getNsNameTable() {
+ return m_nsNames;
+ }
+
/**
* Set a reference pointer to the attribute name symbol table.
+ * %REVIEW% Should this really be Public? Changing it while
+ * DTM is in use would be a disaster.
*
* @param poolRef DTMStringPool reference to an instance of table.
*/
@@ -230,8 +260,19 @@
m_attributeNames = poolRef;
}
+ /**
+ * Get a reference pointer to the attribute name symbol table.
+ *
+ * @return DTMStringPool reference to an instance of table.
+ */
+ public DTMStringPool getAttributeNameTable() {
+ return m_attributeNames;
+ }
+
/**
* Set a reference pointer to the prefix name symbol table.
+ * %REVIEW% Should this really be Public? Changing it while
+ * DTM is in use would be a disaster.
*
* @param poolRef DTMStringPool reference to an instance of table.
*/
@@ -240,6 +281,15 @@
}
/**
+ * Get a reference pointer to the prefix name symbol table.
+ *
+ * @return DTMStringPool reference to an instance of table.
+ */
+ public DTMStringPool getPrefixNameTable() {
+ return m_prefixNames;
+ }
+
+ /**
* Set a reference pointer to the expanded name symbol table.
*
* @param poolRef DTMStringPool reference to an instance of table.
@@ -250,13 +300,45 @@
// m_expandedNames = poolRef;
// }
+
+ /**
+ * Installs the content-text repository used by this DTM by storing
+ * the supplied buffer in m_char.
+ *
+ * @param buffer FastStringBuffer reference to an instance of
+ * buffer
+ */
+ void setContentBuffer(FastStringBuffer buffer)
+ {
+ this.m_char = buffer;
+ }
+
+ /**
+ * Get a reference pointer to the content-text repository.
+ *
+ * Declared to return FastStringBuffer rather than void: the body
+ * returns m_char, the field that setContentBuffer(FastStringBuffer)
+ * assigns, and a void method may not return a value.
+ *
+ * @return FastStringBuffer reference to an instance of buffer
+ */
+ FastStringBuffer getContentBuffer() {
+ return m_char;
+ }
+
+
+
+
+
// ========= Document Handler Functions =========
+ // %TBD% jjk -- DocumentHandler is SAX Level 1, and should
+ // be phased out in favor of ContentHandler/LexicalHandler
/**
* Receive notification of the beginning of a dtm document.
*
* The DTMManager will invoke this method when the dtm is created.
*
+ * %REVIEW% Given the way getDocument() is currently coded,
+ * the docHandle parameter is apparently supposed to be the
+ * document number pre-shifted up into the high bits. Do we
+ * really want to require that, or should we accept the
+ * document number instead and shift it for them?
+ *
* @param docHandle int the handle for the DTM document.
*/
final void initDocument(int docHandle)
@@ -282,7 +364,7 @@
{
done = true;
// %TBD% may need to notice the last slot number and slot count to avoid
- // residule data from provious use of this DTM
+ // residual data from previous use of this DTM
}
/**
@@ -536,6 +618,10 @@
* The node created will be chained according to its natural order of request
* received. %TBD% It can be rechained later via the optional DTM writable interface.
*
+ * %REVIEW% for text normalization issues, unless we are willing to
+ * insist that all adjacent text must be merged before this method
+ * is called.
+ *
* @param ch The characters from the XML document.
* @param start The start position in the array.
* @param length The number of characters to read from the array.
@@ -667,7 +753,7 @@
nodes.readSlot(kid, gotslot);
}
// If parent slot matches given parent, return kid
- if (gotslot[1] == nodeHandle) return kid & m_docHandle;
+ if (gotslot[1] == nodeHandle) return kid | m_docHandle;
}
// No child found
return NULL;
@@ -689,15 +775,15 @@
int lastChild = NULL;
for (int nextkid = getFirstChild(nodeHandle); nextkid != NULL;
nextkid = getNextSibling(nextkid)) {
- lastChild = nextkid & m_docHandle;
+ lastChild = nextkid;
}
- return lastChild;
+ return lastChild | m_docHandle;
}
/**
* Retrieves an attribute node by qualified name and namespace URI.
*
- * @param nodeHandle int Handle of the node.
+ * @param nodeHandle int Handle of the node upon which to look up this attribute.
* @param namespaceURI The namespace URI of the attribute to
* retrieve, or null.
* @param name The local name of the attribute to
@@ -718,7 +804,7 @@
// Iterate through Attribute Nodes
while (type == ATTRIBUTE_NODE) {
if ((nsIndex == (gotslot[0] << 16)) && (gotslot[3] == nameIndex))
- return nodeHandle & m_docHandle;
+ return nodeHandle | m_docHandle;
// Goto next sibling
nodeHandle = gotslot[2];
nodes.readSlot(nodeHandle, gotslot);
@@ -734,13 +820,19 @@
*/
public int getFirstAttribute(int nodeHandle) {
nodeHandle &= NODEHANDLE_MASK;
+
+ // %REVIEW% jjk: Just a quick observation: If you're going to
+ // call readEntry repeatedly on the same node, it may be
+ // more efficient to do a readSlot to get the data locally,
+ // reducing the addressing and call-and-return overhead.
+
// Should we check if handle is element (do we want sanity checks?)
if (ELEMENT_NODE != (nodes.readEntry(nodeHandle, 0) & 0xFFFF))
return NULL;
// First Attribute (if any) should be at next position in table
nodeHandle++;
return(ATTRIBUTE_NODE == (nodes.readEntry(nodeHandle, 0) & 0xFFFF)) ?
- nodeHandle & m_docHandle : NULL;
+ nodeHandle | m_docHandle : NULL;
}
/**
@@ -787,6 +879,9 @@
}
// Next Sibling is in the next position if it shares the same parent
int thisParent = nodes.readEntry(nodeHandle, 1);
+
+ // %REVIEW% jjk: Old code was reading from nodehandle+1.
+ // That would be ++nodeHandle, not nodeHandle++. Check this!
if (nodes.readEntry(nodeHandle++, 1) == thisParent)
return (m_docHandle | nodeHandle);
@@ -812,9 +907,9 @@
int kid = NULL;
for (int nextkid = getFirstChild(parent); nextkid != nodeHandle;
nextkid = getNextSibling(nextkid)) {
- kid = (m_docHandle | nextkid);
+ kid = nextkid;
}
- return kid;
+ return kid | m_docHandle;
}
/**
@@ -829,7 +924,13 @@
public int getNextAttribute(int nodeHandle) {
nodeHandle &= NODEHANDLE_MASK;
nodes.readSlot(nodeHandle, gotslot);
+
+ //%REVIEW% Why are we using short here? There's no storage
+ //reduction for an automatic variable, especially one used
+ //so briefly, and it typically costs more cycles to process
+ //than an int would.
short type = (short) (gotslot[0] & 0xFFFF);
+
if (type == ELEMENT_NODE) {
return getFirstAttribute(nodeHandle);
} else if (type == ATTRIBUTE_NODE) {
@@ -842,6 +943,10 @@
/**
* Given a namespace handle, advance to the next namespace.
*
+ * %TBD% THIS METHOD DOES NOT MATCH THE CURRENT SIGNATURE IN
+ * THE DTM INTERFACE. FIX IT, OR JUSTIFY CHANGING THE DTM
+ * API.
+ *
* @param namespaceHandle handle to node which must be of type NAMESPACE_NODE.
* @return handle of next namespace, or DTM.NULL to indicate none exists.
*/
@@ -863,7 +968,7 @@
public int getNextDescendant(int subtreeRootHandle, int nodeHandle) {
subtreeRootHandle &= NODEHANDLE_MASK;
nodeHandle &= NODEHANDLE_MASK;
- // Document root - no next-sib
+ // Document root [Document Node? -- jjk] - no next-sib
if (nodeHandle == 0)
return NULL;
while (!m_isError) {
@@ -1549,6 +1654,9 @@
throws org.xml.sax.SAXException {}
// ==== Construction methods (may not be supported by some implementations!) =====
+ // %REVIEW% jjk: These probably aren't the right API. At the very least
+ // they need to deal with current-insertion-location and end-element
+ // issues.
/**
* Append a child to the end of the child list of the current node. Please note that the node
@@ -1584,4 +1692,115 @@
// ###shs Think more about how this differs from createTextNode
createTextNode(str);
}
+
+
+ // ==== BUILDER methods ====
+ // %TBD% jjk: These are API sketches based on the assumption that the SAX
+ // ContentHandler adapter code lives in the DTMBuilder object and
+ // invokes these to actually construct the DTM nodes. An alternative
+ // would be to move that code directly into this class and have those
+ // methods construct the DTM directly. NOTE that it is assumed that the
+ // Builder code and the DTM instance have already negotiated to share the
+ // string pools/buffers, and that the Builder will accept full responsibility
+ // for populating those -- including normalizing across consecutive blocks
+ // of characters().
+
+ /** Append a text child at the current insertion point. Assumes that the
+ * actual content of the text has previously been appended to the m_char
+ * buffer (shared with the builder).
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ *
+ * @param contentStart int Starting offset of node's content in m_char.
+ * @param contentLength int Length of node's content in m_char.
+ * */
+ void appendTextChild(int contentStart,int contentLength)
+ {
+ // %TBD%
+ }
+
+ /** Append a comment child at the current insertion point. Assumes that the
+ * actual content of the comment has previously been appended to the m_char
+ * buffer (shared with the builder).
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ *
+ * @param contentStart int Starting offset of node's content in m_char.
+ * @param contentLength int Length of node's content in m_char.
+ * */
+ void appendComment(int contentStart,int contentLength)
+ {
+ // %TBD%
+ }
+
+
+ /** Append an Element child at the current insertion point. This
+ * Element then _becomes_ the insertion point; subsequent appends
+ * become its lastChild until an appendEndElement() call is made.
+ *
+ * Assumes that the symbols (local name, namespace URI and prefix)
+ * have already been added to the pools.
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ *
+ * @param namespaceIndex int Index within the namespaceURI string pool
+ * @param localNameIndex int Index within the local name string pool
+ * @param prefixIndex int Index within the prefix string pool
+ * */
+ void startElement(int namespaceIndex,int localNameIndex, int prefixIndex)
+ {
+ // %TBD%
+ }
+
+ /** Append a Namespace Declaration child at the current insertion point.
+ * Assumes that the symbols (namespace URI and prefix) have already been
+ * added to the pools.
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ *
+ * @param prefixIndex int Index within the prefix string pool
+ * @param namespaceIndex int Index within the namespaceURI string pool
+ * @param isID boolean If someone really insists on writing a bad DTD, it is
+ * theoretically possible for a namespace declaration to also be declared
+ * as being a node ID. I don't really want to support that stupidity,
+ * but I'm not sure we can refuse to accept it.
+ * */
+ void appendNSDeclaration(int prefixIndex, int namespaceIndex,
+ boolean isID)
+ {
+ // %TBD%
+ }
+
+ /** Append an Attribute child at the current insertion
+ * point. Assumes that the symbols (namespace URI, local name, and
+ * prefix) have already been added to the pools, and that the content has
+ * already been appended to m_char. Note that the attribute's content has
+ * been flattened into a single string; DTM does _NOT_ attempt to model
+ * the details of entity references within attribute values.
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ *
+ * @param namespaceIndex int Index within the namespaceURI string pool
+ * @param localNameIndex int Index within the local name string pool
+ * @param prefixIndex int Index within the prefix string pool
+ * @param isID boolean True if this attribute was declared as an ID
+ * (for use in supporting getElementByID).
+ * @param contentStart int Starting offset of node's content in m_char.
+ * @param contentLength int Length of node's content in m_char.
+ * */
+ void appendAttribute(int namespaceIndex, int localNameIndex, int prefixIndex,
+ boolean isID,
+ int contentStart, int contentLength)
+ {
+ // %TBD%
+ }
+
+
+
+ /** Terminate the element currently acting as an insertion point. Subsequent
+ * insertions will occur as the last child of this element's parent.
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ * */
+ void appendEndElement()
+ {
+ // %TBD%
+ }
+
+ /** All appends to this document have finished; do whatever final
+ * cleanup is needed. I expect this will actually be a no-op.
+ *
+ * Not yet implemented: the body is currently an empty placeholder.
+ * */
+ void appendEndDocument()
+ {
+ // %TBD%
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org