You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xalan.apache.org by jk...@apache.org on 2001/05/09 02:35:28 UTC

cvs commit: xml-xalan/java/src/org/apache/xml/dtm DTMDocumentImpl.java

jkesselm    01/05/08 17:35:28

  Modified:    java/src/org/apache/xml/dtm Tag: DTM_EXP
                        DTMDocumentImpl.java
  Log:
  Merge updated code, in progress. Some errors fixed (ands
  written where ors were intended), some concerns annotated,
  empty glue routines dropped in pending reconciliation with
  the DTMBuilder stub. I've got a bit more to do in the way of
  fast-once-through sanity checking before attempting that.
  (This is _not_ a full code review pass. For now, I'm assuming
  that most of the routines work, or don't work, as documented.)
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.6   +230 -11   xml-xalan/java/src/org/apache/xml/dtm/Attic/DTMDocumentImpl.java
  
  Index: DTMDocumentImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/dtm/Attic/DTMDocumentImpl.java,v
  retrieving revision 1.1.2.5
  retrieving revision 1.1.2.6
  diff -u -r1.1.2.5 -r1.1.2.6
  --- DTMDocumentImpl.java	2001/05/08 16:45:19	1.1.2.5
  +++ DTMDocumentImpl.java	2001/05/09 00:35:28	1.1.2.6
  @@ -122,7 +122,7 @@
   
   	// nodes array: integer array blocks to hold the first level reference of the nodes,
   	// each reference slot is addressed by a nodeHandle index value.
  -	// Assumed indices are not larger than {@link NODEHANDLE_MASK}
  +	// Assumes indices are not larger than {@link NODEHANDLE_MASK}
           // ({@link DOCHANDLE_SHIFT} bits).
   	ChunkedIntArray nodes = new ChunkedIntArray(4);
   
  @@ -152,6 +152,11 @@
   
   	/**
   	 * Construct a DTM.
  +	 *
  +	 * %REVIEW% Do we really want to support a no-arguments constructor
  +	 * defaulting to document number 0? Or do we want to insist the
  +	 * document ID number always be supplied, and let the caller pass 0
  +	 * if that's really what they intend? The latter seems safer.
   	 */
   	public DTMDocumentImpl(){
   		initDocument(0);		 // clear nodes and document handle
  @@ -172,6 +177,7 @@
   	private final int appendNode(int w0, int w1, int w2, int w3)
   	{
   		// A decent compiler will probably inline this.
  +	        // %REVIEW% jjk Do we want to rely on "a decent JIT compiler"?
   		int slotnumber = nodes.appendSlot(w0, w1, w2, w3);
   
   		if (DEBUG) System.out.println(slotnumber+": "+w0+" "+w1+" "+w2+" "+w3);
  @@ -205,6 +211,8 @@
   
   	/**
   	 * Set a reference pointer to the element name symbol table.
  +	 * %REVIEW% Should this really be Public? Changing it while
  +	 * DTM is in use would be a disaster.
   	 *
   	 * @param poolRef DTMStringPool reference to an instance of table.
   	 */
  @@ -212,8 +220,19 @@
   		m_elementNames = poolRef;
   	}
   
  +        /**
  +	 * Get a reference pointer to the element name symbol table.
  +	 *
  +	 * @return DTMStringPool reference to an instance of table.
  +	 */
  +        public DTMStringPool getElementNameTable() {
  +                 return m_elementNames;
  +         }
  +
   	/**
   	 * Set a reference pointer to the namespace URI symbol table.
  +	 * %REVIEW% Should this really be Public? Changing it while
  +	 * DTM is in use would be a disaster.
   	 *
   	 * @param poolRef DTMStringPool reference to an instance of table.
   	 */
  @@ -221,8 +240,19 @@
   		m_nsNames = poolRef;
   	}
   
  +        /**
  +	 * Get a reference pointer to the namespace URI symbol table.
  +	 *
  +	 * @return DTMStringPool reference to an instance of table.
  +	 */
  +        public DTMStringPool getNsNameTable() {
  +                 return m_nsNames;
  +         }
  +
   	/**
   	 * Set a reference pointer to the attribute name symbol table.
  +	 * %REVIEW% Should this really be Public? Changing it while
  +	 * DTM is in use would be a disaster.
   	 *
   	 * @param poolRef DTMStringPool reference to an instance of table.
   	 */
  @@ -230,8 +260,19 @@
   		m_attributeNames = poolRef;
   	}
   
  +        /**
  +	 * Get a reference pointer to the attribute name symbol table.
  +	 *
  +	 * @return DTMStringPool reference to an instance of table.
  +	 */
  +        public DTMStringPool getAttributeNameTable() {
  +                 return m_attributeNames;
  +         }
  +
   	/**
   	 * Set a reference pointer to the prefix name symbol table.
  +	 * %REVIEW% Should this really be Public? Changing it while
  +	 * DTM is in use would be a disaster.
   	 *
   	 * @param poolRef DTMStringPool reference to an instance of table.
   	 */
  @@ -240,6 +281,15 @@
   	}
   
   	/**
  +	 * Get a reference pointer to the prefix name symbol table.
  +	 *
  +	 * @return DTMStringPool reference to an instance of table.
  +	 */
  +	public DTMStringPool getPrefixNameTable() {
  +		return m_prefixNames;
  +	}
  +
  +	/**
   	 * Set a reference pointer to the expanded name symbol table.
   	 *
   	 * @param poolRef DTMStringPool reference to an instance of table.
  @@ -250,13 +300,45 @@
   	  //		m_expandedNames = poolRef;
   	  //	}
   
  +
  +         /**
  +          * Set a reference pointer to the content-text repository
  +          *
  +          * @param buffer FastStringBuffer reference to an instance of
  +          * buffer
  +          */
  +         void setContentBuffer(FastStringBuffer buffer) {
  +                 m_char = buffer;
  +         }
  + 
  +         /**
  +          * Get a reference pointer to the content-text repository (m_char).
  +          *
  +          * @return FastStringBuffer reference to an instance of buffer
  +          */
  +         FastStringBuffer getContentBuffer() {
  +                 return m_char;
  +         }
  +
  +
  +
  +
  +
   	// ========= Document Handler Functions =========
  +        // %TBD% jjk -- DocumentHandler is SAX Level 1, and should
  +        // be phased out in favor of ContentHandler/LexicalHandler
   
   	/**
   	 * Receive notification of the beginning of a dtm document.
   	 *
   	 * The DTMManager will invoke this method when the dtm is created.
   	 *
  +	 * %REVIEW% Given the way getDocument() is currently coded,
  +	 * the docHandle parameter is apparently supposed to be the
  +	 * document number pre-shifted up into the high bits. Do we
  +	 * really want to require that, or should we accept the
  +	 * document number instead and shift it for them?
  +	 *
   	 * @param docHandle int the handle for the DTM document.
   	 */
   	final void initDocument(int docHandle)
  @@ -282,7 +364,7 @@
   	{
   		done = true;
   		// %TBD% may need to notice the last slot number and slot count to avoid
  -		// residule data from provious use of this DTM
  +		// residual data from previous use of this DTM
   	}
   
   	/**
  @@ -536,6 +618,10 @@
   	 * The node created will be chained according to its natural order of request
   	 * received.  %TBD% It can be rechained later via the optional DTM writable interface.
   	 *
  +	 * %REVIEW% for text normalization issues, unless we are willing to
  +	 * insist that all adjacent text must be merged before this method
  +	 * is called.
  +	 *
   	 * @param ch The characters from the XML document.
   	 * @param start The start position in the array.
   	 * @param length The number of characters to read from the array.
  @@ -667,7 +753,7 @@
   				nodes.readSlot(kid, gotslot);
   			}
   			// If parent slot matches given parent, return kid
  -			if (gotslot[1] == nodeHandle)	return kid & m_docHandle;
  +			if (gotslot[1] == nodeHandle)	return kid | m_docHandle;
   		}
   		// No child found
   		return NULL;
  @@ -689,15 +775,15 @@
   		int lastChild = NULL;
   		for (int nextkid = getFirstChild(nodeHandle); nextkid != NULL;
   				nextkid = getNextSibling(nextkid)) {
  -			lastChild = nextkid & m_docHandle;
  +			lastChild = nextkid;
   		}
  -		return lastChild;
  +		return lastChild | m_docHandle;		
   	}
   
   	/**
   	 * Retrieves an attribute node by by qualified name and namespace URI.
   	 *
  -	 * @param nodeHandle int Handle of the node.
  +	 * @param nodeHandle int Handle of the node upon which to look up this attribute.
   	 * @param namespaceURI The namespace URI of the attribute to
   	 *   retrieve, or null.
   	 * @param name The local name of the attribute to
  @@ -718,7 +804,7 @@
   		// Iterate through Attribute Nodes
   		while (type == ATTRIBUTE_NODE) {
   			if ((nsIndex == (gotslot[0] << 16)) && (gotslot[3] == nameIndex))
  -				return nodeHandle & m_docHandle;
  +				return nodeHandle | m_docHandle;
   			// Goto next sibling
   			nodeHandle = gotslot[2];
   			nodes.readSlot(nodeHandle, gotslot);
  @@ -734,13 +820,19 @@
   	 */
   	public int getFirstAttribute(int nodeHandle) {
   		nodeHandle &= NODEHANDLE_MASK;
  +
  +		// %REVIEW% jjk: Just a quick observation: If you're going to
  +		// call readEntry repeatedly on the same node, it may be
  +		// more efficient to do a readSlot to get the data locally,
  +		// reducing the addressing and call-and-return overhead.
  +
   		// Should we check if handle is element (do we want sanity checks?)
   		if (ELEMENT_NODE != (nodes.readEntry(nodeHandle, 0) & 0xFFFF))
   			return NULL;
   		// First Attribute (if any) should be at next position in table
   		nodeHandle++;
   		return(ATTRIBUTE_NODE == (nodes.readEntry(nodeHandle, 0) & 0xFFFF)) ? 
  -		nodeHandle & m_docHandle : NULL;
  +		nodeHandle | m_docHandle : NULL;
   	}
   
   	/**
  @@ -787,6 +879,9 @@
   		}
   		// Next Sibling is in the next position if it shares the same parent
   		int thisParent = nodes.readEntry(nodeHandle, 1);
  +		
  +		// %REVIEW% jjk: Old code was reading from nodehandle+1.
  +		// That would be ++nodeHandle, not nodeHandle++. Check this!
   		if (nodes.readEntry(nodeHandle++, 1) == thisParent)
   			return (m_docHandle | nodeHandle);
   
  @@ -812,9 +907,9 @@
   		int kid = NULL;
   		for (int nextkid = getFirstChild(parent); nextkid != nodeHandle;
   				nextkid = getNextSibling(nextkid)) {
  -			kid = (m_docHandle | nextkid);
  +			kid = nextkid;
   		}
  -		return kid;
  +		return kid | m_docHandle;
   	}
   
   	/**
  @@ -829,7 +924,13 @@
   	public int getNextAttribute(int nodeHandle) {
   		nodeHandle &= NODEHANDLE_MASK;
   		nodes.readSlot(nodeHandle, gotslot);
  +
  +		//%REVIEW% Why are we using short here? There's no storage
  +		//reduction for an automatic variable, especially one used
  +		//so briefly, and it typically costs more cycles to process
  +		//than an int would.
   		short type = (short) (gotslot[0] & 0xFFFF);
  +
   		if (type == ELEMENT_NODE) {
   			return getFirstAttribute(nodeHandle);
   		} else if (type == ATTRIBUTE_NODE) {
  @@ -842,6 +943,10 @@
   	/**
   	 * Given a namespace handle, advance to the next namespace.
   	 *
  +	 * %TBD% THIS METHOD DOES NOT MATCH THE CURRENT SIGNATURE IN
  +	 * THE DTM INTERFACE.  FIX IT, OR JUSTIFY CHANGING THE DTM
  +	 * API.
  +	 *
   	 * @param namespaceHandle handle to node which must be of type NAMESPACE_NODE.
   	 * @return handle of next namespace, or DTM.NULL to indicate none exists.
   	 */
  @@ -863,7 +968,7 @@
   	public int getNextDescendant(int subtreeRootHandle, int nodeHandle) {
   		subtreeRootHandle &= NODEHANDLE_MASK;
   		nodeHandle &= NODEHANDLE_MASK;
  -		// Document root - no next-sib
  +		// Document root [Document Node? -- jjk] - no next-sib
   		if (nodeHandle == 0)
   			return NULL;
   		while (!m_isError) {
  @@ -1549,6 +1654,9 @@
   	throws org.xml.sax.SAXException {}
   
   	// ==== Construction methods (may not be supported by some implementations!) =====
  +	// %REVIEW% jjk: These probably aren't the right API. At the very least
  +	// they need to deal with current-insertion-location and end-element
  +	// issues.
   
   	/**
   	 * Append a child to the end of the child list of the current node. Please note that the node
  @@ -1584,4 +1692,115 @@
   		// ###shs Think more about how this differs from createTextNode
   		createTextNode(str);
   	}
  +
  +
  +  // ==== BUILDER methods ====
  +  // %TBD% jjk: These are API sketches based on the assumption that the SAX
  +  // ContentHandler adapter code lives in the DTMBuilder object and
  +  // invokes these to actually construct the DTM nodes. An alternative
  +  // would be to move that code directly into this class and have those
  +  // methods construct the DTM directly. NOTE that it is assumed that the
  +  // Builder code and the DTM instance have already negotiated to share the
  +  // string pools/buffers, and that the Builder will accept full responsibility
  +  // for populating those -- including normalizing across consecutive blocks
  +  // of characters().
  +
  +  /** Append a text child at the current insertion point. Assumes that the
  +   * actual content of the text has previously been appended to the m_char
  +   * buffer (shared with the builder).
  +   *
  +   * @param contentStart int Starting offset of node's content in m_char.
  +   * @param contentLength int Length of node's content in m_char.
  +   * */
  +  void appendTextChild(int contentStart,int contentLength)
  +  {
  +    // %TBD%
  +  }
  +  
  +  /** Append a comment child at the current insertion point. Assumes that the
  +   * actual content of the comment has previously been appended to the m_char
  +   * buffer (shared with the builder).
  +   *
  +   * @param contentStart int Starting offset of node's content in m_char.
  +   * @param contentLength int Length of node's content in m_char.
  +   * */
  +  void appendComment(int contentStart,int contentLength)
  +  {
  +    // %TBD%
  +  }
  +  
  +  
  +  /** Append an Element child at the current insertion point. This
  +   * Element then _becomes_ the insertion point; subsequent appends
  +   * become its lastChild until an appendEndElement() call is made.
  +   * 
  +   * Assumes that the symbols (local name, namespace URI and prefix)
  +   * have already been added to the pools
  +   *
  +   * @param namespaceIndex Index within the namespaceURI string pool
  +   * @param localNameIndex Index within the local name string pool
  +   * @param prefixIndex Index within the prefix string pool
  +   * */
  +  void startElement(int namespaceIndex,int localNameIndex, int prefixIndex)
  +  {
  +    // %TBD%
  +  }
  +  
  +  /** Append a Namespace Declaration child at the current insertion point.
  +   * Assumes that the symbols (namespace URI and prefix) have already been
  +   * added to the pools
  +   *
  +   * @param prefixIndex Index within the prefix string pool
  +   * @param namespaceIndex Index within the namespaceURI string pool
  +   * @param isID If someone really insists on writing a bad DTD, it is
  +   * theoretically possible for a namespace declaration to also be declared
  +   * as being a node ID. I don't really want to support that stupidity,
  +   * but I'm not sure we can refuse to accept it.
  +   * */
  +  void appendNSDeclaration(int prefixIndex, int namespaceIndex,
  +                           boolean isID)
  +  {
  +    // %TBD%
  +  }
  +
  +  /** Append an Attribute child at the current insertion
  +   * point.  Assumes that the symbols (namespace URI, local name, and
  +   * prefix) have already been added to the pools, and that the content has
  +   * already been appended to m_char. Note that the attribute's content has
  +   * been flattened into a single string; DTM does _NOT_ attempt to model
  +   * the details of entity references within attribute values.
  +   *
  +   * @param namespaceIndex int Index within the namespaceURI string pool
  +   * @param localNameIndex int Index within the local name string pool
  +   * @param prefixIndex int Index within the prefix string pool
  +   * @param isID boolean True if this attribute was declared as an ID
  +   * (for use in supporting getElementByID).
  +   * @param contentStart int Starting offset of node's content in m_char.
  +   * @param contentLength int Length of node's content in m_char.
  +   * */
  +  void appendAttribute(int namespaceIndex, int localNameIndex, int prefixIndex,
  +                       boolean isID,
  +                       int contentStart, int contentLength)
  +  {
  +    // %TBD%
  +  }
  +  
  +
  +
  +  /** Terminate the element currently acting as an insertion point. Subsequent
  +   * insertions will occur as the last child of this element's parent.
  +   * */
  +  void appendEndElement()
  +  {
  +    // %TBD%
  +  }
  +  
  +  /**  All appends to this document have finished; do whatever final
  +   * cleanup is needed. I expect this will actually be a no-op.
  +   * */
  +  void appendEndDocument()
  +  {
  +    // %TBD%
  +  }
  +
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org