You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by sk...@apache.org on 2004/04/17 12:03:07 UTC
cvs commit: jakarta-commons/digester/src/examples/api/document-markup Main.java MarkupDigester.java SetTextSegmentRule.java TextSegmentHandler.java build.xml readme.txt

skitching    2004/04/17 03:03:07

  Added:       digester/src/examples/api/document-markup Main.java
                        MarkupDigester.java SetTextSegmentRule.java
                        TextSegmentHandler.java build.xml readme.txt
  Log:
  Example showing how to parse "document-markup" style xml
  (aka "mixed content").
  
  Revision  Changes    Path
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/Main.java
  
  Index: Main.java
  ===================================================================
  /*
   * Copyright 2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */ 
  
  /**
   * Small demonstration driver for the MarkupDigester class (and its
   * supporting interface/rule classes), showing how "document-markup"
   * style xml data can be processed.
   * <p>
   * Every callback method on this class simply prints what it was given
   * to standard output.
   * <p>
   * See the readme file included with this example for more information.
   */
  
  public class Main {
      /** The input xml to be parsed by this example. */
      String in = "<p>Hi, this is an <i>example</i> of some <b>bold</b> text.</p>";
  
      /** Callback for each text segment present in the parsed input. */
      public void addSegment(String text) {
          String line = "Text segment: [" + text + "]";
          System.out.println(line);
      }
  
      /** Callback for each &lt;i&gt; node found in the parsed input. */
      public void addItalic(String text) {
          String line = "Italic: [" + text + "]";
          System.out.println(line);
      }
  
      /** Callback for each &lt;b&gt; node found in the parsed input. */
      public void addBold(String text) {
          String line = "Bold: [" + text + "]";
          System.out.println(line);
      }
  
      /**
       * Callback wired up through a standard Digester CallMethodRule, which
       * passes the "body text" of the top-level xml element. It is here to
       * show the default behaviour of Digester (which is not suitable for
       * processing markup-style xml).
       */
      public void addAllText(String text) {
          String line =
              "And the merged text for the p element is [" + text + "]";
          System.out.println(line);
      }
  
      /**
       * Registers the digester rules used by this example. The rules cause
       * the callback methods on the object at the top of the digester stack
       * to be invoked.
       *
       * @param digester the digester to add the rules to
       */
      private void registerRules(MarkupDigester digester) {
          // Rules on "p": one callback per text segment, plus the standard
          // "whole body text" callback for comparison.
          digester.addRule("p", new SetTextSegmentRule("addSegment"));
          digester.addCallMethod("p", "addAllText", 0);
  
          // Rules on the inline child elements.
          digester.addCallMethod("p/i", "addItalic", 0);
          digester.addCallMethod("p/b", "addBold", 0);
      }
  
      /**
       * Entry point of this test harness. Sets up some digester rules, then
       * parses the input xml held in the "in" member variable. The rules
       * call back into this object, which just dumps information to standard
       * output, to show the callbacks that a real program could arrange to
       * get when parsing markup input.
       *
       * @throws Exception if the digester reports a problem while parsing
       */
      public void run() throws Exception {
          System.out.println("Started.");
  
          MarkupDigester digester = new MarkupDigester();
          digester.push(this);
          registerRules(digester);
  
          java.io.Reader source = new java.io.StringReader(in);
          digester.parse(source);
  
          System.out.println("Finished.");
      }
  
      /** See the run method. */
      public static void main(String[] args) throws Exception {
          Main harness = new Main();
          harness.run();
      }
  }
  
  
  
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/MarkupDigester.java
  
  Index: MarkupDigester.java
  ===================================================================
  /*
   * Copyright 2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */ 
  
  import org.apache.commons.digester.Digester;
  import org.apache.commons.digester.Rule;
  
  import java.util.List;
  import javax.xml.parsers.SAXParser;
  import org.xml.sax.XMLReader;
  import org.xml.sax.SAXException;
  import org.xml.sax.Attributes;
  
  /**
   * This is a subclass of digester which supports rules which implement
   * the TextSegmentHandler interface. The "textSegment" method on each
   * matching rule (of the appropriate type) is invoked whenever a non-empty
   * run of text within an element is terminated, either by the start of a
   * child element or by the end tag of the element itself.
   * <p>
   * See the readme file included with this example for more information.
   */
  
  public class MarkupDigester extends Digester {
  
      /** See equivalent constructor in Digester class. */
      public MarkupDigester() {
      }
  
      /** See equivalent constructor in Digester class. */
      public MarkupDigester(SAXParser parser) {
          super(parser);
      }
  
      /** See equivalent constructor in Digester class. */
      public MarkupDigester(XMLReader reader) {
          super(reader);
      }
  
      //===================================================================
  
      /**
       * The text found in the current element since the last child element.
       * <p>
       * Unlike bodyText, which accumulates despite intervening child
       * elements, this buffer is emptied every time an element starts or
       * ends. This means that it does not need to be saved on a stack.
       */
      protected StringBuffer currTextSegment = new StringBuffer();
  
      /**
       * Process notification of character data received from the body of
       * an XML element. The data is passed on to the standard Digester
       * handling and also appended to the current text segment.
       *
       * @param buffer The characters from the XML document
       * @param start Starting offset into the buffer
       * @param length Number of characters from the buffer
       *
       * @exception SAXException if a parsing error is to be reported
       */
      public void characters(char buffer[], int start, int length)
              throws SAXException {
  
          super.characters(buffer, start, length);
          currTextSegment.append(buffer, start, length);
      }
  
      /**
       * Process notification of the start of an XML element being reached.
       * Any text accumulated since the previous start/end tag is first
       * reported to the rules matching the enclosing element.
       *
       * @param namespaceURI The Namespace URI, or the empty string if the element
       *   has no Namespace URI or if Namespace processing is not being performed.
       * @param localName The local name (without prefix), or the empty
       *   string if Namespace processing is not being performed.
       * @param qName The qualified name (with prefix), or the empty
       *   string if qualified names are not available.
       * @param list The attributes attached to the element. If there are
       *   no attributes, it shall be an empty Attributes object.
       * @exception SAXException if a parsing error is to be reported
       */
  
      public void startElement(String namespaceURI, String localName,
                               String qName, Attributes list)
              throws SAXException {
  
          // Must run before super.startElement, while the top of the
          // "matches" stack still refers to the enclosing element.
          handleTextSegments();
  
          super.startElement(namespaceURI, localName, qName, list);
      }
  
      /**
       * Process notification of the end of an XML element being reached.
       * Any text accumulated since the previous start/end tag is first
       * reported to the rules matching the element that is ending.
       *
       * @param namespaceURI - The Namespace URI, or the empty string if the
       *   element has no Namespace URI or if Namespace processing is not
       *   being performed.
       * @param localName - The local name (without prefix), or the empty
       *   string if Namespace processing is not being performed.
       * @param qName - The qualified XML 1.0 name (with prefix), or the
       *   empty string if qualified names are not available.
       * @exception SAXException if a parsing error is to be reported
       */
      public void endElement(String namespaceURI, String localName,
                             String qName) throws SAXException {
  
          // Must run before super.endElement, while the top of the
          // "matches" stack still refers to the element that is ending.
          handleTextSegments();
  
          super.endElement(namespaceURI, localName, qName);
      }
  
      /**
       * Iterate over the list of rules most recently matched, and
       * if any of them implement the TextSegmentHandler interface then
       * invoke that rule's textSegment method passing the current
       * segment of text from the xml element body. Nothing is reported
       * when the current segment is empty.
       * <p>
       * The current segment is always emptied before this method returns,
       * even when a handler fails, so that stale text can never be reported
       * a second time.
       *
       * @exception SAXException wrapping any exception thrown by a handler
       */
      private void handleTextSegments() throws SAXException {
          if (currTextSegment.length() == 0) {
              return;
          }
  
          try {
              // The entry on the matches stack may be null when no rules
              // matched; the Digester class itself allows for that case,
              // so do the same here rather than fail.
              List parentMatches = (List) matches.peek();
              if (parentMatches == null) {
                  return;
              }
  
              String segment = currTextSegment.toString();
              int len = parentMatches.size();
              for (int i = 0; i < len; ++i) {
                  Rule r = (Rule) parentMatches.get(i);
                  if (r instanceof TextSegmentHandler) {
                      TextSegmentHandler h = (TextSegmentHandler) r;
                      try {
                          h.textSegment(segment);
                      } catch (Exception e) {
                          throw createSAXException(e);
                      }
                  }
              }
          } finally {
              currTextSegment.setLength(0);
          }
      }
  }
  
  
  
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/SetTextSegmentRule.java
  
  Index: SetTextSegmentRule.java
  ===================================================================
  /*
   * Copyright 2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */ 
  
  import org.apache.commons.beanutils.MethodUtils;
  import org.apache.commons.digester.Rule;
  
  /**
   * Rule which, each time it is handed a text segment, calls a named
   * method on the top object on the digester stack, passing the segment
   * text as the single String argument.
   */
  
  public class SetTextSegmentRule extends Rule implements TextSegmentHandler {
  
  
      // ----------------------------------------------------- Instance Variables
  
  
      /**
       * The name of the method to call on the top object on the stack.
       */
      protected String methodName = null;
  
      // ----------------------------------------------------------- Constructors
  
      /**
       * Creates a rule which reports text segments via the given method.
       *
       * @param methodName name of the method to invoke; the method is
       *   looked up with a single String parameter
       */
      public SetTextSegmentRule(String methodName) {
          this.methodName = methodName;
      }
  
      // --------------------------------------------------------- Public Methods
  
      /**
       * Handle one segment of text by passing it to the configured method
       * of the object currently on top of the digester stack.
       *
       * @param text the text segment to pass to the target method
       * @throws Exception if the target method cannot be invoked, or fails
       */
      public void textSegment(String text) throws Exception {
  
          Object top = digester.peek(0);
  
          // Look the method up by name and a single String parameter.
          Object[] args = { text };
          Class[] argTypes = { String.class };
          MethodUtils.invokeMethod(top, methodName, args, argTypes);
      }
  }
  
  
  
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/TextSegmentHandler.java
  
  Index: TextSegmentHandler.java
  ===================================================================
  /*
   * Copyright 2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */ 
  
  /**
   * Public interface for any Rule subclass which is interested in handling
   * text segments as well as the complete body text.
   */
  
  public interface TextSegmentHandler {
      public void textSegment(String text) throws Exception;
  }
  
  
  
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/build.xml
  
  Index: build.xml
  ===================================================================
  <!-- Ant build file for the document-markup Digester example.
       Targets: compile (default), clean, all, javadoc, run. -->
  <project name="Example-Markup" default="compile" basedir=".">
  
  
  <!-- ========== Initialize Properties ===================================== -->
  
  
    <!-- Optional override files, listed from most specific to most general.
         Ant properties cannot be changed once set, so a value found in an
         earlier file wins over later files and over the defaults below. -->
    <property file="build.properties"/>                <!-- Component local   -->
    <property file="../build.properties"/>             <!-- examples/api local-->
    <property file="../../../../build.properties"/>    <!-- Digester local     -->
    <property file="../../../../../build.properties"/> <!-- Commons local     -->
    <property file="${user.home}/build.properties"/>   <!-- User local        -->
  
  
  <!-- ========== External Dependencies ===================================== -->
  
  
    <!-- The directories corresponding to your necessary dependencies -->
    <!-- These are defaults, used only for properties not already set by one
         of the build.properties files above. -->
    <property name="jaxp.home"               value="/usr/local/jaxp1.1"/>
    <property name="commons.home"            value="../../../../.."/>
    <property name="beanutils.home"          value="${commons.home}/beanutils"/>
    <property name="collections.home"        value="${commons.home}/collections"/>
    <property name="logging.home"            value="${commons.home}/logging"/>
    <property name="digester.home"            value="${commons.home}/digester"/>
  
  
  <!-- ========== Derived Values ============================================ -->
  
  
    <!-- The locations of necessary jar files -->
    <property name="jaxp.jaxp.jar"           value="${jaxp.home}/jaxp.jar"/>
    <property name="jaxp.parser.jar"         value="${jaxp.home}/crimson.jar"/>
    <property name="commons-beanutils.jar"   value="${beanutils.home}/dist/commons-beanutils.jar"/>
    <property name="commons-collections.jar" value="${collections.home}/dist/commons-collections.jar"/>
    <property name="commons-logging.jar"     value="${logging.home}/dist/commons-logging.jar"/>
    <property name="commons-digester.jar"     value="${digester.home}/dist/commons-digester.jar"/>
  
  
  <!-- ========== Component Declarations ==================================== -->
  
    <!-- The name of this component -->
    <property name="component.name"          value="markup"/>
  
  
  <!-- ========== Compiler Defaults ========================================= -->
  
    <!-- Should Java compilations set the 'debug' compiler option? -->
    <property name="compile.debug"           value="true"/>
  
    <!-- Should Java compilations set the 'deprecation' compiler option? -->
    <property name="compile.deprecation"     value="false"/>
  
    <!-- Should Java compilations set the 'optimize' compiler option? -->
    <property name="compile.optimize"        value="true"/>
  
    <!-- Construct compile classpath -->
    <!-- The current directory comes first, followed by the jar files
         located via the properties above. This path is also used by the
         javadoc and run targets. -->
    <path id="compile.classpath">
      <pathelement location="."/>
      <pathelement location="${jaxp.jaxp.jar}"/>
      <pathelement location="${jaxp.parser.jar}"/>
      <pathelement location="${commons-beanutils.jar}"/>
      <pathelement location="${commons-collections.jar}"/>
      <pathelement location="${commons-logging.jar}"/>
      <pathelement location="${commons-digester.jar}"/>
    </path>
  
  
  <!-- ========== Executable Targets ======================================== -->
  
  
    <!-- Compile the sources in this directory, writing the class files
         into this same directory. -->
    <target name="compile">
      <javac  srcdir="."
             destdir="."
               debug="${compile.debug}"
         deprecation="${compile.deprecation}"
            optimize="${compile.optimize}">
        <classpath refid="compile.classpath"/>
      </javac>
    </target>
  
  
    <!-- Delete the class files in this directory and the generated docs
         directory. -->
    <target name="clean">
      <delete>
        <fileset dir="." includes="*.class"/>
      </delete>
      <delete dir="docs"/>
    </target>
  
    <!-- Rebuild from scratch: clean, then compile. -->
    <target name="all" depends="clean,compile"/>
  
    <!-- Generate javadoc (private members included) for the .java files in
         this directory, into the docs directory. -->
    <target name="javadoc" depends="compile">
      <mkdir      dir="docs"/>
      <javadoc destdir="docs"
                   author="true"
                  private="true"
                  version="true">
        <classpath  refid="compile.classpath"/>
        <fileset dir="." includes="*.java"/>
      </javadoc>
    </target>
  
    <!-- Compile, then run the Main class in a forked JVM using the compile
         classpath plus the current directory. -->
    <target name="run" depends="compile">
      <java classname="Main" fork="yes">
        <classpath refid="compile.classpath"/>
        <classpath>
          <pathelement location="."/>
        </classpath>
      </java>
    </target>
  </project>
  
  
  
  1.1                  jakarta-commons/digester/src/examples/api/document-markup/readme.txt
  
  Index: readme.txt
  ===================================================================
  == overview
  
  The files in this directory are intended as an example of how to use
  the Apache Digester to parse "document-markup" style xml. It also serves as an
  example of how to subclass the main Digester class in order to extend
  its functionality.
  
  By "document-markup" xml, we mean input like XHTML, where the data is valid
  xml and where some elements contain interleaved text and child elements.
  
  For example, "<p>Hi, <i>this</i> is some <b>document-style</b> xml.</p>"
  
  Topics covered:
  * how to subclass digester
  * how to process markup-style xml.
  
  == compiling and running
  
  First rename the build.properties.sample file in the parent directory
  to build.properties and edit it to suit your environment. Then in this
  directory:
  
  * to compile:
    ant compile
  
  * to run:
    ant run
  
  Alternatively, you can set up your CLASSPATH appropriately, and
  run the example directly. See the build.properties and build.xml
  files for details.
  
  == Notes
  
  The primary use of the Digester is to process xml configuration files.
  Such files do not typically interleave text and child elements in the
  style encountered with document markup. The standard Digester behaviour is 
  therefore to accumulate all text within an xml element's body (of which there is
  expected to be only one "segment") and present it to a Rule or user method
  as a single string.
  
  While this significantly simplifies the implementation of Rule classes for
  the primary Digester goal of parsing configuration files, merging all of
  the text within an element into a single string "loses" the information
  about where each child element occurred relative to that text, which is
  critical for correctly parsing "document-markup" xml.
  
  This example shows one method of extending the Digester class to resolve
  this issue.
  
  At some time the ability to process "document-markup" style xml may be built 
  into the standard Digester class.
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org