You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@apache.org on 2001/06/07 10:26:27 UTC

cvs commit: xml-xerces/java/samples/xni Counter.java

andyc       01/06/07 01:26:27

  Modified:    java/samples/sax Tag: xerces_j_2 Counter.java
               java/samples/xni Tag: xerces_j_2 Counter.java
  Log:
  Added a new "tagginess" feature that reports a tag factor for the
  parsed files. This gives a rough estimate of how much of a file is
  consumed by element, attribute, and processing instruction tag
  characters such as "<", "='", "<?", etc. Attribute values and PI
  data are not counted as tag characters because they are considered
  values and cannot simply be removed or compressed by a custom
  serialization mechanism.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.5   +61 -4     xml-xerces/java/samples/sax/Attic/Counter.java
  
  Index: Counter.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/samples/sax/Attic/Counter.java,v
  retrieving revision 1.1.2.4
  retrieving revision 1.1.2.5
  diff -u -r1.1.2.4 -r1.1.2.5
  --- Counter.java	2001/04/16 05:42:21	1.1.2.4
  +++ Counter.java	2001/06/07 08:26:18	1.1.2.5
  @@ -90,7 +90,7 @@
    *
    * @author Andy Clark, IBM
    *
  - * @version $Id: Counter.java,v 1.1.2.4 2001/04/16 05:42:21 andyc Exp $
  + * @version $Id: Counter.java,v 1.1.2.5 2001/06/07 08:26:18 andyc Exp $
    */
   public class Counter
       extends DefaultHandler {
  @@ -127,6 +127,9 @@
       /** Default memory usage report (false). */
       protected static final boolean DEFAULT_MEMORY_USAGE = false;
   
  +    /** Default "tagginess" report (false). */
  +    protected static final boolean DEFAULT_TAGGINESS = false;
  +
       //
       // Data
       //
  @@ -143,6 +146,9 @@
       /** Number of ignorable whitespace characters. */
       protected long fIgnorableWhitespace;
   
  +    /** Number of characters of tags. */
  +    protected long fTagCharacters;
  +
       //
       // Constructors
       //
  @@ -156,7 +162,8 @@
       //
   
       /** Prints the results. */
  -    public void printResults(PrintWriter out, String uri, long time, long memory) {
  +    public void printResults(PrintWriter out, String uri, long time, 
  +                             long memory, boolean tagginess) {
   
           // filename.xml: 631 ms (4 elems, 0 attrs, 78 spaces, 0 chars)
           out.print(uri);
  @@ -177,6 +184,14 @@
           out.print(" spaces, ");
           out.print(fCharacters);
           out.print(" chars)");
  +        if (tagginess) {
  +            out.print(' ');
  +            long totalCharacters = fTagCharacters 
  +                                 + fCharacters + fIgnorableWhitespace;
  +            long tagValue = fTagCharacters * 100 / totalCharacters;
  +            out.print(tagValue);
  +            out.print("% tagginess");
  +        }
           out.println();
           out.flush();
   
  @@ -193,6 +208,7 @@
           fAttributes          = 0;
           fCharacters          = 0;
           fIgnorableWhitespace = 0;
  +        fTagCharacters       = 0;
   
       } // startDocument()
   
  @@ -201,9 +217,20 @@
                                Attributes attrs) throws SAXException {
   
           fElements++;
  +        fTagCharacters++; // open angle bracket
  +        fTagCharacters += raw.length();
           if (attrs != null) {
  -            fAttributes += attrs.getLength();
  +            int attrCount = attrs.getLength();
  +            fAttributes += attrCount;
  +            for (int i = 0; i < attrCount; i++) {
  +                fTagCharacters++; // space
  +                fTagCharacters += attrs.getQName(i).length();
  +                fTagCharacters++; // '='
  +                fTagCharacters++; // open quote
  +                fTagCharacters++; // close quote
  +            }
           }
  +        fTagCharacters++; // close angle bracket
   
       } // startElement(String,String,StringAttributes)
   
  @@ -223,6 +250,17 @@
   
       } // ignorableWhitespace(char[],int,int);
   
  +    /** Processing instruction. */
  +    public void processingInstruction(String target, String data)
  +        throws SAXException {
  +        fTagCharacters += 2; // "<?"
  +        fTagCharacters += target.length();
  +        if (data != null && data.length() > 0) {
  +            fTagCharacters++; // space
  +        }
  +        fTagCharacters += 2; // "?>"
  +    } // processingInstruction(String,String)
  +
       //
       // ErrorHandler methods
       //
  @@ -292,6 +330,7 @@
           boolean validation = DEFAULT_VALIDATION;
           boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION;
           boolean memoryUsage = DEFAULT_MEMORY_USAGE;
  +        boolean tagginess = DEFAULT_TAGGINESS;
           
           // process arguments
           for (int i = 0; i < argv.length; i++) {
  @@ -339,6 +378,10 @@
                       memoryUsage = option.equals("m");
                       continue;
                   }
  +                if (option.equalsIgnoreCase("t")) {
  +                    tagginess = option.equals("t");
  +                    continue;
  +                }
                   if (option.equals("-rem")) {
                       if (++i == argv.length) {
                           System.err.println("error: Missing argument to -# option.");
  @@ -352,6 +395,8 @@
                       printUsage();
                       continue;
                   }
  +                System.err.println("error: unknown option ("+option+").");
  +                continue;
               }
   
               // use default parser?
  @@ -403,7 +448,7 @@
                   long time = timeAfter - timeBefore;
                   long memory = memoryUsage 
                               ? memoryBefore - memoryAfter : Long.MIN_VALUE;
  -                counter.printResults(out, arg, time, memory);
  +                counter.printResults(out, arg, time, memory, tagginess);
               }
               catch (SAXParseException e) {
                   // ignore
  @@ -436,6 +481,7 @@
           System.err.println("  -s | -S     Turn on/off Schema validation support.");
           System.err.println("              NOTE: Not supported by all parsers.");
           System.err.println("  -m | -M     Turn on/off memory usage report");
  +        System.err.println("  -t | -T     Turn on/off \"tagginess\" report.");
           System.err.println("  --rem text  Output user defined comment before next parse.");
           System.err.println("  -h          This help screen.");
   
  @@ -450,6 +496,8 @@
           System.err.println(DEFAULT_SCHEMA_VALIDATION ? "on" : "off");
           System.err.print("  Memory:     ");
           System.err.println(DEFAULT_MEMORY_USAGE ? "on" : "off");
  +        System.err.print("  Tagginess:  ");
  +        System.err.println(DEFAULT_TAGGINESS ? "on" : "off");
   
           System.err.println();
           System.err.println("notes:");
  @@ -457,6 +505,15 @@
           System.err.println("  basis of parser performance comparison! Real analytical methods should be");
           System.err.println("  used. For better results, perform multiple document parses within the same");
           System.err.println("  virtual machine to remove class loading from parse time and memory usage.");
  +        System.out.println();
  +        System.out.println("  The \"tagginess\" measurement gives a rough estimate of the percentage of");
  +        System.out.println("  markup versus content in the XML document. The percent tagginess of a ");
  +        System.out.println("  document is equal to the minimum amount of tag characters required for ");
  +        System.out.println("  elements, attributes, and processing instructions divided by the total");
  +        System.out.println("  amount of characters (characters, ignorable whitespace, and tag characters)");
  +        System.out.println("  in the document.");
  +        System.out.println();
  +        System.err.println("  Not all features are supported by different parser configurations.");
   
       } // printUsage()
   
  
  
  
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.3   +83 -4     xml-xerces/java/samples/xni/Attic/Counter.java
  
  Index: Counter.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/samples/xni/Attic/Counter.java,v
  retrieving revision 1.1.2.2
  retrieving revision 1.1.2.3
  diff -u -r1.1.2.2 -r1.1.2.3
  --- Counter.java	2001/05/09 21:10:46	1.1.2.2
  +++ Counter.java	2001/06/07 08:26:24	1.1.2.3
  @@ -89,7 +89,7 @@
    *
    * @author Andy Clark, IBM
    *
  - * @version $Id: Counter.java,v 1.1.2.2 2001/05/09 21:10:46 lehors Exp $
  + * @version $Id: Counter.java,v 1.1.2.3 2001/06/07 08:26:24 andyc Exp $
    */
   public class Counter
       extends XMLDocumentParser 
  @@ -127,6 +127,9 @@
       /** Default memory usage report (false). */
       protected static final boolean DEFAULT_MEMORY_USAGE = false;
   
  +    /** Default "tagginess" report (false). */
  +    protected static final boolean DEFAULT_TAGGINESS = false;
  +
       //
       // Data
       //
  @@ -143,6 +146,9 @@
       /** Number of ignorable whitespace characters. */
       protected long fIgnorableWhitespace;
   
  +    /** Number of characters of tags. */
  +    protected long fTagCharacters;
  +
       //
       // Constructors
       //
  @@ -158,7 +164,8 @@
       //
   
       /** Prints the results. */
  -    public void printResults(PrintWriter out, String uri, long time, long memory) {
  +    public void printResults(PrintWriter out, String uri, long time, 
  +                             long memory, boolean tagginess) {
   
           // filename.xml: 631 ms (4 elems, 0 attrs, 78 spaces, 0 chars)
           out.print(uri);
  @@ -179,6 +186,14 @@
           out.print(" spaces, ");
           out.print(fCharacters);
           out.print(" chars)");
  +        if (tagginess) {
  +            out.print(' ');
  +            long totalCharacters = fTagCharacters 
  +                                 + fCharacters + fIgnorableWhitespace;
  +            long tagValue = fTagCharacters * 100 / totalCharacters;
  +            out.print(tagValue);
  +            out.print("% tagginess");
  +        }
           out.println();
           out.flush();
   
  @@ -196,6 +211,7 @@
           fAttributes          = 0;
           fCharacters          = 0;
           fIgnorableWhitespace = 0;
  +        fTagCharacters       = 0;
   
       } // startDocument(String,String)
   
  @@ -204,9 +220,43 @@
           throws XNIException {
   
           fElements++;
  +        fTagCharacters++; // open angle bracket
  +        fTagCharacters += element.rawname.length();
  +        if (attrs != null) {
  +            int attrCount = attrs.getLength();
  +            fAttributes += attrCount;
  +            for (int i = 0; i < attrCount; i++) {
  +                fTagCharacters++; // space
  +                fTagCharacters += attrs.getName(i).length();
  +                fTagCharacters++; // '='
  +                fTagCharacters++; // open quote
  +                fTagCharacters++; // close quote
  +            }
  +        }
  +        fTagCharacters++; // close angle bracket
  +
  +    } // startElement(QName,XMLAttributes)
  +
  +    /** Empty element. */
  +    public void emptyElement(QName element, XMLAttributes attrs) 
  +        throws XNIException {
  +
  +        fElements++;
  +        fTagCharacters++; // open angle bracket
  +        fTagCharacters += element.rawname.length();
           if (attrs != null) {
  -            fAttributes += attrs.getLength();
  +            int attrCount = attrs.getLength();
  +            fAttributes += attrCount;
  +            for (int i = 0; i < attrCount; i++) {
  +                fTagCharacters++; // space
  +                fTagCharacters += attrs.getName(i).length();
  +                fTagCharacters++; // '='
  +                fTagCharacters++; // open quote
  +                fTagCharacters++; // close quote
  +            }
           }
  +        fTagCharacters++; // forward slash
  +        fTagCharacters++; // close angle bracket
   
       } // startElement(QName,XMLAttributes)
   
  @@ -224,6 +274,17 @@
   
       } // ignorableWhitespace(XMLString);
   
  +    /** Processing instruction. */
  +    public void processingInstruction(String target, XMLString data)
  +        throws XNIException {
  +        fTagCharacters += 2; // "<?"
  +        fTagCharacters += target.length();
  +        if (data.length > 0) {
  +            fTagCharacters++; // space
  +        }
  +        fTagCharacters += 2; // "?>"
  +    } // processingInstruction(String,XMLString)
  +
       //
       // ErrorHandler methods
       //
  @@ -293,6 +354,7 @@
           boolean validation = DEFAULT_VALIDATION;
           boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION;
           boolean memoryUsage = DEFAULT_MEMORY_USAGE;
  +        boolean tagginess = DEFAULT_TAGGINESS;
           
           // process arguments
           for (int i = 0; i < argv.length; i++) {
  @@ -334,6 +396,10 @@
                       memoryUsage = option.equals("m");
                       continue;
                   }
  +                if (option.equalsIgnoreCase("t")) {
  +                    tagginess = option.equals("t");
  +                    continue;
  +                }
                   if (option.equals("-rem")) {
                       if (++i == argv.length) {
                           System.err.println("error: Missing argument to -# option.");
  @@ -347,6 +413,8 @@
                       printUsage();
                       continue;
                   }
  +                System.err.println("error: unknown option ("+option+").");
  +                continue;
               }
   
               // use default parser?
  @@ -399,7 +467,8 @@
                   long time = timeAfter - timeBefore;
                   long memory = memoryUsage 
                               ? memoryBefore - memoryAfter : Long.MIN_VALUE;
  -                ((Counter)parser).printResults(out, arg, time, memory);
  +                ((Counter)parser).printResults(out, arg, time, 
  +                                               memory, tagginess);
               }
               catch (SAXParseException e) {
                   // ignore
  @@ -431,6 +500,7 @@
           System.err.println("  -v | -V     Turn on/off validation.");
           System.err.println("  -s | -S     Turn on/off Schema validation support.");
           System.err.println("  -m | -M     Turn on/off memory usage report.");
  +        System.err.println("  -t | -T     Turn on/off \"tagginess\" report.");
           System.err.println("  --rem text  Output user defined comment before next parse.");
           System.err.println("  -h          This help screen.");
   
  @@ -445,6 +515,8 @@
           System.err.println(DEFAULT_SCHEMA_VALIDATION ? "on" : "off");
           System.err.print("  Memory:     ");
           System.err.println(DEFAULT_MEMORY_USAGE ? "on" : "off");
  +        System.err.print("  Tagginess:  ");
  +        System.err.println(DEFAULT_TAGGINESS ? "on" : "off");
   
           System.err.println();
           System.err.println("notes:");
  @@ -453,6 +525,13 @@
           System.err.println("  used. For better results, perform multiple document parses within the same");
           System.err.println("  virtual machine to remove class loading from parse time and memory usage.");
           System.err.println();
  +        System.out.println("  The \"tagginess\" measurement gives a rough estimate of the percentage of");
  +        System.out.println("  markup versus content in the XML document. The percent tagginess of a ");
  +        System.out.println("  document is equal to the minimum amount of tag characters required for ");
  +        System.out.println("  elements, attributes, and processing instructions divided by the total");
  +        System.out.println("  amount of characters (characters, ignorable whitespace, and tag characters)");
  +        System.out.println("  in the document.");
  +        System.out.println();
           System.err.println("  Not all features are supported by different parser configurations.");
   
       } // printUsage()
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org