You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by ak...@apache.org on 2004/05/28 02:41:21 UTC
svn commit: rev 20523 - in incubator/directory/snickers/trunk: . ber-codec ber-codec/src/java/org/apache/snickers/ber ber-codec/src/test/org/apache/snickers/ber codec-stateful xdocs/ber-codec xdocs/codec-stateful xdocs/images

Author: akarasulu
Date: Thu May 27 17:41:20 2004
New Revision: 20523

Added:
   incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml
   incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml
   incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml
   incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml
   incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml
   incubator/directory/snickers/trunk/xdocs/images/BERDecoder.gif   (contents, props changed)
   incubator/directory/snickers/trunk/xdocs/images/BERDecoderCallback.gif   (contents, props changed)
   incubator/directory/snickers/trunk/xdocs/images/PrimitiveTupleDecoder-uml.gif   (contents, props changed)
   incubator/directory/snickers/trunk/xdocs/images/all-uml.gif   (contents, props changed)
   incubator/directory/snickers/trunk/xdocs/images/state-helper-classes.gif   (contents, props changed)
   incubator/directory/snickers/trunk/xdocs/images/tag-integer-encoding.png   (contents, props changed)
Modified:
   incubator/directory/snickers/trunk/ber-codec/   (props changed)
   incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java
   incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java
   incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java
   incubator/directory/snickers/trunk/codec-stateful/   (props changed)
   incubator/directory/snickers/trunk/maven.xml
   incubator/directory/snickers/trunk/project.properties
   incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml
   incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml
Log:
Commit changes ...
 
 o fixed a bug in the Tag class
 o made respective changes to the TagTest class
 o added a few new documents which are empty
 o completed the asn1berinfo.xml xdoc
 o completed the BERDecoderDesign.xml xdoc
 o added a bunch images for the new xdocs
 o fixed javadocs and constant visibility in the Tag and Tuple classes
 


Modified: incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java	(original)
+++ incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java	Thu May 27 17:41:20 2004
@@ -32,9 +32,11 @@
     /** tag mask for the primitive/constructed bit - 0010 0000 - 0x20 */
     private static final int PRIMITIVE_MASK = 0x20 ;
     /** tag mask for the short tag format - 0001 1111 - 0x1F */
-    public static final int SHORT_MASK = 0x1F ;
+    static final int SHORT_MASK = 0x1F ;
     /** tag mask for the long tag format - 0111 1111 - 0x7F */
-    public static final int LONG_MASK = 0x7F ;
+    static final int LONG_MASK = 0x7F ;
+    /** tag flag indicating the use of the long tag encoding form */
+    private static final int LONG_FLAG = 0x80 ;
 
     /** tag id */
     private int id = 0 ;
@@ -44,7 +46,7 @@
     private boolean isFixated = false ;
     /** the type class of this tag */
     private TypeClass typeClass = TypeClass.APPLICATION ;
-    /** a byte buffer used to collect the arriving tag octets */
+    /** buffer backed by a Java int to collect the arriving tag octets */
     private final TagOctetCollector buf = new TagOctetCollector() ;
 
 
@@ -72,7 +74,7 @@
     void fixate() throws DecoderException
     {
         isFixated = true ;
-        id = Tag.getTagId( buf ) ;
+        id = getTagId( buf ) ;
         isPrimitive = isPrimitive( buf.get( 0 ) ) ;
         typeClass = TypeClass.getTypeClass( buf.get( 0 ) ) ;
     }
@@ -95,8 +97,8 @@
         
         if ( buf.size() == 1 )
         {
-            // its the short form so we just fixate
-            if ( ( 0x1F & octet ) != 0x1F )
+            // if it's the short form we just fixate now!
+            if ( ( SHORT_MASK & octet ) != SHORT_MASK )
             {
                 fixate() ;
             }
@@ -108,7 +110,7 @@
          * significant bit to flag the end of the train of octets for the 
          * tag id.
          */ 
-        else if ( ( octet & 0x80 ) == 0 )
+        else if ( ( octet & LONG_FLAG ) == 0 )
         {
             fixate() ;
         }
@@ -155,7 +157,7 @@
      */
     public int getRawTag()
     {
-        return buf.getIntValue() ; 
+        return buf.getIntValue() ;
     }
     
     
@@ -194,7 +196,7 @@
 
 
     /**
-     * Gets the tag id of a TLV from the tag octets.
+     * Gets the tag id of a TLV from tag octets.
      * 
      * @param octets the set of octets needed to determine the tag value 
      *      (a.k.a identifier octets)
@@ -205,12 +207,12 @@
     public final static int getTagId( byte[] octets )
         throws DecoderException
     {
-        if ( octets.length > 6 )
+        if ( octets.length > 4 )
         {
             /*
              * If this exception is ever thrown which is highly unlikely, then
              * we need to switch to another data type to return because after
-             * 5 bytes the int can no longer hold the number.
+             * 4 bytes the int can no longer hold the number.
              */
             throw new DecoderException( "Tag number is too large." ) ;
         }

Modified: incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java	(original)
+++ incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java	Thu May 27 17:41:20 2004
@@ -178,7 +178,7 @@
     
     
     /**
-     * Gets the tag id (T-part) for this TLV Tuple.
+     * Gets the tag id for this TLV Tuple.
      * 
      * @return the tag id
      */

Modified: incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java	(original)
+++ incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java	Thu May 27 17:41:20 2004
@@ -21,6 +21,8 @@
 
 import junit.framework.TestCase ;
 
+import java.nio.BufferOverflowException;
+
 
 /**
  * Tests the BER utility functions.
@@ -31,15 +33,15 @@
  */
 public class TagTest extends TestCase
 {
-    private static final int BIT_7 = 0x80 ;
-    private static final int BIT_6 = 0x40 ;
-    private static final int BIT_4 = 0x10 ;
-    private static final int BIT_3 = 0x08 ;
-    private static final int BIT_2 = 0x04 ;
-    private static final int BIT_1 = 0x02 ;
     private static final int BIT_0 = 0x01 ;
+    private static final int BIT_1 = 0x02 ;
+    private static final int BIT_2 = 0x04 ;
+    private static final int BIT_3 = 0x08 ;
+    private static final int BIT_4 = 0x10 ;
     private static final int BIT_5 = 0x20 ;
-    
+    private static final int BIT_6 = 0x40 ;
+    private static final int BIT_7 = 0x80 ;
+
     
     public static void main( String[] args )
     {
@@ -77,7 +79,7 @@
     
     public void getTypeClass()
     {
-        assertEquals( TypeClass.UNIVERSAL, TypeClass.getTypeClass( (byte) 0 ) );
+        assertEquals( TypeClass.UNIVERSAL, TypeClass.getTypeClass( 0 ) ) ;
     }
     
     
@@ -173,14 +175,12 @@
             }
         }
 
-        octets = new byte[6] ;
+        octets = new byte[4] ;
         octets[0] = 31 ;
         octets[1] = 0 ; // shift 0
         octets[2] = 0 ; // shift 7
         octets[3] = 0 ; // shift 14
-        octets[4] = 0 ; // shift 21
-        octets[5] = 0 ; // shift 28
-        
+
         for ( int ii = 16384 ; ii < 2100000 ; ii++ )
         {
             octets[1] = ( byte ) ( ii & Tag.LONG_MASK ) ;
@@ -199,7 +199,18 @@
         
         try
         {
-            Tag.getTagId( new byte[56] ) ;
+            Tag.getTagId( new byte[5] ) ;
+            fail( "should fail before getting here" ) ;
+        }
+        catch ( Throwable t )
+        {
+            assertNotNull( t ) ;
+        }
+
+
+        try
+        {
+            Tag.getTagId( new byte[12] ) ;
             fail( "should fail before getting here" ) ;
         }
         catch ( Throwable t )
@@ -209,6 +220,38 @@
     }
     
     
+    public void testTagLimits() throws Exception
+    {
+        byte[] bites = { (byte) 0xff, (byte) 0xff, (byte) 0x8f, (byte) 0x0f } ;
+
+        Tag tag = new Tag() ;
+        tag.add( bites[0] ) ;
+        tag.add( bites[1] ) ;
+        tag.add( bites[2] ) ;
+        tag.add( bites[3] ) ;
+
+        byte[] octets = tag.getOctets() ;
+        assertTrue( ArrayUtils.isEquals( bites, octets ) ) ;
+
+        byte[] tooMany = { (byte) 0xff, (byte) 0xff, (byte) 0x8f, (byte) 0x8f, (byte) 0x0f } ;
+
+        tag = new Tag() ;
+        tag.add( tooMany[0] ) ;
+        tag.add( tooMany[1] ) ;
+        tag.add( tooMany[2] ) ;
+        tag.add( tooMany[3] ) ;
+
+        try
+        {
+            tag.add( tooMany[4] ) ;
+            fail( "should never get here due to exception" ) ;
+        }
+        catch( BufferOverflowException e )
+        {
+        }
+    }
+
+
     public void testGetOctets() throws Exception
     {
         byte[] bites = { (byte) 0xff, (byte) 0xff, (byte) 0x0f } ;

Modified: incubator/directory/snickers/trunk/maven.xml
==============================================================================
--- incubator/directory/snickers/trunk/maven.xml	(original)
+++ incubator/directory/snickers/trunk/maven.xml	Thu May 27 17:41:20 2004
@@ -56,9 +56,9 @@
       banner="site" ignoreFailures="false"/>
   </goal>
   
-  <goal name="clean-all" description="Clean all sandbox projects">
+  <goal name="cleanall" description="Clean all sandbox projects">
     <maven:reactor basedir="${basedir}" postProcessing="true" 
-      includes="**/project.xml" excludes="./project.xml" goals="cleanall" 
+      includes="**/project.xml" excludes="./project.xml" goals="clean" 
       banner="Clean All" 
       ignoreFailures="true"/>
     <attainGoal name="clean"/>

Modified: incubator/directory/snickers/trunk/project.properties
==============================================================================
--- incubator/directory/snickers/trunk/project.properties	(original)
+++ incubator/directory/snickers/trunk/project.properties	Thu May 27 17:41:20 2004
@@ -1,18 +1,18 @@
-maven.javadoc.private=true
-maven.javadoc.customtags=tag1 tag2
-
-tag1.name=todo
-tag1.description=To Do:
-tag1.enabled=true
-tag1.scope=all
-
-tag2.name=task
-tag2.description=Task:
-tag2.enabled=false
-tag2.scope=all
-
-maven.license.licenseFile=../../../LICENSE.txt
-#maven.clover.instrument.tests=true
+maven.javadoc.private=true
+maven.javadoc.customtags=tag1 tag2
+
+tag1.name=todo
+tag1.description=To Do:
+tag1.enabled=true
+tag1.scope=all
+
+tag2.name=task
+tag2.description=Task:
+tag2.enabled=false
+tag2.scope=all
+
+maven.license.licenseFile=../../../LICENSE.txt
+#maven.clover.instrument.tests=true
 maven.junit.fork=true
 maven.changelog.factory=org.apache.maven.svnlib.SvnChangeLogFactory
 maven.license.licenseFile=../../../LICENSE.txt
@@ -20,7 +20,7 @@
 maven.ui.banner.background=#FFFFFF
 maven.xdoc.includeProjectDocumentation=no
 maven.xdoc.poweredby.image=
-maven.xdoc.jsl = file:/${basedir}/../../sitedocs/trunk/src/etc/site.jsl
+#maven.xdoc.jsl = file:/${basedir}/../../sitedocs/trunk/src/etc/site.jsl
 
 #
 # remote repository properties

Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml	Thu May 27 17:41:20 2004
@@ -0,0 +1,552 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <properties>
+    <author email="akarasulu@apache.org">Alex Karasulu</author>
+    <title>BER Decoder Design</title>
+  </properties>
+
+  <body>
+    <section name="Factors Driving the Design">
+      <p>
+        Several factors drove the design.  Some of these were covered in
+        advance in the section on stateful codecs.  While implementing these
+        stateful decoders we still must adhere to some rules to not undermine
+        the purpose for implementing stateful decoders in the first place.
+        Other design decisions come from the way the BER encoding is devised.
+        These driving factors are all covered here in this document to reveal
+        the present design while justifying it and the decisions that forged
+        it.
+      </p>
+
+      <subsection name="TLV Nesting">
+        <p>
+          A constructed TLV tuple contains other TLV tuples nested within the
+          Value field, and so on, recursively.  The nesting depth is usually
+          indefinite and dependent on the data structures being transformed.
+          This fact drastically affects the way the decoder is designed and
+          hence operates.
+        </p>
+
+        <p>
+          It's always good form to tackle the basis case[s] of any recursive
+          problem.  The simplest basis case is the processing of a simple
+          primitive TLV tuple.  So for the time being presume we are simply
+          implementing a primitive BER TLV decoder without any nesting.
+        </p>
+
+        <p>
+          While decoding a primitive tuple the decoder must maintain state.
+          State for this simple decoder is an indicator representing whether
+          the Tag, Length, or Value was being processed at the end of the last
+          chunk of substrate data.  This is required along with any data
+          already captured for these fields to continue processing where the
+          last chunk left off.  Value field data actually is not "accumulated"
+          to maintain state, but for the time being we'll ignore that fact.
+        </p>
+
+        <p>
+          The primitive TLV tuple decoder is easily designed.  Build three
+          stateful sub-decoders for each field, the Tag, Length, and the Value
+          fields.  A top level decoder delegates the processing state of the
+          fields to each sub-decoder and switches sub-decoders when the
+          indicator changes from Tag, to Length, to Value then back to Tag
+          and so on.  When a Value completes processing and before another Tag
+          is read the decoder triggers a callback event.  Here's what the
+          hypothetical primitive TLV tuple decoder would look like:
+        </p>
+
+        <center>
+          <img src="../images/PrimitiveTupleDecoder-uml.gif"/>
+        </center>
+
+        <p>
+          Now let's try to figure out how to handle constructed TLV tuples
+          which recursively nest other tuples indefinitely.  State now
+          is more than just where you left off in the current tuple being
+          processed.  When processing an inner tuple the decoder must also know
+          where it left off in the outer tuple to resume processing.  More
+          accurately the decoder must maintain the state of every parent and
+          ancestor tuple of the current tuple being processed in the same
+          manner the TLV tuple decoder did for primitive TLV tuples.  Hence
+          the state of the decoder is a stack of all ancestor TLV tuple states
+          as well as the state of the tuple currently being processed.
+        </p>
+
+        <p>
+          While processing input for a nested TLV tuple the state of all
+          tuple ancestors must also be updated with the same data so the
+          decoder can determine when their processing is about to complete.
+          This way the decoder does not read TLV tuples adjacent to the
+          constructed TLV tuple, incorrectly presuming that they are part
+          of the constructed TLV tuple.
+        </p>
+
+        <p>
+          When the last inner tuple in a constructed TLV tuple completes, it
+          triggers a callback for itself, then the stack is popped and another
+          callback event is triggered for the completion of the constructed
+          TLV tuple.
+        </p>
+
+        <p>
+          In conclusion, the state of a BER decoder, used to process both
+          primitive and constructed TLV tuples, must take into account
+          the processing state of every tuple ancestor in the stack of nested
+          tuples.  Otherwise state cannot be maintained.  Just how this is
+          efficiently managed is the topic of the next few subsections.
+        </p>
+      </subsection>
+
+      <subsection name="Value Processing">
+        <p>
+          If the decoder accumulates encountered Value field octets to maintain
+          state then we have a problem.  First off the size of the Value could
+          be massive and often varies.  We want to maintain a fixed maximum
+          memory footprint to the decoder.  This goes out the window if Value
+          field content is accumulated within buffers to maintain tuple
+          processing state.  Furthermore, with nesting, every ancestor tuple in
+          the nesting stack would maintain a copy of the topmost tuple's Value
+          field when that tuple is about to complete processing.  The number of
+          copies is a function of the nesting depth, so the deeper the nesting,
+          the more memory is wastefully consumed.  This is totally unacceptable
+          and it undermines the reason for devising stateful codecs in the
+          first place.
+        </p>
+
+        <p>
+          To avoid this problem we must not accumulate Value field octets
+          (bytes) while maintaining state.  Unlike the Value field, the other
+          Tag and Length fields are limited and often account for only a few
+          bytes within TLV tuples.  To maintain state however the decoder still
+          has to perform some accounting to determine when outer tuples in the
+          nesting stack complete processing.  The decoder maintains state by
+          counting the Value bytes processed rather than using an accumulator
+          to store the Value bytes.  This way the decoder can compare a tuple's
+          Length field with its Value byte counter to determine if processing
+          is complete.
+        </p>
+      </subsection>
+
+      <subsection name="Extending DecoderCallback">
+        <p>
+          The next question is how the decoder propagates TLV tuple Values to
+          the target receiving the TLV tuple stream?  If the standard
+          <code>DecoderCallback.decodeOccurred()</code> method is designed to
+          be called upon TLV processing completion how do we avoid collecting
+          the Value while getting the Value bytes somehow to a decoder user
+          via callbacks?
+        </p>
+
+        <p>
+          The answer is to use yet another callback.  The DecoderCallback
+          interface is extended by actually adding three extra callback
+          methods: one for each field.  The BERDecoderCallback interface
+          extends the DecoderCallback interface and adds the following
+          methods:
+        </p>
+
+        <ul>
+          <li>void tagDecoded( Tuple tlv )</li>
+          <li>void lengthDecoded( Tuple tlv )</li>
+          <li>void partialValueDecoded( Tuple tlv )</li>
+        </ul>
+
+        <p>
+          The following diagram shows the decoder interfaces and a do nothing
+          adapter implementation for convenience:
+        </p>
+
+        <center>
+          <img src="../images/BERDecoderCallback.gif"/>
+        </center>
+
+        <p>
+          For a single TLV all methods except for the partialValueDecoded()
+          method are invoked at most once.  As its name suggests pieces of
+          Value are delivered encapsulated by the Tuple argument rather than
+          the entire Value field.  Hence the method can be called zero or more
+          times while processing a TLV.
+        </p>
+
+        <p>
+          The extended decoder callback interface allows the decoder to chunk
+          Value fields and hence maintain a fixed maximum footprint.  The
+          partialValueDecoded callback is a bit misleading however.  It really
+          does not decode any Value bytes based on the Tag of the TLV.  It
+          simply hands off the raw value bytes to the callback; this part of
+          the decode is left to higher level decoders built on top of the
+          BERDecoder.  However all primitive type decode operations are
+          provided by the BER codec.
+        </p>
+      </subsection>
+
+      <subsection name="Constructed Values">
+        <p>
+          The values of constructed TLV tuples are other tuples.  Their Values
+          are already decoded by the BER decoder which triggers TLV events for
+          the nested TLV tuples.  Calls to the partialValueDecoded() method
+          hence are never made.  Furthermore the decoder transits from the
+          Length state of processing to the Tag state just after completing
+          the decode of a constructed tuple Length field.  This is because the
+          next tuple to process is a nested tuple with its Tag following the
+          constructed tuple's Length field.
+        </p>
+
+        <p>
+          Constructed TLV tuples never have partialValueDecoded() called.  Only
+          primitive TLV tuples have Value octets delivered to this callback.
+          This makes state handling within the decoder a bit tricky but the
+          rewards reaped are well worth the complexity.
+        </p>
+      </subsection>
+    </section>
+
+    <section name="State Management">
+      <p>
+        There are two parts to managing state for the BERDecoder: stack
+        based ancestor Value accounting state, and current tuple processing
+        state.
+      </p>
+
+      <subsection name="Current TLV State Management">
+        <p>
+          While processing the tuple in scope a type safe enumeration is used
+          to track the current tuple processing state which could be in either
+          Tag, Length, or Value processing states.  Subordinate decoders are
+          used to decode the Tag, and Length fields.  The Value field unlike
+          these fields does not have a corresponding decoder: it does not need
+          one since primitive TLV Values are not decoded but returned raw.
+          The sub-decoders for Tag and Length manage the accumulation of field
+          bytes between chunked decode operations.  The following diagram
+          displays the helper classes used to manage the current TLV processing
+          state:
+        </p>
+
+        <center>
+          <img src="../images/state-helper-classes.gif"/>
+        </center>
+
+        <table>
+          <tr><th>Color</th><th>Group</th></tr>
+          <tr><td>Red</td><td>Generic Helper Classes</td></tr>
+          <tr><td>Yellow</td><td>Tag Specific Classes</td></tr>
+          <tr><td>Purple</td><td>Length Specific Classes</td></tr>
+        </table>
+
+        <p>
+          The tag specific classes include the Tag and TagDecoder classes.
+          The Tag class handles the extraction of various Tag embedded fields
+          like the constructed bit and the tag's type class.  It also collects
+          tag octets up to 4 octets only using a special TagOctetCollector.
+          The TagDecoder is just a stateful decoder implementation wrapper
+          using Tag methods.
+        </p>
+
+        <p>
+          The TypeClass class is a type safe enumeration of the four type
+          classes of Tags:
+        </p>
+
+        <ul>
+          <li>UNIVERSAL</li>
+          <li>APPLICATION</li>
+          <li>CONTEXT_SPECIFIC</li>
+          <li>PRIVATE</li>
+        </ul>
+
+        <p>
+          Once the Tag accumulator collects all tag octets it determines and
+          sets the TypeClass corresponding to the tag.
+        </p>
+
+        <p>
+          The TagEnum class is an abstract base class for type safe tag
+          enumerations.  This special type safe enumeration associates a tag
+          label with two integers: the tag value and the tag id.  The tag value
+          is an integer representation of the tag whereas the id is just the
+          id field of the tag.  This, by the way, is the main reason why the
+          TagOctetCollector only accepts four bytes for building the tag: an integer
+          is essentially used as the backing store for the tag data.  The
+          reasons for this are explained within the tag handling section to
+          follow.
+        </p>
+
+        <p>
+          The Length and LengthDecoder operate very much in the same fashion
+          as do the Tag and TagDecoder.  The same pattern is applied to both
+          pairs of classes.  The primary difference is the use of a ByteBuffer
+          within the Length class rather than a custom data structure like the
+          TagOctetCollector to accumulate Length bytes (octets).  The main
+          reason for this is that a limit of 4 tag octets has been imposed on
+          the decoder which in fact is contrary to the BER specification.
+          Length values well above the 4 byte integer are surely possible for
+          TLV values although improbable.
+        </p>
+
+        <p>
+          The BERDecoderState class is another type safe enumeration with the
+          following values: TAG, LENGTH and VALUE.  It obviously represents the
+          processing state of the TLV tuple currently in scope.
+        </p>
+      </subsection>
+
+      <subsection name="Stack State Management">
+        <p>
+          The BERDecoder UML is displayed below to show some of the members
+          and operations available.  Pay special attention to the tlvStack
+          member and the getTupleStack() package friendly Stack accessor used
+          for stack state management:
+        </p>
+
+        <center>
+          <img src="../images/BERDecoder.gif"/>
+        </center>
+
+        <p>
+          The tlvStack is a Stack of Tuple instances.  The last subsection
+          contains a UML diagram with the Tuple class.  Tuple objects are the
+          objects handed to the decodeOccurred() method of the callback.
+          They basically encapsulate a bunch of information associated with a
+          TLV tuple in one object.  This includes accounting information used
+          to determine the processing state of constructed TLVs.  The Stack of
+          Tuples hence stores the state information associated with ancestor
+          Tuples currently out of scope.
+        </p>
+
+        <p>
+          With every chunk of substrate processed for the tuple currently in
+          scope, the accounting information in every Tuple of the Stack is
+          updated.  Again, this tracks how much of the ancestor's Value field
+          has been processed.  Specifically the length and index fields of
+          Tuple objects are used to determine how much of the TLV has been
+          read.
+        </p>
+      </subsection>
+    </section>
+
+    <section name="Tuple Recycling">
+
+      <subsection name="TLV Tuple Density">
+        <p>
+          BER TLV streams will contain varying densities of TLV tuples.  The
+          density of the tuples depends on the nature of the content.  Streams
+          with many small primitive types crammed together will generate TLV
+          tuples very rapidly while processing the encoded substrate.  Every
+          few, even couple bytes might produce a new tuple.
+        </p>
+
+        <p>
+          If we instantiated a new Tuple instance and populated it for every
+          few bytes in the stream, then performance will degrade significantly
+          while processing streams with high TLV tuple densities.  Furthermore
+          rapid object creation rates would seriously tax the garbage
+          collector.  To avoid these negative effects of instantiating new TLV
+          tuples we need to reuse the same Tuple allowing interested parties
+          to either clone or copy the contained information while processing
+          the tuple.  More often than not, most tuples will be ignored.  It
+          would be wasteful to create a new Tuple object for every TLV tuple
+          encountered when some or most might potentially be ignored.
+        </p>
+      </subsection>
+
+      <subsection name="Problem With Recycling a Tuple">
+        <p>
+          If we avoid instantiating new TLV Tuples and reuse the same Tuple
+          object, we run into a problem.  First we'll lose data when we
+          attempt to push the tuple onto the tlvStack when the next TLV is
+          processed.
+        </p>
+
+        <p>
+          One solution to this problem is to clone constructed Tuples before
+          pushing the tuple onto the tlvStack.  Hence only primitives would
+          reuse the same Tuple.  This works well because primitive tuple data
+          does not need to be maintained past its scope.  If the data needs to
+          be copied, it can be copied by the application using the decoder.
+          This makes sense since the application determines which Tuple values
+          to store or ignore.
+        </p>
+
+        <p>
+          This solves performance bottlenecks with substrates that are dense
+          with primitive tuples.  However the problem will re-emerge if the
+          substrate is dense with deeply nested primitive tuples.  If every
+          primitive is embedded deep within its own cavern of nested TLV
+          tuples then we'll be closer to instantiating a Tuple object for
+          almost every TLV encountered.  The perfect substrate under this
+          scheme, of course, would be a single primitive element but beyond
+          that it would be flat nesting patterns where as many primitive TLV
+          tuples are stuffed into every constructed TLV tuple as possible.
+        </p>
+
+        <p>
+          The deeply embedded high density of constructed TLV tuples is highly
+          unlikely although possible for recursive ASN.1 definitions.
+          Regardless of these situations producing a high density of
+          constructed TLV tuples, the nesting structures will often share the
+          same parents so the TLV tuple to Tuple object instantiation ratio
+          would rarely approach 1:1.
+        </p>
+
+        <p>
+          Over all we cannot determine the ratio of constructed to primitive
+          TLV tuples encountered within a substrate.  However one would like
+          to believe that complex structures do not predominate, and that
+          protocol designers opt for simpler structures whenever possible.
+          With this sensible hypothesis reuse of primitive TLV tuples and the
+          cloning of constructed TLV tuples seems like a viable strategy for
+          managing excessive object instantiations.
+        </p>
+      </subsection>
+
+    </section>
+
+    <section name="Integer Representation For Tags">
+      <p>
+        According to the BER encoding specification, X.690, a Tag id can be
+        any arbitrary value: there is no limitation to the size of an id.
+        In practice ids are claimed incrementally by ASN.1 modules from the
+        CONTEXT_SPECIFIC and APPLICATION type classes.  These values for
+        any reasonable protocol are far less than 100 ids.  Experts like
+        Larmouth claim they have never seen Tag ids larger than a thousand.
+        So we don't bother representing Tags within a buffer for the full
+        expressivity of the specification when we know of reasonable soft
+        limits to the Tag id.
+      </p>
+
+      <subsection name="Four Tag Octets Are Enough">
+        <p>
+          In most cases, one or two octets suffice for encoding a tag and its
+          identifier.  In rare cases three bytes may be used.  It's
+          highly improbable that we'll ever see four or more bytes used
+          to encode a tag: even the experts have never seen this before.
+          The only way I can conceive of this is if computers begin devising
+          or generating protocols :-).
+        </p>
+
+        <p>
+          According to the specification the tag field can encode the
+          following maximum identifier values with various octet lengths:
+        </p>
+
+        <table>
+          <tr>
+            <th>Octets</th>
+            <th>Maximum Tag Id</th>
+            <th>Calculation</th>
+          </tr>
+
+          <tr>
+            <td>1</td>
+            <td>30</td>
+            <td>2^5-2</td>
+          </tr>
+
+          <tr>
+            <td>2</td>
+            <td>127</td>
+            <td>2^7-1</td>
+          </tr>
+
+          <tr>
+            <td>3</td>
+            <td>16,383</td>
+            <td>2^14-1</td>
+          </tr>
+
+          <tr>
+            <td>4</td>
+            <td>2,097,151</td>
+            <td>2^21-1</td>
+          </tr>
+
+          <tr>
+            <td>5</td>
+            <td>268,435,455</td>
+            <td>2^28-1</td>
+          </tr>
+        </table>
+
+        <p>
+          As we can see 3-4 octets encode a maximum tag id we can live with.
+          One might expect the max tag id for say 4 octets would be 2^(4*8)-1
+          but it's not.  We lose some bits, to be able to encode a variable
+          length tag with the long form.  In the long form all the bits from
+          the first octet are wasted and a bit from each octet thereafter is
+          lost to be able to terminate the tag field.  Hence if we started out
+          with 4 bytes or 32 bits then we're actually using 32-8-3 or 21 of
+          the original bits for storing the value of an id.  This yields a max
+          id value of 2^21-1 for 32 bits or 4 octets.
+        </p>
+      </subsection>
+
+      <subsection name="Integer Encoding">
+        <p>
+          Tags are used to match for TLV tuples. Nothing matches faster than an
+          integer using a switch statement.  It makes sense to store and
+          manage raw Tag octets within the bytes of a primitive Java integer
+          rather than within a byte buffer.  This way switch statements can be
+          used to quickly match TLV tuples based on their integer encoding for
+          the first four tag bytes.  Furthermore the stub compiler can
+          prefabricate type safe enums whose values equal the integer encoding
+          of a tag's four octets.  Matching for TLV tuples by tag then is as
+          fast as it can get using this integer encoding.  This btw is the sole
+          reason why we have the abstract class, TagEnum, which extends
+          ValuedEnum.  It's a type safe enumeration for Tag octets encoded as
+          integers.
+        </p>
+
+        <p>
+          Encoding only the four octets of the raw tag, limits the maximum
+          value of the id that a TLV's tag field can represent to 2^21-1.
+          This was the reason for the discussion in the section above.  We
+          simply will not need an id bigger than this.  So we decided to
+          break with the specification and restrict the max value of the tag
+          id down to 2^21-1 rather than leave it unbounded within Snickers.
+          This limitation allows us to represent the first four octets of the
+          tag field as an integer thereby speeding up TLV pattern matching
+          considerably.
+        </p>
+
+        <p>
+          The TagOctetCollector is specifically designed to accumulate the four
+          octets of the Tag used by Snickers.  It stores the first octet in the
+          most significant byte of the int, the second in the next most
+          significant and so on until the last of the four octets is stored
+          within the least significant byte of the integer.  The diagram below
+          shows just how 4 bytes are assembled into the integer:
+        </p>
+
+        <center>
+          <img src="../images/tag-integer-encoding.png"/>
+        </center>
+
+        <p>
+          Note that if there were only 3 tag octets collected, then the bits
+          for Octet 4 would all be zero: bits 0-7 in the integer would be
+          zeros.  Likewise if only one octet were used then bits 23-0 would
+          be zero'd out within the 32-bit integer.
+        </p>
+
+        <p>
+          The integer encoding for tags is not leveraged here at the level of
+          the BERDecoder.  At this low level the decoder does not care about
+          tags other than those in the UNIVERSAL type class reserved for
+          detecting TLV tuple termination sequences within the stream.  Later
+          within the BERDigester where Tag pattern matching is used to make
+          sense of these TLV tuple streams, the integer encoding and the
+          TagEnum are used heavily.  Rather than add more complexity to the
+          BERDecoder we stop here and build upon it by stacking another
+          decoder, the BERDigester on top.  The BERDecoder decodes encoded
+          substrate streams into TLV Tuples and announces their arrival by
+          callbacks which are received by the BERDigester.  It is then up to
+          the BERDigester to process these TLV tuples using decoding rules
+          triggered by tag nesting patterns.  How appropriate!  Data encoded
+          using Basic Encoding Rules is decoded using rules that process a TLV
+          tuple stream.  More information regarding the design of the
+          BERDigester can be found <a href="BERDigesterDesign.html">here</a>.
+        </p>
+      </subsection>
+    </section>
+  </body>
+</document>

Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml	Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <properties>
+    <author email="akarasulu@apache.org">Alex Karasulu</author>
+    <title>BERDecoder Usage</title>
+  </properties>
+  <body>
+    <section name="Coming soon ... ">
+    </section>
+  </body>
+</document>

Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml	Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <properties>
+    <author email="akarasulu@apache.org">Alex Karasulu</author>
+    <title>BEREncoder Design</title>
+  </properties>
+  <body>
+    <section name="Coming soon ... ">
+    </section>
+  </body>
+</document>

Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml	Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <properties>
+    <author email="akarasulu@apache.org">Alex Karasulu</author>
+    <title>BEREncoder Usage</title>
+  </properties>
+  <body>
+    <section name="Coming soon ... ">
+    </section>
+  </body>
+</document>

Added: incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml	Thu May 27 17:41:20 2004
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <properties>
+    <author email="akarasulu@apache.org">Alex Karasulu</author>
+    <title>ASN.1 and BER Information</title>
+  </properties>
+  <body>
+
+    <section name="ASN.1 and BER Information">
+      <subsection name="Background">
+        <p>
+          The BER encoding for ASN.1 was defined within ITU specification
+          X.690 along with the Canonical and Distinguished Encoding Rules.
+          A copy of this document along with other useful documents and books
+          on ASN.1 and its encodings can be obtained for free here:
+        </p>
+      </subsection>
+
+      <subsection>
+        <table>
+        <tr><th>Document</th><th>Description</th></tr>
+        <tr>
+          <td>
+            <a href="http://lesbeshy.notlong.com">X.690 (07/02)</a>
+          </td>
+          <td>
+            Information technology - ASN.1 encoding rules: Specification of
+            Basic Encoding Rules (BER), Canonical Encoding Rules (CER) and
+            Distinguished Encoding Rules (DER)
+          </td>
+        </tr>
+
+        <tr>
+          <td>
+            <a href="http://offburie.notlong.com">X.680 (07/02)</a>
+          </td>
+          <td>
+            Information technology - Abstract Syntax Notation One (ASN.1):
+            Specification of basic notation
+          </td>
+        </tr>
+
+        <tr>
+          <td>
+            <a href="http://www.oss.com/asn1/bookreg.html">ASN.1 Complete</a>
+          </td>
+          <td>
+            A verbose yet truly complete book on ASN.1 and various encoding
+            mechanisms.  Easy to read since the author takes almost a
+            conversational tone.
+          </td>
+        </tr>
+
+        <tr>
+          <td>
+            <a href="http://www.oss.com/asn1/bookreg2.html">
+              ASN.1 - Communication between heterogeneous systems</a>
+          </td>
+          <td>
+            Also a very complete book on ASN.1 and various encoding mechanisms.
+            A little more difficult to read but seems to be much better
+            organized and more exacting.  I use both books in conjunction
+            often switching between the two based on my mood :-).  Both are
+            most excellent - thanks to both authors for graciously providing
+            their books online.
+          </td>
+        </tr>
+        </table>
+      </subsection>
+
+      <subsection name="BER Tag, Length, Value Tuples">
+        <p>
+          BER stands for Basic Encoding Rules.  These rules describe how to
+          encode and decode basic data types and composite data structures
+          to and from TLV streams.  A TLV hence may be primitive (atomic) or
+          constructed (nested) where the value component contains other TLVs.
+          The T is for the Tag, a numeric type identifier, the L is for the
+          length of the data carried in the third V component, the value.
+          Outside of this very trivial introduction there is very little to
+          the encoding.  Readers should look at the relatively short
+          specification for reference regarding the exact encoding for
+          various data types using TLV tuples.  The books above also have
+          appendices for the various encodings which are longer than the
+          actual specification yet more explanatory.
+        </p>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file

Modified: incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml
==============================================================================
--- incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml	(original)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml	Thu May 27 17:41:20 2004
@@ -2,97 +2,71 @@
 <document>
   <properties>
     <author email="akarasulu@apache.org">Alex Karasulu</author>
-    <title>Snickers ASN.1 BER Library</title>
+    <title>Snickers BER Codec</title>
   </properties>
   <body>
-
     <section name="Introduction">
+      <subsection name="What is it?">
       <p>
-        The Snickers BER codec runtime is based on the stateful codec interfaces
-        defined in the <a href="http://jakarta.apache.org/commons/codec">
-        commons-codec</a> API (hopefully these stateful codec interfaces make 
-        there way there).
-      </p>
-      
-      <p>
-        The stateful codec interfaces are designed for situations where data
-        is encoded or decoded in peices when those fragments of data are made
-        available.  Between actively encoding and decoding data the codec 
-        maintains the state of the operation which occurs in parts.  Such 
-        codecs are ideal for non-blocking stateful protocol servers that 
-        maintain a client socket connection for the life of a session.  The 
-        cost of establishing a client dedicated stateful encoder/decoder pair
-        is offset by the prolonged life of the connection.  Stateful codecs 
-        unlike their blocking stateless counterparts do not need to store the
-        entire encoded image of a request to decode since they do not have to
-        complete a [en]decode operation in one method call.
-      </p>
-    </section>
-    
-    <section name="Usage">
-      <p>
-        For more involved information concerning how the BER codec runtime is
-        designed and operates see the design documentation on the decoder 
-        <a href="./design.html">here</a>.
-      </p>
-      
-      <p>
-        The stateful codec interfaces standardize the use of the BER codec.
-        Below we show how a decoder can be established to be used within a 
-        standard selector based input detection loop.  This use case is by no
-        means specific to the SnickersDecoder, it is a characteristic of 
-        StatefulDecoders in general.  Below we show how the decoder is setup:
-      </p>
-      
-      <source>
-SnickersDecoder decoder = new SnickersDecoder( 512 ) ;
-DecoderCallback cb = new DecoderCallback() {
-  decodeOccurred( StatefulDecoder decoder, Object decoded ) {
-      MyClass.this.process( ( Message ) decoded ) ;
-  }
-};
-decoder.setCallback( cb ) ;
-      </source>
-      
-      <p>
-        The stateful decoder uses a callback to deliver completely decoded 
-        messages.  The <code>Message</code> interface which the <em>decoded</em>
-        object is cast to is the super interface used by the LDAP common message
-        API.  The objects returned are LDAP PDU message envelopes but they can 
-        be any Java stub generated for ASN.1 data types using the Snickers stub
-        compiler.
+        The Snickers BER Codec is a runtime API for encoding and decoding ASN.1
+        data structures using Basic Encoding Rules (BER).  It implements
+        extensions to the <a href="http://jakarta.apache.org/commons/codec">
+        commons-codec</a> API, for building stateful chunking encoder decoder
+        pairs that maintain state between processing calls.
       </p>
-      
+      </subsection>
+
+      <subsection name="Stateful Codecs">
       <p>
-        Within a selector input detection loop which selects channels with
-        available input the decoder can be used to decode encoded BER messages.
-        The example below is a trivialized example of how the decoder can be 
-        used to decoded BER encoded data in parts as a message arrives 
-        fragmented by the tcp/ip stack:
+        More information on these new codec interfaces is available on the
+        <a href="../codec-stateful/index.html">stateful codec</a> home page.
+        You might want to read this before you continue since these extensions
+        are the basis of all Snickers encoders and decoders.
       </p>
-      
-      <source>
-while ( true ) {
-  ...
-  SelectionKey key = ( SelectionKey ) list.next() ;
-  if ( key.isReadable() ) {
-    SocketChannel channel = ( SocketChannel ) l_key.channel() ;
-    channel.read( buf ) ;
-    buf.flip() ;
-    decoder.decode( buf ) ;
-  }
-  ...
-}
-      </source>
-      
+      </subsection>
+
+      <subsection name="What is encoded/decoded?">
       <p>
-        As you can see from the code fragment and the API for the decode 
-        operation the decode does not return anything.  In fact the return
-        is void.  Because the callback is used to deliver the finished product
-        when it is ready the decode operation can occur asynchronously in 
-        another thread or stage of a server.  This is what makes 
-        StatefulDecoders and the SnickersDecoder in particular so exciting.
+        The BER codec is protocol or ASN.1 module independent.  The unit of
+        substrate is a BER TLV (Tag, Length, Value) so any BER based protocol
+        can be decoded and encoded by the BER codec to and from TLV tuples.
       </p>
+      </subsection>
+    </section>
+
+    <section name="BER Codec User Guides and Design Documents">
+      <table>
+        <tr>
+          <th>Subject</th>
+          <th>Description</th>
+        </tr>
+
+        <tr>
+          <td><a href="./asn1berinfo.html">ASN.1 and BER Information</a></td>
+          <td>Links to various books and specifications on ASN.1 and BER</td>
+        </tr>
+
+        <tr>
+          <td><a href="./BERDecoderUserGuide.html">BER Decoder User Guide</a></td>
+          <td>Describes how to use the BERDecoder to process a TLV stream</td>
+        </tr>
+
+        <tr>
+          <td><a href="./BERDecoderDesign.html">BER Decoder Design</a></td>
+          <td>Explains how and why the BERDecoder was designed</td>
+        </tr>
+
+        <tr>
+          <td><a href="./BEREncoderUserGuide.html">BER Encoder User Guide</a></td>
+          <td>Describes how to use the BEREncoder to generate a TLV stream</td>
+        </tr>
+
+        <tr>
+          <td><a href="./BEREncoderDesign.html">BER Encoder Design</a></td>
+          <td>Explains how and why the BEREncoder was designed</td>
+        </tr>
+
+      </table>
     </section>
   </body>
 </document>

Modified: incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml
==============================================================================
--- incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml	(original)
+++ incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml	Thu May 27 17:41:20 2004
@@ -5,38 +5,119 @@
     <title>Stateful Codecs</title>
   </properties>
   <body>
-    <section name="Introduction">
+    <section>
+    <subsection name="Introduction">
       <p>
-        Stateful encoder and decoder pairs, or codecs for short, maintain state 
-        between respective operations.  By maintaining state in the codec all 
-        the data needed for the operation not be available at one time.  This 
-        leads to codecs with significantly reduced active footprints which are 
-        constant in size regardless of the size of the substrates they operate
-        upon.
+        Codecs are bidirectional data transformations.  The data transformed,
+        often referred to as the substrate, may be [en]coded or decoded hence
+        the word codec.  The word codec also refers to the actual software
+        used to encode and decode data.  We use the term stateful codec for
+        lack of a better description for encoder/decoder pairs possessing
+        certain abilities and exhibiting the following behaviors:
       </p>
-      
+
+      <ul>
+        <li>the ability to interrupt and resume operation without losing
+            state</li>
+        <li>the ability to process a substrate in one or more steps operating
+            on small chunks rather than all of it in one large operation</li>
+        <li>free up resources while not actively processing perhaps until more
+            of the substrate is available, or just to multiplex limited
+            resources</li>
+        <li>use a small fixed size chunk buffer rather than a variable sized
+            buffer equal to the entire size of the substrate whatever that
+            may be</li>
+      </ul>
+    </subsection>
+
+    <subsection name="Advantages">
       <p>
-        Furthermore Stateful codecs operate on data as it arrives instead of in
-        one shot.  This way the computational requirements are spread out over
-        time as the substrate is made available.
+        The abilities or behaviors listed above make stateful codecs ideal for
+        use in resource critical situations.  Servers for example based on
+        codecs may have to perform several thousand concurrent encode/decode
+        operations.  The resources required for such operations, namely threads
+        and memory buffers will be limited.  Most of the time these operations
+        will be waiting for IO to complete so they can free up resources to
+        allow other operations to proceed.  Stateful codecs make this possible
+        and complement servers designed using non-blocking IO constructs.
       </p>
-      
+
       <p>
-        Stateful codecs must be handled with care since they maintain the state
-        of an operation.  Stateful codecs must be dedicated to a serial 
-        stream of substrate objects whatever that may be.  This makes them ideal
-        for streams which have long lifespans however sensitive to the loss of 
-        data which may retard their state and require a reset.
+        Servers cannot afford to allocate variable sized buffers for arriving
+        data.  Allowing variable sized buffers based on incoming data
+        sizes opens the door for DoS attacks where malicious clients can
+        cripple or crash servers, by pumping in massive or never ending
+        data streams.  Stateful codecs enable fixed size processing overheads
+        regardless of the size of the data unit transmitted to the server.
+        Smaller codec footprints lead to smaller server process memory
+        footprints.
       </p>
-    </section>
-    
-    <section name="StatefulDecoder Usage">
+
+      <p>
+        These advantages also make stateful codecs ideal for use in resource
+        limited environments like embedded systems, PDAs or cellular phones
+        which use ASN.1 and one of its encoding schemes to control data
+        transmission.  These systems all run on limited resources where the
+        codec's operational footprint will have dramatic effects on the
+        performance of the device.
+      </p>
+    </subsection>
+
+    <subsection name="How is a stateful codec defined?">
+      <p>
+        There are several ways to skin this cat.  To this day discussions are
+        underway at the ASF to determine the best approach.  Until a consensus
+        is reached we have decided to use an event driven approach where the
+        events are modelled as callbacks.  To better explain the approach we
+        need to discuss it within the context of encoding/decoding.
+      </p>
+
+      <p>
+        Depending on the operation being performed, available chunks of the
+        substrate are processed using either the <code>encode()</code> or
+        the <code>decode()</code> method.  These methods hence are presumed
+        to process small chunks of the substrate.  The specific codec
+        implementation should know how to maintain state based on the encoding
+        between these calls to process a unit of substrate which likewise is
+        determined by the encoding.  So the encoding (a.k.a. codec) defines
+        what a unit of substrate is as well as any state information required
+        while piecemeal processing the substrate.  Several calls to these two
+        methods may be required to process a unit of the substrate.  When the
+        entire unit has been processed an event is fired.  Again the specific
+        codec detects the complete processing of a unit of substrate so it
+        knows when to fire this event.
+      </p>
+
+      <p>
+        Going back to our approach for defining a stateful codec, we modeled
+        the event as a callback to a specific interface.  For decoders this
+        would be a <code>DecoderCallback.decodeOccurred()</code> and for
+        encoders it would be an <code>EncoderCallback.encodeOccurred()</code>
+        method call.  These interface methods are called when an entire unit
+        of substrate is respectively decoded or encoded.
+      </p>
+
+      <p>
+        This approach also allows for codec chaining in a pipeline where
+        codecs may be stacked on top of one another.  The callback interfaces
+        are used to bridge together codecs by feeding the output of one codec
+        operation into the input of another.  Specific classes have been
+        included in the API to accommodate this usage pattern.
+      </p>
+
+      <center>
+        <img src="../images/all-uml.gif"/>
+      </center>
+
+    </subsection>
+
+    <subsection name="StatefulDecoder Usage">
       <p>
         StatefulDecoders use callbacks to notify the successful decode of a
-        unit of encoded substrate.  Other than this the definition of what a
-        'unit of encoded substrate' is depends on the codec's decoder 
-        implementation.  The definition may be size constrained or be a function
-        of context.
+        unit of encoded substrate.  Other than this, the definition of what a
+        'unit of encoded substrate' is, depends on the codec's decoder
+        implementation.  The definition may be size constrained or be a
+        function of context.
       </p>
       
       <p>
@@ -65,11 +146,11 @@
         are the decoded 'unit of encoded substrate'.  StatefulDecoders are ideal
         for use in high performance servers based on non-blocking IO.   Often
         StatefulDecoders will be used with a Selector in a loop to detect input
-        made available.  As the substrate arrives, it can be fed to the decoder
-        intermittantly.  Finally the callback delivers the decoded units of 
-        encoded substrate.  Below there is a trivialized example of how 
-        a StatefulDecoder can be used to decoded the substrate as it arrives 
-        fragmented by the tcp/ip stack:
+        as it is made available.  As the substrate arrives, it is fed to
+        the decoder intermittently.  Finally the callback delivers the decoded
+        units of encoded substrate.  Below there is a trivialized example of
+        how a StatefulDecoder can be used to decode the substrate as it
+        arrives fragmented by the tcp/ip stack:
       </p>
       
       <source>
@@ -87,15 +168,15 @@
       </source>
       
       <p>
-        As you can see from the code fragment the decode() returns anything with
-        a void return type.  Because the callback is used to deliver the 
-        finished product when it is ready, the decode operation can occur 
-        asynchronously in another thread or stage of a server if so desired.  
-        This is what makes StatefulDecoders so simple yet powerful.
+        As you can see from the code fragment the decode() returns nothing
+        since it has a void return type.  Because the callback is used to
+        deliver the finished product when it is ready, the decode operation
+        can occur asynchronously in another thread or stage of a server if
+        desired.
       </p>
-    </section>
+    </subsection>
     
-    <section name="Strengths and Weaknesses">
+    <subsection name="Strengths and Weaknesses">
       <p>
         As can be seen from the section above and some of the characteristics 
         of StatefulDecoders, they are ideal for building network servers.  These
@@ -113,24 +194,25 @@
       </p>
       
       <p>
-        The cost of creating a decoder for every new connection is usually very
-        minimal however we cannot forsee every possible implementation.  
-        Regardless of the cost associated with dedicating a StatefulDecoder to
-        each new connection, stateful protocol servers will always pay a lesser
-        price.  The longer the life of the connection, the more worth while it
+        The cost of creating a decoder for every new connection is usually
+        very minimal however we cannot foresee every possible implementation.
+        Regardless of the cost associated with dedicating a StatefulDecoder
+        to each new connection, stateful protocol servers will often benefit
+        most, as opposed to a stateless server.  The reasoning is as follows:
+        the longer the life of the connection, the more worth while it
         is to create a StatefulDecoder and thereby have it amortize over the 
         life of the connection.
       </p>
       
       <p>
-        StatefulDecoders are much more complex for implementors.  They are 
-        basically state driven automata which change their state with the
-        arrival of data.  Furthermoe it is very difficult for StatefulDecoders
-        to gracefully recover from corrupt or lost input.
+        The primary drawback is that StatefulDecoders are much more complex to
+        implement.  They are basically state driven automata which change
+        their state with the arrival of data.  Furthermore it is very difficult
+        for StatefulDecoders to gracefully recover from corrupt or lost input.
       </p>
-    </section>
+    </subsection>
     
-    <section name="StatefulDecoder Chaining/Stacking">
+    <subsection name="StatefulDecoder Chaining/Stacking">
       <p>
         StatefulDecoders can easily be chained or stacked to operate on a 
         substrate stream.  This is achieved by having the callback of one 
@@ -141,13 +223,15 @@
       
       <p>
         Because the occurence of chaining may be common and several folks have
-        already expressed their interest in it we have devised a special 
+        already expressed their interest in it, we have devised a special
         StatefulDecoder implementation called a DecoderStack.  It itself is 
         a decoder however other decoders can be pushed onto it.  When empty
-        without any decoders in the stack it operates in pass-thro mode.  When
-        StatefulDecoders are pushed decode operations invoke a chain of decoders
-        starting with the bottom most in the stack going up to the top.  The
-        final callback is the callback registered with the DecoderStack.
+        without any decoders in the stack it operates in pass-through mode.  The
+        decode operation is basically the identity transformation.  When
+        StatefulDecoders are pushed, decode operations invoke a chain of
+        decoders starting with the bottom most in the stack going up to the
+        top.  The final callback invoked is the callback registered with the
+        DecoderStack.
       </p>
       
       <p>
@@ -184,9 +268,9 @@
   }
 }
       </source>      
-    </section>
+    </subsection>
     
-    <section name="Recommendations to Implementors">
+    <subsection name="Recommendations to Implementors">
       <p>
         Keep it simple and rely on chaining to divide and concur complex 
         decoders into several trivial decoders.  Besides simple chaining,  
@@ -203,7 +287,13 @@
         5 different possible configurations to the contents of arriving data 
         with respect to the unit of encoded substrate:
       </p>
-      
+
+      <!--
+        todo add illustrations using images here - its not that hard
+        might want to turn this into a table instead of a ul if we decide
+        to do that
+      -->
+
       <ul>
         <li>
           it contains a single complete discrete unit of encoded substrate
@@ -230,6 +320,7 @@
         of the buffer.  Tail fragments end a unit of encoded substrate and are
         found at the front of the buffer.
       </p>
+    </subsection>
     </section>
   </body>
 </document>

Added: incubator/directory/snickers/trunk/xdocs/images/BERDecoder.gif
==============================================================================
Binary file. No diff available.

Added: incubator/directory/snickers/trunk/xdocs/images/BERDecoderCallback.gif
==============================================================================
Binary file. No diff available.

Added: incubator/directory/snickers/trunk/xdocs/images/PrimitiveTupleDecoder-uml.gif
==============================================================================
Binary file. No diff available.

Added: incubator/directory/snickers/trunk/xdocs/images/all-uml.gif
==============================================================================
Binary file. No diff available.

Added: incubator/directory/snickers/trunk/xdocs/images/state-helper-classes.gif
==============================================================================
Binary file. No diff available.

Added: incubator/directory/snickers/trunk/xdocs/images/tag-integer-encoding.png
==============================================================================
Binary file. No diff available.