You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by ak...@apache.org on 2004/05/28 02:41:21 UTC
svn commit: rev 20523 - in incubator/directory/snickers/trunk: . ber-codec ber-codec/src/java/org/apache/snickers/ber ber-codec/src/test/org/apache/snickers/ber codec-stateful xdocs/ber-codec xdocs/codec-stateful xdocs/images
Author: akarasulu
Date: Thu May 27 17:41:20 2004
New Revision: 20523
Added:
incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml
incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml
incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml
incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml
incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml
incubator/directory/snickers/trunk/xdocs/images/BERDecoder.gif (contents, props changed)
incubator/directory/snickers/trunk/xdocs/images/BERDecoderCallback.gif (contents, props changed)
incubator/directory/snickers/trunk/xdocs/images/PrimitiveTupleDecoder-uml.gif (contents, props changed)
incubator/directory/snickers/trunk/xdocs/images/all-uml.gif (contents, props changed)
incubator/directory/snickers/trunk/xdocs/images/state-helper-classes.gif (contents, props changed)
incubator/directory/snickers/trunk/xdocs/images/tag-integer-encoding.png (contents, props changed)
Modified:
incubator/directory/snickers/trunk/ber-codec/ (props changed)
incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java
incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java
incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java
incubator/directory/snickers/trunk/codec-stateful/ (props changed)
incubator/directory/snickers/trunk/maven.xml
incubator/directory/snickers/trunk/project.properties
incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml
incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml
Log:
Commit changes ...
o fixed a bug in the Tag class
o made respective changes to the TagTest class
o added a few new documents which are empty
o completed the asn1berinfo.xml xdoc
o completed the BERDecoderDesign.xml xdoc
o added a bunch images for the new xdocs
o fixed javadocs and constant visibility in the Tag and Tuple classes
Modified: incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java (original)
+++ incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tag.java Thu May 27 17:41:20 2004
@@ -32,9 +32,11 @@
/** tag mask for the primitive/constructed bit - 0010 0000 - 0x20 */
private static final int PRIMITIVE_MASK = 0x20 ;
/** tag mask for the short tag format - 0001 1111 - 0x1F */
- public static final int SHORT_MASK = 0x1F ;
+ static final int SHORT_MASK = 0x1F ;
/** tag mask for the long tag format - 0111 1111 - 0x7F */
- public static final int LONG_MASK = 0x7F ;
+ static final int LONG_MASK = 0x7F ;
+ /** tag flag indicating the use of the long tag encoding form */
+ private static final int LONG_FLAG = 0x80 ;
/** tag id */
private int id = 0 ;
@@ -44,7 +46,7 @@
private boolean isFixated = false ;
/** the type class of this tag */
private TypeClass typeClass = TypeClass.APPLICATION ;
- /** a byte buffer used to collect the arriving tag octets */
+ /** buffer backed by a Java int to collect the arriving tag octets */
private final TagOctetCollector buf = new TagOctetCollector() ;
@@ -72,7 +74,7 @@
void fixate() throws DecoderException
{
isFixated = true ;
- id = Tag.getTagId( buf ) ;
+ id = getTagId( buf ) ;
isPrimitive = isPrimitive( buf.get( 0 ) ) ;
typeClass = TypeClass.getTypeClass( buf.get( 0 ) ) ;
}
@@ -95,8 +97,8 @@
if ( buf.size() == 1 )
{
- // its the short form so we just fixate
- if ( ( 0x1F & octet ) != 0x1F )
+ // if its the short form so we just fixate now!
+ if ( ( SHORT_MASK & octet ) != SHORT_MASK )
{
fixate() ;
}
@@ -108,7 +110,7 @@
* significant bit to flag the end of the train of octets for the
* tag id.
*/
- else if ( ( octet & 0x80 ) == 0 )
+ else if ( ( octet & LONG_FLAG ) == 0 )
{
fixate() ;
}
@@ -155,7 +157,7 @@
*/
public int getRawTag()
{
- return buf.getIntValue() ;
+ return buf.getIntValue() ;
}
@@ -194,7 +196,7 @@
/**
- * Gets the tag id of a TLV from the tag octets.
+ * Gets the tag id of a TLV from tag octets.
*
* @param octets the set of octets needed to determine the tag value
* (a.k.a identifier octets)
@@ -205,12 +207,12 @@
public final static int getTagId( byte[] octets )
throws DecoderException
{
- if ( octets.length > 6 )
+ if ( octets.length > 4 )
{
/*
* If this exception is ever thrown which is highly unlikely, then
* we need to switch to another data type to return because after
- * 5 bytes the int can no longer hold the number.
+ * 4 bytes the int can no longer hold the number.
*/
throw new DecoderException( "Tag number is too large." ) ;
}
Modified: incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java (original)
+++ incubator/directory/snickers/trunk/ber-codec/src/java/org/apache/snickers/ber/Tuple.java Thu May 27 17:41:20 2004
@@ -178,7 +178,7 @@
/**
- * Gets the tag id (T-part) for this TLV Tuple.
+ * Gets the tag id for this TLV Tuple.
*
* @return the tag id
*/
Modified: incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java
==============================================================================
--- incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java (original)
+++ incubator/directory/snickers/trunk/ber-codec/src/test/org/apache/snickers/ber/TagTest.java Thu May 27 17:41:20 2004
@@ -21,6 +21,8 @@
import junit.framework.TestCase ;
+import java.nio.BufferOverflowException;
+
/**
* Tests the BER utility functions.
@@ -31,15 +33,15 @@
*/
public class TagTest extends TestCase
{
- private static final int BIT_7 = 0x80 ;
- private static final int BIT_6 = 0x40 ;
- private static final int BIT_4 = 0x10 ;
- private static final int BIT_3 = 0x08 ;
- private static final int BIT_2 = 0x04 ;
- private static final int BIT_1 = 0x02 ;
private static final int BIT_0 = 0x01 ;
+ private static final int BIT_1 = 0x02 ;
+ private static final int BIT_2 = 0x04 ;
+ private static final int BIT_3 = 0x08 ;
+ private static final int BIT_4 = 0x10 ;
private static final int BIT_5 = 0x20 ;
-
+ private static final int BIT_6 = 0x40 ;
+ private static final int BIT_7 = 0x80 ;
+
public static void main( String[] args )
{
@@ -77,7 +79,7 @@
public void getTypeClass()
{
- assertEquals( TypeClass.UNIVERSAL, TypeClass.getTypeClass( (byte) 0 ) );
+ assertEquals( TypeClass.UNIVERSAL, TypeClass.getTypeClass( 0 ) ) ;
}
@@ -173,14 +175,12 @@
}
}
- octets = new byte[6] ;
+ octets = new byte[4] ;
octets[0] = 31 ;
octets[1] = 0 ; // shift 0
octets[2] = 0 ; // shift 7
octets[3] = 0 ; // shift 14
- octets[4] = 0 ; // shift 21
- octets[5] = 0 ; // shift 28
-
+
for ( int ii = 16384 ; ii < 2100000 ; ii++ )
{
octets[1] = ( byte ) ( ii & Tag.LONG_MASK ) ;
@@ -199,7 +199,18 @@
try
{
- Tag.getTagId( new byte[56] ) ;
+ Tag.getTagId( new byte[5] ) ;
+ fail( "should fail before getting here" ) ;
+ }
+ catch ( Throwable t )
+ {
+ assertNotNull( t ) ;
+ }
+
+
+ try
+ {
+ Tag.getTagId( new byte[12] ) ;
fail( "should fail before getting here" ) ;
}
catch ( Throwable t )
@@ -209,6 +220,38 @@
}
+ public void testTagLimits() throws Exception
+ {
+ byte[] bites = { (byte) 0xff, (byte) 0xff, (byte) 0x8f, (byte) 0x0f } ;
+
+ Tag tag = new Tag() ;
+ tag.add( bites[0] ) ;
+ tag.add( bites[1] ) ;
+ tag.add( bites[2] ) ;
+ tag.add( bites[3] ) ;
+
+ byte[] octets = tag.getOctets() ;
+ assertTrue( ArrayUtils.isEquals( bites, octets ) ) ;
+
+ byte[] tooMany = { (byte) 0xff, (byte) 0xff, (byte) 0x8f, (byte) 0x8f, (byte) 0x0f } ;
+
+ tag = new Tag() ;
+ tag.add( tooMany[0] ) ;
+ tag.add( tooMany[1] ) ;
+ tag.add( tooMany[2] ) ;
+ tag.add( tooMany[3] ) ;
+
+ try
+ {
+ tag.add( tooMany[4] ) ;
+ fail( "should never get here due to exception" ) ;
+ }
+ catch( BufferOverflowException e )
+ {
+ }
+ }
+
+
public void testGetOctets() throws Exception
{
byte[] bites = { (byte) 0xff, (byte) 0xff, (byte) 0x0f } ;
Modified: incubator/directory/snickers/trunk/maven.xml
==============================================================================
--- incubator/directory/snickers/trunk/maven.xml (original)
+++ incubator/directory/snickers/trunk/maven.xml Thu May 27 17:41:20 2004
@@ -56,9 +56,9 @@
banner="site" ignoreFailures="false"/>
</goal>
- <goal name="clean-all" description="Clean all sandbox projects">
+ <goal name="cleanall" description="Clean all sandbox projects">
<maven:reactor basedir="${basedir}" postProcessing="true"
- includes="**/project.xml" excludes="./project.xml" goals="cleanall"
+ includes="**/project.xml" excludes="./project.xml" goals="clean"
banner="Clean All"
ignoreFailures="true"/>
<attainGoal name="clean"/>
Modified: incubator/directory/snickers/trunk/project.properties
==============================================================================
--- incubator/directory/snickers/trunk/project.properties (original)
+++ incubator/directory/snickers/trunk/project.properties Thu May 27 17:41:20 2004
@@ -1,18 +1,18 @@
-maven.javadoc.private=true
-maven.javadoc.customtags=tag1 tag2
-
-tag1.name=todo
-tag1.description=To Do:
-tag1.enabled=true
-tag1.scope=all
-
-tag2.name=task
-tag2.description=Task:
-tag2.enabled=false
-tag2.scope=all
-
-maven.license.licenseFile=../../../LICENSE.txt
-#maven.clover.instrument.tests=true
+maven.javadoc.private=true
+maven.javadoc.customtags=tag1 tag2
+
+tag1.name=todo
+tag1.description=To Do:
+tag1.enabled=true
+tag1.scope=all
+
+tag2.name=task
+tag2.description=Task:
+tag2.enabled=false
+tag2.scope=all
+
+maven.license.licenseFile=../../../LICENSE.txt
+#maven.clover.instrument.tests=true
maven.junit.fork=true
maven.changelog.factory=org.apache.maven.svnlib.SvnChangeLogFactory
maven.license.licenseFile=../../../LICENSE.txt
@@ -20,7 +20,7 @@
maven.ui.banner.background=#FFFFFF
maven.xdoc.includeProjectDocumentation=no
maven.xdoc.poweredby.image=
-maven.xdoc.jsl = file:/${basedir}/../../sitedocs/trunk/src/etc/site.jsl
+#maven.xdoc.jsl = file:/${basedir}/../../sitedocs/trunk/src/etc/site.jsl
#
# remote repository properties
Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderDesign.xml Thu May 27 17:41:20 2004
@@ -0,0 +1,552 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <properties>
+ <author email="akarasulu@apache.org">Alex Karasulu</author>
+ <title>BER Decoder Design</title>
+ </properties>
+
+ <body>
+ <section name="Factors Driving the Design">
+ <p>
+ Several factors drove the design. Some of these were covered in
+ advance in the section on stateful codecs. While implementing these
+ stateful decoders we still must adhere to some rules to not undermine
+ the purpose for implementing stateful decoders in the first place.
+ Other design decisions come from the way the BER encoding is devised.
+ These driving factors are all covered here in this document to reveal
+ the present design while justifying it and the decisions that forged
+ it.
+ </p>
+
+ <subsection name="TLV Nesting">
+ <p>
+ A constructed TLV tuple contains other TLV tuples nested within the
+ Value field, and so on, recursively. The nesting depth is usually
+ indefinite and dependent on the data structures being transformed.
+ This fact drastically affects the way the decoder is designed and
+ hence operates.
+ </p>
+
+ <p>
+ It's always good form to tackle the basis case[s] of any recursive
+ problem. The simplest basis case is the processing of a simple
+ primitive TLV tuple. So for the time being presume we are simply
+ implementing a primitive BER TLV decoder without any nesting.
+ </p>
+
+ <p>
+ While decoding a primitive tuple the decoder must maintain state.
+ State for this simple decoder is an indicator representing whether
+ the Tag, Length, or Value was being processed at the end of the last
+ chunk of substrate data. This is required along with any data
+ already captured for these fields to continue processing where the
+ last chunk left off. Value field data actually is not "accumulated"
+ to maintain state, but for the time being we'll ignore that fact.
+ </p>
+
+ <p>
+ The primitive TLV tuple decoder is easily designed. Build three
+ stateful sub-decoders for each field, the Tag, Length, and the Value
+ fields. A top level decoder delegates the processing state of the
+ fields to each sub-decoder and switches sub-decoders when the
+ indicator changes from Tag, to Length, to Value then back to Tag
+ and so on. When a Value completes processing and before another Tag
+ is read the decoder triggers a callback event. Here's what the
+ hypothetical primitive TLV tuple decoder would look like:
+ </p>
+
+ <center>
+ <img src="../images/PrimitiveTupleDecoder-uml.gif"/>
+ </center>
+
+ <p>
+ Now let's try to figure out how to handle constructed TLV tuples
+ which recursively nest other tuples indefinitely. State now
+ is more than just where you left off in the current tuple being
+ processed. When processing an inner tuple the decoder must also know
+ where it left off in the outer tuple to resume processing. More
+ accurately the decoder must maintain the state of every parent and
+ ancestor tuple of the current tuple being processed in the same
+ manner the TLV tuple decoder did for primitive TLV tuples. Hence
+ the state of the decoder is a stack of all ancestor TLV tuple states
+ as well as the state of the tuple currently being processed.
+ </p>
+
+ <p>
+ While processing input for a nested TLV tuple the state of all
+ tuple ancestors must also be updated with the same data so the
+ decoder can determine when their processing is about to complete.
+ This way the decoder does not read TLV tuples adjacent to the
+ constructed TLV tuple, incorrectly presuming that they are part
+ of the constructed TLV tuple.
+ </p>
+
+ <p>
+ When the last inner tuple in a constructed TLV tuple completes, it
+ triggers a callback for itself, then the stack is popped and another
+ callback event is triggered for the completion of the constructed
+ TLV tuple.
+ </p>
+
+ <p>
+ In conclusion, the state of a BER decoder, used to process both
+ primitive and constructed TLV tuples, must take into account the
+ processing state of every tuple ancestor in the stack of nested
+ tuples. Otherwise state cannot be maintained. Just how this is
+ efficiently managed is the topic of the next few subsections.
+ </p>
+ </subsection>
+
+ <subsection name="Value Processing">
+ <p>
+ If the decoder accumulates encountered Value field octets to maintain
+ state then we have a problem. First off the size of the Value could
+ be massive and often varies. We want to maintain a fixed maximum
+ memory footprint to the decoder. This goes out the window if Value
+ field content is accumulated within buffers to maintain tuple
+ processing state. Furthermore, with nesting, every ancestor tuple in
+ the nesting stack would maintain a copy of the topmost tuple's Value
+ field when that tuple is about to complete processing. The number of
+ copies is a function of the nesting depth, so the deeper the nesting,
+ the more memory is wastefully consumed. This is totally unacceptable
+ and it undermines the reason for devising stateful codecs in the
+ first place.
+ </p>
+
+ <p>
+ To avoid this problem we must not accumulate Value field octets
+ (bytes) while maintaining state. Unlike the Value field, the other
+ Tag and Length fields are limited and often account for only a few
+ bytes within TLV tuples. To maintain state however the decoder still
+ has to perform some accounting to determine when outer tuples in the
+ nesting stack complete processing. The decoder maintains state by
+ using Value byte counters processed rather than using an accumulator
+ to store the Value bytes. This way the decoder can compare a tuple's
+ Length field with its Value byte counter to determine if processing
+ is complete.
+ </p>
+ </subsection>
+
+ <subsection name="Extending DecoderCallback">
+ <p>
+ The next question is how the decoder propagates TLV tuple Values to
+ the target receiving the TLV tuple stream? If the standard
+ <code>DecoderCallback.decodeOccurred()</code> method is designed to
+ be called upon TLV processing completion how do we avoid collecting
+ the Value while getting the Value bytes somehow to a decoder user
+ via callbacks?
+ </p>
+
+ <p>
+ The answer is to use yet another callback. The DecoderCallback
+ interface is extended by actually adding three extra callback
+ methods: one for each field. The BERDecoderCallback interface
+ extends the DecoderCallback interface and adds the following
+ methods:
+ </p>
+
+ <ul>
+ <li>void tagDecoded( Tuple tlv )</li>
+ <li>void lengthDecoded( Tuple tlv )</li>
+ <li>void partialValueDecoded( Tuple tlv )</li>
+ </ul>
+
+ <p>
+ The following diagram shows the decoder interfaces and a do nothing
+ adapter implementation for convenience:
+ </p>
+
+ <center>
+ <img src="../images/BERDecoderCallback.gif"/>
+ </center>
+
+ <p>
+ For a single TLV all methods except for the partialValueDecoded()
+ method are invoked at most once. As its name suggests pieces of
+ Value are delivered encapsulated by the Tuple argument rather than
+ the entire Value field. Hence the method can be called zero or more
+ times while processing a TLV.
+ </p>
+
+ <p>
+ The extended decoder callback interface allows the decoder to chunk
+ Value fields and hence maintain a fixed maximum footprint. The
+ partialValueDecoded callback is a bit misleading however. It really
+ does not decode any Value bytes based on the Tag of the TLV. It
+ simply hands off the raw value bytes to the callback, this part of
+ the decode is left to higher level decoders built on top of the
+ BERDecoder. However all primitive type decode operations are
+ provided by the BER codec.
+ </p>
+ </subsection>
+
+ <subsection name="Constructed Values">
+ <p>
+ The values of constructed TLV tuples are other tuples. Their Values
+ are already decoded by the BER decoder which triggers TLV events for
+ the nested TLV tuples. Calls to the partialValueDecoded() method
+ hence are never made. Furthermore the decoder transits from the
+ Length state of processing to the Tag state just after completing
+ the decode of a constructed tuple Length field. This is because the
+ next tuple to process is a nested tuple with its Tag following the
+ constructed tuple's Length field.
+ </p>
+
+ <p>
+ Constructed TLV tuples never have partialValueDecoded() called. Only
+ primitive TLV tuples have Value octets delivered to this callback.
+ This makes state handling within the decoder a bit tricky but the
+ complexity for the rewards reaped is well worth it.
+ </p>
+ </subsection>
+ </section>
+
+ <section name="State Management">
+ <p>
+ There are two parts to managing state for the BERDecoder: stack
+ based ancestor Value accounting state, and current tuple processing
+ state.
+ </p>
+
+ <subsection name="Current TLV State Management">
+ <p>
+ While processing the tuple in scope a type safe enumeration is used
+ to track the current tuple processing state which could be in either
+ Tag, Length, or Value processing states. Subordinate decoders are
+ used to decode the Tag, and Length fields. The Value field unlike
+ these fields does not have a corresponding decoder: it does not need
+ one since primitive TLV Values are not decoded but returned raw.
+ The sub-decoders for Tag and Length manage the accumulation of field
+ bytes between chunked decode operations. The following diagram
+ displays the helper classes used to manage the current TLV processing
+ state:
+ </p>
+
+ <center>
+ <img src="../images/state-helper-classes.gif"/>
+ </center>
+
+ <table>
+ <tr><th>Color</th><th>Group</th></tr>
+ <tr><td>Red</td><td>Generic Helper Classes</td></tr>
+ <tr><td>Yellow</td><td>Tag Specific Classes</td></tr>
+ <tr><td>Purple</td><td>Length Specific Classes</td></tr>
+ </table>
+
+ <p>
+ The tag specific classes include the Tag and TagDecoder classes.
+ The Tag class handles the extraction of various Tag embedded fields
+ like the constructed bit and the tag's type class. It also collects
+ tag octets up to 4 octets only using a special TagOctetCollector.
+ The TagDecoder is just a stateful decoder implementation wrapper
+ using Tag methods.
+ </p>
+
+ <p>
+ The TypeClass class is a type safe enumeration of the four type
+ classes of Tags:
+ </p>
+
+ <ul>
+ <li>UNIVERSAL</li>
+ <li>APPLICATION</li>
+ <li>CONTEXT_SPECIFIC</li>
+ <li>PRIVATE</li>
+ </ul>
+
+ <p>
+ Once the Tag accumulator collects all tag octets it determines and
+ sets the TypeClass corresponding to the tag.
+ </p>
+
+ <p>
+ The TagEnum class is an abstract base class for type safe tag
+ enumerations. This special type safe enumeration associates a tag
+ label with two integers: the tag value and the tag id. The tag value
+ is an integer representation of the tag whereas the id is just the
+ id field of the tag. This btw is the main reason why the
+ TagOctetCollector only accepts four bytes for building the tag: an integer
+ is essentially used as the backing store for the tag data. The
+ reasons for this are explained within the tag handling section to
+ follow.
+ </p>
+
+ <p>
+ The Length and LengthDecoder operate very much in the same fashion
+ as do the Tag and TagDecoder. The same pattern is applied to both
+ pairs of classes. The primary difference is the use of a ByteBuffer
+ within the Length class rather than a custom data structure like the
+ TagOctetCollector to accumulate Length bytes (octets). The main
+ reason for this is that a limit of 4 tag octets has been imposed on
+ the decoder which in fact is contrary to the BER specification.
+ Length values well above the 4 byte integer are surely possible for
+ TLV values although improbable.
+ </p>
+
+ <p>
+ The BERDecoderState class is another type safe enumeration with the
+ following values: TAG, LENGTH and VALUE. It obviously represents the
+ processing state of the TLV tuple currently in scope.
+ </p>
+ </subsection>
+
+ <subsection name="Stack State Management">
+ <p>
+ The BERDecoder UML is displayed below to show some of the members
+ and operations available. Pay special attention to the tlvStack
+ member and the getTupleStack() package friendly Stack accessor used
+ for stack state management:
+ </p>
+
+ <center>
+ <img src="../images/BERDecoder.gif"/>
+ </center>
+
+ <p>
+ The tlvStack is a Stack of Tuple instances. The last subsection
+ contains a UML diagram with the Tuple class. Tuple objects are the
+ objects handed to the decodeOccurred() method of the callback.
+ They basically encapsulate a bunch of information associated with a
+ TLV tuple in one object. This includes accounting information used
+ to determine the processing state of constructed TLVs. The Stack of
+ Tuples hence stores the state information associated with ancestor
+ Tuples currently out of scope.
+ </p>
+
+ <p>
+ With every chunk of substrate processed for the tuple currently in
+ scope, the accounting information in every Tuple of the Stack is
+ updated. Again, this tracks how much of the ancestor's Value field
+ has been processed. Specifically the length and index fields of
+ Tuple objects are used to determine how much of the TLV has been
+ read.
+ </p>
+ </subsection>
+ </section>
+
+ <section name="Tuple Recycling">
+
+ <subsection name="TLV Tuple Density">
+ <p>
+ BER TLV streams will contain varying densities of TLV tuples. The
+ density of the tuples depends on the nature of the content. Streams
+ with many small primitive types crammed together will generate TLV
+ tuples very rapidly while processing the encoded substrate. Every
+ few, even couple bytes might produce a new tuple.
+ </p>
+
+ <p>
+ If we instantiated a new Tuple instance and populated it for every
+ few bytes in the stream, then performance will degrade significantly
+ while processing streams with high TLV tuple densities. Furthermore
+ rapid object creation rates would seriously tax the garbage
+ collector. To avoid these negative effects of instantiating new TLV
+ tuples we need to reuse the same Tuple allowing interested parties
+ to either clone or copy the contained information while processing
+ the tuple. More often than not, most tuples will be ignored. It
+ would be wasteful to create a new Tuple object for every TLV tuple
+ encountered when some or most might potentially be ignored.
+ </p>
+ </subsection>
+
+ <subsection name="Problem With Recycling a Tuple">
+ <p>
+ If we avoid instantiating new TLV Tuples and reuse the same Tuple
+ object, we run into a problem. First we'll lose data when we
+ attempt to push the tuple onto the tlvStack when the next TLV is
+ processed.
+ </p>
+
+ <p>
+ One solution to this problem is to clone constructed Tuples before
+ pushing the tuple onto the tlvStack. Hence only primitives would
+ reuse the same Tuple. This works well because primitive tuple data
+ does not need to be maintained past its scope. If the data needs to
+ be copied, it can be copied by the application using the decoder.
+ This makes sense since the application determines which Tuple values
+ to store or ignore.
+ </p>
+
+ <p>
+ This solves performance bottlenecks with substrates that are dense
+ with primitive tuples. However the problem will re-emerge if the
+ substrate is dense with deeply nested primitive tuples. If every
+ primitive is embedded deep within its own cavern of nested TLV
+ tuples then we'll be closer to instantiating a Tuple object for
+ almost every TLV encountered. The perfect substrate under this
+ scheme, of course, would be a single primitive element but beyond
+ that it would be flat nesting patterns where as many primitive TLV
+ tuples are stuffed into every constructed TLV tuple as possible.
+ </p>
+
+ <p>
+ The deeply embedded high density of constructed TLV tuples is highly
+ unlikely although possible for recursive ASN.1 definitions.
+ Regardless of these situations producing a high density of
+ constructed TLV tuples, the nesting structures will often share the
+ same parents so the TLV tuple to Tuple object instantiation ratio
+ would rarely approach 1:1.
+ </p>
+
+ <p>
+ Over all we cannot determine the ratio of constructed to primitive
+ TLV tuples encountered within a substrate. However one would like
+ to believe that complex structures do not predominate, and that
+ protocol designers opt for simpler structures whenever possible.
+ With this sensible hypothesis reuse of primitive TLV tuples and the
+ cloning of constructed TLV tuples seems like a viable strategy for
+ managing excessive object instantiations.
+ </p>
+ </subsection>
+
+ </section>
+
+ <section name="Integer Representation For Tags">
+ <p>
+ According to the BER encoding specification, X.690, a Tag id can be
+ any arbitrary value: there is no limitation to the size of an id.
+ In practice ids are claimed incrementally by ASN.1 modules from the
+ CONTEXT_SPECIFIC and APPLICATION type classes. These values for
+ any reasonable protocol are far less than 100 ids. Experts like
+ Larmouth claim they have never seen Tag ids larger than a thousand.
+ So we don't bother representing Tags within a buffer for the full
+ expressivity of the specification when we know of reasonable soft
+ limits to the Tag id.
+ </p>
+
+ <subsection name="Four Tag Octets Are Enough">
+ <p>
+ In most cases, one or two octets suffice for encoding a tag and its
+ identifier. In some cases three bytes may rarely be used. It's
+ highly improbable that we'll ever see four or more bytes to be used
+ to encode a tag: even the experts have never seen this before.
+ The only way I can conceive of this is if computers begin devising
+ or generating protocols :-).
+ </p>
+
+ <p>
+ According to the specification the long form can encode the
+ following maximum identifier sizes with various octet lengths:
+ </p>
+
+ <table>
+ <tr>
+ <th>Octets</th>
+ <th>Maximum Tag Id</th>
+ <th>Calculation</th>
+ </tr>
+
+ <tr>
+ <td>1</td>
+ <td>30</td>
+ <td>2^5-2</td>
+ </tr>
+
+ <tr>
+ <td>2</td>
+ <td>127</td>
+ <td>2^7-1</td>
+ </tr>
+
+ <tr>
+ <td>3</td>
+ <td>16,383</td>
+ <td>2^14-1</td>
+ </tr>
+
+ <tr>
+ <td>4</td>
+ <td>2,097,151</td>
+ <td>2^21-1</td>
+ </tr>
+
+ <tr>
+ <td>5</td>
+ <td>268,435,455</td>
+ <td>2^28-1</td>
+ </tr>
+ </table>
+
+ <p>
+ As we can see 3-4 octets encode a maximum tag id we can live with.
+ One might expect the max tag id for say 4 octets would be 2^(4*8)-1
+ but it's not. We lose some bits to be able to encode a variable
+ length tag with the long form. In the long form all the bits from
+ the first octet are wasted and a bit from each octet thereafter is
+ lost to be able to terminate the tag field. Hence if we started out
+ with 4 bytes or 32 bits then we're actually using 32-8-3 or 21 of
+ the original bits for storing the value of an id. This yields a max
+ id value of 2^21-1 for 32 bits or 4 octets.
+ </p>
+ </subsection>
+
+ <subsection name="Integer Encoding">
+ <p>
+ Tags are used to match for TLV tuples. Nothing matches faster than an
+ integer using a switch statement. It makes sense to store and
+ manage raw Tag octets within the bytes of a primitive Java integer
+ rather than within a byte buffer. This way switch statements can be
+ used to quickly match TLV tuples based on their integer encoding for
+ the first four tag bytes. Furthermore the stub compiler can
+ prefabricate type safe enums whose values equal the integer encoding
+ of a tag's four octets. Matching for TLV tuples by tag then is as
+ fast as it can get using this integer encoding. This btw is the sole
+ reason why we have the abstract class, TagEnum, which extends
+ ValuedEnum. It's a type safe enumeration for Tag octets encoded as
+ integers.
+ </p>
+
+ <p>
+ Encoding only the four octets of the raw tag, limits the maximum
+ value of the id that a TLV's tag field can represent to 2^21-1.
+ This was the reason for the discussion in the section above. We
+ simply will not need an id bigger than this. So we decided to
+ break with the specification and restrict the max value of the tag
+ id down to 2^21-1 rather than leave it unbounded within Snickers.
+ This limitation allows us to represent the first four octets of the
+ tag field as an integer thereby speeding up TLV pattern matching
+ considerably.
+ </p>
+
+ <p>
+ The TagOctetCollector is specifically designed to accumulate the four
+ octets of the Tag used by Snickers. It stores the first octet in the
+ most significant byte of the int, the second in the next most
+ significant and so on until the last of the four octets is stored
+ within the least significant byte of the integer. The diagram below
+ shows just how 4 bytes are assembled into the integer:
+ </p>
+
+ <center>
+ <img src="../images/tag-integer-encoding.png"/>
+ </center>
+
+ <p>
+ Note that if there were only 3 tag octets collected, then the bits
+ for Octet 4 would all be zero: bits 0-7 in the integer would be
+ zeros. Likewise if only one octet were used then bits 23-0 would
+ be zero'd out within the 32-bit integer.
+ </p>
+
+ <p>
+ The integer encoding for tags is not leveraged here at the level of
+ the BERDecoder. At this low level the decoder does not care about
+ tags other than those in the UNIVERSAL type class reserved for
+ detecting TLV tuple termination sequences within the stream. Later
+ within the BERDigester where Tag pattern matching is used to make
+ sense of these TLV tuple streams, the integer encoding and the
+ TagEnum are used heavily. Rather than add more complexity to the
+ BERDecoder we stop here and build upon it by stacking another
+ decoder, the BERDigester on top. The BERDecoder decodes encoded
+ substrate streams into TLV Tuples and announces their arrival by
+ callbacks which are received by the BERDigester. It is then up to
+ the BERDigester to process these TLV tuples using decoding rules
+ triggered by tag nesting patterns. How appropriate! Data encoded
+ using Basic Encoding Rules is decoded using rules that process a TLV
+ tuple stream. More information regarding the design of the
+ BERDigester can be found <a href="BERDigesterDesign.html">here</a>.
+ </p>
+ </subsection>
+ </section>
+ </body>
+</document>
Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BERDecoderUserGuide.xml Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <properties>
+ <author email="akarasulu@apache.org">Alex Karasulu</author>
+ <title>BERDecoder Usage</title>
+ </properties>
+ <body>
+ <section name="Coming soon ... ">
+ </section>
+ </body>
+</document>
Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderDesign.xml Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <properties>
+ <author email="akarasulu@apache.org">Alex Karasulu</author>
+ <title>BEREncoder Design</title>
+ </properties>
+ <body>
+ <section name="Coming soon ... ">
+ </section>
+ </body>
+</document>
Added: incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/BEREncoderUserGuide.xml Thu May 27 17:41:20 2004
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <properties>
+ <author email="akarasulu@apache.org">Alex Karasulu</author>
+ <title>BEREncoder Usage</title>
+ </properties>
+ <body>
+ <section name="Coming soon ... ">
+ </section>
+ </body>
+</document>
Added: incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml
==============================================================================
--- (empty file)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/asn1berinfo.xml Thu May 27 17:41:20 2004
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <properties>
+ <author email="akarasulu@apache.org">Alex Karasulu</author>
+ <title>ASN.1 and BER Information</title>
+ </properties>
+ <body>
+
+ <section name="ASN.1 and BER Information">
+ <subsection name="Background">
+ <p>
+ The BER encoding for ASN.1 was defined within ITU specification
+ X.690 along with the Canonical and Distinguished Encoding Rules.
+ A copy of this document along with other useful documents and books
+ on ASN.1 and its encodings can be obtained for free here:
+ </p>
+ </subsection>
+
+ <subsection>
+ <table>
+ <tr><th>Document</th><th>Description</th></tr>
+ <tr>
+ <td>
+ <a href="http://lesbeshy.notlong.com">X.690 (07/02)</a>
+ </td>
+ <td>
+ Information technology - ASN.1 encoding rules: Specification of
+ Basic Encoding Rules (BER), Canonical Encoding Rules (CER) and
+ Distinguished Encoding Rules (DER)
+ </td>
+ </tr>
+
+ <tr>
+ <td>
+ <a href="http://offburie.notlong.com">X.680 (07/02)</a>
+ </td>
+ <td>
+ Information technology - Abstract Syntax Notation One (ASN.1):
+ Specification of basic notation
+ </td>
+ </tr>
+
+ <tr>
+ <td>
+ <a href="http://www.oss.com/asn1/bookreg.html">ASN.1 Complete</a>
+ </td>
+ <td>
+ A verbose yet truly complete book on ASN.1 and various encoding
+ mechanisms. Easy to read since the author takes almost a
+ conversational tone.
+ </td>
+ </tr>
+
+ <tr>
+ <td>
+ <a href="http://www.oss.com/asn1/bookreg2.html">
+ ASN.1 - Communication between heterogeneous systems</a>
+ </td>
+ <td>
+ Also a very complete book on ASN.1 and various encoding mechanisms.
+ A little more difficult to read but seems to be much better
+ organized and more exacting. I use both books in conjunction
+ often switching between the two based on my mood :-). Both are
+ most excellent - thanks to both authors for graciously providing
+ their books online.
+ </td>
+ </tr>
+ </table>
+ </subsection>
+
+ <subsection name="BER Tag, Length, Value Tuples">
+ <p>
+ BER stands for Basic Encoding Rules. These rules describe how to
+ encode and decode basic data types and composite data structures
+ to and from TLV streams. A TLV hence may be primitive (atomic) or
+ constructed (nested) where the value component contains other TLVs.
+ The T is for the Tag, a numeric type identifier; the L is for the
+ length of the data carried in the third component, V, the value.
+ Outside of this very trivial introduction there is very little to
+ the encoding. Readers should look at the relatively short
+ specification for reference regarding the exact encoding for
+ various data types using TLV tuples. The books above also have
+ appendices for the various encodings which are longer than the
+ actual specification yet more explanatory.
+ </p>
+ </subsection>
+ </section>
+ </body>
+</document>
\ No newline at end of file
Modified: incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml
==============================================================================
--- incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml (original)
+++ incubator/directory/snickers/trunk/xdocs/ber-codec/index.xml Thu May 27 17:41:20 2004
@@ -2,97 +2,71 @@
<document>
<properties>
<author email="akarasulu@apache.org">Alex Karasulu</author>
- <title>Snickers ASN.1 BER Library</title>
+ <title>Snickers BER Codec</title>
</properties>
<body>
-
<section name="Introduction">
+ <subsection name="What is it?">
<p>
- The Snickers BER codec runtime is based on the stateful codec interfaces
- defined in the <a href="http://jakarta.apache.org/commons/codec">
- commons-codec</a> API (hopefully these stateful codec interfaces make
- there way there).
- </p>
-
- <p>
- The stateful codec interfaces are designed for situations where data
- is encoded or decoded in peices when those fragments of data are made
- available. Between actively encoding and decoding data the codec
- maintains the state of the operation which occurs in parts. Such
- codecs are ideal for non-blocking stateful protocol servers that
- maintain a client socket connection for the life of a session. The
- cost of establishing a client dedicated stateful encoder/decoder pair
- is offset by the prolonged life of the connection. Stateful codecs
- unlike their blocking stateless counterparts do not need to store the
- entire encoded image of a request to decode since they do not have to
- complete a [en]decode operation in one method call.
- </p>
- </section>
-
- <section name="Usage">
- <p>
- For more involved information concerning how the BER codec runtime is
- designed and operates see the design documentation on the decoder
- <a href="./design.html">here</a>.
- </p>
-
- <p>
- The stateful codec interfaces standardize the use of the BER codec.
- Below we show how a decoder can be established to be used within a
- standard selector based input detection loop. This use case is by no
- means specific to the SnickersDecoder, it is a characteristic of
- StatefulDecoders in general. Below we show how the decoder is setup:
- </p>
-
- <source>
-SnickersDecoder decoder = new SnickersDecoder( 512 ) ;
-DecoderCallback cb = new DecoderCallback() {
- decodeOccurred( StatefulDecoder decoder, Object decoded ) {
- MyClass.this.process( ( Message ) decoded ) ;
- }
-};
-decoder.setCallback( cb ) ;
- </source>
-
- <p>
- The stateful decoder uses a callback to deliver completely decoded
- messages. The <code>Message</code> interface which the <em>decoded</em>
- object is cast to is the super interface used by the LDAP common message
- API. The objects returned are LDAP PDU message envelopes but they can
- be any Java stub generated for ASN.1 data types using the Snickers stub
- compiler.
+ The Snickers BER Codec is a runtime API for encoding and decoding ASN.1
+ data structures using Basic Encoding Rules (BER). It implements
+ extensions to the <a href="http://jakarta.apache.org/commons/codec">
+ commons-codec</a> API, for building stateful chunking encoder decoder
+ pairs that maintain state between processing calls.
</p>
-
+ </subsection>
+
+ <subsection name="Stateful Codecs">
<p>
- Within a selector input detection loop which selects channels with
- available input the decoder can be used to decode encoded BER messages.
- The example below is a trivialized example of how the decoder can be
- used to decoded BER encoded data in parts as a message arrives
- fragmented by the tcp/ip stack:
+ More information on these new codec interfaces is available on the
+ <a href="../codec-stateful/index.html">stateful codec</a> home page.
+ You might want to read this before you continue since these extensions
+ are the basis of all Snickers encoders and decoders.
</p>
-
- <source>
-while ( true ) {
- ...
- SelectionKey key = ( SelectionKey ) list.next() ;
- if ( key.isReadable() ) {
- SocketChannel channel = ( SocketChannel ) l_key.channel() ;
- channel.read( buf ) ;
- buf.flip() ;
- decoder.decode( buf ) ;
- }
- ...
-}
- </source>
-
+ </subsection>
+
+ <subsection name="What is encoded/decoded?">
<p>
- As you can see from the code fragment and the API for the decode
- operation the decode does not return anything. In fact the return
- is void. Because the callback is used to deliver the finished product
- when it is ready the decode operation can occur asynchronously in
- another thread or stage of a server. This is what makes
- StatefulDecoders and the SnickersDecoder in particular so exciting.
+ The BER codec is protocol or ASN.1 module independent. The unit of
+ substrate is a BER TLV (Tag, Length, Value) so any BER based protocol
+ can be decoded and encoded by the BER codec to and from TLV tuples.
</p>
+ </subsection>
+ </section>
+
+ <section name="BER Codec User Guides and Design Documents">
+ <table>
+ <tr>
+ <th>Subject</th>
+ <th>Description</th>
+ </tr>
+
+ <tr>
+ <td><a href="./asn1berinfo.html">ASN.1 and BER Information</a></td>
+ <td>Links to various books and specifications on ASN.1 and BER</td>
+ </tr>
+
+ <tr>
+ <td><a href="./BERDecoderUserGuide.html">BER Decoder User Guide</a></td>
+ <td>Describes how to use the BERDecoder to process a TLV stream</td>
+ </tr>
+
+ <tr>
+ <td><a href="./BERDecoderDesign.html">BER Decoder Design</a></td>
+ <td>Explains how and why the BERDecoder was designed</td>
+ </tr>
+
+ <tr>
+ <td><a href="./BEREncoderUserGuide.html">BER Encoder User Guide</a></td>
+ <td>Describes how to use the BEREncoder to generate a TLV stream</td>
+ </tr>
+
+ <tr>
+ <td><a href="./BEREncoderDesign.html">BER Encoder Design</a></td>
+ <td>Explains how and why the BEREncoder was designed</td>
+ </tr>
+
+ </table>
</section>
</body>
</document>
Modified: incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml
==============================================================================
--- incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml (original)
+++ incubator/directory/snickers/trunk/xdocs/codec-stateful/index.xml Thu May 27 17:41:20 2004
@@ -5,38 +5,119 @@
<title>Stateful Codecs</title>
</properties>
<body>
- <section name="Introduction">
+ <section>
+ <subsection name="Introduction">
<p>
- Stateful encoder and decoder pairs, or codecs for short, maintain state
- between respective operations. By maintaining state in the codec all
- the data needed for the operation not be available at one time. This
- leads to codecs with significantly reduced active footprints which are
- constant in size regardless of the size of the substrates they operate
- upon.
+ Codecs are bidirectional data transformations. The data transformed,
+ often referred to as the substrate, may be [en]coded or decoded hence
+ the word codec. The word codec also refers to the actual software
+ used to encode and decode data. We use the term stateful codec for
+ lack of a better description for encoder/decoder pairs possessing
+ certain abilities and exhibiting the following behaviors:
</p>
-
+
+ <ul>
+ <li>the ability to interrupt and resume operation without losing
+ state</li>
+ <li>the ability to process a substrate in one or more steps operating
+ on small chunks rather than all of it in one large operation</li>
+ <li>free up resources while not actively processing perhaps until more
+ of the substrate is available, or just to multiplex limited
+ resources</li>
+ <li>use a small fixed size chunk buffer rather than a variable sized
+ buffer equal to the entire size of the substrate whatever that
+ may be</li>
+ </ul>
+ </subsection>
+
+ <subsection name="Advantages">
<p>
- Furthermore Stateful codecs operate on data as it arrives instead of in
- one shot. This way the computational requirements are spread out over
- time as the substrate is made available.
+ The abilities or behaviors listed above make stateful codecs ideal for
+ use in resource critical situations. Servers for example based on
+ codecs may have to perform several thousand concurrent encode/decode
+ operations. The resources required for such operations, namely threads
+ and memory buffers will be limited. Most of the time these operations
+ will be waiting for IO to complete so they can free up resources to
+ allow other operations to proceed. Stateful codecs make this possible
+ and complement servers designed using non-blocking IO constructs.
</p>
-
+
<p>
- Stateful codecs must be handled with care since they maintain the state
- of an operation. Stateful codecs must be dedicated to a serial
- stream of substrate objects whatever that may be. This makes them ideal
- for streams which have long lifespans however sensitive to the loss of
- data which may retard their state and require a reset.
+ Servers cannot afford to allocate variable sized buffers for arriving
+ data. Allowing variable sized buffers based on incoming data
+ sizes opens the door for DoS attacks where malicious clients can
+ cripple or crash servers, by pumping in massive or never ending
+ data streams. Stateful codecs enable fixed size processing overheads
+ regardless of the size of the data unit transmitted to the server.
+ Smaller codec footprints lead to smaller server process memory
+ footprints.
</p>
- </section>
-
- <section name="StatefulDecoder Usage">
+
+ <p>
+ These advantages also make stateful codecs ideal for use in resource
+ limited environments like embedded systems, PDAs or cellular phones
+ which use ASN.1 and one of its encoding schemes to control data
+ transmission. These systems all run on limited resources where the
+ codec's operational footprint will have dramatic effects on the
+ performance of the device.
+ </p>
+ </subsection>
+
+ <subsection name="How is a stateful codec defined?">
+ <p>
+ There are several ways to skin this cat. To this day discussions are
+ underway at the ASF to determine the best approach. Until a consensus
+ is reached we have decided to use an event driven approach where the
+ events are modelled as callbacks. To better explain the approach we
+ need to discuss it within the context of encoding/decoding.
+ </p>
+
+ <p>
+ Depending on the operation being performed, available chunks of the
+ substrate are processed using either the <code>encode()</code> or
+ the <code>decode()</code> method. These methods hence are presumed
+ to process small chunks of the substrate. The specific codec
+ implementation should know how to maintain state based on the encoding
+ between these calls to process a unit of substrate which likewise is
+ determined by the encoding. So the encoding (a.k.a. codec) defines
+ what a unit of substrate is as well as any state information required
+ while piecemeal processing the substrate. Several calls to these two
+ methods may be required to process a unit of the substrate. When the
+ entire unit has been processed an event is fired. Again the specific
+ codec detects the complete processing of a unit of substrate so it
+ knows when to fire this event.
+ </p>
+
+ <p>
+ Going back to our approach for defining a stateful codec, we modeled
+ the event as a callback to a specific interface. For decoders this
+ would be a <code>DecoderCallback.decodeOccurred()</code> and for
+ encoders it would be an <code>EncoderCallback.encodeOccurred()</code>
+ method call. These interface methods are called when an entire unit
+ of substrate is respectively decoded or encoded.
+ </p>
+
+ <p>
+ This approach also allows for codec chaining in a pipeline where
+ codecs may be stacked on top of one another. The callback interfaces
+ are used to bridge together codecs by feeding the output of one codec
+ operation into the input of another. Specific classes have been
+ included in the API to accommodate this usage pattern.
+ </p>
+
+ <center>
+ <img src="../images/all-uml.gif"/>
+ </center>
+
+ </subsection>
+
+ <subsection name="StatefulDecoder Usage">
<p>
StatefulDecoders use callbacks to notify the successful decode of a
- unit of encoded substrate. Other than this the definition of what a
- 'unit of encoded substrate' is depends on the codec's decoder
- implementation. The definition may be size constrained or be a function
- of context.
+ unit of encoded substrate. Other than this, the definition of what a
+ 'unit of encoded substrate' is, depends on the codec's decoder
+ implementation. The definition may be size constrained or be a
+ function of context.
</p>
<p>
@@ -65,11 +146,11 @@
are the decoded 'unit of encoded substrate'. StatefulDecoders are ideal
for use in high performance servers based on non-blocking IO. Often
StatefulDecoders will be used with a Selector in a loop to detect input
- made available. As the substrate arrives, it can be fed to the decoder
- intermittantly. Finally the callback delivers the decoded units of
- encoded substrate. Below there is a trivialized example of how
- a StatefulDecoder can be used to decoded the substrate as it arrives
- fragmented by the tcp/ip stack:
+ as it is made available. As the substrate arrives, it is fed to
+ the decoder intermittently. Finally the callback delivers the decoded
+ units of encoded substrate. Below there is a trivialized example of
+ how a StatefulDecoder can be used to decode the substrate as it
+ arrives fragmented by the tcp/ip stack:
</p>
<source>
@@ -87,15 +168,15 @@
</source>
<p>
- As you can see from the code fragment the decode() returns anything with
- a void return type. Because the callback is used to deliver the
- finished product when it is ready, the decode operation can occur
- asynchronously in another thread or stage of a server if so desired.
- This is what makes StatefulDecoders so simple yet powerful.
+ As you can see from the code fragment the decode() returns nothing
+ since it has a void return type. Because the callback is used to
+ deliver the finished product when it is ready, the decode operation
+ can occur asynchronously in another thread or stage of a server if
+ desired.
</p>
- </section>
+ </subsection>
- <section name="Strengths and Weaknesses">
+ <subsection name="Strengths and Weaknesses">
<p>
As can be seen from the section above and some of the characteristics
of StatefulDecoders, they are ideal for building network servers. These
@@ -113,24 +194,25 @@
</p>
<p>
- The cost of creating a decoder for every new connection is usually very
- minimal however we cannot forsee every possible implementation.
- Regardless of the cost associated with dedicating a StatefulDecoder to
- each new connection, stateful protocol servers will always pay a lesser
- price. The longer the life of the connection, the more worth while it
+ The cost of creating a decoder for every new connection is usually
+ very minimal however we cannot foresee every possible implementation.
+ Regardless of the cost associated with dedicating a StatefulDecoder
+ to each new connection, stateful protocol servers will often benefit
+ most, as opposed to a stateless server. The reasoning is as follows:
+ the longer the life of the connection, the more worth while it
is to create a StatefulDecoder and thereby have it amortize over the
life of the connection.
</p>
<p>
- StatefulDecoders are much more complex for implementors. They are
- basically state driven automata which change their state with the
- arrival of data. Furthermoe it is very difficult for StatefulDecoders
- to gracefully recover from corrupt or lost input.
+ The primary drawback is that StatefulDecoders are much more complex to
+ implement. They are basically state driven automata which change
+ their state with the arrival of data. Furthermore it is very difficult
+ for StatefulDecoders to gracefully recover from corrupt or lost input.
</p>
- </section>
+ </subsection>
- <section name="StatefulDecoder Chaining/Stacking">
+ <subsection name="StatefulDecoder Chaining/Stacking">
<p>
StatefulDecoders can easily be chained or stacked to operate on a
substrate stream. This is achieved by having the callback of one
@@ -141,13 +223,15 @@
<p>
Because the occurrence of chaining may be common and several folks have
- already expressed their interest in it we have devised a special
+ already expressed their interest in it, we have devised a special
StatefulDecoder implementation called a DecoderStack. It itself is
a decoder however other decoders can be pushed onto it. When empty
- without any decoders in the stack it operates in pass-thro mode. When
- StatefulDecoders are pushed decode operations invoke a chain of decoders
- starting with the bottom most in the stack going up to the top. The
- final callback is the callback registered with the DecoderStack.
+ without any decoders in the stack it operates in pass-thro mode. The
+ decode operation is basically the identity transformation. When
+ StatefulDecoders are pushed, decode operations invoke a chain of
+ decoders starting with the bottom most in the stack going up to the
+ top. The final callback invoked is the callback registered with the
+ DecoderStack.
</p>
<p>
@@ -184,9 +268,9 @@
}
}
</source>
- </section>
+ </subsection>
- <section name="Recommendations to Implementors">
+ <subsection name="Recommendations to Implementors">
<p>
Keep it simple and rely on chaining to divide and conquer complex
decoders into several trivial decoders. Besides simple chaining,
@@ -203,7 +287,13 @@
5 different possible configurations to the contents of arriving data
with respect to the unit of encoded substrate:
</p>
-
+
+ <!--
+ todo add illustrations using images here - it's not that hard
+ might want to turn this into a table instead of a ul if we decide
+ to do that
+ -->
+
<ul>
<li>
it contains a single complete discrete unit of encoded substrate
@@ -230,6 +320,7 @@
of the buffer. Tail fragments end a unit of encoded substrate and are
found at the front of the buffer.
</p>
+ </subsection>
</section>
</body>
</document>
Added: incubator/directory/snickers/trunk/xdocs/images/BERDecoder.gif
==============================================================================
Binary file. No diff available.
Added: incubator/directory/snickers/trunk/xdocs/images/BERDecoderCallback.gif
==============================================================================
Binary file. No diff available.
Added: incubator/directory/snickers/trunk/xdocs/images/PrimitiveTupleDecoder-uml.gif
==============================================================================
Binary file. No diff available.
Added: incubator/directory/snickers/trunk/xdocs/images/all-uml.gif
==============================================================================
Binary file. No diff available.
Added: incubator/directory/snickers/trunk/xdocs/images/state-helper-classes.gif
==============================================================================
Binary file. No diff available.
Added: incubator/directory/snickers/trunk/xdocs/images/tag-integer-encoding.png
==============================================================================
Binary file. No diff available.