You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/11/17 16:27:34 UTC

svn commit: r1542748 [4/5] - in /pdfbox/branches/1.8: ./ examples/src/main/java/org/apache/pdfbox/examples/fdf/ examples/src/main/java/org/apache/pdfbox/examples/signature/ fontbox/src/main/java/org/apache/fontbox/cff/ fontbox/src/main/java/org/apache/...

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java?rev=1542748&r1=1542747&r2=1542748&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java Sun Nov 17 15:27:33 2013
@@ -16,34 +16,28 @@
  */
 package org.apache.pdfbox.pdmodel.interactive.form;
 
-import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
-import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
-
-import org.apache.pdfbox.pdmodel.common.COSArrayList;
-import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
-
+import org.apache.pdfbox.pdmodel.common.COSArrayList;
+import org.apache.pdfbox.pdmodel.common.COSObjectable;
 import org.apache.pdfbox.pdmodel.common.PDTextStream;
-
 import org.apache.pdfbox.pdmodel.fdf.FDFField;
+import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
 import org.apache.pdfbox.util.BitFlagHelper;
 
-import java.io.IOException;
-
-import java.util.ArrayList;
-import java.util.List;
-
 /**
- * This is the superclass for a Field element in a PDF.
- * Based on the COS object model from PDFBox.
- *
+ * This is the superclass for a Field element in a PDF. Based on the COS object model from PDFBox.
+ * 
  * @author sug
- * @version $Revision: 1.23 $
+ * 
  */
 public abstract class PDField implements COSObjectable
 {
@@ -60,28 +54,25 @@ public abstract class PDField implements
      */
     public static final int FLAG_NO_EXPORT = 1 << 2;
 
-
     private PDAcroForm acroForm;
 
     private COSDictionary dictionary;
 
     /**
      * Constructor.
-     *
+     * 
      * @param theAcroForm The form that this field is part of.
      */
-    public PDField( PDAcroForm theAcroForm )
+    public PDField(PDAcroForm theAcroForm)
     {
         acroForm = theAcroForm;
         dictionary = new COSDictionary();
-        //no required fields in base field class
+        // no required fields in base field class
     }
 
-
     /**
-     * Creates a COSField from a COSDictionary, expected to be
-     * a correct object definition for a field in PDF.
-     *
+     * Creates a COSField from a COSDictionary, expected to be a correct object definition for a field in PDF.
+     * 
      * @param theAcroForm The form that this field is part of.
      * @param field the PDF objet to represent as a field.
      */
@@ -93,42 +84,41 @@ public abstract class PDField implements
 
     /**
      * Returns the partial name of the field.
-     *
+     * 
      * @return the name of the field
      */
     public String getPartialName()
     {
-        return getDictionary().getString( COSName.T );
+        return getDictionary().getString(COSName.T);
     }
 
     /**
      * This will set the partial name of the field.
-     *
+     * 
      * @param name The new name for the field.
      */
-    public void setPartialName( String name )
+    public void setPartialName(String name)
     {
-        getDictionary().setString( COSName.T, name );
+        getDictionary().setString(COSName.T, name);
     }
 
     /**
-     * Returns the fully qualified name of the field, which is a concatenation of
-     * the names of all the parents fields.
-     *
+     * Returns the fully qualified name of the field, which is a concatenation of the names of all the parent fields.
+     * 
      * @return the name of the field
-     *
+     * 
      * @throws IOException If there is an error generating the fully qualified name.
      */
     public String getFullyQualifiedName() throws IOException
     {
         PDField parent = getParent();
         String parentName = null;
-        if( parent != null )
+        if (parent != null)
         {
             parentName = parent.getFullyQualifiedName();
         }
         String finalName = getPartialName();
-        if( parentName != null )
+        if (parentName != null)
         {
             finalName = parentName + "." + finalName;
         }
@@ -156,135 +146,132 @@ public abstract class PDField implements
     }
 
     /**
-     * Get the FT entry of the field.  This is a read only field and is set depending
-     * on the actual type.  The field type is an inheritable attribute.  This method will
-     * return only the direct value on this object.  Use the findFieldType for an upward
-     * recursive search.
-     *
+     * Get the FT entry of the field. This is a read only field and is set depending on the actual type. The field type
+     * is an inheritable attribute. This method will return only the direct value on this object. Use the findFieldType
+     * for an upward recursive search.
+     * 
      * @return The Field type.
-     *
+     * 
      * @see PDField#findFieldType()
      */
     public String getFieldType()
     {
-        return getDictionary().getNameAsString( COSName.FT );
+        return getDictionary().getNameAsString(COSName.FT);
     }
 
     /**
-     * Find the field type and optionally do a recursive upward search.  Sometimes the fieldtype
-     * will be specified on the parent instead of the direct object.  This will look at this
-     * object for the field type, if none is specified then it will look to the parent if there
-     * is a parent.  If there is no parent and no field type has been found then this
+     * Find the field type and optionally do a recursive upward search. Sometimes the fieldtype will be specified on the
+     * parent instead of the direct object. This will look at this object for the field type, if none is specified then
+     * it will look to the parent if there is a parent. If there is no parent and no field type has been found then this
      * will return null.
-     *
+     * 
      * @return The field type or null if none was found.
      */
     public String findFieldType()
     {
-        return findFieldType( getDictionary() );
+        return findFieldType(getDictionary());
     }
 
-    private String findFieldType( COSDictionary dic )
+    private String findFieldType(COSDictionary dic)
     {
-        String retval = dic.getNameAsString( COSName.FT );
-        if( retval == null )
+        String retval = dic.getNameAsString(COSName.FT);
+        if (retval == null)
         {
-            COSDictionary parent = (COSDictionary)dic.getDictionaryObject( COSName.PARENT, COSName.P );
-            if( parent != null )
+            COSDictionary parent = (COSDictionary) dic.getDictionaryObject(COSName.PARENT, COSName.P);
+            if (parent != null)
             {
-                retval = findFieldType( parent );
+                retval = findFieldType(parent);
             }
         }
         return retval;
 
     }
 
-
     /**
      * setValue sets the fields value to a given string.
-     *
+     * 
      * @param value the string value
-     *
+     * 
      * @throws IOException If there is an error creating the appearance stream.
      */
     public abstract void setValue(String value) throws IOException;
 
     /**
      * getValue gets the fields value to as a string.
-     *
+     * 
      * @return The string value of this field.
-     *
+     * 
      * @throws IOException If there is an error getting the value.
      */
     public abstract String getValue() throws IOException;
 
     /**
      * sets the field to be read-only.
-     *
+     * 
      * @param readonly The new flag for readonly.
      */
     public void setReadonly(boolean readonly)
     {
-        BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_READ_ONLY, readonly );
+        BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_READ_ONLY, readonly);
     }
 
     /**
-     *
+     * 
      * @return true if the field is readonly
      */
     public boolean isReadonly()
     {
-        return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_READ_ONLY );
+        return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_READ_ONLY);
     }
 
     /**
      * sets the field to be required.
-     *
+     * 
      * @param required The new flag for required.
      */
     public void setRequired(boolean required)
     {
-        BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_REQUIRED, required );
+        BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_REQUIRED, required);
     }
 
     /**
-     *
+     * 
      * @return true if the field is required
      */
     public boolean isRequired()
     {
-        return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_REQUIRED );
+        return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_REQUIRED);
     }
 
     /**
      * sets the field to be not exported..
-     *
+     * 
      * @param noExport The new flag for noExport.
      */
     public void setNoExport(boolean noExport)
     {
-        BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_NO_EXPORT, noExport );
+        BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_NO_EXPORT, noExport);
     }
 
     /**
-     *
+     * 
      * @return true if the field is not to be exported.
      */
     public boolean isNoExport()
     {
-        return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_NO_EXPORT );
+        return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_NO_EXPORT);
     }
 
     /**
      * This will get the flags for this field.
-     *
+     * 
      * @return flags The set of flags.
      */
     public int getFieldFlags()
     {
         int retval = 0;
-        COSInteger ff = (COSInteger)getDictionary().getDictionaryObject( COSName.FF );
-        if( ff != null )
+        COSInteger ff = (COSInteger) getDictionary().getDictionaryObject(COSName.FF);
+        if (ff != null)
         {
             retval = ff.intValue();
         }
@@ -293,128 +280,128 @@ public abstract class PDField implements
 
     /**
      * This will set the flags for this field.
-     *
+     * 
      * @param flags The new flags.
      */
-    public void setFieldFlags( int flags )
+    public void setFieldFlags(int flags)
     {
-        getDictionary().setInt( COSName.FF, flags );
+        getDictionary().setInt(COSName.FF, flags);
     }
 
     /**
      * This will import a fdf field from a fdf document.
-     *
+     * 
      * @param fdfField The fdf field to import.
-     *
+     * 
      * @throws IOException If there is an error importing the data for this field.
      */
-    public void importFDF( FDFField fdfField ) throws IOException
+    public void importFDF(FDFField fdfField) throws IOException
     {
         Object fieldValue = fdfField.getValue();
         int fieldFlags = getFieldFlags();
 
-        if( fieldValue != null )
+        if (fieldValue != null)
         {
-            if( fieldValue instanceof String )
+            if (fieldValue instanceof String)
             {
-                setValue( (String)fieldValue );
+                setValue((String) fieldValue);
             }
-            else if( fieldValue instanceof PDTextStream )
+            else if (fieldValue instanceof PDTextStream)
             {
-                setValue( ((PDTextStream)fieldValue).getAsString() );
+                setValue(((PDTextStream) fieldValue).getAsString());
             }
             else
             {
-                throw new IOException( "Unknown field type:" + fieldValue.getClass().getName() );
+                throw new IOException("Unknown field type:" + fieldValue.getClass().getName());
             }
         }
         Integer ff = fdfField.getFieldFlags();
-        if( ff != null )
+        if (ff != null)
         {
-            setFieldFlags( ff.intValue() );
+            setFieldFlags(ff.intValue());
         }
         else
         {
-            //these are suppose to be ignored if the Ff is set.
+            // these are supposed to be ignored if the Ff is set.
             Integer setFf = fdfField.getSetFieldFlags();
 
-            if( setFf != null )
+            if (setFf != null)
             {
                 int setFfInt = setFf.intValue();
                 fieldFlags = fieldFlags | setFfInt;
-                setFieldFlags( fieldFlags );
+                setFieldFlags(fieldFlags);
             }
 
             Integer clrFf = fdfField.getClearFieldFlags();
-            if( clrFf != null )
+            if (clrFf != null)
             {
-                //we have to clear the bits of the document fields for every bit that is
-                //set in this field.
+                // we have to clear the bits of the document fields for every bit that is
+                // set in this field.
                 //
-                //Example:
-                //docFf = 1011
-                //clrFf = 1101
-                //clrFfValue = 0010;
-                //newValue = 1011 & 0010 which is 0010
+                // Example:
+                // docFf = 1011
+                // clrFf = 1101
+                // clrFfValue = 0010;
+                // newValue = 1011 & 0010 which is 0010
                 int clrFfValue = clrFf.intValue();
                 clrFfValue ^= 0xFFFFFFFF;
                 fieldFlags = fieldFlags & clrFfValue;
-                setFieldFlags( fieldFlags );
+                setFieldFlags(fieldFlags);
             }
         }
 
         PDAnnotationWidget widget = getWidget();
-        if( widget != null )
+        if (widget != null)
         {
             int annotFlags = widget.getAnnotationFlags();
             Integer f = fdfField.getWidgetFieldFlags();
-            if( f != null && widget != null )
+            if (f != null && widget != null)
             {
-                widget.setAnnotationFlags( f.intValue() );
+                widget.setAnnotationFlags(f.intValue());
             }
             else
             {
-                //these are suppose to be ignored if the F is set.
+                // these are supposed to be ignored if the F is set.
                 Integer setF = fdfField.getSetWidgetFieldFlags();
-                if( setF != null )
+                if (setF != null)
                 {
                     annotFlags = annotFlags | setF.intValue();
-                    widget.setAnnotationFlags( annotFlags );
+                    widget.setAnnotationFlags(annotFlags);
                 }
 
                 Integer clrF = fdfField.getClearWidgetFieldFlags();
-                if( clrF != null )
+                if (clrF != null)
                 {
-                    //we have to clear the bits of the document fields for every bit that is
-                    //set in this field.
+                    // we have to clear the bits of the document fields for every bit that is
+                    // set in this field.
                     //
-                    //Example:
-                    //docF = 1011
-                    //clrF = 1101
-                    //clrFValue = 0010;
-                    //newValue = 1011 & 0010 which is 0010
+                    // Example:
+                    // docF = 1011
+                    // clrF = 1101
+                    // clrFValue = 0010;
+                    // newValue = 1011 & 0010 which is 0010
                     int clrFValue = clrF.intValue();
                     clrFValue ^= 0xFFFFFFFFL;
                     annotFlags = annotFlags & clrFValue;
-                    widget.setAnnotationFlags( annotFlags );
+                    widget.setAnnotationFlags(annotFlags);
                 }
             }
         }
         List<FDFField> fdfKids = fdfField.getKids();
         List<COSObjectable> pdKids = getKids();
-        for( int i=0; fdfKids != null && i<fdfKids.size(); i++ )
+        for (int i = 0; fdfKids != null && i < fdfKids.size(); i++)
         {
-            FDFField fdfChild = fdfKids.get( i );
+            FDFField fdfChild = fdfKids.get(i);
             String fdfName = fdfChild.getPartialFieldName();
-            for( int j=0; j<pdKids.size(); j++ )
+            for (int j = 0; j < pdKids.size(); j++)
             {
-                Object pdChildObj = pdKids.get( j );
-                if( pdChildObj instanceof PDField )
+                Object pdChildObj = pdKids.get(j);
+                if (pdChildObj instanceof PDField)
                 {
-                    PDField pdChild = (PDField)pdChildObj;
-                    if( fdfName != null && fdfName.equals( pdChild.getPartialName() ) )
+                    PDField pdChild = (PDField) pdChildObj;
+                    if (fdfName != null && fdfName.equals(pdChild.getPartialName()))
                     {
-                        pdChild.importFDF( fdfChild );
+                        pdChild.importFDF(fdfChild);
                     }
                 }
             }
@@ -422,12 +409,10 @@ public abstract class PDField implements
     }
 
     /**
-     * This will get the single associated widget that is part of this field.  This
-     * occurs when the Widget is embedded in the fields dictionary.  Sometimes there
-     * are multiple sub widgets associated with this field, in which case you want to
-     * use getKids().  If the kids entry is specified, then the first entry in that
-     * list will be returned.
-     *
+     * This will get the single associated widget that is part of this field. This occurs when the Widget is embedded in
+     * the fields dictionary. Sometimes there are multiple sub widgets associated with this field, in which case you
+     * want to use getKids(). If the kids entry is specified, then the first entry in that list will be returned.
+     * 
      * @return The widget that is associated with this field.
      * @throws IOException If there is an error getting the widget object.
      */
@@ -435,20 +420,20 @@ public abstract class PDField implements
     {
         PDAnnotationWidget retval = null;
         List<COSObjectable> kids = getKids();
-        if( kids == null )
+        if (kids == null)
         {
-            retval = new PDAnnotationWidget( getDictionary() );
+            retval = new PDAnnotationWidget(getDictionary());
         }
-        else if( kids.size() > 0 )
+        else if (kids.size() > 0)
         {
-            Object firstKid = kids.get( 0 );
-            if( firstKid instanceof PDAnnotationWidget )
+            Object firstKid = kids.get(0);
+            if (firstKid instanceof PDAnnotationWidget)
             {
-                retval = (PDAnnotationWidget)firstKid;
+                retval = (PDAnnotationWidget) firstKid;
             }
             else
             {
-                retval = ((PDField)firstKid).getWidget();
+                retval = ((PDField) firstKid).getWidget();
             }
         }
         else
@@ -460,58 +445,57 @@ public abstract class PDField implements
 
     /**
      * Get the parent field to this field, or null if none exists.
-     *
+     * 
      * @return The parent field.
-     *
+     * 
      * @throws IOException If there is an error creating the parent field.
      */
     public PDField getParent() throws IOException
     {
         PDField parent = null;
-        COSDictionary parentDic = (COSDictionary)getDictionary().getDictionaryObject( COSName.PARENT, COSName.P );
-        if( parentDic != null )
+        COSDictionary parentDic = (COSDictionary) getDictionary().getDictionaryObject(COSName.PARENT, COSName.P);
+        if (parentDic != null)
         {
-            parent = PDFieldFactory.createField( getAcroForm(), parentDic );
+            parent = PDFieldFactory.createField(getAcroForm(), parentDic);
         }
         return parent;
     }
 
     /**
      * Set the parent of this field.
-     *
+     * 
      * @param parent The parent to this field.
      */
-    public void setParent( PDField parent )
+    public void setParent(PDField parent)
     {
-        getDictionary().setItem( "Parent", parent );
+        getDictionary().setItem("Parent", parent);
     }
 
     /**
-     * This will find one of the child elements.  The name array are the components
-     * of the name to search down the tree of names.  The nameIndex is where to
-     * start in that array.  This method is called recursively until it finds
-     * the end point based on the name array.
-     *
+     * This will find one of the child elements. The name array are the components of the name to search down the tree
+     * of names. The nameIndex is where to start in that array. This method is called recursively until it finds the end
+     * point based on the name array.
+     * 
      * @param name An array that picks the path to the field.
      * @param nameIndex The index into the array.
      * @return The field at the endpoint or null if none is found.
      * @throws IOException If there is an error creating the field.
      */
-    public PDField findKid( String[] name, int nameIndex ) throws IOException
+    public PDField findKid(String[] name, int nameIndex) throws IOException
     {
         PDField retval = null;
-        COSArray kids = (COSArray)getDictionary().getDictionaryObject( COSName.KIDS );
-        if( kids != null )
+        COSArray kids = (COSArray) getDictionary().getDictionaryObject(COSName.KIDS);
+        if (kids != null)
         {
             for (int i = 0; retval == null && i < kids.size(); i++)
             {
-                COSDictionary kidDictionary = (COSDictionary)kids.getObject(i);
-                if( name[nameIndex].equals( kidDictionary.getString( "T" ) ) )
+                COSDictionary kidDictionary = (COSDictionary) kids.getObject(i);
+                if (name[nameIndex].equals(kidDictionary.getString("T")))
                 {
-                    retval = PDFieldFactory.createField( acroForm, kidDictionary );
-                    if( name.length > nameIndex+1 )
+                    retval = PDFieldFactory.createField(acroForm, kidDictionary);
+                    if (name.length > nameIndex + 1)
                     {
-                        retval = retval.findKid( name, nameIndex+1 );
+                        retval = retval.findKid(name, nameIndex + 1);
                     }
                 }
             }
@@ -520,69 +504,72 @@ public abstract class PDField implements
     }
 
     /**
-     * This will get all the kids of this field.  The values in the list
-     * will either be PDWidget or PDField.  Normally they will be PDWidget objects
-     * unless this is a non-terminal field and they will be child PDField objects.
-     *
+     * This will get all the kids of this field. The values in the list will either be PDWidget or PDField. Normally
+     * they will be PDWidget objects unless this is a non-terminal field and they will be child PDField objects.
+     * 
      * @return A list of either PDWidget or PDField objects.
      * @throws IOException If there is an error retrieving the kids.
      */
     public List<COSObjectable> getKids() throws IOException
     {
         List<COSObjectable> retval = null;
-        COSArray kids = (COSArray)getDictionary().getDictionaryObject(COSName.KIDS);
-        if( kids != null )
+        COSArray kids = (COSArray) getDictionary().getDictionaryObject(COSName.KIDS);
+        if (kids != null)
         {
             List<COSObjectable> kidsList = new ArrayList<COSObjectable>();
             for (int i = 0; i < kids.size(); i++)
             {
-                COSDictionary kidDictionary = (COSDictionary)kids.getObject(i);
-                COSDictionary parent = (COSDictionary)kidDictionary.getDictionaryObject( COSName.PARENT, COSName.P );
-                if( kidDictionary.getDictionaryObject( COSName.FT ) != null ||
-                    (parent != null && parent.getDictionaryObject( COSName.FT ) != null ) )
+                COSDictionary kidDictionary = (COSDictionary) kids.getObject(i);
+                if (kidDictionary == null)
+                {
+                    continue;
+                }
+                COSDictionary parent = (COSDictionary) kidDictionary.getDictionaryObject(COSName.PARENT, COSName.P);
+                if (kidDictionary.getDictionaryObject(COSName.FT) != null
+                        || (parent != null && parent.getDictionaryObject(COSName.FT) != null))
                 {
-                    kidsList.add( PDFieldFactory.createField( acroForm, kidDictionary ));
+                    kidsList.add(PDFieldFactory.createField(acroForm, kidDictionary));
                 }
-                else if( "Widget".equals( kidDictionary.getNameAsString( COSName.SUBTYPE ) ) )
+                else if ("Widget".equals(kidDictionary.getNameAsString(COSName.SUBTYPE)))
                 {
-                    kidsList.add( new PDAnnotationWidget( kidDictionary ) );
+                    kidsList.add(new PDAnnotationWidget(kidDictionary));
                 }
                 else
                 {
                     //
-                    kidsList.add( PDFieldFactory.createField( acroForm, kidDictionary ));
+                    kidsList.add(PDFieldFactory.createField(acroForm, kidDictionary));
                 }
             }
-            retval = new COSArrayList( kidsList, kids );
+            retval = new COSArrayList<COSObjectable>(kidsList, kids);
         }
         return retval;
     }
 
     /**
      * This will set the list of kids.
-     *
+     * 
      * @param kids The list of child widgets.
      */
-    public void setKids( List<COSObjectable> kids )
+    public void setKids(List<COSObjectable> kids)
     {
-        COSArray kidsArray = COSArrayList.converterToCOSArray( kids );
-        getDictionary().setItem( COSName.KIDS, kidsArray );
+        COSArray kidsArray = COSArrayList.converterToCOSArray(kids);
+        getDictionary().setItem(COSName.KIDS, kidsArray);
     }
 
     /**
      * This will return a string representation of this field.
-     *
+     * 
      * @return A string representation of this field.
      */
     @Override
     public String toString()
     {
-        return "" + getDictionary().getDictionaryObject( COSName.V );
+        return "" + getDictionary().getDictionaryObject(COSName.V);
     }
 
     /**
      * This will get the acroform that this field is part of.
-     *
+     * 
      * @return The form this field is on.
      */
     public PDAcroForm getAcroForm()
@@ -592,7 +579,7 @@ public abstract class PDField implements
 
     /**
      * This will set the form this field is on.
-     *
+     * 
      * @param value The new form to use.
      */
     public void setAcroForm(PDAcroForm value)
@@ -602,7 +589,7 @@ public abstract class PDField implements
 
     /**
      * This will get the dictionary associated with this field.
-     *
+     * 
      * @return The dictionary that this class wraps.
      */
     public COSDictionary getDictionary()
@@ -612,7 +599,7 @@ public abstract class PDField implements
 
     /**
      * Convert this standard java object to a COS object.
-     *
+     * 
      * @return The cos object that matches this Java object.
      */
     public COSBase getCOSObject()
@@ -621,29 +608,29 @@ public abstract class PDField implements
     }
 
     /**
-     * Get the additional actions for this field.  This will return null
-     * if there are no additional actions for this field.
-     *
+     * Get the additional actions for this field. This will return null if there are no additional actions for this
+     * field.
+     * 
      * @return The actions of the field.
      */
     public PDFormFieldAdditionalActions getActions()
     {
-        COSDictionary aa = (COSDictionary)dictionary.getDictionaryObject( COSName.AA );
+        COSDictionary aa = (COSDictionary) dictionary.getDictionaryObject(COSName.AA);
         PDFormFieldAdditionalActions retval = null;
-        if( aa != null )
+        if (aa != null)
         {
-            retval = new PDFormFieldAdditionalActions( aa );
+            retval = new PDFormFieldAdditionalActions(aa);
         }
         return retval;
     }
 
     /**
      * Set the actions of the field.
-     *
+     * 
      * @param actions The field actions.
      */
-    public void setActions( PDFormFieldAdditionalActions actions )
+    public void setActions(PDFormFieldAdditionalActions actions)
     {
-        dictionary.setItem( COSName.AA, actions );
+        dictionary.setItem(COSName.AA, actions);
     }
 }

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java?rev=1542748&r1=1542747&r2=1542748&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java Sun Nov 17 15:27:33 2013
@@ -16,11 +16,9 @@
  */
 package org.apache.pdfbox.util;
 
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-
 import java.io.IOException;
-
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.GregorianCalendar;
@@ -31,329 +29,789 @@ import java.util.TimeZone;
 import org.apache.pdfbox.cos.COSString;
 
 /**
+ * Date format is described in PDF Reference 1.7 section 3.8.2
+ * (www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf)
+ * and also in PDF 32000-1:2008 
+ * (http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf)
+ * although the latter inexplicably omits the trailing apostrophe.
+ * 
+ * The interpretation of dates without timezones is unclear. 
+ * The code below assumes that such dates are in UTC+00 (aka GMT).
+ * This is in keeping with the PDF Reference's assertion that:
+ *      numerical fields default to zero values. 
+ * However, the Reference does go on to make the cryptic remark:
+ *      If no UT information is specified, the relationship of the specified  
+ *      time to UT is considered to be unknown. Whether or not the time 
+ *      zone is known, the rest of the date should be specified in local time.
+ * I understand this to refer to _creating_ a pdf date value. That is, 
+ * code that can get the wall clock time and cannot get the timezone 
+ * should write the wall clock time with a time zone of zero.
+ * When _parsing_ a PDF date, the statement talks about "the rest of the date"
+ * being local time, thus explicitly excluding the use of the local time
+ * for the time zone.
+*/ 
+
+/**
  * This class is used to convert dates to strings and back using the PDF
- * date standards.  Date are described in PDFReference1.4 section 3.8.2
+ * date standard in section 3.8.2 of PDF Reference 1.7.  
  *
  * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.14 $
+ * @author <a href="mailto:zweibieren@ahoo.com">Fred Hansen</a>
+ * 
+ * TODO Move members of this class elsewhere for shared use in pdfbox, xmpbox, and jempbox.
  */
 public class DateConverter
 {
-    //The Date format is supposed to be the PDF_DATE_FORMAT, but not all PDF documents
-    //will use that date, so I have added a couple other potential formats
-    //to try if the original one does not work.
-    private static final SimpleDateFormat[] POTENTIAL_FORMATS = new SimpleDateFormat[] {
-        new SimpleDateFormat("EEEE, dd MMM yyyy hh:mm:ss a", Locale.ENGLISH),
-        new SimpleDateFormat("EEEE, MMM dd, yyyy hh:mm:ss a", Locale.ENGLISH),
-        new SimpleDateFormat("MM/dd/yyyy hh:mm:ss", Locale.ENGLISH),
-        new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH),
-        new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ENGLISH),
-        new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssz", Locale.ENGLISH),
-        new SimpleDateFormat("EEEE, MMM dd, yyyy", Locale.ENGLISH), // Acrobat Distiller 1.0.2 for Macintosh
-        new SimpleDateFormat("EEEE MMM dd, yyyy HH:mm:ss", Locale.ENGLISH), // ECMP5
-        new SimpleDateFormat("EEEE MMM dd HH:mm:ss z yyyy", Locale.ENGLISH), // GNU Ghostscript 7.0.7
-        new SimpleDateFormat("EEEE, MMM dd, yyyy 'at' hh:mma", Locale.ENGLISH), // Acrobat Net Distiller 1.0 for Windows
-        new SimpleDateFormat("d/MM/yyyy hh:mm:ss", Locale.ENGLISH), // PDFBOX-164
-        new SimpleDateFormat("dd/MM/yyyy hh:mm:ss", Locale.ENGLISH), // PDFBOX-170
-        new SimpleDateFormat("EEEEEEEEEE, MMMMMMMMMMMM dd, yyyy", Locale.ENGLISH),  // PDFBOX-465
-        new SimpleDateFormat("dd MMM yyyy hh:mm:ss", Locale.ENGLISH),  // for 26 May 2000 11:25:00
-        new SimpleDateFormat("dd MMM yyyy hh:mm", Locale.ENGLISH),  // for 26 May 2000 11:25
-        new SimpleDateFormat("M/dd/yyyy hh:mm:ss", Locale.ENGLISH),
-        new SimpleDateFormat("MM/d/yyyy hh:mm:ss", Locale.ENGLISH),
-        new SimpleDateFormat("M/dd/yyyy", Locale.ENGLISH),
-        new SimpleDateFormat("MM/d/yyyy", Locale.ENGLISH),
-        new SimpleDateFormat("M/d/yyyy hh:mm:ss", Locale.ENGLISH),
-        new SimpleDateFormat("M/d/yyyy", Locale.ENGLISH),
-        new SimpleDateFormat("M/d/yy hh:mm:ss", Locale.ENGLISH),
-        new SimpleDateFormat("M/d/yy", Locale.ENGLISH),
-        new SimpleDateFormat("yyyymmdd hh:mm:ss Z"), //
-        new SimpleDateFormat("yyyymmdd hh:mm:ss"),   //
-        new SimpleDateFormat("yyyymmdd'+00''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+01''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+02''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+03''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+04''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+05''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+06''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+07''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+08''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+09''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+10''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+11''00'''"), //
-        new SimpleDateFormat("yyyymmdd'+12''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-01''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-02''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-03''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-04''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-05''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-06''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-07''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-08''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-09''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-10''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-11''00'''"), //
-        new SimpleDateFormat("yyyymmdd'-12''00'''"), //
-        new SimpleDateFormat("yyyymmdd"), // for 20090401+0200
+    // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours
+    private static final int MINUTES_PER_HOUR = 60;
+    private static final int SECONDS_PER_MINUTE = 60;
+    private static final int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE*1000;
+    private static final int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE;
+    private static final int 
+            HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE, 
+            DAY = 2*HALF_DAY;
+    
+    /**
+     * Error value if date is invalid. Parsing is done with 
+     * GregorianCalendar.setLenient(false), so every date field value
+     * must be within bounds. If an attempt is made to parse an invalid date 
+     * field, toCalendar(String, String[]) returns Jan 1 in year INVALID_YEAR.
+     */
+    public static final int INVALID_YEAR = 999;
+    
+    
+    /**
+     * The Date format is supposed to be the PDF_DATE_FORMAT, but other
+     * forms appear. These lists offer alternatives to be tried 
+     * if parseBigEndianDate fails.  
+     * 
+     * The time zone offset generally trails the date string, so it is processed
+     * separately with parseTZoffset. (This does not preclude having time
+     * zones in the elements below; one does.)
+     * 
+     * Alas, SimpleDateFormat is badly non-reentrant -- it modifies its 
+     * calendar field (PDFBox-402), so these lists hold pattern strings from
+     * which a fresh SimpleDateFormat is created as needed.
+     * 
+     * Some past entries have been elided because they duplicate existing 
+     * entries. See the API for SimpleDateFormat, which says 
+     *      "For parsing, the number of pattern letters is ignored 
+     *      unless it's needed to separate two adjacent fields."
+     * 
+     * toCalendar(String, String[]) tests to see that the entire input text
+     * has been consumed. Therefore the ordering of formats is important. 
+     * If one format begins with the entirety of another, the longer
+     * must precede the other in the list.
+     * 
+     * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh"
+     * Where year is yy, four digit years are accepted 
+     * and two digit years are converted to four digits in the range
+     *      [thisyear-79...thisyear+20]
+     */
+    private static final String[] ALPHA_START_FORMATS = 
+    {
+            "EEEE, dd MMM yy hh:mm:ss a",
+            "EEEE, MMM dd, yy hh:mm:ss a",
+            "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows
+            "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh  && PDFBOX-465
+            "EEEE MMM dd, yy HH:mm:ss", // ECMP5
+            "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7
+            "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant
+    };
+    
+    private static final String[] DIGIT_START_FORMATS = 
+    {
+        "dd MMM yy HH:mm:ss",  // for 26 May 2000 11:25:00
+        "dd MMM yy HH:mm",  // for 26 May 2000 11:25
+        "yyyy MMM d",   // ambiguity resolved only by omitting time
+        "yyyymmddhh:mm:ss", // test case "200712172:2:3"
+        "H:m M/d/yy", // test case "9:47 5/12/2008"
+        "M/d/yy HH:mm:ss",
+        "M/d/yy HH:mm",
+        "M/d/yy",
+
+        // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss" 
+        //     "yyyy MMM d HH:mm:ss", 
+
+        // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm"  "M/d/yy",
+        // (incoming digit strings do not mark themselves as y, m, or d!)
+            // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170 
+            // "M/dd/yyyy hh:mm:ss",
+            // "MM/d/yyyy hh:mm:ss",
+            // "M/d/yyyy HH:mm:ss",
+            // "M/dd/yyyy",
+            // "MM/d/yyyy",
+            // "M/d/yyyy",
+            // "M/d/yyyy HH:mm:ss",
+            // "M/d/yy HH:mm:ss",
+        // subsumed by big-endian parse
+            // "yyyy-MM-dd'T'HH:mm:ss",
+            // "yyyy-MM-dd'T'HH:mm:ss",
+            // "yyyymmdd hh:mm:ss", 
+            // "yyyymmdd", 
+            // "yyyymmddX''00''",  // covers 24 cases 
+            //    (originally the above ended with '+00''00'''; 
+            //      the first apostrophe quoted the plus, 
+            //      '' mapped to a single ', and the ''' was invalid)
     };
 
+
     private DateConverter()
     {
         //utility class should not be constructed.
     }
 
+    ////////////////////////////////////////////
+    // C o n v e r t   t o   S t r i n g   Methods
+     
+    /**
+     * Get all known fallback date formats.
+     * The result lists the formats that start with a letter first,
+     * followed by the formats that start with a digit. A new array is
+     * built on every call.
+     * 
+     * @return an array containing all known formats
+     */
+    public static String[] getFormats() 
+    {
+        final int alphaCount = ALPHA_START_FORMATS.length;
+        final int digitCount = DIGIT_START_FORMATS.length;
+        String[] allFormats = new String[alphaCount + digitCount];
+        // alpha-start patterns occupy the front of the result ...
+        System.arraycopy(ALPHA_START_FORMATS, 0, allFormats, 0, alphaCount);
+        // ... and digit-start patterns are appended right after them
+        System.arraycopy(DIGIT_START_FORMATS, 0, allFormats, alphaCount, digitCount);
+        return allFormats;
+    }
+
     /**
-     * This will convert the calendar to a string.
+     * Converts a Calendar to a string formatted as:
+     *     D:yyyyMMddHHmmss#hh'mm'  where # is Z, +, or -.
+     * 
+     * @param cal The date to convert to a string. May be null.
+     * The DST_OFFSET is included when computing the output time zone.
      *
-     * @param date The date to convert to a string.
+     * @return The date as a String to be used in a PDF document, 
+     *      or null if the cal value is null
+     */
+    public static String toString(Calendar cal)
+    {
+        if (cal == null) 
+        {
+            return null;
+        }
+        // total offset from UTC = raw zone offset + daylight saving offset
+        String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET)
+                + cal.get(Calendar.DST_OFFSET), "'");
+        // The locale is pinned explicitly: with the platform default locale
+        // the %t conversions may emit locale-specific (non-ASCII) digits,
+        // which would not be a valid PDF date string.
+        return String.format(Locale.ENGLISH, "D:"
+                + "%1$4tY%1$2tm%1$2td"   // yyyyMMdd 
+                + "%1$2tH%1$2tM%1$2tS"   // HHmmss 
+                + "%2$s"                // time zone
+                + "'",                  // trailing apostrophe
+            cal, offset);      
+    }
+
+    /**
+     * Converts the date to ISO 8601 string format:
+     *     yyyy-MM-ddTHH:mm:ss#hh:mm    (where '#' is '+' or '-').
      *
-     * @return The date as a String to be used in a PDF document.
+     * @param cal The date to convert.  Must not be null.
+     * The DST_OFFSET is included in the output value.
+     * 
+     * @return The date represented as an ISO 8601 string.
      */
-    public static String toString( Calendar date )
+    public static String toISO8601(Calendar cal)
     {
-        String retval = null;
-        if( date != null )
+        String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET)
+                + cal.get(Calendar.DST_OFFSET), ":");
+        return String.format(
+                "%1$4tY"   // yyyy
+                + "-%1$2tm"   // -mm  (%tm adds one to cal month value)
+                + "-%1$2td"  // -dd  (%tm adds one to cal month value)
+                + "T"                             // T
+                + "%1$2tH:%1$2tM:%1$2tS"   // HHmmss  
+                + "%2$s",              // time zone
+            cal, offset);      
+    }
+    
+    /**
+     * Constrain a timezone offset to the range  [-11:59 thru +11:59].
+     * @param proposedOffset A value intended to be a timezone offset.
+     * @return The corresponding value reduced to the above noted range 
+     * by adding or subtracting multiples of a full day.
+     */
+    public static int restrainTZoffset(long proposedOffset) 
+    {
+        proposedOffset = ((proposedOffset+HALF_DAY)%DAY+DAY)%DAY; 
+        // 0 <= proposedOffset < DAY
+        proposedOffset = (proposedOffset-HALF_DAY)%HALF_DAY;   
+        // -HALF_DAY < proposedOffset < HALF_DAY
+        return (int)proposedOffset;
+    }
+    
+    /** 
+     * Formats a time zone offset as #hh^mm
+     * where # is + or -, hh is hours, ^ is a separator, and mm is minutes.
+     * Any separator may be specified by the second argument;
+     * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF).
+     * The returned value is constrained to the range -11:59 ... 11:59.
+     * For offset of 0 millis, the String returned is "+00^00", never "Z".
+     * To get a "general" offset in form GMT#hh:mm, write
+     *      "GMT"+DateConverter.formatTZoffset(offset, ":");
+     * <p>
+     * Take thought in choosing the source for the millis value. 
+     * It can come from calendarValue.getTimeZone() or from 
+     * calendarValue.get(Calendar.ZONE_OFFSET).  If a TimeZone was created
+     * from a valid time zone ID, then it may have a daylight savings rule.
+     * (As of July 4, 2013, the database at http://www.iana.org/time-zones 
+     * recognized 629 time zone regions.) But a TimeZone created as 
+     *      new SimpleTimeZone(millisOffset, "ID") 
+     * will not have a daylight savings rule (not even if there is a
+     * known time zone with the given ID). To get the TimeZone named "xDT"
+     * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT.
+     * <p>
+     * When parsing PDF dates, the incoming value DOES NOT have a TIMEZONE value.
+     * At most it has an OFFSET value like -04'00'. It is generally impossible to 
+     * determine what TIMEZONE corresponds to a given OFFSET. If the date is
+     * in the summer when daylight savings is in effect, an offset of -0400
+     * might correspond to any one of the 38 regions (of 53) with standard time 
+     * offset -0400 and no daylight saving. Or it might correspond to 
+     * any one of the 31 regions (out of 43) that observe daylight savings 
+     * and have standard time offset of -0500.
+     * <p>
+     * If a Calendar has not been assigned a TimeZone with setTimeZone(), 
+     * it will have by default the local TIMEZONE, not just the OFFSET.  In the
+     * USA, this TimeZone will have a daylight savings rule.
+     * <p>
+     * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs
+     * from the offset in the TimeZone set by Calendar.setTimeZone(). Example:
+     * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'.
+     * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00'
+     *     calVal = new GregorianCalendar();   // TimeZone is the local default
+     *     calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR);
+     * Four different offsets can be computed from calVal:
+     *     calVal.get(Calendar.ZONE_OFFSET)  =>  -07:00
+     *     calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00
+     *     calVal.getTimeZone().getRawOffset()  =>  -05:00
+     *     calVal.getTimeZone().getOffset(calVal.getTimeInMillis())  =>  -04:00
+     * <p>
+     * Which is correct??? I dunno, though setTimeZone() does seem to affect
+     * ZONE_OFFSET, and not vice versa.  One cannot even test whether TimeZone 
+     * or ZONE_OFFSET has been set; both have been set by initialization code.
+     * TimeZone is initialized to the local default time zone 
+     * and ZONE_OFFSET is set from it.
+     * 
+     * My choice in this DateConverter class has been to set the 
+     * initial TimeZone of a GregorianCalendar to GMT. Thereafter
+     * the TimeZone is modified with {@link #adjustTimeZoneNicely}. 
+     * 
+     * @param millis a time zone offset expressed in milliseconds
+     *      Any value is accepted; it is normalized to [-11:59 ... +11:59]
+     * @param sep a String to insert between hh and mm. May be empty.
+     * @return the formatted String for the offset
+     */
+    public static String formatTZoffset(long millis, String sep) 
+    {
+        // A fixed-offset zone carrying the normalized offset; the "Z" pattern
+        // of SimpleDateFormat then renders that offset as #hhmm.
+        TimeZone fixedZone = new SimpleTimeZone(restrainTZoffset(millis), "unknown");
+        SimpleDateFormat offsetFormat = new SimpleDateFormat("Z");
+        offsetFormat.setTimeZone(fixedZone);
+        StringBuilder result = new StringBuilder(offsetFormat.format(new Date()));
+        // splice the separator between "#hh" and "mm"
+        result.insert(3, sep);
+        return result.toString();
+    }
+
+    //////////////////////////////////////////////
+    // P A R S E   Methods
+
+     /**
+     * Parses an integer from a string, starting at and advancing a ParsePosition.
+     * 
+     * @param text The string being parsed. If null, the remedy value is returned.
+     * @param where The ParsePosition to start the search. This value 
+     *      will be incremented by the number of digits found, but no 
+     *      more than maxlen.  That is, the ParsePosition will 
+     *      advance across at most maxlen initial digits in text.
+     *      The error index is ignored and unchanged.
+     * @param maxlen The maximum length of the integer to parse. 
+     *      Usually 2, but 4 for year fields.
+     *      If the field of length maxlen begins with a digit, 
+     *      but contains a non-digit, no error is signaled 
+     *      and the integer value is returned.
+     * @param remedy Value to be assigned if no digit is found at the
+     *      initial parse position; that is, if the field is empty.
+     * @return The integer that was at the given parse position. Or
+     *      the remedy value if no digits were found.
+     */
+    public static int parseTimeField(String text, ParsePosition where, 
+            int maxlen, int remedy) 
+    {
+        if (text == null) 
+        {
+                    return remedy; 
+        }
+        // (it would seem that DecimalFormat.parse() would be simpler;
+        //     but that class blithely ignores setMaximumIntegerDigits)
+        int retval = 0;
+        int index = where.getIndex();
+        int limit = index + Math.min(maxlen, text.length()-index);
+        for (; index < limit; index++)
         {
-            StringBuffer buffer = new StringBuffer();
-            TimeZone zone = date.getTimeZone();
-            long offsetInMinutes = zone.getOffset( date.getTimeInMillis() )/1000/60;
-            long hours = Math.abs( offsetInMinutes/60 );
-            long minutes = Math.abs( offsetInMinutes%60 );
-            buffer.append( "D:" );
-            // PDFBOX-402 , SimpleDateFormat is not thread safe, created it when you use it.
-            buffer.append( new SimpleDateFormat( "yyyyMMddHHmmss" , Locale.ENGLISH).format( date.getTime() ) );
-            if( offsetInMinutes == 0 )
+            int cval = text.charAt(index) - '0';  // convert digit to integer
+            if (cval <0 || cval > 9)   // test to see if we got a digit
             {
-                buffer.append( "Z" );
+                break;   // no digit at index
             }
-            else if( offsetInMinutes < 0 )
-            {
-                buffer.append( "-" );
+            retval = retval*10 + cval;   // append the digit to the return value
+        }   
+        if (index == where.getIndex())
+        {
+            return remedy;
+        }
+        where.setIndex(index);
+        return retval;
+    }
+ 
+    /**
+     * Moves the ParsePosition forward over every consecutive character 
+     *      that occurs in the optionals list. For instance, listing a 
+     *      space causes a whole run of spaces to be passed over.
+     * @param text The text to examine. If null, nothing is skipped.
+     * @param where Index at which to start looking. 
+     *      It is advanced by the number of optional characters found.
+     *      The error index is ignored and unchanged.
+     * @param optionals A String listing all the optional characters 
+     *      to be skipped.
+     * @return The last non-space character passed over, 
+     *      or a space if no non-space character was passed over 
+     *      (even if space is not in the optionals list).
+     */
+    public static char skipOptionals(String text, ParsePosition where, 
+            String optionals) 
+    {
+        char lastNonSpace = ' ';
+        if (text == null)
+        {
+            return lastNonSpace;
+        }
+        for (int pos = where.getIndex(); pos < text.length(); pos = where.getIndex())
+        {
+            char candidate = text.charAt(pos);
+            if (optionals.indexOf(candidate) < 0)
+            {
+                break;    // first character that is not optional
+            }
+            if (candidate != ' ')
+            {
+                lastNonSpace = candidate;
+            }
+            where.setIndex(pos + 1);
+        }
+        return lastNonSpace;
+    }
+    
+    /**
+     * If the victim string is at the given position in the text,
+     * this method advances the position past that string. 
+     * 
+     * @param text The text to examine. If null, nothing is found.
+     * @param victim The string to look for. If null, nothing is found.
+     * @param where The initial position to look at. After return, this will
+     *      have been incremented by the length of the victim if it was found.
+     *      The error index is ignored and unchanged.
+     * @return true if victim was found; otherwise false.
+     */
+    public static boolean skipString(String text, String victim, ParsePosition where) 
+    {
+        // tolerate null like the sibling helpers parseTimeField and
+        // skipOptionals do, instead of throwing a NullPointerException
+        if (text == null || victim == null)
+        {
+            return false;
+        }
+        if (text.startsWith(victim, where.getIndex()))
+        {
+            where.setIndex(where.getIndex()+victim.length());
+            return true;
+        }
+        return false;
+    }
+
+    /** 
+     * Construct a new GregorianCalendar and set defaults.
+     * Locale is ENGLISH.
+     * TimeZone is "UTC" (zero offset and no DST).
+     * Parsing is NOT lenient. Milliseconds are zero.
+     * <p>
+     * Only the time zone, leniency and the MILLISECOND field are set here.
+     * NOTE(review): the remaining date and time fields are presumably meant
+     * to be overwritten by the caller -- confirm before relying on them.
+     * 
+     * @return a new, non-lenient Gregorian calendar in the "UTC" time zone
+     */
+    public static GregorianCalendar newGreg()  
+    {
+        GregorianCalendar retCal = new GregorianCalendar(Locale.ENGLISH);
+        // fixed zone with a raw offset of zero milliseconds
+        retCal.setTimeZone(new SimpleTimeZone(0, "UTC"));
+        // strict mode: out-of-range field values are rejected rather than rolled over
+        retCal.setLenient(false);
+        retCal.set(Calendar.MILLISECOND, 0);
+        return retCal;
+    }
+    
+    /**
+     * Install a TimeZone on a GregorianCalendar without changing the 
+     * wall-clock time. A plain GregorianCalendar.setTimeZone() 
+     * adjusts the displayed hour (and minute) to compensate. This is *BAD*
+     * (not to say *EVIL*) when we have already set the time.
+     * <p>
+     * The compensation is applied in whole minutes, so zones whose offset
+     * is not a multiple of an hour (for example +05:30) are handled too.
+     * @param cal The GregorianCalendar whose TimeZone to change.
+     * @param tz The new TimeZone.
+     */
+    public static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz) 
+    {
+        cal.setTimeZone(tz);
+        // total offset of the new zone, including daylight saving, in minutes;
+        // dividing by MILLIS_PER_HOUR here would drop the minutes part
+        int offsetMinutes = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET))
+                / MILLIS_PER_MINUTE;
+        // undo the shift that setTimeZone introduced
+        cal.add(Calendar.MINUTE, -offsetMinutes);
+    }
+    
+    /**
+     * Parses the end of a date string for a time zone and, if one is found,
+     * sets the time zone of the GregorianCalendar. Otherwise the calendar 
+     * time zone is unchanged.
+     * 
+     * The text is parsed as
+     *      (Z|GMT|UTC)? [+- ]* h [': ]? m '?
+     * where the leading String is optional, h is two digits by default, 
+     * but may be a single digit if followed by one of space, apostrophe, 
+     * colon, or the end of string. Similarly, m is one or two digits. 
+     * This scheme accepts the format of PDF, RFC 822, and ISO8601. 
+     * If none of these applies (as for a time zone name), we try
+     * TimeZone.getTimeZone().
+     * 
+     * @param text The text expected to begin with a time zone value,
+     * possibly with leading or trailing spaces.
+     * @param cal The Calendar whose TimeZone to set. 
+     * @param initialWhere Scanning begins at the index of initialWhere. After
+     *      success, that index is the one of the next character after the
+     *      recognized string.
+     *      The error index is ignored and unchanged.
+     * @return true if parsed a time zone value; otherwise the 
+     *      time zone is unchanged and the return value is false.
+     */
+    public static boolean parseTZoffset(String text, GregorianCalendar cal, 
+            ParsePosition initialWhere) 
+    {
+        ParsePosition where = new ParsePosition(initialWhere.getIndex());
+        TimeZone tz = new SimpleTimeZone(0, "GMT");
+        int tzHours, tzMin;
+        char sign = skipOptionals(text, where, "Z+- ");
+        boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where) 
+                || skipString(text, "UTC", where));
+        sign = ( ! hadGMT) ? sign : skipOptionals(text, where, "+- "); 
+        
+        tzHours = parseTimeField(text, where, 2, -999);
+        skipOptionals(text, where, "\': ");
+        tzMin = parseTimeField(text, where, 2, 0);
+        skipOptionals(text, where, "\' "); 
+        
+        if (tzHours != -999) 
+        {                    // we parsed a time zone in default format
+            int hrSign = (sign == '-' ? -1 :+1);
+            tz.setRawOffset(restrainTZoffset(hrSign*(tzHours*MILLIS_PER_HOUR + tzMin*MILLIS_PER_MINUTE))); 
+            tz.setID("unknown");
+        }
+        else if ( ! hadGMT)
+        {            // try to process as a name; "GMT" or "UTC" has already been processed
+            String tzText = text.substring(initialWhere.getIndex()).trim();
+            tz = TimeZone.getTimeZone(tzText);
+            // getTimeZone returns "GMT" for unknown ids
+            if ("GMT".equals(tz.getID()))  
+            {                // no timezone in text
+                // cal and initialWhere are unchanged
+                return false;
             }
             else
-            {
-                buffer.append( "+" );
-            }
-            if( hours < 10 )
-            {
-                buffer.append( "0" );
-            }
-            buffer.append( hours );
-            buffer.append( "'" );
-            if( minutes < 10 )
-            {
-                buffer.append( "0" );
+            {                // we got a tz by name; use it
+                where.setIndex(text.length());
             }
-            buffer.append( minutes );
-            buffer.append( "'" );
-            retval = buffer.toString();
-
         }
-        return retval;
+        adjustTimeZoneNicely(cal, tz);
+        initialWhere.setIndex(where.getIndex());
+        return true;
     }
-
+    
     /**
-     * This will convert a string to a calendar.
-     *
-     * @param date The string representation of the calendar.
-     *
-     * @return The calendar that this string represents.
-     *
-     * @throws IOException If the date string is not in the correct format.
+     * Parses a big-endian date: year month day hour min sec.
+     * The year must be four digits. Other fields may be adjacent 
+     * and delimited by length or they may follow appropriate delimiters.
+     *     year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction]
+     * If any numeric field is omitted, all following fields must also be omitted.
+     * No time zone is processed.
+     * 
+     * Ambiguous dates can produce unexpected results. For example:
+     *      1970 12 23:08 will parse as 1970 December 23 00:08:00 
+     * 
+     * @param text The string to parse.
+     * 
+     * @param initialWhere Where to begin the parse. On return the index
+     *      is advanced to just beyond the last character processed.
+     *      The error index is ignored and unchanged.
+     * 
+     * @return a GregorianCalendar representing the parsed date. 
+     *      Or null if the text did not begin with at least four digits.
      */
-    public static Calendar toCalendar( COSString date ) throws IOException
+    public static GregorianCalendar parseBigEndianDate(String text, 
+            ParsePosition initialWhere) 
     {
-        Calendar retval = null;
-        if( date != null )
+        ParsePosition where = new ParsePosition(initialWhere.getIndex());
+        int year = parseTimeField(text, where, 4, 0);
+        if (where.getIndex() != 4 + initialWhere.getIndex()) 
+        {
+            return null;
+        }
+        skipOptionals(text, where, "/- ");
+        int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11
+        skipOptionals(text, where, "/- ");
+        int day = parseTimeField(text, where, 2, 1);
+        skipOptionals(text, where, " T");
+        int hour = parseTimeField(text, where, 2, 0);
+        skipOptionals(text, where, ": ");
+        int minute = parseTimeField(text, where, 2, 0);
+        skipOptionals(text, where, ": ");
+        int second = parseTimeField(text, where, 2, 0);
+        char nextC = skipOptionals(text, where, ".");
+        if (nextC == '.')
         {
-            retval = toCalendar( date.getString() );
+            // fractions of a second: skip up to 19 digits
+            parseTimeField(text, where, 19, 0);
         }
 
-        return retval;
+        GregorianCalendar dest = newGreg();
+        try 
+        {
+            dest.set(year, month, day, hour, minute, second);
+            dest.getTimeInMillis();    // trigger limit tests
+        }
+        catch (IllegalArgumentException ill) 
+        {
+            return  null;
+        }
+        initialWhere.setIndex(where.getIndex());
+        skipOptionals(text, initialWhere, " ");
+        return dest;    // dest has at least a year value
     }
 
     /**
-     * This will convert a string to a calendar.
-     *
-     * @param date The string representation of the calendar.
-     *
-     * @return The calendar that this string represents.
-     *
-     * @throws IOException If the date string is not in the correct format.
+     * See if text can be parsed as a date according to any of a list of 
+     * formats. The time zone may be included as part of the format, or
+     * omitted in favor of later testing for a trailing time zone.
+     * 
+     * @param text The text to be parsed.
+     * 
+     * @param fmts A list of formats to be tried. The syntax is that for 
+     *      {@link java.text.SimpleDateFormat}
+     * 
+     * @param initialWhere At start this is the position to begin
+     *      examining the text. Upon return it will have been
+     *      incremented to refer to the next non-space character after the date.
+     *      If no date was found, the value is unchanged.
+     *      The error index is ignored and unchanged.
+     * 
+     * @return null for failure to find a date, or the GregorianCalendar
+     *      for the date that was found. Unless a time zone was 
+     *      part of the format, the time zone will be GMT+0
      */
-    public static Calendar toCalendar( String date ) throws IOException
+    public static GregorianCalendar parseSimpleDate(String text, String[] fmts, 
+            ParsePosition initialWhere) 
     {
-        Calendar retval = null;
-        if( date != null && date.trim().length() > 0 )
+        for(String fmt : fmts)
         {
-            //these are the default values
-            int year = 0;
-            int month = 1;
-            int day = 1;
-            int hour = 0;
-            int minute = 0;
-            int second = 0;
-            //first string off the prefix if it exists
-            try
+            ParsePosition where = new ParsePosition(initialWhere.getIndex());
+            SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH);
+            GregorianCalendar retCal = newGreg();
+            sdf.setCalendar(retCal);
+            if (sdf.parse(text, where) != null)
             {
-                SimpleTimeZone zone = null;
-                if( date.startsWith( "D:" ) )
-                {
-                    date = date.substring( 2, date.length() );
-                }
-                if( date.length() < 4 )
-                {
-                    throw new IOException( "Error: Invalid date format '" + date + "'" );
-                }
-                year = Integer.parseInt( date.substring( 0, 4 ) );
-                if( date.length() >= 6 )
-                {
-                    month = Integer.parseInt( date.substring( 4, 6 ) );
-                }
-                if( date.length() >= 8 )
-                {
-                    day = Integer.parseInt( date.substring( 6, 8 ) );
-                }
-                if( date.length() >= 10 )
-                {
-                    hour = Integer.parseInt( date.substring( 8, 10 ) );
-                }
-                if( date.length() >= 12 )
-                {
-                    minute = Integer.parseInt( date.substring( 10, 12 ) );
-                }
-                if( date.length() >= 14 )
-                {
-                    second = Integer.parseInt( date.substring( 12, 14 ) );
-                }
+                initialWhere.setIndex(where.getIndex());
+                skipOptionals(text, initialWhere, " ");
+                return retCal;
+            }
+        }
+        return null;
+    }
 
-                if( date.length() >= 15 )
-                {
-                    char sign = date.charAt( 14 );
-                    if( sign == 'Z' )
-                    {
-                        zone = new SimpleTimeZone(0,"Unknown");
-                    }
-                    else
-                    {
-                        int hours = 0;
-                        int minutes = 0;
-                        if( date.length() >= 17 )
-                        {
-                            if( sign == '+' )
-                            {
-                                //parseInt cannot handle the + sign
-                                hours = Integer.parseInt( date.substring( 15, 17 ) );
-                            }
-                            else if (sign == '-')
-                            {
-                                hours = -Integer.parseInt(date.substring(15,17));
-                            }
-                            else
-                            {
-                                hours = -Integer.parseInt( date.substring( 14, 16 ) );
-                            }
-                        }
-                        if( date.length() > 20 )
-                        {
-                            minutes = Integer.parseInt( date.substring( 18, 20 ) );
-                        }
-                        zone = new SimpleTimeZone( hours*60*60*1000 + minutes*60*1000, "Unknown" );
-                    }
-                }
-                if( zone != null )
-                {
-                    retval = new GregorianCalendar( zone );
-                }
-                else
-                {
-                    retval = new GregorianCalendar();
-                }
+    
+    /**
+     * Parses a String to see if it begins with a date, and if so, 
+     * returns that date. The date must be strictly correct--no 
+     * field may exceed the appropriate limit.
+     * (That is, the Calendar has setLenient(false).) 
+     * Skips initial spaces, but does NOT check for "D:"
+     * 
+     * The scan first tries parseBigEndianDate and parseTZoffset
+     * and then tries parseSimpleDate with appropriate formats, 
+     * again followed by parseTZoffset. If at any stage the entire 
+     * text is consumed, that date value is returned immediately. 
+     * Otherwise the date that consumes the longest initial part
+     * of the text is returned.
+     * 
+     * - PDF format dates are among those recognized by parseBigEndianDate.
+     * - The formats tried are ALPHA_START_FORMATS or DIGIT_START_FORMATS and
+     * any listed in the value of moreFmts.
+     * 
+     * @param text The String that may begin with a date. Must not be null.
+     *      Initial spaces are skipped over; a leading "D:" is NOT skipped.
+     * @param moreFmts Additional formats to be tried after trying the
+     *      built-in formats.
+     * @param initialWhere Parsing begins at the given position in text. If the
+     *      parse succeeds, the index of initialWhere is advanced to point 
+     *      to the first unrecognized character.
+     *      The error index is ignored and unchanged.
+     * @return A GregorianCalendar for the date. If no date is found, 
+     *      returns null. The time zone will be GMT+0 unless parsing 
+     *      succeeded with a format containing a time zone. (Only one
+     *      builtin format contains a time zone.)
+     * 
+     */
+    public static Calendar parseDate(String text, String[] moreFmts, 
+            ParsePosition initialWhere) 
+    {
+        // place to remember longest date string
+        int longestLen = -999999;  // theorem: this value will never be used
+                // proof: longestLen is only used if longestDate is not null
+        GregorianCalendar longestDate = null; // null says no date found yet
+        int whereLen;   // tempcopy of where.getIndex()
+        
+        ParsePosition where = new ParsePosition(initialWhere.getIndex());
+        // check for null (throws exception) and trim off surrounding spaces
+        skipOptionals(text, where, " ");
+        int startPosition = where.getIndex();
+
+        // try big-endian parse
+        GregorianCalendar retCal = parseBigEndianDate(text, where);
+        // check for success and a timezone
+        if (retCal != null &&
+                (where.getIndex() == text.length() 
+                || parseTZoffset(text, retCal, where))) 
+        {
+            // if text is fully consumed, return the date
+            // else remember it and its length
+            whereLen = where.getIndex();
+            if (whereLen == text.length()) 
+            {
+                initialWhere.setIndex(whereLen);
+                return retCal;
+            }
+            longestLen = whereLen;
+            longestDate = retCal;
+        }
 
-                retval.set(year, month-1, day, hour, minute, second );
-                // PDFBOX-598: PDF dates are only accurate up to a second
-                retval.set(Calendar.MILLISECOND, 0);
+        // try one of the sets of standard formats
+        where.setIndex(startPosition);
+        String [] formats 
+                = Character.isDigit(text.charAt(startPosition))
+                ? DIGIT_START_FORMATS
+                : ALPHA_START_FORMATS;
+        retCal = parseSimpleDate(text, formats, where);
+        // check for success and a timezone
+        if (retCal != null && 
+                (where.getIndex() == text.length() 
+                || parseTZoffset(text, retCal, where)))
+        {
+            // if text is fully consumed, return the date
+            // else remember it and its length
+            whereLen = where.getIndex();
+            if (whereLen == text.length()) 
+            {
+                initialWhere.setIndex(whereLen);
+                return retCal;
             }
-            catch( NumberFormatException e )
+            if (whereLen > longestLen) 
             {
-                for( int i=0; retval == null && i<POTENTIAL_FORMATS.length; i++ )
-                {
-                    try
-                    {
-                        Date utilDate = POTENTIAL_FORMATS[i].parse( date );
-                        retval = new GregorianCalendar();
-                        retval.setTime( utilDate );
-                    }
-                    catch( ParseException pe )
-                    {
-                        //ignore and move to next potential format
-                    }
-                }
-                if( retval == null )
+                longestLen = whereLen;
+                longestDate = retCal;
+            }
+        }
+        
+        // try the supplied formats
+        if (moreFmts != null)
+        {
+            where.setIndex(startPosition);
+            retCal = parseSimpleDate(text, moreFmts, where);
+            if (retCal != null && 
+                (where.getIndex() == text.length() 
+                || parseTZoffset(text, retCal, where)))
+            {
+                whereLen = where.getIndex();
+                // if text is fully consumed, return the date
+                // else remember it and its length
+                if (whereLen == text.length() || 
+                        (longestDate != null && whereLen > longestLen)) 
                 {
-                    //we didn't find a valid date format so throw an exception
-                    throw new IOException( "Error converting date:" + date );
+                    initialWhere.setIndex(whereLen);
+                    return retCal;
                 }
             }
         }
-        return retval;
-    }
-
-    private static final void zeroAppend( StringBuffer out, int number )
-    {
-        if( number < 10 )
+        if (longestDate != null) 
         {
-            out.append( "0" );
+            initialWhere.setIndex(longestLen);
+            return longestDate;
         }
-        out.append( number );
+        return retCal;
     }
-
+       
     /**
-     * Convert the date to iso 8601 string format.
+     * Converts a string to a Calendar by parsing the String for a date.
+     * Equivalent to calling {@link #toCalendar(String)} on {@code text.getString()}.
      *
-     * @param cal The date to convert.
-     * @return The date represented as an ISO 8601 string.
+     * The returned value will have 0 for DST_OFFSET.
+     * 
+     * @param text The COSString representation of a date.
+     * @return The Calendar that the text string represents. 
+     *      Or null if text was null.
+     * @throws IOException If the date string is not in the correct format.
+     * @deprecated This method throws an IOException for failure. Replace
+     *      calls to it with {@link #toCalendar(String, String[])}, passing
+     *      {@code text.getString()} and {@code null}, and test for failure with
+     *          (value == null || value.get(Calendar.YEAR) == INVALID_YEAR)
      */
-    public static String toISO8601( Calendar cal )
+    public static Calendar toCalendar(COSString text) throws IOException
     {
-        StringBuffer retval = new StringBuffer();
-
-        retval.append( cal.get( Calendar.YEAR ) );
-        retval.append( "-" );
-        zeroAppend( retval, cal.get( Calendar.MONTH )+1 );
-        retval.append( "-" );
-        zeroAppend( retval, cal.get( Calendar.DAY_OF_MONTH ) );
-        retval.append( "T" );
-        zeroAppend( retval, cal.get( Calendar.HOUR_OF_DAY ));
-        retval.append( ":" );
-        zeroAppend( retval, cal.get( Calendar.MINUTE ));
-        retval.append( ":" );
-        zeroAppend( retval, cal.get( Calendar.SECOND ));
-
-        int timeZone = cal.get( Calendar.ZONE_OFFSET ) + cal.get(Calendar.DST_OFFSET );
-        if( timeZone < 0 )
+        if (text == null)
         {
-            retval.append( "-" );
+            return null;    
         }
-        else
+        return toCalendar(text.getString());
+    }
+    
+    /**
+     * Converts a string date to a Calendar date value; equivalent to 
+     * {@link #toCalendar(String, String[])} with {@code null} for the formats, 
+     * but throws an IOException for failure.
+     * 
+     * The returned value will have 0 for DST_OFFSET.
+     * 
+     * @param text The string representation of the calendar.
+     * @return The Calendar that this string represents 
+     *      or null if the incoming text is null.
+     * @throws IOException If the date string is non-null 
+     *      and not a parseable date.
+     * @deprecated This method throws an IOException for failure. Replace
+     *      calls to it with {@link #toCalendar(String, String[])}, passing
+     *      {@code text} and {@code null}, and test for failure with
+     *          (value == null || value.get(Calendar.YEAR) == INVALID_YEAR)
+     */
+    public static Calendar toCalendar(String text) throws IOException
+    {
+        if (text == null)
         {
-            retval.append( "+" );
+            return null;    
         }
-        timeZone = Math.abs( timeZone );
-        //milliseconds/1000 = seconds = seconds / 60 = minutes = minutes/60 = hours
-        int hours = timeZone/1000/60/60;
-        int minutes = (timeZone - (hours*1000*60*60))/1000/1000;
-        if( hours < 10 )
+        Calendar val = toCalendar(text, null);
+        if (val != null && val.get(Calendar.YEAR) == INVALID_YEAR)  
         {
-            retval.append( "0" );
+            throw new IOException("Error converting date: " + text);
         }
-        retval.append( Integer.toString( hours ) );
-        retval.append( ":" );
-        if( minutes < 10 )
+        return val;
+    }
+    
+    /**
+     * Converts a string to a calendar. The entire string must be consumed.
+     * The date must be strictly correct; that is, no field may exceed
+     * the appropriate limit. Uses {@link #parseDate} to do the actual parsing.
+     * 
+     * The returned value will have 0 for DST_OFFSET.
+     * 
+     * @param text The text to parse. Initial spaces and "D:" are skipped over.
+     * @param moreFmts An Array of formats (as Strings) to try 
+     *      in addition to the standard list.
+     * @return the Calendar value corresponding to the date text. 
+     *      If text does not represent a valid date, 
+     *      the value is January 1 on year INVALID_YEAR at 0:0:0 GMT.
+     * 
+     */
+    public static Calendar toCalendar(String text, String[] moreFmts)
+    {
+        ParsePosition where = new ParsePosition(0);
+        skipOptionals(text, where, " ");
+        skipString(text, "D:", where);
+        Calendar retCal = parseDate(text, moreFmts, where);   // PARSE THE TEXT
+        if (retCal == null || where.getIndex() != text.length()) 
         {
-            retval.append( "0" );
+            // the date string is invalid for all formats we tried,
+            retCal = newGreg();
+            retCal.set(INVALID_YEAR, 0, 1, 0, 0, 0);
         }
-        retval.append( Integer.toString( minutes ) );
-
-        return retval.toString();
+        return retCal;
     }
 }