You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/11/17 16:27:34 UTC
svn commit: r1542748 [4/5] - in /pdfbox/branches/1.8: ./
examples/src/main/java/org/apache/pdfbox/examples/fdf/
examples/src/main/java/org/apache/pdfbox/examples/signature/
fontbox/src/main/java/org/apache/fontbox/cff/
fontbox/src/main/java/org/apache/...
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java?rev=1542748&r1=1542747&r2=1542748&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDField.java Sun Nov 17 15:27:33 2013
@@ -16,34 +16,28 @@
*/
package org.apache.pdfbox.pdmodel.interactive.form;
-import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
-import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
-
-import org.apache.pdfbox.pdmodel.common.COSArrayList;
-import org.apache.pdfbox.pdmodel.common.COSObjectable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
-
+import org.apache.pdfbox.pdmodel.common.COSArrayList;
+import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDTextStream;
-
import org.apache.pdfbox.pdmodel.fdf.FDFField;
+import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.util.BitFlagHelper;
-import java.io.IOException;
-
-import java.util.ArrayList;
-import java.util.List;
-
/**
- * This is the superclass for a Field element in a PDF.
- * Based on the COS object model from PDFBox.
- *
+ * This is the superclass for a Field element in a PDF. Based on the COS object model from PDFBox.
+ *
* @author sug
- * @version $Revision: 1.23 $
+ *
*/
public abstract class PDField implements COSObjectable
{
@@ -60,28 +54,25 @@ public abstract class PDField implements
*/
public static final int FLAG_NO_EXPORT = 1 << 2;
-
private PDAcroForm acroForm;
private COSDictionary dictionary;
/**
* Constructor.
- *
+ *
* @param theAcroForm The form that this field is part of.
*/
- public PDField( PDAcroForm theAcroForm )
+ public PDField(PDAcroForm theAcroForm)
{
acroForm = theAcroForm;
dictionary = new COSDictionary();
- //no required fields in base field class
+ // no required fields in base field class
}
-
/**
- * Creates a COSField from a COSDictionary, expected to be
- * a correct object definition for a field in PDF.
- *
+ * Creates a COSField from a COSDictionary, expected to be a correct object definition for a field in PDF.
+ *
* @param theAcroForm The form that this field is part of.
* @param field the PDF object to represent as a field.
*/
@@ -93,42 +84,41 @@ public abstract class PDField implements
/**
* Returns the partial name of the field.
- *
+ *
* @return the name of the field
*/
public String getPartialName()
{
- return getDictionary().getString( COSName.T );
+ return getDictionary().getString(COSName.T);
}
/**
* This will set the partial name of the field.
- *
+ *
* @param name The new name for the field.
*/
- public void setPartialName( String name )
+ public void setPartialName(String name)
{
- getDictionary().setString( COSName.T, name );
+ getDictionary().setString(COSName.T, name);
}
/**
- * Returns the fully qualified name of the field, which is a concatenation of
- * the names of all the parents fields.
- *
+ * Returns the fully qualified name of the field, which is a concatenation of the names of all the parents fields.
+ *
* @return the name of the field
- *
+ *
* @throws IOException If there is an error generating the fully qualified name.
*/
public String getFullyQualifiedName() throws IOException
{
PDField parent = getParent();
String parentName = null;
- if( parent != null )
+ if (parent != null)
{
parentName = parent.getFullyQualifiedName();
}
String finalName = getPartialName();
- if( parentName != null )
+ if (parentName != null)
{
finalName = parentName + "." + finalName;
}
@@ -156,135 +146,132 @@ public abstract class PDField implements
}
/**
- * Get the FT entry of the field. This is a read only field and is set depending
- * on the actual type. The field type is an inheritable attribute. This method will
- * return only the direct value on this object. Use the findFieldType for an upward
- * recursive search.
- *
+ * Get the FT entry of the field. This is a read only field and is set depending on the actual type. The field type
+ * is an inheritable attribute. This method will return only the direct value on this object. Use the findFieldType
+ * for an upward recursive search.
+ *
* @return The Field type.
- *
+ *
* @see PDField#findFieldType()
*/
public String getFieldType()
{
- return getDictionary().getNameAsString( COSName.FT );
+ return getDictionary().getNameAsString(COSName.FT);
}
/**
- * Find the field type and optionally do a recursive upward search. Sometimes the fieldtype
- * will be specified on the parent instead of the direct object. This will look at this
- * object for the field type, if none is specified then it will look to the parent if there
- * is a parent. If there is no parent and no field type has been found then this
+ * Find the field type and optionally do a recursive upward search. Sometimes the fieldtype will be specified on the
+ * parent instead of the direct object. This will look at this object for the field type, if none is specified then
+ * it will look to the parent if there is a parent. If there is no parent and no field type has been found then this
* will return null.
- *
+ *
* @return The field type or null if none was found.
*/
public String findFieldType()
{
- return findFieldType( getDictionary() );
+ return findFieldType(getDictionary());
}
- private String findFieldType( COSDictionary dic )
+ private String findFieldType(COSDictionary dic)
{
- String retval = dic.getNameAsString( COSName.FT );
- if( retval == null )
+ String retval = dic.getNameAsString(COSName.FT);
+ if (retval == null)
{
- COSDictionary parent = (COSDictionary)dic.getDictionaryObject( COSName.PARENT, COSName.P );
- if( parent != null )
+ COSDictionary parent = (COSDictionary) dic.getDictionaryObject(COSName.PARENT, COSName.P);
+ if (parent != null)
{
- retval = findFieldType( parent );
+ retval = findFieldType(parent);
}
}
return retval;
}
-
/**
* setValue sets the fields value to a given string.
- *
+ *
* @param value the string value
- *
+ *
* @throws IOException If there is an error creating the appearance stream.
*/
public abstract void setValue(String value) throws IOException;
/**
* getValue gets the field's value as a string.
- *
+ *
* @return The string value of this field.
- *
+ *
* @throws IOException If there is an error getting the value.
*/
public abstract String getValue() throws IOException;
/**
* sets the field to be read-only.
- *
+ *
* @param readonly The new flag for readonly.
*/
public void setReadonly(boolean readonly)
{
- BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_READ_ONLY, readonly );
+ BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_READ_ONLY, readonly);
}
/**
- *
+ *
* @return true if the field is readonly
*/
public boolean isReadonly()
{
- return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_READ_ONLY );
+ return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_READ_ONLY);
}
/**
* sets the field to be required.
- *
+ *
* @param required The new flag for required.
*/
public void setRequired(boolean required)
{
- BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_REQUIRED, required );
+ BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_REQUIRED, required);
}
/**
- *
+ *
* @return true if the field is required
*/
public boolean isRequired()
{
- return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_REQUIRED );
+ return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_REQUIRED);
}
/**
* sets the field to be not exported.
- *
+ *
* @param noExport The new flag for noExport.
*/
public void setNoExport(boolean noExport)
{
- BitFlagHelper.setFlag( getDictionary(), COSName.FF, FLAG_NO_EXPORT, noExport );
+ BitFlagHelper.setFlag(getDictionary(), COSName.FF, FLAG_NO_EXPORT, noExport);
}
/**
- *
+ *
* @return true if the field is not to be exported.
*/
public boolean isNoExport()
{
- return BitFlagHelper.getFlag( getDictionary(), COSName.FF, FLAG_NO_EXPORT );
+ return BitFlagHelper.getFlag(getDictionary(), COSName.FF, FLAG_NO_EXPORT);
}
/**
* This will get the flags for this field.
- *
+ *
* @return flags The set of flags.
*/
public int getFieldFlags()
{
int retval = 0;
- COSInteger ff = (COSInteger)getDictionary().getDictionaryObject( COSName.FF );
- if( ff != null )
+ COSInteger ff = (COSInteger) getDictionary().getDictionaryObject(COSName.FF);
+ if (ff != null)
{
retval = ff.intValue();
}
@@ -293,128 +280,128 @@ public abstract class PDField implements
/**
* This will set the flags for this field.
- *
+ *
* @param flags The new flags.
*/
- public void setFieldFlags( int flags )
+ public void setFieldFlags(int flags)
{
- getDictionary().setInt( COSName.FF, flags );
+ getDictionary().setInt(COSName.FF, flags);
}
/**
* This will import a fdf field from a fdf document.
- *
+ *
* @param fdfField The fdf field to import.
- *
+ *
* @throws IOException If there is an error importing the data for this field.
*/
- public void importFDF( FDFField fdfField ) throws IOException
+ public void importFDF(FDFField fdfField) throws IOException
{
Object fieldValue = fdfField.getValue();
int fieldFlags = getFieldFlags();
- if( fieldValue != null )
+ if (fieldValue != null)
{
- if( fieldValue instanceof String )
+ if (fieldValue instanceof String)
{
- setValue( (String)fieldValue );
+ setValue((String) fieldValue);
}
- else if( fieldValue instanceof PDTextStream )
+ else if (fieldValue instanceof PDTextStream)
{
- setValue( ((PDTextStream)fieldValue).getAsString() );
+ setValue(((PDTextStream) fieldValue).getAsString());
}
else
{
- throw new IOException( "Unknown field type:" + fieldValue.getClass().getName() );
+ throw new IOException("Unknown field type:" + fieldValue.getClass().getName());
}
}
Integer ff = fdfField.getFieldFlags();
- if( ff != null )
+ if (ff != null)
{
- setFieldFlags( ff.intValue() );
+ setFieldFlags(ff.intValue());
}
else
{
- //these are suppose to be ignored if the Ff is set.
+ // these are supposed to be ignored if the Ff is set.
Integer setFf = fdfField.getSetFieldFlags();
- if( setFf != null )
+ if (setFf != null)
{
int setFfInt = setFf.intValue();
fieldFlags = fieldFlags | setFfInt;
- setFieldFlags( fieldFlags );
+ setFieldFlags(fieldFlags);
}
Integer clrFf = fdfField.getClearFieldFlags();
- if( clrFf != null )
+ if (clrFf != null)
{
- //we have to clear the bits of the document fields for every bit that is
- //set in this field.
+ // we have to clear the bits of the document fields for every bit that is
+ // set in this field.
//
- //Example:
- //docFf = 1011
- //clrFf = 1101
- //clrFfValue = 0010;
- //newValue = 1011 & 0010 which is 0010
+ // Example:
+ // docFf = 1011
+ // clrFf = 1101
+ // clrFfValue = 0010;
+ // newValue = 1011 & 0010 which is 0010
int clrFfValue = clrFf.intValue();
clrFfValue ^= 0xFFFFFFFF;
fieldFlags = fieldFlags & clrFfValue;
- setFieldFlags( fieldFlags );
+ setFieldFlags(fieldFlags);
}
}
PDAnnotationWidget widget = getWidget();
- if( widget != null )
+ if (widget != null)
{
int annotFlags = widget.getAnnotationFlags();
Integer f = fdfField.getWidgetFieldFlags();
- if( f != null && widget != null )
+ if (f != null && widget != null)
{
- widget.setAnnotationFlags( f.intValue() );
+ widget.setAnnotationFlags(f.intValue());
}
else
{
- //these are suppose to be ignored if the F is set.
+ // these are supposed to be ignored if the F is set.
Integer setF = fdfField.getSetWidgetFieldFlags();
- if( setF != null )
+ if (setF != null)
{
annotFlags = annotFlags | setF.intValue();
- widget.setAnnotationFlags( annotFlags );
+ widget.setAnnotationFlags(annotFlags);
}
Integer clrF = fdfField.getClearWidgetFieldFlags();
- if( clrF != null )
+ if (clrF != null)
{
- //we have to clear the bits of the document fields for every bit that is
- //set in this field.
+ // we have to clear the bits of the document fields for every bit that is
+ // set in this field.
//
- //Example:
- //docF = 1011
- //clrF = 1101
- //clrFValue = 0010;
- //newValue = 1011 & 0010 which is 0010
+ // Example:
+ // docF = 1011
+ // clrF = 1101
+ // clrFValue = 0010;
+ // newValue = 1011 & 0010 which is 0010
int clrFValue = clrF.intValue();
clrFValue ^= 0xFFFFFFFFL;
annotFlags = annotFlags & clrFValue;
- widget.setAnnotationFlags( annotFlags );
+ widget.setAnnotationFlags(annotFlags);
}
}
}
List<FDFField> fdfKids = fdfField.getKids();
List<COSObjectable> pdKids = getKids();
- for( int i=0; fdfKids != null && i<fdfKids.size(); i++ )
+ for (int i = 0; fdfKids != null && i < fdfKids.size(); i++)
{
- FDFField fdfChild = fdfKids.get( i );
+ FDFField fdfChild = fdfKids.get(i);
String fdfName = fdfChild.getPartialFieldName();
- for( int j=0; j<pdKids.size(); j++ )
+ for (int j = 0; j < pdKids.size(); j++)
{
- Object pdChildObj = pdKids.get( j );
- if( pdChildObj instanceof PDField )
+ Object pdChildObj = pdKids.get(j);
+ if (pdChildObj instanceof PDField)
{
- PDField pdChild = (PDField)pdChildObj;
- if( fdfName != null && fdfName.equals( pdChild.getPartialName() ) )
+ PDField pdChild = (PDField) pdChildObj;
+ if (fdfName != null && fdfName.equals(pdChild.getPartialName()))
{
- pdChild.importFDF( fdfChild );
+ pdChild.importFDF(fdfChild);
}
}
}
@@ -422,12 +409,10 @@ public abstract class PDField implements
}
/**
- * This will get the single associated widget that is part of this field. This
- * occurs when the Widget is embedded in the fields dictionary. Sometimes there
- * are multiple sub widgets associated with this field, in which case you want to
- * use getKids(). If the kids entry is specified, then the first entry in that
- * list will be returned.
- *
+ * This will get the single associated widget that is part of this field. This occurs when the Widget is embedded in
+ * the fields dictionary. Sometimes there are multiple sub widgets associated with this field, in which case you
+ * want to use getKids(). If the kids entry is specified, then the first entry in that list will be returned.
+ *
* @return The widget that is associated with this field.
* @throws IOException If there is an error getting the widget object.
*/
@@ -435,20 +420,20 @@ public abstract class PDField implements
{
PDAnnotationWidget retval = null;
List<COSObjectable> kids = getKids();
- if( kids == null )
+ if (kids == null)
{
- retval = new PDAnnotationWidget( getDictionary() );
+ retval = new PDAnnotationWidget(getDictionary());
}
- else if( kids.size() > 0 )
+ else if (kids.size() > 0)
{
- Object firstKid = kids.get( 0 );
- if( firstKid instanceof PDAnnotationWidget )
+ Object firstKid = kids.get(0);
+ if (firstKid instanceof PDAnnotationWidget)
{
- retval = (PDAnnotationWidget)firstKid;
+ retval = (PDAnnotationWidget) firstKid;
}
else
{
- retval = ((PDField)firstKid).getWidget();
+ retval = ((PDField) firstKid).getWidget();
}
}
else
@@ -460,58 +445,57 @@ public abstract class PDField implements
/**
* Get the parent field to this field, or null if none exists.
- *
+ *
* @return The parent field.
- *
+ *
* @throws IOException If there is an error creating the parent field.
*/
public PDField getParent() throws IOException
{
PDField parent = null;
- COSDictionary parentDic = (COSDictionary)getDictionary().getDictionaryObject( COSName.PARENT, COSName.P );
- if( parentDic != null )
+ COSDictionary parentDic = (COSDictionary) getDictionary().getDictionaryObject(COSName.PARENT, COSName.P);
+ if (parentDic != null)
{
- parent = PDFieldFactory.createField( getAcroForm(), parentDic );
+ parent = PDFieldFactory.createField(getAcroForm(), parentDic);
}
return parent;
}
/**
* Set the parent of this field.
- *
+ *
* @param parent The parent to this field.
*/
- public void setParent( PDField parent )
+ public void setParent(PDField parent)
{
- getDictionary().setItem( "Parent", parent );
+ getDictionary().setItem("Parent", parent);
}
/**
- * This will find one of the child elements. The name array are the components
- * of the name to search down the tree of names. The nameIndex is where to
- * start in that array. This method is called recursively until it finds
- * the end point based on the name array.
- *
+ * This will find one of the child elements. The name array are the components of the name to search down the tree
+ * of names. The nameIndex is where to start in that array. This method is called recursively until it finds the end
+ * point based on the name array.
+ *
* @param name An array that picks the path to the field.
* @param nameIndex The index into the array.
* @return The field at the endpoint or null if none is found.
* @throws IOException If there is an error creating the field.
*/
- public PDField findKid( String[] name, int nameIndex ) throws IOException
+ public PDField findKid(String[] name, int nameIndex) throws IOException
{
PDField retval = null;
- COSArray kids = (COSArray)getDictionary().getDictionaryObject( COSName.KIDS );
- if( kids != null )
+ COSArray kids = (COSArray) getDictionary().getDictionaryObject(COSName.KIDS);
+ if (kids != null)
{
for (int i = 0; retval == null && i < kids.size(); i++)
{
- COSDictionary kidDictionary = (COSDictionary)kids.getObject(i);
- if( name[nameIndex].equals( kidDictionary.getString( "T" ) ) )
+ COSDictionary kidDictionary = (COSDictionary) kids.getObject(i);
+ if (name[nameIndex].equals(kidDictionary.getString("T")))
{
- retval = PDFieldFactory.createField( acroForm, kidDictionary );
- if( name.length > nameIndex+1 )
+ retval = PDFieldFactory.createField(acroForm, kidDictionary);
+ if (name.length > nameIndex + 1)
{
- retval = retval.findKid( name, nameIndex+1 );
+ retval = retval.findKid(name, nameIndex + 1);
}
}
}
@@ -520,69 +504,72 @@ public abstract class PDField implements
}
/**
- * This will get all the kids of this field. The values in the list
- * will either be PDWidget or PDField. Normally they will be PDWidget objects
- * unless this is a non-terminal field and they will be child PDField objects.
- *
+ * This will get all the kids of this field. The values in the list will either be PDWidget or PDField. Normally
+ * they will be PDWidget objects unless this is a non-terminal field and they will be child PDField objects.
+ *
* @return A list of either PDWidget or PDField objects.
* @throws IOException If there is an error retrieving the kids.
*/
public List<COSObjectable> getKids() throws IOException
{
List<COSObjectable> retval = null;
- COSArray kids = (COSArray)getDictionary().getDictionaryObject(COSName.KIDS);
- if( kids != null )
+ COSArray kids = (COSArray) getDictionary().getDictionaryObject(COSName.KIDS);
+ if (kids != null)
{
List<COSObjectable> kidsList = new ArrayList<COSObjectable>();
for (int i = 0; i < kids.size(); i++)
{
- COSDictionary kidDictionary = (COSDictionary)kids.getObject(i);
- COSDictionary parent = (COSDictionary)kidDictionary.getDictionaryObject( COSName.PARENT, COSName.P );
- if( kidDictionary.getDictionaryObject( COSName.FT ) != null ||
- (parent != null && parent.getDictionaryObject( COSName.FT ) != null ) )
+ COSDictionary kidDictionary = (COSDictionary) kids.getObject(i);
+ if (kidDictionary == null)
+ {
+ continue;
+ }
+ COSDictionary parent = (COSDictionary) kidDictionary.getDictionaryObject(COSName.PARENT, COSName.P);
+ if (kidDictionary.getDictionaryObject(COSName.FT) != null
+ || (parent != null && parent.getDictionaryObject(COSName.FT) != null))
{
- kidsList.add( PDFieldFactory.createField( acroForm, kidDictionary ));
+ kidsList.add(PDFieldFactory.createField(acroForm, kidDictionary));
}
- else if( "Widget".equals( kidDictionary.getNameAsString( COSName.SUBTYPE ) ) )
+ else if ("Widget".equals(kidDictionary.getNameAsString(COSName.SUBTYPE)))
{
- kidsList.add( new PDAnnotationWidget( kidDictionary ) );
+ kidsList.add(new PDAnnotationWidget(kidDictionary));
}
else
{
//
- kidsList.add( PDFieldFactory.createField( acroForm, kidDictionary ));
+ kidsList.add(PDFieldFactory.createField(acroForm, kidDictionary));
}
}
- retval = new COSArrayList( kidsList, kids );
+ retval = new COSArrayList<COSObjectable>(kidsList, kids);
}
return retval;
}
/**
* This will set the list of kids.
- *
+ *
* @param kids The list of child widgets.
*/
- public void setKids( List<COSObjectable> kids )
+ public void setKids(List<COSObjectable> kids)
{
- COSArray kidsArray = COSArrayList.converterToCOSArray( kids );
- getDictionary().setItem( COSName.KIDS, kidsArray );
+ COSArray kidsArray = COSArrayList.converterToCOSArray(kids);
+ getDictionary().setItem(COSName.KIDS, kidsArray);
}
/**
* This will return a string representation of this field.
- *
+ *
* @return A string representation of this field.
*/
@Override
public String toString()
{
- return "" + getDictionary().getDictionaryObject( COSName.V );
+ return "" + getDictionary().getDictionaryObject(COSName.V);
}
/**
* This will get the acroform that this field is part of.
- *
+ *
* @return The form this field is on.
*/
public PDAcroForm getAcroForm()
@@ -592,7 +579,7 @@ public abstract class PDField implements
/**
* This will set the form this field is on.
- *
+ *
* @param value The new form to use.
*/
public void setAcroForm(PDAcroForm value)
@@ -602,7 +589,7 @@ public abstract class PDField implements
/**
* This will get the dictionary associated with this field.
- *
+ *
* @return The dictionary that this class wraps.
*/
public COSDictionary getDictionary()
@@ -612,7 +599,7 @@ public abstract class PDField implements
/**
* Convert this standard java object to a COS object.
- *
+ *
* @return The cos object that matches this Java object.
*/
public COSBase getCOSObject()
@@ -621,29 +608,29 @@ public abstract class PDField implements
}
/**
- * Get the additional actions for this field. This will return null
- * if there are no additional actions for this field.
- *
+ * Get the additional actions for this field. This will return null if there are no additional actions for this
+ * field.
+ *
* @return The actions of the field.
*/
public PDFormFieldAdditionalActions getActions()
{
- COSDictionary aa = (COSDictionary)dictionary.getDictionaryObject( COSName.AA );
+ COSDictionary aa = (COSDictionary) dictionary.getDictionaryObject(COSName.AA);
PDFormFieldAdditionalActions retval = null;
- if( aa != null )
+ if (aa != null)
{
- retval = new PDFormFieldAdditionalActions( aa );
+ retval = new PDFormFieldAdditionalActions(aa);
}
return retval;
}
/**
* Set the actions of the field.
- *
+ *
* @param actions The field actions.
*/
- public void setActions( PDFormFieldAdditionalActions actions )
+ public void setActions(PDFormFieldAdditionalActions actions)
{
- dictionary.setItem( COSName.AA, actions );
+ dictionary.setItem(COSName.AA, actions);
}
}
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java?rev=1542748&r1=1542747&r2=1542748&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/DateConverter.java Sun Nov 17 15:27:33 2013
@@ -16,11 +16,9 @@
*/
package org.apache.pdfbox.util;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-
import java.io.IOException;
-
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
@@ -31,329 +29,789 @@ import java.util.TimeZone;
import org.apache.pdfbox.cos.COSString;
/**
+ * Date format is described in PDF Reference 1.7 section 3.8.2
+ * (www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf)
+ * and also in PDF 32000-1:2008
+ * (http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf)
+ * although the latter inexplicably omits the trailing apostrophe.
+ *
+ * The interpretation of dates without timezones is unclear.
+ * The code below assumes that such dates are in UTC+00 (aka GMT).
+ * This is in keeping with the PDF Reference's assertion that:
+ * numerical fields default to zero values.
+ * However, the Reference does go on to make the cryptic remark:
+ * If no UT information is specified, the relationship of the specified
+ * time to UT is considered to be unknown. Whether or not the time
+ * zone is known, the rest of the date should be specified in local time.
+ * I understand this to refer to _creating_ a pdf date value. That is,
+ * code that can get the wall clock time and cannot get the timezone
+ * should write the wall clock time with a time zone of zero.
+ * When _parsing_ a PDF date, the statement talks about "the rest of the date"
+ * being local time, thus explicitly excluding the use of the local time
+ * for the time zone.
+*/
+
+/**
* This class is used to convert dates to strings and back using the PDF
- * date standards. Date are described in PDFReference1.4 section 3.8.2
+ * date standard in section 3.8.2 of PDF Reference 1.7.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.14 $
+ * @author <a href="mailto:zweibieren@ahoo.com">Fred Hansen</a>
+ *
+ * TODO Move members of this class elsewhere for shared use in pdfbox, xmpbox, and jempbox.
*/
public class DateConverter
{
- //The Date format is supposed to be the PDF_DATE_FORMAT, but not all PDF documents
- //will use that date, so I have added a couple other potential formats
- //to try if the original one does not work.
- private static final SimpleDateFormat[] POTENTIAL_FORMATS = new SimpleDateFormat[] {
- new SimpleDateFormat("EEEE, dd MMM yyyy hh:mm:ss a", Locale.ENGLISH),
- new SimpleDateFormat("EEEE, MMM dd, yyyy hh:mm:ss a", Locale.ENGLISH),
- new SimpleDateFormat("MM/dd/yyyy hh:mm:ss", Locale.ENGLISH),
- new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH),
- new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ENGLISH),
- new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssz", Locale.ENGLISH),
- new SimpleDateFormat("EEEE, MMM dd, yyyy", Locale.ENGLISH), // Acrobat Distiller 1.0.2 for Macintosh
- new SimpleDateFormat("EEEE MMM dd, yyyy HH:mm:ss", Locale.ENGLISH), // ECMP5
- new SimpleDateFormat("EEEE MMM dd HH:mm:ss z yyyy", Locale.ENGLISH), // GNU Ghostscript 7.0.7
- new SimpleDateFormat("EEEE, MMM dd, yyyy 'at' hh:mma", Locale.ENGLISH), // Acrobat Net Distiller 1.0 for Windows
- new SimpleDateFormat("d/MM/yyyy hh:mm:ss", Locale.ENGLISH), // PDFBOX-164
- new SimpleDateFormat("dd/MM/yyyy hh:mm:ss", Locale.ENGLISH), // PDFBOX-170
- new SimpleDateFormat("EEEEEEEEEE, MMMMMMMMMMMM dd, yyyy", Locale.ENGLISH), // PDFBOX-465
- new SimpleDateFormat("dd MMM yyyy hh:mm:ss", Locale.ENGLISH), // for 26 May 2000 11:25:00
- new SimpleDateFormat("dd MMM yyyy hh:mm", Locale.ENGLISH), // for 26 May 2000 11:25
- new SimpleDateFormat("M/dd/yyyy hh:mm:ss", Locale.ENGLISH),
- new SimpleDateFormat("MM/d/yyyy hh:mm:ss", Locale.ENGLISH),
- new SimpleDateFormat("M/dd/yyyy", Locale.ENGLISH),
- new SimpleDateFormat("MM/d/yyyy", Locale.ENGLISH),
- new SimpleDateFormat("M/d/yyyy hh:mm:ss", Locale.ENGLISH),
- new SimpleDateFormat("M/d/yyyy", Locale.ENGLISH),
- new SimpleDateFormat("M/d/yy hh:mm:ss", Locale.ENGLISH),
- new SimpleDateFormat("M/d/yy", Locale.ENGLISH),
- new SimpleDateFormat("yyyymmdd hh:mm:ss Z"), //
- new SimpleDateFormat("yyyymmdd hh:mm:ss"), //
- new SimpleDateFormat("yyyymmdd'+00''00'''"), //
- new SimpleDateFormat("yyyymmdd'+01''00'''"), //
- new SimpleDateFormat("yyyymmdd'+02''00'''"), //
- new SimpleDateFormat("yyyymmdd'+03''00'''"), //
- new SimpleDateFormat("yyyymmdd'+04''00'''"), //
- new SimpleDateFormat("yyyymmdd'+05''00'''"), //
- new SimpleDateFormat("yyyymmdd'+06''00'''"), //
- new SimpleDateFormat("yyyymmdd'+07''00'''"), //
- new SimpleDateFormat("yyyymmdd'+08''00'''"), //
- new SimpleDateFormat("yyyymmdd'+09''00'''"), //
- new SimpleDateFormat("yyyymmdd'+10''00'''"), //
- new SimpleDateFormat("yyyymmdd'+11''00'''"), //
- new SimpleDateFormat("yyyymmdd'+12''00'''"), //
- new SimpleDateFormat("yyyymmdd'-01''00'''"), //
- new SimpleDateFormat("yyyymmdd'-02''00'''"), //
- new SimpleDateFormat("yyyymmdd'-03''00'''"), //
- new SimpleDateFormat("yyyymmdd'-04''00'''"), //
- new SimpleDateFormat("yyyymmdd'-05''00'''"), //
- new SimpleDateFormat("yyyymmdd'-06''00'''"), //
- new SimpleDateFormat("yyyymmdd'-07''00'''"), //
- new SimpleDateFormat("yyyymmdd'-08''00'''"), //
- new SimpleDateFormat("yyyymmdd'-09''00'''"), //
- new SimpleDateFormat("yyyymmdd'-10''00'''"), //
- new SimpleDateFormat("yyyymmdd'-11''00'''"), //
- new SimpleDateFormat("yyyymmdd'-12''00'''"), //
- new SimpleDateFormat("yyyymmdd"), // for 20090401+0200
+ // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours
+ private static final int MINUTES_PER_HOUR = 60;
+ private static final int SECONDS_PER_MINUTE = 60;
+ private static final int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE*1000;
+ private static final int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE;
+ private static final int
+ HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE,
+ DAY = 2*HALF_DAY;
+
+ /**
+ * Error value if date is invalid. Parsing is done with
+ * GregorianCalendar.setLenient(false), so every date field value
+ * must be within bounds. If an attempt is made to parse an invalid date
+ * field, toCalendar(String, String[]) returns Jan 1 in year INVALID_YEAR.
+ */
+ public static final int INVALID_YEAR = 999;
+
+
+ /**
+ * The Date format is supposed to be the PDF_DATE_FORMAT, but other
+ * forms appear. These lists offer alternatives to be tried
+ * if parseBigEndianDate fails.
+ *
+ * The time zone offset generally trails the date string, so it is processed
+ * separately with parseTZoffset. (This does not preclude having time
+ * zones in the elements below; one does.)
+ *
+ * Alas, SimpleDateFormat is badly non-reentrant -- it modifies its
+ * calendar field (PDFBox-402), so these lists are strings to create
+ * SimpleDate format as needed.
+ *
+ * Some past entries have been elided because they duplicate existing
+ * entries. See the API for SimpleDateFormat, which says
+ * "For parsing, the number of pattern letters is ignored
+ * unless it's needed to separate two adjacent fields."
+ *
+ * toCalendar(String, String[]) tests to see that the entire input text
+ * has been consumed. Therefore the ordering of formats is important.
+ * If one format begins with the entirety of another, the longer
+ * must precede the other in the list.
+ *
+ * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh"
+ * Where year is yy, four digit years are accepted
+ * and two digit years are converted to four digits in the range
+ * [thisyear-79...thisyear+20]
+ */
+ private static final String[] ALPHA_START_FORMATS =
+ {
+ "EEEE, dd MMM yy hh:mm:ss a",
+ "EEEE, MMM dd, yy hh:mm:ss a",
+ "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows
+ "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465
+ "EEEE MMM dd, yy HH:mm:ss", // ECMP5
+ "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7
+ "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant
+ };
+
+ private static final String[] DIGIT_START_FORMATS =
+ {
+ "dd MMM yy HH:mm:ss", // for 26 May 2000 11:25:00
+ "dd MMM yy HH:mm", // for 26 May 2000 11:25
+ "yyyy MMM d", // ambiguity resolved only by omitting time
+ "yyyymmddhh:mm:ss", // test case "200712172:2:3"
+ "H:m M/d/yy", // test case "9:47 5/12/2008"
+ "M/d/yy HH:mm:ss",
+ "M/d/yy HH:mm",
+ "M/d/yy",
+
+ // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss"
+ // "yyyy MMM d HH:mm:ss",
+
+ // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm" "M/d/yy",
+ // (incoming digit strings do not mark themselves as y, m, or d!)
+ // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170
+ // "M/dd/yyyy hh:mm:ss",
+ // "MM/d/yyyy hh:mm:ss",
+ // "M/d/yyyy HH:mm:ss",
+ // "M/dd/yyyy",
+ // "MM/d/yyyy",
+ // "M/d/yyyy",
+ // "M/d/yyyy HH:mm:ss",
+ // "M/d/yy HH:mm:ss",
+ // subsumed by big-endian parse
+ // "yyyy-MM-dd'T'HH:mm:ss",
+ // "yyyy-MM-dd'T'HH:mm:ss",
+ // "yyyymmdd hh:mm:ss",
+ // "yyyymmdd",
+ // "yyyymmddX''00''", // covers 24 cases
+ // (originally the above ended with '+00''00''';
+ // the first apostrophe quoted the plus,
+ // '' mapped to a single ', and the ''' was invalid)
};
+
private DateConverter()
{
//utility class should not be constructed.
}
+ ////////////////////////////////////////////
+ // C o n v e r t t o S t r i n g Methods
+
+ /**
+ * Get all known formats.
+ *
+ * @return an array containing all known formats
+ */
+ public static String[] getFormats()
+ {
+ String[] val = new String[ALPHA_START_FORMATS.length+DIGIT_START_FORMATS.length];
+ System.arraycopy(ALPHA_START_FORMATS, 0, val, 0, ALPHA_START_FORMATS.length);
+ System.arraycopy(DIGIT_START_FORMATS, 0, val,ALPHA_START_FORMATS.length, DIGIT_START_FORMATS.length);
+ return val;
+ }
+
/**
- * This will convert the calendar to a string.
+ * Converts a Calendar to a string formatted as:
+ * D:yyyyMMddHHmmss#hh'mm' where # is Z, +, or -.
+ *
+ * @param cal The date to convert to a string. May be null.
+ * The DST_OFFSET is included when computing the output time zone.
*
- * @param date The date to convert to a string.
+ * @return The date as a String to be used in a PDF document,
+ * or null if the cal value is null
+ */
+ public static String toString(Calendar cal)
+ {
+ if (cal == null)
+ {
+ return null;
+ }
+ String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET)
+ + cal.get(Calendar.DST_OFFSET), "'");
+ return String.format("D:"
+ + "%1$4tY%1$2tm%1$2td" // yyyyMMdd
+ + "%1$2tH%1$2tM%1$2tS" // HHmmss
+ + "%2$s" // time zone
+ + "'", // trailing apostrophe
+ cal, offset);
+ }
+
+ /**
+ * Converts the date to ISO 8601 string format:
+ * yyyy-MM-ddTHH:mm:ss#hh:mm (where '#' is '+' or '-').
*
- * @return The date as a String to be used in a PDF document.
+ * @param cal The date to convert. Must not be null.
+ * The DST_OFFSET is included in the output value.
+ *
+ * @return The date represented as an ISO 8601 string.
*/
- public static String toString( Calendar date )
+ public static String toISO8601(Calendar cal)
{
- String retval = null;
- if( date != null )
+ String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET)
+ + cal.get(Calendar.DST_OFFSET), ":");
+ return String.format(
+ "%1$4tY" // yyyy
+ + "-%1$2tm" // -mm (%tm adds one to cal month value)
+ + "-%1$2td" // -dd (day of month)
+ + "T" // T
+ + "%1$2tH:%1$2tM:%1$2tS" // HH:mm:ss
+ + "%2$s", // time zone
+ cal, offset);
+ }
+
+ /**
+ * Constrain a timezone offset to the range [-11:59 thru +11:59].
+ * @param proposedOffset A value intended to be a timezone offset.
+ * @return The corresponding value reduced to the above noted range
+ * by adding or subtracting multiples of a full day.
+ */
+ public static int restrainTZoffset(long proposedOffset)
+ {
+ proposedOffset = ((proposedOffset+HALF_DAY)%DAY+DAY)%DAY;
+ // 0 <= proposedOffset < DAY
+ proposedOffset = (proposedOffset-HALF_DAY)%HALF_DAY;
+ // -HALF_DAY < proposedOffset < HALF_DAY
+ return (int)proposedOffset;
+ }
+
+ /**
+ * Formats a time zone offset as #hh^mm
+ * where # is + or -, hh is hours, ^ is a separator, and mm is minutes.
+ * Any separator may be specified by the second argument;
+ * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF).
+ * The returned value is constrained to the range -11:59 ... 11:59.
+ * For offset of 0 millis, the String returned is "+00^00", never "Z".
+ * To get a "general" offset in form GMT#hh:mm, write
+ * "GMT"+DateConverter.formatTZoffset(offset, ":");
+ * <p>
+ * Take thought in choosing the source for the millis value.
+ * It can come from calendarValue.getTimeZone() or from
+ * calendarValue.get(Calendar.ZONE_OFFSET). If a TimeZone was created
+ * from a valid time zone ID, then it may have a daylight savings rule.
+ * (As of July 4, 2013, the data base at http://www.iana.org/time-zones
+ * recognized 629 time zone regions.) But a TimeZone created as
+ * new SimpleTimeZone(millisOffset, "ID"),
+ * will not have a daylight savings rule. (Not even if there is a
+ * known time zone with the given ID.) To get the TimeZone named "xDT"
+ * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT.
+ * <p>
+ * When parsing PDF dates, the incoming value DOES NOT have a TIMEZONE value.
+ * At most it has an OFFSET value like -04'00'. It is generally impossible to
+ * determine what TIMEZONE corresponds to a given OFFSET. If the date is
+ * in the summer when daylight savings is in effect, an offset of -0400
+ * might correspond to any one of the 38 regions (of 53) with standard time
+ * offset -0400 and no daylight saving. Or it might correspond to
+ * any one of the 31 regions (out of 43) that observe daylight savings
+ * and have standard time offset of -0500.
+ * <p>
+ * If a Calendar has not been assigned a TimeZone with setTimeZone(),
+ * it will have by default the local TIMEZONE, not just the OFFSET. In the
+ * USA, this TimeZone will have a daylight savings rule.
+ * <p>
+ * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs
+ * from the offset in the TimeZone set by Calendar.setTimeZone(). Example:
+ * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'.
+ * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00'
+ * calVal = new GregorianCalendar(); // TimeZone is the local default
+ * calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR);
+ * Four different offsets can be computed from calVal:
+ * calVal.get(Calendar.ZONE_OFFSET) => -07:00
+ * calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00
+ * calVal.getTimeZone().getRawOffset() => -05:00
+ * calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00
+ * <p>
+ * Which is correct??? I dunno, though setTimeZone() does seem to affect
+ * ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone
+ * or ZONE_OFFSET has been set; both have been set by initialization code.
+ * TimeZone is initialized to the local default time zone
+ * and ZONE_OFFSET is set from it.
+ *
+ * My choice in this DateConverter class has been to set the
+ * initial TimeZone of a GregorianCalendar to GMT. Thereafter
+ * the TimeZone is modified with {@link #adjustTimeZoneNicely}.
+ *
+ * @param millis a time zone offset expressed in milliseconds
+ * Any value is accepted; it is normalized to [-11:59 ... +11:59]
+ * @param sep a String to insert between hh and mm. May be empty.
+ * @return the formatted String for the offset
+ */
+ public static String formatTZoffset(long millis, String sep)
+ {
+ SimpleDateFormat sdf = new SimpleDateFormat("Z"); // #hhmm
+ sdf.setTimeZone(new SimpleTimeZone(restrainTZoffset(millis),"unknown"));
+ String tz = sdf.format(new Date());
+ return tz.substring(0,3)+sep+tz.substring(3);
+ }
+
+ //////////////////////////////////////////////
+ // P A R S E Methods
+
+ /**
+ * Parses an integer from a string, starting at and advancing a ParsePosition.
+ *
+ * @param text The string being parsed. If null, the remedy value is returned.
+ * @param where The ParsePosition to start the search. This value
+ * will be incremented by the number of digits found, but no
+ * more than maxlen. That is, the ParsePosition will
+ * advance across at most maxlen initial digits in text.
+ * The error index is ignored and unchanged.
+ * @param maxlen The maximum length of the integer to parse.
+ * Usually 2, but 4 for year fields.
+ * If the field of length maxlen begins with a digit,
+ * but contains a non-digit, no error is signaled
+ * and the integer value is returned.
+ * @param remedy Value to be assigned if no digit is found at the
+ * initial parse position; that is, if the field is empty.
+ * @return The integer that was at the given parse position. Or
+ * the remedy value if no digits were found.
+ */
+ public static int parseTimeField(String text, ParsePosition where,
+ int maxlen, int remedy)
+ {
+ if (text == null)
+ {
+ return remedy;
+ }
+ // (it would seem that DecimalFormat.parse() would be simpler;
+ // but that class blithely ignores setMaximumIntegerDigits)
+ int retval = 0;
+ int index = where.getIndex();
+ int limit = index + Math.min(maxlen, text.length()-index);
+ for (; index < limit; index++)
{
- StringBuffer buffer = new StringBuffer();
- TimeZone zone = date.getTimeZone();
- long offsetInMinutes = zone.getOffset( date.getTimeInMillis() )/1000/60;
- long hours = Math.abs( offsetInMinutes/60 );
- long minutes = Math.abs( offsetInMinutes%60 );
- buffer.append( "D:" );
- // PDFBOX-402 , SimpleDateFormat is not thread safe, created it when you use it.
- buffer.append( new SimpleDateFormat( "yyyyMMddHHmmss" , Locale.ENGLISH).format( date.getTime() ) );
- if( offsetInMinutes == 0 )
+ int cval = text.charAt(index) - '0'; // convert digit to integer
+ if (cval <0 || cval > 9) // test to see if we got a digit
{
- buffer.append( "Z" );
+ break; // no digit at index
}
- else if( offsetInMinutes < 0 )
- {
- buffer.append( "-" );
+ retval = retval*10 + cval; // append the digit to the return value
+ }
+ if (index == where.getIndex())
+ {
+ return remedy;
+ }
+ where.setIndex(index);
+ return retval;
+ }
+
+ /**
+ * Advances the ParsePosition past any and all the characters
+ * that match those in the optionals list.
+ * In particular, a space will skip all spaces.
+ * @param text The text to examine
+ * @param where index to start looking.
+ * The value is incremented by the number of optionals found.
+ * The error index is ignored and unchanged.
+ * @param optionals A String listing all the optional characters
+ * to be skipped.
+ * @return The last non-space character passed over.
+ * Returns a space if no non-space character was found
+ * (even if space is not in the optionals list.)
+ */
+ public static char skipOptionals(String text, ParsePosition where,
+ String optionals)
+ {
+ char retval = ' ', currch;
+ while (text != null && where.getIndex() < text.length()
+ && optionals.indexOf(
+ (currch=text.charAt(where.getIndex()))
+ ) >= 0)
+ {
+ retval = (currch != ' ') ? currch : retval;
+ where.setIndex(where.getIndex() + 1);
+ }
+ return retval;
+ }
+
+ /**
+ * If the victim string is at the given position in the text,
+ * this method advances the position past that string.
+ *
+ * @param text The text to examine
+ * @param victim The string to look for
+ * @param where The initial position to look at. After return, this will
+ * have been incremented by the length of the victim if it was found.
+ * The error index is ignored and unchanged.
+ * @return true if victim was found; otherwise false.
+ */
+ public static boolean skipString(String text, String victim, ParsePosition where)
+ {
+ if (text.startsWith(victim, where.getIndex()))
+ {
+ where.setIndex(where.getIndex()+victim.length());
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Construct a new GregorianCalendar and set defaults.
+ * Locale is ENGLISH.
+ * TimeZone is "UTC" (zero offset and no DST).
+ * Parsing is NOT lenient. Milliseconds are zero.
+ *
+ * @return a new gregorian calendar
+ */
+ public static GregorianCalendar newGreg()
+ {
+ GregorianCalendar retCal = new GregorianCalendar(Locale.ENGLISH);
+ retCal.setTimeZone(new SimpleTimeZone(0, "UTC"));
+ retCal.setLenient(false);
+ retCal.set(Calendar.MILLISECOND, 0);
+ return retCal;
+ }
+
+ /**
+ * Install a TimeZone on a GregorianCalendar without changing the
+ * hours value. A plain GregorianCalendar.setTimeZone()
+ * adjusts the Calendar.HOUR value to compensate. This is *BAD*
+ * (not to say *EVIL*) when we have already set the time.
+ * @param cal The GregorianCalendar whose TimeZone to change.
+ * @param tz The new TimeZone.
+ */
+ public static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz)
+ {
+ cal.setTimeZone(tz);
+ int offset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET))
+ / MILLIS_PER_HOUR;
+ cal.add(Calendar.HOUR, -offset);
+ }
+
+ /**
+ * Parses the end of a date string for a time zone and, if one is found,
+ * sets the time zone of the GregorianCalendar. Otherwise the calendar
+ * time zone is unchanged.
+ *
+ * The text is parsed as
+ * (Z|GMT|UTC)? [+- ]* h [': ]? m '?
+ * where the leading String is optional, h is two digits by default,
+ * but may be a single digit if followed by one of space, apostrophe,
+ * colon, or the end of string. Similarly, m is one or two digits.
+ * This scheme accepts the format of PDF, RFC 822, and ISO8601.
+ * If none of these applies (as for a time zone name), we try
+ * TimeZone.getTimeZone().
+ *
+ * @param text The text expected to begin with a time zone value,
+ * possibly with leading or trailing spaces.
+ * @param cal The Calendar whose TimeZone to set.
+ * @param initialWhere Scanning begins at initialWhere.index. After success, the returned
+ * index is that of the next character after the recognized string.
+ * The error index is ignored and unchanged.
+ * @return true if parsed a time zone value; otherwise the
+ * time zone is unchanged and the return value is false.
+ */
+ public static boolean parseTZoffset(String text, GregorianCalendar cal,
+ ParsePosition initialWhere)
+ {
+ ParsePosition where = new ParsePosition(initialWhere.getIndex());
+ TimeZone tz = new SimpleTimeZone(0, "GMT");
+ int tzHours, tzMin;
+ char sign = skipOptionals(text, where, "Z+- ");
+ boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where)
+ || skipString(text, "UTC", where));
+ sign = ( ! hadGMT) ? sign : skipOptionals(text, where, "+- ");
+
+ tzHours = parseTimeField(text, where, 2, -999);
+ skipOptionals(text, where, "\': ");
+ tzMin = parseTimeField(text, where, 2, 0);
+ skipOptionals(text, where, "\' ");
+
+ if (tzHours != -999)
+ { // we parsed a time zone in default format
+ int hrSign = (sign == '-' ? -1 :+1);
+ tz.setRawOffset(restrainTZoffset(hrSign*(tzHours*MILLIS_PER_HOUR + tzMin*MILLIS_PER_MINUTE)));
+ tz.setID("unknown");
+ }
+ else if ( ! hadGMT)
+ { // try to process as a name; "GMT" or "UTC" has already been processed
+ String tzText = text.substring(initialWhere.getIndex()).trim();
+ tz = TimeZone.getTimeZone(tzText);
+ // getTimeZone returns "GMT" for unknown ids
+ if ("GMT".equals(tz.getID()))
+ { // no timezone in text
+ // cal and initialWhere are unchanged
+ return false;
}
else
- {
- buffer.append( "+" );
- }
- if( hours < 10 )
- {
- buffer.append( "0" );
- }
- buffer.append( hours );
- buffer.append( "'" );
- if( minutes < 10 )
- {
- buffer.append( "0" );
+ { // we got a tz by name; use it
+ where.setIndex(text.length());
}
- buffer.append( minutes );
- buffer.append( "'" );
- retval = buffer.toString();
-
}
- return retval;
+ adjustTimeZoneNicely(cal, tz);
+ initialWhere.setIndex(where.getIndex());
+ return true;
}
-
+
/**
- * This will convert a string to a calendar.
- *
- * @param date The string representation of the calendar.
- *
- * @return The calendar that this string represents.
- *
- * @throws IOException If the date string is not in the correct format.
+ * Parses a big-endian date: year month day hour min sec.
+ * The year must be four digits. Other fields may be adjacent
+ * and delimited by length or they may follow appropriate delimiters.
+ * year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction]
+ * If any numeric field is omitted, all following fields must also be omitted.
+ * No time zone is processed.
+ *
+ * Ambiguous dates can produce unexpected results. For example:
+ * 1970 12 23:08 will parse as 1970 December 23 00:08:00
+ *
+ * @param text The string to parse.
+ *
+ * @param initialWhere Where to begin the parse. On return the index
+ * is advanced to just beyond the last character processed.
+ * The error index is ignored and unchanged.
+ *
+ * @return a GregorianCalendar representing the parsed date.
+ * Or null if the text did not begin with at least four digits.
*/
- public static Calendar toCalendar( COSString date ) throws IOException
+ public static GregorianCalendar parseBigEndianDate(String text,
+ ParsePosition initialWhere)
{
- Calendar retval = null;
- if( date != null )
+ ParsePosition where = new ParsePosition(initialWhere.getIndex());
+ int year = parseTimeField(text, where, 4, 0);
+ if (where.getIndex() != 4 + initialWhere.getIndex())
+ {
+ return null;
+ }
+ skipOptionals(text, where, "/- ");
+ int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11
+ skipOptionals(text, where, "/- ");
+ int day = parseTimeField(text, where, 2, 1);
+ skipOptionals(text, where, " T");
+ int hour = parseTimeField(text, where, 2, 0);
+ skipOptionals(text, where, ": ");
+ int minute = parseTimeField(text, where, 2, 0);
+ skipOptionals(text, where, ": ");
+ int second = parseTimeField(text, where, 2, 0);
+ char nextC = skipOptionals(text, where, ".");
+ if (nextC == '.')
{
- retval = toCalendar( date.getString() );
+ // fractions of a second: skip up to 19 digits
+ parseTimeField(text, where, 19, 0);
}
- return retval;
+ GregorianCalendar dest = newGreg();
+ try
+ {
+ dest.set(year, month, day, hour, minute, second);
+ dest.getTimeInMillis(); // trigger limit tests
+ }
+ catch (IllegalArgumentException ill)
+ {
+ return null;
+ }
+ initialWhere.setIndex(where.getIndex());
+ skipOptionals(text, initialWhere, " ");
+ return dest; // dest has at least a year value
}
/**
- * This will convert a string to a calendar.
- *
- * @param date The string representation of the calendar.
- *
- * @return The calendar that this string represents.
- *
- * @throws IOException If the date string is not in the correct format.
+ * See if text can be parsed as a date according to any of a list of
+ * formats. The time zone may be included as part of the format, or
+ * omitted in favor of later testing for a trailing time zone.
+ *
+ * @param text The text to be parsed.
+ *
+ * @param fmts A list of formats to be tried. The syntax is that for
+ * {@link java.text.SimpleDateFormat}
+ *
+ * @param initialWhere At start this is the position to begin
+ * examining the text. Upon return it will have been
+ * incremented to refer to the next non-space character after the date.
+ * If no date was found, the value is unchanged.
+ * The error index is ignored and unchanged.
+ *
+ * @return null for failure to find a date, or the GregorianCalendar
+ * for the date that was found. Unless a time zone was
+ * part of the format, the time zone will be GMT+0
*/
- public static Calendar toCalendar( String date ) throws IOException
+ public static GregorianCalendar parseSimpleDate(String text, String[] fmts,
+ ParsePosition initialWhere)
{
- Calendar retval = null;
- if( date != null && date.trim().length() > 0 )
+ for(String fmt : fmts)
{
- //these are the default values
- int year = 0;
- int month = 1;
- int day = 1;
- int hour = 0;
- int minute = 0;
- int second = 0;
- //first string off the prefix if it exists
- try
+ ParsePosition where = new ParsePosition(initialWhere.getIndex());
+ SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH);
+ GregorianCalendar retCal = newGreg();
+ sdf.setCalendar(retCal);
+ if (sdf.parse(text, where) != null)
{
- SimpleTimeZone zone = null;
- if( date.startsWith( "D:" ) )
- {
- date = date.substring( 2, date.length() );
- }
- if( date.length() < 4 )
- {
- throw new IOException( "Error: Invalid date format '" + date + "'" );
- }
- year = Integer.parseInt( date.substring( 0, 4 ) );
- if( date.length() >= 6 )
- {
- month = Integer.parseInt( date.substring( 4, 6 ) );
- }
- if( date.length() >= 8 )
- {
- day = Integer.parseInt( date.substring( 6, 8 ) );
- }
- if( date.length() >= 10 )
- {
- hour = Integer.parseInt( date.substring( 8, 10 ) );
- }
- if( date.length() >= 12 )
- {
- minute = Integer.parseInt( date.substring( 10, 12 ) );
- }
- if( date.length() >= 14 )
- {
- second = Integer.parseInt( date.substring( 12, 14 ) );
- }
+ initialWhere.setIndex(where.getIndex());
+ skipOptionals(text, initialWhere, " ");
+ return retCal;
+ }
+ }
+ return null;
+ }
- if( date.length() >= 15 )
- {
- char sign = date.charAt( 14 );
- if( sign == 'Z' )
- {
- zone = new SimpleTimeZone(0,"Unknown");
- }
- else
- {
- int hours = 0;
- int minutes = 0;
- if( date.length() >= 17 )
- {
- if( sign == '+' )
- {
- //parseInt cannot handle the + sign
- hours = Integer.parseInt( date.substring( 15, 17 ) );
- }
- else if (sign == '-')
- {
- hours = -Integer.parseInt(date.substring(15,17));
- }
- else
- {
- hours = -Integer.parseInt( date.substring( 14, 16 ) );
- }
- }
- if( date.length() > 20 )
- {
- minutes = Integer.parseInt( date.substring( 18, 20 ) );
- }
- zone = new SimpleTimeZone( hours*60*60*1000 + minutes*60*1000, "Unknown" );
- }
- }
- if( zone != null )
- {
- retval = new GregorianCalendar( zone );
- }
- else
- {
- retval = new GregorianCalendar();
- }
+
+ /**
+ * Parses a String to see if it begins with a date, and if so,
+ * returns that date. The date must be strictly correct--no
+ * field may exceed the appropriate limit.
+ * (That is, the Calendar has setLenient(false).)
+ * Skips initial spaces, but does NOT check for "D:"
+ *
+ * The scan first tries parseBigEndianDate and parseTZoffset
+ * and then tries parseSimpleDate with appropriate formats,
+ * again followed by parseTZoffset. If at any stage the entire
+ * text is consumed, that date value is returned immediately.
+ * Otherwise the date that consumes the longest initial part
+ * of the text is returned.
+ *
+ * - PDF format dates are among those recognized by parseBigEndianDate.
+ * - The formats tried are ALPHA_START_FORMATS or DIGIT_START_FORMATS and
+ * any listed in the value of moreFmts.
+ *
+ * @param text The String that may begin with a date. Must not be null.
+ * Initial spaces are skipped over; a leading "D:" is not.
+ * @param moreFmts Additional formats to be tried after trying the
+ * built-in formats.
+ * @param initialWhere Parsing begins at the given position in text. If the
+ * parse succeeds, the index of initialWhere is advanced to point
+ * to the first unrecognized character.
+ * The error index is ignored and unchanged.
+ * @return A GregorianCalendar for the date. If no date is found,
+ * returns null. The time zone will be GMT+0 unless parsing
+ * succeeded with a format containing a time zone. (Only one
+ * builtin format contains a time zone.)
+ *
+ */
+ public static Calendar parseDate(String text, String[] moreFmts,
+ ParsePosition initialWhere)
+ {
+ // place to remember longest date string
+ int longestLen = -999999; // theorem: this value will never be used
+ // proof: longestLen is only used if longestDate is not null
+ GregorianCalendar longestDate = null; // null says no date found yet
+ int whereLen; // tempcopy of where.getIndex()
+
+ ParsePosition where = new ParsePosition(initialWhere.getIndex());
+ // check for null (throws exception) and trim off surrounding spaces
+ skipOptionals(text, where, " ");
+ int startPosition = where.getIndex();
+
+ // try big-endian parse
+ GregorianCalendar retCal = parseBigEndianDate(text, where);
+ // check for success and a timezone
+ if (retCal != null &&
+ (where.getIndex() == text.length()
+ || parseTZoffset(text, retCal, where)))
+ {
+ // if text is fully consumed, return the date
+ // else remember it and its length
+ whereLen = where.getIndex();
+ if (whereLen == text.length())
+ {
+ initialWhere.setIndex(whereLen);
+ return retCal;
+ }
+ longestLen = whereLen;
+ longestDate = retCal;
+ }
- retval.set(year, month-1, day, hour, minute, second );
- // PDFBOX-598: PDF dates are only accurate up to a second
- retval.set(Calendar.MILLISECOND, 0);
+ // try one of the sets of standard formats
+ where.setIndex(startPosition);
+ String [] formats
+ = Character.isDigit(text.charAt(startPosition))
+ ? DIGIT_START_FORMATS
+ : ALPHA_START_FORMATS;
+ retCal = parseSimpleDate(text, formats, where);
+ // check for success and a timezone
+ if (retCal != null &&
+ (where.getIndex() == text.length()
+ || parseTZoffset(text, retCal, where)))
+ {
+ // if text is fully consumed, return the date
+ // else remember it and its length
+ whereLen = where.getIndex();
+ if (whereLen == text.length())
+ {
+ initialWhere.setIndex(whereLen);
+ return retCal;
}
- catch( NumberFormatException e )
+ if (whereLen > longestLen)
{
- for( int i=0; retval == null && i<POTENTIAL_FORMATS.length; i++ )
- {
- try
- {
- Date utilDate = POTENTIAL_FORMATS[i].parse( date );
- retval = new GregorianCalendar();
- retval.setTime( utilDate );
- }
- catch( ParseException pe )
- {
- //ignore and move to next potential format
- }
- }
- if( retval == null )
+ longestLen = whereLen;
+ longestDate = retCal;
+ }
+ }
+
+ // try the supplied formats
+ if (moreFmts != null)
+ {
+ where.setIndex(startPosition);
+ retCal = parseSimpleDate(text, moreFmts, where);
+ if (retCal != null &&
+ (where.getIndex() == text.length()
+ || parseTZoffset(text, retCal, where)))
+ {
+ whereLen = where.getIndex();
+ // if text is fully consumed, return the date
+ // else remember it and its length
+ if (whereLen == text.length() ||
+ (longestDate != null && whereLen > longestLen))
{
- //we didn't find a valid date format so throw an exception
- throw new IOException( "Error converting date:" + date );
+ initialWhere.setIndex(whereLen);
+ return retCal;
}
}
}
- return retval;
- }
-
- private static final void zeroAppend( StringBuffer out, int number )
- {
- if( number < 10 )
+ if (longestDate != null)
{
- out.append( "0" );
+ initialWhere.setIndex(longestLen);
+ return longestDate;
}
- out.append( number );
+ return retCal;
}
-
+
/**
- * Convert the date to iso 8601 string format.
+ * Converts a string to a Calendar by parsing the String for a date.
+ * @see #toCalendar(String)
*
- * @param cal The date to convert.
- * @return The date represented as an ISO 8601 string.
+ * The returned value will have 0 for DST_OFFSET.
+ *
+ * @param text The COSString representation of a date.
+ * @return The Calendar that the text string represents.
+ * Or null if text was null.
+ * @throws IOException If the date string is not in the correct format.
+ * @deprecated This method throws an IOException for failure. Replace
+ * calls to it with {@code toCalendar(text.getString(), null)}
+ * and test for failure with
+ * (value == null || value.get(Calendar.YEAR) == INVALID_YEAR)
*/
- public static String toISO8601( Calendar cal )
+ public static Calendar toCalendar(COSString text) throws IOException
{
- StringBuffer retval = new StringBuffer();
-
- retval.append( cal.get( Calendar.YEAR ) );
- retval.append( "-" );
- zeroAppend( retval, cal.get( Calendar.MONTH )+1 );
- retval.append( "-" );
- zeroAppend( retval, cal.get( Calendar.DAY_OF_MONTH ) );
- retval.append( "T" );
- zeroAppend( retval, cal.get( Calendar.HOUR_OF_DAY ));
- retval.append( ":" );
- zeroAppend( retval, cal.get( Calendar.MINUTE ));
- retval.append( ":" );
- zeroAppend( retval, cal.get( Calendar.SECOND ));
-
- int timeZone = cal.get( Calendar.ZONE_OFFSET ) + cal.get(Calendar.DST_OFFSET );
- if( timeZone < 0 )
+ if (text == null)
{
- retval.append( "-" );
+ return null;
}
- else
+ return toCalendar(text.getString());
+ }
+
+ /**
+ * Converts a string date to a Calendar date value; equivalent to
+ * {@link #toCalendar(String, String[])} with null for the formats,
+ * but throws an IOException for failure.
+ *
+ * The returned value will have 0 for DST_OFFSET.
+ *
+ * @param text The string representation of the calendar.
+ * @return The Calendar that this string represents
+ * or null if the incoming text is null.
+ * @throws IOException If the date string is non-null
+ * and not a parseable date.
+ * @deprecated This method throws an IOException for failure. Replace
+ * calls to it with {@code toCalendar(text, null)}
+ * and test for failure with
+ * (value == null || value.get(Calendar.YEAR) == INVALID_YEAR)
+ */
+ public static Calendar toCalendar(String text) throws IOException
+ {
+ if (text == null)
{
- retval.append( "+" );
+ return null;
}
- timeZone = Math.abs( timeZone );
- //milliseconds/1000 = seconds = seconds / 60 = minutes = minutes/60 = hours
- int hours = timeZone/1000/60/60;
- int minutes = (timeZone - (hours*1000*60*60))/1000/1000;
- if( hours < 10 )
+ Calendar val = toCalendar(text, null);
+ if (val != null && val.get(Calendar.YEAR) == INVALID_YEAR)
{
- retval.append( "0" );
+ throw new IOException("Error converting date: " + text);
}
- retval.append( Integer.toString( hours ) );
- retval.append( ":" );
- if( minutes < 10 )
+ return val;
+ }
+
+ /**
+ * Converts a string to a calendar. The entire string must be consumed.
+ * The date must be strictly correct; that is, no field may exceed
+ * the appropriate limit. Uses {@link #parseDate} to do the actual parsing.
+ *
+ * The returned value will have 0 for DST_OFFSET.
+ *
+ * @param text The text to parse. Initial spaces and "D:" are skipped over.
+ * @param moreFmts An Array of formats (as Strings) to try
+ * in addition to the standard list.
+ * @return the Calendar value corresponding to the date text.
+ * If text does not represent a valid date,
+ * the value is January 1 on year INVALID_YEAR at 0:0:0 GMT.
+ *
+ */
+ public static Calendar toCalendar(String text, String[] moreFmts)
+ {
+ ParsePosition where = new ParsePosition(0);
+ skipOptionals(text, where, " ");
+ skipString(text, "D:", where);
+ Calendar retCal = parseDate(text, moreFmts, where); // PARSE THE TEXT
+ if (retCal == null || where.getIndex() != text.length())
{
- retval.append( "0" );
+ // the date string is invalid for all formats we tried,
+ retCal = newGreg();
+ retCal.set(INVALID_YEAR, 0, 1, 0, 0, 0);
}
- retval.append( Integer.toString( minutes ) );
-
- return retval.toString();
+ return retCal;
}
}