You are viewing a plain text version of this content. The canonical link for it is here.

Posted to users@pdfbox.apache.org by Roberto Nibali <rn...@gmail.com> on 2015/06/28 19:14:07 UTC

Fwd: Migrate form field entries from one pdf to another

Hi

I'm working on a project that involves the migration of existing PDFs (with
filled forms) to the new template PDFs. The new templates should contain
the same fully qualified field names, so basically my naive approach was to:

1. Scan the original PDF (input PDF) and put all found PDfield entries into
a map
2. Open the empty template PDF and for each PDfield entry (the key is the
field's fqName) fill up the field accordingly
3. Save the modified template to an output PDF.

In theory this sounds great, but it's not working as I expect it to. All
PDTextbox type fields are correctly migrated in the output PDF, however the
PDCheckbox, PDRadioCollection, and PDPushButton type fields are not
migrated. Do I have to write specific code for that?

I have tried three different attempts at migrating fields and none work. I
have not found anything that helps on the usual suspects, such as google,
stackoverflow, the source code or the examples of PDFBox. I'm using PDFBox
1.8.9 and the tool is designed to be a CLI, so the other Maven dependency I
have (not that this should matter), is argparse4j. The PDFs do not contain
XFA data.

This is the code that fills up the map:

/**
 * Opens the input PDF and feeds every top-level AcroForm field to
 * {@link #traverseFields(PDField)}, which prints it and records filled-in fields.
 *
 * <p>Failures while loading or walking the form are logged via {@code logerr} and
 * not rethrown; the document is closed exactly once, in the {@code finally} block.
 *
 * @throws IOException if closing the document fails
 */
private void extractFields() throws IOException {
    PDDocument oldPDF = null;
    try {
        logerr("DEBUG: Opening " + inputPDF);
        oldPDF = pdfFormMigrator.loadPDFhandlingEncryption(inputPDF, PDFPassword, true);

        // A PDF without interactive form has no AcroForm entry; bail out instead of
        // running into a NullPointerException whose message would be logged as "null".
        if (oldPDF.getDocumentCatalog().getAcroForm() == null) {
            logerr("No AcroForm found in " + inputPDF);
            return;
        }

        @SuppressWarnings("unchecked")
        List<PDField> fields = oldPDF.getDocumentCatalog().getAcroForm().getFields();
        for (PDField pdField : fields) {
            traverseFields(pdField);
        }
    } catch (Exception e) {
        // Some exceptions (e.g. NullPointerException) carry no message; fall back to toString().
        logerr(e.getMessage() != null ? e.getMessage() : e.toString());
    } finally {
        // Single point of cleanup: the document is closed here on success and on failure.
        if (oldPDF != null) {
            oldPDF.close();
        }
    }
}

/**
 * Recursively walks the field tree rooted at {@code field}.
 *
 * <p>A field that reports kids is not recorded itself; only those kids that are
 * {@code PDField} instances are descended into (other kid objects are ignored).
 * A field without kids is printed via {@code analyseAndPrintFields} and, unless it
 * is a signature field or has a {@code null} value, stored in {@code pdfFormElement}
 * under its fully qualified name.
 *
 * @param field the field to inspect
 * @throws IOException if reading the field fails
 */
private void traverseFields(PDField field) throws IOException {
    List<COSObjectable> children = field.getKids();

    if (children == null) {
        // Leaf field: print it, then remember its value if there is one worth migrating.
        analyseAndPrintFields(field);
        if (field instanceof PDSignatureField || field.getValue() == null) {
            return;
        }
        //TODO: maybe field.getActions();
        PDFFormElement element =
                new PDFFormElement(field.getValue(), field.getClass(), field.getFieldFlags());
        pdfFormElement.put(field.getFullyQualifiedName(), element);
        return;
    }

    // Non-terminal field: recurse into the kids that are themselves fields.
    for (COSObjectable child : children) {
        if (child instanceof PDField) {
            traverseFields((PDField) child);
        }
    }
}

/**
 * Loads a PDF, decrypts it with {@code password} when it is encrypted, and logs
 * whether the AcroForm carries XFA data.
 *
 * <p>If anything fails after the file has been loaded, the document is closed before
 * the exception propagates, so no open document leaks to the caller.
 *
 * @param filename       path of the PDF to load
 * @param password       password used to open the document if it is encrypted
 * @param removeSecurity passed to {@code setAllSecurityToBeRemoved}
 * @return the opened document; the caller is responsible for closing it
 */
private PDDocument loadPDFhandlingEncryption(String filename, String password, boolean removeSecurity)
        throws IOException, CryptographyException, BadSecurityHandlerException,
        ParserConfigurationException, SAXException {
    PDDocument pdDocument = PDDocument.load(filename);
    boolean loaded = false;
    try {
        if (pdDocument.isEncrypted()) {
            pdDocument.openProtection(new StandardDecryptionMaterial(password));
        }
        pdDocument.setAllSecurityToBeRemoved(removeSecurity);

        // Documents without an interactive form have no AcroForm; treat that as "no XFA".
        PDXFA xr = null;
        if (pdDocument.getDocumentCatalog().getAcroForm() != null) {
            xr = pdDocument.getDocumentCatalog().getAcroForm().getXFA();
        }
        if (xr != null) {
            logmsg("Found XFA data:");
            logmsg(xr.getDocument().toString());
        } else {
            logmsg("No XFA data in stream");
        }

        loaded = true;
        return pdDocument;
    } finally {
        if (!loaded) {
            try {
                pdDocument.close();
            } catch (IOException ignored) {
                // Deliberately ignored: the exception that aborted loading is the one to report.
            }
        }
    }
}

Option CLONE (with two variants):

/**
 * CLONE variant: exports the form data of the old PDF as FDF, imports it into the
 * template PDF, then encrypts and saves the template to {@code outputPDF}.
 *
 * <p>Failures are logged via {@code logerr} and not rethrown. All three documents
 * (template, old PDF, FDF export) are closed exactly once in the {@code finally}
 * block, and a failing {@code close()} does not prevent the remaining ones.
 *
 * @throws IOException if closing one of the documents fails
 */
private void executeCmdClone() throws IOException {
    // Mapping the forms from the old to the new pdf
    PDDocument templatePDF = null;
    PDDocument oldPDF = null;
    FDFDocument exportFDF = null;

    try {
        logerr("DEBUG: Opening template: " + formTemplatePDF);
        templatePDF = pdfFormMigrator.loadPDFhandlingEncryption(formTemplatePDF, PDFPassword, true);

        logerr("DEBUG: Opening old: " + inputPDF);
        oldPDF = pdfFormMigrator.loadPDFhandlingEncryption(inputPDF, PDFPassword, true);

        /* TODO: Does not work!!!!
         * NOTE(review): the second loop below reads the fields of oldPDF again instead of
         * templatePDF, so this variant never touches the template at all.
        HashMap<String, PDAcroForm> pdAcroFormMap = new HashMap<>();
        @SuppressWarnings("unchecked")
        List<PDField> oldFields = oldPDF.getDocumentCatalog().getAcroForm().getFields();
        for (PDField pdField : oldFields) {
            pdAcroFormMap.put(pdField.getFullyQualifiedName(), pdField.getAcroForm());
            pdField.getAcroForm().exportFDF();
        }

        @SuppressWarnings("unchecked")
        List<PDField> templateFields = oldPDF.getDocumentCatalog().getAcroForm().getFields();
        for (PDField pdField : templateFields) {
            pdField.setAcroForm(pdAcroFormMap.get(pdField.getFullyQualifiedName()));
        }*/

        // Either document may lack an interactive form; report that instead of an NPE.
        if (oldPDF.getDocumentCatalog().getAcroForm() == null
                || templatePDF.getDocumentCatalog().getAcroForm() == null) {
            logerr("No AcroForm found in " + inputPDF + " or " + formTemplatePDF);
            return;
        }

        exportFDF = oldPDF.getDocumentCatalog().getAcroForm().exportFDF();
        templatePDF.getDocumentCatalog().getAcroForm().importFDF(exportFDF);

        templatePDF.encrypt(PDFPassword, "");
        templatePDF.save(outputPDF);
    } catch (Exception e) {
        // Some exceptions (e.g. NullPointerException) carry no message; fall back to toString().
        logerr(e.getMessage() != null ? e.getMessage() : e.toString());
    } finally {
        // Nested try/finally so that one failing close() cannot skip the others.
        // The FDF export is only closed after the template has been saved.
        try {
            if (exportFDF != null) {
                exportFDF.close();
            }
        } finally {
            try {
                if (templatePDF != null) {
                    templatePDF.close();
                }
            } finally {
                if (oldPDF != null) {
                    oldPDF.close();
                }
            }
        }
    }
}

Option MIGRATE:

private void executeCmdMigrate() throws IOException {
    // Mapping the forms from the old to the new pdf
    PDDocument templatePDF = null;
    try {
        logerr("DEBUG: Opening " + formTemplatePDF);
        templatePDF =
pdfFormMigrator.loadPDFhandlingEncryption(formTemplatePDF,
PDFPassword, true);
        for (String keyEntry : pdfFormElement.keySet()) {
            PDFFormElement pdfFormElement =
PDFFormMigrator.pdfFormElement.get(keyEntry);
            pdfFormMigrator.setField(templatePDF, keyEntry,
                    (String) pdfFormElement.getValue(),
                    pdfFormElement.getFieldFlags());
        }
        //TODO: Figure out how to map access permissions for the user
that currently opened the PDF
        //templatePDF.protect(pdfFormMigrator.applyDefaultProtection());
        //templatePDF.protect(pdfFormMigrator.applyProtection(accessPermissions));
        templatePDF.encrypt(PDFPassword, "");
        templatePDF.save(outputPDF);
        templatePDF.close();
    } catch (Exception e) {
        logerr(e.getMessage());
    } finally {
        if (templatePDF != null) {
            templatePDF.close();
        }
    }
}

public void setField(PDDocument pdfDocument, String name, String
value, int flags) throws IOException {
    PDDocumentCatalog docCatalog = pdfDocument.getDocumentCatalog();
    PDAcroForm acroForm = docCatalog.getAcroForm();
    PDField field = acroForm.getField(name);
    if (field != null) {
        logmsg("Setting field: " + name + " to value: " + value + "
with flags: " + flags);
        field.setValue(value);
        if (setFieldFlags) {
            field.setFieldFlags(flags);
        }
    } else {
        logerr("No field found with name: " + name);
    }
}

Option REPLACE:

This option really is different to the others, since instead of migrating
fields, I try to apply the text changes that had been performed from the
old template to the new template. This would probably be the most elegant
way, however the output PDF looks completely illegible after the search
and replace. The fonts are out of control and the text is all over the page
after the transformation. Here is the code:

private void executeCmdReplace() throws IOException {
    PDDocument oldPDF = null;

    try {
        logerr("DEBUG: Opening old: " + inputPDF);
        oldPDF = pdfFormMigrator.loadPDFhandlingEncryption(inputPDF,
PDFPassword, true);

        List pages = oldPDF.getDocumentCatalog().getAllPages();
        for (int i = 0; i < pages.size(); i++) {
            PDPage page = (PDPage) pages.get(i);
            PDStream contents = page.getContents();
            PDFStreamParser parser = new PDFStreamParser(contents.getStream());
            parser.parse();
            List tokens = parser.getTokens();
            for (int j = 0; j < tokens.size(); j++) {
                Object next = tokens.get(j);
                if (next instanceof PDFOperator) {
                    PDFOperator op = (PDFOperator) next;
                    if (op.getOperation().equals("Tj")) {
                        COSString previous = (COSString) tokens.get(j - 1);
                        String string = previous.getString();
                        string = string.replaceFirst("VISA", "Visa");
                        previous.reset();
                        previous.append(string.getBytes("ISO-8859-1"));
                    } else if (op.getOperation().equals("TJ")) {
                        COSArray previous = (COSArray) tokens.get(j - 1);
                        for (int k = 0; k < previous.size(); k++) {
                            Object arrElement = previous.getObject(k);
                            if (arrElement instanceof COSString) {
                                COSString cosString = (COSString) arrElement;
                                String string = cosString.getString();
                                string = string.replaceFirst("VISA", "Visa");
                                cosString.append(string.getBytes("ISO-8859-1"));
                            }
                        }
                    }
                }
            }
            // now that the tokens are updated we will replace the
page content stream.
            PDStream updatedStream = new PDStream(oldPDF);
            OutputStream out = updatedStream.createOutputStream();
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
            page.setContents(updatedStream);
        }
        oldPDF.save(outputPDF);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (oldPDF != null) {
            try {
                oldPDF.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}


This is also the least desired path of solution, since the client cannot
guarantee that the changes in templates were only text-based.

Next steps on my side: I'll try to extend the migration code to deal with
those special type fields now, just to see if I can get better results.

Due to contractual NDAs, I am unable to share any PDFs, however I'll try to
come up with a possibility, if needed. I'm also open to have a private
Skype or Teamviewer session with a person knowledgeable about this.
Unfortunately, the project has to be delivered by the end of the month, and
I just took over from my colleague on Friday, since he's had a family
emergency.

Any pointers would be greatly appreciated.

Thanks and best regards

Roberto

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi Maruan


>
> > This is highly confusing. Why can Acrobat deal with those checkboxes when
> > their value is null and why can't PDFBox set Checkbox values?
> >
> > How can I simply clone all static PDF form entries of a PDF into a new
> PDF?
> > Is PDF really that complex that such a simple thing is not possible?
> Right
> > now, only text form entries are copied, the rest shows null for
> getValue().
>
> the reason that getValue() returns null is that there is no value entry
> set for the filled out form field (this is held in the field dictionaries
> /V entry). But isChecked() returns true as the checkbox has been checked.
> This is bases on the appearance state of the checkbox.
>

I see; slowly I'm seeing the gist here. PDF truly is a tricky format and it
hides it so well from the everyday users through the "Acrobat" tools.


> To give you a quick explanation of that. When a form field is filled out
> the value of the form field has to be filled. But that won't give you any
> visual information. To add the visual information the form field has a
> annotation assigned to it which will have whats's called an appearance. The
> appearance is what's visible on screen or when the pdf is being printed.
>

Understood, albeit from a first notion point of view, this seems an overly
complex architecture. I'm sure there must be reasons for this. Thanks to
your explanation I am finally starting to see the bigger picture.


> Normally an application set the value AND the appearance when the form
> field is filled. In you case the form filling application hasn't set the
> field value (that's why getValue() return null) and ONLY updated the
> appearance.
>

One of the applications used is the notoriously bad choice of InDesign to
create form fields, the test PDFs I created using the Adobe Acrobat Pro
tool for Mac, which I downloaded for a one month evaluation period. I used
the original PDFs and stripped out everything that would otherwise have
identified the origins of the PDF and removed all entries but a few test
form fields. Then I exchanged the partial fonts for the fields with some
available ones (I believe it was Garamond). Reading through Tilman's
replies, I learned that this also led to issues with regard to font
handling.


> So to transfer the value from you original form to the new template you
> have to
>
> a) see if getValue() return anything but null. If that is the case use
> setValue() with the value provided by getValue() to fill out the
> corresponding field in your template
> b) if getValue() is null check using isChecked() if the checkbox has been
> checked. If this is the case use check() to check the checkbox
>

I thought that that's what I did after your last suggestion (where you
wrote exactly those two lines as well), however I have the distinct feeling
that I did something else wrong. Tilman Hausherr kindly provided me with
some test code that seems to work for the test PDF cases I provided. I have
already spotted one basic mistake in my code after quickly glancing over
his. The notion to clone fields from one PDF to another seems to involve
the instantiation of a new PDField object in the template PDF. I had
assumed that assigning the values of the fields read from the originating
PDF to the template PDF would be enough. Never would it have occurred to me
that one needs to instantiate a new PDField object.

Anyway, I'll rewrite my code again to incorporate all this new knowledge
and update to a SNAPSHOT version of PDFBox. Unfortunately, I have no idea
how to use automatic references in Maven so the newest SVN trunk state is
checked out and a JAR is generated as a reference. If the project were done
using git, one could use the https://jitpack.io/ add-on for Maven.

In fact, the canonical source SCM is SVN at apache.org:
https://svn.apache.org/repos/asf/pdfbox/
There is a copy/sync at github: https://github.com/apache/pdfbox, however
it only syncs the old 1.8.9 tree, not the current 2.0.0 snapshot tree.

I suppose that using the latest SNAPSHOT of PDFBox and all dependencies
should suffice for my test case.

We have done some changes to how checkboxes and radio buttons are handled
> in PDFBox 2.0 within the last dates (to make it easier to work with them)
> so please use the latests snapshot version of PDFBox.
>
> There will be an issue with the test template when you set the Name and
> Prename field as the field definition is incomplete (the font resource is
> missing) which will lead to an exception
>
> java.io.IOException: Could not find font: /Courier
>

That's because I probably just didn't know what I was doing when stripping
down the original PDF to provide you guys with a test case. I will
certainly try the new code with the real PDFs and report back as soon as
things progressed.


> The easiest would be to correct the template. If that's not possible we
> could help you building a short workaround. But as the template you
> provided was only a quick mock up and not the real one the final template
> might not have the issue.
>
> If you need further assistance please let us know.
>

Thanks so much!!!

Best regards

Roberto

Re: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 07.07.2015 um 13:20 schrieb Roberto Nibali:
> Hi
>
> On Mon, Jul 6, 2015 at 10:14 PM, Tilman Hausherr <TH...@t-online.de>
> wrote:
>
>> Am 06.07.2015 um 18:15 schrieb Maruan Sahyoun:
>>
>>> There will be an issue with the test template when you set the Name and
>>> Prename field as the field definition is incomplete (the font resource is
>>> missing) which will lead to an exception
>>>
>>> java.io.IOException: Could not find font: /Courier
>>>
>>> The easiest would be to correct the template. If that's not possible we
>>> could help you building a short workaround. But as the template you
>>> provided was only a quick mock up and not the real one the final template
>>> might not have the issue.
>>>
> I have managed to write a TestNG class using the newest
> PDFBox-2.0.0-SNAPSHOT jar und include your source code. Now, I'm getting
> this error above as well. So far so good!
>
>
>>   I just tried a quick and dirty solution, I changed
>> PDAppearanceString.getFont(), the "if (font == null)" segment is new:
>>
>>      public PDFont getFont() throws IOException
>>      {
>>          COSName name = getFontResourceName();
>>          PDFont font = defaultResources.getFont(name);
>>
>>          if (font == null)
>>          {
>>              if ("Courier".equals(name.getName()))
>>              {
>>                  COSDictionary dict = new COSDictionary();
>>                  dict.setName(COSName.BASE_FONT, "Courier");
>>                  dict.setName(COSName.NAME, "Courier");
>>                  dict.setName(COSName.SUBTYPE, "Type1");
>>                  dict.setName(COSName.TYPE, "Font");
>>
>>                  font = PDFontFactory.createFont(dict);
>>              }
>>          }
>>
>>          // todo: handle cases where font == null with special mapping
>> logic (see PDFBOX-2661)
>>          if (font == null)
>>          {
>>              throw new IOException("Could not find font: /" +
>> name.getName());
>>          }
>>
>>          return font;
>>      }
>>
> Where would I need to do this? In my code, it won't work, since
> getFontResourceName() and defaultResources.getFont() or not known.
> @Override also does not seem to work. With this I'm stuck at the moment.

This was meant to be a change in PDFBox itself, I assumed you were 
building from source.

But in the meantime you wrote me that you don't get the font problem 
with your production PDF, so the issue is moot.

Tilman

>
>
>> Now I was able to set the text fields
>> (Roberto: in the file I sent to you earlier, uncomment
>> "newTextField.setValue(textField.getValue());")
>>
> I did, however this prompts me with the above error, since I can't find a
> place where to put the getFont() code. When commented, I can reproduce your
> results and the resulting PDF does have the checkboxes set!!!! :)
>
> Will keep trying ...
>
> Thanks for the tremendous help.
>


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi

On Mon, Jul 6, 2015 at 10:14 PM, Tilman Hausherr <TH...@t-online.de>
wrote:

> Am 06.07.2015 um 18:15 schrieb Maruan Sahyoun:
>
>> There will be an issue with the test template when you set the Name and
>> Prename field as the field definition is incomplete (the font resource is
>> missing) which will lead to an exception
>>
>> java.io.IOException: Could not find font: /Courier
>>
>> The easiest would be to correct the template. If that's not possible we
>> could help you building a short workaround. But as the template you
>> provided was only a quick mock up and not the real one the final template
>> might not have the issue.
>>
>

I have managed to write a TestNG class using the newest
PDFBox-2.0.0-SNAPSHOT jar and included your source code. Now, I'm getting
this error above as well. So far so good!


>  I just tried a quick and dirty solution, I changed
> PDAppearanceString.getFont(), the "if (font == null)" segment is new:
>
>     public PDFont getFont() throws IOException
>     {
>         COSName name = getFontResourceName();
>         PDFont font = defaultResources.getFont(name);
>
>         if (font == null)
>         {
>             if ("Courier".equals(name.getName()))
>             {
>                 COSDictionary dict = new COSDictionary();
>                 dict.setName(COSName.BASE_FONT, "Courier");
>                 dict.setName(COSName.NAME, "Courier");
>                 dict.setName(COSName.SUBTYPE, "Type1");
>                 dict.setName(COSName.TYPE, "Font");
>
>                 font = PDFontFactory.createFont(dict);
>             }
>         }
>
>         // todo: handle cases where font == null with special mapping
> logic (see PDFBOX-2661)
>         if (font == null)
>         {
>             throw new IOException("Could not find font: /" +
> name.getName());
>         }
>
>         return font;
>     }
>

Where would I need to do this? In my code, it won't work, since
getFontResourceName() and defaultResources.getFont() are not known.
@Override also does not seem to work. With this I'm stuck at the moment.


> Now I was able to set the text fields
> (Roberto: in the file I sent to you earlier, uncomment
> "newTextField.setValue(textField.getValue());")
>

I did, however this prompts me with the above error, since I can't find a
place where to put the getFont() code. When commented, I can reproduce your
results and the resulting PDF does have the checkboxes set!!!! :)

Will keep trying ...

Thanks for the tremendous help.

Re: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 06.07.2015 um 18:15 schrieb Maruan Sahyoun:
> There will be an issue with the test template when you set the Name and Prename field as the field definition is incomplete (the font resource is missing) which will lead to an exception
>
> java.io.IOException: Could not find font: /Courier
>
> The easiest would be to correct the template. If that's not possible we could help you building a short workaround. But as the template you provided was only a quick mock up and not the real one the final template might not have the issue.

I just tried a quick and dirty solution, I changed 
PDAppearanceString.getFont(), the "if (font == null)" segment is new:

     public PDFont getFont() throws IOException
     {
         COSName name = getFontResourceName();
         PDFont font = defaultResources.getFont(name);

         if (font == null)
         {
             if ("Courier".equals(name.getName()))
             {
                 COSDictionary dict = new COSDictionary();
                 dict.setName(COSName.BASE_FONT, "Courier");
                 dict.setName(COSName.NAME, "Courier");
                 dict.setName(COSName.SUBTYPE, "Type1");
                 dict.setName(COSName.TYPE, "Font");

                 font = PDFontFactory.createFont(dict);
             }
         }

         // todo: handle cases where font == null with special mapping 
logic (see PDFBOX-2661)
         if (font == null)
         {
             throw new IOException("Could not find font: /" + 
name.getName());
         }

         return font;
     }


Now I was able to set the text fields
(Roberto: in the file I sent to you earlier, uncomment 
"newTextField.setValue(textField.getValue());")

However the resources have two identical fonts now, /F2 and /Courier.

Tilman

---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

Hi,

<snip>
….
</snip>

> 
> This is highly confusing. Why can Acrobat deal with those checkboxes when
> their value is null and why can't PDFBox set Checkbox values?
> 
> How can I simply clone all static PDF form entries of a PDF into a new PDF?
> Is PDF really that complex that such a simple thing is not possible? Right
> now, only text form entries are copied, the rest shows null for getValue().

The reason that getValue() returns null is that there is no value entry set for the filled out form field (this is held in the field dictionary's /V entry). But isChecked() returns true as the checkbox has been checked. This is based on the appearance state of the checkbox.

To give you a quick explanation of that: when a form field is filled out, the value of the form field has to be filled. But that won't give you any visual information. To add the visual information, the form field has an annotation assigned to it which will have what's called an appearance. The appearance is what's visible on screen or when the PDF is being printed.

Normally an application sets the value AND the appearance when the form field is filled. In your case the form filling application hasn't set the field value (that's why getValue() returns null) and ONLY updated the appearance.

So to transfer the value from your original form to the new template you have to

a) see if getValue() returns anything but null. If that is the case use setValue() with the value provided by getValue() to fill out the corresponding field in your template
b) if getValue() is null check using isChecked() if the checkbox has been checked. If this is the case use check() to check the checkbox

We have made some changes to how checkboxes and radio buttons are handled in PDFBox 2.0 within the last few days (to make it easier to work with them), so please use the latest snapshot version of PDFBox.

There will be an issue with the test template when you set the Name and Prename field as the field definition is incomplete (the font resource is missing) which will lead to an exception

java.io.IOException: Could not find font: /Courier

The easiest would be to correct the template. If that's not possible we could help you building a short workaround. But as the template you provided was only a quick mock up and not the real one the final template might not have the issue.

If you need further assistance please let us know.

BR
Maruan



> 
> Cheers
> Roberto


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi

Sorry for the late reply, I was travelling for business.

> I did a quick test with a newly created form using Adobe Acrobat and
> setting the checkbox also with Acrobat. There the value is not null when
> the checkbox has been checked.
> >
> > I have attached now PDFs, where with my tool the value is null.
>
>
> unfortunately the attachments didn't make it through the mailing list.
> Could you upload them to a public location?
>

It took me a while, before I realized that google offers the technology I
need:

https://drive.google.com/file/d/0B7Bzk_1dcyc5SmRpQUJPR3JGUkk/view?usp=sharing
https://drive.google.com/file/d/0B7Bzk_1dcyc5Tk1qcVo2Yk02dTA/view?usp=sharing

Those are the files I tried to send to this mailing list earlier.


> >
> > > How could I deal with this? Because this is exactly what seems to fail
> and
> > > also cause this dreaded exception message when trying to fill out the
> forms
> > > with anything other than PDTextbox.
> >
> > Without looking at the form:
> >
> > a) test if getValue returns null if not take that value
> > b) if it returns null test if the box has been checked - if yes take
> that value.
> >
> > Which value?
> >
> > use the value retrieved from a) or b) to set the fields value in the pdf
> template.
> >
> > I'm not sure which value you mean.
> >
> > What would be helpful is either a screenshot of the form fields entries
> using the PDFDebugger [
> http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger <
> http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger>] or the
> printout of the fields getDictionary() method so there is some more
> information about how the field definition looks lie. Best would be to have
> the form of course.
>
>
I haven't found the pdfDebugger tool yet; reckon I need to compile it
myself. Nevertheless, when I parse through the structure myself, I do not
get any dictionary entries:

DEBUG: Opening ./Test.pdf
No XFA data in stream
DEBUG: Checkbox [01.20.Entry1]:  On=1 Off=Off Checked=true Value=1
DEBUG: Checkbox [01.20.Entry2]:  On=1 Off=Off Checked=true Value=1
DEBUG: Checkbox [01.20.Entry3]:  On=1 Off=Off Checked=false Value=_n/a_
DEBUG: Checkbox [01.20.Entry4]:  On=2 Off=Off Checked=false Value=_n/a_
DEBUG: TextButton [01.011.Name]: Value=sddsds
DEBUG: TextButton [01.011.Prename]: Value=sdsdsd
DEBUG: Checkbox [01.011.Language]:  On=0 Off=Off Checked=false Value=_n/a_
DEBUG: Checkbox [01.011.Boxes]:  On=Mrs Off=Off Checked=true Value=_n/a_
DEBUG: Opening ./TestTemplate.pdf
No XFA data in stream
Setting CheckBox field: 01.011.Boxes to value: null
Dumping Checkbox field dictionary [01.011.Boxes] ----------
COSDictionary{}
----------------------------------------------------------------------------
Setting CheckBox field: 01.20.Entry4 to value: null
Dumping Checkbox field dictionary [01.20.Entry4] ----------
COSDictionary{}
----------------------------------------------------------------------------
Setting CheckBox field: 01.20.Entry3 to value: null
Dumping Checkbox field dictionary [01.20.Entry3] ----------
COSDictionary{}
----------------------------------------------------------------------------
Setting CheckBox field: 01.20.Entry2 to value: null
Dumping Checkbox field dictionary [01.20.Entry2] ----------
COSDictionary{}
----------------------------------------------------------------------------
Setting CheckBox field: 01.011.Language to value: null
Dumping Checkbox field dictionary [01.011.Language] ----------
COSDictionary{}
----------------------------------------------------------------------------
Setting CheckBox field: 01.20.Entry1 to value: null
Dumping Checkbox field dictionary [01.20.Entry1] ----------
COSDictionary{}

The relevant DEBUG code:

/**
 * Writes a single DEBUG line describing the given form field to the error log.
 *
 * The line always starts with the field type and the fully qualified field
 * name. Checkboxes additionally report their on/off values and checked state,
 * radio collections their collection value, push buttons their
 * no-export/read-only/required flags. Field types other than checkbox, radio
 * collection, push button and text box are reported as "Unhandled" together
 * with their class.
 *
 * @param field the form field to describe
 * @throws IOException declared by the method; presumably raised by the PDFBox
 *                     getters when the field cannot be read (confirm against
 *                     the PDFBox version in use)
 */
private void analyseAndPrintFields(PDField field) throws IOException {
    final String name = field.getFullyQualifiedName();

    // A field without a value is shown with a placeholder instead of "null".
    String shown = "_n/a_";
    if (field.getValue() != null) {
        shown = field.getValue();
    }

    if (field instanceof PDCheckbox) {
        final PDCheckbox box = (PDCheckbox) field;
        logerr(String.format("DEBUG: Checkbox [%s]:  On=%s Off=%s Checked=%s Value=%s",
                name, box.getOnValue(), box.getOffValue(), box.isChecked(), shown));
        //TODO: Check if widgets handling is necessary: checkbox.getWidget();
        return;
    }

    if (field instanceof PDRadioCollection) {
        final PDRadioCollection radios = (PDRadioCollection) field;
        logerr(String.format("DEBUG: RadioButtons [%s]: CollectionValue=%s Value=%s",
                name, radios.getValue(), shown));
        return;
    }

    if (field instanceof PDPushButton) {
        final PDPushButton push = (PDPushButton) field;
        logerr(String.format("DEBUG: Pushbuttons [%s]: Export/Readonly/Required=%s/%s/%s Value=%s",
                name, push.isNoExport(), push.isReadonly(), push.isRequired(), shown));
        return;
    }

    if (field instanceof PDTextbox) {
        logerr(String.format("DEBUG: TextButton [%s]: Value=%s", name, shown));
        return;
    }

    // Any other field type is only reported, not analysed.
    logerr(String.format("DEBUG: Unhandled [%s]: Type=%s", name, field.getClass().toString()));
}



And the dumping code:

    private void setFieldDC(PDDocument pdfDocument, String keyEntry,
PDField oldField) throws Exception {
        PDDocumentCatalog docCatalog = pdfDocument.getDocumentCatalog();
        PDAcroForm pdAcroForm = docCatalog.getAcroForm();
        //TODO: Check if this makes sense: pdAcroForm.setCacheFields(true);
        PDField field = pdAcroForm.getField(keyEntry);

        if (field == null) {
            logerr("No field found with name: " + keyEntry);
            return;
        }

        String fieldValue;
        if (oldField instanceof PDTextbox) {
            fieldValue = oldField.getValue();
            if (fieldValue != null) {
                logmsg("Setting field: " + keyEntry + " to value: " +
fieldValue);
                field.setValue(fieldValue);
                if (setFieldFlags) {
                    field.setFieldFlags(oldField.getFieldFlags());
                }
            }
        } else if (oldField instanceof PDCheckbox) {
            fieldValue = oldField.getValue();
            logmsg("Setting CheckBox field: " + keyEntry + " to value:
" + fieldValue);
            if (fieldValue != null) {
                logmsg("Setting field: " + keyEntry + " to value: " +
fieldValue);
                field.setValue(fieldValue);
                if (setFieldFlags) {
                    field.setFieldFlags(oldField.getFieldFlags());
                }
            } else {
                logerr("Dumping Checkbox field dictionary [" +
keyEntry + "] ----------");
                logerr(oldField.getDictionary().toString());

logerr("----------------------------------------------------------------------------");
            }

/*            PDCheckbox oldCheckBox = (PDCheckbox) oldField;
            PDCheckbox newCheckBox = (PDCheckbox) field;

            if (oldCheckBox == null) {
                logerr("oldCheckBox is NULL");
            } else if (newCheckBox == null) {
                logerr("newCheckBox is NULL");
            }

            if (oldCheckBox.isChecked()) {
                logerr("DEBUG: >>>>> PDCheckBox [" + keyEntry + "]
wasChecked = YES");
                newCheckBox.check();
            } else {
                logerr("DEBUG: >>>>> PDCheckBox [" + keyEntry + "]
wasChecked = NO");
                newCheckBox.unCheck();
            }*/
        } else if (oldField instanceof PDChoiceField) {
            fieldValue = oldField.getValue();
            if (fieldValue != null) {
                field.setValue(fieldValue);
                if (setFieldFlags) {
                    field.setFieldFlags(oldField.getFieldFlags());
                }
            } else {
                logerr("Dumping PDChoiceField field dictionary [" +
keyEntry + "] ----------");
                logerr(oldField.getDictionary().toString());

logerr("----------------------------------------------------------------------------");
            }
        } else if (oldField instanceof PDRadioCollection) {
            fieldValue = oldField.getValue();
            if (fieldValue != null) {
                field.setValue(fieldValue);
                if (setFieldFlags) {
                    field.setFieldFlags(oldField.getFieldFlags());
                }
            } else {
                logerr("Dumping PDRadioCollection field dictionary ["
+ keyEntry + "] ----------");
                logerr(oldField.getDictionary().toString());

logerr("----------------------------------------------------------------------------");
            }
        } else if (oldField instanceof PDPushButton) {
            fieldValue = oldField.getValue();
            if (fieldValue != null) {
                field.setValue(fieldValue);
                if (setFieldFlags) {
                    field.setFieldFlags(oldField.getFieldFlags());
                }
            } else {
                logerr("Dumping PDPushButton field dictionary [" +
keyEntry + "] ----------");
                logerr(oldField.getDictionary().toString());

logerr("----------------------------------------------------------------------------");
            }
        }  else {
            logerr("Fields of type [" + oldField.getClass().toString()
+ "] are unsupported");
        }
    }


This is highly confusing. Why can Acrobat deal with those checkboxes when
their value is null and why can't PDFBox set Checkbox values?

How can I simply clone all static PDF form entries of a PDF into a new PDF?
Is PDF really that complex that such a simple thing is not possible? Right
now, only text form entries are copied, the rest shows null for getValue().

Cheers
Roberto

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

Hi,

> Am 30.06.2015 um 10:21 schrieb Roberto Nibali <rn...@gmail.com>:
> 
> Hello
> 
> Thanks for the answers. I have attached two PDFs by hopefully removing all traces that could violate the NDA, which I can't get to work.
> 
> >> Normally when a checkbox is checked the fields value AND the appearance
> >> need to be updated. This is not the case with your form. How was the value
> >> of the original form set? Which software?
> >>
> >
> > The value set in the form is by simply clicking it in Adobe Acrobat
> > (Reader). It sets the field accordingly, nevertheless for a set field,
> > getValue() still returns null. The forms themselves were created with
> > InDesign, I think. Not sure though. So, you're kind of implying that
> > independent of the field type (Textbox, PrintButton, Checkbox,
> > Radiobutton), PDFBox would normally expect the value of a field to be !null?
> 
> I did a quick test with a newly created form using Adobe Acrobat and setting the checkbox also with Acrobat. There the value is not null when the checkbox has been checked.
> 
> I have attached now PDFs, where with my tool the value is null.


unfortunately the attachments didn't make it through the mailing list. Could you upload them to a public location?

BR
Maruan

> 
> > How could I deal with this? Because this is exactly what seems to fail and
> > also cause this dreaded exception message when trying to fill out the forms
> > with anything other than PDTextbox.
> 
> Without looking at the form:
> 
> a) test if getValue returns null if not take that value
> b) if it returns null test if the box has been checked - if yes take that value.
> 
> Which value?
>  
> use the value retrieved from a) or b) to set the fields value in the pdf template.
> 
> I'm not sure which value you mean.
>  
> What would be helpful is either a screenshot of the form fields entries using the PDFDebugger [http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger <http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger>] or the printout of the field's getDictionary() method so there is some more information about how the field definition looks like. Best would be to have the form of course.
> 
> Attached. Also you can find the code here: http://pastebin.com/Ra0ebSis <http://pastebin.com/Ra0ebSis>. Let me know if something else is needed.
> 
> The idea is that you call it with the template TestTemplate.pdf and the input PDF Test.pdf, resulting in an output PDF that is a carbon-copy of the input PDF, but with the template PDF CI/CD (in my test case they are identical, but it serves the purpose).
>  
> >
> > Would anybody here be willing to have a Skype or Teamviewer session with me
> > tonight or tomorrow night to have a look at the problem? We're willing to
> > pay for such support. If so, please send a private message to me, so we
> > don't clutter this mailing list.
> 
> I'm currently on travel so won't be able to do that
> 
> Thanks, anybody else?
> 
> Cheers
> Roberto
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
> For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hello

Thanks for the answers. I have attached two PDFs by hopefully removing all
traces that could violate the NDA, which I can't get to work.

>> Normally when a checkbox is checked the fields value AND the appearance
> >> need to be updated. This is not the case with your form. How was the
> value
> >> of the original form set? Which software?
> >>
> >
> > The value set in the form is by simply clicking it in Adobe Acrobat
> > (Reader). It sets the field accordingly, nevertheless for a set field,
> > getValue() still returns null. The forms themselves were created with
> > InDesign, I think. Not sure though. So, you're kind of implying that
> > independent of the field type (Textbox, PrintButton, Checkbox,
> > Radiobutton), PDFBox would normally expect the value of a field to be
> !null?
>
> I did a quick test with a newly created form using Adobe Acrobat and
> setting the checkbox also with Acrobat. There the value is not null when
> the checkbox has been checked.
>

I have attached now PDFs, where with my tool the value is null.

> How could I deal with this? Because this is exactly what seems to fail and
> > also cause this dreaded exception message when trying to fill out the
> forms
> > with anything other than PDTextbox.
>
> Without looking at the form:
>
> a) test if getValue returns null if not take that value
> b) if it returns null test if the box has been checked - if yes take that
> value.
>

Which value?


> use the value retrieved from a) or b) to set the fields value in the pdf
> template.
>

I'm not sure which value you mean.


> What would be helpful is either a screenshot of the form fields entries
> using the PDFDebugger [
> http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger] or the
> printout of the fields getDictionary() method so there is some more
> information about how the field definition looks like. Best would be to have
> the form of course.
>

Attached. Also you can find the code here: http://pastebin.com/Ra0ebSis.
Let me know if something else is needed.

The idea is that you call it with the template TestTemplate.pdf and the
input PDF Test.pdf, resulting in an output PDF that is a carbon-copy of the
input PDF, but with the template PDF CI/CD (in my test case they are
identical, but it serves the purpose).


> >
> > Would anybody here be willing to have a Skype or Teamviewer session with
> me
> > tonight or tomorrow night to have a look at the problem? We're willing to
> > pay for such support. If so, please send a private message to me, so we
> > don't clutter this mailing list.
>
> I'm currently on travel so won't be able to do that


Thanks, anybody else?

Cheers
Roberto

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

Hi,

<snip>
>>> 
>>> 
>> 
>> 
>> getValue and getOnValue/getOffValue return different settings of the
>> field. getValue works on the field dictionaries 'V' entry whereas
>> getOnValue/getOffValue look at the fields appearance settings.
>> 
> 
> This explains the code in PDFBox, thanks. Unfortunately, within two days, I
> suspect I can't really understand the PDF standard to the degree necessary
> to pose intelligent questions.

what you are finding is that different PDF software might work differently. Some set the value some don't. Some need it some don't.

> 
> 
>> Normally when a checkbox is checked the fields value AND the appearance
>> need to be updated. This is not the case with your form. How was the value
>> of the original form set? Which software?
>> 
> 
> The value set in the form is by simply clicking it in Adobe Acrobat
> (Reader). It sets the field accordingly, nevertheless for a set field,
> getValue() still returns null. The forms themselves were created with
> InDesign, I think. Not sure though. So, you're kind of implying that
> independent of the field type (Textbox, PrintButton, Checkbox,
> Radiobutton), PDFBox would normally expect the value of a field to be !null?

I did a quick test with a newly created form using Adobe Acrobat and setting the checkbox also with Acrobat. There the value is not null when the checkbox has been checked.


> 
> How could I deal with this? Because this is exactly what seems to fail and
> also cause this dreaded exception message when trying to fill out the forms
> with anything other than PDTextbox.

Without looking at the form:

a) test if getValue returns null if not take that value
b) if it returns null test if the box has been checked - if yes take that value.

use the value retrieved from a) or b) to set the fields value in the pdf template.

What would be helpful is either a screenshot of the form fields entries using the PDFDebugger [http://pdfbox.apache.org/1.8/commandline.html#pdfDebugger] or the printout of the field's getDictionary() method so there is some more information about how the field definition looks like. Best would be to have the form of course.


> 
> Would anybody here be willing to have a Skype or Teamviewer session with me
> tonight or tomorrow night to have a look at the problem? We're willing to
> pay for such support. If so, please send a private message to me, so we
> don't clutter this mailing list.

I'm currently on travel so won't be able to do that.

> 
> Best regards
> Roberto
> 
> ps.: I didn't expect PDF to be so intricately complex ... it hides well
> behind the tools which just seem to always work in conjunction with PDF.


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

G'day

On Mon, Jun 29, 2015 at 7:37 AM, Maruan Sahyoun <sa...@fileaffairs.de>
wrote:

> Hi,
>
> > Am 28.06.2015 um 22:18 schrieb Roberto Nibali <rn...@gmail.com>:
> >
> > Hi
> >
> >
> >                else if (f instanceof PDCheckbox)
> >>                {
> >>                    fieldValue = fieldValues.get(fieldName);
> >>
> >
> > How is this supposed to work? The value one gets via getValue() from the
> > PDField is always null for PDCheckboxes.
> >
> >
> >>                    if (("TRUE".equalsIgnoreCase(fieldValue))
> >>                            || ("CHECKED".equalsIgnoreCase(fieldValue))
> >>                            || ("YES".equalsIgnoreCase(fieldValue)))
> >>                    {
> >>                        ((PDCheckbox) f).check();
> >>                    }
> >>                    else
> >>                    {
> >>                        ((PDCheckbox) f).unCheck();
> >>                    }
> >>                }
> >>
> >
> >
> > Here is some code I use to verify the above statement:
> >
> > private void analyseAndPrintFields(PDField field) throws IOException {
> >    String fqName = field.getFullyQualifiedName();
> >    String value = (field.getValue() != null ? field.getValue() :
> "_n/a_");
> >
> >    if (field instanceof PDCheckbox) {
> >        PDCheckbox checkbox = (PDCheckbox) field;
> >        logerr("DEBUG: Checkbox [" + fqName + "]:  On=" +
> > checkbox.getOnValue() +
> >                " Off=" + checkbox.getOffValue() +
> >                " Checked=" + (checkbox.isChecked() ? "true" : "false") +
> >                " Value=" + value);
> >        //TODO: Check if widgets handling is necessary:
> checkbox.getWidget();
> >    } else if (field instanceof PDRadioCollection) {
> >        PDRadioCollection collection = (PDRadioCollection) field;
> >        logerr("DEBUG: RadioButtons [" + fqName + "]: " +
> > collection.getValue() + " Value=" + value);
> >    } else if (field instanceof PDPushButton) {
> >        PDPushButton button = (PDPushButton) field;
> >        logerr("DEBUG: Pushbuttons [" + fqName + "]: " +
> >                        "Export/Readonly/Required=" +
> >                        button.isNoExport() + "/" +
> >                        button.isReadonly() + "/" +
> >                        button.isRequired() +
> >                        " Value=" + value
> >        );
> >    }
> > }
> >
> > Running this particular code inside my loading part, reveals the
> following
> > output for any given PDF:
> >
> > DEBUG: Checkbox [01.011.hkanrede]:  On=Frau Off=Off Checked=false
> > Value=_n/a_
> > DEBUG: Checkbox [01.011.hkanrede]:  On=0 Off=Off Checked=true Value=_n/a_
> > DEBUG: Checkbox [01.011.hksprache]:  On=0 Off=Off Checked=true
> Value=_n/a_
> > DEBUG: Checkbox [01.011.hksprache]:  On=1 Off=Off Checked=false
> Value=_n/a_
> > DEBUG: Checkbox [01.011.hksprache]:  On=2 Off=Off Checked=false
> Value=_n/a_
> > DEBUG: Checkbox [01.011.hksprache]:  On=3 Off=Off Checked=false
> Value=_n/a_
> >
> > I have the distinct feeling that I am completely missing the point here.
> > The abstraction level of the PDFBox library should actually deal with
> this,
> > so the user does not have to, right? Although, you do have getOnValue()
> and
> > getOffValue() for example when it comes to PDCheckBox type fields.
>
>
> getValue and getOnValue/getOffValue return different settings of the
> field. getValue works on the field dictionaries 'V' entry whereas
> getOnValue/getOffValue look at the fields appearance settings.
>

This explains the code in PDFBox, thanks. Unfortunately, within two days, I
suspect I can't really understand the PDF standard to the degree necessary
to pose intelligent questions.


> Normally when a checkbox is checked the fields value AND the appearance
> need to be updated. This is not the case with your form. How was the value
> of the original form set? Which software?
>

The value set in the form is by simply clicking it in Adobe Acrobat
(Reader). It sets the field accordingly, nevertheless for a set field,
getValue() still returns null. The forms themselves were created with
InDesign, I think. Not sure though. So, you're kind of implying that
independent of the field type (Textbox, PrintButton, Checkbox,
Radiobutton), PDFBox would normally expect the value of a field to be !null?

How could I deal with this? Because this is exactly what seems to fail and
also cause this dreaded exception message when trying to fill out the forms
with anything other than PDTextbox.

Would anybody here be willing to have a Skype or Teamviewer session with me
tonight or tomorrow night to have a look at the problem? We're willing to
pay for such support. If so, please send a private message to me, so we
don't clutter this mailing list.

Best regards
Roberto

ps.: I didn't expect PDF to be so intricately complex ... it hides well
behind the tools which just seem to always work in conjunction with PDF.

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

Hi,

> Am 28.06.2015 um 22:18 schrieb Roberto Nibali <rn...@gmail.com>:
> 
> Hi
> 
> 
>                else if (f instanceof PDCheckbox)
>>                {
>>                    fieldValue = fieldValues.get(fieldName);
>> 
> 
> How is this supposed to work? The value one gets via getValue() from the
> PDField is always null for PDCheckboxes.
> 
> 
>>                    if (("TRUE".equalsIgnoreCase(fieldValue))
>>                            || ("CHECKED".equalsIgnoreCase(fieldValue))
>>                            || ("YES".equalsIgnoreCase(fieldValue)))
>>                    {
>>                        ((PDCheckbox) f).check();
>>                    }
>>                    else
>>                    {
>>                        ((PDCheckbox) f).unCheck();
>>                    }
>>                }
>> 
> 
> 
> Here is some code I use to verify the above statement:
> 
> private void analyseAndPrintFields(PDField field) throws IOException {
>    String fqName = field.getFullyQualifiedName();
>    String value = (field.getValue() != null ? field.getValue() : "_n/a_");
> 
>    if (field instanceof PDCheckbox) {
>        PDCheckbox checkbox = (PDCheckbox) field;
>        logerr("DEBUG: Checkbox [" + fqName + "]:  On=" +
> checkbox.getOnValue() +
>                " Off=" + checkbox.getOffValue() +
>                " Checked=" + (checkbox.isChecked() ? "true" : "false") +
>                " Value=" + value);
>        //TODO: Check if widgets handling is necessary: checkbox.getWidget();
>    } else if (field instanceof PDRadioCollection) {
>        PDRadioCollection collection = (PDRadioCollection) field;
>        logerr("DEBUG: RadioButtons [" + fqName + "]: " +
> collection.getValue() + " Value=" + value);
>    } else if (field instanceof PDPushButton) {
>        PDPushButton button = (PDPushButton) field;
>        logerr("DEBUG: Pushbuttons [" + fqName + "]: " +
>                        "Export/Readonly/Required=" +
>                        button.isNoExport() + "/" +
>                        button.isReadonly() + "/" +
>                        button.isRequired() +
>                        " Value=" + value
>        );
>    }
> }
> 
> Running this particular code inside my loading part, reveals the following
> output for any given PDF:
> 
> DEBUG: Checkbox [01.011.hkanrede]:  On=Frau Off=Off Checked=false
> Value=_n/a_
> DEBUG: Checkbox [01.011.hkanrede]:  On=0 Off=Off Checked=true Value=_n/a_
> DEBUG: Checkbox [01.011.hksprache]:  On=0 Off=Off Checked=true Value=_n/a_
> DEBUG: Checkbox [01.011.hksprache]:  On=1 Off=Off Checked=false Value=_n/a_
> DEBUG: Checkbox [01.011.hksprache]:  On=2 Off=Off Checked=false Value=_n/a_
> DEBUG: Checkbox [01.011.hksprache]:  On=3 Off=Off Checked=false Value=_n/a_
> 
> I have the distinct feeling that I am completely missing the point here.
> The abstraction level of the PDFBox library should actually deal with this,
> so the user does not have to, right? Although, you do have getOnValue() and
> getOffValue() for example when it comes to PDCheckBox type fields.


getValue and getOnValue/getOffValue return different settings of the field. getValue works on the field dictionary's 'V' entry, whereas getOnValue/getOffValue look at the field's appearance settings.

Normally, when a checkbox is checked, the field's value AND the appearance need to be updated. This is not the case with your form. How was the value of the original form set? Which software?

BR
Maruan


> 
> Cheers
> Roberto


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

> Am 06.07.2015 um 18:13 schrieb Roberto Nibali <rn...@gmail.com>:
> 
> Hello
> 
> Thanks for helping us out with such good suggestions. My answers are below:
> 
> 
> I have no idea; but in the code I attached yesterday, it was possible to set the checkbox with check() or uncheck().
> 
> But if I understand you correctly, that method didn't work for you; the answer by Maruan suggests that the appearance must be set as well in some PDFs. (The appearance stream is a sequence of PDF operators that draws "what you see") And that is the moment where one would have to look at the PDFs themselves in an editor to see what's inside.
> 
> I looked in the jmactest.pdf file, that one has appearance streams for both states, this can be seen at   Root/AcroForm/Fields/[1]/AP/N   in the PDFDebugger. (Use the 2.0 version jar to jump directly with the path I mentioned, get it here:
> https://repository.apache.org/content/groups/snapshots/org/apache/pdfbox/pdfbox-app/2.0.0-SNAPSHOT/ <https://repository.apache.org/content/groups/snapshots/org/apache/pdfbox/pdfbox-app/2.0.0-SNAPSHOT/> 
> 
> 
> Thanks for this super link. I was a bit lost for a moment. The output of the two PDFs (one template and a filled out one) using the PDFDebugger is quite extensive. How shall I best display it to you guys? The PDFs are here:
> 
> https://drive.google.com/file/d/0B7Bzk_1dcyc5SmRpQUJPR3JGUkk/view?usp=sharing <https://drive.google.com/file/d/0B7Bzk_1dcyc5SmRpQUJPR3JGUkk/view?usp=sharing>
> https://drive.google.com/file/d/0B7Bzk_1dcyc5Tk1qcVo2Yk02dTA/view?usp=sharing <https://drive.google.com/file/d/0B7Bzk_1dcyc5Tk1qcVo2Yk02dTA/view?usp=sharing>


Hi,

with Test-3.pdf and the following code


        // Load the filled-in form and print the value and checked state of one checkbox.
        PDDocument doc = PDDocument.load(new File("Test-3.pdf"));
        try {
            PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
            PDCheckbox field = (PDCheckbox) acroForm.getField("01.20.Entry1");
            System.out.println(field.getValue());
            System.out.println(field.isChecked());
        } finally {
            // Release the document even if a lookup above fails.
            doc.close();
        }

gives this output

1
true

which looks fine to me.

BR
Maruan


> 
> Using the SNAPSHOT from 2015/07/04 (java -jar ../pdfbox-app-2.0.0-20150704.091928-1476.jar PDFDebugger Test.pdf), I get a stackdump when clicking on the following individual node entry:
> 
> Root/AcroForm/Fields/[0]/Kids/[0]/Kids/[0]/P
> 
> The trace is as follows:
> 
> 
> 
>  
> So you would have to see what's in the file that you generate, or what's in the template you use.
> 
> I do not understand this output at the moment. What do I have to look for?
>  
> Re skype etc, I don't even have skype or a headset, and I'm not really the best choice anyway. And sooner or later one would have to look at the PDF too, and thus likely see the name of your contact partner, thus breaking the NDA :-(
> 
> I believe that another person looking at the PDF would be a feasible exception, but it's definitely gray area.
>  
> I see that the jmactest file is compressed so you won't see much in the editor, so I used WriteDecodedDoc utility to decompress it, and here's the relevant part for the checkbox: (it looks less messy if you use PDFDebugger!):
> 
> 
> How do I get such an output using the above mentioned PDFDebugger?
> 
> 
> Thanks and best regards
> Roberto

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 06.07.2015 um 18:13 schrieb Roberto Nibali:
>
> Using the SNAPSHOT from 2015/07/04 (java -jar 
> ../pdfbox-app-2.0.0-20150704.091928-1476.jar PDFDebugger Test.pdf), I 
> get a stackdump when clicking on the following individual node entry:
>
> Root/AcroForm/Fields/[0]/Kids/[0]/Kids/[0]/P

This doesn't crash for me, but it doesn't matter, that path is identical to
Root/Pages/Kids/[0]

Sometimes the new debugger has an exception because a page is rendered 
while another is still being rendered, i.e. there's an unsupported 
multithreading.

Tilman

---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 06.07.2015 um 18:13 schrieb Roberto Nibali:
>
>     I see that the jmactest file is compressed so you won't see much
>     in the editor, so I used WriteDecodedDoc utility to decompress it,
>     and here's the relevant part for the checkbox: (it looks less
>     messy if you use PDFDebugger!):
>
>
> How do I get such an output using the above mentioned PDFDebugger?

You won't. That one is from NOTEPAD++

But this is all obsolete, as there's apparently only one problem left 
with your file, the missing font resource. In your template file, there 
is a resource missing somewhere near

Root/AcroForm/Fields/[0]/Kids/[1]/Kids/[0]

Tilman

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hello

Thanks for helping us out with such good suggestions. My answers are below:


I have no idea; but in the code I attached yesterday, it was possible to
set the checkbox with check() or uncheck().

>
> But if I understand you correctly, that method didn't work for you; the
> answer by Maruan suggests that the appearance must be set as well in some
> PDFs. (The appearance stream is a sequence of PDF operators that draws
> "what you see") And that is the moment where one would have to look at the
> PDFs themselves in an editor to see what's inside.
>
> I looked in the jmactest.pdf file, that one has appearance streams for
> both states, this can be seen at   Root/AcroForm/Fields/[1]/AP/N   in the
> PDFDebugger. (Use the 2.0 version jar to jump directly with the path I
> mentioned, get it here:
>
> https://repository.apache.org/content/groups/snapshots/org/apache/pdfbox/pdfbox-app/2.0.0-SNAPSHOT/
>


Thanks for this super link. I was a bit lost for a moment. The output of
the two PDFs (one template and a filled out one) using the PDFDebugger is
quite extensive. How shall I best display it to you guys? The PDFs are here:

https://drive.google.com/file/d/0B7Bzk_1dcyc5SmRpQUJPR3JGUkk/view?usp=sharing
https://drive.google.com/file/d/0B7Bzk_1dcyc5Tk1qcVo2Yk02dTA/view?usp=sharing

Using the SNAPSHOT from 2015/07/04 (java -jar
../pdfbox-app-2.0.0-20150704.091928-1476.jar PDFDebugger Test.pdf), I get a
stackdump when clicking on the following individual node entry:

Root/AcroForm/Fields/[0]/Kids/[0]/Kids/[0]/P

The trace is as follows:

[image: Inline image 1]



> So you would have to see what's in the file that you generate, or what's
> in the template you use.
>

I do not understand this output at the moment. What do I have to look for?


> Re skype etc, I don't even have skype or a headset, and I'm not really the
> best choice anyway. And sooner or later one would have to look at the PDF
> too, and thus likely see the name of your contact partner, thus breaking
> the NDA :-(
>

I believe that another person looking at the PDF would be a feasible
exception, but it's definitely gray area.


> I see that the jmactest file is compressed so you won't see much in the
> editor, so I used WriteDecodedDoc utility to decompress it, and here's the
> relevant part for the checkbox: (it looks less messy if you use
> PDFDebugger!):
>
>
How do I get such an output using the above mentioned PDFDebugger?


Thanks and best regards
Roberto

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 29.06.2015 um 11:43 schrieb Roberto Nibali:
> Hello
>
>
>                   else if (f instanceof PDCheckbox)
>>>>                   {
>>>>                       fieldValue = fieldValues.get(fieldName);
>>>>
>>>>   How is this supposed to work? The value one gets via getValue() from the
>>> PDField is always null for PDCheckboxes.
>>>
>> that part is not from PDFBox, this is the map that is set in the main()
>> method, in this case "m.put("Check Box1", "true");".
>
> Yep, that part I understood, nevertheless, the original map was filled most
> probably using the .getValue() method, since the map definition is only
> <string,string>, whereas I defined my map as map<string,PDfield>, to make
> sure that at a later stage I can differentiate between the field types when
> filling up the template PDF fields.
No, in that code the map was set manually, e.g. "m.put("Check Box1", 
"true");".

>
> My question is: is a standard PDCheckbox field type supposed to have a
> getValue() != null? If not, how could I set the checkbox otherwise, using
> the PDCheckbox specific enhancements in the type class?

I have no idea; but in the code I attached yesterday, it was possible to 
set the checkbox with check() or uncheck().

But if I understand you correctly, that method didn't work for you; the 
answer by Maruan suggests that the appearance must be set as well in 
some PDFs. (The appearance stream is a sequence of PDF operators that 
draws "what you see") And that is the moment where one would have to 
look at the PDFs themselves in an editor to see what's inside.

I looked in the jmactest.pdf file, that one has appearance streams for 
both states, this can be seen at   Root/AcroForm/Fields/[1]/AP/N   in 
the PDFDebugger. (Use the 2.0 version jar to jump directly with the path 
I mentioned, get it here:
https://repository.apache.org/content/groups/snapshots/org/apache/pdfbox/pdfbox-app/2.0.0-SNAPSHOT/ 
)

So you would have to see what's in the file that you generate, or what's 
in the template you use.

Re skype etc, I don't even have skype or a headset, and I'm not really 
the best choice anyway. And sooner or later one would have to look at 
the PDF too, and thus likely see the name of your contact partner, thus 
breaking the NDA :-(

I see that the jmactest file is compressed so you won't see much in the 
editor, so I used WriteDecodedDoc utility to decompress it, and here's 
the relevant part for the checkbox: (it looks less messy if you use 
PDFDebugger!):

23 0 obj
<<
/AP <<
/D <<
/Off 39 0 R
/Yes 40 0 R
 >>
/N <<
/Off 41 0 R
/Yes 42 0 R
 >>
 >>
/AS /Off
/F 4
/FT /Btn
/MK <<
/BC [0.0]
/BG [1.0]
/CA (4)
 >>
/P 30 0 R
/Rect [151.479 648.203 169.479 666.203]
/Subtype /Widget
/T (Check Box1)
/Type /Annot
 >>
endobj


39 0 obj
<<
/BBox [0.0 0.0 18.0 18.0]
/FormType 1
/Length 44
/Matrix [1.0 0.0 0.0 1.0 0.0 0.0]
/Resources <<
/ProcSet [/PDF]
 >>
/Subtype /Form
/Type /XObject
 >>
stream
0.75293 g
0 0 18 18 re
f
0.5 0.5 17 17 re
s

endstream
endobj
40 0 obj
<<
/BBox [0.0 0.0 18.0 18.0]
/FormType 1
/Length 124
/Matrix [1.0 0.0 0.0 1.0 0.0 0.0]
/Resources <<
/Font <<
/ZaDb 72 0 R
 >>
/ProcSet [/PDF /Text]
 >>
/Subtype /Form
/Type /XObject
 >>
stream
0.75293 g
0 0 18 18 re
f
0.5 0.5 17 17 re
s
q
1 1 16 16 re
W
n
0 g
BT
/ZaDb 14.532 Tf
2.853 4.081 Td
13.9943 TL
(4) Tj
ET
Q

endstream
endobj
41 0 obj
<<
/BBox [0.0 0.0 18.0 18.0]
/FormType 1
/Length 38
/Matrix [1.0 0.0 0.0 1.0 0.0 0.0]
/Resources <<
/ProcSet [/PDF]
 >>
/Subtype /Form
/Type /XObject
 >>
stream
1 g
0 0 18 18 re
f
0.5 0.5 17 17 re
s

endstream
endobj
42 0 obj
<<
/BBox [0.0 0.0 18.0 18.0]
/FormType 1
/Length 118
/Matrix [1.0 0.0 0.0 1.0 0.0 0.0]
/Resources <<
/Font <<
/ZaDb 72 0 R
 >>
/ProcSet [/PDF /Text]
 >>
/Subtype /Form
/Type /XObject
 >>
stream
1 g
0 0 18 18 re
f
0.5 0.5 17 17 re
s
q
1 1 16 16 re
W
n
0 g
BT
/ZaDb 14.532 Tf
2.853 4.081 Td
13.9943 TL
(4) Tj
ET
Q

endstream
endobj


Tilman


>
> Adobe Acrobat seems to handle it just fine, albeit that's an easy thing to
> say. Preview under MacOSX also cannot properly handle those PDFs, btw.
> Haven't tried any Linux tools yet, but my time is extremely limited.
>
> Best regards
> Roberto
>


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hello


                 else if (f instanceof PDCheckbox)
>>
>>>                  {
>>>                      fieldValue = fieldValues.get(fieldName);
>>>
>>>  How is this supposed to work? The value one gets via getValue() from the
>> PDField is always null for PDCheckboxes.
>>
> that part is not from PDFBox, this is the map that is set in the main()
> method, in this case "m.put("Check Box1", "true");".


Yep, that part I understood, nevertheless, the original map was filled most
probably using the .getValue() method, since the map definition is only
<string,string>, whereas I defined my map as map<string,PDfield>, to make
sure that at a later stage I can differentiate between the field types when
filling up the template PDF fields.

My question is: is a standard PDCheckbox field type supposed to have a
getValue() != null? If not, how could I set the checkbox otherwise, using
the PDCheckbox specific enhancements in the type class?

Adobe Acrobat seems to handle it just fine, albeit that's an easy thing to
say. Preview under MacOSX also cannot properly handle those PDFs, btw.
Haven't tried any Linux tools yet, but my time is extremely limited.

Best regards
Roberto

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Am 28.06.2015 um 22:18 schrieb Roberto Nibali:
> Hi
>
>
>                  else if (f instanceof PDCheckbox)
>>                  {
>>                      fieldValue = fieldValues.get(fieldName);
>>
> How is this supposed to work? The value one gets via getValue() from the
> PDField is always null for PDCheckboxes.
that part is not from PDFBox, this is the map that is set in the main() 
method, in this case "m.put("Check Box1", "true");".

Tilman

---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi


                else if (f instanceof PDCheckbox)
>                 {
>                     fieldValue = fieldValues.get(fieldName);
>

How is this supposed to work? The value one gets via getValue() from the
PDField is always null for PDCheckboxes.


>                     if (("TRUE".equalsIgnoreCase(fieldValue))
>                             || ("CHECKED".equalsIgnoreCase(fieldValue))
>                             || ("YES".equalsIgnoreCase(fieldValue)))
>                     {
>                         ((PDCheckbox) f).check();
>                     }
>                     else
>                     {
>                         ((PDCheckbox) f).unCheck();
>                     }
>                 }
>


Here is some code I use to verify the above statement:

/**
 * Writes a one-line debug description of a form field through {@code logerr}.
 *
 * Only check boxes, radio collections and push buttons are reported; a field
 * of any other type produces no output. Every reported line ends with the
 * field's value, or the placeholder {@code _n/a_} when the value is null.
 *
 * @param field the form field to describe
 * @throws IOException if one of the field accessors called here fails
 */
private void analyseAndPrintFields(PDField field) throws IOException {
    String name = field.getFullyQualifiedName();

    // Fall back to a visible placeholder when the field carries no value.
    String shownValue = "_n/a_";
    if (field.getValue() != null) {
        shownValue = field.getValue();
    }

    if (field instanceof PDCheckbox) {
        PDCheckbox box = (PDCheckbox) field;
        String line = "DEBUG: Checkbox [" + name + "]:  On=" + box.getOnValue()
                + " Off=" + box.getOffValue()
                + " Checked=" + box.isChecked()
                + " Value=" + shownValue;
        logerr(line);
        //TODO: Check if widgets handling is necessary: checkbox.getWidget();
        return;
    }

    if (field instanceof PDRadioCollection) {
        PDRadioCollection radios = (PDRadioCollection) field;
        String line = "DEBUG: RadioButtons [" + name + "]: " + radios.getValue()
                + " Value=" + shownValue;
        logerr(line);
        return;
    }

    if (field instanceof PDPushButton) {
        PDPushButton push = (PDPushButton) field;
        // Flags are printed in the order announced by the label.
        String flags = push.isNoExport() + "/" + push.isReadonly() + "/" + push.isRequired();
        logerr("DEBUG: Pushbuttons [" + name + "]: " + "Export/Readonly/Required=" + flags
                + " Value=" + shownValue);
    }
}

Running this particular code inside my loading part, reveals the following
output for any given PDF:

DEBUG: Checkbox [01.011.hkanrede]:  On=Frau Off=Off Checked=false
Value=_n/a_
DEBUG: Checkbox [01.011.hkanrede]:  On=0 Off=Off Checked=true Value=_n/a_
DEBUG: Checkbox [01.011.hksprache]:  On=0 Off=Off Checked=true Value=_n/a_
DEBUG: Checkbox [01.011.hksprache]:  On=1 Off=Off Checked=false Value=_n/a_
DEBUG: Checkbox [01.011.hksprache]:  On=2 Off=Off Checked=false Value=_n/a_
DEBUG: Checkbox [01.011.hksprache]:  On=3 Off=Off Checked=false Value=_n/a_

I have the distinct feeling that I am completely missing the point here.
The abstraction level of the PDFBox library should actually deal with this,
so the user does not have to, right? Although, you do have getOnValue() and
getOffValue() for example when it comes to PDCheckBox type fields.

Cheers
Roberto

Re: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi

The template PDF has the exact same form fields (and also javascript code
behind the form events) as the source PDF. So, I'd like to open the source
PDF, get all filled out values (text, checkbox, pushbuttons, you name it)
and put it into the template PDF and store it as a new output PDF.

Background: the template PDF has been created after the firm's
organisational CI/CD change. It's not immediately clear what really has
changed in the template PDF (some text, maybe a logo and the firm's name;
hence my third approach of only altering the source PDFs to do a search and
replace), however forms remained the same.

Hope I explained myself a bit better.

Thanks and best regards
Roberto

On Sun, Jun 28, 2015 at 10:05 PM, Maruan Sahyoun <sa...@fileaffairs.de>
wrote:

> Hi,
>
> > Am 28.06.2015 um 21:51 schrieb Roberto Nibali <rn...@gmail.com>:
> >
> > Hi
> >
> > Thanks for the quick reply. This is exactly the approach I wanted to take
> > for my next option.
> >
> > On Sun, Jun 28, 2015 at 8:54 PM, Tilman Hausherr <TH...@t-online.de>
> > wrote:
> >
> >> Here's some code I have from working on
> >> https://issues.apache.org/jira/browse/PDFBOX-2249 with the file
> >> JMACTest.pdf that is in that issue. While that issue was about listbox
> >> controls, the PDF file JMACTest.pdf does also have some radio and
> checkbox
> >> elements. Of the attached code, I tested changing "Check Box1" and
> "Group1"
> >> and it worked as expected. What I can see is that PDCheckbox uses a
> >> different approach than yours, so it may be worth a try. The other ones
> use
> >> all the same approach, i.e. setValue().
> >>
> >> What I would also suggest:
> >> Take one of the things that don't work. Then open the "old" and the
> "new"
> >> PDF with an editor like NOTEPAD++, search for the field name and look
> what
> >> the differences are. (And have you verified that the template PDF does
> >> really have the same field names, or the same type?)
> >>
> >> You could of course, while waiting for our expert, try the 2.0 version
> >> (see https://pdfbox.apache.org/2.0/getting-started.html ) and create a
> >> 2nd project and try to see whether it gets better.
> >>
> >> Tilman
> >>
> >>
> >> package testpdfbox18;
> >>
> >>
> >> import java.io.File;
> >> import java.io.IOException;
> >> import java.util.HashMap;
> >> import java.util.Iterator;
> >> import java.util.List;
> >> import java.util.Map;
> >>
> >> import org.apache.pdfbox.pdmodel.PDDocument;
> >> import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDChoiceField;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDField;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection;
> >> import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
> >>
> >> public class AcroFormTest
> >> {
> >>
> >>    public static void fillPDFForm(String inputFile, String outputFile,
> >>            Map<String, String> fieldValues, boolean
> >> ignoreUnknownFieldTypes) throws Exception
> >>    {
> >>        File myFile = new File(inputFile);
> >>        PDDocument pdDoc;
> >>        String fieldName = null;
> >>        String fieldType;
> >>        try
> >>        {
> >>            pdDoc = PDDocument.loadNonSeq(myFile, null);
> >>
> >>            PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
> >>            PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
> >>            pdAcroForm.setCacheFields(true);
> >>            List<PDField> l = pdAcroForm.getFields();
> >>            Iterator<PDField> it = l.iterator();
> >>
> >>            while (it.hasNext())
> >>            {
> >>                PDField f = it.next();
> >>                fieldName = f.getFullyQualifiedName();
> >>                fieldType = f.getClass().getSimpleName();
> >>                System.out.println(fieldType);
> >>                System.out.println(f.getClass().getName());
> >>                String fieldValue;
> >>                if (f instanceof PDTextbox)
> >>                {
> >>                    fieldValue = fieldValues.get(fieldName);
> >>                    if (fieldValue != null)
> >>                    {
> >>                        f.setValue(fieldValue);
> >>                    }
> >>
> >>                } // end PDTextbox
> >>                else if (f instanceof PDCheckbox)
> >>                {
> >>                    fieldValue = fieldValues.get(fieldName);
> >>                    if (("TRUE".equalsIgnoreCase(fieldValue))
> >>                            || ("CHECKED".equalsIgnoreCase(fieldValue))
> >>                            || ("YES".equalsIgnoreCase(fieldValue)))
> >>                    {
> >>                        ((PDCheckbox) f).check();
> >>                    }
> >>                    else
> >>                    {
> >>                        ((PDCheckbox) f).unCheck();
> >>                    }
> >>                } // end PDCheckbox
> >>                else if (f instanceof PDChoiceField)
> >>                {
> >>                    fieldValue = fieldValues.get(fieldName);
> >>                    if (fieldValue != null)
> >>                    {
> >>                        f.setValue(fieldValue);
> >>                    }
> >>                } // PDChoiceField
> >>                else if (f instanceof PDRadioCollection)
> >>                {
> >>                    fieldValue = fieldValues.get(fieldName);
> >>                    if (fieldValue != null)
> >>                    {
> >>                        f.setValue(fieldValue);
> >>                    }
> >>                } // end PDRadioCollection
> >>                else
> >>                {
> >>                    if (!ignoreUnknownFieldTypes)
> >>                    {
> >>                        throw new Exception("Fields of type [" +
> fieldType
> >> + "] are unsupported");
> >>                    }
> >>                }
> >>
> >>            }
> >>
> >
> > However, I do get the same dreaded:
> >
> > org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection cannot be
> cast
> > to org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox
> >
> > as I get with the following code:
> >
> >        HashMap<String, PDAcroForm> pdAcroFormMap = new HashMap<>();
> >        @SuppressWarnings("unchecked")
> >        List<PDField> oldFields =
> > oldPDF.getDocumentCatalog().getAcroForm().getFields();
> >        for (PDField pdField : oldFields) {
> >            pdAcroFormMap.put(pdField.getFullyQualifiedName(),
> > pdField.getAcroForm());
> >            pdField.getAcroForm().exportFDF();
> >        }
> >
> >        @SuppressWarnings("unchecked")
> >        List<PDField> templateFields =
> > oldPDF.getDocumentCatalog().getAcroForm().getFields();
> >        for (PDField pdField : templateFields) {
> >
> pdField.setAcroForm(pdAcroFormMap.get(pdField.getFullyQualifiedName()));
> >        }
> >
> > This is probably due to the class definitions of PDRadioCollection and
> > PDCheckbox.
>
> I'm not sure I understand what you are trying to achieve.
>
> Is ist that the template PDF has no form fields at all and you would like
> to add the fields from the source document to the template or would you
> like to get the values from the source and set the values of existing
> fields in the template PDF?
>
> BR
> Maruan
>
>
>
> >
> > I'll keep investigating ...
> >
> > Best regards
> > Roberto
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
> For additional commands, e-mail: users-help@pdfbox.apache.org
>
>

Re: Migrate form field entries from one pdf to another

Posted by Maruan Sahyoun <sa...@fileaffairs.de>.

Hi,

> Am 28.06.2015 um 21:51 schrieb Roberto Nibali <rn...@gmail.com>:
> 
> Hi
> 
> Thanks for the quick reply. This is exactly the approach I wanted to take
> for my next option.
> 
> On Sun, Jun 28, 2015 at 8:54 PM, Tilman Hausherr <TH...@t-online.de>
> wrote:
> 
>> Here's some code I have from working on
>> https://issues.apache.org/jira/browse/PDFBOX-2249 with the file
>> JMACTest.pdf that is in that issue. While that issue was about listbox
>> controls, the PDF file JMACTest.pdf does also have some radio and checkbox
>> elements. Of the attached code, I tested changing "Check Box1" and "Group1"
>> and it worked as expected. What I can see is that PDCheckbox uses a
>> different approach than yours, so it may be worth a try. The other ones use
>> all the same approach, i.e. setValue().
>> 
>> What I would also suggest:
>> Take one of the things that don't work. Then open the "old" and the "new"
>> PDF with an editor like NOTEPAD++, search for the field name and look what
>> the differences are. (And have you verified that the template PDF does
>> really have the same field names, or the same type?)
>> 
>> You could of course, while waiting for our expert, try the 2.0 version
>> (see https://pdfbox.apache.org/2.0/getting-started.html ) and create a
>> 2nd project and try to see whether it gets better.
>> 
>> Tilman
>> 
>> 
>> package testpdfbox18;
>> 
>> 
>> import java.io.File;
>> import java.io.IOException;
>> import java.util.HashMap;
>> import java.util.Iterator;
>> import java.util.List;
>> import java.util.Map;
>> 
>> import org.apache.pdfbox.pdmodel.PDDocument;
>> import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDChoiceField;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDField;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection;
>> import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
>> 
>> public class AcroFormTest
>> {
>> 
>>    public static void fillPDFForm(String inputFile, String outputFile,
>>            Map<String, String> fieldValues, boolean
>> ignoreUnknownFieldTypes) throws Exception
>>    {
>>        File myFile = new File(inputFile);
>>        PDDocument pdDoc;
>>        String fieldName = null;
>>        String fieldType;
>>        try
>>        {
>>            pdDoc = PDDocument.loadNonSeq(myFile, null);
>> 
>>            PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
>>            PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
>>            pdAcroForm.setCacheFields(true);
>>            List<PDField> l = pdAcroForm.getFields();
>>            Iterator<PDField> it = l.iterator();
>> 
>>            while (it.hasNext())
>>            {
>>                PDField f = it.next();
>>                fieldName = f.getFullyQualifiedName();
>>                fieldType = f.getClass().getSimpleName();
>>                System.out.println(fieldType);
>>                System.out.println(f.getClass().getName());
>>                String fieldValue;
>>                if (f instanceof PDTextbox)
>>                {
>>                    fieldValue = fieldValues.get(fieldName);
>>                    if (fieldValue != null)
>>                    {
>>                        f.setValue(fieldValue);
>>                    }
>> 
>>                } // end PDTextbox
>>                else if (f instanceof PDCheckbox)
>>                {
>>                    fieldValue = fieldValues.get(fieldName);
>>                    if (("TRUE".equalsIgnoreCase(fieldValue))
>>                            || ("CHECKED".equalsIgnoreCase(fieldValue))
>>                            || ("YES".equalsIgnoreCase(fieldValue)))
>>                    {
>>                        ((PDCheckbox) f).check();
>>                    }
>>                    else
>>                    {
>>                        ((PDCheckbox) f).unCheck();
>>                    }
>>                } // end PDCheckbox
>>                else if (f instanceof PDChoiceField)
>>                {
>>                    fieldValue = fieldValues.get(fieldName);
>>                    if (fieldValue != null)
>>                    {
>>                        f.setValue(fieldValue);
>>                    }
>>                } // PDChoiceField
>>                else if (f instanceof PDRadioCollection)
>>                {
>>                    fieldValue = fieldValues.get(fieldName);
>>                    if (fieldValue != null)
>>                    {
>>                        f.setValue(fieldValue);
>>                    }
>>                } // end PDRadioCollection
>>                else
>>                {
>>                    if (!ignoreUnknownFieldTypes)
>>                    {
>>                        throw new Exception("Fields of type [" + fieldType
>> + "] are unsupported");
>>                    }
>>                }
>> 
>>            }
>> 
> 
> However, I do get the same dreaded:
> 
> org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection cannot be cast
> to org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox
> 
> as I get with the following code:
> 
>        HashMap<String, PDAcroForm> pdAcroFormMap = new HashMap<>();
>        @SuppressWarnings("unchecked")
>        List<PDField> oldFields =
> oldPDF.getDocumentCatalog().getAcroForm().getFields();
>        for (PDField pdField : oldFields) {
>            pdAcroFormMap.put(pdField.getFullyQualifiedName(),
> pdField.getAcroForm());
>            pdField.getAcroForm().exportFDF();
>        }
> 
>        @SuppressWarnings("unchecked")
>        List<PDField> templateFields =
> oldPDF.getDocumentCatalog().getAcroForm().getFields();
>        for (PDField pdField : templateFields) {
>            pdField.setAcroForm(pdAcroFormMap.get(pdField.getFullyQualifiedName()));
>        }
> 
> This is probably due to the class definitions of PDRadioCollection and
> PDCheckbox.

I'm not sure I understand what you are trying to achieve. 

Is it that the template PDF has no form fields at all and you would like to add the fields from the source document to the template, or would you like to get the values from the source and set the values of existing fields in the template PDF?

BR
Maruan



> 
> I'll keep investigating ...
> 
> Best regards
> Roberto


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Roberto Nibali <rn...@gmail.com>.

Hi

Thanks for the quick reply. This is exactly the approach I wanted to take
for my next option.

On Sun, Jun 28, 2015 at 8:54 PM, Tilman Hausherr <TH...@t-online.de>
wrote:

> Here's some code I have from working on
> https://issues.apache.org/jira/browse/PDFBOX-2249 with the file
> JMACTest.pdf that is in that issue. While that issue was about listbox
> controls, the PDF file JMACTest.pdf does also have some radio and checkbox
> elements. Of the attached code, I tested changing "Check Box1" and "Group1"
> and it worked as expected. What I can see is that PDCheckbox uses a
> different approach than yours, so it may be worth a try. The other ones use
> all the same approach, i.e. setValue().
>
> What I would also suggest:
> Take one of the things that don't work. Then open the "old" and the "new"
> PDF with an editor like NOTEPAD++, search for the field name and look what
> the differences are. (And have you verified that the template PDF does
> really have the same field names, or the same type?)
>
> You could of course, while waiting for our expert, try the 2.0 version
> (see https://pdfbox.apache.org/2.0/getting-started.html ) and create a
> 2nd project and try to see whether it gets better.
>
> Tilman
>
>
> package testpdfbox18;
>
>
> import java.io.File;
> import java.io.IOException;
> import java.util.HashMap;
> import java.util.Iterator;
> import java.util.List;
> import java.util.Map;
>
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
> import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
> import org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox;
> import org.apache.pdfbox.pdmodel.interactive.form.PDChoiceField;
> import org.apache.pdfbox.pdmodel.interactive.form.PDField;
> import org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection;
> import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
>
> public class AcroFormTest
> {
>
>     public static void fillPDFForm(String inputFile, String outputFile,
>             Map<String, String> fieldValues, boolean
> ignoreUnknownFieldTypes) throws Exception
>     {
>         File myFile = new File(inputFile);
>         PDDocument pdDoc;
>         String fieldName = null;
>         String fieldType;
>         try
>         {
>             pdDoc = PDDocument.loadNonSeq(myFile, null);
>
>             PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
>             PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
>             pdAcroForm.setCacheFields(true);
>             List<PDField> l = pdAcroForm.getFields();
>             Iterator<PDField> it = l.iterator();
>
>             while (it.hasNext())
>             {
>                 PDField f = it.next();
>                 fieldName = f.getFullyQualifiedName();
>                 fieldType = f.getClass().getSimpleName();
>                 System.out.println(fieldType);
>                 System.out.println(f.getClass().getName());
>                 String fieldValue;
>                 if (f instanceof PDTextbox)
>                 {
>                     fieldValue = fieldValues.get(fieldName);
>                     if (fieldValue != null)
>                     {
>                         f.setValue(fieldValue);
>                     }
>
>                 } // end PDTextbox
>                 else if (f instanceof PDCheckbox)
>                 {
>                     fieldValue = fieldValues.get(fieldName);
>                     if (("TRUE".equalsIgnoreCase(fieldValue))
>                             || ("CHECKED".equalsIgnoreCase(fieldValue))
>                             || ("YES".equalsIgnoreCase(fieldValue)))
>                     {
>                         ((PDCheckbox) f).check();
>                     }
>                     else
>                     {
>                         ((PDCheckbox) f).unCheck();
>                     }
>                 } // end PDCheckbox
>                 else if (f instanceof PDChoiceField)
>                 {
>                     fieldValue = fieldValues.get(fieldName);
>                     if (fieldValue != null)
>                     {
>                         f.setValue(fieldValue);
>                     }
>                 } // PDChoiceField
>                 else if (f instanceof PDRadioCollection)
>                 {
>                     fieldValue = fieldValues.get(fieldName);
>                     if (fieldValue != null)
>                     {
>                         f.setValue(fieldValue);
>                     }
>                 } // end PDRadioCollection
>                 else
>                 {
>                     if (!ignoreUnknownFieldTypes)
>                     {
>                         throw new Exception("Fields of type [" + fieldType
> + "] are unsupported");
>                     }
>                 }
>
>             }
>

However, I do get the same dreaded:

org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection cannot be cast
to org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox

as I get with the following code:

        HashMap<String, PDAcroForm> pdAcroFormMap = new HashMap<>();
        @SuppressWarnings("unchecked")
        List<PDField> oldFields =
oldPDF.getDocumentCatalog().getAcroForm().getFields();
        for (PDField pdField : oldFields) {
            pdAcroFormMap.put(pdField.getFullyQualifiedName(),
pdField.getAcroForm());
            pdField.getAcroForm().exportFDF();
        }

        @SuppressWarnings("unchecked")
        List<PDField> templateFields =
oldPDF.getDocumentCatalog().getAcroForm().getFields();
        for (PDField pdField : templateFields) {
            pdField.setAcroForm(pdAcroFormMap.get(pdField.getFullyQualifiedName()));
        }

This is probably due to the class definitions of PDRadioCollection and
PDCheckbox.

I'll keep investigating ...

Best regards
Roberto

Re: Fwd: Migrate form field entries from one pdf to another

Posted by Tilman Hausherr <TH...@t-online.de>.

Here's some code I have from working on 
https://issues.apache.org/jira/browse/PDFBOX-2249 with the file 
JMACTest.pdf that is in that issue. While that issue was about listbox 
controls, the PDF file JMACTest.pdf does also have some radio and 
checkbox elements. Of the attached code, I tested changing "Check Box1" 
and "Group1" and it worked as expected. What I can see is that 
PDCheckbox uses a different approach than yours, so it may be worth a 
try. The other ones use all the same approach, i.e. setValue().

What I would also suggest:
Take one of the things that don't work. Then open the "old" and the 
"new" PDF with an editor like NOTEPAD++, search for the field name and 
look what the differences are. (And have you verified that the template 
PDF does really have the same field names, or the same type?)

You could of course, while waiting for our expert, try the 2.0 version 
(see https://pdfbox.apache.org/2.0/getting-started.html ) and create a 
2nd project and try to see whether it gets better.

Tilman


package testpdfbox18;


import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDCheckbox;
import org.apache.pdfbox.pdmodel.interactive.form.PDChoiceField;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDRadioCollection;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;

/**
 * Small stand-alone harness that fills the AcroForm fields of a PDF from a
 * name-to-value map and saves the result, using the PDFBox 1.8 API.
 */
public class AcroFormTest
{

    /**
     * Loads {@code inputFile}, sets every top-level AcroForm field whose fully
     * qualified name is a key of {@code fieldValues}, and saves the document
     * to {@code outputFile}.
     *
     * <p>Text, choice and radio fields are set through {@code setValue()} and
     * are left untouched when the map has no entry for them. Checkboxes are
     * checked when the mapped value is "TRUE", "CHECKED" or "YES" (ignoring
     * case) and unchecked in every other case, including a missing entry.
     *
     * @param inputFile path of the PDF to read
     * @param outputFile path the filled PDF is written to
     * @param fieldValues values keyed by fully qualified field name
     * @param ignoreUnknownFieldTypes when {@code false}, a field of any other
     *        type aborts processing with an exception
     * @throws Exception if the PDF has no AcroForm, an unsupported field type
     *         is met, or reading, filling or saving the document fails
     */
    public static void fillPDFForm(String inputFile, String outputFile,
            Map<String, String> fieldValues, boolean ignoreUnknownFieldTypes) throws Exception
    {
        File myFile = new File(inputFile);
        PDDocument pdDoc = null;
        String fieldName = null;
        try
        {
            pdDoc = PDDocument.loadNonSeq(myFile, null);

            PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
            PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
            if (pdAcroForm == null)
            {
                // Fail with a clear message instead of a bare NullPointerException.
                throw new Exception("No AcroForm found in [" + inputFile + "]");
            }
            pdAcroForm.setCacheFields(true);

            // NOTE(review): getFields() is assumed to return a raw List in PDFBox 1.8,
            // as the original assignment implied - confirm against the API docs.
            @SuppressWarnings("unchecked")
            List<PDField> fields = pdAcroForm.getFields();

            for (PDField f : fields)
            {
                fieldName = f.getFullyQualifiedName();
                String fieldType = f.getClass().getSimpleName();
                System.out.println(fieldType);
                System.out.println(f.getClass().getName());

                String fieldValue = fieldValues.get(fieldName);
                if (f instanceof PDCheckbox)
                {
                    // Checkboxes are toggled via check()/unCheck() rather than setValue().
                    if (("TRUE".equalsIgnoreCase(fieldValue))
                            || ("CHECKED".equalsIgnoreCase(fieldValue))
                            || ("YES".equalsIgnoreCase(fieldValue)))
                    {
                        ((PDCheckbox) f).check();
                    }
                    else
                    {
                        ((PDCheckbox) f).unCheck();
                    }
                }
                else if ((f instanceof PDTextbox)
                        || (f instanceof PDChoiceField)
                        || (f instanceof PDRadioCollection))
                {
                    if (fieldValue != null)
                    {
                        f.setValue(fieldValue);
                    }
                }
                else if (!ignoreUnknownFieldTypes)
                {
                    throw new Exception("Fields of type [" + fieldType + "] are unsupported");
                }
            }

            pdDoc.save(outputFile);
        }
        catch (IOException e)
        {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new Exception("Error processing field [" + fieldName + "] " + e.getMessage(), e);
        }
        finally
        {
            // Always release the document, on success and on failure.
            if (pdDoc != null)
            {
                pdDoc.close();
            }
        }
    } // end fillPDFForm

    /**
     * Fills JMACTest.pdf with a handful of sample values and writes
     * JMAC_PDFBOX_out.pdf; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String inputFile = "JMACTest.pdf";
        String outputFile = "JMAC_PDFBOX_out.pdf";
        Map<String, String> m = new HashMap<String, String>();

        m.put("Text1", "TEST Textfield valuE");
        m.put("Check Box1", "true");
        m.put("Dropdown1", "1");
        m.put("List Box1", "1");
        m.put("Group1", "RB1");

        try
        {
            AcroFormTest.fillPDFForm(inputFile, outputFile, m, false);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }

    } // end main
}


---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: users-help@pdfbox.apache.org