You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ju...@apache.org on 2010/03/18 16:13:43 UTC

svn commit: r924853 [2/2] - in /pdfbox/site: publish/ publish/commandlineutilities/ publish/userguide/ publish/userguide/cookbook/ src/site/ src/site/fml/ src/site/fml/userguide/ src/site/xdoc/ src/site/xdoc/userguide/ src/site/xdoc/userguide/cookbook/

Added: pdfbox/site/publish/userguide/tutorials.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/tutorials.html?rev=924853&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/tutorials.html (added)
+++ pdfbox/site/publish/userguide/tutorials.html Thu Mar 18 15:13:41 2010
@@ -0,0 +1,319 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - Tutorials</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Overlay.html">Overlay</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFDebugger.html">PDFDebugger</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFMerger.html">PDFMerger</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFReader.html">PDFReader</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFSplit.html">PDFSplit</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+              <strong>Tutorials</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/cookbook.html">Cookbook</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <div class="section"><h2><a name="Tutorials"></a>Tutorials</h2>
+<p>The Tutorials for PDFBox provide a quick start into some of the most common use cases for 
+      PDFBox as part of a PDF application.
+      In addition the <a href="cookbook.html">Cookbook</a> provides a wide range of samples including sample code. 
+      </p>
+<table class="bodyTable"><tr class="a"><th>Tutorial</th>
+<th>Description</th>
+</tr>
+<tr class="b"><td><a href="bookmarks.html">Bookmarks</a></td>
+<td>PDF Bookmarks allow for a quick navigation within documents similar to a Table of Contents. 
+      	  	The Tutorial covers how to read bookmarks as well as how to generate them.</td>
+</tr>
+<tr class="a"><td><a href="file_references.html">File References</a></td>
+<td>A PDF file can contain references to external files as well as files embedded in the PDF itself.
+      	  	The Tutorial covers how to handle such references as well as how to embed a file in a PDF document.</td>
+</tr>
+<tr class="b"><td><a href="highlighting.html">Highlighting</a></td>
+<td>PDF allows for text to be highlighted. This can be useful for example to highlight results of a search.
+      	  	 There are different ways to approach this covered in the tutorial.</td>
+</tr>
+<tr class="a"><td><a href="metadata.html">Metadata</a></td>
+<td>PDF documents can have metadata associated with the document or certain objects within the document. As an example 
+      	  	metadata can be used to store the author of a document as well as copyright information for an image embedded in the document.
+      	  	The Tutorial covers how to read and generate such metadata.</td>
+</tr>
+<tr class="b"><td><a href="text_extraction.html">Text Extraction</a></td>
+<td>One of the main features of PDFBox is its ability to quickly and accurately extract text from a variety of PDF documents.
+      	  	The tutorial covers different approaches to handle that task.</td>
+</tr>
+</table>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/tutorials.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/src/site/fml/userguide/faq.fml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/fml/userguide/faq.fml?rev=924853&view=auto
==============================================================================
--- pdfbox/site/src/site/fml/userguide/faq.fml (added)
+++ pdfbox/site/src/site/fml/userguide/faq.fml Thu Mar 18 15:13:41 2010
@@ -0,0 +1,184 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements.  See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License.  You may obtain a copy of the License at
+ !
+ !      http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !-->
+<!DOCTYPE faqs PUBLIC "-//APACHE//DTD FAQ V1.2//EN" "http://forrest.apache.org/dtd/faq-v12.dtd" [
+<!ENTITY s '<code>site.xml</code>'>
+]>
+
+<faqs title="Frequently Asked Questions">
+
+  <part id="general_questions">
+    <title>General Questions</title>
+    <faq id="next_version">
+      <question>
+        When will the next version of PDFBox be released?
+      </question>
+      <answer>
+          As fixes are made and integrated into the repository these changes are documented in the
+          <link href="../changes.html">release notes</link>.  An
+          estimate will be given of when
+          the next version will be released. <br />
+          Of course, this is only an estimate and could change.
+      </answer>
+    </faq>
+
+    <faq id="log4j_config">
+      <question>
+          I am getting the below Log4J warning message, how do I remove it?
+      </question>
+      <answer>
+		<source>
+		log4j:WARN No appenders could be found for logger (org.apache.pdfbox.util.ResourceLoader).
+		log4j:WARN Please initialize the log4j system properly.</source>
+        <p>
+        This message means that you need to configure the log4j logging system.
+        See the <link href="http://logging.apache.org/log4j/docs/documentation.html">log4j documentation</link> for more information.
+        </p>
+        <p>
+        PDFBox comes with a sample log4j configuration file.  To use it you set a system property like this</p>
+		<source>java -Dlog4j.configuration=log4j.xml org.apache.pdfbox.ExtractText &lt;PDF-file&gt; &lt;output-text-file&gt;
+        </source>
+        <p>If this is not working for you then you may have to specify the log4j config file using a URL path, like this:</p>
+        <source>log4j.configuration=file:///&lt;path to config file&gt;</source>
+        <p>Please see <a href="https://sourceforge.net/forum/forum.php?thread_id=1254229&amp;forum_id=267205">this</a> forum thread 
+        for more information.
+        </p>
+      </answer>
+    </faq>
+
+    <faq id="pdfbox_threadsafe">
+      <question>
+      Is PDFBox thread safe?
+      </question>
+      <answer>
+        <p>
+        No!  Only one thread may access a single document at a time.
+        You can have multiple threads each accessing their own PDDocument object.
+        </p>
+      </answer>
+    </faq>
+
+    <faq id="pdfbox_close_warning">
+      <question>
+      Why do I get a "Warning: You did not close the PDF Document"?
+      </question>
+      <answer>
+        <p>
+        You need to call close() on the PDDocument inside the finally block, if you
+        don't then the document will not be closed properly.  Also, you must close all
+        PDDocument objects that get created.  The following code creates <b>two</b>
+        PDDocument objects; one from the "new PDDocument()" and the second by the load method.
+        </p>
+        <source>
+		    PDDocument doc = new PDDocument();
+		    try
+		    {
+		        doc = PDDocument.load( "my.pdf" );
+		    }
+		    finally
+		    {
+		        if( doc != null )
+		        {
+		            doc.close();
+		        }
+		    }
+        </source>
+
+      </answer>
+    </faq>
+
+
+  </part>
+
+  <part id="text_extraction">
+    <title>Text Extraction</title>
+    <faq id="no_text_extraction">
+      <question>
+        How come I am not getting any text from the PDF document?
+      </question>
+      <answer>
+        <p>
+          Text extraction from a pdf document is a complicated task and there are many factors
+          involved that affect the possibility and accuracy of text extraction.  It would be helpful
+          to the PDFBox team if you could try a couple things.
+        </p>
+          <ul>
+          	<li>Open the PDF in Acrobat and try to extract text from there.  If Acrobat can extract text
+          	then PDFBox should be able to as well and it is a bug if it cannot.  If Acrobat cannot extract text then
+          	PDFBox 'probably' cannot either.</li>
+          	<li>It might really be an image instead of text.  Some PDF documents are just images that have
+          	been scanned in.  You can tell by using the selection tool in Acrobat, if you can't select
+          	any text then it is probably an image.</li>
+          </ul>
+
+      </answer>
+    </faq>
+    <faq id="gibberish_text">
+      <question>
+        How come I am getting gibberish(G38G43G36G51G5) when extracting text?
+      </question>
+      <answer>
+        <p>
+          This is because the characters in a PDF document can use a custom encoding
+          instead of unicode or ASCII.  When you see gibberish text then it
+          probably means that a meaningless internal encoding is being used.  The
+		   only way to access the text is to use OCR.  This may be a future
+		   enhancement.
+        </p>
+      </answer>
+    </faq>
+    <faq id="cant_handle_font_width">
+      <question>
+        What does "java.io.IOException: Can't handle font width" mean?
+      </question>
+      <answer>
+        <p>
+          This probably means that the "Resources" directory is not in your classpath.  The
+          Resources directory is included in the PDFBox jar so this is only a problem if you
+          are building PDFBox yourself and not using the binary.
+        </p>
+      </answer>
+    </faq>
+    <faq id="no_permission">
+      <question>
+        Why do I get "You do not have permission to extract text" on some documents?
+      </question>
+      <answer>
+        <p>
+          PDF documents have certain security permissions that can
+          be applied to them and two passwords associated with them, a user password and a master password.
+          If the "cannot extract text" permission bit is set then you need
+          to decrypt the document with the master password in order to extract the text.
+        </p>
+      </answer>
+    </faq>
+    <faq id="parse_whole_document">
+        <question>Can't we just extract the text without parsing the whole document or extract text as it is parsed?</question>
+        <answer>
+        <p>
+        Not really, for a couple reasons.
+        </p>
+        <ol>
+            <li>If the document is encrypted then you need to parse at least until the encryption dictionary before you can decrypt.</li>
+            <li>Sometimes the PDFont contains vital information needed for text extraction.</li>
+            <li>Text on a page does not have to be drawn in reading order.  For example, if the page said "Hello World", the pdf could
+                have been written such that "World" gets drawn and then the cursor moves to the left and the word "Hello" is drawn.</li>
+        </ol>
+        </answer>
+    </faq>
+  </part>
+
+</faqs>

Modified: pdfbox/site/src/site/site.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/site.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/site.xml (original)
+++ pdfbox/site/src/site/site.xml Thu Mar 18 15:13:41 2010
@@ -69,6 +69,8 @@
       <item name="Index" href="userguide/index.html"/>
       <item name="Bookmarks" href="userguide/bookmarks.html"/>
       <item name="Building PDFBox" href="userguide/building_pdfbox.html"/>
+      <item name="Tutorials" href="userguide/tutorials.html"/>
+      <item name="Cookbook" href="userguide/cookbook.html"/>
       <item name="FAQ" href="userguide/faq.html"/>
       <item name="File References" href="userguide/file_references.html"/>
       <item name="Fonts" href="userguide/fonts.html"/>

Modified: pdfbox/site/src/site/xdoc/references.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/references.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/xdoc/references.xml (original)
+++ pdfbox/site/src/site/xdoc/references.xml Thu Mar 18 15:13:41 2010
@@ -105,7 +105,7 @@
       		<td>Lucenemodule is a plugin (module) for the MMBase content management system that enables Lucene full text search through it's content, and thanks to PDFBox also PDF content.</td>
       	</tr>
       	<tr>
-      		<td><a href="http://incubator.apache.org/nutch/">Nutch</a></td>
+      		<td><a href="http://lucene.apache.org/nutch/">Nutch</a></td>
       		<td>ASL</td>
       		<td>Nutch is open source web-search software. It builds on Lucene, adding web-specifics, such as a crawler, a link-graph database, parsers for HTML and other document formats, etc.</td>
       	</tr>

Modified: pdfbox/site/src/site/xdoc/userguide/bookmarks.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/bookmarks.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/bookmarks.xml (original)
+++ pdfbox/site/src/site/xdoc/userguide/bookmarks.xml Thu Mar 18 15:13:41 2010
@@ -56,13 +56,10 @@
       }
       </source>
     </section>
-    <section>
-      <title>Creating Bookmarks</title>
-      <p>
-      <p>
-      See example:<a href="../apidocs/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.html">CreateBookmarks</a>
+    <section name="Creating Bookmarks">
+      <p>See example:<a href="../apidocs/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.html">CreateBookmarks</a>
       </p>
-      Creating bookmarks is just as easy.  You first need to create the PDDocumentOutline and then
+      <p>Creating bookmarks is just as easy.  You first need to create the PDDocumentOutline and then
       add some PDOutlineItem objects to it.
       </p>
       <source>
@@ -83,7 +80,6 @@
       firstPageItem.setTitle( "First Page of document" );
       firstPageItem.setDestination( firstPage );
       root.appendChild( firstPageItem );
-
       </source>
       <note>
       By default all nodes in the outline tree are closed.  You need to call openNode() if you want

Added: pdfbox/site/src/site/xdoc/userguide/cookbook.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/cookbook.xml?rev=924853&view=auto
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/cookbook.xml (added)
+++ pdfbox/site/src/site/xdoc/userguide/cookbook.xml Thu Mar 18 15:13:41 2010
@@ -0,0 +1,273 @@
+	<!--
+		! Licensed to the Apache Software Foundation (ASF) under one or more !
+		contributor license agreements. See the NOTICE file distributed with !
+		this work for additional information regarding copyright ownership. !
+		The ASF licenses this file to You under the Apache License, Version
+		2.0 ! (the "License"); you may not use this file except in compliance
+		with ! the License. You may obtain a copy of the License at ! !
+		http://www.apache.org/licenses/LICENSE-2.0 ! ! Unless required by
+		applicable law or agreed to in writing, software ! distributed under
+		the License is distributed on an "AS IS" BASIS, ! WITHOUT WARRANTIES
+		OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+		License for the specific language governing permissions and !
+		limitations under the License. !
+	-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+	<properties>
+		<title>PDFBox - Cookbook</title>
+	</properties>
+	<body>
+		<section name="Cookbook">
+			<p>
+				The Cookbook for PDFBox is a collection of source code samples to
+				help using PDFBox.
+				The samples are a growing collection of individual topics covering a
+				wide range of PDF applications.
+				In addition the <a href="tutorials.html">Tutorials</a>
+				cover some of the most common applications of PDFBox.
+			</p>
+
+			<subsection name="Document creation">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td><a href="./cookbook/creation.html#CreateBlankPDF">CreateBlankPDF</a>
+						</td>
+						<td>This small sample shows how to create a new PDF document using PDFBox.</td>
+					</tr>
+					<tr>
+						<td><a href="./cookbook/creation.html#HelloWorld">HelloWorld</a>
+						</td>
+						<td>This small sample shows how to create a new document and print the text "Hello World" using one of the PDF base fonts.</td>
+					</tr>
+					<tr>
+						<td><a href="./cookbook/creation.html#HelloWorldTTF">HelloWorldTTF</a>
+						</td>
+						<td>This small sample shows how to create a new document and print the text "Hello World" using a TrueType Font.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/HelloWorldType1AfmPfb.html">HelloWorldType1AfmPfb</a>
+						</td>
+						<td>This is an example that creates a simple document with a Type 1 font (afm + pfb).</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/ImageToPDF.html">ImageToPDF</a>
+						</td>
+						<td>This is an example that creates a simple document from an image.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/ShowColorBoxes.html">ShowColorBoxes</a>
+						</td>
+						<td>This is an example that creates a simple document with different boxes.</td>
+					</tr>
+
+				</table>
+			</subsection>
+
+			<subsection name="Working with metadata">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/PrintDocumentMetaData.html">PrintDocumentMetaData</a>
+						</td>
+						<td>This is an example on how to get a document's metadata information.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.html">AddMetadataFromDocInfo</a>
+						</td>
+						<td>This is an example on how to add metadata to a document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.html">ExtractMetadata</a>
+						</td>
+						<td>This is an example on how to extract metadata from a PDF document.</td>
+					</tr>
+
+				</table>
+			</subsection>
+
+			<subsection name="Dealing with forms">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/fdf/PrintFields.html">PrintFields</a>
+						</td>
+						<td>Shows how to print all the fields from a PDF document</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/fdf/SetField.html">SetField</a>
+						</td>
+						<td>Shows how to set the value of a form field in a PDF document</td>
+					</tr>
+				</table>
+			</subsection>
+				
+			<subsection name="Using the PDModel">	
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/AddImageToPDF.html">AddImageToPDF</a>
+						</td>
+						<td>This is an example that reads a PDF document and adds an image to it.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/AddJavascript.html">AddJavascript</a>
+						</td>
+						<td>This is an example of how to set some JavaScript in the document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/AddMessageToEachPage.html">AddMessageToEachPage</a>
+						</td>
+						<td>This is an example of how to add a message to every page in a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/Annotation.html">Annotation</a>
+						</td>
+						<td>This is an example on how to add annotations to pages of a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/CreateBookmarks.html">CreateBookmarks</a>
+						</td>
+						<td>This is an example on how to add bookmarks to a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.html">EmbeddedFiles</a>
+						</td>
+						<td>This is an example that creates a simple document and embeds a file into it.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/GoToSecondBookmarkOnOpen.html">GoToSecondBookmarkOnOpen</a>
+						</td>
+						<td>This is an example on how to add an action to go to the second page when the PDF is opened.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/PrintBookmarks.html">PrintBookmarks</a>
+						</td>
+						<td>This is an example on how to access the bookmarks that are part of a pdf document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/PrintURLs.html">PrintURLs</a>
+						</td>
+						<td>This is an example of how to access a URL in a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/RemoveFirstPage.html">RemoveFirstPage</a>
+						</td>
+						<td>This is an example on how to remove pages from a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/ReplaceString.html">ReplaceString</a>
+						</td>
+						<td>This is an example that will replace a string in a PDF with a new one.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/ReplaceURLs.html">ReplaceURLs</a>
+						</td>
+						<td>This is an example of how to replace a URL in a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/RubberStamp.html">RubberStamp</a>
+						</td>
+						<td>This is an example on how to add annotations to pages of a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/RubberStampWithImage.html">RubberStampWithImage</a>
+						</td>
+						<td>This is an example on how to add a rubber stamp with an image to pages of a PDF document.</td>
+					</tr>
+					<tr>
+						<td><a href="../apidocs/org/apache/pdfbox/examples/pdmodel/UsingTextMatrix.html">UsingTextMatrix</a>
+						</td>
+						<td>This is an example of how to use a text matrix.</td>
+					</tr>
+				</table>
+			</subsection>
+	
+			<subsection name="PDFBox persistence features">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/persistence/CopyDoc.html">CopyDoc</a>
+						</td>
+						<td>This is an example used to copy a document's contents from a source doc to a destination doc via an in-memory document representation.</td>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/persistence/WriteDecodedDoc.html">WriteDecodedDoc</a>
+						</td>
+						<td>Shows how to load a PDF document and write with all streams decoded.</td>
+					</tr>
+				</table>
+			</subsection>
+			
+			<subsection name="Working with signatures">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/signature/ShowSignature.html">ShowSignature</a>
+						</td>
+						<td>This example shows how to gain access to the PDF signature.</td>
+					</tr>
+				</table>
+			</subsection>
+
+			<subsection name="Text and Image locations">
+				<table>
+					<tr>
+						<th>Sample</th>
+						<th>Description</th>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/util/ExtractTextByArea.html">ExtractTextByArea</a>
+						</td>
+						<td>This is an example on how to extract text from a specific area on the PDF document.</td>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/util/PrintImageLocations.html">PrintImageLocations</a>
+						</td>
+						<td>This is an example on how to get the x/y coordinates of image locations.</td>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/util/PrintTextLocations.html">PrintTextLocations</a>
+						</td>
+						<td>This is an example on how to get some x/y coordinates of text.</td>
+					</tr>
+					<tr>
+						<td>
+						<a href="../apidocs/org/apache/pdfbox/examples/util/RemoveAllText.html">RemoveAllText</a>
+						</td>
+						<td>This is an example on how to remove all text from a PDF document.</td>
+					</tr>
+				</table>
+			</subsection>
+
+			
+		</section>
+
+
+	</body>
+</document>

Propchange: pdfbox/site/src/site/xdoc/userguide/cookbook.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/src/site/xdoc/userguide/cookbook/creation.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/cookbook/creation.xml?rev=924853&view=auto
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/cookbook/creation.xml (added)
+++ pdfbox/site/src/site/xdoc/userguide/cookbook/creation.xml Thu Mar 18 15:13:41 2010
@@ -0,0 +1,122 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements.  See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License.  You may obtain a copy of the License at
+ !
+ !      http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+  <properties>
+    <title>PDFBox - Cookbook</title>
+  </properties>
+  <body>
+    <section name="Cookbook - Document Creation">
+		<subsection name="CreateBlankPDF" id="CreateBlankPDF">    
+        <p>This small sample shows how to create a new PDF document using PDFBox.</p>
+        <p>First we need a new empty document:</p>
+        <source>
+		document = new PDDocument();
+		</source>
+		<p>As every document needs at least one page we have to add a blank page to the newly created document:</p>
+		<source>
+		PDPage blankPage = new PDPage();
+		document.addPage( blankPage );
+		</source>
+		<p>Now we can save the newly created document:</p>
+		<source>
+		document.save("BlankPage.pdf");
+        </source>
+        <p>Finally we have to ensure that the document is properly closed:</p>
+        <source>
+		document.close();
+        </source>
+        <p>Full source code at
+        <a href="http://svn.apache.org/repos/asf/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/CreateBlankPDF.java">CreateBlankPDF</a></p>
+        </subsection>
+
+		<subsection name="HelloWorld" id="HelloWorld">    
+        <p>This small sample shows how to create a new document and print the text "Hello World" using one of the PDF base fonts.</p>
+        <p>First we need a new empty document and add a page to it:</p>
+        <source>
+		document = new PDDocument();
+		PDPage page = new PDPage();
+		document.addPage( page );
+		</source>
+		<p>Next we have to create a new font object selecting one of the PDF base fonts:</p>
+		<source>
+		PDFont font = PDType1Font.HELVETICA_BOLD;
+		</source>
+		<p>Next we start a new content stream which will "hold" the to be created content:</p>
+		<source>
+		PDPageContentStream contentStream = new PDPageContentStream(document, page);
+		</source>
+		<p>Next we define a text content stream using the selected font, moving the cursor and drawing the text "Hello World":</p>
+		<source>
+		contentStream.beginText();
+		contentStream.setFont( font, 12 );
+		contentStream.moveTextPositionByAmount( 100, 700 );
+		contentStream.drawString( "Hello World" );
+		contentStream.endText();
+		</source>
+		<p>We <strong>have to</strong> make sure that the content stream is closed:</p>
+		<source>
+		contentStream.close();
+		</source>
+		<p>Finally we save the results and ensure that the document is properly closed:</p>
+		<source>
+		document.save( "Hello World.pdf");
+		document.close();
+        </source>
+        <p>Full source code at
+        <a href="http://svn.apache.org/repos/asf/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorld.java">HelloWorld</a></p>
+        </subsection>
+
+		<subsection name="HelloWorldTTF" id="HelloWorldTTF">    
+        <p>This small sample shows how to create a new document and print the text "Hello World" using a TrueType Font.</p>
+        <p>First we need a new empty document and add a page to it:</p>
+        <source>
+		document = new PDDocument();
+		PDPage page = new PDPage();
+		document.addPage( page );
+		</source>
+		<p>Next we have to create a new font object loading a TrueType font into the document:</p>
+		<source>
+		PDFont font = PDTrueTypeFont.loadTTF(document, "Arial.ttf");
+		</source>
+		<p>Next we start a new content stream which will "hold" the to be created content:</p>
+		<source>
+		PDPageContentStream contentStream = new PDPageContentStream(document, page);
+		</source>
+		<p>Next we define a text content stream using the selected font, moving the cursor and drawing the text "Hello World":</p>
+		<source>
+		contentStream.beginText();
+		contentStream.setFont( font, 12 );
+		contentStream.moveTextPositionByAmount( 100, 700 );
+		contentStream.drawString( "Hello World" );
+		contentStream.endText();
+		</source>
+		<p>We <strong>have to</strong> make sure that the content stream is closed:</p>
+		<source>
+		contentStream.close();
+		</source>
+		<p>Finally we save the results and ensure that the document is properly closed:</p>
+		<source>
+		document.save( "Hello World.pdf");
+		document.close();
+        </source>
+        <p>Full source code at
+        <a href="http://svn.apache.org/repos/asf/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/HelloWorldTTF.java">HelloWorldTTF</a></p>
+        </subsection>
+    </section>
+  </body>
+</document>

Propchange: pdfbox/site/src/site/xdoc/userguide/cookbook/creation.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/site/src/site/xdoc/userguide/fonts.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/fonts.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/fonts.xml (original)
+++ pdfbox/site/src/site/xdoc/userguide/fonts.xml Thu Mar 18 15:13:41 2010
@@ -20,7 +20,7 @@
     <title>PDFBox - PDF Fonts</title>
   </properties>
   <body>
-  	<section name="tandard 14 Fonts">
+  	<section name="Standard 14 Fonts">
   		<p>
   		The PDF specification states that a standard set of 14 fonts will always be available when consuming
   		PDF documents.  In PDFBox these are defined as constants in the PDType1Font class.
@@ -44,7 +44,7 @@
   		</table>
   	</section>
     <section name="TrueType Fonts">
-      <section name="Embedding TrueType Fonts">
+      <subsection name="Embedding TrueType Fonts">
       	<p>
       	PDFBox supports embedding TrueType fonts.  Loading a new font is easy.
       	</p>
@@ -52,8 +52,8 @@
       PDDocument doc = PDDocument.load( ... );
       PDFont font = PDTrueTypeFont.loadTTF( doc, new File( "SpecialFont.ttf" ) );</source>
 
-      </section>
-      <section name="External TrueType Fonts">
+      </subsection>
+      <subsection name="External TrueType Fonts">
       	<p>
       	While it is recommended to embed all fonts for greatest portability not all PDF producer applications
       	will do this.  When displaying a PDF it is necessary to find an external font to use.
@@ -63,7 +63,7 @@
       	font names to TTF font files.  The <i>UNKNOWN_FONT</i> property in that file will tell PDFBox which font
       	to use when no mapping exists.
       	</p>
-      </section>
+      </subsection>
     </section>
   </body>
 </document>

Modified: pdfbox/site/src/site/xdoc/userguide/highlighting.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/highlighting.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/highlighting.xml (original)
+++ pdfbox/site/src/site/xdoc/userguide/highlighting.xml Thu Mar 18 15:13:41 2010
@@ -16,10 +16,10 @@
  !-->
 <!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
 <document>
-  <properties>
+  <header>
     <title>PDFBox - PDF Highlighting</title>
     <meta name="keywords">Java PDF Library, highlight, highlight pdf, highlight pdf text, java</meta>
-  </properties>
+  </header>
   <body>
     <section name="Highlighting text in a PDF">
         <p>

Modified: pdfbox/site/src/site/xdoc/userguide/metadata.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/metadata.xml?rev=924853&r1=924852&r2=924853&view=diff
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/metadata.xml (original)
+++ pdfbox/site/src/site/xdoc/userguide/metadata.xml Thu Mar 18 15:13:41 2010
@@ -21,6 +21,32 @@
     <meta name="keywords">Java PDF Library, XMP, PDF metadata, pdfbox</meta>
   </properties>
   <body>
+  	<section name="Introduction">
+  		<p>PDF documents can contain information describing the document itself or certain objects within the document such as 
+  		the author of the document or its creation date. Basic information can be set and retrieved using the 
+  		<a href="../apidocs/org/apache/pdfbox/pdmodel/PDDocumentInformation.html">PDDocumentInformation</a> object.
+  		</p>
+  		<p>In addition to that, more metadata can be retrieved using the XML metadata as described below.</p>
+  	</section>
+    <section name="Getting basic Metadata">
+      <p>
+      To set or retrieve basic information about the document the PDDocumentInformation object provides a high level API to that information: 
+      </p>
+      <source>
+      PDDocumentInformation info = document.getDocumentInformation();
+      System.out.println( "Page Count=" + document.getNumberOfPages() );
+      System.out.println( "Title=" + info.getTitle() );
+      System.out.println( "Author=" + info.getAuthor() );
+      System.out.println( "Subject=" + info.getSubject() );
+      System.out.println( "Keywords=" + info.getKeywords() );
+      System.out.println( "Creator=" + info.getCreator() );
+      System.out.println( "Producer=" + info.getProducer() );
+      System.out.println( "Creation Date=" + info.getCreationDate() );
+      System.out.println( "Modification Date=" + info.getModificationDate());
+      System.out.println( "Trapped=" + info.getTrapped() );      
+      </source>
+	</section>  
+  
     <section name="Accessing PDF Metadata">
       <p>
       See class:<a href="../apidocs/org/apache/pdfbox/pdmodel/common/PDMetadata.html">org.apache.pdfbox.pdmodel.common.PDMetadata</a> <br/>

Added: pdfbox/site/src/site/xdoc/userguide/tutorials.xml
URL: http://svn.apache.org/viewvc/pdfbox/site/src/site/xdoc/userguide/tutorials.xml?rev=924853&view=auto
==============================================================================
--- pdfbox/site/src/site/xdoc/userguide/tutorials.xml (added)
+++ pdfbox/site/src/site/xdoc/userguide/tutorials.xml Thu Mar 18 15:13:41 2010
@@ -0,0 +1,64 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements.  See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License.  You may obtain a copy of the License at
+ !
+ !      http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+  <properties>
+    <title>PDFBox - Tutorials</title>
+  </properties>
+  <body>
+    <section name="Tutorials">
+      <p>The Tutorials for PDFBox provide a quick start into some of the most common use cases for 
+      PDFBox as part of a PDF application.
+      In addition the <a href="cookbook.html">Cookbook</a> provides a wide range of samples including sample code. 
+      </p>
+      
+      <table>
+          <tr>
+      		<th>Tutorial</th>
+      		<th>Description</th>
+      	  </tr>
+      	  <tr>
+      	  	<td><a href="bookmarks.html">Bookmarks</a></td>
+      	  	<td>PDF Bookmarks allow for a quick navigation within documents similar to a Table of Contents. 
+      	  	The Tutorial covers how to read bookmarks as well as how to generate them.</td>
+      	  </tr>
+      	  <tr>
+      	  	<td><a href="file_references.html">File References</a></td>
+      	  	<td>A PDF file can contain references to external files as well as files embedded in the PDF itself.
+      	  	The Tutorial covers how to handle such references as well as how to embed a file in a PDF document.</td>
+      	  </tr>
+      	  <tr>
+      	  	<td><a href="highlighting.html">Highlighting</a></td>
+      	  	<td>PDF allows for text to be highlighted. This can be useful for example to highlight results of a search.
+      	  	 There are different ways to approach this covered in the tutorial.</td>
+      	  </tr>
+      	  <tr>
+      	  	<td><a href="metadata.html">Metadata</a></td>
+      	  	<td>PDF documents can have metadata associated with the document or certain objects within the document. As an example 
+      	  	metadata can be used to store the author of a document as well as copyright information for an image embedded in the document.
+      	  	The Tutorial covers how to read and generate such metadata.</td>
+      	  </tr>
+      	  <tr>
+      	  	<td><a href="text_extraction.html">Text Extraction</a></td>
+      	  	<td>One of the main features of PDFBox is its ability to quickly and accurately extract text from a variety of PDF documents.
+      	  	The tutorial covers different approaches to handle that task.</td>
+      	  </tr>
+      </table>
+     
+    </section>
+  </body>
+</document>

Propchange: pdfbox/site/src/site/xdoc/userguide/tutorials.xml
------------------------------------------------------------------------------
    svn:eol-style = native