You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2014/03/19 19:34:05 UTC

svn commit: r1579344 - in /poi/site/publish/document: ./ docoverview.html index.html projectplan.html quick-guide.html

Author: nick
Date: Wed Mar 19 18:34:04 2014
New Revision: 1579344

URL: http://svn.apache.org/r1579344
Log:
Add the new document directory

Added:
    poi/site/publish/document/
    poi/site/publish/document/docoverview.html
    poi/site/publish/document/index.html
    poi/site/publish/document/projectplan.html
    poi/site/publish/document/quick-guide.html

Added: poi/site/publish/document/docoverview.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/docoverview.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/docoverview.html (added)
+++ poi/site/publish/document/docoverview.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,302 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file.  Do not edit.  ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {  
+if (window.print) {
+    window.print() ;  
+} else {
+    var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+    WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box    WebBrowser1.outerHTML = "";  
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape"); // Browser check for the PRINT link; NS is true when appName is "Netscape" (not referenced again in this script)
+var VERSION = parseInt(navigator.appVersion); // leading integer parsed from the browser's appVersion string
+if (VERSION > 3) { // only write the PRINT link when the reported version is above 3
+    document.write('  <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+		
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+	
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+		
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+		
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+		
+<div class="menuItem">
+<span class="menuSelected">HWPF Format</span>
+</div>
+		
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+	
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+                          Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+ 
+
+ 
+  
+<a name="The+Word+97+File+Format+in+semi-plain+English"></a>
+<div class="h3">
+<h3>The Word 97 File Format in semi-plain English</h3>
+</div>
+
+   
+<p>The purpose of this document is to give a brief high level overview of the
+      HWPF document format. This document does not go into in-depth technical
+      detail and is only meant as a supplement to the Microsoft Word 97-2007 
+      Binary File Format freely available from 
+      <a href="http://www.microsoft.com/interop/docs/officebinaryformats.mspx">Microsoft</a>.</p>
+   
+<p>The OLE file format is not discussed in this document. It is assumed that
+      the reader has a working knowledge of the POIFS API. </p>
+
+   
+<a name="Word+file+structure"></a>
+<div class="h4">
+<h4>Word file structure</h4>
+</div>
+    
+<p>A Word file is made up of the document text and data structures
+       containing formatting information about the text. Of course, this is a
+       very simplified illustration. There are fields and macros and other
+       things that have not been considered. At this stage, HWPF is mainly
+       concerned with formatted text.</p>
+   
+   
+<a name="Reading+Word+files"></a>
+<div class="h4">
+<h4>Reading Word files</h4>
+</div>
+    
+<p>The entry point for HWPF's reading of a Word file is the File Information
+       Block (FIB). This structure is the entry point for the locations and size
+       of a document's text and data structures. The FIB is located at the
+       beginning of the main stream.</p>
+    
+<a name="Text"></a>
+<div class="h2">
+<h2>Text</h2>
+</div>
+     
+<p>The document's text is also located in the main stream. Its starting
+        location is given as FIB.fcMin and its length is given in bytes by
+        FIB.ccpText. These two values are not very useful in getting the text
+        because of unicode. There may be unicode text intermingled with ASCII
+        text. That brings us to the piece table.</p>
+     
+<p>The piece table is used to divide the text into non-unicode and unicode
+        pieces. The offset and size are given in FIB.fcClx and FIB.lcbClx
+        respectively. The piece table may contain Property Modifiers (prm).
+        These are for complex(fast-saved) files and are skipped. Each text piece
+        contains offsets in the main stream that contain text for that piece.
+        If the piece uses unicode, the file offset is masked with a certain bit.
+        Then you have to unmask the bit and divide by 2 to get the real file
+        offset. </p>
+    
+    
+<a name="Text+Formatting"></a>
+<div class="h2">
+<h2>Text Formatting</h2>
+</div>
+     
+<a name="Stylesheet"></a>
+<div class="h5">
+<h5>Stylesheet</h5>
+</div>
+      
+<p>All text formatting is based on styles contained in the StyleSheet.
+         The StyleSheet is a data structure containing among other things, style
+         descriptions. Each style description can contain a paragraph style and
+         a character style or simply a character style. Each style description
+         is stored in a compressed version on file. Basically these are deltas
+         from another style.</p>
+      
+<p>Eventually, you have to chain back to the nil style which is an
+         imaginary style with certain implied values.</p>
+     
+     
+<a name="Paragraph+and+Character+styles"></a>
+<div class="h5">
+<h5>Paragraph and Character styles</h5>
+</div>
+      
+<p>Paragraph and Character formatting properties for a document's text are
+         stored on file as deltas from some base style in the Stylesheet. The
+         deltas are used to create a complete uncompressed style in memory.</p>
+      
+<p>Uncompressed paragraph styles are represented by the Paragraph
+         Properties(PAP) data structure. Uncompressed character styles are
+         represented by the Character Properties(CHP) data structure. The styles
+         for the document text are stored in compressed format in the
+         corresponding Formatted Disk Pages (FKP). A compressed PAP is referred
+         to as a PAPX and a compressed CHP is a CHPX. The FKP locations are
+         stored in the bin table. There are separate bin tables for CHPXs and
+         PAPXs. The bin tables' locations and sizes are stored in the FIB.</p>
+      
+<p>A FKP is a 512 byte OLE page. It contains the offsets of the beginning
+         and end of each paragraph/character run in the main stream and the
+         compressed properties for that interval. The compressed PAPX is based on
+         its base style in the StyleSheet. The compressed CHPX is based on the
+         enclosing paragraph's base style in the Stylesheet.</p>
+     
+     
+<a name="Uncompressing+styles+and+other+data+structures"></a>
+<div class="h5">
+<h5>Uncompressing styles and other data structures</h5>
+</div>
+      
+<p>All compressed properties(CHPX, PAPX, SEPX) contain a grpprl. A grpprl
+         is an array of sprms. A sprm defines a delta from some base property.
+         There is a table of possible sprms in the Word 97 spec. Each sprm is a
+         two byte operand followed by a parameter. The parameter size depends on
+         the sprm. Each sprm describes an operation that should be performed on
+         the base style. After every sprm in the grpprl is performed on the base
+         style you will have the style for the paragraph, character run,
+         section, etc.</p>
+     
+    
+   
+  
+ 
+
+<div id="authors" align="right">by&nbsp;S. Ryan Ackley</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+              Copyright &copy; 2002-2012&nbsp;The Apache Software Foundation. All rights reserved.<br>
+              Apache POI, POI, Apache, the Apache feather logo, and the Apache 
+              POI project logo are trademarks of The Apache Software Foundation.
+            </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>

Added: poi/site/publish/document/index.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/index.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/index.html (added)
+++ poi/site/publish/document/index.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,392 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file.  Do not edit.  ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF and XWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {  
+if (window.print) {
+    window.print() ;  
+} else {
+    var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+    WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box    WebBrowser1.outerHTML = "";  
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape"); // Browser check for the PRINT link; NS is true when appName is "Netscape" (not referenced again in this script)
+var VERSION = parseInt(navigator.appVersion); // leading integer parsed from the browser's appVersion string
+if (VERSION > 3) { // only write the PRINT link when the reported version is above 3
+    document.write('  <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+<div class="menuItem">
+<span class="menuSelected">Overview</span>
+</div>
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+                          Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF and XWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+ 
+
+ 
+ 
+<a name="Overview"></a>
+<div class="h3">
+<h3>Overview</h3>
+</div>
+
+  
+<p>HWPF is the name of our port of the Microsoft Word 97(-2007) file format
+    to pure Java. It also provides limited read only support for the older
+    Word 6 and Word 95 file formats.</p>
+
+  
+<p>The partner to HWPF for the new Word 2007 .docx format is <em>XWPF</em>.
+    Whilst HWPF and XWPF provide similar features, there is not a common
+    interface across the two of them at this time.</p>
+
+  
+<p>Both HWPF and XWPF could be described as "moderately functional". For some
+    use cases, especially around text extraction, support is very strong. For
+    others, support may be limited or incomplete, and it may be necessary to
+    dig down into low-level code. Error checking may be missing in places,
+    so it may be possible to accidentally generate invalid files. Enhancements
+    to fix such things are generally very well received!</p>
+
+  
+<p>As detailed in the <a href="/overview.html#components">Components 
+    Page</a>, HWPF is contained within the Scratchpad jar, while XWPF
+    is in the OOXML jar. You will need to ensure you include the appropriate
+    jars (and their dependencies!) in your classpath to use HWPF or XWPF.</p>
+
+   
+   
+<a name="An+overview+of+the+code"></a>
+<div class="h3">
+<h3>An overview of the code</h3>
+</div>
+    
+    
+<p>
+        Source code in the
+        <em>org.apache.poi.hdf</em>
+        tree is the old legacy code. Source in the
+        <em>org.apache.poi.hwpf.model</em>
+        tree is the old legacy code refactored into a new object model. Those packages contain
+        Java representations of the internal Word format structures. This code is "internal", it shall not
+        be used by your code. Because of backward-compatibility some API still has references to
+        those packages. They are subject to be deprecated and removed. Code from
+        <em>org.apache.poi.hwpf.usermodel</em>
+        package is actual public and user-friendly (as much as possible) API to access document
+        parts. Source code in the
+        <em>org.apache.poi.hwpf.extractor</em>
+        tree is a wrapper of this to facilitate easy extraction of interesting things (eg the Text),
+        and
+        <em>org.apache.poi.hwpf.converter</em>
+        package contains Word-to-HTML and Word-to-FO converters (the latter can be used to generate PDF
+        from Word files when used with
+        <a href="http://xmlgraphics.apache.org/fop/">Apache FOP</a>
+        ). Also there is a small file-structure-dumping utility in
+        <em>org.apache.poi.hwpf.dev</em>
+        package, primarily for development purposes.
+    </p>
+
+    
+<p>
+        The main entry point to HWPF is HWPFDocument. Currently it has a lot of references both to
+        internal interfaces (
+        <em>org.apache.poi.hwpf.model</em>
+        package) and public API (
+        <em>org.apache.poi.hwpf.usermodel</em>
+        package). It is possible that it will be split into two different interfaces (like WordFile
+        and WordDocument) in later versions.
+    </p>
+
+    
+<p>
+      The main entry point to XWPF is XWPFDocument. From there, you can get the
+      paragraphs, pictures, tables, sections, headers etc.
+    </p>
+    
+<p>
+      Currently, there are only a handful of example programs using HWPF and XWPF
+      available. They can be found in svn in the examples section, under
+      <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hwpf">HWPF</a>
+      and
+      <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xwpf">XWPF</a>.
+      Both HWPF and XWPF have fairly high levels of unit test coverage, which
+      provides examples of using the various areas of functionality of both
+      modules. These can be found in svn, under
+      <a href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf">HWPF</a>
+      and
+      <a href="http://svn.apache.org/repos/asf/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf">XWPF</a>.
+      Contributions of more examples, whether inspired by the unit tests or
+      not, would be most welcomed!
+    </p>
+
+   
+   
+<a name="HWPF+Notes"></a>
+<div class="h3">
+<h3>HWPF Notes</h3>
+</div>
+    
+
+    
+<p>A .doc Word document, as handled by HWPF, can be considered as a very long single
+        text buffer. The HWPF API provides "pointers"
+        to document parts, like sections, paragraphs and character runs. Usually a user will iterate
+        over main document part sections, paragraphs from sections and character runs from
+        paragraphs. Each such interface is a pointer to a document text subrange along with additional
+        properties (and they all extend the same Range parent class). There are additional Range
+        implementations like Table, TableRow, TableCell, etc. Some structures like Bookmark or Field
+        can also provide subrange pointers.
+    </p>
+
+    
+<p>Changing file content usually requires a lot of synchronized changes in those structures like
+        updating property boundaries, position handlers, etc. Because of that HWPF API shall be
+        considered as not thread safe. In addition, there is a "one pointer" rule for changing
+        content. It means you should not use two different Range instances at one time. More
+        precisely, if you are changing file content using some range pointer, all other range
+        pointers except parents' ones become invalid. For example if you obtain overall range (1),
+        paragraph range (2) from overall range and character run range (3) from paragraph range and
+        change text of paragraph, character run range is now invalid and should not be used, but
+        the overall range pointer is still valid. Each time you obtain a range (pointer), a new instance is
+        created. It means that if you obtained two range pointers and changed document text using the first
+        range pointer, the second one became invalid.
+    </p>
+
+   
+   
+<a name="XWPF+Patches+Required%21"></a>
+<div class="h3">
+<h3>XWPF Patches Required!</h3>
+</div>
+    
+
+    
+<p>At the moment, XWPF covers many common use cases for reading and writing
+     .docx files. Whilst this is a great thing, it does mean that XWPF does
+     everything that the current POI committers need it to do, and so none of
+     the committers are actively adding new features.</p>
+
+    
+<p>If you need a feature in XWPF that isn't currently
+     there, please do send in a patch to add the extra functionality! More details
+     on contributing patches are available on the <a href="../guidelines.html">"Contribution to POI" page</a>.</p>
+   
+
+   
+<a name="HWPF+Patches+Required%21"></a>
+<div class="h3">
+<h3>HWPF Patches Required!</h3>
+</div>
+    
+
+    
+<p>At the moment we unfortunately do not have someone taking care of HWPF
+     and fostering its development. What we need is someone to stand up, take
+     this thing under his hood as his baby and push it forward. Ryan Ackley,
+     who put a lot of effort into HWPF, is no longer on board, so HWPF is an
+     orphan child waiting to be adopted.</p>
+
+    
+<p>If <strong>you</strong> are interested in becoming the new HWPF
+     pointman, you should look into the Microsoft Word internals. A good
+     starting point seems to be Ryan Ackley's <a href="docoverview.html">overview</a>. Full details on the Word format
+     are available from
+     <a href="http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx">Microsoft</a>,
+     but the documentation can be a little hard to get into at first... Try reading the
+     <a href="docoverview.html">overview</a> first, and looking at the existing
+     code, then finally look up the documentation for specific missing features.</p>
+
+    
+<p>As a first step you should familiarize yourself with the source code,
+     examples, test cases, and the HWPF patches available at <a href="http://issues.apache.org/">Bugzilla</a> (if any). Then you
+     should compile an overview of</p>
+
+    
+<ul>
+     
+<li>the current HWPF status,</li>
+     
+<li>the patches in <a href="http://issues.apache.org/bugzilla/">Bugzilla</a> to be checked
+      in (and those that should better be ditched),</li>
+     
+<li>the available test cases and the test cases still to be written,</li>
+     
+<li>the available documentation and the docs to be written,</li>
+     
+<li>anything else that seems reasonable</li>
+    
+</ul>
+
+    
+<p>When you start coding, you will not yet have write access to the
+     SVN repository. Please submit your patches to <a href="http://issues.apache.org/">Bugzilla</a> and nag <a href="mailto:dev@poi.apache.org">the dev list</a> until someone commits
+     them. Besides the actual checking in of HWPF patches, current POI
+     committers will also do some minor reviews now and then of your source code 
+     patches, test cases and documentation to help ensure software quality. But 
+     most of the time you will be on your own. However, anyone offering useful
+     contributions over a period of time will be offered committership!</p>
+
+    
+<p>Please do not forget to write <a href="http://www.junit.org/">JUnit</a> test cases and documentation!
+     We won't accept code that doesn't come with test cases. And please
+     consider that other contributors should be able to understand your source
+     code easily. If you need any help getting started with JUnit test cases
+     for HWPF, please ask on the developers' mailing list! If you show that you
+     are prepared to stick at it you will most likely be given SVN commit
+     access. See <a href="../guidelines.html">"Contribution to POI" page</a>
+     for more details and help getting started.</p>
+
+    
+<p>Of course we will help you as best as we can. However, presently there
+     is no committer who is really familiar with the Word format, so you'll be
+     mostly on your own. We are looking forward to you and your contributions!
+     Honor and glory of becoming a POI committer are waiting!</p>
+   
+ 
+
+<div id="authors" align="right">by&nbsp;Nicola Ken Barozzi,&nbsp;Andrew C. Oliver,&nbsp;Ryan Ackley,&nbsp;Rainer Klute</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+              Copyright &copy; 2002-2012&nbsp;The Apache Software Foundation. All rights reserved.<br>
+              Apache POI, POI, Apache, the Apache feather logo, and the Apache 
+              POI project logo are trademarks of The Apache Software Foundation.
+            </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit">
+<a href="http://validator.w3.org/check/referer"><img width="88" height="31" alt="Valid HTML 4.01!" src="../skin/images/valid-html401.png" class="logoImage"></a><a href="http://jigsaw.w3.org/css-validator/"><img width="88" height="31" alt="Valid CSS!" src="../skin/images/vcss.png" class="logoImage"></a><a href="http://forrest.apache.org/"><img border="0" class="logoImage" alt="Built with Apache Forrest" src="../skin/images/built-with-forrest-button.png" width="88" height="31"></a>
+</div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>

Added: poi/site/publish/document/projectplan.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/projectplan.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/projectplan.html (added)
+++ poi/site/publish/document/projectplan.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,635 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file.  Do not edit.  ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {  
+if (window.print) {
+    window.print() ;  
+} else {
+    var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+    WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box    WebBrowser1.outerHTML = "";  
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape");  // true when the browser reports its name as "Netscape"; not read again within this script block
+var VERSION = parseInt(navigator.appVersion);  // leading integer parsed from the browser's reported version string
+if (VERSION > 3) {  // write the PRINT link only when the parsed version is 4 or higher (NaN fails the test)
+    document.write('  <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+		
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+	
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+		
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+		
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+		
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+		
+<div class="menuItem">
+<span class="menuSelected">HWPF Project plan</span>
+</div>
+	
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+                          Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+ 
+	
+		
+<p>HWPF Milestones</p>
+		
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			
+<tr class="b">
+				
+<th colspan="1" rowspan="1">
+					Milestones
+				</th>
+				<th colspan="1" rowspan="1">
+					Target Date
+				</th>
+				<th colspan="1" rowspan="1">
+					Owner
+				</th>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Read in a Word document
+with minimum formatting
+(no lists, tables, footnotes,
+endnotes, headers, footers)
+and write it back out with the
+result viewable in Word
+97/2000
+				</td>
+				<td colspan="1" rowspan="1">
+					07/11/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					Ryan
+				</td>
+			
+</tr>			
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Add support for Lists and
+Tables
+				</td>
+				<td colspan="1" rowspan="1">
+					8/15/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					&nbsp;
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					HWPF 1.0-alpha release with
+documentation and examples
+				</td>
+				<td colspan="1" rowspan="1">
+					8/18/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					Praveen/Ryan
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Add support for Headers,
+Footers, endnotes, and
+footnotes 
+				</td>
+				<td colspan="1" rowspan="1">
+					8/31/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					?
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Add support for forms and
+mail merge
+				</td>
+				<td colspan="1" rowspan="1">
+					September/October 2003
+				</td>
+				<td colspan="1" rowspan="1">
+					?
+				</td>
+			
+</tr>
+		
+</table>
+		
+<p>HWPF Task Lists</p>
+		
+<p>Read in a Word document with minimum formatting (no lists, tables, footnotes,
+endnotes, headers, footers) and write it back out with the result viewable in Word 97/2000</p>
+		
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			
+<tr class="b">
+				
+<th colspan="1" rowspan="1">
+					Task
+				</th>
+				<th colspan="1" rowspan="1">
+					Target Date
+				</th>
+				<th colspan="1" rowspan="1">
+					Owner
+				</th>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Create classes to read and
+write low level data
+structures with test cases
+				</td>
+				<td colspan="1" rowspan="1">
+					7/10/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					Ryan
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Create classes to read and
+write FontTable and Font
+names with test case
+				</td>
+				<td colspan="1" rowspan="1">
+					7/10/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					Praveen
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Final test
+				</td>
+				<td colspan="1" rowspan="1">
+					7/11/2003
+				</td>
+				<td colspan="1" rowspan="1">
+					Ryan
+				</td>
+			
+</tr>
+		
+</table>
+		
+<p>Develop a user-friendly API so it is fun and easy to read and write Word documents
+with Java.</p>
+		
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			
+<tr class="b">
+				
+<th colspan="1" rowspan="1">
+					Task
+				</th>
+				<th colspan="1" rowspan="1">
+					Target Date
+				</th>
+				<th colspan="1" rowspan="1">
+					Owner
+				</th>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Develop a way for SPRMS to
+be compressed and
+uncompressed
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Override CHPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Override PAPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Override SEPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Override DOPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Override TAPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Override TCAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Develop a VerifyIntegrity
+class for testing so it is easy
+to determine if a Word
+Document is well-formed.
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Develop general intuitive
+API to tie everything together
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+		
+</table>
+		
+<p>Add support for lists and tables</p>
+		
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			
+<tr class="b">
+				
+<th colspan="1" rowspan="1">
+					Task
+				</th>
+				<th colspan="1" rowspan="1">
+					Target Date
+				</th>
+				<th colspan="1" rowspan="1">
+					Owner
+				</th>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Add data structures for
+reading and writing list data
+with test cases.
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Add data structures for
+reading and writing tables
+with test cases.
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+		
+</table>
+		
+<p>HWPF 1.0-alpha release with documentation and examples</p>
+		
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			
+<tr class="b">
+				
+<th colspan="1" rowspan="1">
+					Task
+				</th>
+				<th colspan="1" rowspan="1">
+					Target Date
+				</th>
+				<th colspan="1" rowspan="1">
+					Owner
+				</th>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Document the user model
+API
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="b">
+				
+<td colspan="1" rowspan="1">
+					Document the low level
+classes
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+			
+<tr class="a">
+				
+<td colspan="1" rowspan="1">
+					Come up with detailed How-To&rsquo;s
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+				<td colspan="1" rowspan="1">
+					
+				</td>
+			
+</tr>
+		
+</table>
+	
+
+<div id="authors" align="right">by&nbsp;Ryan Ackley</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+              Copyright &copy; 2002-2012&nbsp;The Apache Software Foundation. All rights reserved.<br>
+              Apache POI, POI, Apache, the Apache feather logo, and the Apache 
+              POI project logo are trademarks of The Apache Software Foundation.
+            </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>

Added: poi/site/publish/document/quick-guide.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/quick-guide.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/quick-guide.html (added)
+++ poi/site/publish/document/quick-guide.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,259 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file.  Do not edit.  ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>POI-HWPF - A Quick Guide</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {  
+if (window.print) {
+    window.print() ;  
+} else {
+    var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+    WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box    WebBrowser1.outerHTML = "";  
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape");  // true when the browser reports its name as "Netscape"; not read again within this script block
+var VERSION = parseInt(navigator.appVersion);  // leading integer parsed from the browser's reported version string
+if (VERSION > 3) {  // write the PRINT link only when the parsed version is 4 or higher (NaN fails the test)
+    document.write('  <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+		
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+	
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+		
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+		
+<div class="menuItem">
+<span class="menuSelected">Quick Guide</span>
+</div>
+		
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+		
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+	
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+                          Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>POI-HWPF - A Quick Guide</h1>
+</div>
+<div class="h3">
+    
+
+    
+		
+<p>HWPF is still in early development. It lives in the <a href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
+		scratchpad section of the SVN repository</a>. You will need to ensure you
+		either have a recent SVN checkout, or a recent SVN nightly build
+		(including the scratchpad jar!).</p>
+
+        
+<a name="Basic+Text+Extraction"></a>
+<div class="h3">
+<h3>Basic Text Extraction</h3>
+</div>
+        
+<p>For basic text extraction, make use of 
+<span class="codefrag">org.apache.poi.hwpf.extractor.WordExtractor</span>. It accepts an input
+stream or a <span class="codefrag">HWPFDocument</span>. The <span class="codefrag">getText()</span> 
+method can be used to 
+get the text from all the paragraphs, or <span class="codefrag">getParagraphText()</span>
+can be used to fetch the text from each paragraph in turn. The other
+option is <span class="codefrag">getTextFromPieces()</span>, which is very fast, but
+tends to return things that aren't text from the page. YMMV.
+		</p>
+		
+		
+		
+<a name="Specific+Text+Extraction"></a>
+<div class="h3">
+<h3>Specific Text Extraction</h3>
+</div>
+		
+<p>To get specific bits of text, first create a 
+<span class="codefrag">org.apache.poi.hwpf.HWPFDocument</span>. Fetch the range 
+with <span class="codefrag">getRange()</span>, then get paragraphs from that. You
+can then get text and other properties.
+		</p>
+		
+		
+		
+<a name="Headers+and+Footers"></a>
+<div class="h3">
+<h3>Headers and Footers</h3>
+</div>
+		
+<p>To get at the headers and footers of a Word document, first create a
+<span class="codefrag">org.apache.poi.hwpf.HWPFDocument</span>. Next, you need to create a
+<span class="codefrag">org.apache.poi.hwpf.usermodel.HeaderStories</span>, passing it your
+HWPFDocument. Finally, the HeaderStories gives you access to the headers and
+footers, including first / even / odd page ones if defined in your
+document. Additionally, HeaderStories provides a method for removing
+any macros in the text, which is helpful as many headers and footers
+do end up with macros in them.</p>
+		
+		
+		
+<a name="Changing+Text"></a>
+<div class="h3">
+<h3>Changing Text</h3>
+</div>
+		
+<p>It is possible to change the text via 
+		<span class="codefrag">insertBefore()</span> and <span class="codefrag">insertAfter()</span>
+		on a <span class="codefrag">Range</span> object (either a <span class="codefrag">Range</span>,
+		<span class="codefrag">Paragraph</span> or <span class="codefrag">CharacterRun</span>).
+		It is also possible to delete a <span class="codefrag">Range</span>.
+		This code will work in many, but not all, cases, and patches to
+        improve it are gratefully received!
+		</p>
+		
+
+		
+<a name="Further+Examples"></a>
+<div class="h3">
+<h3>Further Examples</h3>
+</div>
+		
+<p>For now, the best source of additional examples is in the unit 
+		tests. <a href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
+		Browse the HWPF unit tests.</a>
+		
+</p>
+		
+	
+
+<div id="authors" align="right">by&nbsp;Nick Burch</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+              Copyright &copy; 2002-2012&nbsp;The Apache Software Foundation. All rights reserved.<br>
+              Apache POI, POI, Apache, the Apache feather logo, and the Apache 
+              POI project logo are trademarks of The Apache Software Foundation.
+            </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org