You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2014/03/19 19:34:05 UTC
svn commit: r1579344 - in /poi/site/publish/document: ./ docoverview.html
index.html projectplan.html quick-guide.html
Author: nick
Date: Wed Mar 19 18:34:04 2014
New Revision: 1579344
URL: http://svn.apache.org/r1579344
Log:
Add the new document directory
Added:
poi/site/publish/document/
poi/site/publish/document/docoverview.html
poi/site/publish/document/index.html
poi/site/publish/document/projectplan.html
poi/site/publish/document/quick-guide.html
Added: poi/site/publish/document/docoverview.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/docoverview.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/docoverview.html (added)
+++ poi/site/publish/document/docoverview.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,302 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file. Do not edit. ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {
+if (window.print) {
+ window.print() ;
+} else {
+ var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+ WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box WebBrowser1.outerHTML = "";
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape");
+var VERSION = parseInt(navigator.appVersion);
+if (VERSION > 3) {
+ document.write(' <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+
+<div class="menuItem">
+<span class="menuSelected">HWPF Format</span>
+</div>
+
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+ Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+
+
+
+
+<a name="The+Word+97+File+Format+in+semi-plain+English"></a>
+<div class="h3">
+<h3>The Word 97 File Format in semi-plain English</h3>
+</div>
+
+
+<p>The purpose of this document is to give a brief high level overview of the
+ HWPF document format. This document does not go into in-depth technical
+ detail and is only meant as a supplement to the Microsoft Word 97-2007
+ Binary File Format freely available from
+ <a href="http://www.microsoft.com/interop/docs/officebinaryformats.mspx">Microsoft</a>.</p>
+
+<p>The OLE file format is not discussed in this document. It is assumed that
+ the reader has a working knowledge of the POIFS API. </p>
+
+
+<a name="Word+file+structure"></a>
+<div class="h4">
+<h4>Word file structure</h4>
+</div>
+
+<p>A Word file is made up of the document text and data structures
+ containing formatting information about the text. Of course, this is a
+ very simplified illustration. There are fields and macros and other
+ things that have not been considered. At this stage, HWPF is mainly
+ concerned with formatted text.</p>
+
+
+<a name="Reading+Word+files"></a>
+<div class="h4">
+<h4>Reading Word files</h4>
+</div>
+
+<p>The entry point for HWPF's reading of a Word file is the File Information
+ Block (FIB). This structure is the entry point for the locations and size
+ of a document's text and data structures. The FIB is located at the
+ beginning of the main stream.</p>
+
+<a name="Text"></a>
+<div class="h2">
+<h2>Text</h2>
+</div>
+
+<p>The document's text is also located in the main stream. Its starting
+ location is given as FIB.fcMin and its length is given in bytes by
+ FIB.ccpText. These two values are not very useful in getting the text
+ because of unicode. There may be unicode text intermingled with ASCII
+ text. That brings us to the piece table.</p>
+
+<p>The piece table is used to divide the text into non-unicode and unicode
+ pieces. The offset and size are given in FIB.fcClx and FIB.lcbClx
+ respectively. The piece table may contain Property Modifiers (prm).
+ These are for complex(fast-saved) files and are skipped. Each text piece
+ contains offsets in the main stream that contain text for that piece.
+ If the piece uses unicode, the file offset is masked with a certain bit.
+ Then you have to unmask the bit and divide by 2 to get the real file
+ offset. </p>
+
+
+<a name="Text+Formatting"></a>
+<div class="h2">
+<h2>Text Formatting</h2>
+</div>
+
+<a name="Stylesheet"></a>
+<div class="h5">
+<h5>Stylesheet</h5>
+</div>
+
+<p>All text formatting is based on styles contained in the StyleSheet.
+ The StyleSheet is a data structure containing among other things, style
+ descriptions. Each style description can contain a paragraph style and
+ a character style or simply a character style. Each style description
+ is stored in a compressed version on file. Basically these are deltas
+ from another style.</p>
+
+<p>Eventually, you have to chain back to the nil style which is an
+ imaginary style with certain implied values.</p>
+
+
+<a name="Paragraph+and+Character+styles"></a>
+<div class="h5">
+<h5>Paragraph and Character styles</h5>
+</div>
+
+<p>Paragraph and Character formatting properties for a document's text are
+ stored on file as deltas from some base style in the Stylesheet. The
+ deltas are used to create a complete uncompressed style in memory.</p>
+
+<p>Uncompressed paragraph styles are represented by the Paragraph
+ Properties(PAP) data structure. Uncompressed character styles are
+ represented by the Character Properties(CHP) data structure. The styles
+ for the document text are stored in compressed format in the
+ corresponding Formatted Disk Pages (FKP). A compressed PAP is referred
+ to as a PAPX and a compressed CHP is a CHPX. The FKP locations are
+ stored in the bin table. There are separate bin tables for CHPXs and
+ PAPXs. The bin tables' locations and sizes are stored in the FIB.</p>
+
+<p>A FKP is a 512 byte OLE page. It contains the offsets of the beginning
+ and end of each paragraph/character run in the main stream and the
+ compressed properties for that interval. The compressed PAPX is based on
+ its base style in the StyleSheet. The compressed CHPX is based on the
+ enclosing paragraph's base style in the Stylesheet.</p>
+
+
+<a name="Uncompressing+styles+and+other+data+structures"></a>
+<div class="h5">
+<h5>Uncompressing styles and other data structures</h5>
+</div>
+
+<p>All compressed properties(CHPX, PAPX, SEPX) contain a grpprl. A grpprl
+ is an array of sprms. A sprm defines a delta from some base property.
+ There is a table of possible sprms in the Word 97 spec. Each sprm is a
+ two byte operand followed by a parameter. The parameter size depends on
+ the sprm. Each sprm describes an operation that should be performed on
+ the base style. After every sprm in the grpprl is performed on the base
+ style you will have the style for the paragraph, character run,
+ section, etc.</p>
+
+
+
+
+
+
+<div id="authors" align="right">by S. Ryan Ackley</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+ Copyright © 2002-2012 The Apache Software Foundation. All rights reserved.<br>
+ Apache POI, POI, Apache, the Apache feather logo, and the Apache
+ POI project logo are trademarks of The Apache Software Foundation.
+ </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>
Added: poi/site/publish/document/index.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/index.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/index.html (added)
+++ poi/site/publish/document/index.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,392 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file. Do not edit. ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF and XWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a><a href=""></a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {
+if (window.print) {
+ window.print() ;
+} else {
+ var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+ WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box WebBrowser1.outerHTML = "";
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape");
+var VERSION = parseInt(navigator.appVersion);
+if (VERSION > 3) {
+ document.write(' <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+<div class="menuItem">
+<span class="menuSelected">Overview</span>
+</div>
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+ Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF and XWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+
+
+
+
+<a name="Overview"></a>
+<div class="h3">
+<h3>Overview</h3>
+</div>
+
+
+<p>HWPF is the name of our port of the Microsoft Word 97(-2007) file format
+ to pure Java. It also provides limited read only support for the older
+ Word 6 and Word 95 file formats.</p>
+
+
+<p>The partner to HWPF for the new Word 2007 .docx format is <em>XWPF</em>.
+ Whilst HWPF and XWPF provide similar features, there is not a common
+ interface across the two of them at this time.</p>
+
+
+<p>Both HWPF and XWPF could be described as "moderately functional". For some
+ use cases, especially around text extraction, support is very strong. For
+ others, support may be limited or incomplete, and it may be necessary to
+ dig down into low-level code. Error checking may be missing in places,
+ so it may be possible to accidentally generate invalid files. Enhancements
+ to fix such things are generally very well received!</p>
+
+
+<p>As detailed in the <a href="/overview.html#components">Components
+ Page</a>, HWPF is contained within the Scratchpad jar, while XWPF
+ is in the OOXML jar. You will need to ensure you include the appropriate
+ jars (and their dependencies!) in your classpath to use HWPF or XWPF.</p>
+
+
+
+<a name="An+overview+of+the+code"></a>
+<div class="h3">
+<h3>An overview of the code</h3>
+</div>
+
+
+<p>
+ Source code in the
+ <em>org.apache.poi.hdf</em>
+ tree is the old legacy code. Source in the
+ <em>org.apache.poi.hwpf.model</em>
+ tree is the old legacy code refactored into a new object model. Those packages contain
+ Java representations of internal Word format structures. This code is "internal"; it shall not
+ be used by your code. Because of backward-compatibility some API still has references to
+ those packages. They are subject to be deprecated and removed. Code from
+ <em>org.apache.poi.hwpf.usermodel</em>
+ package is actual public and user-friendly (as much as possible) API to access document
+ parts. Source code in the
+ <em>org.apache.poi.hwpf.extractor</em>
+ tree is a wrapper of this to facilitate easy extraction of interesting things (eg the Text),
+ and
+ <em>org.apache.poi.hwpf.converter</em>
+ package contains Word-to-HTML and Word-to-FO converters (the latter can be used to generate PDF
+ from Word files when used with
+ <a href="http://xmlgraphics.apache.org/fop/">Apache FOP</a>
+ ). Also there is a small file-structure-dumping utility in
+ <em>org.apache.poi.hwpf.dev</em>
+ package, primarily for development purposes.
+ </p>
+
+
+<p>
+ The main entry point to HWPF is HWPFDocument. Currently it has a lot of references both to
+ internal interfaces (
+ <em>org.apache.poi.hwpf.model</em>
+ package) and public API (
+ <em>org.apache.poi.hwpf.usermodel</em>
+ ) package. It is possible that it will be split into two different interfaces (like WordFile
+ and WordDocument) in later versions.
+ </p>
+
+
+<p>
+ The main entry point to XWPF is XWPFDocument. From there, you can get the
+ paragraphs, pictures, tables, sections, headers etc.
+ </p>
+
+<p>
+ Currently, there are only a handful of example programs using HWPF and XWPF
+ available. They can be found in svn in the examples section, under
+ <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hwpf">HWPF</a>
+ and
+ <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xwpf">XWPF</a>.
+ Both HWPF and XWPF have fairly high levels of unit test coverage, which
+ provides examples of using the various areas of functionality of both
+ modules. These can be found in svn, under
+ <a href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf">HWPF</a>
+ and
+ <a href="http://svn.apache.org/repos/asf/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf">XWPF</a>.
+ Contributions of more examples, whether inspired by the unit tests or
+ not, would be most welcomed!
+ </p>
+
+
+
+<a name="HWPF+Notes"></a>
+<div class="h3">
+<h3>HWPF Notes</h3>
+</div>
+
+
+
+<p>A .doc Word document, as handled by HWPF, can be considered as a very long single
+ text buffer. The HWPF API provides "pointers"
+ to document parts, like sections, paragraphs and character runs. Usually the user will iterate
+ over the main document part's sections, the paragraphs from sections and the character runs from
+ paragraphs. Each such interface is a pointer to a document text subrange along with additional
+ properties (and they all extend the same Range parent class). There are additional Range
+ implementations like Table, TableRow, TableCell, etc. Some structures like Bookmark or Field
+ can also provide subrange pointers.
+ </p>
+
+
+<p>Changing file content usually requires a lot of synchronized changes in those structures like
+ updating property boundaries, position handlers, etc. Because of that HWPF API shall be
+ considered as not thread safe. In addition, there is a "one pointer" rule for changing
+ content. It means you should not use two different Range instances at one time. More
+ precisely, if you are changing file content using some range pointer, all other range
+ pointers except parents' ones become invalid. For example, if you obtain the overall range (1),
+ a paragraph range (2) from the overall range and a character run range (3) from the paragraph range, and
+ change the text of the paragraph, the character run range is now invalid and should not be used, but
+ the overall range pointer is still valid. Each time you obtain a range (pointer), a new instance is
+ created. It means that if you obtain two range pointers and change document text using the first
+ range pointer, the second one becomes invalid.
+ </p>
+
+
+
+<a name="XWPF+Patches+Required%21"></a>
+<div class="h3">
+<h3>XWPF Patches Required!</h3>
+</div>
+
+
+
+<p>At the moment, XWPF covers many common use cases for reading and writing
+ .docx files. Whilst this is a great thing, it does mean that XWPF does
+ everything that the current POI committers need it to do, and so none of
+ the committers are actively adding new features.</p>
+
+
+<p>If you come across a feature in XWPF that you need, and isn't currently
+ there, please do send in a patch to add the extra functionality! More details
+ on contributing patches are available on the <a href="../guidelines.html">"Contribution to POI" page</a>.</p>
+
+
+
+<a name="HWPF+Patches+Required%21"></a>
+<div class="h3">
+<h3>HWPF Patches Required!</h3>
+</div>
+
+
+
+<p>At the moment we unfortunately do not have someone taking care of HWPF
+ and fostering its development. What we need is someone to stand up, take
+ this thing under his hood as his baby and push it forward. Ryan Ackley,
+ who put a lot of effort into HWPF, is no longer on board, so HWPF is an
+ orphan child waiting to be adopted.</p>
+
+
+<p>If <strong>you</strong> are interested in becoming the new HWPF
+ pointman, you should look into the Microsoft Word internals. A good
+ starting point seems to be Ryan Ackley's <a href="docoverview.html">overview</a>. Full details on the Word format
+ are available from
+ <a href="http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx">Microsoft</a>,
+ but the documentation can be a little hard to get into at first... Try reading the
+ <a href="docoverview.html">overview</a> first, and looking at the existing
+ code, then finally look up the documentation for specific missing features.</p>
+
+
+<p>As a first step you should familiarize yourself with the source code,
+ examples, test cases, and the HWPF patches available at <a href="http://issues.apache.org/">Bugzilla</a> (if any). Then you
+ should compile an overview of</p>
+
+
+<ul>
+
+<li>the current HWPF status,</li>
+
+<li>the patches in <a href="http://issues.apache.org/bugzilla/">Bugzilla</a> to be checked
+ in (and those that should better be ditched),</li>
+
+<li>the available test cases and the test cases still to be written,</li>
+
+<li>the available documentation and the docs to be written,</li>
+
+<li>anything else that seems reasonable</li>
+
+</ul>
+
+
+<p>When you start coding, you will not yet have write access to the
+ SVN repository. Please submit your patches to <a href="http://issues.apache.org/">Bugzilla</a> and nag <a href="mailto:dev@poi.apache.org">the dev list</a> until someone commits
+ them. Besides the actual checking in of HWPF patches, current POI
+ committers will also do some minor reviews now and then of your source code
+ patches, test cases and documentation to help ensure software quality. But
+ most of the time you will be on your own. However, anyone offering useful
+ contributions over a period of time will be offered committership!</p>
+
+
+<p>Please do not forget to write <a href="http://www.junit.org/">JUnit</a> test cases and documentation!
+ We won't accept code that doesn't come with test cases. And please
+ consider that other contributors should be able to understand your source
+ code easily. If you need any help getting started with JUnit test cases
+ for HWPF, please ask on the developers' mailing list! If you show that you
+ are prepared to stick at it you will most likely be given SVN commit
+ access. See <a href="../guidelines.html">"Contribution to POI" page</a>
+ for more details and help getting started.</p>
+
+
+<p>Of course we will help you as best as we can. However, presently there
+ is no committer who is really familiar with the Word format, so you'll be
+ mostly on your own. We are looking forward to you and your contributions!
+ Honor and glory of becoming a POI committer are waiting!</p>
+
+
+
+<div id="authors" align="right">by Nicola Ken Barozzi, Andrew C. Oliver, Ryan Ackley, Rainer Klute</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+ Copyright © 2002-2012 The Apache Software Foundation. All rights reserved.<br>
+ Apache POI, POI, Apache, the Apache feather logo, and the Apache
+ POI project logo are trademarks of The Apache Software Foundation.
+ </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit">
+<a href="http://validator.w3.org/check/referer"><img width="88" height="31" alt="Valid HTML 4.01!" src="../skin/images/valid-html401.png" class="logoImage"></a><a href="http://jigsaw.w3.org/css-validator/"><img width="88" height="31" alt="Valid CSS!" src="../skin/images/vcss.png" class="logoImage"></a><a href="http://forrest.apache.org/"><img border="0" class="logoImage" alt="Built with Apache Forrest" src="../skin/images/built-with-forrest-button.png" width="88" height="31"></a>
+</div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>
Added: poi/site/publish/document/projectplan.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/projectplan.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/projectplan.html (added)
+++ poi/site/publish/document/projectplan.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,635 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file. Do not edit. ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>Apache POI - HWPF - Java API to Handle Microsoft Word Files</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {
+if (window.print) {
+ window.print() ;
+} else {
+ var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+ WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box WebBrowser1.outerHTML = "";
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape"); // true when the browser reports its name as "Netscape"; not read within this script block
+var VERSION = parseInt(navigator.appVersion); // leading integer of the browser's appVersion string (NaN if it does not start with a number)
+if (VERSION > 3) { // only write the PRINT link when the reported version is above 3
+ document.write(' <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+
+<div class="menuItem">
+<a href="quick-guide.html">Quick Guide</a>
+</div>
+
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+
+<div class="menuItem">
+<span class="menuSelected">HWPF Project plan</span>
+</div>
+
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+ Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>Apache POI - HWPF - Java API to Handle Microsoft Word Files</h1>
+</div>
+<div class="h3">
+
+
+
+<p>HWPF Milestones</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr class="b">
+
+<th colspan="1" rowspan="1">
+ Milestones
+ </th>
+ <th colspan="1" rowspan="1">
+ Target Date
+ </th>
+ <th colspan="1" rowspan="1">
+ Owner
+ </th>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Read in a Word document
+with minimum formatting
+(no lists, tables, footnotes,
+endnotes, headers, footers)
+and write it back out with the
+result viewable in Word
+97/2000
+ </td>
+ <td colspan="1" rowspan="1">
+ 07/11/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ Ryan
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Add support for Lists and
+Tables
+ </td>
+ <td colspan="1" rowspan="1">
+ 8/15/2003
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ HWPF 1.0-alpha release with
+documentation and examples
+ </td>
+ <td colspan="1" rowspan="1">
+ 8/18/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ Praveen/Ryan
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Add support for Headers,
+Footers, endnotes, and
+footnotes
+ </td>
+ <td colspan="1" rowspan="1">
+ 8/31/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ ?
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Add support for forms and
+mail merge
+ </td>
+ <td colspan="1" rowspan="1">
+ September/October 2003
+ </td>
+ <td colspan="1" rowspan="1">
+ ?
+ </td>
+
+</tr>
+
+</table>
+
+<p>HWPF Task Lists</p>
+
+<p>Read in a Word document with minimum formatting (no lists, tables, footnotes,
+endnotes, headers, footers) and write it back out with the result viewable in Word 97/2000</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr class="b">
+
+<th colspan="1" rowspan="1">
+ Task
+ </th>
+ <th colspan="1" rowspan="1">
+ Target Date
+ </th>
+ <th colspan="1" rowspan="1">
+ Owner
+ </th>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Create classes to read and
+write low level data
+structures with test cases
+ </td>
+ <td colspan="1" rowspan="1">
+ 7/10/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ Ryan
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Create classes to read and
+write FontTable and Font
+names with test case
+ </td>
+ <td colspan="1" rowspan="1">
+ 7/10/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ Praveen
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Final test
+ </td>
+ <td colspan="1" rowspan="1">
+ 7/11/2003
+ </td>
+ <td colspan="1" rowspan="1">
+ Ryan
+ </td>
+
+</tr>
+
+</table>
+
+<p>Develop a user-friendly API so it is fun and easy to read and write Word documents
+with Java.</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr class="b">
+
+<th colspan="1" rowspan="1">
+ Task
+ </th>
+ <th colspan="1" rowspan="1">
+ Target Date
+ </th>
+ <th colspan="1" rowspan="1">
+ Owner
+ </th>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Develop a way for SPRMS to
+be compressed and
+uncompressed
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Override CHPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Override PAPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Override SEPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Override DOPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Override TAPAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Override TCAbstractType
+with a concrete class that
+exposes attributes with
+human readable names
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Develop a VerifyIntegrity
+class for testing so it is easy
+to determine if a Word
+Document is well-formed.
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Develop general intuitive
+API to tie everything together
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+</table>
+
+<p>Add support for lists and tables</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr class="b">
+
+<th colspan="1" rowspan="1">
+ Task
+ </th>
+ <th colspan="1" rowspan="1">
+ Target Date
+ </th>
+ <th colspan="1" rowspan="1">
+ Owner
+ </th>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Add data structures for
+reading and writing list data
+with test cases.
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Add data structures for
+reading and writing tables
+with test cases.
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+</table>
+
+<p>HWPF 1.0-alpha release with documentation and examples</p>
+
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr class="b">
+
+<th colspan="1" rowspan="1">
+ Task
+ </th>
+ <th colspan="1" rowspan="1">
+ Target Date
+ </th>
+ <th colspan="1" rowspan="1">
+ Owner
+ </th>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Document the user model
+API
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="b">
+
+<td colspan="1" rowspan="1">
+ Document the low level
+classes
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+<tr class="a">
+
+<td colspan="1" rowspan="1">
+ Come up with detailed How-To’s
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+ <td colspan="1" rowspan="1">
+
+ </td>
+
+</tr>
+
+</table>
+
+
+<div id="authors" align="right">by Ryan Ackley</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+ Copyright © 2002-2012 The Apache Software Foundation. All rights reserved.<br>
+ Apache POI, POI, Apache, the Apache feather logo, and the Apache
+ POI project logo are trademarks of The Apache Software Foundation.
+ </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>
Added: poi/site/publish/document/quick-guide.html
URL: http://svn.apache.org/viewvc/poi/site/publish/document/quick-guide.html?rev=1579344&view=auto
==============================================================================
--- poi/site/publish/document/quick-guide.html (added)
+++ poi/site/publish/document/quick-guide.html Wed Mar 19 18:34:04 2014
@@ -0,0 +1,259 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<!--*** This is a generated file. Do not edit. ***-->
+<link rel="stylesheet" href="../skin/tigris.css" type="text/css">
+<link rel="stylesheet" href="../skin/mysite.css" type="text/css">
+<link rel="stylesheet" href="../skin/site.css" type="text/css">
+<link media="print" rel="stylesheet" href="../skin/print.css" type="text/css">
+<title>POI-HWPF - A Quick Guide</title>
+</head>
+<body bgcolor="white" class="composite">
+<!--================= start Banner ==================-->
+<div id="banner">
+<table width="100%" cellpadding="8" cellspacing="0" summary="banner" border="0">
+<tbody>
+<tr>
+<!--================= start Group Logo ==================-->
+<td width="50%" align="left">
+<div class="groupLogo">
+<a href="http://poi.apache.org"><img border="0" class="logoImage" alt="Apache POI" src="../resources/images/group-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Group Logo ==================-->
+<!--================= start Project Logo ==================--><td width="50%" align="right">
+<div align="right" class="projectLogo">
+<a href="http://poi.apache.org/"><img border="0" class="logoImage" alt="POI" src="../resources/images/project-logo.jpg"></a>
+</div>
+</td>
+<!--================= end Project Logo ==================-->
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Banner ==================-->
+<!--================= start Main ==================-->
+<table width="100%" cellpadding="0" cellspacing="0" border="0" summary="nav" id="breadcrumbs">
+<tbody>
+<!--================= start Status ==================-->
+<tr class="status">
+<td>
+<!--================= start BreadCrumb ==================--><a href="http://www.apache.org/">Apache</a> | <a href="http://poi.apache.org/">POI</a>
+<!--================= end BreadCrumb ==================--></td><td id="tabs">
+<!--================= start Tabs ==================-->
+<div class="tab">
+<span class="selectedTab"><a class="base-selected" href="../index.html">Home</a></span> | <script language="Javascript" type="text/javascript">
+function printit() {
+if (window.print) {
+ window.print() ;
+} else {
+ var WebBrowser = '<OBJECT ID="WebBrowser1" WIDTH="0" HEIGHT="0" CLASSID="CLSID:8856F961-340A-11D0-A96B-00C04FD705A2"></OBJECT>';
+document.body.insertAdjacentHTML('beforeEnd', WebBrowser);
+ WebBrowser1.ExecWB(6, 2);//Use a 1 vs. a 2 for a prompting dialog box WebBrowser1.outerHTML = "";
+}
+}
+</script><script language="Javascript" type="text/javascript">
+var NS = (navigator.appName == "Netscape"); // true when the browser reports its name as "Netscape"; not read within this script block
+var VERSION = parseInt(navigator.appVersion); // leading integer of the browser's appVersion string (NaN if it does not start with a number)
+if (VERSION > 3) { // only write the PRINT link when the reported version is above 3
+ document.write(' <a title="PRINT this page OUT" href="javascript:printit()">PRINT</a>');
+}
+</script>
+</div>
+<!--================= end Tabs ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+<!--================= end Status ==================-->
+<table id="main" width="100%" cellpadding="8" cellspacing="0" summary="" border="0">
+<tbody>
+<tr valign="top">
+<!--================= start Menu ==================-->
+<td id="leftcol">
+<div id="navcolumn">
+<div class="menuBar">
+<div class="menu">
+<span class="menuLabel">Apache POI</span>
+
+<div class="menuItem">
+<a href="../index.html">Top</a>
+</div>
+
+</div>
+<div class="menu">
+<span class="menuLabel">HWPF+XWPF</span>
+
+<div class="menuItem">
+<a href="index.html">Overview</a>
+</div>
+
+<div class="menuItem">
+<span class="menuSelected">Quick Guide</span>
+</div>
+
+<div class="menuItem">
+<a href="docoverview.html">HWPF Format</a>
+</div>
+
+<div class="menuItem">
+<a href="projectplan.html">HWPF Project plan</a>
+</div>
+
+</div>
+</div>
+</div>
+<form target="_blank" action="http://www.google.com/search" method="get">
+<table summary="search" border="0" cellspacing="0" cellpadding="0">
+<tr>
+<td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td nowrap="nowrap">
+ Search Apache POI<br>
+<input value="poi.apache.org" name="sitesearch" type="hidden"><input size="10" name="q" id="query" type="text"><img height="1" width="5" alt="" src="../skin/images/spacer.gif" class="spacer"><input name="Search" value="GO" type="submit"></td><td><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td colspan="3"><img height="7" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td>
+</tr>
+<tr>
+<td class="bottom-left-thick"></td><td bgcolor="#a5b6c6"><img height="1" width="1" alt="" src="../skin/images/spacer.gif" class="spacer"></td><td class="bottom-right-thick"></td>
+</tr>
+</table>
+</form>
+</td>
+<!--================= end Menu ==================-->
+<!--================= start Content ==================--><td>
+<div id="bodycol">
+<div class="app">
+<div align="center">
+<h1>POI-HWPF - A Quick Guide</h1>
+</div>
+<div class="h3">
+
+
+
+
+<p>HWPF is still in early development. It is in the <a href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
+ scratchpad section of the SVN.</a> You will need to ensure you
+ either have a recent SVN checkout, or a recent SVN nightly build
+ (including the scratchpad jar!)</p>
+
+
+<a name="Basic+Text+Extraction"></a>
+<div class="h3">
+<h3>Basic Text Extraction</h3>
+</div>
+
+<p>For basic text extraction, make use of
+<span class="codefrag">org.apache.poi.hwpf.extractor.WordExtractor</span>. It accepts an input
+stream or a <span class="codefrag">HWPFDocument</span>. The <span class="codefrag">getText()</span>
+method can be used to
+get the text from all the paragraphs, or <span class="codefrag">getParagraphText()</span>
+can be used to fetch the text from each paragraph in turn. The other
+option is <span class="codefrag">getTextFromPieces()</span>, which is very fast, but
+tends to return things that aren't text from the page. YMMV.
+ </p>
+
+
+
+<a name="Specific+Text+Extraction"></a>
+<div class="h3">
+<h3>Specific Text Extraction</h3>
+</div>
+
+<p>To get specific bits of text, first create a
+<span class="codefrag">org.apache.poi.hwpf.HWPFDocument</span>. Fetch the range
+with <span class="codefrag">getRange()</span>, then get paragraphs from that. You
+can then get text and other properties.
+ </p>
+
+
+
+<a name="Headers+and+Footers"></a>
+<div class="h3">
+<h3>Headers and Footers</h3>
+</div>
+
+<p>To get at the headers and footers of a Word document, first create a
+<span class="codefrag">org.apache.poi.hwpf.HWPFDocument</span>. Next, you need to create a
+<span class="codefrag">org.apache.poi.hwpf.usermodel.HeaderStores</span>, passing it your
+HWPFDocument. Finally, the HeaderStores gives you access to the headers and
+footers, including first / even / odd page ones if defined in your
+document. Additionally, HeaderStores provides a method for removing
+any macros in the text, which is helpful as many headers and footers
+do end up with macros in them.</p>
+
+
+
+<a name="Changing+Text"></a>
+<div class="h3">
+<h3>Changing Text</h3>
+</div>
+
+<p>It is possible to change the text via
+ <span class="codefrag">insertBefore()</span> and <span class="codefrag">insertAfter()</span>
+ on a <span class="codefrag">Range</span> object (either a <span class="codefrag">Range</span>,
+ <span class="codefrag">Paragraph</span> or <span class="codefrag">CharacterRun</span>).
+ It is also possible to delete a <span class="codefrag">Range</span>.
+ This code will work in many, but not all cases, and patches to
+ improve it are gratefully received!
+ </p>
+
+
+
+<a name="Further+Examples"></a>
+<div class="h3">
+<h3>Further Examples</h3>
+</div>
+
+<p>For now, the best source of additional examples is in the unit
+ tests. <a href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
+ Browse the HWPF unit tests.</a>
+
+</p>
+
+
+
+<div id="authors" align="right">by Nick Burch</div>
+</div>
+</div>
+</div>
+</td>
+<!--================= end Content ==================-->
+</tr>
+</tbody>
+</table>
+<!--================= end Main ==================-->
+<!--================= start Footer ==================-->
+<div id="footer">
+<table summary="footer" cellspacing="0" cellpadding="4" width="100%" border="0">
+<tbody>
+<tr>
+<!--================= start Copyright ==================-->
+<td colspan="2">
+<div align="center">
+<div class="copyright">
+ Copyright © 2002-2012 The Apache Software Foundation. All rights reserved.<br>
+ Apache POI, POI, Apache, the Apache feather logo, and the Apache
+ POI project logo are trademarks of The Apache Software Foundation.
+ </div>
+</div>
+</td>
+<!--================= end Copyright ==================-->
+</tr>
+<tr>
+<td align="left">
+<!--================= start Host ==================-->
+<!--================= end Host ==================--></td><td align="right">
+<!--================= start Credits ==================-->
+<div align="right">
+<div class="credit"></div>
+</div>
+<!--================= end Credits ==================-->
+</td>
+</tr>
+</tbody>
+</table>
+</div>
+<!--================= end Footer ==================-->
+</body>
+</html>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org