You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@forrest.apache.org by ch...@apache.org on 2003/09/12 21:07:31 UTC

cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java

cheche      2003/09/12 12:07:31

  Modified:    .        status.xml
               src/resources/conf web.xml
               src/resources/forrest-shbat forrest.build.xml
               src/resources/fresh-site/src/documentation skinconf.xml
               src/resources/schema/relaxng skinconf.rnc
               src/resources/skins/common/xslt/html document2html.xsl
               src/resources/skins/forrest-site/xslt/html site2xhtml.xsl
  Added:       src/scratchpad/src/java/org/apache/forrest/search
                        ForrestDocument.java ForrestDocumentSAXParser.java
                        ForrestIndexer.java ForrestSearchRenderer.java
                        ForrestSearchServlet.java ForrestSearcher.java
  Log:
  Added first version of Lucene integrated within Forrest.
  PR: FOR-9
  Submitted by:	Ramón Prades rprades@porcelanosa.com
  
  Revision  Changes    Path
  1.210     +14 -2     xml-forrest/status.xml
  
  Index: status.xml
  ===================================================================
  RCS file: /home/cvs/xml-forrest/status.xml,v
  retrieving revision 1.209
  retrieving revision 1.210
  diff -u -r1.209 -r1.210
  --- status.xml	12 Sep 2003 15:53:20 -0000	1.209
  +++ status.xml	12 Sep 2003 19:07:30 -0000	1.210
  @@ -24,6 +24,18 @@
     <!-- ===================================================================== -->
   
     <changes>
  +    <release version="0.6-dev" date="unreleased">
  +      <action dev="JJP" type="add" context="core" fixes-bug="FOR-9" 
  +        due-to="Ramón Prades" due-to-email="rprades@porcelanosa.com" >
  +	Integrate Lucene on Forrest. For more info
  +      </action>
  +       <action dev="SN" type="add" context="skins">
  +          adding @label support for notes and warning.
  +       </action>
  +       <action dev="JJP" type="add" context="core">
  +          Added XSP support.
  +       </action>
  +    </release>
       <release version="0.5-dev" date="unreleased">
         <action dev="DC" type="fix" context="core">
           New and old resolver.jars in the classpath produces reflection error.
  @@ -90,7 +102,7 @@
           attribute from entries in site.xml or book.xml
         </action>
         <action dev="DC" type="update" context="skins"
  -        due-to="Ramon Prades" due-to-email="rprades@porcelanosa.com" >
  +        due-to="Ramón Prades" due-to-email="rprades@porcelanosa.com" >
           The generated Tables of Contents can be configured to show a certain
           number of section levels. Default is two levels. Level 0 means show
           no ToC at all. See the fresh-site skinconf.xml for example usage.
  
  
  
  1.7       +21 -1     xml-forrest/src/resources/conf/web.xml
  
  Index: web.xml
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/conf/web.xml,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- web.xml	22 Jun 2003 12:31:47 -0000	1.6
  +++ web.xml	12 Sep 2003 19:07:30 -0000	1.7
  @@ -305,8 +305,28 @@
       <load-on-startup>1</load-on-startup>
     </servlet>
   
  +  <!--
  +    SearchServlet - Used to search in site (Lucene)
  +  -->
  +  <servlet>
  +    <servlet-name>SearchServlet</servlet-name>
  +    <servlet-class>org.apache.forrest.search.ForrestSearchServlet</servlet-class>
  +    <init-param>
  +      <param-name>project-skin</param-name>
  +      <param-value>@skin@</param-value>
  +    </init-param>
  +  </servlet>
  +  
  +  
     <!-- URL space mappings ============================================= -->
  -
  +  <!--
  +    Calls to "search.cmd" are processed by this servlet
  +   -->
  +  <servlet-mapping>
  +     <servlet-name>SearchServlet</servlet-name>
  +     <url-pattern>/search.cmd</url-pattern>
  +  </servlet-mapping>
  +		    
     <!--
       Cocoon handles all the URL space assigned to the webapp using its sitemap.
       It is recommended to leave it unchanged. Under some circumstances though
  
  
  
  1.96      +44 -20    xml-forrest/src/resources/forrest-shbat/forrest.build.xml
  
  Index: forrest.build.xml
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/forrest-shbat/forrest.build.xml,v
  retrieving revision 1.95
  retrieving revision 1.96
  diff -u -r1.95 -r1.96
  --- forrest.build.xml	10 Sep 2003 09:37:10 -0000	1.95
  +++ forrest.build.xml	12 Sep 2003 19:07:30 -0000	1.96
  @@ -62,6 +62,7 @@
       <property name="project.site-dir"    location="${project.build-dir}/site"    />
       <property name="project.war"         location="${project.build-dir}/${project.name}.war" />
       <property name="project.webapp"      location="${project.build-dir}/webapp" />
  +    <property name="project.index-dir"   location="${project.webapp}/lucene-index" />
   
       <property name="project.temp-dir"    location="${project.build-dir}/tmp"     />
       <property name="project.work-dir"    location="${project.temp-dir}/work"      />
  @@ -349,7 +350,7 @@
       <filter token="project-logo.href" value="${skinconfig.project-url}"/>
       <filter token="project-logo.alt"  value="${skinconfig.project-name} logo"/>
   
  -    <copy todir="${project.ctxt-dir}/" filtering="off">
  +    <copy todir="${project.ctxt-dir}/" >
         <fileset dir="${forrest.home}/context" >
           <exclude name="*.xmap" />
           <exclude name="skins/**" />
  @@ -360,7 +361,7 @@
         </fileset>
       </copy>
   
  -    <copy todir="${project.ctxt-dir}/" filtering="on" overwrite="true">
  +    <copy todir="${project.ctxt-dir}/" filtering="true" overwrite="true">
       <!-- everything in the skins, except the images , and the sitemap -->
         <fileset dir="${forrest.home}/context" >
           <include name="*.xmap" />
  @@ -443,7 +444,7 @@
     </target>
   
     <target name="copy-resources" if="resources.present">
  -    <copy toDir="${project.ctxt-dir}/resources" filtering="false" failonerror="false">
  +    <copy toDir="${project.ctxt-dir}/resources" failonerror="false">
         <fileset dir="${project.resources-dir}">
           <exclude name="stylesheets"/>
           <exclude name="images"/>
  @@ -453,21 +454,21 @@
     </target>
   
     <target name="copy-stylesheets" if="stylesheets.present">
  -    <copy toDir="${project.ctxt-dir}/resources/stylesheets" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/resources/stylesheets" 
         overwrite="true" failonerror="false">
         <fileset dir="${project.stylesheets-dir}"/>
       </copy>
     </target>
   
     <target name="copy-images" if="images.present">
  -    <copy toDir="${project.ctxt-dir}/resources/images" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/resources/images" 
         overwrite="true" failonerror="false">
         <fileset dir="${project.images-dir}"/>
       </copy>
     </target>
   
     <target name="copy-grammars" if="grammars.present">
  -    <copy toDir="${project.ctxt-dir}/resources/grammars" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/resources/grammars" 
         overwrite="true" failonerror="false">
         <fileset dir="${project.grammars-dir}"/>
       </copy>
  @@ -475,7 +476,7 @@
   
     <!-- Copy all non-xdocs content -->
     <target name="copy-content" if="real-content.present">
  -    <copy toDir="${project.ctxt-dir}/content" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/content" 
         overwrite="false" failonerror="false">
         <fileset dir="${project.real-content-dir}">
           <exclude name="xdocs/**"/>
  @@ -484,26 +485,26 @@
     </target>
   
     <target name="copy-lib" if="lib.present">
  -    <copy toDir="${project.ctxt-dir}/WEB-INF/lib" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/WEB-INF/lib" 
         overwrite="true" failonerror="false">
         <fileset dir="${project.lib-dir}"/>
       </copy>
     </target>
   
     <target name="copy-classes" if="classes.present">
  -    <copy toDir="${project.ctxt-dir}/WEB-INF/classes" filtering="false"
  +    <copy toDir="${project.ctxt-dir}/WEB-INF/classes" 
         overwrite="true" failonerror="false">
         <fileset dir="${project.classes-dir}"/>
       </copy>
     </target>
   
     <target name="copy-skins" if="skins.present">
  -    <copy toDir="${project.ctxt-dir}/skins" filtering="off" overwrite="true" failonerror="false">
  +    <copy toDir="${project.ctxt-dir}/skins" overwrite="true" failonerror="false">
          <fileset dir="${project.skins-dir}" >
           <include name="**/images/**" />
         </fileset>
       </copy>
  -    <copy toDir="${project.ctxt-dir}/skins" filtering="on" overwrite="true" failonerror="false">
  +    <copy toDir="${project.ctxt-dir}/skins" filtering="true" overwrite="true" failonerror="false">
         <fileset dir="${project.skins-dir}" >
           <exclude name="**/images/**" />
         </fileset>
  @@ -512,12 +513,12 @@
   
     <target name="copy-skinconf" if="skinconf.present">
       <copy file="${project.skinconf}"
  -      toDir="${project.ctxt-dir}" filtering="false"
  +      toDir="${project.ctxt-dir}" 
         overwrite="true" failonerror="false"/>
     </target>
   
     <target name="copy-status" if="status.present">
  -    <copy file="${project.status}" todir="${project.ctxt-dir}" filtering="false"/>
  +    <copy file="${project.status}" todir="${project.ctxt-dir}" />
     </target>
   
   
  @@ -650,7 +651,7 @@
           | please ensure they keep in synch.
           -->
           <property name="skindir" location="${project.ctxt-dir}/skins/${project.skin}"/>
  -        <copy toDir="${project.site-dir}" filtering="false"
  +        <copy toDir="${project.site-dir}" 
             overwrite="true" failonerror="false">
             <fileset dir="${project.ctxt-dir}/content">
               <exclude name="xdocs/**"/>
  @@ -660,13 +661,13 @@
   
           <!-- Copy skin images -->
           <mkdir dir="${project.site-dir}/skin/images"/>
  -        <copy toDir="${project.site-dir}/skin/images" filtering="false" overwrite="true"
  +        <copy toDir="${project.site-dir}/skin/images" overwrite="true"
             failonerror="false">
             <fileset dir="${skindir}/images"/>
           </copy>
   
           <!-- Copy all other non-resource files -->
  -        <copy toDir="${project.site-dir}/skin" filtering="false"
  +        <copy toDir="${project.site-dir}/skin" 
             overwrite="true"
             failonerror="false">
             <mapper type="flatten"/>
  @@ -690,7 +691,24 @@
       </echo>
     </target>
   
  -
  +  <!-- ===============================================================
  +       Prepares the Lucene context indexing the site. [RPR]
  +       =============================================================== -->
  +  <target name="lucene-index" depends="init, -prepare-classpath" unless="stop-lucene-indexer">
  +    <java classname="org.apache.forrest.search.ForrestIndexer"
  +      dir="${project.ctxt-dir}"
  +      fork="true"
  +      failonerror="true"
  +      maxmemory="${forrest.maxmemory}">
  +      <jvmarg line="${forrest.jvmargs}"/>
  +      <jvmarg value="-Djava.endorsed.dirs=${forrest.home}/lib/endorsed${path.separator}${java.endorsed.dirs}"/>
  +      <arg line="-index ${project.index-dir}"/>
  +      <arg value="${project.xdocs-dir}"/>
  +      <classpath>
  +        <path refid="forrest.cp"/>
  +      </classpath>
  +    </java>
  +  </target>
   
     <target name="project.webapp.defined" unless="project.webapp">
       <fail>
  @@ -711,7 +729,8 @@
       <!-- this will need revisions in the case of the siteplan
       extra project-custom classes then will need to be warred in as well. -->
       <mkdir dir="${project.webapp}/WEB-INF"/>
  -    <copy file="${forrest.home}/WEB-INF/web.xml" todir="${project.webapp}/WEB-INF"/>
  +    <copy file="${forrest.home}/WEB-INF/web.xml" filtering="true" 
  +      todir="${project.webapp}/WEB-INF"/>
       <copy todir="${project.webapp}" preservelastmodified="true">
         <fileset dir="${project.ctxt-dir}" >
           <exclude name="*.xconf" /> <!-- CLI wants them there, webapp finds them in WEB-INF -->
  @@ -733,6 +752,11 @@
           <include name="*jar" unless="jdk1.4+"/>
         </fileset>
       </copy>
  +    <!-- Index files for Lucene [RPR] -->
  +    <condition property="stop-lucene-indexer">
  +      <istrue value="${skinconfig.disable-lucene}"/>
  +    </condition>
  +    <antcall target="lucene-index"/>
       <echo>
   ---------------------------------
   Webapp generated in ${project.webapp}
  @@ -1020,7 +1044,7 @@
       This translates to: copy all docs that are not in the Forrest context
       directory, or that are, but are different.
       -->
  -    <copy toDir="${project.content-dir}" filtering="false" overwrite="false">
  +    <copy toDir="${project.content-dir}" overwrite="false">
         <fileset dir="${project.webapp}" excludes="${forrest.backcopy.excludes}">
           <or>
             <not>
  
  
  
  1.16      +4 -1      xml-forrest/src/resources/fresh-site/src/documentation/skinconf.xml
  
  Index: skinconf.xml
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/fresh-site/src/documentation/skinconf.xml,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- skinconf.xml	7 Sep 2003 12:30:36 -0000	1.15
  +++ skinconf.xml	12 Sep 2003 19:07:30 -0000	1.16
  @@ -9,7 +9,7 @@
   
     <!ENTITY % links.att 'name CDATA #REQUIRED'>
     <!ENTITY % link.att 'name CDATA #REQUIRED href CDATA #REQUIRED'>
  -  <!ELEMENT skinconfig (disable-search?, disable-print-link?, disable-pdf-link?,
  +  <!ELEMENT skinconfig (disable-lucene?, disable-search?, disable-print-link?, disable-pdf-link?,
     disable-xml-link?, disable-compliance-links?, searchsite-domain?, searchsite-name?,
     project-name, project-description, project-url, project-logo, group-name?, group-description?, group-url?, group-logo?,
     host-url?, host-logo?, year?, vendor?, trail?, toc?, credits?)*>
  @@ -18,6 +18,7 @@
     <!-- id uniquely identifies the tool, and role indicates its function -->
     <!ATTLIST credit id   CDATA #IMPLIED
                      role CDATA #IMPLIED>
  +  <!ELEMENT disable-lucene (#PCDATA)>
     <!ELEMENT disable-search (#PCDATA)>
     <!ELEMENT disable-print-link (#PCDATA)>
     <!ELEMENT disable-pdf-link (#PCDATA)>
  @@ -55,6 +56,8 @@
     ]>
   
   <skinconfig>
  +  <!-- Do we want to disable the Lucene search box? -->
  +  <disable-lucene>false</disable-lucene>
     <!-- Do we want to disable the Google search box? -->
     <disable-search>false</disable-search>
     <!-- Do we want to disable the print link? If enabled, invalid HTML 4.0.1 -->
  
  
  
  1.7       +2 -1      xml-forrest/src/resources/schema/relaxng/skinconf.rnc
  
  Index: skinconf.rnc
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/schema/relaxng/skinconf.rnc,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- skinconf.rnc	2 Sep 2003 13:13:21 -0000	1.6
  +++ skinconf.rnc	12 Sep 2003 19:07:30 -0000	1.7
  @@ -6,6 +6,7 @@
   
   skinconfig = element skinconfig {
               (
  +	    element disable-lucene {xsd:boolean}?,   # Disable lucene search feature. 'true' or 'false'
               element disable-search {xsd:boolean}?,   # Disable search feature. 'true' or 'false'
               element disable-print-link{xsd:boolean}?,   # Disable print link feature. 'true' or 'false'
               element disable-pdf-link{xsd:boolean}?,   # Disable print link feature. 'true' or 'false'
  @@ -23,7 +24,7 @@
               element group-logo {text}?,              # Eg images/group-logo.gif
               element host-url {xsd:anyURI}?,          # Eg Sourceforge URL
               element host-logo {text}?,
  -            element year {text}?,               # Used in Copyright
  +            element year {text}?,                    # Used in Copyright
               element vendor {text}?,                  # Used in Copyright
               trail?,                                  # 'Breadcrumbs' trail in skins that support it
               element toc {(attribute level {text})}?, # toc generation
  
  
  
  1.33      +13 -8     xml-forrest/src/resources/skins/common/xslt/html/document2html.xsl
  
  Index: document2html.xsl
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/skins/common/xslt/html/document2html.xsl,v
  retrieving revision 1.32
  retrieving revision 1.33
  diff -u -r1.32 -r1.33
  --- document2html.xsl	12 Sep 2003 10:25:55 -0000	1.32
  +++ document2html.xsl	12 Sep 2003 19:07:31 -0000	1.33
  @@ -39,7 +39,8 @@
         <xsl:otherwise>2</xsl:otherwise>
       </xsl:choose>
     </xsl:variable>
  -    
  +
  +  <xsl:param name="dynamic-page" select="'false'"/>  
     <xsl:param name="notoc"/>
     <xsl:param name="path"/>
     <xsl:param name="obfuscate-mail-links" select="'false'"/>
  @@ -129,22 +130,26 @@
   
     <!-- Generates the PDF link -->
     <xsl:template name="pdflink">
  -    <xsl:if test="not($config/disable-pdf-link) or $disable-pdf-link = 'false'"> 
  -      <td align="center" width="40" nowrap="nowrap"><a href="{$filename-noext}.pdf" class="dida">
  +    <xsl:if test="$dynamic-page='false'">
  +      <xsl:if test="not($config/disable-pdf-link) or $disable-pdf-link = 'false'"> 
  +        <td align="center" width="40" nowrap="nowrap"><a href="{$filename-noext}.pdf" class="dida">
             <img class="skin" src="{$skin-img-dir}/pdfdoc.gif" alt="PDF"/><br/>
             PDF</a>
  -      </td>
  +        </td>
  +      </xsl:if>
       </xsl:if>
     </xsl:template>
     
   
     <!-- Generates the XML link -->
     <xsl:template name="xmllink">
  -    <xsl:if test="$disable-xml-link = 'false'">
  -      <td align="center" width="40" nowrap="nowrap"><a href="{$filename-noext}.xml" class="dida">
  +    <xsl:if test="$dynamic-page='false'">
  +      <xsl:if test="$disable-xml-link = 'false'">
  +        <td align="center" width="40" nowrap="nowrap"><a href="{$filename-noext}.xml" class="dida">
             <img class="skin" src="{$skin-img-dir}/xmldoc.gif" alt="xml"/><br/>
             xml</a>
  -      </td>
  +        </td>
  +      </xsl:if>
       </xsl:if>
     </xsl:template>
     
  
  
  
  1.23      +62 -12    xml-forrest/src/resources/skins/forrest-site/xslt/html/site2xhtml.xsl
  
  Index: site2xhtml.xsl
  ===================================================================
  RCS file: /home/cvs/xml-forrest/src/resources/skins/forrest-site/xslt/html/site2xhtml.xsl,v
  retrieving revision 1.22
  retrieving revision 1.23
  diff -u -r1.22 -r1.23
  --- site2xhtml.xsl	2 Sep 2003 13:13:21 -0000	1.22
  +++ site2xhtml.xsl	12 Sep 2003 19:07:31 -0000	1.23
  @@ -83,10 +83,59 @@
           <xsl:comment>================= end Project Logo ==================</xsl:comment>
   
           <xsl:comment>================= start Search ==================</xsl:comment>
  +	<!-- Display search box if lucene-search or google-search enabled -->
  +	<!-- Lucene has precedence over Google (if lucene set, don't use Google) -->
  +	<xsl:variable name="lucene">
  +	  <xsl:choose>
  +	    <xsl:when test="(not($config/disable-lucene) or
  +            $config/disable-lucene='false')">true</xsl:when>
  +	    <xsl:otherwise>false</xsl:otherwise>
  +	  </xsl:choose>
  +	</xsl:variable>
  +	<xsl:variable name="google">
  +	  <xsl:choose>
  +	    <xsl:when test="$lucene='true'">false</xsl:when>
  +	    <xsl:when test="not($config/disable-search) or $config/disable-search='false'
  +	    and $config/searchsite-domain and $config/searchsite-name">true</xsl:when>
  +	    <xsl:otherwise>false</xsl:otherwise>
  +	  </xsl:choose>
  +	</xsl:variable>	
  +
           <td bgcolor="{$header-color}" rowspan="2" valign="top">
  -          <xsl:if test="not($config/disable-search) or
  -            $config/disable-search='false' and $config/searchsite-domain and
  -            $config/searchsite-name">
  +          <xsl:if test="$lucene='true'">
  +	    <xsl:variable name="search-url" select="concat(string($root), 'search.cmd')"/>
  +	     <form method="get" action="{$search-url}">
  +              <table bgcolor="{$menu-border}" cellpadding="0" cellspacing="0" border="0" summary="search">
  +                <tr>
  +                  <td colspan="3"><img class="spacer" src="{$spacer}" alt="" width="1" height="10" /></td>
  +                </tr>
  +                <tr>
  +                  <td><img class="spacer" src="{$spacer}" alt="" width="1" height="1" /></td>
  +                  <td nowrap="nowrap">
  +                    <input type="text" id="query" name="query" size="15"/>
  +                    <img class="spacer" src="{$spacer}" alt="" width="5" height="1" />
  +		    <input type="submit" value="Search" name="Search"/> <br />
  +		    <font color="white" size="2" face="Arial, Helvetica, Sans-serif">
  +		    <xsl:text>Powered by Apache Lucene</xsl:text></font>
  +
  +	            <!-- setting search options off for the moment -->
  +		    <!--
  +		    <input type="radio" name="web" value="web"/>web site&#160;&#160;
  +		    <input type="radio" name="mail" value="mail"/>mail lists
  +		    -->
  +                  </td>
  +                  <td><img class="spacer" src="{$spacer}" alt="" width="1" height="1" /></td>
  +                </tr>
  +                <tr>
  +                  <td><img src="{$skin-img-dir}/search-left.gif" width="9" height="10" border="0" alt="" /></td>
  +                  <td><img class="spacer" src="{$spacer}" alt="" width="1" height="1" /></td>
  +                  <td><img src="{$skin-img-dir}/search-right.gif" width="9" height="10" border="0" alt="" /></td>
  +                </tr>
  +              </table>
  +            </form>
  +          </xsl:if>
  +
  +          <xsl:if test="$google='true'">
               <form method="get" action="http://www.google.com/search" target="_blank">
                 <table bgcolor="{$menu-border}" cellpadding="0" cellspacing="0" border="0" summary="search">
                   <tr>
  @@ -98,15 +147,16 @@
                       <input type="hidden" name="sitesearch" value="{$config/searchsite-domain}"/>
                       <input type="text" id="query" name="q" size="15"/>
                       <img class="spacer" src="{$spacer}" alt="" width="5" height="1" />
  -                    <input type="submit" value="Search" name="Search"/>
  -                    <br />
  -                    <font color="white" size="2" face="Arial, Helvetica, Sans-serif">
  -                      the <xsl:value-of select="$config/searchsite-name"/> site
  -                      <!-- setting search options off for the moment -->
  -                      <!--
  -                      <input type="radio" name="web" value="web"/>web site&#160;&#160;<input type="radio" name="mail" value="mail"/>mail lists
  -                      -->
  -                    </font>
  +		    <input type="submit" value="Search" name="Search"/><br />
  +		    <font color="white" size="2" face="Arial, Helvetica, Sans-serif">
  +		    <xsl:text>the </xsl:text><xsl:value-of select="$config/searchsite-name"/><xsl:text> site</xsl:text>
  +	            </font>
  +
  +		    <!-- setting search options off for the moment -->
  +		    <!--
  +		    <input type="radio" name="web" value="web"/>web site&#160;&#160;
  +		    <input type="radio" name="mail" value="mail"/>mail lists
  +		    -->
                     </td>
                     <td><img class="spacer" src="{$spacer}" alt="" width="1" height="1" /></td>
                   </tr>
  
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestDocument.java
  
  Index: ForrestDocument.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.forrest.search;
  
  import java.io.File;
  import java.util.HashMap;
  import org.apache.lucene.document.*;
  
  /**
   * Utility class that builds Lucene <code>Document</code>s from Forrest
   * documents.
   * @author Ramon Prades [RPR]
   * @version $Id: ForrestDocument.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
   */
  public class ForrestDocument {
  
    /**
     * Builds the Lucene document for a file, asking a
     * <code>ForrestDocumentSAXParser</code> to extract the relevant
     * information.
     *
     * @param file the file to parse
     * @return the Lucene document, or <code>null</code> if parsing the file
     *         or building the document throws any exception
     */
    public static Document document(File file) {
      ForrestDocumentSAXParser parser = new ForrestDocumentSAXParser();
      try {
        HashMap results = parser.parseDocument(file);
        return processInfo(file, results);
      }
      catch (Exception ex) {
        // Deliberate best-effort: any failure is treated as "not a Forrest
        // doc" and reported to the caller as null rather than propagated.
        return null;
      }
    } // document
  
    /**
     * Turns the values extracted by the parser into a Lucene document.
     * The "title", "abstract" and "author" entries become the
     * <code>title</code>, <code>summary</code> and <code>author</code> fields
     * (created with <code>Field.Text</code>); title, summary and the "body"
     * entry are concatenated into the <code>contents</code> field (created
     * with <code>Field.UnStored</code>).
     *
     * @param file    the parsed file (currently not used to build any field)
     * @param results the map returned by the parser; missing entries are
     *                treated as empty strings
     * @return the populated Lucene document
     */
    private static Document processInfo(File file, HashMap results) {
      // Extract the pieces of information we index
      String docTitle = (String) getFromResults("title", results);
      String docSummary = (String) getFromResults("abstract", results);
      String docAuthor = (String) getFromResults("author", results);
      String docContents = (String) getFromResults("body", results);
  
      Document doc = new Document();
      // Title, summary and author
      doc.add(Field.Text("title", docTitle));
      doc.add(Field.Text("summary", docSummary));
      doc.add(Field.Text("author", docAuthor));
      // Full text: title and summary are included so they match content queries
      doc.add(Field.UnStored("contents", docTitle + " " + docSummary + " " + docContents));
      return doc;
    } // processInfo
  
    /**
     * Looks up a key in the parser results, never returning
     * <code>null</code>.
     *
     * @param key     the entry to look up
     * @param results the map returned by the parser
     * @return the mapped value, or the empty string when the key is absent
     *         or mapped to <code>null</code>
     */
    private static Object getFromResults(String key, HashMap results) {
      // A single get() also covers a key explicitly mapped to null, which
      // would otherwise leak the literal text "null" into the contents field.
      Object value = results.get(key);
      return (value != null) ? value : "";
    } // getFromResults
  
  } // Class ForrestDocument
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestDocumentSAXParser.java
  
  Index: ForrestDocumentSAXParser.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.forrest.search;
  
  import org.apache.xerces.parsers.SAXParser;
  import java.io.*;
  import org.xml.sax.*;
  import org.xml.sax.helpers.*;
  import org.xml.sax.ext.LexicalHandler;
  import java.util.Vector;
  import java.util.HashMap;
  
  /**
   * <p>Parses a Forrest Document and extracts the information to use when
   * generating Lucene indexes.</p>
   * <p>The parser scans the document searching for a number of tags. When a match
   * is found, it buffers all the text contained in the full subtree. When the parser
   * is buffering text, it ignores all tags and just keeps the text.</p>
   * <p>As an example consider the following document:</p>
   * <code>
   * <pre>
   * &lt;document&gt;
   * &lt;header&gt;
   * &lt;title&gt;The title&lt;/title&gt;
   * &lt;abstract&gt;An example&lt;/abstract&gt;
   * &lt;/header&gt;
   * &lt;body&gt;
   * &lt;section&gt;
   * &lt;title&gt;The Section&lt;/title&gt;
   * &lt;p&gt;Some text with &lt;strong&gt;embedded&lt;/strong&gt; tags&lt;/p&gt;
   * &lt;/section&gt;
   * &lt;/body&gt;
   * &lt;/document&gt;
   * </pre>
   * </code>
   * <p>If the parser is applied to <code>body</code> the result will be
   * "The Section Some text with embedded tags". This permits the parser to generate
   * fields with the full content of the body, so it can be indexed and searched later.</p>
   * <p>If the parser now checks for <code>title</code> and <code>body</code> the
   * results will be "The title" for <code>title</code> and the same as above for <code>body</code>.
   * This demonstrates that the parser ignores the <code>title</code> inside the
   * <code>body</code>: while the parser is buffering <code>body</code> it
   * ignores all other tags. This feature is useful to capture information inside
   * the header.</p>
   * <p>This is all that is needed to pass the information to Lucene, and by using this
   * algorithm the class stays quite simple.</p>
   *
   * <p><em>(Hope my English it's not too bad ;-)</em></p>
   *
   * @author Ramon Prades [RPR]
   * @version $Id: ForrestDocumentSAXParser.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
   */
  
  public class ForrestDocumentSAXParser extends DefaultHandler {

    // Parser configuration constants
    static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
    static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation";
    static final String EXTERNAL_DTD_FEATURE_ID = "http://apache.org/xml/features/nonvalidating/load-external-dtd";


    // List with the tags to capture
    static final String[] FORREST_HEADER_INDEXERS = {"title","abstract","body"};
    // ";"-separated list of the "name" attributes of the <person> elements seen
    // in the current document.
    // NOTE(review): this field is static, so it is shared by every parser
    // instance; two instances parsing at the same time would mix their authors.
    static String docAuthors = "";
    // Control variables
    XMLReader parser = null;               // underlying SAX reader (null if it could not be created)
    HashMap results = null;                // tag name -> captured text for the current document
    String currentElement = "";            // tag whose subtree is currently being captured
    StringBuffer textBuffer = new StringBuffer();
    Vector tags = null;                    // tags of interest, loaded from FORREST_HEADER_INDEXERS
    boolean buffering = false;             // true while inside a captured subtree
    boolean isForrest = false;             // true once the root element was seen to be "document"

    /**
     * Constructor. Initializes the parser.
     * If the SAX parser cannot be created the error is reported on stderr and
     * any later call to <code>parseDocument</code> fails with a SAXException.
     */
    public ForrestDocumentSAXParser() {
      super();
      // Load the list of interesting tags in a vector for later processing
      tags = new Vector();
      for (int i=0; i<FORREST_HEADER_INDEXERS.length; i++) {
        tags.add(FORREST_HEADER_INDEXERS[i]);
      }
      // Instantiate the SAX parser
      try {
        parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME);
        parser.setFeature(VALIDATION_FEATURE_ID, false);
        parser.setFeature(EXTERNAL_DTD_FEATURE_ID, false);
        parser.setContentHandler(this);
        parser.setErrorHandler(this);
      } catch (SAXException ex) {
        System.err.println("Error getting the parser (" + ex.getMessage() + ")");
      }
    } // Constructor

    /**
     * Parses the selected document.
     * @param fileName Forrest document file name (used as the system id)
     * @return map of captured fields, or <code>null</code> when nothing could be parsed
     * @throws SAXException if the document is not a Forrest document, is not
     *         well formed, or no SAX parser is available
     */
    public HashMap parseDocument(String fileName) throws SAXException {
      // Forget the previous document so a failure cannot return stale data
      results = null;
      try {
        runParser(new InputSource(fileName));
      } catch (IOException ex) {
        ex.printStackTrace();
      }
      return results;
    } // parseDocument

    /**
     * Parses the selected document.
     * @param file Forrest document file
     * @return map of captured fields, or <code>null</code> when nothing could be parsed
     * @throws SAXException if the document is not a Forrest document, is not
     *         well formed, or no SAX parser is available
     */
    public HashMap parseDocument(File file) throws SAXException {
      // Forget the previous document so a failure (e.g. file not found)
      // cannot return stale data
      results = null;
      InputStream in = null;
      try {
        in = new java.io.FileInputStream(file);
        runParser(new InputSource(in));
      } catch (IOException ex) {
        ex.printStackTrace();
      } finally {
        // Always release the file handle, even when parsing is aborted
        if (in != null) {
          try {
            in.close();
          } catch (IOException ignored) {
            // nothing sensible can be done if close fails
          }
        }
      }
      return results;
    } // parseDocument

    /*
     * Runs the SAX parser over the given source, failing with a clear
     * SAXException when the parser could not be created in the constructor.
     */
    private void runParser(InputSource source) throws SAXException, IOException {
      if (parser == null) {
        throw new SAXException("No SAX parser available (" + DEFAULT_PARSER_NAME + ")");
      }
      parser.parse(source);
    } // runParser

    /**
     * Gets the results
     * @return map of captured fields of the last parsed document; may be
     *         <code>null</code> if the last document was rejected
     */
    public HashMap getResults() {
        return results;
    } // getResults

    /**
     * Triggered when a new document is about to be parsed
     */
    public void startDocument() {
      // Reset control variables. The capture state must be reset as well:
      // a previous parse may have been aborted in the middle of a captured
      // subtree, which would otherwise leave the parser "buffering" forever.
      textBuffer.setLength(0);
      results = new HashMap();
      isForrest = false;
      buffering = false;
      currentElement = "";
      docAuthors = "";
    } // startDocument

    /**
     * Saves authors when document fully parsed
     */
    public void endDocument() {
      results.put("author", docAuthors);
    }

    /**
     * Triggered when a new element is about to be parsed
     * @throws SAXException when the first element found is not "document"
     */
    public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException  {
      // Check the new tag only when not buffering
      if (!buffering) {
        // Check the root element to see if the document is a Forrest one
        if (!isForrest && (!localName.equals("document"))) {
          results = null;
          // If not forrest, throw an exception to stop parsing (speed matters!)
          throw new SAXException("The document is not a Forrest document!");
        }
        // Is Forrest, so carry on processing
        isForrest = true;
        // Check "person". Here we want the attribute "@name"
        if (localName.equals("person")) {
          String name = attributes.getValue("name");
          // Skip persons without a name instead of recording the text "null"
          if (name != null && name.length() > 0) {
            if (docAuthors == null || docAuthors.length() == 0) {
              docAuthors = name;
            } else {
              docAuthors = docAuthors + ";" + name;
            }
          }
        } else if (tags.contains(localName)) {
          currentElement = localName;
          buffering = true;
        }
      }
    } // startElement

    /**
     * End of element detected. If the closing element is the one the parser is
     * buffering, store the text, otherwise don't do anything
     */
    public void endElement(String uri, String localName, String qName) {
      if (buffering) {
       if (localName.equals(currentElement)) {
         buffering = false;
         results.put(currentElement, textBuffer.toString());
         textBuffer.setLength(0); // reset buffer
       } else {
         // add an extra space to avoid the following case:
         // <body>
         //   <section>
         //     <title>A title</title>
         //     <p>A paragraph</p>
         //   </section>
         // </body>
         // Unless an extra space is added the result would be: "A titleA paragraph"
         textBuffer.append(' ');
       }
      }
    } // endElement

    /**
     * Buffer the parsed characters while inside a captured subtree.
     */
    public void characters(char[] cbuf, int start, int len) {
      if (buffering) {
        textBuffer.append(cbuf, start, len);
      }
    } // characters

  } // ForrestDocumentSAXParser
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestIndexer.java
  
  Index: ForrestIndexer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.forrest.search;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  import java.lang.StringBuffer;
  
  import org.apache.lucene.analysis.standard.*;
  import org.apache.lucene.document.*;
  import org.apache.lucene.index.*;
  import org.apache.lucene.util.Arrays;
  
  /**
   * <p>Indexes all xml forrest documents below a given directory.</p>
   * <p>Parameters:</p>
   * <ul>
   * <li>
   * <strong><code>-index index_directory</code></strong> Directory where
   * the index is to be created
   * </li>
   * <li>
   * <strong><code>root_directory</code></strong> forrest 'xdocs' directory
   * </li>
   * </ul>
   * <h3>Current Limitations/todo</h3>
   * <ul>
   * <li>This version indexes Forrest XML documents only. It would be nice if PDF and
   * HTML could be added.</li>
   * <li>FAQ and TODO aren't indexed. Add support for them.</li>
   * <li>A full index is created every time. Create some sort of incremental indexing.</li>
   * <li>It could be a good idea to create a list of "reserved" filenames (e.g. book.xml
   * or status.xml) and force the indexer to skip them.</li>
   * </ul>
   *
   * @author Ramon Prades [RPR]
   * @version CVS $Id: ForrestIndexer.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
   */
  public class ForrestIndexer {

    // Info about the class itself
    private static final String VERSION = "Version 0.21 (2003-08-08)";
    private static final String DIVIDER =
        "==============================================================================";
    private static final String BANNER = "ForrestIndexer (Powered by Lucene) " + VERSION;
    private static final String COPYRIGHT =
        "Copyright (c) 2001, 2003 The Apache Software Foundation.  All rights reserved.";
    private static final String USAGE =
        "ForrestIndexer [-index <index_directory>] <root_directory>";

    // Some vars
    private static IndexReader reader; // Existing index (not used by this version)
    private static IndexWriter writer; // New index being built
    private static String rootPath = ""; // Path of the root directory, used to compute relative paths

    /**
     * Main method. See parameters at class javadoc.
     * Prints the usage message and returns when the arguments are incomplete
     * (no root directory, or <code>-index</code> without a value).
     */
    public static void main(String[] argv) {
      String index = "";
      boolean create = true; // the index is always rebuilt from scratch
      File root = null;
      if (argv.length == 0) {
        System.err.println("Usage: " + USAGE);
        return;
      }

      // Get parameters from args
      for (int i = 0; i < argv.length; i++) {
        if (argv[i].equals("-index")) { // parse -index option
          if (i + 1 >= argv.length) { // option given without its value
            System.err.println("Usage: " + USAGE);
            return;
          }
          index = argv[++i];
        } else if (i != argv.length - 1) {
          System.err.println("Usage: " + USAGE);
          return;
        } else {
          root = new File(argv[i]);
        }
      }
      // The root directory is mandatory (e.g. only "-index dir" was given)
      if (root == null) {
        System.err.println("Usage: " + USAGE);
        return;
      }

      try {
        // Print banner
        System.out.println(DIVIDER);
        System.out.println(BANNER);
        System.out.println(COPYRIGHT);
        System.out.println(DIVIDER);
        System.out.println("");
        rootPath = root.getPath().trim();
        System.out.println("Source Directory: " + rootPath);
        System.out.println("Index Directory: " + index);
        System.out.println("");

        Date start = new Date();
        writer = new IndexWriter(index, new StandardAnalyzer(), create);
        writer.maxFieldLength = 1000000;
        indexDocs(root); // add new docs
        System.out.print("Index created! - Total milliseconds ");
        System.out.println(new Date().getTime() - start.getTime());
        System.out.println("");

        System.out.println("Optimizing index...");
        writer.optimize();
        writer.close();
        writer = null; // closed successfully, nothing left for the finally block
        System.out.print("Index optimized! - Total milliseconds ");
        System.out.println(new Date().getTime() - start.getTime());
      } catch (Exception e) {
        System.err.println(" Exception in " + e.getClass() +
                           "\n with message: " + e.getMessage());
        e.printStackTrace();
      } finally {
        // Make sure the writer is closed when indexing failed half way
        if (writer != null) {
          try {
            writer.close();
          } catch (IOException ex) {
            System.err.println("Error closing the index (" + ex.getMessage() + ")");
          }
          writer = null;
        }
      }
    } // main

    /*
     * Create the index: walks "file" recursively and adds every ".xml" file
     * that ForrestDocument accepts to the index being built.
     */
    private static void indexDocs(File file) {
      if (file.isDirectory()) { // if a directory
        String[] files = file.list(); // list its files
        if (files == null) { // list() returns null when the directory cannot be read
          System.err.println("Cannot list directory: " + file.getPath());
          return;
        }
        Arrays.sort(files); // sort the files
        for (int i = 0; i < files.length; i++) { // recursively index them
          indexDocs(new File(file, files[i]));
        }
      } else if (file.getPath().endsWith(".xml")) { // index .xml files
        String filePath = getRelativePath(file.getPath(), rootPath);
        System.out.print("Indexing ... " + filePath);
        Document doc = ForrestDocument.document(file);
        if (doc == null) {
          System.out.println(" [Ignored]");
        } else {
          try {
            // Add last modified and path
            doc.add(Field.Keyword("modified", Long.toString(file.lastModified())));
            doc.add(Field.Keyword("path", filePath));
            writer.addDocument(doc); // add docs unconditionally
            // Report success only when the document was really added
            System.out.println(" [Done]");
          } catch (IOException ex) {
            System.out.println(" [Error: " + ex.getMessage() + "]");
          }
        }
      }
    } // indexDocs

    /*
     * Utility method to calculate the path of a file relative to the root.
     * Falls back to the full path when the file is not below the root, and to
     * the plain file name when the root is the file itself.
     */
    private static String getRelativePath(String filePath, String rootPath) {
      if (!filePath.startsWith(rootPath)) {
        return filePath;
      }
      String relative = filePath.substring(rootPath.length());
      // Drop the separator between root and the relative part, if any
      if (relative.length() > 0 &&
          (relative.charAt(0) == File.separatorChar || relative.charAt(0) == '/')) {
        relative = relative.substring(1);
      }
      if (relative.length() == 0) {
        return new File(filePath).getName();
      }
      return relative;
    } // getRelativePath
  } // Class ForrestIndexer
  
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearchRenderer.java
  
  Index: ForrestSearchRenderer.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.forrest.search;
  
  import org.w3c.dom.*;
  import javax.xml.transform.*;
  import javax.xml.transform.dom.*;
  import java.io.*;
  import org.apache.xerces.dom.*;
  import javax.xml.transform.sax.*;
  import javax.xml.transform.stream.StreamSource;
  import org.xml.sax.XMLReader;
  import org.xml.sax.helpers.XMLReaderFactory;
  
  
  /**
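   * <p>Title: ForrestSearchRenderer</p>
   * <p>Description: Renders a search result document as an HTML page by applying
   * the selected skin's document2html.xsl stylesheet followed by site2xhtml.xsl.</p>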
   * <p>Copyright: Copyright (c) 2003</p>
   * <p>Company: </p>
   * @author not attributable
   * @version 1.0
   */
  
  public class ForrestSearchRenderer {
    // First pass: document -> intermediate markup (document2html.xsl)
    Transformer transformer = null;
    // Second pass: intermediate markup -> final page (site2xhtml.xsl)
    Transformer transformer2 = null;
    private String skinconf = "";
    private static final String doc2html = "document2html.xsl";
    private static final String site2xhtml = "site2xhtml.xsl";

    /**
     * Loads the two stylesheets of the given skin.
     * A stylesheet that cannot be loaded is reported on stderr; in that case
     * {@link #render(Document)} returns <code>null</code>.
     * @param rootPath directory containing "skinconf.xml" and the "skins" directory
     * @param skin name of the skin directory below "skins"
     */
    public ForrestSearchRenderer(String rootPath, String skin) {
      String fullPath = rootPath + "/skins/" + skin + "/xslt/html/";
      // Instantiate  a TransformerFactory.
      TransformerFactory tFactory = TransformerFactory.newInstance();
      try {
        skinconf = rootPath + "/skinconf.xml";
        transformer = tFactory.newTransformer
            (new javax.xml.transform.stream.StreamSource(fullPath + doc2html));
        transformer.setParameter("config-file", skinconf);
        transformer.setParameter("notoc", "true");
        transformer.setParameter("dynamic-page", "true");
        transformer2 = tFactory.newTransformer
            (new javax.xml.transform.stream.StreamSource(fullPath + site2xhtml));
        transformer2.setParameter("config-file", skinconf);
      } catch (TransformerConfigurationException ex) {
        // Say which stylesheet directory failed and why
        System.err.println("Transformer Config exception loading stylesheets from "
                           + fullPath + " (" + ex.getMessageAndLocation() + ")");
      }
    } // Constructor

    /**
     * Applies both stylesheets to the document and returns the resulting page.
     * The method is synchronized because a Transformer must not be used by
     * several threads at once.
     * @param dom document to render
     * @return the rendered page, or <code>null</code> if the stylesheets could
     *         not be loaded, the document is null or the transformation failed
     */
    public synchronized String render(Document dom) {
      String page = null;

      if (transformer == null || transformer2 == null) {
        System.err.println("Renderer not initialized: stylesheets could not be loaded");
        return page;
      }
      if (dom == null) {
        return page;
      }

      try {
        Document doc = new DocumentImpl();
        Element root = doc.createElement("site");
        DOMResult domResult = new DOMResult(root);
        transformer.transform(new DOMSource(dom.getDocumentElement()), domResult);

        // Collect characters, not bytes: decoding the transformer's bytes with
        // the platform default charset would garble non-ASCII text
        StringWriter result = new StringWriter();
        javax.xml.transform.stream.StreamResult theResult = new javax.xml.transform.stream.StreamResult(result);

        transformer2.transform(new DOMSource(domResult.getNode()), theResult);
        page = result.toString();
      } catch (TransformerException ex) {
        ex.printStackTrace();
      }

      return page;
    } // render

  }
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearchServlet.java
  
  Index: ForrestSearchServlet.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.forrest.search;
  
  import javax.servlet.*;
  import javax.servlet.http.*;
  import java.io.*;
  import java.util.*;
  import org.apache.xalan.transformer.*;
  import org.w3c.dom.*;
  import javax.xml.transform.*;
  import javax.xml.transform.dom.*;
  import javax.xml.transform.stream.*;
  import javax.xml.transform.*;
  import java.net.*;
  
  /**
   * <p>This servlet processes all search requests inside a Forrest site.</p>
   * @author Ramon Prades [RPR]
   * @version $Id: ForrestSearchServlet.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
   */
  public class ForrestSearchServlet extends HttpServlet {

    private static final String CONTENT_TYPE = "text/html";
    private ForrestSearcher searcher = null;
    private static ForrestSearchRenderer renderer = null;
    private String servletPath = "";
    private String indexDir = "";     // Full path to lucene index directory
    private String skin = "";         // Skin configured
    private String searchPage = "/search.html";
    private static StringBuffer cache = null;


    /**
     * Prepares the servlet: resolves the web application directory, the index
     * directory and the skin, and creates the searcher and the renderer.
     * @throws ServletException if the web application directory cannot be
     *         resolved or the "project-skin" init parameter is missing
     */
    public void init() throws ServletException {
      servletPath = this.getServletContext().getRealPath("");
      // getRealPath returns null when the container cannot map the path
      if (servletPath == null) {
        throw new ServletException("Cannot resolve the web application directory");
      }
      // FIXME: indexDir is hardcoded
      indexDir = servletPath + "/lucene-index";
      searcher = new ForrestSearcher();
      // Store the skin in the field (a local variable used to shadow it)
      skin = this.getInitParameter("project-skin");
      if (skin == null || skin.trim().length() == 0) {
        throw new ServletException("Missing init parameter 'project-skin'");
      }
      renderer = new ForrestSearchRenderer(servletPath, skin);
   } // init

    /**
     * Process the HTTP Get request.
     * The query is read from the request parameter "query"; the page produced
     * by the renderer is written to the response. Answers with an internal
     * server error when the results could not be rendered.
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
      response.setContentType(CONTENT_TYPE);
      // Query string should be in parameter "query".
      // A valid forrest document is returned.
      String query = request.getParameter("query");
      // Render the resulting document. Ideally the document
      // should be passed to Cocoon, but for the time being
      // use the renderer
      Document doc = searcher.search(indexDir, query);
      String page = renderer.render(doc);
      if (page == null) {
        // Do not send the literal text "null" to the browser
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                           "Unable to render the search results");
        return;
      }
      PrintWriter out = response.getWriter();
      out.print(page);
    }

    //Clean up resources
    public void destroy() {
    }

  } // ForrestSearchServlet
  
  
  
  1.1                  xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearcher.java
  
  Index: ForrestSearcher.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Apache Forrest" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  package org.apache.forrest.search;
  
  import java.io.IOException;
  import java.io.StringReader;
  import java.util.*;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.queryParser.*;
  import org.apache.lucene.search.*;
  import org.apache.lucene.index.Term;
  import org.apache.xerces.dom.DocumentImpl;
  import org.apache.xerces.dom.DocumentTypeImpl;
  import org.w3c.dom.*;
  
  /**
   * <p>Searches the index for a given query string.</p>
   * @author Ramon Prades [RPR]
   * @version $Id: ForrestSearcher.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
   */
  public class ForrestSearcher {
    /** Extension that is swapped out of the indexed "path" field when building links. */
    private static final String SOURCE_EXTENSION = ".xml";

    /** Extension that result links use in place of {@link #SOURCE_EXTENSION}. */
    private static final String RENDERED_EXTENSION = ".html";

    public ForrestSearcher() {
    }

    /**
     * Searches "queryString" in "indexDir" and returns a Forrest Document (v1.2)
     * with the list of matches.
     *
     * <p>The method never returns <code>null</code>: when the query is empty,
     * the index cannot be opened, the query cannot be parsed or the search
     * itself fails, the returned document contains a single paragraph that
     * explains the problem instead of a list of matches. Failures are also
     * logged to <code>System.err</code>.</p>
     *
     * @param indexDir Directory with the Lucene index
     * @param queryString String to search
     * @return Forrest document
     */
    public Document search(String indexDir, String queryString) {
      // Create a Forrest document with the results. The public identifier
      // names the same DTD version (1.2) as the system identifier.
      DOMImplementation domImpl = new org.apache.xerces.dom.DOMImplementationImpl();
      DocumentType docType =
          domImpl.createDocumentType("document", "-//APACHE//DTD Documentation V1.2//EN", "document-v12.dtd");
      Document doc = domImpl.createDocument("", "document", docType);
      Element rootNode = doc.getDocumentElement();
      Element headerNode = doc.createElement("header");
      headerNode.appendChild(makeElement(doc, "title", "Search Results"));
      rootNode.appendChild(headerNode);
      Element bodyNode = doc.createElement("body");
      rootNode.appendChild(bodyNode);

      // Nothing to search for: do not even open the index.
      if (queryString == null || queryString.length() == 0) {
        bodyNode.appendChild(makeElement(doc, "p", "Please enter a valid query"));
        return doc;
      }

      IndexSearcher searcher;
      try {
        searcher = new IndexSearcher(indexDir);
      } catch (IOException ex) {
        System.err.println("Error: Index dir not found!");
        ex.printStackTrace();
        bodyNode.appendChild(makeElement(doc, "p", "The search index is not available."));
        return doc;
      }

      try {
        Query query;
        try {
          query = QueryParser.parse(queryString, "contents", new StandardAnalyzer());
        } catch (ParseException ex) {
          System.err.println("QueryParser error!");
          ex.printStackTrace();
          bodyNode.appendChild(
              makeQueryParagraph(doc, "The query could not be understood: ", queryString));
          return doc;
        }

        Hits hits;
        try {
          hits = searcher.search(query);
        } catch (IOException ex) {
          System.err.println("Error in search");
          ex.printStackTrace();
          bodyNode.appendChild(
              makeQueryParagraph(doc, "An error occurred while searching for: ", queryString));
          return doc;
        }

        // Reading the hits can still perform I/O (hits.doc/hits.score declare
        // IOException), so the list is built before the searcher is closed.
        appendMatches(doc, bodyNode, hits, queryString);
      } finally {
        try {
          searcher.close();
        } catch (IOException ex) {
          System.err.println("Error closing index searcher (" + ex.getMessage() + ")");
        }
      }
      return doc;
    } // search

    /**
     * Appends the summary paragraph ("N documents found matching: query") and,
     * when there is at least one hit, the list of matches to the body.
     * A hit that cannot be rendered is logged and skipped; the remaining hits
     * are still listed.
     */
    private void appendMatches(Document doc, Element bodyNode, Hits hits,
                               String queryString) {
      int count = hits.length();
      String txt;
      if (count == 0) {
        txt = "No documents found matching: ";
      } else if (count == 1) {
        txt = count + " document found matching: ";
      } else {
        txt = count + " documents found matching: ";
      }
      bodyNode.appendChild(makeQueryParagraph(doc, txt, queryString));
      if (count == 0) {
        return;
      }

      Element listNode = doc.createElement("ul");
      bodyNode.appendChild(listNode);

      // One formatter is enough for the whole list.
      java.text.DateFormat formatter = new java.text.SimpleDateFormat();
      for (int i = 0; i < count; i++) {
        try {
          // The item is attached only once it is complete, so a failure
          // half-way through never leaves a partial entry in the list.
          Element listItem = makeListItem(doc, hits, i, formatter);
          if (listItem == null) {
            System.err.println("Skipping search hit " + i +
                               ": no path stored in the index");
          } else {
            listNode.appendChild(listItem);
          }
        } catch (DOMException ex2) {
          System.err.println("DOM Error building results document (" +
                             ex2.getMessage() + ")");
        } catch (IOException ex2) {
          System.err.println("IO Error building results document (" +
                             ex2.getMessage() + ")");
        } catch (NumberFormatException ex2) {
          System.err.println("NUMBERFORMAT Error building results document (" +
                             ex2.getMessage() + ")");
        }
      } // for
    }

    /**
     * Builds the list item for hit number "i": linked title, score, optional
     * summary and a last line with url, optional author and modification date.
     *
     * @return the complete list item, or <code>null</code> when the hit has no
     *         "path" field and therefore cannot be linked
     * @throws IOException if the hit cannot be read from the index
     * @throws NumberFormatException if the "modified" field is missing or is
     *         not a number
     */
    private Element makeListItem(Document doc, Hits hits, int i,
                                 java.text.DateFormat formatter) throws IOException {
      String rawPath = hits.doc(i).get("path");
      if (rawPath == null) {
        return null;
      }
      String path = toRenderedPath(rawPath);
      String title = hits.doc(i).get("title");
      if (title == null || title.length() == 0) {
        // Without a title the link would have no text; show the url instead.
        title = path;
      }
      String summary = hits.doc(i).get("summary");
      String authors = hits.doc(i).get("author");
      float score = hits.score(i);
      Date modified = new Date(Long.parseLong(hits.doc(i).get("modified")));
      String strModified = formatter.format(modified);

      Element listItem = doc.createElement("li");
      Element strongNode = doc.createElement("strong");
      listItem.appendChild(strongNode);
      Element linkNode = makeElement(doc, "link", title);
      linkNode.setAttribute("href", path);
      strongNode.appendChild(linkNode);

      listItem.appendChild(doc.createTextNode(" [" + score + "]"));
      listItem.appendChild(doc.createElement("br"));

      if (summary != null && summary.length() > 0) {
        listItem.appendChild(doc.createTextNode(summary));
        listItem.appendChild(doc.createElement("br"));
      }

      Element lastLine = doc.createElement("em");
      listItem.appendChild(lastLine);
      lastLine.appendChild(doc.createTextNode("url: " + path));
      if (authors != null && authors.length() > 0) {
        lastLine.appendChild(doc.createTextNode(" - author: " + authors));
      }
      lastLine.appendChild(doc.createTextNode(" - last modified: " + strModified));
      listItem.appendChild(doc.createElement("br"));
      listItem.appendChild(doc.createElement("br"));
      return listItem;
    }

    /**
     * Turns the path of an indexed source file into the path of its rendered
     * page by replacing a trailing ".xml" with ".html". Only the suffix is
     * touched and no regular expression is involved, so other parts of the
     * path are never rewritten and the code does not depend on
     * String.replaceAll (JDK 1.4). Paths without that suffix are returned
     * unchanged.
     */
    private static String toRenderedPath(String path) {
      if (path.endsWith(SOURCE_EXTENSION)) {
        return path.substring(0, path.length() - SOURCE_EXTENSION.length())
            + RENDERED_EXTENSION;
      }
      return path;
    }

    /**
     * Utility method to construct a paragraph made of a plain text followed by
     * the query string in emphasis.
     */
    private Element makeQueryParagraph(Document doc, String text, String queryString) {
      Element pNode = doc.createElement("p");
      pNode.appendChild(doc.createTextNode(text));
      pNode.appendChild(makeElement(doc, "em", queryString));
      return pNode;
    }

    /**
     * Utility method to construct a DOM element with no attributes and
     * one text child.
     */
    private Element makeElement(Document doc, String name, String text) {
      Element e = doc.createElement(name);
      e.appendChild(doc.createTextNode(text));
      return e;
    }
  } // ForrestSearcher

Re: cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java

Posted by Juan Jose Pablos <ch...@che-che.com>.

Jeff Turner wrote:
>>  Added first version of Lucene integrated within Forrest.
>>  PR: FOR-9
>>  Submitted by:	Ramón Prades rprades@porcelanosa.com
> 
> 
> Has a JDK 1.4 dependency:
> 

I will add a note on the bug.

I notice that ForrestSearcher is a helper class. Is that an issue? I 
remember having a lot of problems with caching and helper classes.

Re: cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java

Posted by Jeff Turner <je...@apache.org>.

On Fri, Sep 12, 2003 at 07:07:31PM -0000, cheche@apache.org wrote:
> cheche      2003/09/12 12:07:31
> 
>   Modified:    .        status.xml
>                src/resources/conf web.xml
>                src/resources/forrest-shbat forrest.build.xml
>                src/resources/fresh-site/src/documentation skinconf.xml
>                src/resources/schema/relaxng skinconf.rnc
>                src/resources/skins/common/xslt/html document2html.xsl
>                src/resources/skins/forrest-site/xslt/html site2xhtml.xsl
>   Added:       src/scratchpad/src/java/org/apache/forrest/search
>                         ForrestDocument.java ForrestDocumentSAXParser.java
>                         ForrestIndexer.java ForrestSearchRenderer.java
>                         ForrestSearchServlet.java ForrestSearcher.java
>   Log:
>   Added first version of Lucene integrated within Forrest.
>   PR: FOR-9
>   Submitted by:	Ramón Prades rprades@porcelanosa.com

Has a JDK 1.4 dependency:


scratchpad.compile:
Compiling 6 source files to /usr/serverlocal/src/apache/xml-forrest-cvs/build/scratchpad/classes
/usr/serverlocal/src/apache/xml-forrest-cvs/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearcher.java:181: cannot resolve symbol
symbol  : method replaceAll  (java.lang.String,java.lang.String)
location: class java.lang.String
            String path = hits.doc(i).get("path").replaceAll(".xml", ".html");
                                  ^
1 error


--Jeff

Re: cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java

Posted by Juan Jose Pablos <ch...@che-che.com>.

Jeff Turner wrote:
> 
> 
> Incidentally, do you know if these are necessary?  Filtering is
> deliberately switched off so that one day when the webapp reads content
> directly from src/documentation/content/xdocs, we don't break
> compatibility with everyone expecting their @tokens@ to be replaced.
> 
> 
> --Jeff


I only tidied that up: filtering="false" is the default value on copy:

http://ant.apache.org/manual/CoreTasks/copy.html

That change does not change any behavior.

Cheche

Re: cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java

Posted by Jeff Turner <je...@apache.org>.

On Fri, Sep 12, 2003 at 07:07:31PM -0000, cheche@apache.org wrote:
> cheche      2003/09/12 12:07:31
> 
>   Modified:    .        status.xml
>                src/resources/conf web.xml
>                src/resources/forrest-shbat forrest.build.xml
>                src/resources/fresh-site/src/documentation skinconf.xml
>                src/resources/schema/relaxng skinconf.rnc
>                src/resources/skins/common/xslt/html document2html.xsl
>                src/resources/skins/forrest-site/xslt/html site2xhtml.xsl
>   Added:       src/scratchpad/src/java/org/apache/forrest/search
>                         ForrestDocument.java ForrestDocumentSAXParser.java
>                         ForrestIndexer.java ForrestSearchRenderer.java
>                         ForrestSearchServlet.java ForrestSearcher.java
>   Log:
>   Added first version of Lucene integrated within Forrest.
>   PR: FOR-9
>   Submitted by:	Ramón Prades rprades@porcelanosa.com
....  
>   -    <copy todir="${project.ctxt-dir}/" filtering="on" overwrite="true">
>   +    <copy todir="${project.ctxt-dir}/" filtering="true" overwrite="true">
>        <!-- everything in the skins, except the images , and the sitemap -->
>          <fileset dir="${forrest.home}/context" >
>            <include name="*.xmap" />
>   @@ -443,7 +444,7 @@
>      </target>
>    
>      <target name="copy-resources" if="resources.present">
>   -    <copy toDir="${project.ctxt-dir}/resources" filtering="false" failonerror="false">
>   +    <copy toDir="${project.ctxt-dir}/resources" failonerror="false">
>          <fileset dir="${project.resources-dir}">
>            <exclude name="stylesheets"/>
>            <exclude name="images"/>

Incidentally, do you know if these are necessary?  Filtering is
deliberately switched off so that one day when the webapp reads content
directly from src/documentation/content/xdocs, we don't break
compatibility with everyone expecting their @tokens@ to be replaced.


--Jeff