You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by Mark Harwood <ma...@totalise.co.uk> on 2002/06/28 22:31:03 UTC

Please add to CVS: Improved HTMLParser.jj

I've added these features to the latest version of HTMLParser:

* Support for parsing metatags - new method getMetaTags()
* Fix to ignore inline <Style> tags

The "<Style>" fix sorts out the problem where a document's summary would end 
up consisting of just CSS declarations - these are now ignored in the same way 
as "<script>" declarations.
Tested OK by parsing over 1000 HTML docs.

Thanks in advance
Mark


============CODE BEGINS ==============================

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

// HTMLParser.jj

// JavaCC generator options for this grammar.
options {
  // Generate an instantiable (non-static) parser; the HTMLParser class
  // below keeps all of its state in instance fields.
  STATIC = false;
  // NOTE(review): presumably asks JavaCC to optimise the generated token
  // manager -- confirm against the JavaCC version in use.
  OPTIMIZE_TOKEN_MANAGER = true;
  // Debugging aids, disabled by default.
  //DEBUG_LOOKAHEAD = true;
  //DEBUG_TOKEN_MANAGER = true;
}

PARSER_BEGIN(HTMLParser)

package org.apache.lucene.demo.html;

import java.io.*;
import java.util.Properties;

/**
 * Extracts plain text, title, summary and META properties from an HTML
 * document.
 *
 * <p>The grammar productions of this file call {@link #addText(String)} and
 * {@link #addSpace()} for every piece of document text; those methods write
 * the text to a pipe whose reading end is handed out by {@link #getReader()}.
 * Parsing is started on a separate {@code ParserThread} the first time any
 * getter is called, and the getters block until enough of the document has
 * been seen.
 *
 * <p>NOTE(review): the getters rely on something outside this class
 * (presumably {@code ParserThread}, when parsing ends) to set
 * {@code titleComplete} and to pad {@code summary} for short documents;
 * otherwise they would wait forever on such documents -- confirm.
 */
public class HTMLParser {
  /**
   * Number of characters collected for the summary; also the amount of
   * text after which {@link #getTitle()} stops waiting for a title.
   */
  public static int SUMMARY_LENGTH = 200;

  /** Text seen while {@code inTitle} was set. */
  StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
  /** First {@code SUMMARY_LENGTH} (or slightly more) characters of body text. */
  StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
  /** META properties, keyed by {@code currentMetaTag}. */
  Properties metaTags = new Properties();
  /** Key under which the next META content passed to addText is stored. */
  String currentMetaTag = "";
  /** Number of characters written to the pipe so far. */
  int length = 0;
  /** Set once body text has been seen after a non-empty title. */
  boolean titleComplete = false;
  // The following flags are maintained by the Tag() production.
  boolean inTitle = false;
  boolean inMetaTag = false;
  boolean inStyle = false;
  boolean inScript = false;
  /** True when the previous document item was a tag, declaration or comment. */
  boolean afterTag = false;
  /** True when the last thing emitted was white space (collapses runs). */
  boolean afterSpace = false;
  String eol = System.getProperty("line.separator");
  PipedReader pipeIn = null;
  PipedWriter pipeOut;

  /**
   * Creates a parser reading from the given file.
   *
   * <p>Delegates to a constructor taking an {@code InputStream}, which is
   * not declared in this class body (presumably generated by JavaCC).
   *
   * @param file the HTML file to parse
   * @throws FileNotFoundException if the file cannot be opened
   */
  public HTMLParser(File file) throws FileNotFoundException {
    this(new FileInputStream(file));
  }

  /**
   * Returns the trimmed document title, starting the parser if necessary
   * and blocking until the title is complete or more than
   * {@code SUMMARY_LENGTH} characters of text have been seen.
   *
   * @throws IOException if the pipe cannot be created
   * @throws InterruptedException if interrupted while waiting
   */
  public String getTitle() throws IOException, InterruptedException {
    awaitTitle();
    return title.toString().trim();
  }

  /**
   * Returns the META properties collected so far, after waiting under the
   * same condition as {@link #getTitle()}.
   *
   * <p>The returned object is the parser's own {@code Properties} instance,
   * not a copy, so properties parsed later will appear in it as well.
   *
   * @throws IOException if the pipe cannot be created
   * @throws InterruptedException if interrupted while waiting
   */
  public Properties getMetaTags() throws IOException, InterruptedException {
    awaitTitle();
    return metaTags;
  }

  /**
   * Starts the parser if needed and blocks until the title is complete or
   * more than {@code SUMMARY_LENGTH} characters of text have been seen.
   */
  private void awaitTitle() throws IOException, InterruptedException {
    if (pipeIn == null)
      getReader();                                // spawn parsing thread
    synchronized (this) {
      // Timed wait: 'length' is updated without notification, so poll.
      while (!titleComplete && length <= SUMMARY_LENGTH)
        wait(10);
    }
  }

  /**
   * Returns up to {@code SUMMARY_LENGTH} characters of document text,
   * trimmed, with the title removed from the front when the text starts
   * with it. Blocks until the summary buffer holds at least
   * {@code SUMMARY_LENGTH} characters.
   *
   * @throws IOException if the pipe cannot be created
   * @throws InterruptedException if interrupted while waiting
   */
  public String getSummary() throws IOException, InterruptedException {
    if (pipeIn == null)
      getReader();                                // spawn parsing thread
    synchronized (this) {
      while (summary.length() < SUMMARY_LENGTH)
        wait(10);
    }
    if (summary.length() > SUMMARY_LENGTH)
      summary.setLength(SUMMARY_LENGTH);

    String sum = summary.toString().trim();
    String tit = getTitle();
    if (sum.startsWith(tit))
      return sum.substring(tit.length());
    else
      return sum;
  }

  /**
   * Returns a reader over the extracted plain text. The first call creates
   * the pipe and starts a {@code ParserThread} for this parser; later
   * calls return the same reader.
   *
   * @throws IOException if the pipe cannot be connected
   */
  public Reader getReader() throws IOException {
    if (pipeIn == null) {
      pipeIn = new PipedReader();
      pipeOut = new PipedWriter(pipeIn);

      Thread thread = new ParserThread(this);
      thread.start();                             // start parsing
    }

    return pipeIn;
  }

  /**
   * Appends text to the summary while it is still shorter than
   * {@code SUMMARY_LENGTH}, and wakes waiting threads once that length is
   * reached.
   */
  void addToSummary(String text) {
    if (summary.length() < SUMMARY_LENGTH) {
      summary.append(text);
      if (summary.length() >= SUMMARY_LENGTH) {
        synchronized(this) {
          notifyAll();
        }
      }
    }
  }

  /**
   * Handles one piece of document text.
   *
   * <p>Text inside SCRIPT or STYLE is dropped. While {@code inMetaTag} is
   * set the text is stored as the value of {@code currentMetaTag} instead
   * of being emitted. Otherwise it goes to the title or the summary and is
   * written to the pipe.
   *
   * @throws IOException if writing to the pipe fails
   */
  void addText(String text) throws IOException {
    if (inScript || inStyle)
      return;
    if (inMetaTag) {
      metaTags.setProperty(currentMetaTag, text);
      return;
    }
    if (inTitle)
      title.append(text);
    else {
      addToSummary(text);
      // The title is finished once body text follows a non-empty title.
      // (StringBuffer.equals("") is never true, so test the length.)
      if (!titleComplete && title.length() != 0) {
        synchronized(this) {
          titleComplete = true;                   // tell waiting threads
          notifyAll();
        }
      }
    }

    length += text.length();
    pipeOut.write(text);

    afterSpace = false;
  }

  /**
   * Handles a run of white space: emits a single separator (a line break
   * directly after a tag, a blank otherwise) unless one was just emitted.
   * White space inside SCRIPT or STYLE is dropped, matching addText.
   *
   * @throws IOException if writing to the pipe fails
   */
  void addSpace() throws IOException {
    if (inScript || inStyle)
      return;
    if (!afterSpace) {
      if (inTitle)
        title.append(" ");
      else
        addToSummary(" ");

      String space = afterTag ? eol : " ";
      length += space.length();
      pipeOut.write(space);
      afterSpace = true;
    }
  }

//    void handleException(Exception e) {
//      System.out.println(e.toString());  // print the error message
//      System.out.println("Skipping...");
//      Token t;
//      do {
//        t = getNextToken();
//      } while (t.kind != TagEnd);
//    }
}

PARSER_END(HTMLParser)


/*
 * Top-level production: a document is any sequence of tags, declarations,
 * comments, words, entities, punctuation and white space, up to end of
 * file. Text items are forwarded to addText()/addSpace(); afterTag records
 * whether the item just consumed was markup, which addSpace() uses to
 * choose between a line break and a blank.
 */
void HTMLDocument() throws IOException :
{
  Token t;
}
{
//  try {
    ( Tag()         { afterTag = true; }
    | t=Decl()      { afterTag = true; }
    | CommentTag()  { afterTag = true; }
    | t=<Word>      { addText(t.image); afterTag = false; }
    // Entities are translated by Entities.decode() before being emitted.
    | t=<Entity>    { addText(Entities.decode(t.image)); afterTag = false; }
    | t=<Punct>     { addText(t.image); afterTag = false; }
    | <Space>       { addSpace(); afterTag = false; }
    )* <EOF>
//  } catch (ParseException e) {
//    handleException(e);
//  }
}

/*
 * Parses one start or end tag with its attributes.
 *
 * On the tag name it updates the inTitle / inMetaTag / inStyle / inScript
 * state flags. While reading attributes it emits the ALT text of IMG tags
 * and collects the NAME (or HTTP-EQUIV) and CONTENT values of META tags.
 *
 * The META property is stored only when the tag has been read completely,
 * so the order of the NAME and CONTENT attributes does not matter and a
 * META tag lacking either attribute stores nothing. inMetaTag is cleared at
 * the closing '>' so that document text following the tag is treated as
 * ordinary text instead of overwriting the stored property.
 */
void Tag() throws IOException :
{
  Token t1, t2;
  boolean inImg = false;
  String metaName = null;     // lower-cased NAME / HTTP-EQUIV of this META tag
  String metaContent = null;  // CONTENT of this META tag
}
{
  t1=<TagName> {
    inTitle = t1.image.equalsIgnoreCase("<title"); // keep track if in <TITLE>
    inMetaTag = t1.image.equalsIgnoreCase("<META"); // keep track if in <META>
    inStyle = t1.image.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
    inImg = t1.image.equalsIgnoreCase("<img");     // keep track if in <IMG>
    if (inScript) {                                // keep track if in <SCRIPT>
      // Once inside a script only the matching end tag leaves it.
      inScript = !t1.image.equalsIgnoreCase("</script");
    } else {
      inScript = t1.image.equalsIgnoreCase("<script");
    }
  }
  (t1=<ArgName>
   (<ArgEquals>
    (t2=ArgValue()
     {
       // ArgValue() returns null for an empty quoted value.
       if (t2 != null) {
         if (inImg && t1.image.equalsIgnoreCase("alt"))
           addText("[" + t2.image + "]");          // save ALT text in IMG tag

         if (inMetaTag) {
           if (t1.image.equalsIgnoreCase("name") ||
               t1.image.equalsIgnoreCase("HTTP-EQUIV")) {
             metaName = t2.image.toLowerCase();
           } else if (t1.image.equalsIgnoreCase("content")) {
             metaContent = t2.image;
           }
         }
       }
     }
    )?
   )?
  )*
  <TagEnd> {
    if (inMetaTag) {
      if (metaName != null && metaContent != null) {
        currentMetaTag = metaName;
        // With inMetaTag set, addText() stores the text under currentMetaTag.
        addText(metaContent);
      }
      inMetaTag = false;                           // the META tag ends here
    }
  }
}

/*
 * Parses an attribute value: unquoted, single-quoted or double-quoted.
 * Returns the token holding the value text (without the quotes), or null
 * when the value is an empty pair of quotes.
 */
Token ArgValue() :
{
  Token t = null;
}
{
  t=<ArgValue>                              { return t; }
// Two tokens of lookahead tell an empty quoted value ('' or "") from a
// non-empty one; both alternatives start with the same quote token.
| LOOKAHEAD(2)
  <ArgQuote1> <CloseQuote1>                 { return t; }
| <ArgQuote1> t=<Quote1Text> <CloseQuote1>  { return t; }
| LOOKAHEAD(2)
  <ArgQuote2> <CloseQuote2>                 { return t; }
| <ArgQuote2> t=<Quote2Text> <CloseQuote2>  { return t; }
}


/*
 * Parses a declaration that starts with "<!" followed by a letter (see the
 * DeclName token) and runs up to the closing '>'. Its arguments are
 * consumed and discarded. Returns the DeclName token.
 */
Token Decl() :
{
  Token t;
}
{
  t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
  { return t; }
}


/*
 * Parses and discards a comment. Two forms are accepted: "<!--" up to
 * "-->", and "<!" (not followed by a letter or "--") up to the next '>'.
 */
void CommentTag() :
{}
{
  (<Comment1> ( <CommentText1> )* <CommentEnd1>)
 |
  (<Comment2> ( <CommentText2> )* <CommentEnd2>)
}


// Tokens of the DEFAULT lexical state, i.e. document text outside of tags
// and comments.
TOKEN :
{
  // "<" or "</" followed by a letter and the rest of the name; the image
  // includes the leading "<" (and "/"). Switches to WithinTag.
  < TagName:  "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
  // "<!" followed by a letter. Switches to WithinTag.
| < DeclName: "<"  "!"   ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag

  // Comment openers; each switches to its own comment state.
| < Comment1:  "<!--" > : WithinComment1
| < Comment2:  "<!" >   : WithinComment2

  // A word: one or more letter/digit-based pieces, allowing embedded
  // '+', '/', '-', ''', a digit followed by '"', and numbers with ','/'.'
  // and an optional leading '$'.
| < Word:     ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
                <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ >
  // Private helpers ('#'): usable only inside other token definitions.
  // Note that LET includes digits as well as letters.
| < #LET:     ["A"-"Z","a"-"z","0"-"9"] >
| < #NUM:     ["0"-"9"] >

  // Named ("&name") or numeric ("&#123") character entity; the trailing
  // ';' is optional.
| < Entity:   ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? ) >

  // A run of blanks, tabs, carriage returns and line feeds.
| < Space:    (<SP>)+ >
| < #SP:      [" ","\t","\r","\n"] >

| < Punct:    ~[] > // Keep this last.  It is a catch-all.
}


// Tokens recognised inside a tag, after its name.
<WithinTag> TOKEN:
{
  // Attribute name: must not start with white space, '=', '>' or a quote;
  // later characters may be anything except white space, '=' and '>'.
  < ArgName:   (~[" ","\t","\r","\n","=",">","'","\""])
               (~[" ","\t","\r","\n","=",">"])* >
  // '=' announces an attribute value; switches to AfterEquals.
| < ArgEquals: "=" >  : AfterEquals
  // End of the tag (">" or "=>"); returns to document text.
| < TagEnd:    ">" | "=>" >  : DEFAULT
}

// Token recognised directly after '=' inside a tag.
<AfterEquals> TOKEN:
{
  // Unquoted attribute value: must not start with white space, '=', '>' or
  // a quote; it then runs up to the next white space or '>'. Returns to
  // WithinTag.
  < ArgValue:  (~[" ","\t","\r","\n","=",">","'","\""])
	       (~[" ","\t","\r","\n",">"])* > : WithinTag
}

// Opening quotes of a quoted attribute value, accepted both inside a tag
// and directly after '='. Each switches to the matching quoted-text state.
<WithinTag, AfterEquals> TOKEN:
{
  < ArgQuote1: "'"  > : WithinQuote1
| < ArgQuote2: "\"" > : WithinQuote2
}

// White space inside a tag is skipped: it is matched but never passed to
// the parser.
<WithinTag, AfterEquals> SKIP:
{
  < <Space> >
}

// Inside a single-quoted attribute value.
<WithinQuote1> TOKEN:
{
  // Everything up to the next single quote.
  < Quote1Text:  (~["'"])+ >
  // The closing quote returns to WithinTag.
| < CloseQuote1: <ArgQuote1> > : WithinTag
}

// Inside a double-quoted attribute value.
<WithinQuote2> TOKEN:
{
  // Everything up to the next double quote.
  < Quote2Text:  (~["\""])+ >
  // The closing quote returns to WithinTag.
| < CloseQuote2: <ArgQuote2> > : WithinTag
}


// Inside a "<!--" comment.
<WithinComment1> TOKEN :
{
  // Either a run of non-'-' characters or a single '-'. A '-' that starts
  // "-->" is taken as CommentEnd1 instead, because that match is longer.
  < CommentText1:  (~["-"])+ | "-" >
  // End of the comment; returns to document text.
| < CommentEnd1:   "-->" > : DEFAULT
}

// Inside a "<!" comment.
<WithinComment2> TOKEN :
{
  // Everything up to the next '>'.
  < CommentText2:  (~[">"])+ >
  // The '>' ends the comment; returns to document text.
| < CommentEnd2:   ">" > : DEFAULT
}


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


Re: Please add to CVS: Improved HTMLParser.jj

Posted by Otis Gospodnetic <ot...@yahoo.com>.
Thanks for the contribution.

I haven't tested it (I don't have any ready code to do it), but I've
made a diff (attached for those who prefer that), looked over it, run
'ant demo', and got no errors, so if anyone sees any problems please
yell.

Otis


--- Mark Harwood <ma...@totalise.co.uk> wrote:
> I've added these features to the latest version of HTMLParser:
> 
> * Support for parsing metatags - new method getMetaTags()
> * Fix to ignore inline <Style></style> tags
> 
> The "<Style></style>" fix sorts out the problem where a document's
summary
> would end 
> up consisting of just CSS declarations - these are now ignored in the
> same way 
> as "<script>" declarations.
> Tested out OK parsing over 1000 html docs
> 
> Thanks in advance
> Mark
> 
> 
> ============CODE BEGINS ==============================
> 
> /*
> ====================================================================
>  * The Apache Software License, Version 1.1
>  *
>  * Copyright (c) 2001 The Apache Software Foundation.  All rights
>  * reserved.
>  *
>  * Redistribution and use in source and binary forms, with or without
>  * modification, are permitted provided that the following conditions
>  * are met:
>  *
>  * 1. Redistributions of source code must retain the above copyright
>  *    notice, this list of conditions and the following disclaimer.
>  *
>  * 2. Redistributions in binary form must reproduce the above
> copyright
>  *    notice, this list of conditions and the following disclaimer in
>  *    the documentation and/or other materials provided with the
>  *    distribution.
>  *
>  * 3. The end-user documentation included with the redistribution,
>  *    if any, must include the following acknowledgment:
>  *       "This product includes software developed by the
>  *        Apache Software Foundation (http://www.apache.org/)."
>  *    Alternately, this acknowledgment may appear in the software
> itself,
>  *    if and wherever such third-party acknowledgments normally
> appear.
>  *
>  * 4. The names "Apache" and "Apache Software Foundation" and
>  *    "Apache Lucene" must not be used to endorse or promote products
>  *    derived from this software without prior written permission.
> For
>  *    written permission, please contact apache@apache.org.
>  *
>  * 5. Products derived from this software may not be called "Apache",
>  *    "Apache Lucene", nor may "Apache" appear in their name, without
>  *    prior written permission of the Apache Software Foundation.
>  *
>  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
>  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
>  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
>  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
>  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
>  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
>  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
>  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>  * SUCH DAMAGE.
>  *
> ====================================================================
>  *
>  * This software consists of voluntary contributions made by many
>  * individuals on behalf of the Apache Software Foundation.  For more
>  * information on the Apache Software Foundation, please see
>  * <http://www.apache.org/>.
>  */
> 
> // HTMLParser.jj
> 
> options {
>   STATIC = false;
>   OPTIMIZE_TOKEN_MANAGER = true;
>   //DEBUG_LOOKAHEAD = true;
>   //DEBUG_TOKEN_MANAGER = true;
> }
> 
> PARSER_BEGIN(HTMLParser)
> 
> package org.apache.lucene.demo.html;
> 
> import java.io.*;
> import java.util.Properties;
> 
> public class HTMLParser {
>   public static int SUMMARY_LENGTH = 200;
> 
>   StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
>   StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
>   Properties metaTags=new Properties();
>   String currentMetaTag="";
>   int length = 0;
>   boolean titleComplete = false;
>   boolean inTitle = false;
>   boolean inMetaTag = false;
>   boolean inStyle = false;
>   boolean inScript = false;
>   boolean afterTag = false;
>   boolean afterSpace = false;
>   String eol = System.getProperty("line.separator");
>   PipedReader pipeIn = null;
>   PipedWriter pipeOut;
> 
>   public HTMLParser(File file) throws FileNotFoundException {
>     this(new FileInputStream(file));
>   }
> 
>   public String getTitle() throws IOException, InterruptedException {
>     if (pipeIn == null)
>       getReader();				  // spawn parsing thread
>     while (true) {
>       synchronized(this) {
> 	if (titleComplete || (length > SUMMARY_LENGTH))
> 	  break;
> 	wait(10);
>       }
>     }
>     return title.toString().trim();
>   }
> 
>   public Properties getMetaTags() throws IOException,
> InterruptedException {
>     if (pipeIn == null)
>       getReader();				  // spawn parsing thread
>     while (true) {
>       synchronized(this) {
> 	if (titleComplete || (length > SUMMARY_LENGTH))
> 	  break;
> 	wait(10);
>       }
>     }
>     return metaTags;
>   }
> 
> 
>   public String getSummary() throws IOException, InterruptedException
> {
>     if (pipeIn == null)
>       getReader();				  // spawn parsing thread
>     while (true) {
>       synchronized(this) {
> 	if (summary.length() >= SUMMARY_LENGTH)
> 	  break;
> 	wait(10);
>       }
>     }
>     if (summary.length() > SUMMARY_LENGTH)
>       summary.setLength(SUMMARY_LENGTH);
> 
>     String sum = summary.toString().trim();
>     String tit = getTitle();
>     if (sum.startsWith(tit))
>       return sum.substring(tit.length());
>     else
>       return sum;
>   }
> 
>   public Reader getReader() throws IOException {
>     if (pipeIn == null) {
>       pipeIn = new PipedReader();
>       pipeOut = new PipedWriter(pipeIn);
> 
>       Thread thread = new ParserThread(this);
>       thread.start();				  // start parsing
>     }
> 
>     return pipeIn;
>   }
> 
>   void addToSummary(String text) {
>     if (summary.length() < SUMMARY_LENGTH) {
>       summary.append(text);
>       if (summary.length() >= SUMMARY_LENGTH) {
> 	synchronized(this) {
> 	  notifyAll();
> 	}
>       }
>     }
>   }
> 
>   void addText(String text) throws IOException {
>     if (inScript)
>       return;
>     if (inStyle)
>       return;
>     if (inMetaTag)
>     {
> 	metaTags.setProperty(currentMetaTag, text);
>       	return;
>     }
>     if (inTitle)
>       title.append(text);
>     else {
>       addToSummary(text);
>       if (!titleComplete && !title.equals("")) {  // finished title
> 	synchronized(this) {
> 	  titleComplete = true;			  // tell waiting threads
> 	  notifyAll();
> 	}
>       }
>     }
> 
>     length += text.length();
>     pipeOut.write(text);
> 
>     afterSpace = false;
>   }
> 
>   void addSpace() throws IOException {
>     if (inScript)
>       return;
>     if (!afterSpace) {
>       if (inTitle)
> 	title.append(" ");
>       else
> 	addToSummary(" ");
> 
>       String space = afterTag ? eol : " ";
>       length += space.length();
>       pipeOut.write(space);
>       afterSpace = true;
>     }
>   }
> 
> //    void handleException(Exception e) {
> //      System.out.println(e.toString());  // print the error message
> //      System.out.println("Skipping...");
> //      Token t;
> //      do {
> //        t = getNextToken();
> //      } while (t.kind != TagEnd);
> //    }
> }
> 
> PARSER_END(HTMLParser)
> 
> 
> void HTMLDocument() throws IOException :
> {
>   Token t;
> }
> {
> //  try {
>     ( Tag()         { afterTag = true; }
>     | t=Decl()      { afterTag = true; }
>     | CommentTag()  { afterTag = true; }
>     | t=<Word>      { addText(t.image); afterTag = false; }
>     | t=<Entity>    { addText(Entities.decode(t.image)); afterTag =
> false; }
>     | t=<Punct>     { addText(t.image); afterTag = false; }
>     | <Space>       { addSpace(); afterTag = false; }
>     )* <EOF>
> //  } catch (ParseException e) {
> //    handleException(e);
> //  }
> }
> 
> void Tag() throws IOException :
> {
>   Token t1, t2;
>   boolean inImg = false;
> }
> {
>   t1=<TagName> {
>     inTitle = t1.image.equalsIgnoreCase("<title"); // keep track if
> in <TITLE>
>     inMetaTag = t1.image.equalsIgnoreCase("<META"); // keep track if
> in <META>
>     inStyle = t1.image.equalsIgnoreCase("<STYLE"); // keep track if
> in <STYLE>
>     inImg = t1.image.equalsIgnoreCase("<img");	  // keep track if in
> <IMG>
>     if (inScript) {				  // keep track if in <SCRIPT>
>       inScript = !t1.image.equalsIgnoreCase("</script");
>     } else {
>       inScript = t1.image.equalsIgnoreCase("<script");
>     }
>   }
>   (t1=<ArgName>
>    (<ArgEquals>
>     (t2=ArgValue()				  // save ALT text in IMG tag
>      {
>        if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
>          addText("[" + t2.image + "]");
> 
>     	if(inMetaTag &&
> 			(  t1.image.equalsIgnoreCase("name") ||
> 			   t1.image.equalsIgnoreCase("HTTP-EQUIV")
> 			)
> 	   && t2 != null)
> 	{
> 		currentMetaTag=t2.image.toLowerCase();
> 	}
>     	if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
> null)
> 	{
> 		addText(t2.image);
> 	}
>      }
>     )?
>    )?
>   )*
>   <TagEnd>
> }
> 
> Token ArgValue() :
> {
>   Token t = null;
> }
> {
>   t=<ArgValue>                              { return t; }
> | LOOKAHEAD(2)
>   <ArgQuote1> <CloseQuote1>                 { return t; }
> | <ArgQuote1> t=<Quote1Text> <CloseQuote1>  { return t; }
> | LOOKAHEAD(2)
>   <ArgQuote2> <CloseQuote2>                 { return t; }
> | <ArgQuote2> t=<Quote2Text> <CloseQuote2>  { return t; }
> }
> 
> 
> Token Decl() :
> {
>   Token t;
> }
> {
>   t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
>   { return t; }
> }
> 
> 
> void CommentTag() :
> {}
> {
>   (<Comment1> ( <CommentText1> )* <CommentEnd1>)
>  |
>   (<Comment2> ( <CommentText2> )* <CommentEnd2>)
> }
> 
> 
> TOKEN :
> {
>   < TagName:  "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
> | < DeclName: "<"  "!"   ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
> 
> | < Comment1:  "<!--" > : WithinComment1
> | < Comment2:  "<!" >   : WithinComment2
> 
> | < Word:     ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
>                 <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM>
> )+ >
> | < #LET:     ["A"-"Z","a"-"z","0"-"9"] >
> | < #NUM:     ["0"-"9"] >
> 
> | < Entity:   ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+
> (";")? ) >
> 
> | < Space:    (<SP>)+ >
> | < #SP:      [" ","\t","\r","\n"] >
> 
> | < Punct:    ~[] > // Keep this last.  It is a catch-all.
> }
> 
> 
> <WithinTag> TOKEN:
> {
>   < ArgName:   (~[" ","\t","\r","\n","=",">","'","\""])
>                (~[" ","\t","\r","\n","=",">"])* >
> | < ArgEquals: "=" >  : AfterEquals
> | < TagEnd:    ">" | "=>" >  : DEFAULT
> }
> 
> <AfterEquals> TOKEN:
> {
>   < ArgValue:  (~[" ","\t","\r","\n","=",">","'","\""])
> 	       (~[" ","\t","\r","\n",">"])* > : WithinTag
> }
> 
> <WithinTag, AfterEquals> TOKEN:
> {
>   < ArgQuote1: "'"  > : WithinQuote1
> | < ArgQuote2: "\"" > : WithinQuote2
> }
> 
> <WithinTag, AfterEquals> SKIP:
> {
>   < <Space> >
> }
> 
> <WithinQuote1> TOKEN:
> {
>   < Quote1Text:  (~["'"])+ >
> | < CloseQuote1: <ArgQuote1> > : WithinTag
> }
> 
> <WithinQuote2> TOKEN:
> {
>   < Quote2Text:  (~["\""])+ >
> | < CloseQuote2: <ArgQuote2> > : WithinTag
> }
> 
> 
> <WithinComment1> TOKEN :
> {
>   < CommentText1:  (~["-"])+ | "-" >
> | < CommentEnd1:   "-->" > : DEFAULT
> }
> 
> <WithinComment2> TOKEN :
> {
>   < CommentText2:  (~[">"])+ >
> | < CommentEnd2:   ">" > : DEFAULT
> }
> 
> 
> --
> To unsubscribe, e-mail:  
> <ma...@jakarta.apache.org>
> For additional commands, e-mail:
> <ma...@jakarta.apache.org>
> 

</textarea></font>


					</td></tr>
					<tr>
					    <td><input type=checkbox name="SigAtt" value="1" id="SA">Use
my signature</td>
					    <td align=right>
						<input type=checkbox name=Format value="" onclick="setFormat()">
						Allow HTML tags [<a href="javascript:Preview()">Preview</a>]
					    </td>
					</tr>
				    </table>
				</td>
			    </tr>
			    <tr class=frmb>
				<td align=right><b>Options:</b></td>
				<td>
				    <table border=0 cellpadding=2 cellspacing=0>
					<tr><td colspan=2><input type=checkbox name="SaveCopy" value="yes"
id="Save">Save a copy in your <b>Sent Items</b> folder</td></tr>

				    </table>
				</td>
			    </tr>
			</table>
		    </td>
		</tr>
		<tr>
		    <td>
			<table cellpadding=4 cellspacing=0 border=0 width="100%">
			    <tr class=bbar bgcolor="#3f6c96">
				<td nowrap>
				    <input type=submit name=SEND
value="&nbsp;&nbsp;Send&nbsp;&nbsp;" title="Send Message" class=abutton
 >
				    &nbsp;
				    <input type=submit name=SD value="Save as a Draft" title="Save
Message in your Drafts folder" class=fbutton  >
				    &nbsp;
				    <input type=submit name=SC value="Spell Check" title="Check
your message's spelling before sending" class=fbutton  >
				    &nbsp;
				    <input type=submit name=CAN value="Cancel" title="Cancel"
class=fbutton>
				</td>
			    </tr>
			</table>
		    </td>
		</tr>
	      </table>
	</td>
    </tr>
</table>
<table width="100%" cellpadding=0 cellspacing=0 border=0>
    <tr><td class=bgd bgcolor="#9bbad6"  height=4><img
src="http://us.i1.yimg.com/us.yimg.com/i/space.gif" width=2
height=3></td></tr>
</table>

<center>
    <br>
    <table cellpadding=4 cellspacing=0 border=0>
        <tr>
	    <td><a href="/ym/Welcome?YY=99325">Mail</a> - <a
href="http://address.yahoo.com/yab/us">Address Book</a> - <a
href="http://calendar.yahoo.com">Calendar</a> - <a
href="http://notepad.yahoo.com">Notepad</a></td>
	</tr>
    </table>
</center>

<br>
<table cellpadding=0 cellspacing=0 border=0 width="100%"><tr><td
bgcolor=#a0b8c8>
<table cellpadding=1 cellspacing=1 border=0 width="100%">

<tr valign=top bgcolor=#ffffff><td align=center>
<font face="arial" size=-2>
<A
href="http://rd.yahoo.com/footer/?http://address.yahoo.com/">Address&nbsp;Book</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://auctions.yahoo.com/">Auctions</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://autos.yahoo.com/">Autos</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://briefcase.yahoo.com/">Briefcase</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://calendar.yahoo.com/">Calendar</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://careers.yahoo.com/">Careers</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://chat.yahoo.com/">Chat</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://classifieds.yahoo.com/">Classifieds</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://finance.yahoo.com/">Finance</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://games.yahoo.com/">Games</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://geocities.yahoo.com/">Geocities</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://greetings.yahoo.com/">Greetings</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://groups.yahoo.com/">Groups</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://www.yahooligans.com/">Kids</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://mail.yahoo.com/">Mail</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://maps.yahoo.com/">Maps</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://members.yahoo.com/">Member&nbsp;Directory</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://messenger.yahoo.com/">Messenger</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://mobile.yahoo.com/">Mobile</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://movies.yahoo.com/">Movies</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://music.yahoo.com/">Music</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://my.yahoo.com/">My&nbsp;Yahoo!</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://news.yahoo.com/">News</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://paydirect.yahoo.com/">PayDirect</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://people.yahoo.com/">People&nbsp;Search</A>
&#183; <A 

href="http://rd.yahoo.com/O=1/footer/?http://personals.yahoo.com/">Personals</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://photos.yahoo.com/">Photos</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://radio.yahoo.com/">Radio</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://shopping.yahoo.com/">Shopping</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://sports.yahoo.com/">Sports</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://tv.yahoo.com/">TV</A> &#183;
<A 

href="http://rd.yahoo.com/footer/?http://travel.yahoo.com/">Travel</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://warehouse.yahoo.com/">Warehouse</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://weather.yahoo.com/">Weather</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://yp.yahoo.com/">Yellow&nbsp;Pages</A>
&#183; <A 

href="http://rd.yahoo.com/footer/?http://docs.yahoo.com/docs/family/more.html">more...</A>
</font>
</td></tr></table>
</td></tr></table>
<center><small><br>Copyright &copy; 1994-2002 <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=0/*http://rd.yahoo.com/mail_us/tos/?http://www.yahoo.com"
target="_blank">Yahoo!</a> Inc. All rights reserved.<a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=1/*http://docs.yahoo.com/info/terms/">Terms
of Service</a> - <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=2/*http://docs.yahoo.com/info/guidelines/mail.html">Guidelines</a><br>NOTICE:
We collect personal information on this site.<br>To learn more about
how we use your information, see our <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=3/*http://privacy.yahoo.com/privacy/us/mail/">Privacy
Policy</a></small></center><script>
var ypim_MA_Farm_URL = "http://us.f127.mail.yahoo.com";
var ypim_AB_URL = "http://address.yahoo.com/yab/us";
var ypim_CA_URL = "http://calendar.yahoo.com";
var ypim_NP_URL = "http://notepad.yahoo.com";
var ypim_MA_YY = "418009";
var ypim_IMG = "http://us.i1.yimg.com/us.yimg.com/i/us/pim";
var ypim_Loc = "us";
var ypim_IsCalendarView = false;
var ypim_IsNotepadView = false;
var ypim_i18n_CheckMail = "Check Mail";
var ypim_i18n_Compose = "Compose";
var ypim_i18n_Folders = "Folders";
var ypim_i18n_Search = "Search";
var ypim_i18n_Options = "Options";
var ypim_i18n_Help = "Help";
var ypim_i18n_AddContact = "Add Contact";
var ypim_i18n_AddCategory = "Add Category";
var ypim_i18n_AddList = "Add List";
var ypim_i18n_ViewContacts = "View Contacts";
var ypim_i18n_ViewLists = "View Lists";
var ypim_i18n_Quickbuilder = "Quickbuilder";
var ypim_i18n_ImportContacts = "Import Contacts";
var ypim_i18n_Synchronize = "Synchronize";
var ypim_i18n_AddressesOptions = "Addresses Options";
var ypim_i18n_AddressesHelp = "Addresses Help";
var ypim_i18n_AddEvent = "Add Event";
var ypim_i18n_AddTask = "Add Task";
var ypim_i18n_AddBirthday = "Add Birthday";
var ypim_i18n_Day = "Day";
var ypim_i18n_Week = "Week";
var ypim_i18n_Month = "Month";
var ypim_i18n_Year = "Year";
var ypim_i18n_EventList = "Event List";
var ypim_i18n_Reminders = "Reminders";
var ypim_i18n_Tasks = "Tasks";
var ypim_i18n_Sharing = "Sharing";
var ypim_i18n_Synchronize = "Synchronize";
var ypim_i18n_CalendarOptions = "Calendar Options";
var ypim_i18n_CalendarHelp = "Calendar Help";
var ypim_i18n_AddNote = "Add Note";
var ypim_i18n_AddFolder = "Add Folder";
var ypim_i18n_ViewNotes = "View Notes";
var ypim_i18n_NotepadOptions = "Notepad Options";
var ypim_i18n_NotepadHelp = "Notepad Help";
</script>
<script src="/lib_web/pulldowns.js"></script>
<div id="mail">
</div>
<div id="addr">				
</div>
<div id="cal">
</div>
<div id="note" class=menubg>
</div>
      </form>
</body>
</html>

<!-- v2.2.20 1025161582 -->
<!--0.31828-->
<!-- compressed -->

__________________________________________________
Do You Yahoo!?
Yahoo! - Official partner of 2002 FIFA World Cup
http://fifaworldcup.yahoo.com