You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2006/06/04 04:41:25 UTC
svn commit: r411501 [2/30] - in /incubator/lucene.net/trunk/C#/src: ./
Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/
Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/
Lucene.Net/Analysis/Standard/ Lucene.Net/Docum...
Modified: incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Demo/DemoLib/HTML/HTMLParser.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.cs Sat Jun 3 19:41:13 2006
@@ -15,1039 +15,1026 @@
*/
/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */
+
using System;
+
namespace Lucene.Net.Demo.Html
{
-
- public class HTMLParser : HTMLParserConstants
- {
- private void InitBlock()
- {
- jj_2_rtns = new JJCalls[2];
- jj_ls = new LookaheadSuccess();
- }
- public static int SUMMARY_LENGTH = 200;
-
- internal System.Text.StringBuilder title = new System.Text.StringBuilder(SUMMARY_LENGTH);
- internal System.Text.StringBuilder summary = new System.Text.StringBuilder(SUMMARY_LENGTH * 2);
- internal System.Collections.Specialized.NameValueCollection metaTags = new System.Collections.Specialized.NameValueCollection();
- internal System.String currentMetaTag = null;
- internal System.String currentMetaContent = null;
- internal int length = 0;
- internal bool titleComplete = false;
- internal bool summaryComplete = false;
- internal bool inTitle = false;
- internal bool inMetaTag = false;
- internal bool inStyle = false;
- internal bool afterTag = false;
- internal bool afterSpace = false;
- internal System.String eol = System.Environment.NewLine;
- internal System.IO.StreamReader pipeIn = null;
- internal System.IO.StreamWriter pipeOut;
- private MyPipedInputStream pipeInStream = null;
- private System.IO.StreamWriter pipeOutStream = null;
-
- private class MyPipedInputStream : System.IO.StreamReader
- {
- private void InitBlock(HTMLParser enclosingInstance)
- {
- this.enclosingInstance = enclosingInstance;
- }
- private HTMLParser enclosingInstance;
- public HTMLParser Enclosing_Instance
- {
- get
- {
- return enclosingInstance;
- }
+
+ public class HTMLParser : HTMLParserConstants
+ {
+ private void InitBlock()
+ {
+ jj_2_rtns = new JJCalls[2];
+ jj_ls = new LookaheadSuccess();
+ }
+ public static int SUMMARY_LENGTH = 200;
+
+ internal System.Text.StringBuilder title = new System.Text.StringBuilder(SUMMARY_LENGTH);
+ internal System.Text.StringBuilder summary = new System.Text.StringBuilder(SUMMARY_LENGTH * 2);
+ internal System.Collections.Specialized.NameValueCollection metaTags = new System.Collections.Specialized.NameValueCollection();
+ internal System.String currentMetaTag = null;
+ internal System.String currentMetaContent = null;
+ internal int length = 0;
+ internal bool titleComplete = false;
+ internal bool inTitle = false;
+ internal bool inMetaTag = false;
+ internal bool inStyle = false;
+ internal bool afterTag = false;
+ internal bool afterSpace = false;
+ internal System.String eol = System.Environment.NewLine;
+ internal System.IO.StreamReader pipeIn = null;
+ internal System.IO.StreamWriter pipeOut;
+ private MyPipedInputStream pipeInStream = null;
+ private System.IO.StreamWriter pipeOutStream = null;
+
+ private class MyPipedInputStream : System.IO.StreamReader
+ {
+ private void InitBlock(HTMLParser enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private HTMLParser enclosingInstance;
+ public HTMLParser Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
- }
-
- //public MyPipedInputStream(HTMLParser enclosingInstance) : base(new System.IO.MemoryStream())
- // // base(System.IO.Stream.Null)
- //{
- // InitBlock(enclosingInstance);
- //}
-
- public MyPipedInputStream(HTMLParser enclosingInstance, System.IO.StreamReader src) : base(src.BaseStream)
- {
- InitBlock(enclosingInstance);
- }
-
- public virtual bool Full()
- {
- return enclosingInstance.summaryComplete;
-
- /*
- try
- {
- if (this.Peek() == -1)
- {
- return (true);
- }
- }
- finally
- {
- //return (true);
- }
- return (false);
- */
- }
- }
-
- public HTMLParser(System.IO.FileInfo file) :
- this(new System.IO.FileStream(file.FullName, System.IO.FileMode.Open))
- {
- }
-
- public virtual System.String GetTitle()
- {
- if (pipeIn == null)
- GetReader(); // spawn parsing thread
- while (true)
- {
- lock (this)
- {
- if (titleComplete || pipeInStream.Full())
- break;
- System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
- }
- }
- return title.ToString().Trim();
- }
-
- public virtual System.Collections.Specialized.NameValueCollection GetMetaTags()
- {
- if (pipeIn == null)
- GetReader(); // spawn parsing thread
- while (true)
- {
- lock (this)
- {
- if (titleComplete || pipeInStream.Full())
- break;
- System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
- }
- }
- return metaTags;
- }
-
-
- public virtual System.String GetSummary()
- {
- if (pipeIn == null)
- GetReader(); // spawn parsing thread
- while (true)
- {
- lock (this)
- {
- if (summary.Length >= SUMMARY_LENGTH || pipeInStream.Full())
- break;
- System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
- }
- }
- if (summary.Length > SUMMARY_LENGTH)
- summary.Length = SUMMARY_LENGTH;
+ }
- System.String sum = summary.ToString().Trim();
- System.String tit = GetTitle();
- if (sum.StartsWith(tit) || sum.Equals(""))
- return tit;
- else
- return sum;
- }
-
- public virtual System.IO.StreamReader GetReader()
- {
- if (pipeIn == null)
- {
- pipeInStream = new MyPipedInputStream(this, new System.IO.StreamReader(new System.IO.MemoryStream(1024)));
- pipeOutStream = new System.IO.StreamWriter(pipeInStream.BaseStream);
- pipeIn = new System.IO.StreamReader(pipeInStream.BaseStream, System.Text.Encoding.Default);
- pipeOut = new System.IO.StreamWriter(pipeOutStream.BaseStream, System.Text.Encoding.Default);
+ // {{Aroush}} -- fix me
+ //public MyPipedInputStream(HTMLParser enclosingInstance) : base()
+ //{
+ // InitBlock(enclosingInstance);
+ //}
+
+ public MyPipedInputStream(HTMLParser enclosingInstance, System.IO.StreamWriter src) : base(src.BaseStream)
+ {
+ InitBlock(enclosingInstance);
+ }
+
+ public virtual bool Full()
+ {
+ return true; // return this.available() >= PipedInputStream.PIPE_SIZE; // {{Aroush}} -- fix me.
+ }
+ }
+
+ /// <deprecated> Use HTMLParser(FileInputStream) instead
+ /// </deprecated>
+ public HTMLParser(System.IO.FileInfo file) :
+ this(new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
+ {
+ }
+
+ public virtual System.String GetTitle()
+ {
+ if (pipeIn == null)
+ GetReader(); // spawn parsing thread
+ while (true)
+ {
+ lock (this)
+ {
+ if (titleComplete || pipeInStream.Full())
+ break;
+ System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
+ }
+ }
+ return title.ToString().Trim();
+ }
+
+ public virtual System.Collections.Specialized.NameValueCollection GetMetaTags()
+ {
+ if (pipeIn == null)
+ GetReader(); // spawn parsing thread
+ while (true)
+ {
+ lock (this)
+ {
+ if (titleComplete || pipeInStream.Full())
+ break;
+ System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
+ }
+ }
+ return metaTags;
+ }
+
+
+ public virtual System.String GetSummary()
+ {
+ if (pipeIn == null)
+ GetReader(); // spawn parsing thread
+ while (true)
+ {
+ lock (this)
+ {
+ if (summary.Length >= SUMMARY_LENGTH || pipeInStream.Full())
+ break;
+ System.Threading.Monitor.Wait(this, TimeSpan.FromMilliseconds(10));
+ }
+ }
+ if (summary.Length > SUMMARY_LENGTH)
+ summary.Length = SUMMARY_LENGTH;
+
+ System.String sum = summary.ToString().Trim();
+ System.String tit = GetTitle();
+ if (sum.StartsWith(tit) || sum.Equals(""))
+ return tit;
+ else
+ return sum;
+ }
+
+ public virtual System.IO.StreamReader GetReader()
+ {
+ if (pipeIn == null)
+ {
+ pipeInStream = null; // pipeInStream = new MyPipedInputStream(this); // {{Aroush-1.9}} -- fix me.
+ pipeOutStream = new System.IO.StreamWriter(pipeInStream.BaseStream);
+ pipeIn = new System.IO.StreamReader(pipeInStream.BaseStream, System.Text.Encoding.GetEncoding("UTF-16BE"));
+ pipeOut = new System.IO.StreamWriter(pipeOutStream.BaseStream, System.Text.Encoding.GetEncoding("UTF-16BE"));
- SupportClass.ThreadClass thread = new ParserThread(this);
- thread.Start(); // start parsing
- }
-
- return pipeIn;
- }
-
- internal virtual void AddToSummary(System.String text)
- {
- if (summary.Length < SUMMARY_LENGTH)
- {
- summary.Append(text);
- if (summary.Length >= SUMMARY_LENGTH)
- {
- lock (this)
- {
- summaryComplete = true;
- System.Threading.Monitor.PulseAll(this);
- }
- }
- }
- }
-
- internal virtual void AddText(System.String text)
- {
- if (inStyle)
- return ;
- if (inTitle)
- title.Append(text);
- else
- {
- AddToSummary(text);
- if (!titleComplete && !title.Equals(""))
- {
- // finished title
- lock (this)
- {
- titleComplete = true; // tell waiting threads
- System.Threading.Monitor.PulseAll(this);
- }
- }
- }
-
- length += text.Length;
- pipeOut.Write(text);
-
- afterSpace = false;
- }
-
- internal virtual void AddMetaTag()
- {
- metaTags[currentMetaTag] = currentMetaContent;
- currentMetaTag = null;
- currentMetaContent = null;
- return ;
- }
-
- internal virtual void AddSpace()
- {
- if (!afterSpace)
- {
- if (inTitle)
- title.Append(" ");
- else
- AddToSummary(" ");
+ SupportClass.ThreadClass thread = new ParserThread(this);
+ thread.Start(); // start parsing
+ }
+
+ return pipeIn;
+ }
+
+ internal virtual void AddToSummary(System.String text)
+ {
+ if (summary.Length < SUMMARY_LENGTH)
+ {
+ summary.Append(text);
+ if (summary.Length >= SUMMARY_LENGTH)
+ {
+ lock (this)
+ {
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+ }
+ }
+
+ internal virtual void AddText(System.String text)
+ {
+ if (inStyle)
+ return ;
+ if (inTitle)
+ title.Append(text);
+ else
+ {
+ AddToSummary(text);
+ if (!titleComplete && !title.Equals(""))
+ {
+ // finished title
+ lock (this)
+ {
+ titleComplete = true; // tell waiting threads
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+ }
+
+ length += text.Length;
+ pipeOut.Write(text);
+
+ afterSpace = false;
+ }
+
+ internal virtual void AddMetaTag()
+ {
+ metaTags[currentMetaTag] = currentMetaContent;
+ currentMetaTag = null;
+ currentMetaContent = null;
+ return ;
+ }
+
+ internal virtual void AddSpace()
+ {
+ if (!afterSpace)
+ {
+ if (inTitle)
+ title.Append(" ");
+ else
+ AddToSummary(" ");
- System.String space = afterTag ? eol : " ";
- length += space.Length;
- pipeOut.Write(space);
- afterSpace = true;
- }
- }
-
- public void HTMLDocument()
- {
- Token t;
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart:
- case Lucene.Net.Demo.Html.HTMLParserConstants.TagName:
- case Lucene.Net.Demo.Html.HTMLParserConstants.DeclName:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Word:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Entity:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Space:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Punct:
- ;
- break;
-
- default:
- jj_la1[0] = jj_gen;
- goto label_1_brk;
-
- }
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.TagName:
- Tag();
- afterTag = true;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.DeclName:
- t = Decl();
- afterTag = true;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
- CommentTag();
- afterTag = true;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart:
- ScriptTag();
- afterTag = true;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.Word:
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Word);
- AddText(t.image); afterTag = false;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.Entity:
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Entity);
- AddText(Entities.Decode(t.image)); afterTag = false;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.Punct:
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Punct);
- AddText(t.image); afterTag = false;
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.Space:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Space);
- AddSpace(); afterTag = false;
- break;
-
- default:
- jj_la1[1] = jj_gen;
- jj_consume_token(- 1);
- throw new ParseException();
+ System.String space = afterTag ? eol : " ";
+ length += space.Length;
+ pipeOut.Write(space);
+ afterSpace = true;
+ }
+ }
+
+ public void HTMLDocument()
+ {
+ Token t;
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.TagName:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.DeclName:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Word:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Entity:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Space:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Punct:
+ ;
+ break;
+
+ default:
+ jj_la1[0] = jj_gen;
+ goto label_1_brk;
+
+ }
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.TagName:
+ Tag();
+ afterTag = true;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.DeclName:
+ t = Decl();
+ afterTag = true;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
+ CommentTag();
+ afterTag = true;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart:
+ ScriptTag();
+ afterTag = true;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Word:
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Word);
+ AddText(t.image); afterTag = false;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Entity:
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Entity);
+ AddText(Entities.Decode(t.image)); afterTag = false;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Punct:
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Punct);
+ AddText(t.image); afterTag = false;
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Space:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Space);
+ AddSpace(); afterTag = false;
+ break;
+
+ default:
+ jj_la1[1] = jj_gen;
+ Jj_consume_token(- 1);
+ throw new ParseException();
- }
- }
+ }
+ }
label_1_brk: ;
- jj_consume_token(0);
- }
+ Jj_consume_token(0);
+ }
- public void Tag()
- {
- Token t1, t2;
- bool inImg = false;
- t1 = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagName);
- System.String tagName = t1.image.ToLower();
- if (Tags.WS_ELEMS.Contains(tagName))
- {
- AddSpace();
- }
- inTitle = tagName.ToUpper().Equals("<title".ToUpper()); // keep track if in <TITLE>
- inMetaTag = tagName.ToUpper().Equals("<META".ToUpper()); // keep track if in <META>
- inStyle = tagName.ToUpper().Equals("<STYLE".ToUpper()); // keep track if in <STYLE>
- inImg = tagName.ToUpper().Equals("<img".ToUpper()); // keep track if in <IMG>
-
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == - 1)?jj_ntk():jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
- ;
- break;
-
- default:
- jj_la1[2] = jj_gen;
- goto label_2_brk;
-
- }
- t1 = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgName);
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals);
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
- t2 = ArgValue();
- if (inImg && t1.image.ToUpper().Equals("alt".ToUpper()) && t2 != null)
- AddText("[" + t2.image + "]");
+ public void Tag()
+ {
+ Token t1, t2;
+ bool inImg = false;
+ t1 = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagName);
+ System.String tagName = t1.image.ToLower();
+ if (Tags.WS_ELEMS.Contains(tagName))
+ {
+ AddSpace();
+ }
+ inTitle = tagName.ToUpper().Equals("<title".ToUpper()); // keep track if in <TITLE>
+ inMetaTag = tagName.ToUpper().Equals("<META".ToUpper()); // keep track if in <META>
+ inStyle = tagName.ToUpper().Equals("<STYLE".ToUpper()); // keep track if in <STYLE>
+ inImg = tagName.ToUpper().Equals("<img".ToUpper()); // keep track if in <IMG>
+
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
+ ;
+ break;
+
+ default:
+ jj_la1[2] = jj_gen;
+ goto label_2_brk;
+
+ }
+ t1 = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgName);
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals);
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
+ t2 = ArgValue();
+ if (inImg && t1.image.ToUpper().Equals("alt".ToUpper()) && t2 != null)
+ AddText("[" + t2.image + "]");
- if (inMetaTag && (t1.image.ToUpper().Equals("name".ToUpper()) || t1.image.ToUpper().Equals("HTTP-EQUIV".ToUpper())) && t2 != null)
- {
- currentMetaTag = t2.image.ToLower();
- if (currentMetaTag != null && currentMetaContent != null)
- {
- AddMetaTag();
- }
- }
- if (inMetaTag && t1.image.ToUpper().Equals("content".ToUpper()) && t2 != null)
- {
- currentMetaContent = t2.image.ToLower();
- if (currentMetaTag != null && currentMetaContent != null)
- {
- AddMetaTag();
- }
- }
- break;
-
- default:
- jj_la1[3] = jj_gen;
- ;
- break;
-
- }
- break;
+ if (inMetaTag && (t1.image.ToUpper().Equals("name".ToUpper()) || t1.image.ToUpper().Equals("HTTP-EQUIV".ToUpper())) && t2 != null)
+ {
+ currentMetaTag = t2.image.ToLower();
+ if (currentMetaTag != null && currentMetaContent != null)
+ {
+ AddMetaTag();
+ }
+ }
+ if (inMetaTag && t1.image.ToUpper().Equals("content".ToUpper()) && t2 != null)
+ {
+ currentMetaContent = t2.image.ToLower();
+ if (currentMetaTag != null && currentMetaContent != null)
+ {
+ AddMetaTag();
+ }
+ }
+ break;
+
+ default:
+ jj_la1[3] = jj_gen;
+ ;
+ break;
+
+ }
+ break;
+
+ default:
+ jj_la1[4] = jj_gen;
+ ;
+ break;
- default:
- jj_la1[4] = jj_gen;
- ;
- break;
-
- }
- }
+ }
+ }
label_2_brk: ;
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagEnd);
- }
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagEnd);
+ }
- public Token ArgValue()
- {
- Token t = null;
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
+ public Token ArgValue()
+ {
+ Token t = null;
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue);
- {
- if (true)
- return t;
- }
- break;
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue);
+ {
+ if (true)
+ return t;
+ }
+ break;
- default:
- jj_la1[5] = jj_gen;
- if (jj_2_1(2))
- {
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1);
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1);
- {
- if (true)
- return t;
- }
- }
- else
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1);
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Quote1Text);
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1);
- {
- if (true)
- return t;
- }
- break;
-
- default:
- jj_la1[6] = jj_gen;
- if (jj_2_2(2))
- {
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2);
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2);
- {
- if (true)
- return t;
- }
- }
- else
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
+ default:
+ jj_la1[5] = jj_gen;
+ if (Jj_2_1(2))
+ {
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1);
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1);
+ {
+ if (true)
+ return t;
+ }
+ }
+ else
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1);
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Quote1Text);
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1);
+ {
+ if (true)
+ return t;
+ }
+ break;
+
+ default:
+ jj_la1[6] = jj_gen;
+ if (Jj_2_2(2))
+ {
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2);
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2);
+ {
+ if (true)
+ return t;
+ }
+ }
+ else
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2);
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Quote2Text);
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2);
- {
- if (true)
- return t;
- }
- break;
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2);
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Quote2Text);
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2);
+ {
+ if (true)
+ return t;
+ }
+ break;
- default:
- jj_la1[7] = jj_gen;
- jj_consume_token(- 1);
- throw new ParseException();
+ default:
+ jj_la1[7] = jj_gen;
+ Jj_consume_token(- 1);
+ throw new ParseException();
- }
- }
- break;
-
- }
- }
- break;
+ }
+ }
+ break;
+
+ }
+ }
+ break;
- }
- throw new System.ApplicationException("Missing return statement in function");
- }
-
- public Token Decl()
- {
- Token t;
- t = jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.DeclName);
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == -1 ) ?jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
- ;
- break;
-
- default:
- jj_la1[8] = jj_gen;
- goto label_3_brk;
-
- }
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgName);
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
- ArgValue();
- break;
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals);
- break;
-
- default:
- jj_la1[9] = jj_gen;
- jj_consume_token(- 1);
- throw new ParseException();
+ }
+ throw new System.ApplicationException("Missing return statement in function");
+ }
+
+ public Token Decl()
+ {
+ Token t;
+ t = Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.DeclName);
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
+ ;
+ break;
+
+ default:
+ jj_la1[8] = jj_gen;
+ goto label_3_brk;
+
+ }
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgName:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgName);
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgValue:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1:
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2:
+ ArgValue();
+ break;
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgEquals);
+ break;
+
+ default:
+ jj_la1[9] = jj_gen;
+ Jj_consume_token(- 1);
+ throw new ParseException();
- }
- }
+ }
+ }
- label_3_brk: ;
+label_3_brk: ;
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagEnd);
- {
- if (true)
- return t;
- }
- throw new System.ApplicationException("Missing return statement in function");
- }
-
- public void CommentTag()
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.TagEnd);
+ {
+ if (true)
+ return t;
+ }
+ throw new System.ApplicationException("Missing return statement in function");
+ }
+
+ public void CommentTag()
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Comment1);
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == - 1)?jj_ntk():jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.CommentText1:
- ;
- break;
-
- default:
- jj_la1[10] = jj_gen;
- goto label_4_brk;
-
- }
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentText1);
- }
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment1:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Comment1);
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.CommentText1:
+ ;
+ break;
+
+ default:
+ jj_la1[10] = jj_gen;
+ goto label_4_brk;
+
+ }
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentText1);
+ }
- label_4_brk: ;
+label_4_brk: ;
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentEnd1);
- break;
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentEnd1);
+ break;
- case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Comment2);
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.CommentText2:
- ;
- break;
-
- default:
- jj_la1[11] = jj_gen;
- goto label_5_brk;
-
- }
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentText2);
- }
+ case Lucene.Net.Demo.Html.HTMLParserConstants.Comment2:
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.Comment2);
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.CommentText2:
+ ;
+ break;
+
+ default:
+ jj_la1[11] = jj_gen;
+ goto label_5_brk;
+
+ }
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentText2);
+ }
- label_5_brk: ;
+label_5_brk: ;
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentEnd2);
- break;
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.CommentEnd2);
+ break;
- default:
- jj_la1[12] = jj_gen;
- jj_consume_token(- 1);
- throw new ParseException();
+ default:
+ jj_la1[12] = jj_gen;
+ Jj_consume_token(- 1);
+ throw new ParseException();
- }
- }
+ }
+ }
- public void ScriptTag()
- {
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart);
- while (true)
- {
- switch ((jj_ntk_Renamed_Field == -1) ? jj_ntk() : jj_ntk_Renamed_Field)
- {
-
- case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptText:
- ;
- break;
-
- default:
- jj_la1[13] = jj_gen;
- goto label_6_brk;
-
- }
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptText);
- }
+ public void ScriptTag()
+ {
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptStart);
+ while (true)
+ {
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Demo.Html.HTMLParserConstants.ScriptText:
+ ;
+ break;
+
+ default:
+ jj_la1[13] = jj_gen;
+ goto label_6_brk;
+
+ }
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptText);
+ }
- label_6_brk: ;
+label_6_brk: ;
- jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptEnd);
- }
+ Jj_consume_token(Lucene.Net.Demo.Html.HTMLParserConstants.ScriptEnd);
+ }
- private bool jj_2_1(int xla)
- {
- jj_la = xla; jj_lastpos = jj_scanpos = token;
- try
- {
- return !jj_3_1();
- }
- catch (LookaheadSuccess ls)
- {
- return true;
- }
- finally
- {
- jj_save(0, xla);
- }
- }
-
- private bool jj_2_2(int xla)
- {
- jj_la = xla; jj_lastpos = jj_scanpos = token;
- try
- {
- return !jj_3_2();
- }
- catch (LookaheadSuccess ls)
- {
- return true;
- }
- finally
- {
- jj_save(1, xla);
- }
- }
-
- private bool jj_3_1()
- {
- if (jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1))
- return true;
- if (jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1))
- return true;
- return false;
- }
-
- private bool jj_3_2()
- {
- if (jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2))
- return true;
- if (jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2))
- return true;
- return false;
- }
-
- public HTMLParserTokenManager token_source;
- internal SimpleCharStream jj_input_stream;
- public Token token, jj_nt;
- private int jj_ntk_Renamed_Field;
- private Token jj_scanpos, jj_lastpos;
- private int jj_la;
- public bool lookingAhead = false;
- private bool jj_semLA;
- private int jj_gen;
- private int[] jj_la1 = new int[14];
- private static int[] jj_la1_0_Renamed_Field;
- private static void jj_la1_0()
- {
- jj_la1_0_Renamed_Field = new int[]{0x167e, 0x167e, 0x8000, 0x1c0000, 0x10000, 0x40000, 0x80000, 0x100000, 0x1d8000, 0x1d8000, 0x4000000, 0x10000000, 0x30, 0x2000};
- }
- private JJCalls[] jj_2_rtns;
- private bool jj_rescan = false;
- private int jj_gc = 0;
-
- public HTMLParser(System.IO.Stream stream)
- {
- InitBlock();
- jj_input_stream = new SimpleCharStream(stream, 1, 1);
- token_source = new HTMLParserTokenManager(jj_input_stream);
- token = new Token();
- jj_ntk_Renamed_Field = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = - 1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- public virtual void ReInit(System.IO.Stream stream)
- {
- jj_input_stream.ReInit(stream, 1, 1);
- token_source.ReInit(jj_input_stream);
- token = new Token();
- jj_ntk_Renamed_Field = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- public HTMLParser(System.IO.StreamReader stream)
- {
- InitBlock();
- jj_input_stream = new SimpleCharStream(stream, 1, 1);
- token_source = new HTMLParserTokenManager(jj_input_stream);
- token = new Token();
- jj_ntk_Renamed_Field = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = - 1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- public virtual void ReInit(System.IO.StreamReader stream)
- {
- jj_input_stream.ReInit(stream, 1, 1);
- token_source.ReInit(jj_input_stream);
- token = new Token();
- jj_ntk_Renamed_Field = - 1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = - 1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- public HTMLParser(HTMLParserTokenManager tm)
- {
- InitBlock();
- token_source = tm;
- token = new Token();
- jj_ntk_Renamed_Field = - 1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = - 1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- public virtual void ReInit(HTMLParserTokenManager tm)
- {
- token_source = tm;
- token = new Token();
- jj_ntk_Renamed_Field = - 1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++)
- jj_la1[i] = - 1;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- jj_2_rtns[i] = new JJCalls();
- }
-
- private Token jj_consume_token(int kind)
- {
- Token oldToken;
- if ((oldToken = token).next != null)
- token = token.next;
- else
- token = token.next = token_source.GetNextToken();
- jj_ntk_Renamed_Field = -1;
- if (token.kind == kind)
- {
- jj_gen++;
- if (++jj_gc > 100)
- {
- jj_gc = 0;
- for (int i = 0; i < jj_2_rtns.Length; i++)
- {
- JJCalls c = jj_2_rtns[i];
- while (c != null)
- {
- if (c.gen < jj_gen)
- c.first = null;
- c = c.next;
- }
- }
- }
- return token;
- }
- token = oldToken;
- jj_kind = kind;
- throw GenerateParseException();
- }
-
- [Serializable]
- private sealed class LookaheadSuccess : System.ApplicationException
- {
- }
+ private bool Jj_2_1(int xla) // syntactic lookahead #1: true when Jj_3_1 matches within a budget of xla tokens
+ {
+ jj_la = xla; jj_lastpos = jj_scanpos = token; // start scanning at the current token
+ try
+ {
+ return !Jj_3_1(); // Jj_3_1 returns true on a mismatch
+ }
+ catch (LookaheadSuccess) // thrown by Jj_scan_token when the budget runs out on a matching token; variable dropped because it was never read
+ {
+ return true;
+ }
+ finally
+ {
+ Jj_save(0, xla); // record this attempt in the jj_2_rtns[0] chain, which Jj_rescan_token replays
+ }
+ }
+
+ private bool Jj_2_2(int xla) // syntactic lookahead #2: true when Jj_3_2 matches within a budget of xla tokens
+ {
+ jj_la = xla; jj_lastpos = jj_scanpos = token; // start scanning at the current token
+ try
+ {
+ return !Jj_3_2(); // Jj_3_2 returns true on a mismatch
+ }
+ catch (LookaheadSuccess) // thrown by Jj_scan_token when the budget runs out on a matching token; variable dropped because it was never read
+ {
+ return true;
+ }
+ finally
+ {
+ Jj_save(1, xla); // record this attempt in the jj_2_rtns[1] chain, which Jj_rescan_token replays
+ }
+ }
+
+ private bool Jj_3_1() // lookahead scan for the token sequence ArgQuote1 CloseQuote1; returns true on mismatch
+ {
+ if (Jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote1))
+ return true;
+ if (Jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote1))
+ return true;
+ return false; // both tokens matched
+ }
+
+ private bool Jj_3_2() // lookahead scan for the token sequence ArgQuote2 CloseQuote2; returns true on mismatch
+ {
+ if (Jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.ArgQuote2))
+ return true;
+ if (Jj_scan_token(Lucene.Net.Demo.Html.HTMLParserConstants.CloseQuote2))
+ return true;
+ return false; // both tokens matched
+ }
+
+ public HTMLParserTokenManager token_source;
+ internal SimpleCharStream jj_input_stream;
+ public Token token, jj_nt;
+ private int jj_ntk;
+ private Token jj_scanpos, jj_lastpos;
+ private int jj_la;
+ public bool lookingAhead = false;
+ private bool jj_semLA;
+ private int jj_gen;
+ private int[] jj_la1 = new int[14];
+ private static int[] jj_la1_0;
+ private static void Jj_la1_0() // fills the static jj_la1_0 table; called from the static constructor
+ {
+ jj_la1_0 = new int[]{0x2c7e, 0x2c7e, 0x10000, 0x380000, 0x20000, 0x80000, 0x100000, 0x200000, 0x3b0000, 0x3b0000, 0x8000000, 0x20000000, 0x30, 0x4000}; // 14 entries, one per jj_la1 slot; GenerateParseException reads each as a bit mask of token kinds
+ }
+ private JJCalls[] jj_2_rtns;
+ private bool jj_rescan = false;
+ private int jj_gc = 0;
+
+ public HTMLParser(System.IO.Stream stream) // creates a parser whose token manager reads from the given stream
+ {
+ InitBlock();
+ jj_input_stream = new SimpleCharStream(stream, 1, 1);
+ token_source = new HTMLParserTokenManager(jj_input_stream);
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // fresh head record for each lookahead routine
+ }
+
+ public virtual void ReInit(System.IO.Stream stream) // resets this parser to read from a new stream, reusing the existing char stream and token manager
+ {
+ jj_input_stream.ReInit(stream, 1, 1);
+ token_source.ReInit(jj_input_stream);
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // discard previously saved lookahead records
+ }
+
+ public HTMLParser(System.IO.StreamReader stream) // creates a parser whose token manager reads from the given reader
+ {
+ InitBlock();
+ jj_input_stream = new SimpleCharStream(stream, 1, 1);
+ token_source = new HTMLParserTokenManager(jj_input_stream);
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // fresh head record for each lookahead routine
+ }
+
+ public virtual void ReInit(System.IO.StreamReader stream) // resets this parser to read from a new reader, reusing the existing char stream and token manager
+ {
+ jj_input_stream.ReInit(stream, 1, 1);
+ token_source.ReInit(jj_input_stream);
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // discard previously saved lookahead records
+ }
+
+ public HTMLParser(HTMLParserTokenManager tm) // creates a parser on top of a caller-supplied token manager; jj_input_stream is not assigned here
+ {
+ InitBlock();
+ token_source = tm;
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // fresh head record for each lookahead routine
+ }
+
+ public virtual void ReInit(HTMLParserTokenManager tm) // resets this parser to pull tokens from a different token manager
+ {
+ token_source = tm;
+ token = new Token(); // placeholder head of the token chain; fetched tokens are linked through its next field
+ jj_ntk = - 1; // no next-token kind cached yet (Jj_ntk() assigns it)
+ jj_gen = 0;
+ for (int i = 0; i < 14; i++)
+ jj_la1[i] = - 1; // jj_gen starts at 0, so -1 can never equal it in GenerateParseException
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ jj_2_rtns[i] = new JJCalls(); // discard previously saved lookahead records
+ }
+
+ private Token Jj_consume_token(int kind) // advances to the next token and returns it if it has the given kind; otherwise throws the exception built by GenerateParseException
+ {
+ Token oldToken;
+ if ((oldToken = token).next != null)
+ token = token.next; // next token was already fetched (e.g. by Jj_scan_token or GetToken)
+ else
+ token = token.next = token_source.GetNextToken();
+ jj_ntk = - 1; // cached next-token kind is stale after moving
+ if (token.kind == kind)
+ {
+ jj_gen++;
+ if (++jj_gc > 100) // periodic sweep, once the counter of successful consumes passes 100
+ {
+ jj_gc = 0;
+ for (int i = 0; i < jj_2_rtns.Length; i++)
+ {
+ JJCalls c = jj_2_rtns[i];
+ while (c != null)
+ {
+ if (c.gen < jj_gen)
+ c.first = null; // outdated lookahead record: release its token reference
+ c = c.next;
+ }
+ }
+ }
+ return token;
+ }
+ token = oldToken; // mismatch: step back to the previous token
+ jj_kind = kind; // remembered so GenerateParseException lists this kind as expected
+ throw GenerateParseException();
+ }
+
+ [Serializable]
+ private sealed class LookaheadSuccess : System.ApplicationException
+ {
+ }
private LookaheadSuccess jj_ls;
- private bool jj_scan_token(int kind)
- {
- if (jj_scanpos == jj_lastpos)
- {
- jj_la--;
- if (jj_scanpos.next == null)
- {
- jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.GetNextToken();
- }
- else
- {
- jj_lastpos = jj_scanpos = jj_scanpos.next;
- }
- }
- else
- {
- jj_scanpos = jj_scanpos.next;
- }
- if (jj_rescan)
- {
- int i = 0; Token tok = token;
- while (tok != null && tok != jj_scanpos)
- {
- i++; tok = tok.next;
- }
- if (tok != null)
- jj_add_error_token(kind, i);
- }
- if (jj_scanpos.kind != kind)
- return true;
- if (jj_la == 0 && jj_scanpos == jj_lastpos)
- throw jj_ls;
- return false;
- }
-
- public Token GetNextToken()
- {
- if (token.next != null)
- token = token.next;
- else
- token = token.next = token_source.GetNextToken();
- jj_ntk_Renamed_Field = - 1;
- jj_gen++;
- return token;
- }
-
- public Token GetToken(int index)
- {
- Token t = lookingAhead?jj_scanpos:token;
- for (int i = 0; i < index; i++)
- {
- if (t.next != null)
- t = t.next;
- else
- t = t.next = token_source.GetNextToken();
- }
- return t;
- }
-
- private int jj_ntk()
- {
- if ((jj_nt = token.next) == null)
- return (jj_ntk_Renamed_Field = (token.next = token_source.GetNextToken()).kind);
- else
- return (jj_ntk_Renamed_Field = jj_nt.kind);
- }
-
- private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
- private int[] jj_expentry;
- private int jj_kind = -1;
- private int[] jj_lasttokens = new int[100];
- private int jj_endpos;
-
- private void jj_add_error_token(int kind, int pos)
- {
- if (pos >= 100)
- return ;
- if (pos == jj_endpos + 1)
- {
- jj_lasttokens[jj_endpos++] = kind;
- }
- else if (jj_endpos != 0)
- {
- jj_expentry = new int[jj_endpos];
- for (int i = 0; i < jj_endpos; i++)
- {
- jj_expentry[i] = jj_lasttokens[i];
- }
- bool exists = false;
- for (System.Collections.IEnumerator e = jj_expentries.GetEnumerator(); e.MoveNext(); )
- {
- int[] oldentry = (int[]) (e.Current);
- if (oldentry.Length == jj_expentry.Length)
- {
- exists = true;
- for (int i = 0; i < jj_expentry.Length; i++)
- {
- if (oldentry[i] != jj_expentry[i])
- {
- exists = false;
- break;
- }
- }
- if (exists)
- break;
- }
- }
- if (!exists)
- jj_expentries.Add(jj_expentry);
- if (pos != 0)
- jj_lasttokens[(jj_endpos = pos) - 1] = kind;
- }
- }
-
- public virtual ParseException GenerateParseException()
- {
- jj_expentries.Clear();
- bool[] la1tokens = new bool[30];
- for (int i = 0; i < 30; i++)
- {
- la1tokens[i] = false;
- }
- if (jj_kind >= 0)
- {
- la1tokens[jj_kind] = true;
- jj_kind = - 1;
- }
- for (int i = 0; i < 14; i++)
- {
- if (jj_la1[i] == jj_gen)
- {
- for (int j = 0; j < 32; j++)
- {
- if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
- {
- la1tokens[j] = true;
- }
- }
- }
- }
- for (int i = 0; i < 30; i++)
- {
- if (la1tokens[i])
- {
- jj_expentry = new int[1];
- jj_expentry[0] = i;
- jj_expentries.Add(jj_expentry);
- }
- }
- jj_endpos = 0;
- jj_rescan_token();
- jj_add_error_token(0, 0);
- int[][] exptokseq = new int[jj_expentries.Count][];
- for (int i = 0; i < jj_expentries.Count; i++)
- {
- exptokseq[i] = (int[]) jj_expentries[i];
- }
- return new ParseException(token, exptokseq, Lucene.Net.Demo.Html.HTMLParserConstants.tokenImage);
- }
-
- public void Enable_Tracing()
- {
- }
-
- public void Disable_Tracing()
- {
- }
-
- private void jj_rescan_token()
- {
- jj_rescan = true;
- for (int i = 0; i < 2; i++)
- {
- JJCalls p = jj_2_rtns[i];
- do
- {
- if (p.gen > jj_gen)
- {
- jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
- switch (i)
- {
-
- case 0: jj_3_1(); break;
-
- case 1: jj_3_2(); break;
- }
- }
- p = p.next;
- }
- while (p != null);
- }
- jj_rescan = false;
- }
-
- private void jj_save(int index, int xla)
- {
- JJCalls p = jj_2_rtns[index];
- while (p.gen > jj_gen)
- {
- if (p.next == null)
- {
- p = p.next = new JJCalls(); break;
- }
- p = p.next;
- }
- p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla;
- }
-
- internal sealed class JJCalls
- {
- internal int gen;
- internal Token first;
- internal int arg;
- internal JJCalls next;
- }
-
- // void handleException(Exception e) {
- // System.out.println(e.toString()); // print the error message
- // System.out.println("Skipping...");
- // Token t;
- // do {
- // t = getNextToken();
- // } while (t.kind != TagEnd);
- // }
- static HTMLParser()
- {
- {
- jj_la1_0();
- }
- }
- }
-}
+ private bool Jj_scan_token(int kind) // lookahead step: moves jj_scanpos one token ahead and returns true if that token is NOT of the given kind
+ {
+ if (jj_scanpos == jj_lastpos)
+ {
+ jj_la--; // scanning beyond the furthest position reached so far uses up one unit of lookahead budget
+ if (jj_scanpos.next == null)
+ {
+ jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.GetNextToken();
+ }
+ else
+ {
+ jj_lastpos = jj_scanpos = jj_scanpos.next;
+ }
+ }
+ else
+ {
+ jj_scanpos = jj_scanpos.next;
+ }
+ if (jj_rescan) // true only while Jj_rescan_token is running
+ {
+ int i = 0; Token tok = token;
+ while (tok != null && tok != jj_scanpos)
+ {
+ i++; tok = tok.next;
+ }
+ if (tok != null)
+ Jj_add_error_token(kind, i); // i = number of links from the current token to the scan position
+ }
+ if (jj_scanpos.kind != kind)
+ return true;
+ if (jj_la == 0 && jj_scanpos == jj_lastpos)
+ throw jj_ls; // budget exhausted on a matching token: caught by Jj_2_1 / Jj_2_2 as success
+ return false;
+ }
+
+ public Token GetNextToken() // advances to the next token unconditionally and returns it
+ {
+ if (token.next != null)
+ token = token.next; // already fetched earlier
+ else
+ token = token.next = token_source.GetNextToken();
+ jj_ntk = - 1; // cached next-token kind is stale after moving
+ jj_gen++;
+ return token;
+ }
+
+ public Token GetToken(int index) // returns the token index links ahead of the current position without consuming anything; index 0 is the current token
+ {
+ Token t = lookingAhead?jj_scanpos:token; // start from the scan position when lookingAhead is set
+ for (int i = 0; i < index; i++)
+ {
+ if (t.next != null)
+ t = t.next;
+ else
+ t = t.next = token_source.GetNextToken(); // fetch and link tokens that have not been read yet
+ }
+ return t;
+ }
+
+ private int Jj_ntk() // returns the kind of the token after the current one, caching it in the jj_ntk field
+ {
+ if ((jj_nt = token.next) == null)
+ return (jj_ntk = (token.next = token_source.GetNextToken()).kind); // not fetched yet: read it and link it (jj_nt stays null on this path)
+ else
+ return (jj_ntk = jj_nt.kind);
+ }
+
+ private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
+ private int[] jj_expentry;
+ private int jj_kind = - 1;
+ private int[] jj_lasttokens = new int[100];
+ private int jj_endpos;
+
+ private void Jj_add_error_token(int kind, int pos) // collects expected token-kind sequences; called from Jj_scan_token while rescanning and from GenerateParseException
+ {
+ if (pos >= 100)
+ return ; // jj_lasttokens has only 100 entries
+ if (pos == jj_endpos + 1)
+ {
+ jj_lasttokens[jj_endpos++] = kind; // extends the sequence currently being built by one position
+ }
+ else if (jj_endpos != 0)
+ {
+ jj_expentry = new int[jj_endpos]; // snapshot of the sequence built so far
+ for (int i = 0; i < jj_endpos; i++)
+ {
+ jj_expentry[i] = jj_lasttokens[i];
+ }
+ bool exists = false;
+ for (System.Collections.IEnumerator e = jj_expentries.GetEnumerator(); e.MoveNext(); )
+ {
+ int[] oldentry = (int[]) (e.Current);
+ if (oldentry.Length == jj_expentry.Length)
+ {
+ exists = true;
+ for (int i = 0; i < jj_expentry.Length; i++)
+ {
+ if (oldentry[i] != jj_expentry[i])
+ {
+ exists = false;
+ break;
+ }
+ }
+ if (exists)
+ break;
+ }
+ }
+ if (!exists)
+ jj_expentries.Add(jj_expentry); // keep the snapshot unless an identical sequence is already listed
+ if (pos != 0)
+ jj_lasttokens[(jj_endpos = pos) - 1] = kind; // working sequence now has length pos and ends with kind
+ }
+ }
+
+ public virtual ParseException GenerateParseException() // builds a ParseException for the current token, listing the token-kind sequences that were expected
+ {
+ jj_expentries.Clear();
+ bool[] la1tokens = new bool[31]; // one flag per token kind 0..30
+ for (int i = 0; i < 31; i++)
+ {
+ la1tokens[i] = false;
+ }
+ if (jj_kind >= 0)
+ {
+ la1tokens[jj_kind] = true; // kind that Jj_consume_token failed to match
+ jj_kind = - 1;
+ }
+ for (int i = 0; i < 14; i++)
+ {
+ if (jj_la1[i] == jj_gen) // presumably stamped by the production methods outside this chunk -- TODO confirm
+ {
+ for (int j = 0; j < 32; j++)
+ {
+ if ((jj_la1_0[i] & (1 << j)) != 0)
+ {
+ la1tokens[j] = true; // NOTE(review): j == 31 would overrun la1tokens; safe only while no jj_la1_0 mask sets bit 31
+ }
+ }
+ }
+ }
+ for (int i = 0; i < 31; i++)
+ {
+ if (la1tokens[i])
+ {
+ jj_expentry = new int[1]; // single-token expectation
+ jj_expentry[0] = i;
+ jj_expentries.Add(jj_expentry);
+ }
+ }
+ jj_endpos = 0;
+ Jj_rescan_token(); // replays saved lookaheads so Jj_scan_token reports multi-token expectations
+ Jj_add_error_token(0, 0); // pos 0 flushes the last sequence still held in jj_lasttokens
+ int[][] exptokseq = new int[jj_expentries.Count][];
+ for (int i = 0; i < jj_expentries.Count; i++)
+ {
+ exptokseq[i] = (int[]) jj_expentries[i];
+ }
+ return new ParseException(token, exptokseq, Lucene.Net.Demo.Html.HTMLParserConstants.tokenImage);
+ }
+
+ public void Enable_tracing() // no-op: the body is empty
+ {
+ }
+
+ public void Disable_tracing() // no-op: the body is empty
+ {
+ }
+
+ private void Jj_rescan_token() // replays the saved lookahead attempts with jj_rescan set, so Jj_scan_token feeds Jj_add_error_token
+ {
+ jj_rescan = true;
+ for (int i = 0; i < 2; i++) // one chain per lookahead routine (Jj_3_1, Jj_3_2)
+ {
+ JJCalls p = jj_2_rtns[i];
+ do
+ {
+ if (p.gen > jj_gen) // only records whose generation stamp is still ahead of the current one
+ {
+ jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; // restore the budget and start token saved by Jj_save
+ switch (i)
+ {
+
+ case 0: Jj_3_1(); break;
+
+ case 1: Jj_3_2(); break;
+ }
+ }
+ p = p.next;
+ }
+ while (p != null);
+ }
+ jj_rescan = false; // NOTE(review): not reset if a Jj_3_x call throws LookaheadSuccess, which is not caught here
+ }
+
+ private void Jj_save(int index, int xla) // records a finished lookahead attempt for routine index so Jj_rescan_token can replay it
+ {
+ JJCalls p = jj_2_rtns[index];
+ while (p.gen > jj_gen) // skip records that Jj_rescan_token would still replay
+ {
+ if (p.next == null)
+ {
+ p = p.next = new JJCalls(); break; // every record is still in use: append a new one
+ }
+ p = p.next;
+ }
+ p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; // xla - jj_la = lookahead budget actually used
+ }
+
+ internal sealed class JJCalls // linked-list record of one saved lookahead attempt (written by Jj_save)
+ {
+ internal int gen; // generation stamp; compared with jj_gen to decide whether the record is still current
+ internal Token first; // token at which the lookahead started
+ internal int arg; // lookahead budget (xla) the attempt was made with
+ internal JJCalls next; // next record in the chain for the same lookahead routine
+ }
+
+ // void handleException(Exception e) {
+ // System.out.println(e.toString()); // print the error message
+ // System.out.println("Skipping...");
+ // Token t;
+ // do {
+ // t = getNextToken();
+ // } while (t.kind != TagEnd);
+ // }
+ static HTMLParser() // static constructor: fills the shared jj_la1_0 table
+ {
+ {
+ Jj_la1_0();
+ }
+ }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.jj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Demo/DemoLib/HTML/HTMLParser.jj?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.jj (original)
+++ incubator/lucene.net/trunk/C#/src/Demo/DemoLib/HTML/HTMLParser.jj Sat Jun 3 19:41:13 2006
@@ -1,425 +1,391 @@
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-// HTMLParser.jj
-
-options {
- STATIC = false;
- OPTIMIZE_TOKEN_MANAGER = true;
- //DEBUG_LOOKAHEAD = true;
- //DEBUG_TOKEN_MANAGER = true;
-}
-
-PARSER_BEGIN(HTMLParser)
-
-package org.apache.lucene.demo.html;
-
-import java.io.*;
-import java.util.Properties;
-
-public class HTMLParser {
- public static int SUMMARY_LENGTH = 200;
-
- StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
- StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
- Properties metaTags=new Properties();
- String currentMetaTag=null;
- String currentMetaContent=null;
- int length = 0;
- boolean titleComplete = false;
- boolean inTitle = false;
- boolean inMetaTag = false;
- boolean inStyle = false;
- boolean afterTag = false;
- boolean afterSpace = false;
- String eol = System.getProperty("line.separator");
- Reader pipeIn = null;
- Writer pipeOut;
- private MyPipedInputStream pipeInStream = null;
- private PipedOutputStream pipeOutStream = null;
-
- private class MyPipedInputStream extends PipedInputStream{
-
- public MyPipedInputStream(){
- super();
- }
-
- public MyPipedInputStream(PipedOutputStream src) throws IOException{
- super(src);
- }
-
- public boolean full() throws IOException{
- return this.available() >= PipedInputStream.PIPE_SIZE;
- }
- }
-
- public HTMLParser(File file) throws FileNotFoundException {
- this(new FileInputStream(file));
- }
-
- public String getTitle() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
- return title.toString().trim();
- }
-
- public Properties getMetaTags() throws IOException,
-InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (titleComplete || pipeInStream.full())
- break;
- wait(10);
- }
- }
- return metaTags;
- }
-
-
- public String getSummary() throws IOException, InterruptedException {
- if (pipeIn == null)
- getReader(); // spawn parsing thread
- while (true) {
- synchronized(this) {
- if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
- break;
- wait(10);
- }
- }
- if (summary.length() > SUMMARY_LENGTH)
- summary.setLength(SUMMARY_LENGTH);
-
- String sum = summary.toString().trim();
- String tit = getTitle();
- if (sum.startsWith(tit) || sum.equals(""))
- return tit;
- else
- return sum;
- }
-
- public Reader getReader() throws IOException {
- if (pipeIn == null) {
- pipeInStream = new MyPipedInputStream();
- pipeOutStream = new PipedOutputStream(pipeInStream);
- pipeIn = new InputStreamReader(pipeInStream);
- pipeOut = new OutputStreamWriter(pipeOutStream);
-
- Thread thread = new ParserThread(this);
- thread.start(); // start parsing
- }
-
- return pipeIn;
- }
-
- void addToSummary(String text) {
- if (summary.length() < SUMMARY_LENGTH) {
- summary.append(text);
- if (summary.length() >= SUMMARY_LENGTH) {
- synchronized(this) {
- notifyAll();
- }
- }
- }
- }
-
- void addText(String text) throws IOException {
- if (inStyle)
- return;
- if (inTitle)
- title.append(text);
- else {
- addToSummary(text);
- if (!titleComplete && !title.equals("")) { // finished title
- synchronized(this) {
- titleComplete = true; // tell waiting threads
- notifyAll();
- }
- }
- }
-
- length += text.length();
- pipeOut.write(text);
-
- afterSpace = false;
- }
-
- void addMetaTag() throws IOException {
- metaTags.setProperty(currentMetaTag, currentMetaContent);
- currentMetaTag = null;
- currentMetaContent = null;
- return;
- }
-
- void addSpace() throws IOException {
- if (!afterSpace) {
- if (inTitle)
- title.append(" ");
- else
- addToSummary(" ");
-
- String space = afterTag ? eol : " ";
- length += space.length();
- pipeOut.write(space);
- afterSpace = true;
- }
- }
-
-// void handleException(Exception e) {
-// System.out.println(e.toString()); // print the error message
-// System.out.println("Skipping...");
-// Token t;
-// do {
-// t = getNextToken();
-// } while (t.kind != TagEnd);
-// }
-}
-
-PARSER_END(HTMLParser)
-
-
-void HTMLDocument() throws IOException :
-{
- Token t;
-}
-{
-// try {
- ( Tag() { afterTag = true; }
- | t=Decl() { afterTag = true; }
- | CommentTag() { afterTag = true; }
- | ScriptTag() { afterTag = true; }
- | t=<Word> { addText(t.image); afterTag = false; }
- | t=<Entity> { addText(Entities.decode(t.image)); afterTag = false; }
- | t=<Punct> { addText(t.image); afterTag = false; }
- | <Space> { addSpace(); afterTag = false; }
- )* <EOF>
-// } catch (ParseException e) {
-// handleException(e);
-// }
-}
-
-void Tag() throws IOException :
-{
- Token t1, t2;
- boolean inImg = false;
-}
-{
- t1=<TagName> {
- String tagName = t1.image.toLowerCase();
- if(Tags.WS_ELEMS.contains(tagName) ) {
- addSpace();
- }
- inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
- inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
- inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
- inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
- }
- (t1=<ArgName>
- (<ArgEquals>
- (t2=ArgValue() // save ALT text in IMG tag
- {
- if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
- addText("[" + t2.image + "]");
-
- if(inMetaTag &&
- ( t1.image.equalsIgnoreCase("name") ||
- t1.image.equalsIgnoreCase("HTTP-EQUIV")
- )
- && t2 != null)
- {
- currentMetaTag=t2.image.toLowerCase();
- if(currentMetaTag != null && currentMetaContent != null) {
- addMetaTag();
- }
- }
- if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
-null)
- {
- currentMetaContent=t2.image.toLowerCase();
- if(currentMetaTag != null && currentMetaContent != null) {
- addMetaTag();
- }
- }
- }
- )?
- )?
- )*
- <TagEnd>
-}
-
-Token ArgValue() :
-{
- Token t = null;
-}
-{
- t=<ArgValue> { return t; }
-| LOOKAHEAD(2)
- <ArgQuote1> <CloseQuote1> { return t; }
-| <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; }
-| LOOKAHEAD(2)
- <ArgQuote2> <CloseQuote2> { return t; }
-| <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; }
-}
-
-
-Token Decl() :
-{
- Token t;
-}
-{
- t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
- { return t; }
-}
-
-
-void CommentTag() :
-{}
-{
- (<Comment1> ( <CommentText1> )* <CommentEnd1>)
- |
- (<Comment2> ( <CommentText2> )* <CommentEnd2>)
-}
-
-void ScriptTag() :
-{}
-{
- <ScriptStart> ( <ScriptText> )* <ScriptEnd>
-}
-
-
-TOKEN :
-{
- < ScriptStart: "<script" > : WithinScript
-| < TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
-| < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
-
-| < Comment1: "<!--" > : WithinComment1
-| < Comment2: "<!" > : WithinComment2
-
-| < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
- <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ >
-| < #LET: ["A"-"Z","a"-"z","0"-"9"] >
-| < #NUM: ["0"-"9"] >
-
-| < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? ) >
-
-| < Space: (<SP>)+ >
-| < #SP: [" ","\t","\r","\n"] >
-
-| < Punct: ~[] > // Keep this last. It is a catch-all.
-}
-
-<WithinScript> TOKEN:
-{
- < ScriptText: (~["<",">"])+ | "<" | ">" >
-| < ScriptEnd: "</script" (~["<",">"])* ">" > : DEFAULT
-}
-
-<WithinTag> TOKEN:
-{
- < ArgName: (~[" ","\t","\r","\n","=",">","'","\""])
- (~[" ","\t","\r","\n","=",">"])* >
-| < ArgEquals: "=" > : AfterEquals
-| < TagEnd: ">" | "=>" > : DEFAULT
-}
-
-<AfterEquals> TOKEN:
-{
- < ArgValue: (~[" ","\t","\r","\n","=",">","'","\""])
- (~[" ","\t","\r","\n",">"])* > : WithinTag
-}
-
-<WithinTag, AfterEquals> TOKEN:
-{
- < ArgQuote1: "'" > : WithinQuote1
-| < ArgQuote2: "\"" > : WithinQuote2
-}
-
-<WithinTag, AfterEquals> SKIP:
-{
- < <Space> >
-}
-
-<WithinQuote1> TOKEN:
-{
- < Quote1Text: (~["'"])+ >
-| < CloseQuote1: <ArgQuote1> > : WithinTag
-}
-
-<WithinQuote2> TOKEN:
-{
- < Quote2Text: (~["\""])+ >
-| < CloseQuote2: <ArgQuote2> > : WithinTag
-}
-
-
-<WithinComment1> TOKEN :
-{
- < CommentText1: (~["-"])+ | "-" >
-| < CommentEnd1: "-->" > : DEFAULT
-}
-
-<WithinComment2> TOKEN :
-{
- < CommentText2: (~[">"])+ >
-| < CommentEnd2: ">" > : DEFAULT
-}
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// HTMLParser.jj
+
+options {
+ STATIC = false;
+ OPTIMIZE_TOKEN_MANAGER = true;
+ //DEBUG_LOOKAHEAD = true;
+ //DEBUG_TOKEN_MANAGER = true;
+}
+
+PARSER_BEGIN(HTMLParser)
+
+package org.apache.lucene.demo.html;
+
+import java.io.*;
+import java.util.Properties;
+
+public class HTMLParser {
+ public static int SUMMARY_LENGTH = 200;
+
+ StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
+ StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
+ Properties metaTags=new Properties();
+ String currentMetaTag=null;
+ String currentMetaContent=null;
+ int length = 0;
+ boolean titleComplete = false;
+ boolean inTitle = false;
+ boolean inMetaTag = false;
+ boolean inStyle = false;
+ boolean afterTag = false;
+ boolean afterSpace = false;
+ String eol = System.getProperty("line.separator");
+ Reader pipeIn = null;
+ Writer pipeOut;
+ private MyPipedInputStream pipeInStream = null;
+ private PipedOutputStream pipeOutStream = null;
+
+ private class MyPipedInputStream extends PipedInputStream{ // PipedInputStream that can report whether its buffer is full
+
+ public MyPipedInputStream(){
+ super();
+ }
+
+ public MyPipedInputStream(PipedOutputStream src) throws IOException{
+ super(src);
+ }
+
+ public boolean full() throws IOException{ // true when at least PIPE_SIZE bytes are waiting to be read
+ return this.available() >= PipedInputStream.PIPE_SIZE;
+ }
+ }
+
+ /**
+ * @deprecated Use HTMLParser(FileInputStream) instead
+ */
+ public HTMLParser(File file) throws FileNotFoundException {
+ this(new FileInputStream(file));
+ }
+
+ public String getTitle() throws IOException, InterruptedException { // returns the trimmed title text, waiting until it is flagged complete or the pipe is full
+ if (pipeIn == null)
+ getReader(); // spawn parsing thread
+ while (true) {
+ synchronized(this) {
+ if (titleComplete || pipeInStream.full()) // stop waiting once the title is done or the pipe buffer is full
+ break;
+ wait(10); // re-check after at most 10 ms, or earlier on notifyAll()
+ }
+ }
+ return title.toString().trim();
+ }
+
+ public Properties getMetaTags() throws IOException, // returns the collected meta tags after the same wait condition as getTitle()
+InterruptedException {
+ if (pipeIn == null)
+ getReader(); // spawn parsing thread
+ while (true) {
+ synchronized(this) {
+ if (titleComplete || pipeInStream.full()) // keyed on titleComplete; there is no separate flag for meta tags
+ break;
+ wait(10); // re-check after at most 10 ms, or earlier on notifyAll()
+ }
+ }
+ return metaTags;
+ }
+
+
+ public String getSummary() throws IOException, InterruptedException { // returns up to SUMMARY_LENGTH characters of body text, or the title when the summary is empty or starts with it
+ if (pipeIn == null)
+ getReader(); // spawn parsing thread
+ while (true) {
+ synchronized(this) {
+ if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full()) // enough text gathered, or the pipe buffer is full
+ break;
+ wait(10); // re-check after at most 10 ms, or earlier on notifyAll()
+ }
+ }
+ if (summary.length() > SUMMARY_LENGTH)
+ summary.setLength(SUMMARY_LENGTH); // truncate the shared buffer in place
+
+ String sum = summary.toString().trim();
+ String tit = getTitle();
+ if (sum.startsWith(tit) || sum.equals(""))
+ return tit;
+ else
+ return sum;
+ }
+
+ public Reader getReader() throws IOException { // returns the reader side of the text pipe, creating the pipe and starting the parser thread on first use
+ if (pipeIn == null) {
+ pipeInStream = new MyPipedInputStream();
+ pipeOutStream = new PipedOutputStream(pipeInStream);
+ pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE"); // same charset on both ends of the pipe
+ pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
+
+ Thread thread = new ParserThread(this);
+ thread.start(); // start parsing
+ }
+
+ return pipeIn;
+ }
+
+ void addToSummary(String text) { // appends text to the summary until it reaches SUMMARY_LENGTH, then wakes waiting threads
+ if (summary.length() < SUMMARY_LENGTH) {
+ summary.append(text); // may overshoot SUMMARY_LENGTH; getSummary() truncates
+ if (summary.length() >= SUMMARY_LENGTH) {
+ synchronized(this) {
+ notifyAll(); // wakes threads blocked in wait() on this parser
+ }
+ }
+ }
+ }
+
+ void addText(String text) throws IOException { // routes text to the title or the summary and writes it to the output pipe
+ if (inStyle)
+ return; // text inside a style element is dropped entirely
+ if (inTitle)
+ title.append(text);
+ else {
+ addToSummary(text);
+ if (!titleComplete && title.length() > 0) { // finished title; was !title.equals(""), always true because StringBuffer does not override equals
+ synchronized(this) {
+ titleComplete = true; // tell waiting threads
+ notifyAll();
+ }
+ }
+ }
+
+ length += text.length();
+ pipeOut.write(text); // NOTE(review): for title-less input titleComplete must now be set elsewhere (e.g. when parsing ends) -- confirm in ParserThread
+
+ afterSpace = false;
+ }
+
+ void addMetaTag() { // stores the pending name/content pair in metaTags and clears both pending fields
+ metaTags.setProperty(currentMetaTag, currentMetaContent);
+ currentMetaTag = null;
+ currentMetaContent = null;
+ return;
+ }
+
+ void addSpace() throws IOException { // emits one separator, collapsing consecutive spaces
+ if (!afterSpace) {
+ if (inTitle)
+ title.append(" ");
+ else
+ addToSummary(" ");
+
+ String space = afterTag ? eol : " "; // the pipe gets a line break after a tag, a blank otherwise
+ length += space.length();
+ pipeOut.write(space);
+ afterSpace = true;
+ }
+ }
+
+// void handleException(Exception e) {
+// System.out.println(e.toString()); // print the error message
+// System.out.println("Skipping...");
+// Token t;
+// do {
+// t = getNextToken();
+// } while (t.kind != TagEnd);
+// }
+}
+
+PARSER_END(HTMLParser)
+
+
+void HTMLDocument() throws IOException : // top-level production: any mix of tags, declarations, comments, scripts and text, up to EOF
+{
+ Token t;
+}
+{
+// try {
+ ( Tag() { afterTag = true; }
+ | t=Decl() { afterTag = true; }
+ | CommentTag() { afterTag = true; }
+ | ScriptTag() { afterTag = true; }
+ | t=<Word> { addText(t.image); afterTag = false; }
+ | t=<Entity> { addText(Entities.decode(t.image)); afterTag = false; } // entity text is decoded before being emitted
+ | t=<Punct> { addText(t.image); afterTag = false; }
+ | <Space> { addSpace(); afterTag = false; }
+ )* <EOF>
+// } catch (ParseException e) {
+// handleException(e);
+// }
+}
+
+void Tag() throws IOException : // one tag: name, then attributes, then TagEnd; updates the in* flags and harvests IMG alt text and META pairs
+{
+ Token t1, t2;
+ boolean inImg = false;
+}
+{
+ t1=<TagName> {
+ String tagName = t1.image.toLowerCase();
+ if(Tags.WS_ELEMS.contains(tagName) ) {
+ addSpace();
+ }
+ inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
+ inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
+ inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
+ inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
+ }
+ (t1=<ArgName>
+ (<ArgEquals>
+ (t2=ArgValue() // save ALT text in IMG tag
+ {
+ if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
+ addText("[" + t2.image + "]");
+
+ if(inMetaTag &&
+ ( t1.image.equalsIgnoreCase("name") ||
+ t1.image.equalsIgnoreCase("HTTP-EQUIV")
+ )
+ && t2 != null)
+ {
+ currentMetaTag=t2.image.toLowerCase();
+ if(currentMetaTag != null && currentMetaContent != null) { // both halves seen: store the pair
+ addMetaTag();
+ }
+ }
+ if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
+null)
+ {
+ currentMetaContent=t2.image.toLowerCase();
+ if(currentMetaTag != null && currentMetaContent != null) { // both halves seen: store the pair
+ addMetaTag();
+ }
+ }
+ }
+ )?
+ )?
+ )*
+ <TagEnd>
+}
+
+Token ArgValue() : // Parses one attribute value; returns its text token, or null for an empty quoted value.
+{
+ Token t = null; // stays null when a quote pair encloses no text
+}
+{
+ t=<ArgValue> { return t; } // unquoted value
+| LOOKAHEAD(2) // two tokens of lookahead tell '' apart from 'text'
+ <ArgQuote1> <CloseQuote1> { return t; } // empty single-quoted value: t is still null
+| <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; } // single-quoted value
+| LOOKAHEAD(2) // same disambiguation for double quotes
+ <ArgQuote2> <CloseQuote2> { return t; } // empty double-quoted value: t is still null
+| <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; } // double-quoted value
+}
+
+
+Token Decl() : // Parses a declaration ("<!name ... >") and returns its <DeclName> token.
+{
+ Token t;
+}
+{
+ t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd> // everything after the name is consumed and discarded
+ { return t; }
+}
+
+
+void CommentTag() : // Consumes one complete comment; its text tokens are discarded.
+{}
+{
+ (<Comment1> ( <CommentText1> )* <CommentEnd1>) // form opened by "<!--" and closed by "-->"
+ |
+ (<Comment2> ( <CommentText2> )* <CommentEnd2>) // form opened by "<!" and closed by the next ">"
+}
+
+void ScriptTag() : // Consumes a script element from <ScriptStart> through <ScriptEnd>.
+{}
+{
+ <ScriptStart> ( <ScriptText> )* <ScriptEnd> // script text is matched but no action uses it
+}
+
+
+TOKEN : // Tokens of the default lexical state, i.e. document content outside tags, comments and scripts.
+{
+ < ScriptStart: "<script" > : WithinScript // NOTE(review): literal is lower-case; matches other casings only if IGNORE_CASE is set in the options block - confirm
+| < TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag // start or end tag name, e.g. "<a" or "</a"
+| < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag // "<!" immediately followed by a letter
+
+| < Comment1: "<!--" > : WithinComment1 // comment closed by "-->"
+| < Comment2: "<!" > : WithinComment2 // bare "<!"; closed by the next ">"
+
+| < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] | // one or more pieces: alphanumeric, alphanumeric plus "+"/"/" run, digit plus double quote,
+ <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ > // hyphen/apostrophe-joined pair, or digits joined by "," or "." with optional "$"
+| < #LET: ["A"-"Z","a"-"z","0"-"9"] > // private helper: letter or digit
+| < #NUM: ["0"-"9"] > // private helper: decimal digit
+| < #HEX: ["0"-"9","A"-"F","a"-"f"] > // private helper: hexadecimal digit
+
+| < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? | "&" "#" ["X","x"] (<HEX>)+ (";")? ) > // named, decimal or hex reference; trailing ";" optional
+
+| < Space: (<SP>)+ > // run of whitespace
+| < #SP: [" ","\t","\r","\n"] > // private helper: one whitespace character
+
+| < Punct: ~[] > // Keep this last. It is a catch-all.
+}
+
+<WithinScript> TOKEN: // Tokens recognized after <ScriptStart>.
+{
+ < ScriptText: (~["<",">"])+ | "<" | ">" > // script content: a run without angle brackets, or a single "<" or ">"
+| < ScriptEnd: "</script" (~["<",">"])* ">" > : DEFAULT // closing script tag; returns to the default state
+}
+
+<WithinTag> TOKEN: // Tokens recognized inside a tag, after its name.
+{
+ < ArgName: (~[" ","\t","\r","\n","=",">","'","\""]) // first character may not be whitespace, "=", ">" or a quote
+ (~[" ","\t","\r","\n","=",">"])* > // the name then runs until whitespace, "=" or ">"
+| < ArgEquals: "=" > : AfterEquals // a value is expected next
+| < TagEnd: ">" | "=>" > : DEFAULT // end of tag ("=>" is accepted as well); back to the default state
+}
+
+<AfterEquals> TOKEN: // Token recognized directly after "=" inside a tag.
+{
+ < ArgValue: (~[" ","\t","\r","\n","=",">","'","\""]) // unquoted value: first character may not be whitespace, "=", ">" or a quote
+ (~[" ","\t","\r","\n",">"])* > : WithinTag // runs until whitespace or ">"; later "=" and quotes are part of the value
+}
+
+<WithinTag, AfterEquals> TOKEN: // Opening quotes, recognized both inside a tag and right after "=".
+{
+ < ArgQuote1: "'" > : WithinQuote1 // single quote opens a single-quoted value
+| < ArgQuote2: "\"" > : WithinQuote2 // double quote opens a double-quoted value
+}
+
+<WithinTag, AfterEquals> SKIP: // Whitespace inside a tag is skipped and never reaches the parser.
+{
+ < <Space> >
+}
+
+<WithinQuote1> TOKEN: // Tokens recognized inside a single-quoted value.
+{
+ < Quote1Text: (~["'"])+ > // everything up to the next single quote
+| < CloseQuote1: <ArgQuote1> > : WithinTag // closing quote; back to tag state
+}
+
+<WithinQuote2> TOKEN: // Tokens recognized inside a double-quoted value.
+{
+ < Quote2Text: (~["\""])+ > // everything up to the next double quote
+| < CloseQuote2: <ArgQuote2> > : WithinTag // closing quote; back to tag state
+}
+
+
+<WithinComment1> TOKEN : // Tokens recognized after "<!--".
+{
+ < CommentText1: (~["-"])+ | "-" > // a run without "-", or one "-" at a time so that "-->" can still win as the longer match
+| < CommentEnd1: "-->" > : DEFAULT // end of comment; back to the default state
+}
+
+<WithinComment2> TOKEN : // Tokens recognized after a bare "<!".
+{
+ < CommentText2: (~[">"])+ > // everything up to the next ">"
+| < CommentEnd2: ">" > : DEFAULT // end of comment; back to the default state
+}