You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Otis Gospodnetic <ot...@yahoo.com> on 2002/10/17 20:31:25 UTC

Re: stopwords

Thanks.  I may stick this in the Lucene CVS repository somewhere.

Otis

--- John Caron <ca...@unidata.ucar.edu> wrote:
> I am just starting to use Lucene, and it is very impressive! I hope
> to try 
> Dmitri's new term vectors when he gets them in, in order to do vector
> model 
> research, in particular LSA. I will port my existing code to use
> the Lucene 
> framework, and make it available when it is ready.
> 
> I am appending a longer list of stop words, mostly from SMART, in
> case these are 
> useful to anyone.
> 
> Thanks again!
> 
> private static String smart[] =  {
>    "a",
>    "able",
>    "about",
>    "above",
>    "according",
>    "accordingly",
>    "across",
>    "actually",
>    "after",
>    "afterwards",
>    "again",
>    "against",
>    "all",
>    "allow",
>    "allows",
>    "almost",
>    "alone",
>    "along",
>    "already",
>    "also",
>    "although",
>    "always",
>    "am",
>    "among",
>    "amongst",
>    "an",
>    "and",
>    "another",
>    "any",
>    "anybody",
>    "anyhow",
>    "anyone",
>    "anything",
>    "anyway",
>    "anyways",
>    "anywhere",
>    "apart",
>    "appear",
>    "appreciate",
>    "appropriate",
>    "are",
>    "around",
>    "as",
>    "aside",
>    "ask",
>    "asking",
>    "associated",
>    "at",
>    "available",
>    "away",
>    "awfully",
>    "b",
>    "be",
>    "became",
>    "because",
>    "become",
>    "becomes",
>    "becoming",
>    "been",
>    "before",
>    "beforehand",
>    "behind",
>    "being",
>    "believe",
>    "below",
>    "beside",
>    "besides",
>    "best",
>    "better",
>    "between",
>    "beyond",
>    "both",
>    "brief",
>    "but",
>    "by",
>    "c",
>    "came",
>    "can",
>    "cannot",
>    "cant",
>    "cause",
>    "causes",
>    "certain",
>    "certainly",
>    "changes",
>    "clearly",
>    "co",
>    "com",
>    "come",
>    "comes",
>    "concerning",
>    "consequently",
>    "consider",
>    "considering",
>    "contain",
>    "containing",
>    "contains",
>    "corresponding",
>    "could",
>    "course",
>    "currently",
>    "d",
>    "definitely",
>    "described",
>    "despite",
>    "did",
>    "different",
>    "do",
>    "does",
>    "doing",
>    "done",
>    "down",
>    "downwards",
>    "during",
>    "e",
>    "each",
>    "edu",
>    "eg",
>    "eight",
>    "either",
>    "else",
>    "elsewhere",
>    "enough",
>    "entirely",
>    "especially",
>    "et",
>    "etc",
>    "even",
>    "ever",
>    "every",
>    "everybody",
>    "everyone",
>    "everything",
>    "everywhere",
>    "ex",
>    "exactly",
>    "example",
>    "except",
>    "f",
>    "far",
>    "few",
>    "fifth",
>    "first",
>    "five",
>    "followed",
>    "following",
>    "follows",
>    "for",
>    "former",
>    "formerly",
>    "forth",
>    "four",
>    "from",
>    "further",
>    "furthermore",
>    "g",
>    "get",
>    "gets",
>    "getting",
>    "given",
>    "gives",
>    "go",
>    "goes",
>    "going",
>    "gone",
>    "got",
>    "gotten",
>    "greetings",
>    "h",
>    "had",
>    "happens",
>    "hardly",
>    "has",
>    "have",
>    "having",
>    "he",
>    "hello",
>    "help",
>    "hence",
>    "her",
>    "here",
>    "hereafter",
>    "hereby",
>    "herein",
>    "hereupon",
>    "hers",
>    "herself",
>    "hi",
>    "him",
>    "himself",
>    "his",
>    "hither",
>    "hopefully",
>    "how",
>    "howbeit",
>    "however",
>    "i",
>    "ie",
>    "if",
>    "ignored",
>    "immediate",
>    "in",
>    "inasmuch",
>    "inc",
>    "indeed",
>    "indicate",
>    "indicated",
>    "indicates",
>    "inner",
>    "insofar",
>    "instead",
>    "into",
>    "inward",
>    "is",
>    "it",
>    "its",
>    "itself",
>    "j",
>    "just",
>    "k",
>    "keep",
>    "keeps",
>    "kept",
>    "know",
>    "knows",
>    "known",
>    "l",
>    "last",
>    "lately",
>    "later",
>    "latter",
>    "latterly",
>    "least",
>    "less",
>    "lest",
>    "let",
>    "like",
>    "liked",
>    "likely",
>    "little",
>    "look",
>    "looking",
>    "looks",
>    "ltd",
>    "m",
>    "mainly",
>    "many",
>    "may",
>    "maybe",
>    "me",
>    "mean",
>    "meanwhile",
>    "merely",
>    "might",
>    "more",
>    "moreover",
>    "most",
>    "mostly",
>    "much",
>    "must",
>    "my",
>    "myself",
>    "n",
>    "name",
>    "namely",
>    "nd",
>    "near",
>    "nearly",
>    "necessary",
>    "need",
>    "needs",
>    "neither",
>    "never",
>    "nevertheless",
>    "new",
>    "next",
>    "nine",
>    "no",
>    "nobody",
>    "non",
>    "none",
>    "noone",
>    "nor",
>    "normally",
>    "not",
>    "nothing",
>    "novel",
>    "now",
>    "nowhere",
>    "o",
>    "obviously",
>    "of",
>    "off",
>    "often",
>    "oh",
>    "ok",
>    "okay",
>    "old",
>    "on",
>    "once",
>    "one",
>    "ones",
>    "only",
>    "onto",
>    "or",
>    "other",
>    "others",
>    "otherwise",
>    "ought",
>    "our",
>    "ours",
>    "ourselves",
>    "out",
>    "outside",
>    "over",
>    "overall",
>    "own",
>    "p",
>    "particular",
>    "particularly",
>    "per",
>    "perhaps",
>    "placed",
>    "please",
>    "plus",
>    "possible",
>    "presumably",
>    "probably",
>    "provides",
>    "q",
>    "que",
>    "quite",
>    "qv",
>    "r",
>    "rather",
>    "rd",
>    "re",
>    "really",
>    "reasonably",
>    "regarding",
>    "regardless",
>    "regards",
>    "relatively",
>    "respectively",
>    "right",
>    "s",
>    "said",
>    "same",
>    "saw",
>    "say",
>    "saying",
>    "says",
>    "second",
>    "secondly",
>    "see",
>    "seeing",
>    "seem",
>    "seemed",
>    "seeming",
>    "seems",
>    "seen",
>    "self",
>    "selves",
>    "sensible",
>    "sent",
>    "serious",
>    "seriously",
>    "seven",
>    "several",
>    "shall",
>    "she",
>    "should",
>    "since",
>    "six",
>    "so",
>    "some",
>    "somebody",
>    "somehow",
>    "someone",
>    "something",
>    "sometime",
>    "sometimes",
>    "somewhat",
>    "somewhere",
>    "soon",
>    "sorry",
>    "specified",
>    "specify",
>    "specifying",
>    "still",
>    "sub",
>    "such",
>    "sup",
>    "sure",
>    "t",
>    "take",
>    "taken",
>    "tell",
>    "tends",
>    "th",
>    "than",
>    "thank",
>    "thanks",
>    "thanx",
>    "that",
>    "thats",
>    "the",
>    "their",
>    "theirs",
>    "them",
>    "themselves",
>    "then",
>    "thence",
>    "there",
>    "thereafter",
>    "thereby",
>    "therefore",
>    "therein",
>    "theres",
>    "thereupon",
>    "these",
>    "they",
>    "think",
>    "third",
>    "this",
>    "thorough",
>    "thoroughly",
>    "those",
>    "though",
>    "three",
>    "through",
>    "throughout",
>    "thru",
>    "thus",
>    "to",
>    "together",
>    "too",
>    "took",
>    "toward",
>    "towards",
>    "tried",
>    "tries",
>    "truly",
>    "try",
>    "trying",
>    "twice",
>    "two",
>    "u",
>    "un",
>    "under",
>    "unfortunately",
>    "unless",
>    "unlikely",
>    "until",
>    "unto",
>    "up",
>    "upon",
>    "us",
>    "use",
>    "used",
>    "useful",
>    "uses",
>    "using",
>    "usually",
>    "uucp",
>    "v",
>    "value",
>    "various",
>    "very",
>    "via",
>    "viz",
>    "vs",
>    "w",
>    "want",
>    "wants",
>    "was",
>    "way",
>    "we",
>    "welcome",
>    "well",
>    "went",
>    "were",
>    "what",
>    "whatever",
>    "when",
>    "whence",
>    "whenever",
>    "where",
>    "whereafter",
>    "whereas",
>    "whereby",
>    "wherein",
>    "whereupon",
>    "wherever",
>    "whether",
>    "which",
>    "while",
>    "whither",
>    "who",
>    "whoever",
>    "whole",
>    "whom",
>    "whose",
>    "why",
>    "will",
>    "willing",
>    "wish",
>    "with",
>    "within",
>    "without",
>    "wonder",
>    "would",
>    "would",
>    "x",
>    "y",
>    "yes",
>    "yet",
>    "you",
>    "your",
>    "yours",
>    "yourself",
>    "yourselves",
>    "z",
>    "zero"
>    };
> 
> 
> --
> To unsubscribe, e-mail:  
> <ma...@jakarta.apache.org>
> For additional commands, e-mail:
> <ma...@jakarta.apache.org>
> 


__________________________________________________
Do you Yahoo!?
Faith Hill - Exclusive Performances, Videos & More
http://faith.yahoo.com

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>