You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Otis Gospodnetic <ot...@yahoo.com> on 2002/10/17 20:31:25 UTC
Re: stopwords
Thanks. I may stick this in the Lucene CVS repository somewhere.
Otis
--- John Caron <ca...@unidata.ucar.edu> wrote:
> I am just starting to use Lucene, and it is very impressive! I hope to try
> Dmitri's new term vectors when he gets them in, in order to do vector model
> research, in particular LSA. I will port my existing code to use the Lucene
> framework, and make it available when it is ready.
>
> I am appending a longer list of stop words, mostly from SMART, in case these
> are useful to anyone.
>
> Thanks again!
>
> private static String smart[] = {
> "a",
> "able",
> "about",
> "above",
> "according",
> "accordingly",
> "across",
> "actually",
> "after",
> "afterwards",
> "again",
> "against",
> "all",
> "allow",
> "allows",
> "almost",
> "alone",
> "along",
> "already",
> "also",
> "although",
> "always",
> "am",
> "among",
> "amongst",
> "an",
> "and",
> "another",
> "any",
> "anybody",
> "anyhow",
> "anyone",
> "anything",
> "anyway",
> "anyways",
> "anywhere",
> "apart",
> "appear",
> "appreciate",
> "appropriate",
> "are",
> "around",
> "as",
> "aside",
> "ask",
> "asking",
> "associated",
> "at",
> "available",
> "away",
> "awfully",
> "b",
> "be",
> "became",
> "because",
> "become",
> "becomes",
> "becoming",
> "been",
> "before",
> "beforehand",
> "behind",
> "being",
> "believe",
> "below",
> "beside",
> "besides",
> "best",
> "better",
> "between",
> "beyond",
> "both",
> "brief",
> "but",
> "by",
> "c",
> "came",
> "can",
> "cannot",
> "cant",
> "cause",
> "causes",
> "certain",
> "certainly",
> "changes",
> "clearly",
> "co",
> "com",
> "come",
> "comes",
> "concerning",
> "consequently",
> "consider",
> "considering",
> "contain",
> "containing",
> "contains",
> "corresponding",
> "could",
> "course",
> "currently",
> "d",
> "definitely",
> "described",
> "despite",
> "did",
> "different",
> "do",
> "does",
> "doing",
> "done",
> "down",
> "downwards",
> "during",
> "e",
> "each",
> "edu",
> "eg",
> "eight",
> "either",
> "else",
> "elsewhere",
> "enough",
> "entirely",
> "especially",
> "et",
> "etc",
> "even",
> "ever",
> "every",
> "everybody",
> "everyone",
> "everything",
> "everywhere",
> "ex",
> "exactly",
> "example",
> "except",
> "f",
> "far",
> "few",
> "fifth",
> "first",
> "five",
> "followed",
> "following",
> "follows",
> "for",
> "former",
> "formerly",
> "forth",
> "four",
> "from",
> "further",
> "furthermore",
> "g",
> "get",
> "gets",
> "getting",
> "given",
> "gives",
> "go",
> "goes",
> "going",
> "gone",
> "got",
> "gotten",
> "greetings",
> "h",
> "had",
> "happens",
> "hardly",
> "has",
> "have",
> "having",
> "he",
> "hello",
> "help",
> "hence",
> "her",
> "here",
> "hereafter",
> "hereby",
> "herein",
> "hereupon",
> "hers",
> "herself",
> "hi",
> "him",
> "himself",
> "his",
> "hither",
> "hopefully",
> "how",
> "howbeit",
> "however",
> "i",
> "ie",
> "if",
> "ignored",
> "immediate",
> "in",
> "inasmuch",
> "inc",
> "indeed",
> "indicate",
> "indicated",
> "indicates",
> "inner",
> "insofar",
> "instead",
> "into",
> "inward",
> "is",
> "it",
> "its",
> "itself",
> "j",
> "just",
> "k",
> "keep",
> "keeps",
> "kept",
> "know",
> "knows",
> "known",
> "l",
> "last",
> "lately",
> "later",
> "latter",
> "latterly",
> "least",
> "less",
> "lest",
> "let",
> "like",
> "liked",
> "likely",
> "little",
> "look",
> "looking",
> "looks",
> "ltd",
> "m",
> "mainly",
> "many",
> "may",
> "maybe",
> "me",
> "mean",
> "meanwhile",
> "merely",
> "might",
> "more",
> "moreover",
> "most",
> "mostly",
> "much",
> "must",
> "my",
> "myself",
> "n",
> "name",
> "namely",
> "nd",
> "near",
> "nearly",
> "necessary",
> "need",
> "needs",
> "neither",
> "never",
> "nevertheless",
> "new",
> "next",
> "nine",
> "no",
> "nobody",
> "non",
> "none",
> "noone",
> "nor",
> "normally",
> "not",
> "nothing",
> "novel",
> "now",
> "nowhere",
> "o",
> "obviously",
> "of",
> "off",
> "often",
> "oh",
> "ok",
> "okay",
> "old",
> "on",
> "once",
> "one",
> "ones",
> "only",
> "onto",
> "or",
> "other",
> "others",
> "otherwise",
> "ought",
> "our",
> "ours",
> "ourselves",
> "out",
> "outside",
> "over",
> "overall",
> "own",
> "p",
> "particular",
> "particularly",
> "per",
> "perhaps",
> "placed",
> "please",
> "plus",
> "possible",
> "presumably",
> "probably",
> "provides",
> "q",
> "que",
> "quite",
> "qv",
> "r",
> "rather",
> "rd",
> "re",
> "really",
> "reasonably",
> "regarding",
> "regardless",
> "regards",
> "relatively",
> "respectively",
> "right",
> "s",
> "said",
> "same",
> "saw",
> "say",
> "saying",
> "says",
> "second",
> "secondly",
> "see",
> "seeing",
> "seem",
> "seemed",
> "seeming",
> "seems",
> "seen",
> "self",
> "selves",
> "sensible",
> "sent",
> "serious",
> "seriously",
> "seven",
> "several",
> "shall",
> "she",
> "should",
> "since",
> "six",
> "so",
> "some",
> "somebody",
> "somehow",
> "someone",
> "something",
> "sometime",
> "sometimes",
> "somewhat",
> "somewhere",
> "soon",
> "sorry",
> "specified",
> "specify",
> "specifying",
> "still",
> "sub",
> "such",
> "sup",
> "sure",
> "t",
> "take",
> "taken",
> "tell",
> "tends",
> "th",
> "than",
> "thank",
> "thanks",
> "thanx",
> "that",
> "thats",
> "the",
> "their",
> "theirs",
> "them",
> "themselves",
> "then",
> "thence",
> "there",
> "thereafter",
> "thereby",
> "therefore",
> "therein",
> "theres",
> "thereupon",
> "these",
> "they",
> "think",
> "third",
> "this",
> "thorough",
> "thoroughly",
> "those",
> "though",
> "three",
> "through",
> "throughout",
> "thru",
> "thus",
> "to",
> "together",
> "too",
> "took",
> "toward",
> "towards",
> "tried",
> "tries",
> "truly",
> "try",
> "trying",
> "twice",
> "two",
> "u",
> "un",
> "under",
> "unfortunately",
> "unless",
> "unlikely",
> "until",
> "unto",
> "up",
> "upon",
> "us",
> "use",
> "used",
> "useful",
> "uses",
> "using",
> "usually",
> "uucp",
> "v",
> "value",
> "various",
> "very",
> "via",
> "viz",
> "vs",
> "w",
> "want",
> "wants",
> "was",
> "way",
> "we",
> "welcome",
> "well",
> "went",
> "were",
> "what",
> "whatever",
> "when",
> "whence",
> "whenever",
> "where",
> "whereafter",
> "whereas",
> "whereby",
> "wherein",
> "whereupon",
> "wherever",
> "whether",
> "which",
> "while",
> "whither",
> "who",
> "whoever",
> "whole",
> "whom",
> "whose",
> "why",
> "will",
> "willing",
> "wish",
> "with",
> "within",
> "without",
> "wonder",
> "would",
> "x",
> "y",
> "yes",
> "yet",
> "you",
> "your",
> "yours",
> "yourself",
> "yourselves",
> "z",
> "zero"
> };
>
>
> --
> To unsubscribe, e-mail:
> <ma...@jakarta.apache.org>
> For additional commands, e-mail:
> <ma...@jakarta.apache.org>
>
__________________________________________________
Do you Yahoo!?
Faith Hill - Exclusive Performances, Videos & More
http://faith.yahoo.com
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>