You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2013/12/03 22:41:05 UTC

[2/9] PORTED KStemmer and KStemFilter

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1a18965e/src/contrib/Analyzers/En/KStemData8.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/En/KStemData8.cs b/src/contrib/Analyzers/En/KStemData8.cs
new file mode 100644
index 0000000..99a7a82
--- /dev/null
+++ b/src/contrib/Analyzers/En/KStemData8.cs
@@ -0,0 +1,655 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a C# version of Bob Krovetz' KStem.
+ *
+ * Ported from the Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis.En
+{
+    /** A list of words used by KStem.
+     */
+    internal class KStemData8
+    {
+        private KStemData8() // Private and empty: blocks instantiation from outside this class.
+        {
+        }
+
+        internal static readonly String[] data =
+            {
+                "tenor", "tenpin", "tense", "tensile", "tension",
+                "tent", "tentacle", "tentative", "tenterhooks", "tenuity",
+                "tenuous", "tenure", "tepee", "tepid", "tequila",
+                "tercentenary", "tercentennial", "term", "termagant", "terminable",
+                "terminal", "terminate", "termination", "terminology", "terminus",
+                "termite", "terms", "tern", "terpsichorean", "terrace",
+                "terracotta", "terrain", "terrapin", "terrestrial", "terrible",
+                "terribly", "terrier", "terrific", "terrifically", "terrify",
+                "territorial", "territory", "terror", "terrorise", "terrorism",
+                "terrorize", "terrycloth", "terse", "tertian", "tertiary",
+                "terylene", "tessellated", "test", "testament", "testamentary",
+                "testate", "testator", "tester", "testicle", "testify",
+                "testimonial", "testimony", "testis", "testy", "tetanus",
+                "tetchy", "tether", "teutonic", "text", "textbook",
+                "textile", "textual", "texture", "thalidomide", "than",
+                "thane", "thank", "thankful", "thankless", "thanks",
+                "thanksgiving", "thankyou", "that", "thatch", "thaw",
+                "the", "theater", "theatergoer", "theatre", "theatregoer",
+                "theatrical", "theatricals", "thee", "theft", "thegn",
+                "their", "theirs", "theism", "them", "theme",
+                "themselves", "then", "thence", "thenceforth", "theocracy",
+                "theocratic", "theodolite", "theologian", "theology", "theorem",
+                "theoretical", "theoretically", "theorise", "theorist", "theorize",
+                "theory", "theosophy", "therapeutic", "therapeutics", "therapist",
+                "therapy", "there", "thereabouts", "thereafter", "thereby",
+                "therefore", "therein", "thereinafter", "thereof", "thereon",
+                "thereto", "thereunder", "thereupon", "therm", "thermal",
+                "thermionic", "thermionics", "thermodynamics", "thermometer", "thermonuclear",
+                "thermoplastic", "thermos", "thermosetting", "thermostat", "thesaurus",
+                "these", "thesis", "thespian", "thews", "they",
+                "thick", "thicken", "thickener", "thicket", "thickheaded",
+                "thickness", "thickset", "thief", "thieve", "thieving",
+                "thievish", "thigh", "thimble", "thimbleful", "thin",
+                "thine", "thing", "thingamajig", "thingamujig", "things",
+                "think", "thinkable", "thinking", "thinner", "third",
+                "thirst", "thirsty", "thirteen", "thirty", "this",
+                "thistle", "thistledown", "thither", "thole", "thong",
+                "thorax", "thorn", "thorny", "thorough", "thoroughbred",
+                "thoroughfare", "thoroughgoing", "those", "thou", "though",
+                "thought", "thoughtful", "thoughtless", "thousand", "thraldom",
+                "thrall", "thralldom", "thrash", "thrashing", "thread",
+                "threadbare", "threadlike", "threat", "threaten", "three",
+                "threepence", "threnody", "thresh", "thresher", "threshold",
+                "threw", "thrice", "thrift", "thrifty", "thrill",
+                "thriller", "thrive", "throat", "throaty", "throb",
+                "throes", "thrombosis", "throne", "throng", "throstle",
+                "throttle", "through", "throughout", "throughput", "throughway",
+                "throw", "throwaway", "throwback", "thru", "thrum",
+                "thrush", "thrust", "thruster", "thruway", "thud",
+                "thug", "thuggery", "thumb", "thumbnail", "thumbscrew",
+                "thumbtack", "thump", "thumping", "thunder", "thunderbolt",
+                "thunderclap", "thundercloud", "thundering", "thunderous", "thunderstorm",
+                "thunderstruck", "thundery", "thurible", "thursday", "thus",
+                "thwack", "thwart", "thy", "thyme", "thyroid",
+                "thyself", "tiara", "tibia", "tic", "tick",
+                "ticker", "tickertape", "ticket", "ticking", "tickle",
+                "tickler", "ticklish", "tidal", "tidbit", "tiddler",
+                "tiddley", "tiddleywinks", "tiddly", "tiddlywinks", "tide",
+                "tidemark", "tidewater", "tideway", "tidings", "tidy",
+                "tie", "tiebreaker", "tiepin", "tier", "tiff",
+                "tiffin", "tig", "tiger", "tigerish", "tight",
+                "tighten", "tightfisted", "tightrope", "tights", "tightwad",
+                "tigress", "tike", "tilde", "tile", "till",
+                "tillage", "tiller", "tilt", "timber", "timbered",
+                "timberline", "timbre", "timbrel", "time", "timekeeper",
+                "timeless", "timely", "timepiece", "timer", "times",
+                "timesaving", "timeserver", "timeserving", "timetable", "timework",
+                "timeworn", "timid", "timing", "timorous", "timothy",
+                "timpani", "timpanist", "tin", "tincture", "tinder",
+                "tinderbox", "tinfoil", "ting", "tingaling", "tinge",
+                "tingle", "tinker", "tinkle", "tinny", "tinplate",
+                "tinsel", "tint", "tintack", "tintinnabulation", "tiny",
+                "tip", "tippet", "tipple", "tipstaff", "tipster",
+                "tipsy", "tiptoe", "tirade", "tire", "tired",
+                "tireless", "tiresome", "tiro", "tissue", "tit",
+                "titan", "titanic", "titanium", "titbit", "titfer",
+                "tithe", "titillate", "titivate", "title", "titled",
+                "titleholder", "titmouse", "titter", "tittivate", "tittle",
+                "titty", "titular", "tizzy", "tnt", "toad",
+                "toadstool", "toady", "toast", "toaster", "toastmaster",
+                "tobacco", "tobacconist", "toboggan", "toccata", "tocsin",
+                "tod", "today", "toddle", "toddler", "toddy",
+                "toe", "toehold", "toenail", "toff", "toffee",
+                "toffy", "tog", "toga", "together", "togetherness",
+                "toggle", "togs", "toil", "toilet", "toiletries",
+                "toiletry", "toils", "tokay", "token", "told",
+                "tolerable", "tolerably", "tolerance", "tolerant", "tolerate",
+                "toleration", "toll", "tollgate", "tollhouse", "tomahawk",
+                "tomato", "tomb", "tombola", "tomboy", "tombstone",
+                "tomcat", "tome", "tomfoolery", "tommyrot", "tomorrow",
+                "tomtit", "ton", "tonal", "tonality", "tone",
+                "toneless", "tong", "tongs", "tongue", "tonic",
+                "tonight", "tonnage", "tonne", "tonsil", "tonsilitis",
+                "tonsillitis", "tonsorial", "tonsure", "tontine", "too",
+                "took", "tool", "toot", "tooth", "toothache",
+                "toothbrush", "toothcomb", "toothpaste", "toothpick", "toothsome",
+                "toothy", "tootle", "toots", "tootsie", "top",
+                "topaz", "topcoat", "topdressing", "topee", "topgallant",
+                "topi", "topiary", "topic", "topical", "topicality",
+                "topknot", "topless", "topmast", "topmost", "topographer",
+                "topographical", "topography", "topper", "topping", "topple",
+                "tops", "topsail", "topside", "topsoil", "topspin",
+                "toque", "tor", "torch", "torchlight", "tore",
+                "toreador", "torment", "tormentor", "torn", "tornado",
+                "torpedo", "torpid", "torpor", "torque", "torrent",
+                "torrential", "torrid", "torsion", "torso", "tort",
+                "tortilla", "tortoise", "tortoiseshell", "tortuous", "torture",
+                "tory", "toss", "tot", "total", "totalisator",
+                "totalitarian", "totalitarianism", "totality", "totalizator", "tote",
+                "totem", "totter", "tottery", "toucan", "touch",
+                "touchdown", "touched", "touching", "touchline", "touchstone",
+                "touchy", "tough", "toughen", "toupee", "tour",
+                "tourism", "tourist", "tournament", "tourney", "tourniquet",
+                "tousle", "tout", "tow", "towards", "towel",
+                "toweling", "towelling", "tower", "towering", "towline",
+                "town", "townscape", "township", "townsman", "townspeople",
+                "towpath", "toxaemia", "toxemia", "toxic", "toxicologist",
+                "toxicology", "toxin", "toy", "toyshop", "trace",
+                "tracer", "tracery", "trachea", "trachoma", "tracing",
+                "track", "trackless", "tracksuit", "tract", "tractable",
+                "traction", "tractor", "trad", "trade", "trademark",
+                "trader", "trades", "tradesman", "tradespeople", "tradition",
+                "traditional", "traditionalism", "traduce", "traffic", "trafficator",
+                "trafficker", "tragedian", "tragedienne", "tragedy", "tragic",
+                "tragicomedy", "trail", "trailer", "train", "trainbearer",
+                "trainee", "training", "trainman", "traipse", "trait",
+                "traitor", "traitorous", "trajectory", "tram", "tramline",
+                "trammel", "trammels", "tramp", "trample", "trampoline",
+                "trance", "tranny", "tranquil", "tranquiliser", "tranquillise",
+                "tranquillize", "tranquillizer", "transact", "transaction", "transactions",
+                "transalpine", "transatlantic", "transcend", "transcendence", "transcendent",
+                "transcendental", "transcendentalism", "transcontinental", "transcribe", "transcript",
+                "transcription", "transept", "transfer", "transference", "transfiguration",
+                "transfigure", "transfix", "transform", "transformation", "transformer",
+                "transfuse", "transgress", "tranship", "transience", "transient",
+                "transistor", "transistorise", "transistorize", "transit", "transition",
+                "transitive", "translate", "translator", "transliterate", "translucence",
+                "translucent", "transmigration", "transmission", "transmit", "transmitter",
+                "transmogrify", "transmute", "transoceanic", "transom", "transparency",
+                "transparent", "transpiration", "transpire", "transplant", "transpolar",
+                "transport", "transportation", "transporter", "transpose", "transship",
+                "transubstantiation", "transverse", "transvestism", "transvestite", "trap",
+                "trapdoor", "trapeze", "trapezium", "trapezoid", "trapper",
+                "trappings", "trappist", "trapse", "trapshooting", "trash",
+                "trashcan", "trashy", "trauma", "traumatic", "travail",
+                "travel", "traveled", "traveler", "travelled", "traveller",
+                "travelog", "travelogue", "travels", "travelsick", "traverse",
+                "travesty", "trawl", "trawler", "tray", "treacherous",
+                "treachery", "treacle", "treacly", "tread", "treadle",
+                "treadmill", "treason", "treasonable", "treasure", "treasurer",
+                "treasury", "treat", "treatise", "treatment", "treaty",
+                "treble", "tree", "trefoil", "trek", "trellis",
+                "tremble", "tremendous", "tremolo", "tremor", "tremulous",
+                "trench", "trenchant", "trencher", "trencherman", "trend",
+                "trendsetter", "trendy", "trepan", "trephine", "trepidation",
+                "trespass", "tresses", "trestle", "trews", "triad",
+                "trial", "triangle", "triangular", "tribal", "tribalism",
+                "tribe", "tribesman", "tribulation", "tribunal", "tribune",
+                "tributary", "tribute", "trice", "triceps", "trichinosis",
+                "trick", "trickery", "trickle", "trickster", "tricky",
+                "tricolor", "tricolour", "tricycle", "trident", "triennial",
+                "trier", "trifle", "trifler", "trifling", "trigger",
+                "trigonometry", "trike", "trilateral", "trilby", "trilingual",
+                "trill", "trillion", "trilobite", "trilogy", "trim",
+                "trimaran", "trimester", "trimmer", "trimming", "trinitrotoluene",
+                "trinity", "trinket", "trio", "trip", "tripartite",
+                "triple", "triplet", "triplex", "triplicate", "tripod",
+                "tripos", "tripper", "tripping", "triptych", "tripwire",
+                "trireme", "trisect", "trite", "triumph", "triumphal",
+                "triumphant", "triumvir", "triumvirate", "trivet", "trivia",
+                "trivial", "trivialise", "triviality", "trivialize", "trochaic",
+                "trochee", "trod", "trodden", "troglodyte", "troika",
+                "trojan", "troll", "trolley", "trolleybus", "trollop",
+                "trombone", "trombonist", "troop", "trooper", "troops",
+                "troopship", "trope", "trophy", "tropic", "tropical",
+                "tropics", "trot", "troth", "trotskyist", "trotter",
+                "troubadour", "trouble", "troublemaker", "troubleshooter", "troublesome",
+                "trough", "trounce", "troupe", "trouper", "trouser",
+                "trousers", "trousseau", "trout", "trove", "trowel",
+                "truancy", "truant", "truce", "truck", "trucking",
+                "truckle", "truculence", "truculent", "trudge", "true",
+                "trueborn", "truehearted", "truelove", "truffle", "trug",
+                "truism", "truly", "trump", "trumpery", "trumpet",
+                "trumps", "truncate", "truncheon", "trundle", "trunk",
+                "trunks", "truss", "trust", "trustee", "trusteeship",
+                "trustful", "trustworthy", "trusty", "truth", "truthful",
+                "try", "tryst", "tsar", "tsarina", "tsp",
+                "tub", "tuba", "tubby", "tube", "tubeless",
+                "tuber", "tubercular", "tuberculosis", "tubful", "tubing",
+                "tubular", "tuck", "tucker", "tuckerbag", "tuesday",
+                "tuft", "tug", "tugboat", "tuition", "tulip",
+                "tulle", "tumble", "tumbledown", "tumbler", "tumbleweed",
+                "tumbrel", "tumbril", "tumescent", "tumid", "tummy",
+                "tumor", "tumour", "tumult", "tumultuous", "tumulus",
+                "tun", "tuna", "tundra", "tune", "tuneful",
+                "tuneless", "tuner", "tungsten", "tunic", "tunnel",
+                "tunny", "tup", "tuppence", "tuppenny", "turban",
+                "turbid", "turbine", "turbojet", "turboprop", "turbot",
+                "turbulence", "turbulent", "turd", "tureen", "turf",
+                "turgid", "turkey", "turmeric", "turmoil", "turn",
+                "turnabout", "turncoat", "turncock", "turner", "turning",
+                "turnip", "turnkey", "turnout", "turnover", "turnpike",
+                "turnstile", "turntable", "turpentine", "turpitude", "turquoise",
+                "turret", "turtle", "turtledove", "turtleneck", "tush",
+                "tusk", "tusker", "tussle", "tussock", "tut",
+                "tutelage", "tutelary", "tutor", "tutorial", "tutu",
+                "tuxedo", "twaddle", "twain", "twang", "twat",
+                "tweak", "twee", "tweed", "tweeds", "tweedy",
+                "tweet", "tweeter", "tweezers", "twelfth", "twelve",
+                "twelvemonth", "twenty", "twerp", "twice", "twiddle",
+                "twig", "twilight", "twill", "twin", "twinge",
+                "twinkle", "twinkling", "twirl", "twirp", "twist",
+                "twister", "twit", "twitch", "twitter", "twixt",
+                "two", "twofaced", "twopence", "twopenny", "twosome",
+                "tycoon", "tyke", "tympanum", "type", "typecast",
+                "typeface", "typescript", "typesetter", "typewriter", "typewritten",
+                "typhoid", "typhoon", "typhus", "typical", "typically",
+                "typify", "typist", "typographer", "typographic", "typography",
+                "tyrannical", "tyrannise", "tyrannize", "tyrannosaurus", "tyranny",
+                "tyrant", "tyre", "tyro", "tzar", "tzarina",
+                "ubiquitous", "ucca", "udder", "ufo", "ugh",
+                "ugly", "uhf", "ukulele", "ulcer", "ulcerate",
+                "ulcerous", "ullage", "ulna", "ult", "ulterior",
+                "ultimate", "ultimately", "ultimatum", "ultimo", "ultramarine",
+                "ultrasonic", "ultraviolet", "umber", "umbrage", "umbrella",
+                "umlaut", "umpire", "umpteen", "unabashed", "unabated",
+                "unable", "unabridged", "unaccompanied", "unaccountable", "unaccustomed",
+                "unadopted", "unadulterated", "unadvised", "unaffected", "unalloyed",
+                "unanimous", "unannounced", "unanswerable", "unapproachable", "unarmed",
+                "unasked", "unassuming", "unattached", "unattended", "unavailing",
+                "unawares", "unbalance", "unbar", "unbearable", "unbearably",
+                "unbeknown", "unbelief", "unbelievable", "unbeliever", "unbelieving",
+                "unbend", "unbending", "unbidden", "unbind", "unblushing",
+                "unborn", "unbosom", "unbounded", "unbowed", "unbridled",
+                "unbuckle", "unburden", "unbuttoned", "uncanny", "unceremonious",
+                "uncertain", "uncertainty", "uncharitable", "uncharted", "unchecked",
+                "unchristian", "unclad", "uncle", "unclean", "unclouded",
+                "uncolored", "uncoloured", "uncomfortable", "uncommitted", "uncommonly",
+                "uncompromising", "unconcerned", "unconditional", "unconscionable", "unconscious",
+                "unconsidered", "uncork", "uncouple", "uncouth", "uncover",
+                "uncritical", "uncrowned", "uncrushable", "unction", "unctuous",
+                "uncut", "undaunted", "undeceive", "undecided", "undeclared",
+                "undeniable", "under", "underact", "underarm", "underbelly",
+                "underbrush", "undercarriage", "undercharge", "underclothes", "undercoat",
+                "undercover", "undercurrent", "undercut", "underdog", "underdone",
+                "underestimate", "underfelt", "underfloor", "underfoot", "undergarment",
+                "undergo", "undergraduate", "underground", "undergrowth", "underhand",
+                "underhanded", "underhung", "underlay", "underlie", "underline",
+                "underling", "underlying", "undermanned", "undermentioned", "undermine",
+                "underneath", "undernourish", "underpants", "underpass", "underpin",
+                "underplay", "underprivileged", "underproof", "underquote", "underrate",
+                "underscore", "undersecretary", "undersell", "undersexed", "undershirt",
+                "underside", "undersigned", "undersized", "underslung", "understaffed",
+                "understand", "understanding", "understate", "understatement", "understudy",
+                "undertake", "undertaker", "undertaking", "undertone", "undertow",
+                "underwater", "underwear", "underweight", "underwent", "underworld",
+                "underwrite", "underwriter", "undesirable", "undeveloped", "undies",
+                "undischarged", "undistinguished", "undivided", "undo", "undoing",
+                "undomesticated", "undone", "undoubted", "undress", "undressed",
+                "undue", "undulate", "undulation", "unduly", "undying",
+                "unearth", "unearthly", "unease", "uneasy", "uneconomic",
+                "uneducated", "unemployed", "unemployment", "unenlightened", "unenviable",
+                "unequal", "unequaled", "unequalled", "unequivocal", "unerring",
+                "unesco", "uneven", "uneventful", "unexampled", "unexceptionable",
+                "unfailing", "unfaithful", "unfaltering", "unfathomable", "unfathomed",
+                "unfavorable", "unfavourable", "unfeeling", "unfettered", "unfit",
+                "unflagging", "unflappable", "unflinching", "unfold", "unforeseen",
+                "unforgettable", "unfortunate", "unfortunately", "unfounded", "unfrequented",
+                "unfrock", "unfurl", "ungainly", "ungenerous", "ungodly",
+                "ungovernable", "ungracious", "ungrateful", "ungrudging", "unguarded",
+                "unguent", "unhallowed", "unhand", "unhappily", "unhappy",
+                "unhealthy", "unheard", "unhinge", "unholy", "unhook",
+                "unhorse", "unicef", "unicorn", "unidentified", "unification",
+                "uniform", "uniformed", "unify", "unilateral", "unimpeachable",
+                "uninformed", "uninhabitable", "uninhibited", "uninterested", "uninterrupted",
+                "union", "unionise", "unionism", "unionist", "unionize",
+                "unique", "unisex", "unison", "unit", "unitarian",
+                "unite", "united", "unity", "universal", "universally",
+                "universe", "university", "unkempt", "unkind", "unkindly",
+                "unknowing", "unknown", "unlawful", "unlearn", "unleash",
+                "unleavened", "unless", "unlettered", "unlike", "unlikely",
+                "unload", "unlock", "unloose", "unloosen", "unmade",
+                "unmannerly", "unmarried", "unmask", "unmatched", "unmeasured",
+                "unmentionable", "unmentionables", "unmindful", "unmistakable", "unmitigated",
+                "unmoved", "unnatural", "unnecessary", "unnerve", "unnumbered",
+                "uno", "unobtrusive", "unofficial", "unorthodox", "unpack",
+                "unparalleled", "unparliamentary", "unperson", "unpick", "unplaced",
+                "unplayable", "unpleasant", "unplumbed", "unpracticed", "unpractised",
+                "unprecedented", "unprejudiced", "unpretentious", "unprincipled", "unprintable",
+                "unprofessional", "unprompted", "unprovoked", "unqualified", "unquestionable",
+                "unquestioning", "unquiet", "unquote", "unravel", "unreadable",
+                "unreal", "unreasonable", "unreasoning", "unrelenting", "unrelieved",
+                "unremitting", "unrequited", "unreserved", "unrest", "unrestrained",
+                "unrip", "unrivaled", "unrivalled", "unroll", "unruffled",
+                "unruly", "unsaddle", "unsaid", "unsavory", "unsavoury",
+                "unsay", "unscathed", "unschooled", "unscramble", "unscrew",
+                "unscripted", "unscrupulous", "unseat", "unseeing", "unseemly",
+                "unseen", "unserviceable", "unsettle", "unsettled", "unsex",
+                "unsexed", "unshakable", "unshakeable", "unshod", "unsightly",
+                "unskilled", "unsociable", "unsocial", "unsophisticated", "unsound",
+                "unsparing", "unspeakable", "unspotted", "unstop", "unstrung",
+                "unstuck", "unstudied", "unsullied", "unsung", "unswerving",
+                "untangle", "untapped", "untenable", "unthinkable", "unthinking",
+                "untie", "until", "untimely", "untinged", "untiring",
+                "unto", "untold", "untouchable", "untoward", "untruth",
+                "untruthful", "untutored", "unused", "unusual", "unusually",
+                "unutterable", "unvarnished", "unveil", "unversed", "unvoiced",
+                "unwarranted", "unwed", "unwell", "unwieldy", "unwind",
+                "unwitting", "unwonted", "unzip", "upbeat", "upbraid",
+                "upbringing", "upcoming", "update", "upend", "upgrade",
+                "upheaval", "uphill", "uphold", "upholster", "upholsterer",
+                "upholstery", "upkeep", "upland", "uplift", "upon",
+                "upper", "uppercut", "uppermost", "uppish", "uppity",
+                "upright", "uprising", "uproar", "uproarious", "uproot",
+                "upset", "upshot", "upstage", "upstairs", "upstanding",
+                "upstart", "upstream", "upsurge", "upswing", "uptake",
+                "uptight", "uptown", "upturn", "upturned", "upward",
+                "upwards", "uranium", "uranus", "urban", "urbane",
+                "urbanise", "urbanize", "urchin", "urge", "urgent",
+                "uric", "urinal", "urinary", "urinate", "urine",
+                "urn", "usage", "use", "useful", "usefulness",
+                "useless", "user", "usher", "usherette", "ussr",
+                "usual", "usually", "usurer", "usurious", "usurp",
+                "usury", "utensil", "uterine", "uterus", "utilise",
+                "utilitarian", "utilitarianism", "utility", "utilize", "utmost",
+                "utopia", "utopian", "utter", "utterance", "utterly",
+                "uvula", "uvular", "uxorious", "vac", "vacancy",
+                "vacant", "vacate", "vacation", "vaccinate", "vaccination",
+                "vaccine", "vacillate", "vacuity", "vacuous", "vacuum",
+                "vagabond", "vagary", "vagina", "vaginal", "vagrancy",
+                "vagrant", "vague", "vain", "vainglorious", "vainglory",
+                "valance", "vale", "valediction", "valedictory", "valency",
+                "valentine", "valerian", "valet", "valetudinarian", "valiant",
+                "valiantly", "valid", "validate", "valise", "valley",
+                "valor", "valour", "valse", "valuable", "valuation",
+                "value", "valuer", "valve", "valvular", "vamoose",
+                "vamp", "vampire", "van", "vanadium", "vandal",
+                "vandalise", "vandalism", "vandalize", "vane", "vanguard",
+                "vanilla", "vanish", "vanity", "vanquish", "vantagepoint",
+                "vapid", "vapidity", "vapor", "vaporise", "vaporize",
+                "vaporous", "vapors", "vapour", "vapours", "variability",
+                "variable", "variance", "variant", "variation", "varicolored",
+                "varicoloured", "varicose", "varied", "variegated", "variegation",
+                "variety", "variform", "variorum", "various", "variously",
+                "varlet", "varmint", "varnish", "varsity", "vary",
+                "vascular", "vase", "vasectomy", "vaseline", "vassal",
+                "vassalage", "vast", "vastly", "vastness", "vat",
+                "vatican", "vaudeville", "vault", "vaulted", "vaulting",
+                "vaunt", "veal", "vector", "veer", "veg",
+                "vegan", "vegetable", "vegetarian", "vegetarianism", "vegetate",
+                "vegetation", "vehement", "vehicle", "vehicular", "veil",
+                "veiled", "vein", "veined", "veining", "velar",
+                "velarize", "veld", "veldt", "vellum", "velocipede",
+                "velocity", "velour", "velours", "velvet", "velveteen",
+                "velvety", "venal", "vend", "vendee", "vender",
+                "vendetta", "vendor", "veneer", "venerable", "venerate",
+                "venereal", "vengeance", "vengeful", "venial", "venison",
+                "venom", "venomous", "venous", "vent", "ventilate",
+                "ventilation", "ventilator", "ventricle", "ventriloquism", "ventriloquist",
+                "venture", "venturer", "venturesome", "venue", "veracious",
+                "veracity", "veranda", "verandah", "verb", "verbal",
+                "verbalise", "verbalize", "verbally", "verbatim", "verbena",
+                "verbiage", "verbose", "verbosity", "verdant", "verdict",
+                "verdigris", "verdure", "verge", "verger", "verify",
+                "verily", "verisimilitude", "veritable", "verity", "vermicelli",
+                "vermiculite", "vermiform", "vermifuge", "vermilion", "vermin",
+                "verminous", "vermouth", "vernacular", "vernal", "veronal",
+                "veronica", "verruca", "versatile", "verse", "versed",
+                "versification", "versify", "version", "verso", "versus",
+                "vertebra", "vertebrate", "vertex", "vertical", "vertiginous",
+                "vertigo", "verve", "very", "vesicle", "vesicular",
+                "vesper", "vespers", "vessel", "vest", "vestibule",
+                "vestige", "vestigial", "vestment", "vestry", "vestryman",
+                "vesture", "vet", "vetch", "veteran", "veterinary",
+                "veto", "vex", "vexation", "vexatious", "vhf",
+                "via", "viable", "viaduct", "vial", "viands",
+                "vibes", "vibrancy", "vibrant", "vibraphone", "vibrate",
+                "vibration", "vibrato", "vibrator", "vicar", "vicarage",
+                "vicarious", "vice", "vicelike", "viceregal", "vicereine",
+                "viceroy", "vicinity", "vicious", "vicissitudes", "victim",
+                "victimise", "victimize", "victor", "victorian", "victorious",
+                "victory", "victual", "victualer", "victualler", "victuals",
+                "vicuaa", "vicuana", "vide", "videlicet", "video",
+                "videotape", "vie", "view", "viewer", "viewfinder",
+                "viewless", "viewpoint", "vigil", "vigilance", "vigilant",
+                "vigilante", "vignette", "vigor", "vigorous", "vigour",
+                "viking", "vile", "vilification", "vilify", "villa",
+                "village", "villager", "villain", "villainies", "villainous",
+                "villainy", "villein", "villeinage", "villenage", "vim",
+                "vinaigrette", "vindicate", "vindication", "vindictive", "vine",
+                "vinegar", "vinegary", "vinery", "vineyard", "vino",
+                "vinous", "vintage", "vintner", "vinyl", "viol",
+                "viola", "violate", "violence", "violent", "violet",
+                "violin", "violoncello", "vip", "viper", "virago",
+                "virgin", "virginal", "virginals", "virginia", "virginity",
+                "virgo", "virgule", "virile", "virility", "virologist",
+                "virology", "virtu", "virtual", "virtually", "virtue",
+                "virtuosity", "virtuoso", "virtuous", "virulence", "virulent",
+                "virus", "visa", "visage", "viscera", "visceral",
+                "viscosity", "viscount", "viscountcy", "viscountess", "viscous",
+                "vise", "visibility", "visible", "visibly", "vision",
+                "visionary", "visit", "visitant", "visitation", "visiting",
+                "visitor", "visor", "vista", "visual", "visualise",
+                "visualize", "visually", "vital", "vitalise", "vitality",
+                "vitalize", "vitally", "vitals", "vitamin", "vitiate",
+                "viticulture", "vitreous", "vitrify", "vitriol", "vitriolic",
+                "vituperate", "vituperation", "vituperative", "vivace", "vivacious",
+                "vivarium", "vivid", "viviparous", "vivisect", "vivisection",
+                "vivisectionist", "vixen", "vixenish", "vizier", "vocab",
+                "vocabulary", "vocal", "vocalise", "vocalist", "vocalize",
+                "vocation", "vocational", "vocative", "vociferate", "vociferation",
+                "vociferous", "vodka", "vogue", "voice", "voiceless",
+                "void", "voile", "vol", "volatile", "volcanic",
+                "volcano", "vole", "volition", "volitional", "volley",
+                "volleyball", "volt", "voltage", "voluble", "volume",
+                "volumes", "voluminous", "voluntary", "volunteer", "voluptuary",
+                "voluptuous", "volute", "vomit", "voodoo", "voracious",
+                "vortex", "votary", "vote", "voter", "votive",
+                "vouch", "voucher", "vouchsafe", "vow", "vowel",
+                "voyage", "voyager", "voyages", "voyeur", "vtol",
+                "vulcanise", "vulcanite", "vulcanize", "vulgar", "vulgarian",
+                "vulgarise", "vulgarism", "vulgarity", "vulgarize", "vulgate",
+                "vulnerable", "vulpine", "vulture", "vulva", "wac",
+                "wack", "wacky", "wad", "wadding", "waddle",
+                "wade", "wader", "wadge", "wadi", "wady",
+                "wafer", "waffle", "waft", "wag", "wage",
+                "wager", "wages", "waggery", "waggish", "waggle",
+                "waggon", "waggoner", "waggonette", "wagon", "wagoner",
+                "wagonette", "wagtail", "waif", "wail", "wain",
+                "wainscot", "waist", "waistband", "waistcoat", "waistline",
+                "wait", "waiter", "waits", "waive", "waiver",
+                "wake", "wakeful", "waken", "waking", "walk",
+                "walkabout", "walkaway", "walker", "walking", "walkout",
+                "walkover", "wall", "walla", "wallaby", "wallah",
+                "wallet", "wallflower", "wallop", "walloping", "wallow",
+                "wallpaper", "walnut", "walrus", "waltz", "wampum",
+                "wan", "wand", "wander", "wanderer", "wandering",
+                "wanderings", "wanderlust", "wane", "wangle", "wank",
+                "wanker", "want", "wanting", "wanton", "wants",
+                "wapiti", "war", "warble", "warbler", "ward",
+                "warden", "warder", "wardrobe", "wardroom", "warehouse",
+                "wares", "warfare", "warhead", "warhorse", "warily",
+                "warlike", "warlock", "warlord", "warm", "warmonger",
+                "warmth", "warn", "warning", "warp", "warpath",
+                "warrant", "warrantee", "warrantor", "warranty", "warren",
+                "warrior", "warship", "wart", "warthog", "wartime",
+                "wary", "was", "wash", "washable", "washbasin",
+                "washboard", "washbowl", "washcloth", "washday", "washer",
+                "washerwoman", "washhouse", "washing", "washout", "washroom",
+                "washstand", "washwoman", "washy", "wasp", "waspish",
+                "wassail", "wast", "wastage", "waste", "wasteful",
+                "waster", "wastrel", "watch", "watchband", "watchdog",
+                "watches", "watchful", "watchmaker", "watchman", "watchtower",
+                "watchword", "water", "waterborne", "watercolor", "watercolour",
+                "watercourse", "watercress", "waterfall", "waterfowl", "waterfront",
+                "waterhole", "waterline", "waterlogged", "waterloo", "waterman",
+                "watermark", "watermelon", "watermill", "waterpower", "waterproof",
+                "waters", "watershed", "waterside", "waterspout", "watertight",
+                "waterway", "waterwheel", "waterwings", "waterworks", "watery",
+                "watt", "wattage", "wattle", "wave", "wavelength",
+                "waver", "wavy", "wax", "waxen", "waxworks",
+                "waxy", "way", "waybill", "wayfarer", "wayfaring",
+                "waylay", "ways", "wayside", "wayward", "weak",
+                "weaken", "weakling", "weakness", "weal", "weald",
+                "wealth", "wealthy", "wean", "weapon", "weaponry",
+                "wear", "wearing", "wearisome", "weary", "weasel",
+                "weather", "weatherboard", "weathercock", "weatherglass", "weatherman",
+                "weatherproof", "weathers", "weave", "weaver", "web",
+                "webbed", "webbing", "wed", "wedded", "wedding",
+                "wedge", "wedged", "wedgwood", "wedlock", "wednesday",
+                "wee", "weed", "weeds", "weedy", "week",
+                "weekday", "weekend", "weekender", "weekly", "weeknight",
+                "weeny", "weep", "weeping", "weepy", "weevil",
+                "weft", "weigh", "weighbridge", "weight", "weighted",
+                "weighting", "weightless", "weighty", "weir", "weird",
+                "weirdie", "weirdo", "welch", "welcome", "weld",
+                "welder", "welfare", "welkin", "well", "wellbeing",
+                "wellborn", "wellington", "wellspring", "welsh", "welt",
+                "weltanschauung", "welter", "welterweight", "wen", "wench",
+                "wend", "wensleydale", "went", "wept", "were",
+                "werewolf", "wert", "wesleyan", "west", "westbound",
+                "westerly", "western", "westerner", "westernise", "westernize",
+                "westernmost", "westward", "westwards", "wet", "wether",
+                "wetting", "whack", "whacked", "whacker", "whacking",
+                "whale", "whalebone", "whaler", "whaling", "wham",
+                "wharf", "what", "whatever", "whatnot", "wheat",
+                "wheaten", "wheedle", "wheel", "wheelbarrow", "wheelbase",
+                "wheelchair", "wheelhouse", "wheeling", "wheels", "wheelwright",
+                "wheeze", "wheezy", "whelk", "whelp", "when",
+                "whence", "whenever", "where", "whereabouts", "whereas",
+                "whereat", "whereby", "wherefore", "wherefores", "wherein",
+                "whereof", "whereon", "wheresoever", "whereto", "whereupon",
+                "wherever", "wherewithal", "wherry", "whet", "whether",
+                "whetstone", "whew", "whey", "which", "whichever",
+                "whiff", "whiffy", "whig", "while", "whim",
+                "whimper", "whimsey", "whimsical", "whimsicality", "whimsy",
+                "whin", "whine", "whiner", "whinny", "whip",
+                "whipcord", "whiplash", "whippersnapper", "whippet", "whipping",
+                "whippoorwill", "whippy", "whir", "whirl", "whirligig",
+                "whirlpool", "whirlwind", "whirlybird", "whirr", "whisk",
+                "whisker", "whiskered", "whiskers", "whiskey", "whisky",
+                "whisper", "whist", "whistle", "whit", "white",
+                "whitebait", "whitehall", "whiten", "whitening", "whites",
+                "whitethorn", "whitethroat", "whitewash", "whither", "whiting",
+                "whitlow", "whitsun", "whitsuntide", "whittle", "whiz",
+                "whizz", "who", "whoa", "whodunit", "whoever",
+                "whole", "wholemeal", "wholesale", "wholesaler", "wholesome",
+                "wholly", "whom", "whoop", "whoopee", "whoosh",
+                "whop", "whopper", "whopping", "whore", "whorehouse",
+                "whoremonger", "whorl", "whortleberry", "whose", "whosoever",
+                "why", "whys", "wick", "wicked", "wicker",
+                "wickerwork", "wicket", "wide", "widely", "widen",
+                "widespread", "widgeon", "widow", "widowed", "widower",
+                "widowhood", "width", "wield", "wife", "wifely",
+                "wig", "wigged", "wigging", "wiggle", "wight",
+                "wigwam", "wilco", "wild", "wildcat", "wildebeest",
+                "wilderness", "wildfire", "wildfowl", "wildlife", "wildly",
+                "wile", "wiles", "wilful", "wiliness", "will",
+                "willful", "willies", "willing", "willow", "willowy",
+                "willpower", "wilt", "wily", "wimple", "wimpy",
+                "win", "wince", "winceyette", "winch", "wind",
+                "windbag", "windbreak", "windcheater", "windfall", "windily",
+                "winding", "windjammer", "windlass", "windless", "windmill",
+                "window", "windowpane", "windowsill", "windpipe", "windscreen",
+                "windshield", "windsock", "windstorm", "windswept", "windward",
+                "windy", "wine", "winebibbing", "wineglass", "winepress",
+                "wineskin", "wing", "winger", "wings", "wingspan",
+                "wink", "winkers", "winkle", "winner", "winning",
+                "winnings", "winnow", "winsome", "winter", "wintergreen",
+                "wintertime", "wintry", "wipe", "wiper", "wire",
+                "wirecutters", "wireless", "wiretap", "wireworm", "wiring",
+                "wiry", "wisdom", "wise", "wisecrack", "wish",
+                "wishbone", "wisp", "wispy", "wisteria", "wistful",
+                "wit", "witch", "witchcraft", "witchdoctor", "witchery",
+                "witching", "with", "withal", "withdraw", "withdrawal",
+                "withdrawn", "withe", "wither", "withering", "withers",
+                "withhold", "within", "without", "withstand", "withy",
+                "witless", "witness", "witticism", "witting", "witty",
+                "wives", "wizard", "wizardry", "wizened", "woad",
+                "wobble", "wobbly", "woe", "woebegone", "woeful",
+                "wog", "woke", "woken", "wold", "wolf",
+                "wolfhound", "wolfram", "wolfsbane", "woman", "womanhood",
+                "womanise", "womanish", "womanize", "womankind", "womanly",
+                "womb", "wombat", "womenfolk", "won", "wonder",
+                "wonderful", "wonderland", "wonderment", "wonders", "wondrous",
+                "wonky", "wont", "wonted", "woo", "wood",
+                "woodbine", "woodblock", "woodcock", "woodcraft", "woodcut",
+                "woodcutter", "wooded", "wooden", "woodenheaded", "woodland",
+                "woodlouse", "woodpecker", "woodpile", "woodshed", "woodsman",
+                "woodwind", "woodwork", "woodworm", "woody", "wooer",
+                "woof", "woofer", "wool", "woolen", "woolens",
+                "woolgather", "woolgathering", "woollen", "woollens", "woolly",
+                "woolsack", "woozy", "wop", "word", "wording",
+                "wordless", "wordplay", "words", "wordy", "wore",
+                "work", "workable", "workaday", "workbag", "workbasket",
+                "workbench", "workbook", "workday", "worker", "workhorse",
+                "workhouse", "working", "workings", "workman", "workmanlike",
+                "workmanship", "workout", "workpeople", "workroom", "works",
+                "workshop", "worktop", "world", "worldly", "worldshaking",
+                "worldwide", "worm", "wormhole", "wormwood", "wormy",
+                "worn", "worried", "worrisome", "worry", "worse",
+                "worsen", "worship", "worshipful", "worst", "worsted",
+                "wort", "worth", "worthless", "worthwhile", "worthy",
+                "wot", "wotcher", "would", "wouldst", "wound",
+                "wove", "woven", "wow", "wrac", "wrack",
+                "wraith", "wrangle", "wrangler", "wrap", "wrapper",
+                "wrapping", "wrath", "wreak", "wreath", "wreathe",
+                "wreck", "wreckage", "wrecker", "wren", "wrench",
+                "wrest", "wrestle", "wretch", "wretched", "wriggle",
+                "wright", "wring", "wringer", "wrinkle", "wrist",
+                "wristband", "wristlet", "wristwatch", "wristy", "writ",
+                "write", "writer", "writhe", "writing", "writings",
+                "written", "wrong", "wrongdoing", "wrongful", "wrongheaded",
+                "wrote", "wroth", "wrought", "wrung", "wry",
+                "wurst", "wyvern", "xenon", "xenophobia", "xerox",
+                "xylophone", "yacht", "yachting", "yachtsman", "yahoo",
+                "yak", "yam", "yammer", "yang", "yank",
+                "yankee", "yap", "yard", "yardage", "yardarm",
+                "yardstick", "yarn", "yarrow", "yashmak", "yaw",
+                "yawl", "yawn", "yaws", "yea", "yeah",
+                "year", "yearbook", "yearling", "yearlong", "yearly",
+                "yearn", "yearning", "years", "yeast", "yeasty",
+                "yell", "yellow", "yelp", "yen", "yeoman",
+                "yeomanry", "yes", "yesterday", "yet", "yeti",
+                "yew", "yid", "yiddish", "yield", "yielding",
+                "yin", "yippee", "yobbo", "yodel", "yoga",
+                "yoghurt", "yogi", "yogurt", "yoke", "yokel",
+                "yolk", "yonder", "yonks", "yore", "yorker",
+                "you", "young", "younger", "youngster", "your",
+                "yours", "yourself", "youth", "youthful", "yowl",
+                "yoyo", "yucca", "yule", "yuletide", "zany",
+                "zeal", "zealot", "zealotry", "zealous", "zebra",
+                "zebu", "zed", "zeitgeist", "zen", "zenana",
+                "zenith", "zephyr", "zeppelin", "zero", "zest",
+                "ziggurat", "zigzag", "zinc", "zinnia", "zionism",
+                "zip", "zipper", "zippy", "zither", "zizz",
+                "zodiac", "zombi", "zombie", "zonal", "zone",
+                "zoning", "zonked", "zoo", "zoologist", "zoology",
+                "zoom", "zoophyte", "zouave", "zucchini", "zulu",
+            };
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1a18965e/src/contrib/Analyzers/En/KStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/En/KStemFilter.cs b/src/contrib/Analyzers/En/KStemFilter.cs
new file mode 100644
index 0000000..32ba36b
--- /dev/null
+++ b/src/contrib/Analyzers/En/KStemFilter.cs
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.En
+{
+    /// <summary>A <see cref="TokenFilter"/> that passes each incoming term through a <see cref="KStemmer"/>.</summary>
+    public class KStemFilter : TokenFilter
+    {
+        // TODO: the IKeywordAttribute (keywordAtt.isKeyword()) guard is still disabled, so every token is offered to the stemmer.
+        private readonly KStemmer stemmer = new KStemmer();
+        private readonly ITermAttribute termAtt;
+
+        /// <summary>Wraps <paramref name="input"/> and registers the <see cref="ITermAttribute"/> this filter rewrites.</summary>
+        public KStemFilter(TokenStream input) : base(input)
+        {
+            termAtt = AddAttribute<ITermAttribute>();
+        }
+
+        /// <summary>Advances the wrapped stream; if <c>stemmer.stem</c> returns true, the term is replaced with <c>stemmer.asString()</c>.</summary>
+        /// <returns>The value returned by the wrapped stream's <c>IncrementToken</c>.</returns>
+        public override bool IncrementToken()
+        {
+            bool advanced = input.IncrementToken();
+            // NOTE(review): a literal 0 is passed as stem's second argument - confirm this is what KStemmer.stem expects.
+            if (advanced && stemmer.stem(termAtt.Term, 0))
+            {
+                termAtt.SetTermBuffer(stemmer.asString());
+            }
+            return advanced;
+        }
+    }
+}