You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2013/12/03 22:41:05 UTC
[2/9] PORTED KStemmer and KStemFilter
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1a18965e/src/contrib/Analyzers/En/KStemData8.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/En/KStemData8.cs b/src/contrib/Analyzers/En/KStemData8.cs
new file mode 100644
index 0000000..99a7a82
--- /dev/null
+++ b/src/contrib/Analyzers/En/KStemData8.cs
@@ -0,0 +1,655 @@
+/*
+Copyright © 2003,
+Center for Intelligent Information Retrieval,
+University of Massachusetts, Amherst.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The names "Center for Intelligent Information Retrieval" and
+"University of Massachusetts" must not be used to endorse or promote products
+derived from this software without prior written permission. To obtain
+permission, contact info@ciir.cs.umass.edu.
+
+THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+*/
+/* This is a C# version of Bob Krovetz' KStem.
+ *
+ * Ported from the Java version by Sergio Guzman-Lara.
+ * CIIR-UMass Amherst http://ciir.cs.umass.edu
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis.En
+{
+ /** A list of words used by KStem.
+ */
+ internal class KStemData8
+ {
+ /** Private, so code outside this class cannot create instances of it. */
+ private KStemData8() { }
+
+ internal static readonly String[] data =
+ {
+ "tenor", "tenpin", "tense", "tensile", "tension",
+ "tent", "tentacle", "tentative", "tenterhooks", "tenuity",
+ "tenuous", "tenure", "tepee", "tepid", "tequila",
+ "tercentenary", "tercentennial", "term", "termagant", "terminable",
+ "terminal", "terminate", "termination", "terminology", "terminus",
+ "termite", "terms", "tern", "terpsichorean", "terrace",
+ "terracotta", "terrain", "terrapin", "terrestrial", "terrible",
+ "terribly", "terrier", "terrific", "terrifically", "terrify",
+ "territorial", "territory", "terror", "terrorise", "terrorism",
+ "terrorize", "terrycloth", "terse", "tertian", "tertiary",
+ "terylene", "tessellated", "test", "testament", "testamentary",
+ "testate", "testator", "tester", "testicle", "testify",
+ "testimonial", "testimony", "testis", "testy", "tetanus",
+ "tetchy", "tether", "teutonic", "text", "textbook",
+ "textile", "textual", "texture", "thalidomide", "than",
+ "thane", "thank", "thankful", "thankless", "thanks",
+ "thanksgiving", "thankyou", "that", "thatch", "thaw",
+ "the", "theater", "theatergoer", "theatre", "theatregoer",
+ "theatrical", "theatricals", "thee", "theft", "thegn",
+ "their", "theirs", "theism", "them", "theme",
+ "themselves", "then", "thence", "thenceforth", "theocracy",
+ "theocratic", "theodolite", "theologian", "theology", "theorem",
+ "theoretical", "theoretically", "theorise", "theorist", "theorize",
+ "theory", "theosophy", "therapeutic", "therapeutics", "therapist",
+ "therapy", "there", "thereabouts", "thereafter", "thereby",
+ "therefore", "therein", "thereinafter", "thereof", "thereon",
+ "thereto", "thereunder", "thereupon", "therm", "thermal",
+ "thermionic", "thermionics", "thermodynamics", "thermometer", "thermonuclear",
+ "thermoplastic", "thermos", "thermosetting", "thermostat", "thesaurus",
+ "these", "thesis", "thespian", "thews", "they",
+ "thick", "thicken", "thickener", "thicket", "thickheaded",
+ "thickness", "thickset", "thief", "thieve", "thieving",
+ "thievish", "thigh", "thimble", "thimbleful", "thin",
+ "thine", "thing", "thingamajig", "thingamujig", "things",
+ "think", "thinkable", "thinking", "thinner", "third",
+ "thirst", "thirsty", "thirteen", "thirty", "this",
+ "thistle", "thistledown", "thither", "thole", "thong",
+ "thorax", "thorn", "thorny", "thorough", "thoroughbred",
+ "thoroughfare", "thoroughgoing", "those", "thou", "though",
+ "thought", "thoughtful", "thoughtless", "thousand", "thraldom",
+ "thrall", "thralldom", "thrash", "thrashing", "thread",
+ "threadbare", "threadlike", "threat", "threaten", "three",
+ "threepence", "threnody", "thresh", "thresher", "threshold",
+ "threw", "thrice", "thrift", "thrifty", "thrill",
+ "thriller", "thrive", "throat", "throaty", "throb",
+ "throes", "thrombosis", "throne", "throng", "throstle",
+ "throttle", "through", "throughout", "throughput", "throughway",
+ "throw", "throwaway", "throwback", "thru", "thrum",
+ "thrush", "thrust", "thruster", "thruway", "thud",
+ "thug", "thuggery", "thumb", "thumbnail", "thumbscrew",
+ "thumbtack", "thump", "thumping", "thunder", "thunderbolt",
+ "thunderclap", "thundercloud", "thundering", "thunderous", "thunderstorm",
+ "thunderstruck", "thundery", "thurible", "thursday", "thus",
+ "thwack", "thwart", "thy", "thyme", "thyroid",
+ "thyself", "tiara", "tibia", "tic", "tick",
+ "ticker", "tickertape", "ticket", "ticking", "tickle",
+ "tickler", "ticklish", "tidal", "tidbit", "tiddler",
+ "tiddley", "tiddleywinks", "tiddly", "tiddlywinks", "tide",
+ "tidemark", "tidewater", "tideway", "tidings", "tidy",
+ "tie", "tiebreaker", "tiepin", "tier", "tiff",
+ "tiffin", "tig", "tiger", "tigerish", "tight",
+ "tighten", "tightfisted", "tightrope", "tights", "tightwad",
+ "tigress", "tike", "tilde", "tile", "till",
+ "tillage", "tiller", "tilt", "timber", "timbered",
+ "timberline", "timbre", "timbrel", "time", "timekeeper",
+ "timeless", "timely", "timepiece", "timer", "times",
+ "timesaving", "timeserver", "timeserving", "timetable", "timework",
+ "timeworn", "timid", "timing", "timorous", "timothy",
+ "timpani", "timpanist", "tin", "tincture", "tinder",
+ "tinderbox", "tinfoil", "ting", "tingaling", "tinge",
+ "tingle", "tinker", "tinkle", "tinny", "tinplate",
+ "tinsel", "tint", "tintack", "tintinnabulation", "tiny",
+ "tip", "tippet", "tipple", "tipstaff", "tipster",
+ "tipsy", "tiptoe", "tirade", "tire", "tired",
+ "tireless", "tiresome", "tiro", "tissue", "tit",
+ "titan", "titanic", "titanium", "titbit", "titfer",
+ "tithe", "titillate", "titivate", "title", "titled",
+ "titleholder", "titmouse", "titter", "tittivate", "tittle",
+ "titty", "titular", "tizzy", "tnt", "toad",
+ "toadstool", "toady", "toast", "toaster", "toastmaster",
+ "tobacco", "tobacconist", "toboggan", "toccata", "tocsin",
+ "tod", "today", "toddle", "toddler", "toddy",
+ "toe", "toehold", "toenail", "toff", "toffee",
+ "toffy", "tog", "toga", "together", "togetherness",
+ "toggle", "togs", "toil", "toilet", "toiletries",
+ "toiletry", "toils", "tokay", "token", "told",
+ "tolerable", "tolerably", "tolerance", "tolerant", "tolerate",
+ "toleration", "toll", "tollgate", "tollhouse", "tomahawk",
+ "tomato", "tomb", "tombola", "tomboy", "tombstone",
+ "tomcat", "tome", "tomfoolery", "tommyrot", "tomorrow",
+ "tomtit", "ton", "tonal", "tonality", "tone",
+ "toneless", "tong", "tongs", "tongue", "tonic",
+ "tonight", "tonnage", "tonne", "tonsil", "tonsilitis",
+ "tonsillitis", "tonsorial", "tonsure", "tontine", "too",
+ "took", "tool", "toot", "tooth", "toothache",
+ "toothbrush", "toothcomb", "toothpaste", "toothpick", "toothsome",
+ "toothy", "tootle", "toots", "tootsie", "top",
+ "topaz", "topcoat", "topdressing", "topee", "topgallant",
+ "topi", "topiary", "topic", "topical", "topicality",
+ "topknot", "topless", "topmast", "topmost", "topographer",
+ "topographical", "topography", "topper", "topping", "topple",
+ "tops", "topsail", "topside", "topsoil", "topspin",
+ "toque", "tor", "torch", "torchlight", "tore",
+ "toreador", "torment", "tormentor", "torn", "tornado",
+ "torpedo", "torpid", "torpor", "torque", "torrent",
+ "torrential", "torrid", "torsion", "torso", "tort",
+ "tortilla", "tortoise", "tortoiseshell", "tortuous", "torture",
+ "tory", "toss", "tot", "total", "totalisator",
+ "totalitarian", "totalitarianism", "totality", "totalizator", "tote",
+ "totem", "totter", "tottery", "toucan", "touch",
+ "touchdown", "touched", "touching", "touchline", "touchstone",
+ "touchy", "tough", "toughen", "toupee", "tour",
+ "tourism", "tourist", "tournament", "tourney", "tourniquet",
+ "tousle", "tout", "tow", "towards", "towel",
+ "toweling", "towelling", "tower", "towering", "towline",
+ "town", "townscape", "township", "townsman", "townspeople",
+ "towpath", "toxaemia", "toxemia", "toxic", "toxicologist",
+ "toxicology", "toxin", "toy", "toyshop", "trace",
+ "tracer", "tracery", "trachea", "trachoma", "tracing",
+ "track", "trackless", "tracksuit", "tract", "tractable",
+ "traction", "tractor", "trad", "trade", "trademark",
+ "trader", "trades", "tradesman", "tradespeople", "tradition",
+ "traditional", "traditionalism", "traduce", "traffic", "trafficator",
+ "trafficker", "tragedian", "tragedienne", "tragedy", "tragic",
+ "tragicomedy", "trail", "trailer", "train", "trainbearer",
+ "trainee", "training", "trainman", "traipse", "trait",
+ "traitor", "traitorous", "trajectory", "tram", "tramline",
+ "trammel", "trammels", "tramp", "trample", "trampoline",
+ "trance", "tranny", "tranquil", "tranquiliser", "tranquillise",
+ "tranquillize", "tranquillizer", "transact", "transaction", "transactions",
+ "transalpine", "transatlantic", "transcend", "transcendence", "transcendent",
+ "transcendental", "transcendentalism", "transcontinental", "transcribe", "transcript",
+ "transcription", "transept", "transfer", "transference", "transfiguration",
+ "transfigure", "transfix", "transform", "transformation", "transformer",
+ "transfuse", "transgress", "tranship", "transience", "transient",
+ "transistor", "transistorise", "transistorize", "transit", "transition",
+ "transitive", "translate", "translator", "transliterate", "translucence",
+ "translucent", "transmigration", "transmission", "transmit", "transmitter",
+ "transmogrify", "transmute", "transoceanic", "transom", "transparency",
+ "transparent", "transpiration", "transpire", "transplant", "transpolar",
+ "transport", "transportation", "transporter", "transpose", "transship",
+ "transubstantiation", "transverse", "transvestism", "transvestite", "trap",
+ "trapdoor", "trapeze", "trapezium", "trapezoid", "trapper",
+ "trappings", "trappist", "trapse", "trapshooting", "trash",
+ "trashcan", "trashy", "trauma", "traumatic", "travail",
+ "travel", "traveled", "traveler", "travelled", "traveller",
+ "travelog", "travelogue", "travels", "travelsick", "traverse",
+ "travesty", "trawl", "trawler", "tray", "treacherous",
+ "treachery", "treacle", "treacly", "tread", "treadle",
+ "treadmill", "treason", "treasonable", "treasure", "treasurer",
+ "treasury", "treat", "treatise", "treatment", "treaty",
+ "treble", "tree", "trefoil", "trek", "trellis",
+ "tremble", "tremendous", "tremolo", "tremor", "tremulous",
+ "trench", "trenchant", "trencher", "trencherman", "trend",
+ "trendsetter", "trendy", "trepan", "trephine", "trepidation",
+ "trespass", "tresses", "trestle", "trews", "triad",
+ "trial", "triangle", "triangular", "tribal", "tribalism",
+ "tribe", "tribesman", "tribulation", "tribunal", "tribune",
+ "tributary", "tribute", "trice", "triceps", "trichinosis",
+ "trick", "trickery", "trickle", "trickster", "tricky",
+ "tricolor", "tricolour", "tricycle", "trident", "triennial",
+ "trier", "trifle", "trifler", "trifling", "trigger",
+ "trigonometry", "trike", "trilateral", "trilby", "trilingual",
+ "trill", "trillion", "trilobite", "trilogy", "trim",
+ "trimaran", "trimester", "trimmer", "trimming", "trinitrotoluene",
+ "trinity", "trinket", "trio", "trip", "tripartite",
+ "triple", "triplet", "triplex", "triplicate", "tripod",
+ "tripos", "tripper", "tripping", "triptych", "tripwire",
+ "trireme", "trisect", "trite", "triumph", "triumphal",
+ "triumphant", "triumvir", "triumvirate", "trivet", "trivia",
+ "trivial", "trivialise", "triviality", "trivialize", "trochaic",
+ "trochee", "trod", "trodden", "troglodyte", "troika",
+ "trojan", "troll", "trolley", "trolleybus", "trollop",
+ "trombone", "trombonist", "troop", "trooper", "troops",
+ "troopship", "trope", "trophy", "tropic", "tropical",
+ "tropics", "trot", "troth", "trotskyist", "trotter",
+ "troubadour", "trouble", "troublemaker", "troubleshooter", "troublesome",
+ "trough", "trounce", "troupe", "trouper", "trouser",
+ "trousers", "trousseau", "trout", "trove", "trowel",
+ "truancy", "truant", "truce", "truck", "trucking",
+ "truckle", "truculence", "truculent", "trudge", "true",
+ "trueborn", "truehearted", "truelove", "truffle", "trug",
+ "truism", "truly", "trump", "trumpery", "trumpet",
+ "trumps", "truncate", "truncheon", "trundle", "trunk",
+ "trunks", "truss", "trust", "trustee", "trusteeship",
+ "trustful", "trustworthy", "trusty", "truth", "truthful",
+ "try", "tryst", "tsar", "tsarina", "tsp",
+ "tub", "tuba", "tubby", "tube", "tubeless",
+ "tuber", "tubercular", "tuberculosis", "tubful", "tubing",
+ "tubular", "tuck", "tucker", "tuckerbag", "tuesday",
+ "tuft", "tug", "tugboat", "tuition", "tulip",
+ "tulle", "tumble", "tumbledown", "tumbler", "tumbleweed",
+ "tumbrel", "tumbril", "tumescent", "tumid", "tummy",
+ "tumor", "tumour", "tumult", "tumultuous", "tumulus",
+ "tun", "tuna", "tundra", "tune", "tuneful",
+ "tuneless", "tuner", "tungsten", "tunic", "tunnel",
+ "tunny", "tup", "tuppence", "tuppenny", "turban",
+ "turbid", "turbine", "turbojet", "turboprop", "turbot",
+ "turbulence", "turbulent", "turd", "tureen", "turf",
+ "turgid", "turkey", "turmeric", "turmoil", "turn",
+ "turnabout", "turncoat", "turncock", "turner", "turning",
+ "turnip", "turnkey", "turnout", "turnover", "turnpike",
+ "turnstile", "turntable", "turpentine", "turpitude", "turquoise",
+ "turret", "turtle", "turtledove", "turtleneck", "tush",
+ "tusk", "tusker", "tussle", "tussock", "tut",
+ "tutelage", "tutelary", "tutor", "tutorial", "tutu",
+ "tuxedo", "twaddle", "twain", "twang", "twat",
+ "tweak", "twee", "tweed", "tweeds", "tweedy",
+ "tweet", "tweeter", "tweezers", "twelfth", "twelve",
+ "twelvemonth", "twenty", "twerp", "twice", "twiddle",
+ "twig", "twilight", "twill", "twin", "twinge",
+ "twinkle", "twinkling", "twirl", "twirp", "twist",
+ "twister", "twit", "twitch", "twitter", "twixt",
+ "two", "twofaced", "twopence", "twopenny", "twosome",
+ "tycoon", "tyke", "tympanum", "type", "typecast",
+ "typeface", "typescript", "typesetter", "typewriter", "typewritten",
+ "typhoid", "typhoon", "typhus", "typical", "typically",
+ "typify", "typist", "typographer", "typographic", "typography",
+ "tyrannical", "tyrannise", "tyrannize", "tyrannosaurus", "tyranny",
+ "tyrant", "tyre", "tyro", "tzar", "tzarina",
+ "ubiquitous", "ucca", "udder", "ufo", "ugh",
+ "ugly", "uhf", "ukulele", "ulcer", "ulcerate",
+ "ulcerous", "ullage", "ulna", "ult", "ulterior",
+ "ultimate", "ultimately", "ultimatum", "ultimo", "ultramarine",
+ "ultrasonic", "ultraviolet", "umber", "umbrage", "umbrella",
+ "umlaut", "umpire", "umpteen", "unabashed", "unabated",
+ "unable", "unabridged", "unaccompanied", "unaccountable", "unaccustomed",
+ "unadopted", "unadulterated", "unadvised", "unaffected", "unalloyed",
+ "unanimous", "unannounced", "unanswerable", "unapproachable", "unarmed",
+ "unasked", "unassuming", "unattached", "unattended", "unavailing",
+ "unawares", "unbalance", "unbar", "unbearable", "unbearably",
+ "unbeknown", "unbelief", "unbelievable", "unbeliever", "unbelieving",
+ "unbend", "unbending", "unbidden", "unbind", "unblushing",
+ "unborn", "unbosom", "unbounded", "unbowed", "unbridled",
+ "unbuckle", "unburden", "unbuttoned", "uncanny", "unceremonious",
+ "uncertain", "uncertainty", "uncharitable", "uncharted", "unchecked",
+ "unchristian", "unclad", "uncle", "unclean", "unclouded",
+ "uncolored", "uncoloured", "uncomfortable", "uncommitted", "uncommonly",
+ "uncompromising", "unconcerned", "unconditional", "unconscionable", "unconscious",
+ "unconsidered", "uncork", "uncouple", "uncouth", "uncover",
+ "uncritical", "uncrowned", "uncrushable", "unction", "unctuous",
+ "uncut", "undaunted", "undeceive", "undecided", "undeclared",
+ "undeniable", "under", "underact", "underarm", "underbelly",
+ "underbrush", "undercarriage", "undercharge", "underclothes", "undercoat",
+ "undercover", "undercurrent", "undercut", "underdog", "underdone",
+ "underestimate", "underfelt", "underfloor", "underfoot", "undergarment",
+ "undergo", "undergraduate", "underground", "undergrowth", "underhand",
+ "underhanded", "underhung", "underlay", "underlie", "underline",
+ "underling", "underlying", "undermanned", "undermentioned", "undermine",
+ "underneath", "undernourish", "underpants", "underpass", "underpin",
+ "underplay", "underprivileged", "underproof", "underquote", "underrate",
+ "underscore", "undersecretary", "undersell", "undersexed", "undershirt",
+ "underside", "undersigned", "undersized", "underslung", "understaffed",
+ "understand", "understanding", "understate", "understatement", "understudy",
+ "undertake", "undertaker", "undertaking", "undertone", "undertow",
+ "underwater", "underwear", "underweight", "underwent", "underworld",
+ "underwrite", "underwriter", "undesirable", "undeveloped", "undies",
+ "undischarged", "undistinguished", "undivided", "undo", "undoing",
+ "undomesticated", "undone", "undoubted", "undress", "undressed",
+ "undue", "undulate", "undulation", "unduly", "undying",
+ "unearth", "unearthly", "unease", "uneasy", "uneconomic",
+ "uneducated", "unemployed", "unemployment", "unenlightened", "unenviable",
+ "unequal", "unequaled", "unequalled", "unequivocal", "unerring",
+ "unesco", "uneven", "uneventful", "unexampled", "unexceptionable",
+ "unfailing", "unfaithful", "unfaltering", "unfathomable", "unfathomed",
+ "unfavorable", "unfavourable", "unfeeling", "unfettered", "unfit",
+ "unflagging", "unflappable", "unflinching", "unfold", "unforeseen",
+ "unforgettable", "unfortunate", "unfortunately", "unfounded", "unfrequented",
+ "unfrock", "unfurl", "ungainly", "ungenerous", "ungodly",
+ "ungovernable", "ungracious", "ungrateful", "ungrudging", "unguarded",
+ "unguent", "unhallowed", "unhand", "unhappily", "unhappy",
+ "unhealthy", "unheard", "unhinge", "unholy", "unhook",
+ "unhorse", "unicef", "unicorn", "unidentified", "unification",
+ "uniform", "uniformed", "unify", "unilateral", "unimpeachable",
+ "uninformed", "uninhabitable", "uninhibited", "uninterested", "uninterrupted",
+ "union", "unionise", "unionism", "unionist", "unionize",
+ "unique", "unisex", "unison", "unit", "unitarian",
+ "unite", "united", "unity", "universal", "universally",
+ "universe", "university", "unkempt", "unkind", "unkindly",
+ "unknowing", "unknown", "unlawful", "unlearn", "unleash",
+ "unleavened", "unless", "unlettered", "unlike", "unlikely",
+ "unload", "unlock", "unloose", "unloosen", "unmade",
+ "unmannerly", "unmarried", "unmask", "unmatched", "unmeasured",
+ "unmentionable", "unmentionables", "unmindful", "unmistakable", "unmitigated",
+ "unmoved", "unnatural", "unnecessary", "unnerve", "unnumbered",
+ "uno", "unobtrusive", "unofficial", "unorthodox", "unpack",
+ "unparalleled", "unparliamentary", "unperson", "unpick", "unplaced",
+ "unplayable", "unpleasant", "unplumbed", "unpracticed", "unpractised",
+ "unprecedented", "unprejudiced", "unpretentious", "unprincipled", "unprintable",
+ "unprofessional", "unprompted", "unprovoked", "unqualified", "unquestionable",
+ "unquestioning", "unquiet", "unquote", "unravel", "unreadable",
+ "unreal", "unreasonable", "unreasoning", "unrelenting", "unrelieved",
+ "unremitting", "unrequited", "unreserved", "unrest", "unrestrained",
+ "unrip", "unrivaled", "unrivalled", "unroll", "unruffled",
+ "unruly", "unsaddle", "unsaid", "unsavory", "unsavoury",
+ "unsay", "unscathed", "unschooled", "unscramble", "unscrew",
+ "unscripted", "unscrupulous", "unseat", "unseeing", "unseemly",
+ "unseen", "unserviceable", "unsettle", "unsettled", "unsex",
+ "unsexed", "unshakable", "unshakeable", "unshod", "unsightly",
+ "unskilled", "unsociable", "unsocial", "unsophisticated", "unsound",
+ "unsparing", "unspeakable", "unspotted", "unstop", "unstrung",
+ "unstuck", "unstudied", "unsullied", "unsung", "unswerving",
+ "untangle", "untapped", "untenable", "unthinkable", "unthinking",
+ "untie", "until", "untimely", "untinged", "untiring",
+ "unto", "untold", "untouchable", "untoward", "untruth",
+ "untruthful", "untutored", "unused", "unusual", "unusually",
+ "unutterable", "unvarnished", "unveil", "unversed", "unvoiced",
+ "unwarranted", "unwed", "unwell", "unwieldy", "unwind",
+ "unwitting", "unwonted", "unzip", "upbeat", "upbraid",
+ "upbringing", "upcoming", "update", "upend", "upgrade",
+ "upheaval", "uphill", "uphold", "upholster", "upholsterer",
+ "upholstery", "upkeep", "upland", "uplift", "upon",
+ "upper", "uppercut", "uppermost", "uppish", "uppity",
+ "upright", "uprising", "uproar", "uproarious", "uproot",
+ "upset", "upshot", "upstage", "upstairs", "upstanding",
+ "upstart", "upstream", "upsurge", "upswing", "uptake",
+ "uptight", "uptown", "upturn", "upturned", "upward",
+ "upwards", "uranium", "uranus", "urban", "urbane",
+ "urbanise", "urbanize", "urchin", "urge", "urgent",
+ "uric", "urinal", "urinary", "urinate", "urine",
+ "urn", "usage", "use", "useful", "usefulness",
+ "useless", "user", "usher", "usherette", "ussr",
+ "usual", "usually", "usurer", "usurious", "usurp",
+ "usury", "utensil", "uterine", "uterus", "utilise",
+ "utilitarian", "utilitarianism", "utility", "utilize", "utmost",
+ "utopia", "utopian", "utter", "utterance", "utterly",
+ "uvula", "uvular", "uxorious", "vac", "vacancy",
+ "vacant", "vacate", "vacation", "vaccinate", "vaccination",
+ "vaccine", "vacillate", "vacuity", "vacuous", "vacuum",
+ "vagabond", "vagary", "vagina", "vaginal", "vagrancy",
+ "vagrant", "vague", "vain", "vainglorious", "vainglory",
+ "valance", "vale", "valediction", "valedictory", "valency",
+ "valentine", "valerian", "valet", "valetudinarian", "valiant",
+ "valiantly", "valid", "validate", "valise", "valley",
+ "valor", "valour", "valse", "valuable", "valuation",
+ "value", "valuer", "valve", "valvular", "vamoose",
+ "vamp", "vampire", "van", "vanadium", "vandal",
+ "vandalise", "vandalism", "vandalize", "vane", "vanguard",
+ "vanilla", "vanish", "vanity", "vanquish", "vantagepoint",
+ "vapid", "vapidity", "vapor", "vaporise", "vaporize",
+ "vaporous", "vapors", "vapour", "vapours", "variability",
+ "variable", "variance", "variant", "variation", "varicolored",
+ "varicoloured", "varicose", "varied", "variegated", "variegation",
+ "variety", "variform", "variorum", "various", "variously",
+ "varlet", "varmint", "varnish", "varsity", "vary",
+ "vascular", "vase", "vasectomy", "vaseline", "vassal",
+ "vassalage", "vast", "vastly", "vastness", "vat",
+ "vatican", "vaudeville", "vault", "vaulted", "vaulting",
+ "vaunt", "veal", "vector", "veer", "veg",
+ "vegan", "vegetable", "vegetarian", "vegetarianism", "vegetate",
+ "vegetation", "vehement", "vehicle", "vehicular", "veil",
+ "veiled", "vein", "veined", "veining", "velar",
+ "velarize", "veld", "veldt", "vellum", "velocipede",
+ "velocity", "velour", "velours", "velvet", "velveteen",
+ "velvety", "venal", "vend", "vendee", "vender",
+ "vendetta", "vendor", "veneer", "venerable", "venerate",
+ "venereal", "vengeance", "vengeful", "venial", "venison",
+ "venom", "venomous", "venous", "vent", "ventilate",
+ "ventilation", "ventilator", "ventricle", "ventriloquism", "ventriloquist",
+ "venture", "venturer", "venturesome", "venue", "veracious",
+ "veracity", "veranda", "verandah", "verb", "verbal",
+ "verbalise", "verbalize", "verbally", "verbatim", "verbena",
+ "verbiage", "verbose", "verbosity", "verdant", "verdict",
+ "verdigris", "verdure", "verge", "verger", "verify",
+ "verily", "verisimilitude", "veritable", "verity", "vermicelli",
+ "vermiculite", "vermiform", "vermifuge", "vermilion", "vermin",
+ "verminous", "vermouth", "vernacular", "vernal", "veronal",
+ "veronica", "verruca", "versatile", "verse", "versed",
+ "versification", "versify", "version", "verso", "versus",
+ "vertebra", "vertebrate", "vertex", "vertical", "vertiginous",
+ "vertigo", "verve", "very", "vesicle", "vesicular",
+ "vesper", "vespers", "vessel", "vest", "vestibule",
+ "vestige", "vestigial", "vestment", "vestry", "vestryman",
+ "vesture", "vet", "vetch", "veteran", "veterinary",
+ "veto", "vex", "vexation", "vexatious", "vhf",
+ "via", "viable", "viaduct", "vial", "viands",
+ "vibes", "vibrancy", "vibrant", "vibraphone", "vibrate",
+ "vibration", "vibrato", "vibrator", "vicar", "vicarage",
+ "vicarious", "vice", "vicelike", "viceregal", "vicereine",
+ "viceroy", "vicinity", "vicious", "vicissitudes", "victim",
+ "victimise", "victimize", "victor", "victorian", "victorious",
+ "victory", "victual", "victualer", "victualler", "victuals",
+ "vicuaa", "vicuana", "vide", "videlicet", "video",
+ "videotape", "vie", "view", "viewer", "viewfinder",
+ "viewless", "viewpoint", "vigil", "vigilance", "vigilant",
+ "vigilante", "vignette", "vigor", "vigorous", "vigour",
+ "viking", "vile", "vilification", "vilify", "villa",
+ "village", "villager", "villain", "villainies", "villainous",
+ "villainy", "villein", "villeinage", "villenage", "vim",
+ "vinaigrette", "vindicate", "vindication", "vindictive", "vine",
+ "vinegar", "vinegary", "vinery", "vineyard", "vino",
+ "vinous", "vintage", "vintner", "vinyl", "viol",
+ "viola", "violate", "violence", "violent", "violet",
+ "violin", "violoncello", "vip", "viper", "virago",
+ "virgin", "virginal", "virginals", "virginia", "virginity",
+ "virgo", "virgule", "virile", "virility", "virologist",
+ "virology", "virtu", "virtual", "virtually", "virtue",
+ "virtuosity", "virtuoso", "virtuous", "virulence", "virulent",
+ "virus", "visa", "visage", "viscera", "visceral",
+ "viscosity", "viscount", "viscountcy", "viscountess", "viscous",
+ "vise", "visibility", "visible", "visibly", "vision",
+ "visionary", "visit", "visitant", "visitation", "visiting",
+ "visitor", "visor", "vista", "visual", "visualise",
+ "visualize", "visually", "vital", "vitalise", "vitality",
+ "vitalize", "vitally", "vitals", "vitamin", "vitiate",
+ "viticulture", "vitreous", "vitrify", "vitriol", "vitriolic",
+ "vituperate", "vituperation", "vituperative", "vivace", "vivacious",
+ "vivarium", "vivid", "viviparous", "vivisect", "vivisection",
+ "vivisectionist", "vixen", "vixenish", "vizier", "vocab",
+ "vocabulary", "vocal", "vocalise", "vocalist", "vocalize",
+ "vocation", "vocational", "vocative", "vociferate", "vociferation",
+ "vociferous", "vodka", "vogue", "voice", "voiceless",
+ "void", "voile", "vol", "volatile", "volcanic",
+ "volcano", "vole", "volition", "volitional", "volley",
+ "volleyball", "volt", "voltage", "voluble", "volume",
+ "volumes", "voluminous", "voluntary", "volunteer", "voluptuary",
+ "voluptuous", "volute", "vomit", "voodoo", "voracious",
+ "vortex", "votary", "vote", "voter", "votive",
+ "vouch", "voucher", "vouchsafe", "vow", "vowel",
+ "voyage", "voyager", "voyages", "voyeur", "vtol",
+ "vulcanise", "vulcanite", "vulcanize", "vulgar", "vulgarian",
+ "vulgarise", "vulgarism", "vulgarity", "vulgarize", "vulgate",
+ "vulnerable", "vulpine", "vulture", "vulva", "wac",
+ "wack", "wacky", "wad", "wadding", "waddle",
+ "wade", "wader", "wadge", "wadi", "wady",
+ "wafer", "waffle", "waft", "wag", "wage",
+ "wager", "wages", "waggery", "waggish", "waggle",
+ "waggon", "waggoner", "waggonette", "wagon", "wagoner",
+ "wagonette", "wagtail", "waif", "wail", "wain",
+ "wainscot", "waist", "waistband", "waistcoat", "waistline",
+ "wait", "waiter", "waits", "waive", "waiver",
+ "wake", "wakeful", "waken", "waking", "walk",
+ "walkabout", "walkaway", "walker", "walking", "walkout",
+ "walkover", "wall", "walla", "wallaby", "wallah",
+ "wallet", "wallflower", "wallop", "walloping", "wallow",
+ "wallpaper", "walnut", "walrus", "waltz", "wampum",
+ "wan", "wand", "wander", "wanderer", "wandering",
+ "wanderings", "wanderlust", "wane", "wangle", "wank",
+ "wanker", "want", "wanting", "wanton", "wants",
+ "wapiti", "war", "warble", "warbler", "ward",
+ "warden", "warder", "wardrobe", "wardroom", "warehouse",
+ "wares", "warfare", "warhead", "warhorse", "warily",
+ "warlike", "warlock", "warlord", "warm", "warmonger",
+ "warmth", "warn", "warning", "warp", "warpath",
+ "warrant", "warrantee", "warrantor", "warranty", "warren",
+ "warrior", "warship", "wart", "warthog", "wartime",
+ "wary", "was", "wash", "washable", "washbasin",
+ "washboard", "washbowl", "washcloth", "washday", "washer",
+ "washerwoman", "washhouse", "washing", "washout", "washroom",
+ "washstand", "washwoman", "washy", "wasp", "waspish",
+ "wassail", "wast", "wastage", "waste", "wasteful",
+ "waster", "wastrel", "watch", "watchband", "watchdog",
+ "watches", "watchful", "watchmaker", "watchman", "watchtower",
+ "watchword", "water", "waterborne", "watercolor", "watercolour",
+ "watercourse", "watercress", "waterfall", "waterfowl", "waterfront",
+ "waterhole", "waterline", "waterlogged", "waterloo", "waterman",
+ "watermark", "watermelon", "watermill", "waterpower", "waterproof",
+ "waters", "watershed", "waterside", "waterspout", "watertight",
+ "waterway", "waterwheel", "waterwings", "waterworks", "watery",
+ "watt", "wattage", "wattle", "wave", "wavelength",
+ "waver", "wavy", "wax", "waxen", "waxworks",
+ "waxy", "way", "waybill", "wayfarer", "wayfaring",
+ "waylay", "ways", "wayside", "wayward", "weak",
+ "weaken", "weakling", "weakness", "weal", "weald",
+ "wealth", "wealthy", "wean", "weapon", "weaponry",
+ "wear", "wearing", "wearisome", "weary", "weasel",
+ "weather", "weatherboard", "weathercock", "weatherglass", "weatherman",
+ "weatherproof", "weathers", "weave", "weaver", "web",
+ "webbed", "webbing", "wed", "wedded", "wedding",
+ "wedge", "wedged", "wedgwood", "wedlock", "wednesday",
+ "wee", "weed", "weeds", "weedy", "week",
+ "weekday", "weekend", "weekender", "weekly", "weeknight",
+ "weeny", "weep", "weeping", "weepy", "weevil",
+ "weft", "weigh", "weighbridge", "weight", "weighted",
+ "weighting", "weightless", "weighty", "weir", "weird",
+ "weirdie", "weirdo", "welch", "welcome", "weld",
+ "welder", "welfare", "welkin", "well", "wellbeing",
+ "wellborn", "wellington", "wellspring", "welsh", "welt",
+ "weltanschauung", "welter", "welterweight", "wen", "wench",
+ "wend", "wensleydale", "went", "wept", "were",
+ "werewolf", "wert", "wesleyan", "west", "westbound",
+ "westerly", "western", "westerner", "westernise", "westernize",
+ "westernmost", "westward", "westwards", "wet", "wether",
+ "wetting", "whack", "whacked", "whacker", "whacking",
+ "whale", "whalebone", "whaler", "whaling", "wham",
+ "wharf", "what", "whatever", "whatnot", "wheat",
+ "wheaten", "wheedle", "wheel", "wheelbarrow", "wheelbase",
+ "wheelchair", "wheelhouse", "wheeling", "wheels", "wheelwright",
+ "wheeze", "wheezy", "whelk", "whelp", "when",
+ "whence", "whenever", "where", "whereabouts", "whereas",
+ "whereat", "whereby", "wherefore", "wherefores", "wherein",
+ "whereof", "whereon", "wheresoever", "whereto", "whereupon",
+ "wherever", "wherewithal", "wherry", "whet", "whether",
+ "whetstone", "whew", "whey", "which", "whichever",
+ "whiff", "whiffy", "whig", "while", "whim",
+ "whimper", "whimsey", "whimsical", "whimsicality", "whimsy",
+ "whin", "whine", "whiner", "whinny", "whip",
+ "whipcord", "whiplash", "whippersnapper", "whippet", "whipping",
+ "whippoorwill", "whippy", "whir", "whirl", "whirligig",
+ "whirlpool", "whirlwind", "whirlybird", "whirr", "whisk",
+ "whisker", "whiskered", "whiskers", "whiskey", "whisky",
+ "whisper", "whist", "whistle", "whit", "white",
+ "whitebait", "whitehall", "whiten", "whitening", "whites",
+ "whitethorn", "whitethroat", "whitewash", "whither", "whiting",
+ "whitlow", "whitsun", "whitsuntide", "whittle", "whiz",
+ "whizz", "who", "whoa", "whodunit", "whoever",
+ "whole", "wholemeal", "wholesale", "wholesaler", "wholesome",
+ "wholly", "whom", "whoop", "whoopee", "whoosh",
+ "whop", "whopper", "whopping", "whore", "whorehouse",
+ "whoremonger", "whorl", "whortleberry", "whose", "whosoever",
+ "why", "whys", "wick", "wicked", "wicker",
+ "wickerwork", "wicket", "wide", "widely", "widen",
+ "widespread", "widgeon", "widow", "widowed", "widower",
+ "widowhood", "width", "wield", "wife", "wifely",
+ "wig", "wigged", "wigging", "wiggle", "wight",
+ "wigwam", "wilco", "wild", "wildcat", "wildebeest",
+ "wilderness", "wildfire", "wildfowl", "wildlife", "wildly",
+ "wile", "wiles", "wilful", "wiliness", "will",
+ "willful", "willies", "willing", "willow", "willowy",
+ "willpower", "wilt", "wily", "wimple", "wimpy",
+ "win", "wince", "winceyette", "winch", "wind",
+ "windbag", "windbreak", "windcheater", "windfall", "windily",
+ "winding", "windjammer", "windlass", "windless", "windmill",
+ "window", "windowpane", "windowsill", "windpipe", "windscreen",
+ "windshield", "windsock", "windstorm", "windswept", "windward",
+ "windy", "wine", "winebibbing", "wineglass", "winepress",
+ "wineskin", "wing", "winger", "wings", "wingspan",
+ "wink", "winkers", "winkle", "winner", "winning",
+ "winnings", "winnow", "winsome", "winter", "wintergreen",
+ "wintertime", "wintry", "wipe", "wiper", "wire",
+ "wirecutters", "wireless", "wiretap", "wireworm", "wiring",
+ "wiry", "wisdom", "wise", "wisecrack", "wish",
+ "wishbone", "wisp", "wispy", "wisteria", "wistful",
+ "wit", "witch", "witchcraft", "witchdoctor", "witchery",
+ "witching", "with", "withal", "withdraw", "withdrawal",
+ "withdrawn", "withe", "wither", "withering", "withers",
+ "withhold", "within", "without", "withstand", "withy",
+ "witless", "witness", "witticism", "witting", "witty",
+ "wives", "wizard", "wizardry", "wizened", "woad",
+ "wobble", "wobbly", "woe", "woebegone", "woeful",
+ "wog", "woke", "woken", "wold", "wolf",
+ "wolfhound", "wolfram", "wolfsbane", "woman", "womanhood",
+ "womanise", "womanish", "womanize", "womankind", "womanly",
+ "womb", "wombat", "womenfolk", "won", "wonder",
+ "wonderful", "wonderland", "wonderment", "wonders", "wondrous",
+ "wonky", "wont", "wonted", "woo", "wood",
+ "woodbine", "woodblock", "woodcock", "woodcraft", "woodcut",
+ "woodcutter", "wooded", "wooden", "woodenheaded", "woodland",
+ "woodlouse", "woodpecker", "woodpile", "woodshed", "woodsman",
+ "woodwind", "woodwork", "woodworm", "woody", "wooer",
+ "woof", "woofer", "wool", "woolen", "woolens",
+ "woolgather", "woolgathering", "woollen", "woollens", "woolly",
+ "woolsack", "woozy", "wop", "word", "wording",
+ "wordless", "wordplay", "words", "wordy", "wore",
+ "work", "workable", "workaday", "workbag", "workbasket",
+ "workbench", "workbook", "workday", "worker", "workhorse",
+ "workhouse", "working", "workings", "workman", "workmanlike",
+ "workmanship", "workout", "workpeople", "workroom", "works",
+ "workshop", "worktop", "world", "worldly", "worldshaking",
+ "worldwide", "worm", "wormhole", "wormwood", "wormy",
+ "worn", "worried", "worrisome", "worry", "worse",
+ "worsen", "worship", "worshipful", "worst", "worsted",
+ "wort", "worth", "worthless", "worthwhile", "worthy",
+ "wot", "wotcher", "would", "wouldst", "wound",
+ "wove", "woven", "wow", "wrac", "wrack",
+ "wraith", "wrangle", "wrangler", "wrap", "wrapper",
+ "wrapping", "wrath", "wreak", "wreath", "wreathe",
+ "wreck", "wreckage", "wrecker", "wren", "wrench",
+ "wrest", "wrestle", "wretch", "wretched", "wriggle",
+ "wright", "wring", "wringer", "wrinkle", "wrist",
+ "wristband", "wristlet", "wristwatch", "wristy", "writ",
+ "write", "writer", "writhe", "writing", "writings",
+ "written", "wrong", "wrongdoing", "wrongful", "wrongheaded",
+ "wrote", "wroth", "wrought", "wrung", "wry",
+ "wurst", "wyvern", "xenon", "xenophobia", "xerox",
+ "xylophone", "yacht", "yachting", "yachtsman", "yahoo",
+ "yak", "yam", "yammer", "yang", "yank",
+ "yankee", "yap", "yard", "yardage", "yardarm",
+ "yardstick", "yarn", "yarrow", "yashmak", "yaw",
+ "yawl", "yawn", "yaws", "yea", "yeah",
+ "year", "yearbook", "yearling", "yearlong", "yearly",
+ "yearn", "yearning", "years", "yeast", "yeasty",
+ "yell", "yellow", "yelp", "yen", "yeoman",
+ "yeomanry", "yes", "yesterday", "yet", "yeti",
+ "yew", "yid", "yiddish", "yield", "yielding",
+ "yin", "yippee", "yobbo", "yodel", "yoga",
+ "yoghurt", "yogi", "yogurt", "yoke", "yokel",
+ "yolk", "yonder", "yonks", "yore", "yorker",
+ "you", "young", "younger", "youngster", "your",
+ "yours", "yourself", "youth", "youthful", "yowl",
+ "yoyo", "yucca", "yule", "yuletide", "zany",
+ "zeal", "zealot", "zealotry", "zealous", "zebra",
+ "zebu", "zed", "zeitgeist", "zen", "zenana",
+ "zenith", "zephyr", "zeppelin", "zero", "zest",
+ "ziggurat", "zigzag", "zinc", "zinnia", "zionism",
+ "zip", "zipper", "zippy", "zither", "zizz",
+ "zodiac", "zombi", "zombie", "zonal", "zone",
+ "zoning", "zonked", "zoo", "zoologist", "zoology",
+ "zoom", "zoophyte", "zouave", "zucchini", "zulu",
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1a18965e/src/contrib/Analyzers/En/KStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/En/KStemFilter.cs b/src/contrib/Analyzers/En/KStemFilter.cs
new file mode 100644
index 0000000..32ba36b
--- /dev/null
+++ b/src/contrib/Analyzers/En/KStemFilter.cs
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.En
+{
+ /// <summary>
+ /// A <see cref="TokenFilter"/> that hands the term text of every token from the wrapped
+ /// stream to a <see cref="KStemmer"/> and overwrites the term whenever the stemmer's
+ /// <c>stem</c> call returns <c>true</c>.
+ /// </summary>
+ public class KStemFilter : TokenFilter
+ {
+     // private IKeywordAttribute keywordAtt; TODO
+     private readonly ITermAttribute _termAtt;
+     private readonly KStemmer _stemmer = new KStemmer();
+
+     /// <summary>
+     /// Wraps <paramref name="input"/> and obtains the <see cref="ITermAttribute"/>
+     /// through which this filter reads and rewrites term text.
+     /// </summary>
+     /// <param name="input">The upstream token stream to filter.</param>
+     public KStemFilter(TokenStream input) : base(input)
+     {
+         _termAtt = AddAttribute<ITermAttribute>();
+     }
+
+     /// <summary>
+     /// Advances the wrapped stream by one token and stems its term.
+     /// </summary>
+     /// <returns>
+     /// <c>false</c> when the wrapped stream has no further token; otherwise <c>true</c>.
+     /// </returns>
+     public override bool IncrementToken()
+     {
+         if (input.IncrementToken())
+         {
+             // TODO: the keyword guard (!keywordAtt.isKeyword()) is not wired up yet,
+             // so every token is offered to the stemmer.
+             // NOTE(review): the second argument is the literal 0; KStemmer.stem's
+             // signature is not visible here -- confirm this is the intended value.
+             bool termChanged = _stemmer.stem(_termAtt.Term, 0);
+             if (termChanged)
+             {
+                 _termAtt.SetTermBuffer(_stemmer.asString());
+             }
+
+             return true;
+         }
+
+         return false;
+     }
+ }
+}