You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flex.apache.org by jm...@apache.org on 2014/08/28 02:58:16 UTC
[11/21] remove Adobe from directory names (package name still
contains Adobe)
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as
----------------------------------------------------------------------
diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as
new file mode 100644
index 0000000..6ad78c6
--- /dev/null
+++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as
@@ -0,0 +1,1118 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+package com.adobe.linguistics.spelling.core
+{
+// import com.adobe.linguistics.spelling.core.PhoneticTable;
+ import com.adobe.linguistics.spelling.core.env.*;
+ import com.adobe.linguistics.spelling.core.error.*;
+ import com.adobe.linguistics.spelling.core.rule.*;
+ import com.adobe.linguistics.spelling.core.utils.SimpleNumberParser;
+
+ import flash.utils.Dictionary;
+
+
+ public class LinguisticRule
+ {
+
+
+ // Encoding name; only stored and returned by the encoding accessor in this class.
+ private var _encoding:String // ToDo, not sure how to handle this encoding stuff...
+
+ private var snp:SimpleNumberParser = new SimpleNumberParser();
+
+ // Affix lookup tables, all allocated in the constructor.
+ // *FlagTable: keyed by String.fromCharCode(entry.flag); entries sharing a flag are chained through nextElementWithFlag.
+ // *KeyTable: keyed by entry.affixKey; entries sharing a key are chained through nextElementWithKey.
+ private var _prefixFlagTable:Array;
+ private var _prefixKeyTable:Array;
+ private var _suffixFlagTable:Array;
+ private var _suffixKeyTable:Array;
+ // Keyed by the full suffix affixKey; each value is the head of an OptimizedSuffixEntry chain.
+ private var _optSuffixKeyTable:Dictionary;
+ // Keyed by the FIRST character of the prefix affixKey; each value is an Array of
+ // OptimizedPrefixEntry chain heads kept sorted on affixKey (see addOptPrefixEntry).
+ private var _optPrefixKeyTable:Dictionary;
+//these are attributes
+ private var _keyString:String;
+ private var _tryString:String;
+ private var _noSuggest:Number;// don't suggest words signed with NOSUGGEST flag
+ private var _forbiddenWord:Number; // forbidden word signing flag
+ private var _circumfix:Number=0; //Circumfix Flag
+ private var _ignoredChars:String; // letters + spec. word characters
+ private var _wordChars:String; //extends tokenizer of Hunspell command line interface with additional word character. For example, dot, dash, n-dash, numbers, percent sign are word character in Hungarian.
+ private var _languageCode:String;
+ private var _version:String;
+ private var _maxngramsugs:int = -1; // undefined
+ private var _nosplitsugs:int = 0;
+ private var _sugswithdots:int = 0;
+ private var _fullStrip:int;
+ private var _keepCase:Number;
+ // When true, affixCheck2 additionally runs the two-level (double affix) checks.
+ private var _haveContClass:Boolean;//added to support (double) prefixes
+
+ private var _flagMode:int;
+ private var _needAffix:Number;
+ private var _contClasses:Dictionary;//this is list of all possible contclasses
+ /* ToDo */
+
+ private var _onlyInCompound:Number = 0;
+// private var _phoneTable:PhoneticTable; //phone table
+/*
+
+
+ToDO: should be removed after we have complex-affix support and compound-word support..
+
+
+ pHMgr = ptr[0];
+ alldic = ptr;
+ maxdic = md;
+ keystring = NULL;
+ trystring = NULL;
+ encoding=NULL;
+ utf8 = 0;
+ complexprefixes = 0;
+ maptable = NULL;
+ nummap = 0;
+ breaktable = NULL;
+ numbreak = 0;
+ reptable = NULL;
+ numrep = 0;
+ iconvtable = NULL;
+ oconvtable = NULL;
+ checkcpdtable = NULL;
+ // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
+ simplifiedcpd = 0;
+ numcheckcpd = 0;
+ defcpdtable = NULL;
+ numdefcpd = 0;
+ phone = NULL;
+ compoundflag = FLAG_NULL; // permits word in compound forms
+ compoundbegin = FLAG_NULL; // may be first word in compound forms
+ compoundmiddle = FLAG_NULL; // may be middle word in compound forms
+ compoundend = FLAG_NULL; // may be last word in compound forms
+ compoundroot = FLAG_NULL; // compound word signing flag
+ compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
+ compoundforbidflag = FLAG_NULL; // compound forbidden flag for suffixed word
+ checkcompounddup = 0; // forbid double words in compounds
+ checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
+ checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
+ checkcompoundtriple = 0; // forbid compounds with triple letters
+ simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag
+ nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
+ lang = NULL; // language
+ langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
+ needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
+ cpdwordmax = -1; // default: unlimited wordcount in compound words
+ cpdmin = -1; // undefined
+ cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
+ cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
+ cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
+ cpdvowels_utf16_len=0; // vowels
+ pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
+ sfxappnd=NULL; // previous suffix for counting a special syllables BUG
+ cpdsyllablenum=NULL; // syllable count incrementing flag
+ checknum=0; // checking numbers, and word with numbers
+ wordchars=NULL; // letters + spec. word characters
+ wordchars_utf16=NULL; // letters + spec. word characters
+ wordchars_utf16_len=0; // letters + spec. word characters
+ ignorechars=NULL; // letters + spec. word characters
+ ignorechars_utf16=NULL; // letters + spec. word characters
+ ignorechars_utf16_len=0; // letters + spec. word characters
+ version=NULL; // affix and dictionary file version string
+ havecontclass=0; // flags of possible continuing classes (double affix)
+ // LEMMA_PRESENT: do not put the root into the morphological output. The lemma is present
+ // in the morphological description in the dictionary file. It's often combined with PSEUDOROOT.
+ lemma_present = FLAG_NULL;
+ circumfix = FLAG_NULL;
+ onlyincompound = FLAG_NULL;
+ maxngramsugs = -1; // undefined
+ nosplitsugs = 0;
+ sugswithdots = 0;
+ keepcase = 0;
+ checksharps = 0;
+ substandard = FLAG_NULL;
+ fullstrip = 0;
+ */
+
+ // Filter tables, allocated in the constructor.
+ private var _simpleFilterTable:Array; // SimpleFilter objects appended by addSimpleFilter
+ private var _mapFilterTable:Array; // MapFilter objects appended by addMapFilter
+ private var _iconvFilterTable:Array; //Contains conversion table for ICONV conversion
+ private var _oconvFilterTable:Array;//Contains conversion table for OCONV conversion
+ private var _breakTable:Array;//Contains list of characters in BREAK rule
+ private var _aliasfTable:Array;//Contains conversion table for AF rule
+ /* internal use only properties. */
+ // Scratch cursors used while walking entry chains in the check functions.
+ // _optSfxEntry is also set to the matching entry by optSuffixCheck2 / optTwoSuffixCheck.
+ private var _pfxEntry:PrefixEntry;
+ private var _sfxEntry:SuffixEntry;
+ private var _optSfxEntry:OptimizedSuffixEntry;
+ private var _optPfxEntry:OptimizedPrefixEntry;
+ // Source of dictionaries for lookup(); null until the dictionaryManager setter is called.
+ private var _dictMgr:DictionaryManager;
+
+ /**
+  * Creates an empty rule set: allocates every affix and filter table and
+  * resets all attributes to their defaults. No dictionary manager is attached.
+  */
+ public function LinguisticRule()
+ {
+     // affix tables
+     _prefixFlagTable = [];
+     _prefixKeyTable = [];
+     _suffixFlagTable = [];
+     _suffixKeyTable = [];
+     _optSuffixKeyTable = new Dictionary(true);
+     _optPrefixKeyTable = new Dictionary(true);
+
+     // filter / conversion tables
+     _simpleFilterTable = [];
+     _mapFilterTable = [];
+     _iconvFilterTable = [];
+     _oconvFilterTable = [];
+     // We are not adding any break points by default. Hunspell C does this for -, ^-, -$
+     _breakTable = [];
+     _aliasfTable = [];
+
+     // attribute defaults (assigned through the property setters, in the original order)
+     noSuggest = InternalConstants.FLAG_NULL;
+     tryString = null;
+     keyString = null;
+     ignoredChars = null;
+     wordChars = null;
+     version = null;
+     languageCode = null;
+     forbiddenWord = InternalConstants.FORBIDDENWORD;
+     needAffix = InternalConstants.FLAG_NULL;
+     circumfix = InternalConstants.FLAG_NULL;
+     maxNgramSuggestions = -1; // undefined
+     nosplitSuggestions = 0;
+     suggestionsWithDots = 0;
+     fullStrip = 0;
+     keepCase = 0;
+     onlyInCompound = 0;
+     flagMode = InternalConstants.FLAG_CHAR;
+     _contClasses = new Dictionary();
+
+     _dictMgr = null;
+ }
+
+ /*
+ * Deprecated function for now...
+ * History:
+ * A pre-version of the implementation for error detection. After I optimized the code for performance,
+ * I dropped this function at that time, but you know performance measuring is a tricky problem...
+ * ToDo: Needs a revisit when we implement complex-affix support and compound-word support.
+ */
+ // check if word with affixes is correctly spelled
+ /**
+  * Deprecated affix check: tries the suffix tables first and, if nothing
+  * matches, the prefix tables.
+  *
+  * @param word       word to analyse
+  * @param needFlag   flag forwarded to the entry checks
+  * @param inCompound compound indicator forwarded to the entry checks
+  * @return the matching HashEntry, or null when no affix rule accepts the word
+  */
+ public function affixCheck( word:String, needFlag:int, inCompound:int ):HashEntry {
+     // check all suffixes first
+     var rv:HashEntry = optSuffixCheck(word, needFlag, inCompound);
+     if ( rv ) return rv;
+     // if still not found check all prefixes.
+     // optPrefixCheck is declared as (word, sfxopts, ppfx, needFlag, inCompound);
+     // the previous call passed the last two arguments in the opposite order.
+     return optPrefixCheck(word, 0, null, needFlag, inCompound);
+ }
+
+ // This function checks if word with affixes is correctly spelled.
+ /**
+  * Checks whether a word built with affixes is correctly spelled.
+  * Tries, in order: one-level prefix (optionally crossed with a suffix),
+  * one-level suffix and, only when haveContClass is set, two-level suffix
+  * followed by prefix + two-level suffix.
+  *
+  * @param word       word to analyse; words shorter than two characters yield null
+  * @param needFlag   flag forwarded to the individual checks
+  * @param inCompound compound indicator forwarded to the one-level checks
+  *                   (the two-level checks are always called with 0)
+  * @return the first matching HashEntry, or null
+  */
+ public function affixCheck2( word:String, needFlag:int, inCompound:int ):HashEntry {
+     if ( word.length < 2 ) return null;
+
+     // one-level prefix, or one-level prefix + one-level suffix:
+     // un->run, under->taker, milli->litre->s, d'->autre->s
+     var hit:HashEntry = optPrefixCheck2(word, inCompound, needFlag);
+     if ( hit ) return hit;
+
+     // one-level suffix: drink->able or drink->s
+     hit = optSuffixCheck2(word, 0, null, needFlag, inCompound);
+     if ( hit || !this.haveContClass ) return hit;
+
+     // double affix checking: two-level suffix, drink->able->s
+     hit = optTwoSuffixCheck(word, 0, null, needFlag, 0);
+     if ( hit ) return hit;
+
+     // prefix followed by two-level suffix: un->drink->able->s
+     return optTwoPrefixCheck(word, 0, needFlag);
+ }
+
+
+ /*
+ * Deprecated function for now...
+ * History:
+ * A pre-version of the implementation for error detection. After I optimized the code for performance,
+ * I dropped this function at that time, but you know performance measuring is a tricky problem...
+ * ToDo: Needs a revisit when we implement complex-affix support and compound-word support.
+ */
+ /**
+  * Deprecated prefix check over the optimized prefix table.
+  *
+  * _optPrefixKeyTable is filled by addOptPrefixEntry, which stores one Array of
+  * chain heads per FIRST character of the affix key. The previous version of
+  * this function looked the table up by whole prefix strings and assigned the
+  * bucket Array straight to an OptimizedPrefixEntry variable, which cannot
+  * succeed with that layout. The buckets are now walked explicitly.
+  *
+  * @param word       word to analyse
+  * @param sfxopts    forwarded unchanged to OptimizedPrefixEntry.checkWord
+  * @param ppfx       forwarded unchanged to OptimizedPrefixEntry.checkWord
+  * @param needFlag   forwarded unchanged to OptimizedPrefixEntry.checkWord
+  * @param inCompound forwarded unchanged to OptimizedPrefixEntry.checkWord
+  * @return the first matching HashEntry, or null
+  */
+ public function optPrefixCheck(word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int) :HashEntry {
+     var rv:HashEntry = null;
+     var i:int;
+     var head:OptimizedPrefixEntry;
+     var key:String;
+
+     // first handle the special case of 0 length prefixes
+     var bucket:Array = _optPrefixKeyTable[''] as Array;
+     if ( bucket ) {
+         for ( i = 0; i < bucket.length; ++i ) {
+             for ( _optPfxEntry = bucket[i]; _optPfxEntry; _optPfxEntry = _optPfxEntry.nextElementWithKey ) {
+                 rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
+                 if ( rv ) return rv;
+             }
+         }
+     }
+
+     // an empty word has no first character to select a bucket with
+     if ( word.length == 0 ) return rv;
+
+     // now handle the general case: every non-empty prefix key that is strictly
+     // shorter than the word (same range the old substr(0,i) loop covered).
+     // The bucket is sorted on affixKey, so shorter matching keys come first.
+     bucket = _optPrefixKeyTable[word.charAt(0)] as Array;
+     if ( bucket ) {
+         for ( i = 0; i < bucket.length; ++i ) {
+             head = bucket[i];
+             key = head.affixKey;
+             if ( key.length >= word.length || word.indexOf(key) != 0 ) continue;
+             for ( _optPfxEntry = head; _optPfxEntry; _optPfxEntry = _optPfxEntry.nextElementWithKey ) {
+                 rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
+                 if ( rv ) return rv;
+             }
+         }
+     }
+
+     return rv;
+ }
+
+// This function checks one level prefix OR one level prefix+ one level suffix
+ /**
+  * One-level prefix check (each entry's checkWord2 may also cross the prefix
+  * with a one-level suffix).
+  *
+  * The zero-length prefix bucket is tried first, then the bucket selected by
+  * the word's first character. That bucket is kept sorted on affixKey, which
+  * lets the scan stop early based on the second character of each key.
+  *
+  * @param word       word to analyse
+  * @param inCompound forwarded to OptimizedPrefixEntry.checkWord2
+  * @param needFlag   forwarded to OptimizedPrefixEntry.checkWord2
+  * @return the first matching HashEntry, or null
+  */
+ public function optPrefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry {
+     var match:HashEntry = null;
+     var idx:int;
+     var entry:OptimizedPrefixEntry = null; // local cursor; optTwoPrefixCheck has its own
+     var key:String;
+     var keySecond:Number;
+
+     // zero-length prefixes
+     var emptyBucket:* = _optPrefixKeyTable[''];
+     if ( emptyBucket != undefined ) {
+         for ( idx = 0; idx < emptyBucket.length; ++idx ) {
+             for ( entry = emptyBucket[idx]; entry; entry = entry.nextElementWithKey ) {
+                 match = entry.checkWord2(word, inCompound, needFlag);
+                 if ( match ) return match;
+             }
+         }
+     }
+
+     // general case: bucket chosen by the first character of the word
+     var wordSecond:Number = word.charCodeAt(1);
+     var sawEqualSecond:Boolean = false;
+     var bucket:* = _optPrefixKeyTable[word.charAt(0)];
+     if ( bucket != undefined ) {
+         for ( idx = 0; idx < bucket.length; ++idx ) {
+             entry = bucket[idx];
+             key = entry.affixKey;
+             if ( key.length != 1 ) {
+                 keySecond = key.charCodeAt(1);
+                 // keys past the word's second character cannot match any more
+                 if ( keySecond > wordSecond ) break;
+                 if ( keySecond < wordSecond ) {
+                     if ( sawEqualSecond ) break;
+                     continue;
+                 }
+                 sawEqualSecond = true;
+             }
+             // the key must be a leading substring of the word
+             if ( word.indexOf(key) != 0 ) continue;
+             for ( ; entry; entry = entry.nextElementWithKey ) {
+                 match = entry.checkWord2(word, inCompound, needFlag);
+                 if ( match ) return match;
+             }
+         }
+     }
+
+     return match;
+ }
+
+ // This is a new function added to include one level prefix checking followed by two level suffix checking
+ /**
+  * One-level prefix check followed by two-level suffix checking, delegated to
+  * each entry's checkTwoWord. Buckets are scanned exactly as in
+  * optPrefixCheck2: zero-length prefixes first, then the sorted bucket for the
+  * word's first character with early termination on the second character.
+  *
+  * @param word       word to analyse
+  * @param inCompound forwarded to OptimizedPrefixEntry.checkTwoWord
+  * @param needFlag   forwarded to OptimizedPrefixEntry.checkTwoWord
+  * @return the first matching HashEntry, or null
+  */
+ public function optTwoPrefixCheck(word:String, inCompound:int, needFlag:int) :HashEntry {
+     //TODO: pfx / sfxrevkey bookkeeping will probably be needed for compound rules.
+     var match:HashEntry = null;
+     var idx:int;
+     var entry:OptimizedPrefixEntry = null; // local cursor
+     var key:String;
+     var keySecond:Number;
+
+     // zero-length prefixes
+     var emptyBucket:* = _optPrefixKeyTable[''];
+     if ( emptyBucket != undefined ) {
+         for ( idx = 0; idx < emptyBucket.length; ++idx ) {
+             for ( entry = emptyBucket[idx]; entry; entry = entry.nextElementWithKey ) {
+                 match = entry.checkTwoWord(word, inCompound, needFlag);
+                 if ( match ) return match;
+             }
+         }
+     }
+
+     // general case: bucket chosen by the first character of the word
+     var wordSecond:Number = word.charCodeAt(1);
+     var sawEqualSecond:Boolean = false;
+     var bucket:* = _optPrefixKeyTable[word.charAt(0)];
+     if ( bucket != undefined ) {
+         for ( idx = 0; idx < bucket.length; ++idx ) {
+             entry = bucket[idx];
+             key = entry.affixKey;
+             if ( key.length != 1 ) {
+                 keySecond = key.charCodeAt(1);
+                 if ( keySecond > wordSecond ) break;
+                 if ( keySecond < wordSecond ) {
+                     if ( sawEqualSecond ) break;
+                     continue;
+                 }
+                 sawEqualSecond = true;
+             }
+             if ( word.indexOf(key) != 0 ) continue;
+             for ( ; entry; entry = entry.nextElementWithKey ) {
+                 match = entry.checkTwoWord(word, inCompound, needFlag);
+                 if ( match ) return match;
+             }
+         }
+     }
+     return match; // null whenever the scans above found nothing
+ }
+
+
+ /*
+ * Deprecated function for now...
+ * History:
+ * A pre-version of the implementation for error detection. After I optimized the code for performance,
+ * I dropped this function at that time, but you know performance measuring is a tricky problem...
+ * ToDo: Needs a revisit when we implement complex-affix support and compound-word support.
+ */
+ /**
+  * Deprecated one-level suffix check over the optimized suffix table.
+  * Tries the zero-length suffix chain, then every proper suffix of the word
+  * from shortest to longest (the whole word itself is not tried).
+  *
+  * Table values are narrowed with "as OptimizedSuffixEntry", the same guard
+  * optSuffixCheck2 applies with "is": a candidate such as "constructor" can
+  * resolve to a member inherited by the Dictionary object rather than to a
+  * stored entry, and assigning that to the typed cursor would fail.
+  *
+  * @param word       word to analyse
+  * @param needFlag   forwarded to OptimizedSuffixEntry.checkWord
+  * @param inCompound forwarded to OptimizedSuffixEntry.checkWord
+  * @return the first matching HashEntry, or null
+  */
+ public function optSuffixCheck( word:String, needFlag:int, inCompound:int):HashEntry {
+     var rv:HashEntry = null;
+     var head:OptimizedSuffixEntry;
+
+     // first handle the special case of 0 length suffixes
+     head = this._optSuffixKeyTable[''] as OptimizedSuffixEntry;
+     if ( head ) {
+         for ( _optSfxEntry = head; _optSfxEntry; _optSfxEntry = _optSfxEntry.nextElementWithKey ) {
+             rv = _optSfxEntry.checkWord(word, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     // now handle the general case
+     for ( var i:int = word.length - 1; i > 0; --i ) {
+         head = _optSuffixKeyTable[word.substr(i)] as OptimizedSuffixEntry;
+         if ( !head ) continue; // no entry, or an inherited non-entry value
+         for ( _optSfxEntry = head; _optSfxEntry; _optSfxEntry = _optSfxEntry.nextElementWithKey ) {
+             rv = _optSfxEntry.checkWord(word, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     return rv;
+ }
+
+//This function takes care of all one level suffix stripping. This is called from other affix stripping functions also
+ /**
+  * One-level suffix stripping; also called from the other affix stripping
+  * code paths. Tries the zero-length suffix chain, then every suffix of the
+  * word from the last character up to the whole word.
+  * On success the matching entry is remembered in _optSfxEntry (it will
+  * possibly be needed by the compound check).
+  *
+  * @param word       word to analyse
+  * @param sfxopts    forwarded to OptimizedSuffixEntry.checkWord2
+  * @param ppfx       forwarded to OptimizedSuffixEntry.checkWord2
+  * @param needFlag   forwarded to OptimizedSuffixEntry.checkWord2
+  * @param inCompound forwarded to OptimizedSuffixEntry.checkWord2
+  * @param cclass     forwarded to OptimizedSuffixEntry.checkWord2
+  * @param pfxcclass  forwarded to OptimizedSuffixEntry.checkWord2
+  * @return the first matching HashEntry, or null
+  */
+ public function optSuffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int, cclass:int=0, pfxcclass:int=0):HashEntry {
+     var match:HashEntry = null;
+     var candidate:OptimizedSuffixEntry = null; // local cursor over one chain
+
+     // zero-length suffixes
+     // NOTE: a needaffix / contclass pre-filter existed here only as commented-out code.
+     if ( this._optSuffixKeyTable[''] != undefined ) {
+         for ( candidate = this._optSuffixKeyTable['']; candidate; candidate = candidate.nextElementWithKey ) {
+             match = candidate.checkWord2(word, sfxopts, ppfx, inCompound, needFlag, cclass, pfxcclass);
+             if ( match ) {
+                 _optSfxEntry = candidate;
+                 return match;
+             }
+         }
+     }
+
+     // general case: shortest suffix first, the whole word last
+     for ( var start:int = word.length - 1; start >= 0; --start ) {
+         // values that are not suffix entries are skipped
+         candidate = _optSuffixKeyTable[word.substr(start)] as OptimizedSuffixEntry;
+         for ( ; candidate; candidate = candidate.nextElementWithKey ) {
+             match = candidate.checkWord2(word, sfxopts, ppfx, inCompound, needFlag, cclass, pfxcclass);
+             if ( match ) {
+                 _optSfxEntry = candidate;
+                 return match;
+             }
+         }
+     }
+
+     return match;
+ }
+
+ // This is a new function added to include two level suffix checking
+ /**
+  * Two-level suffix check. For every candidate suffix entry (zero-length chain
+  * first, then every suffix of the word from shortest to the whole word) each
+  * of the entry's flags that is registered in contClasses is handed to the
+  * entry's checkTwoWord as the continuation class.
+  * On success the matching entry is remembered in _optSfxEntry.
+  *
+  * @param word      word to analyse
+  * @param sfxopts   forwarded to OptimizedSuffixEntry.checkTwoWord
+  * @param ppfx      forwarded to OptimizedSuffixEntry.checkTwoWord
+  * @param needFlag  forwarded to OptimizedSuffixEntry.checkTwoWord
+  * @param pfxcclass forwarded to OptimizedSuffixEntry.checkTwoWord
+  * @return the first matching HashEntry, or null
+  */
+ public function optTwoSuffixCheck(word:String, sfxopts:int, ppfx:AffixEntry,needFlag:int,pfxcclass:int=0) :HashEntry {
+     var match:HashEntry = null;
+     var candidate:OptimizedSuffixEntry; // local cursor over one chain
+     var flagIdx:int;
+
+     // zero-length suffixes
+     if ( this._optSuffixKeyTable[''] != undefined ) {
+         for ( candidate = this._optSuffixKeyTable['']; candidate; candidate = candidate.nextElementWithKey ) {
+             for ( flagIdx = 0; candidate.flags && flagIdx < candidate.flags.length; flagIdx++ ) {
+                 // only flags that can act as a continuation class are checked further
+                 if ( this.contClasses[candidate.flags[flagIdx]] != true ) continue;
+                 match = candidate.checkTwoWord(word, sfxopts, ppfx, needFlag, candidate.flags[flagIdx], pfxcclass );
+                 if ( match ) {
+                     _optSfxEntry = candidate; // will possibly be needed in the compound check
+                     return match;
+                 }
+             }
+         }
+     }
+
+     // general case: shortest suffix first, the whole word last
+     for ( var start:int = word.length - 1; start >= 0; --start ) {
+         // values that are not suffix entries are skipped
+         candidate = _optSuffixKeyTable[word.substr(start)] as OptimizedSuffixEntry;
+         for ( ; candidate; candidate = candidate.nextElementWithKey ) {
+             for ( flagIdx = 0; candidate.flags && flagIdx < candidate.flags.length; flagIdx++ ) {
+                 if ( this.contClasses[candidate.flags[flagIdx]] != true ) continue;
+                 match = candidate.checkTwoWord(word, sfxopts, ppfx, needFlag, candidate.flags[flagIdx], pfxcclass );
+                 if ( match ) {
+                     _optSfxEntry = candidate; // will possibly be needed in the compound check
+                     return match;
+                 }
+             }
+         }
+     }
+
+     return match; // null in most cases
+ }
+
+ /*
+ * Deprecated function for now...
+ * History:
+ * A pre-version of the implementation for error detection. After I optimized the code for performance,
+ * I dropped this function at that time, but you know performance measuring is a tricky problem...
+ * ToDo: Needs a revisit when we implement complex-affix support and compound-word support.
+ */
+ /**
+  * Deprecated one-level suffix check over the non-optimized suffix key table.
+  * Tries the zero-length suffix chain, then every proper suffix of the word
+  * from shortest to longest (the whole word itself is not tried).
+  *
+  * _suffixKeyTable is an Array used as a string-keyed map, so a candidate such
+  * as "length" (from "wavelength") resolves to the Array's own length rather
+  * than to a stored entry. Values are therefore narrowed with "as SuffixEntry"
+  * before use; anything that is not a SuffixEntry is skipped instead of being
+  * assigned to the typed cursor.
+  *
+  * @param word       word to analyse
+  * @param sfxopts    forwarded to SuffixEntry.checkWord
+  * @param ppfx       forwarded to SuffixEntry.checkWord
+  * @param needFlag   forwarded to SuffixEntry.checkWord
+  * @param inCompound forwarded to SuffixEntry.checkWord
+  * @return the first matching HashEntry, or null
+  */
+ public function suffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int):HashEntry {
+     var rv:HashEntry = null;
+     var head:SuffixEntry;
+
+     // first handle the special case of 0 length suffixes
+     head = this._suffixKeyTable[''] as SuffixEntry;
+     if ( head ) {
+         for ( _sfxEntry = head; _sfxEntry; _sfxEntry = _sfxEntry.nextElementWithKey ) {
+             rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     // now handle the general case
+     for ( var i:int = word.length - 1; i > 0; --i ) {
+         head = _suffixKeyTable[word.substr(i)] as SuffixEntry;
+         if ( !head ) continue; // no entry, or a built-in Array member
+         for ( _sfxEntry = head; _sfxEntry; _sfxEntry = _sfxEntry.nextElementWithKey ) {
+             rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     return rv;
+ }
+
+
+ /*
+ * Deprecated function for now...
+ * History:
+ * A pre-version of the implementation for error detection. After I optimized the code for performance,
+ * I dropped this function at that time, but you know performance measuring is a tricky problem...
+ * ToDo: Needs a revisit when we implement complex-affix support and compound-word support.
+ */
+ /**
+  * Deprecated one-level prefix check over the non-optimized prefix key table.
+  * Tries the zero-length prefix chain, then every proper prefix of the word
+  * from shortest to longest (the whole word itself is not tried).
+  *
+  * _prefixKeyTable is an Array used as a string-keyed map, so a candidate such
+  * as "length" (from "lengthy") resolves to the Array's own length rather than
+  * to a stored entry. Values are therefore narrowed with "as PrefixEntry"
+  * before use; anything that is not a PrefixEntry is skipped instead of being
+  * assigned to the typed cursor.
+  *
+  * @param word       word to analyse
+  * @param inCompound forwarded to PrefixEntry.checkWord
+  * @param needFlag   forwarded to PrefixEntry.checkWord
+  * @return the first matching HashEntry, or null
+  */
+ private function prefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry {
+     var rv:HashEntry = null;
+     var head:PrefixEntry;
+
+     // first handle the special case of 0 length prefixes
+     head = _prefixKeyTable[''] as PrefixEntry;
+     if ( head ) {
+         for ( _pfxEntry = head; _pfxEntry; _pfxEntry = _pfxEntry.nextElementWithKey ) {
+             rv = _pfxEntry.checkWord(word, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     // now handle the general case
+     for ( var i:int = 1; i < word.length; ++i ) {
+         head = _prefixKeyTable[word.substr(0, i)] as PrefixEntry;
+         if ( !head ) continue; // no entry, or a built-in Array member
+         for ( _pfxEntry = head; _pfxEntry; _pfxEntry = _pfxEntry.nextElementWithKey ) {
+             rv = _pfxEntry.checkWord(word, inCompound, needFlag);
+             if ( rv ) return rv;
+         }
+     }
+
+     return rv;
+ }
+
+ /**
+  * Looks the word up in every dictionary of the attached dictionary manager,
+  * in list order, and stops at the first hit.
+  * A dictionary manager must have been assigned before calling this.
+  *
+  * @param word word to look up
+  * @return the HashEntry from the first dictionary containing the word, or null
+  */
+ public function lookup(word:String ):HashEntry {
+     for ( var idx:int = 0; idx < this._dictMgr.dictonaryList.length; ++idx ) {
+         var entry:HashEntry = this._dictMgr.dictonaryList[idx].getElement(word);
+         if ( entry ) return entry;
+     }
+     return null;
+ }
+
+ /** Flag mode value; the constructor sets it to InternalConstants.FLAG_CHAR. */
+ public function get flagMode():int {
+     return _flagMode;
+ }
+ public function set flagMode(value:int) :void {
+     _flagMode = value;
+ }
+
+ /** Encoding name; stored and returned unchanged. */
+ public function get encoding():String {
+     return _encoding;
+ }
+ public function set encoding(value:String) :void {
+     _encoding = value;
+ }
+
+ /** KEEPCASE flag value. */
+ public function get keepCase():Number {
+     return _keepCase;
+ }
+ public function set keepCase(value:Number):void {
+     _keepCase = value;
+ }
+
+ /** When true, affixCheck2 also runs the two-level (double affix) checks. */
+ public function get haveContClass():Boolean {
+     return _haveContClass;
+ }
+ public function set haveContClass(value:Boolean):void {
+     _haveContClass = value;
+ }
+
+ /** NEEDAFFIX flag value. */
+ public function get needAffix():Number {
+     return _needAffix;
+ }
+ public function set needAffix(value:Number):void {
+     _needAffix = value;
+ }
+
+ /** Circumfix flag value. */
+ public function get circumfix():Number {
+     return _circumfix;
+ }
+ public function set circumfix(value:Number):void {
+     _circumfix = value;
+ }
+
+ /** ONLYINCOMPOUND flag value. */
+ public function get onlyInCompound():Number {
+     return _onlyInCompound;
+ }
+ public function set onlyInCompound(value:Number):void {
+     _onlyInCompound = value;
+ }
+
+ /** Dictionary manager whose dictionaries lookup() searches. */
+ public function get dictionaryManager():DictionaryManager {
+     return _dictMgr;
+ }
+ public function set dictionaryManager(value:DictionaryManager) :void {
+     _dictMgr = value;
+ }
+
+ /** Full-strip setting. */
+ public function get fullStrip():int {
+     return _fullStrip;
+ }
+ public function set fullStrip(value:int):void {
+     _fullStrip = value;
+ }
+
+ /** Suggestions-with-dots setting. */
+ public function get suggestionsWithDots():int {
+     return _sugswithdots;
+ }
+ public function set suggestionsWithDots(value:int):void {
+     _sugswithdots = value;
+ }
+
+ /** No-split-suggestions setting. */
+ public function get nosplitSuggestions():int {
+     return _nosplitsugs;
+ }
+ public function set nosplitSuggestions(value:int ) :void {
+     _nosplitsugs = value;
+ }
+
+ /** Maximum number of n-gram suggestions; -1 means undefined. */
+ public function get maxNgramSuggestions():int {
+     return _maxngramsugs;
+ }
+ public function set maxNgramSuggestions(value:int ) :void {
+     _maxngramsugs = value;
+ }
+
+ /** Version string. */
+ public function get version():String {
+     return _version;
+ }
+ public function set version(value:String) :void {
+     _version = value;
+ }
+
+ /** Language code. */
+ public function get languageCode():String {
+     return _languageCode;
+ }
+ public function set languageCode(value:String) :void {
+     _languageCode = value;
+ }
+
+ /** Additional word characters. */
+ public function get wordChars():String {
+     return _wordChars;
+ }
+ public function set wordChars(value:String):void {
+     _wordChars = value;
+ }
+
+ /**
+  * Registers a map filter built from mapString unless a filter with the same
+  * mapCharSet is already registered.
+  *
+  * @param mapString character set handed to the MapFilter constructor
+  * @return true when the filter was added, false when it was a duplicate
+  */
+ public function addMapFilter(mapString:String ):Boolean {
+     var candidate:MapFilter = new MapFilter(mapString);
+     var total:int = _mapFilterTable.length;
+     for ( var idx:int = 0; idx < total; ++idx ) {
+         if ( _mapFilterTable[idx].mapCharSet == mapString )
+             return false;
+     }
+     _mapFilterTable.push(candidate);
+     return true;
+ }
+
+ /**
+  * Registers a simple replacement filter unless an identical
+  * (matchString, replacement) pair is already registered.
+  *
+  * @param matchString text to match
+  * @param replacement text to substitute
+  * @return true when the filter was added, false when it was a duplicate
+  */
+ public function addSimpleFilter(matchString:String, replacement:String):Boolean {
+     var candidate:SimpleFilter = new SimpleFilter( matchString, replacement);
+     var total:int = _simpleFilterTable.length;
+     for ( var idx:int = 0; idx < total; ++idx ) {
+         var existing:* = _simpleFilterTable[idx];
+         if ( existing.matchString == matchString && existing.replacement == replacement )
+             return false;
+     }
+     _simpleFilterTable.push(candidate);
+     return true;
+ }
+
+
+ //--adding to iconv/oconv table
+
+ /**
+  * Adds a conversion rule to the ICONV or OCONV table unless an identical
+  * (matchString, replacement) pair is already present in that table.
+  *
+  * @param matchString text to match
+  * @param replacement text to substitute
+  * @param ioflag      true selects the ICONV table, false the OCONV table
+  * @return true when the rule was added, false when it was a duplicate
+  */
+ public function addConvFilter(matchString:String, replacement:String, ioflag:Boolean):Boolean {
+     var target:Array = ioflag ? _iconvFilterTable : _oconvFilterTable;
+     if ( target ) {
+         for ( var idx:int = 0; idx < target.length; ++idx ) {
+             var existing:* = target[idx];
+             if ( existing.matchString == matchString && existing.replacement == replacement )
+                 return false;
+         }
+     }
+     var rule:SimpleFilter = new SimpleFilter( matchString, replacement);
+     target.push(rule);
+     return true;
+ }
+
+ /**
+  * Creates a prefix or suffix entry and registers it in both the plain and
+  * the optimized affix tables.
+  *
+  * @param affixFlag     flag of the affix rule
+  * @param stripString   characters to strip; must not be null
+  * @param affixValue    affix text; must not be null
+  * @param conditionsStr condition string; must be neither null nor empty
+  * @param morph         morphological description handed to the entry
+  * @param permission    permission value handed to the entry
+  * @param affixType     0 creates a prefix entry, any other value a suffix entry
+  * @param contclass     continuation class string handed to the entry
+  * @return false when the arguments fail validation, true otherwise
+  */
+ public function addAffixEntry(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0, contclass:String=null):Boolean{
+     // Validate once; the same test used to be repeated in both branches below,
+     // where it could never fire.
+     if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false;
+     if ( affixType == 0 ) {
+         var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission,contclass);
+         pfxEntry.attributeManager = this;
+         addPrefixEntry(pfxEntry);
+         addOptPrefixEntry(pfxEntry);
+     }else {
+         var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission,contclass);
+         sfxEntry.attributeManager = this;
+         addSuffixEntry(sfxEntry);
+         addOptSuffixEntry(sfxEntry);
+     }
+     return true;
+ }
+
+ /**
+  * Inserts a prefix into the optimized prefix table.
+  *
+  * The table is keyed by the first character of the affix key; each bucket is
+  * an Array of chain heads, one per distinct affix key, kept sorted on
+  * affixKey. Within a chain, an entry that reports isSimilarObject() for the
+  * new prefix is extended in place; otherwise a new optimized entry is
+  * appended to the end of the chain.
+  *
+  * @param pfxEntry prefix entry to insert
+  * @return always true
+  */
+ private function addOptPrefixEntry(pfxEntry:PrefixEntry):Boolean {
+     var bucketKey:String = pfxEntry.affixKey.charAt(0);
+     var fresh:OptimizedPrefixEntry = new OptimizedPrefixEntry(pfxEntry);
+     fresh.attributeManager = this;
+
+     // first prefix starting with this character: open a new bucket
+     if ( _optPrefixKeyTable[bucketKey] == undefined ) {
+         _optPrefixKeyTable[bucketKey] = [fresh];
+         return true;
+     }
+
+     var bucket:* = _optPrefixKeyTable[bucketKey];
+     for ( var idx:int = 0; idx < bucket.length; ++idx ) {
+         var node:OptimizedPrefixEntry = bucket[idx];
+         if ( node.affixKey != pfxEntry.affixKey ) continue;
+
+         // same key: merge into a similar entry, or append at the end of the chain
+         for (;;) {
+             if ( node.isSimilarObject(pfxEntry) ) {
+                 node.extendObject(pfxEntry);
+                 return true;
+             }
+             if ( node.nextElementWithKey == null ) break;
+             node = node.nextElementWithKey;
+         }
+         node.nextElementWithKey = fresh;
+         return true;
+     }
+
+     // new key inside an existing bucket: add it and restore the sort order
+     bucket.push(fresh);
+     bucket.sortOn("affixKey");
+     return true;
+ }
+
+
+ /**
+  * Inserts a suffix into the optimized suffix table, which is keyed by the
+  * full affix key. Entries sharing a key form a chain; an entry that reports
+  * isSimilarObject() for the new suffix is extended in place, otherwise a new
+  * optimized entry is appended to the end of the chain.
+  *
+  * @param sfxEntry suffix entry to insert
+  * @return always true
+  */
+ private function addOptSuffixEntry(sfxEntry:SuffixEntry):Boolean {
+     var fresh:OptimizedSuffixEntry;
+
+     // first suffix with this key: it becomes the chain head
+     if ( _optSuffixKeyTable[sfxEntry.affixKey] == undefined ) {
+         fresh = new OptimizedSuffixEntry(sfxEntry);
+         fresh.attributeManager = this;
+         _optSuffixKeyTable[sfxEntry.affixKey] = fresh;
+         return true;
+     }
+
+     // existing key: merge into a similar entry, or append at the end of the chain
+     var node:OptimizedSuffixEntry = _optSuffixKeyTable[sfxEntry.affixKey];
+     for (;;) {
+         if ( node.isSimilarObject(sfxEntry) ) {
+             node.extendObject(sfxEntry);
+             return true;
+         }
+         if ( node.nextElementWithKey == null ) break;
+         node = node.nextElementWithKey;
+     }
+     fresh = new OptimizedSuffixEntry(sfxEntry);
+     fresh.attributeManager = this;
+     node.nextElementWithKey = fresh;
+     return true;
+ }
+
+
+
+ /**
+  * Appends a prefix entry to the plain prefix tables: once under its flag
+  * character (chained through nextElementWithFlag) and once under its affix
+  * key (chained through nextElementWithKey). New entries go to the end of
+  * the respective chain.
+  *
+  * Prefix and suffix insertion are deliberately kept as separate functions
+  * with separate classes and tables; merging them is left for a later pass.
+  *
+  * @param pfxEntry prefix entry to insert
+  * @return always true
+  */
+ private function addPrefixEntry(pfxEntry:PrefixEntry):Boolean {
+     var tail:PrefixEntry;
+
+     // flag table
+     var flagKey:String = String.fromCharCode(pfxEntry.flag);
+     if ( _prefixFlagTable[flagKey] == undefined ) {
+         _prefixFlagTable[flagKey] = pfxEntry;
+     } else {
+         tail = _prefixFlagTable[flagKey];
+         while ( tail.nextElementWithFlag != null )
+             tail = tail.nextElementWithFlag;
+         tail.nextElementWithFlag = pfxEntry;
+     }
+
+     // key table
+     if ( _prefixKeyTable[pfxEntry.affixKey] == undefined ) {
+         _prefixKeyTable[pfxEntry.affixKey] = pfxEntry;
+     } else {
+         tail = _prefixKeyTable[pfxEntry.affixKey];
+         while ( tail.nextElementWithKey != null )
+             tail = tail.nextElementWithKey;
+         tail.nextElementWithKey = pfxEntry;
+     }
+     return true;
+ }
+
+ /**
+  * Registers a suffix rule in both suffix lookup tables.
+  *
+  * The rule is appended to the end of the chain stored in _suffixFlagTable
+  * under its flag character (entries linked through nextElementWithFlag) and
+  * to the end of the chain stored in _suffixKeyTable under its affixKey
+  * (entries linked through nextElementWithKey). A rule that is the first one
+  * for a given key becomes the head of that chain.
+  *
+  * Prefix and suffix insertion are deliberately kept as two parallel
+  * functions working on separate classes and tables; merging them to reduce
+  * code size is left for a later clean-up.
+  *
+  * @param sfxEntry the suffix rule to register.
+  * @return always true.
+  */
+ private function addSuffixEntry(sfxEntry:SuffixEntry ):Boolean {
+     var flagKey:String = String.fromCharCode(sfxEntry.flag);
+     var tail:SuffixEntry;
+     var following:SuffixEntry;
+
+     // Chain of suffix rules sharing the same flag.
+     if ( _suffixFlagTable[flagKey] != undefined ) {
+         tail = _suffixFlagTable[flagKey];
+         following = tail.nextElementWithFlag;
+         while ( following != null ) {
+             tail = following;
+             following = tail.nextElementWithFlag;
+         }
+         tail.nextElementWithFlag = sfxEntry;
+     } else {
+         _suffixFlagTable[flagKey] = sfxEntry;
+     }
+
+     // Chain of suffix rules sharing the same affix key.
+     if ( _suffixKeyTable[sfxEntry.affixKey] != undefined ) {
+         tail = _suffixKeyTable[sfxEntry.affixKey];
+         following = tail.nextElementWithKey;
+         while ( following != null ) {
+             tail = following;
+             following = tail.nextElementWithKey;
+         }
+         tail.nextElementWithKey = sfxEntry;
+     } else {
+         _suffixKeyTable[sfxEntry.affixKey] = sfxEntry;
+     }
+
+     return true;
+ }
+
+ /**
+  * Deprecated for now.
+  *
+  * History: this is an earlier implementation used for error detection. It
+  * was dropped when the code was optimized for performance, although
+  * measuring performance is a tricky problem.
+  * ToDo: revisit when implementing complex-affix and compound-word support.
+  *
+  * Builds a PrefixEntry (affixType == 0) or a SuffixEntry (any other
+  * affixType) from the arguments, attaches this object as its
+  * attributeManager and inserts it into the matching flag/key tables.
+  *
+  * @param affixFlag     flag code of the rule.
+  * @param stripString   strip string of the rule; must not be null.
+  * @param affixValue    affix string of the rule; must not be null.
+  * @param conditionsStr condition string of the rule; must be neither null nor empty.
+  * @param morph         forwarded unchanged to the entry constructor.
+  * @param permission    forwarded unchanged to the entry constructor.
+  * @param affixType     0 creates a prefix entry, any other value a suffix entry.
+  * @return false when the arguments fail validation (nothing is inserted), true otherwise.
+  */
+ public function addAffixEntry2(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0):Boolean{
+     // Validate once; both branches below need exactly the same arguments.
+     if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false;
+
+     if ( affixType == 0 ) {
+         var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission);
+         pfxEntry.attributeManager = this;
+         addPrefixEntry(pfxEntry);
+     } else {
+         var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission);
+         sfxEntry.attributeManager = this;
+         addSuffixEntry(sfxEntry);
+     }
+     return true;
+ }
+
+ /** Prefix entries indexed by flag character; filled by addPrefixEntry(). */
+ public function get prefixFlagTable():Array {
+     return _prefixFlagTable;
+ }
+
+ /** Prefix entries indexed by affix key; filled by addPrefixEntry(). */
+ public function get prefixKeyTable():Array {
+     return _prefixKeyTable;
+ }
+
+ /** Suffix entries indexed by flag character; filled by addSuffixEntry(). */
+ public function get suffixFlagTable():Array {
+     return _suffixFlagTable;
+ }
+
+ /** Suffix entries indexed by affix key; filled by addSuffixEntry(). */
+ public function get suffixKeyTable():Array {
+     return _suffixKeyTable;
+ }
+
+ /** Stores the flag value that marks a dictionary entry as forbidden. */
+ public function set forbiddenWord(value:Number) :void {
+     _forbiddenWord = value;
+ }
+
+ /** Flag value that marks a dictionary entry as forbidden. */
+ public function get forbiddenWord():Number {
+     return _forbiddenWord;
+ }
+
+ /** Stores the set of characters to be ignored in words. */
+ public function set ignoredChars(value:String ) :void {
+     _ignoredChars = value;
+ }
+
+ /** Set of characters to be ignored in words. */
+ public function get ignoredChars():String {
+     return _ignoredChars;
+ }
+
+ /** Stores the key string (presumably the KEY rule of the affix file; confirm against the parser). */
+ public function set keyString(value:String):void {
+     _keyString = value;
+ }
+
+ /**
+  * Key string. When none has been set, the field is first initialized to
+  * InternalConstants.SPELL_KEYSTRING and that default is returned.
+  */
+ public function get keyString():String {
+     if ( _keyString == null ) {
+         _keyString = InternalConstants.SPELL_KEYSTRING;
+     }
+     return _keyString;
+ }
+
+ /** Stores the try string (presumably the TRY rule of the affix file; confirm against the parser). */
+ public function set tryString(value:String):void {
+     _tryString = value;
+ }
+
+ /** Try string as last set; there is no default. */
+ public function get tryString():String {
+     return _tryString;
+ }
+
+ /** The _contClasses dictionary. */
+ public function get contClasses():Dictionary {
+     return this._contClasses;
+ }
+
+
+ /** Stores the no-suggest flag value. */
+ public function set noSuggest(value:Number ):void {
+     _noSuggest = value;
+ }
+ /** No-suggest flag value as last set. */
+ public function get noSuggest():Number {
+     return _noSuggest;
+ }
+
+ /** The _simpleFilterTable array. */
+ public function get simpleFilterTable():Array {
+     return _simpleFilterTable;
+ }
+
+ /** Input conversion table; conv() uses it when its ioflag argument is true. */
+ public function get iconvFilterTable():Array {
+     return _iconvFilterTable;
+ }
+
+ /** Output conversion table; conv() uses it when its ioflag argument is false. */
+ public function get oconvFilterTable():Array {
+     return _oconvFilterTable;
+ }
+
+/* public function get phoneTable():PhoneticTable {
+ return this._phoneTable;
+ }
+*/
+ /** The _breakTable array. */
+ public function get breakTable():Array {
+     return _breakTable;
+ }
+ /** The _aliasfTable array. */
+ public function get aliasfTable():Array{
+     return _aliasfTable;
+ }
+
+
+ /** The _mapFilterTable array. */
+ public function get mapFilterTable():Array {
+     return _mapFilterTable;
+ }
+
+ /**
+  * Supports the ICONV/OCONV rules: called whenever an input or output
+  * conversion of a word is needed.
+  *
+  * Every entry of the selected table is applied in table order; for each
+  * entry all occurrences of its matchString are replaced by its replacement,
+  * scanning left to right. Text inserted by a replacement is not rescanned
+  * for the same entry, so the scan always terminates and no occurrence is
+  * skipped when the replacement is shorter or longer than the pattern.
+  * Entries whose matchString is null or empty are ignored.
+  *
+  * @param word     the text to convert.
+  * @param convWord output array; the converted text is pushed onto it. When
+  *                 no entry matched, a null value is pushed instead.
+  * @param ioflag   true selects the input table (_iconvFilterTable), false
+  *                 the output table (_oconvFilterTable).
+  * @return true if at least one replacement was made. false otherwise; when
+  *         the selected table is null or empty nothing is pushed at all.
+  */
+ public function conv(word:String,convWord:Array,ioflag:Boolean):Boolean{
+     var convTable:Array=(ioflag)?this._iconvFilterTable:this._oconvFilterTable;
+     if ( (convTable==null) || (convTable.length == 0) ) return false;
+
+     var change:Boolean=false;
+     var wspace:String;
+     var searchIndex:int;
+     var pattern:String;
+     var tail:String;
+     for ( var i:int = 0; i < convTable.length; ++i ) {
+         pattern = convTable[i].matchString;
+         // An empty pattern matches at every position and would never let the scan advance.
+         if ( pattern == null || pattern.length == 0 ) continue;
+
+         // Every table entry scans the (possibly already converted) word from its start.
+         searchIndex = 0;
+         while ( (searchIndex = word.indexOf(pattern, searchIndex)) != -1 ) {
+             tail = word.substr(searchIndex + pattern.length);
+             wspace = word.substr(0, searchIndex) + convTable[i].replacement + tail;
+             // Resume right behind the inserted replacement. Using the pattern
+             // length here would skip text when the replacement is shorter and
+             // rescan (possibly forever) when the replacement is longer.
+             searchIndex = wspace.length - tail.length;
+             word = wspace;
+             change = true;
+         }
+     }
+     convWord.push(wspace);
+     return change;
+ }
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as
----------------------------------------------------------------------
diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as
new file mode 100644
index 0000000..2b874dd
--- /dev/null
+++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as
@@ -0,0 +1,32 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+package com.adobe.linguistics.spelling.core
+{
+ /**
+  * Mutable holder for one numeric piece of spelling information. Because it
+  * is an object, a callee can update the value and the caller sees the change.
+  */
+ public class SpellingInfo
+ {
+     /** The stored information value. */
+     public var Info:Number;
+
+     /**
+      * @param information initial value of Info.
+      */
+     public function SpellingInfo(information:Number)
+     {
+         this.Info = information;
+     }
+
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as
----------------------------------------------------------------------
diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as
new file mode 100644
index 0000000..4b80d7c
--- /dev/null
+++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as
@@ -0,0 +1,207 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+
+package com.adobe.linguistics.spelling.core
+{
+
+ import com.adobe.linguistics.spelling.core.container.HashTable;
+ import com.adobe.linguistics.spelling.core.env.InternalConstants;
+ import com.adobe.linguistics.spelling.core.error.*;
+ import com.adobe.linguistics.spelling.core.utils.StringUtils;
+
+ import flash.utils.Dictionary;
+
+
+ /**
+  * In-memory word list of the spelling engine.
+  *
+  * Words are stored in a HashTable that maps the word text to a HashEntry
+  * carrying the affix flag string and a description; further homonyms of the
+  * same word are chained through HashEntry.next.
+  */
+ public class SquigglyDictionary
+ {
+     private var _hashtbl:HashTable;
+     private var _forbiddenword:int;
+     private var _ignoredCharSet:String;
+     private var _flag_mode:int;
+     private var _languageCode:String;
+
+
+
+     /**
+      * Creates an empty dictionary.
+      *
+      * All settings start from their defaults (forbidden-word flag from
+      * InternalConstants.FORBIDDENWORD, no ignored characters, flag mode
+      * InternalConstants.FLAG_CHAR, no language code). When a rule object is
+      * supplied, its forbiddenWord and ignoredChars values replace the
+      * defaults if they are set, so that words are stored with the same
+      * ignored characters removed that SquigglyEngine removes before lookup.
+      *
+      * @param attrMgr linguistic rules to take settings from; may be null.
+      */
+     public function SquigglyDictionary(attrMgr:LinguisticRule)
+     {
+         _hashtbl = new HashTable( true ); // useWeakReferences:Boolean = true
+
+         // Defaults apply in every case; previously they were only assigned
+         // when no rule object was given, leaving the normal path unconfigured.
+         _forbiddenword = InternalConstants.FORBIDDENWORD;
+         _ignoredCharSet = null;
+         _flag_mode = InternalConstants.FLAG_CHAR;
+         _languageCode = null;
+
+         if ( attrMgr ) {
+             // Unset values (NaN/0, null/"") keep the defaults above.
+             if ( attrMgr.forbiddenWord ) _forbiddenword = int(attrMgr.forbiddenWord);
+             if ( attrMgr.ignoredChars ) _ignoredCharSet = attrMgr.ignoredChars;
+         }
+     }
+
+     private function set forbiddenword(value:int ) :void {
+         this._forbiddenword = value;
+     }
+
+     /** Flag value that marks an entry as forbidden. */
+     public function get forbiddenword():int {
+         return this._forbiddenword;
+     }
+
+     private function set ignoredCharSet(value:String):void {
+         this._ignoredCharSet = value;
+     }
+
+     /** Characters removed from every word before it is stored; null when there are none. */
+     public function get ignoredCharSet():String {
+         return this._ignoredCharSet;
+     }
+
+     private function set flag_mode(value:int):void {
+         this._flag_mode = value;
+     }
+
+     /** Flag mode of this dictionary. */
+     public function get flag_mode():int {
+         return this._flag_mode;
+     }
+
+     private function set languageCode(value:String ) :void {
+         this._languageCode = value;
+     }
+
+     /** Language code of this dictionary; null when unknown. */
+     public function get languageCode():String {
+         return this._languageCode;
+     }
+
+     /** @return true if the hash table has an entry for key. */
+     public function containsKey(key:String ):Boolean
+     {
+         return _hashtbl.containsKey(key );
+     }
+
+     /** @return the HashEntry stored for key, or null when the table holds no HashEntry for it. */
+     public function getElement( key:String ):HashEntry {
+         var res:* = _hashtbl.getElement(key );
+         return (res is HashEntry) ? res:null;
+     }
+
+     /** Alias of addWord(). */
+     public function put(key:String, affixString:String=null, description:String = null):Boolean {
+         return addWord( key, affixString, description );
+     }
+
+     /** The Dictionary object backing the hash table. */
+     public function get dictionary():Dictionary {
+         return this._hashtbl.hashMap;
+     }
+
+     /**
+      * Collects the keys accepted by a callback.
+      *
+      * The callback is invoked as callback(key, index, result), where index
+      * is the zero-based position of the key in the iteration and result is
+      * the array of keys accepted so far.
+      *
+      * @param callback   predicate deciding whether a key is collected.
+      * @param thisObject object to use as "this" inside the callback.
+      * @return the accepted keys, or null when no key was accepted.
+      */
+     public function filter( callback:Function, thisObject:* = null):Array {
+         var res:Array = new Array();
+         var index:int = 0;
+         var dict:Dictionary = this._hashtbl.hashMap;
+         for ( var key:* in dict ) {
+             if ( callback.call( thisObject, key, index, res ) ) {
+                 res.push( key );
+             }
+             // The index used to stay at 0 for every key.
+             ++index;
+         }
+
+         return (res.length == 0) ? null: res;
+     }
+
+
+     /**
+      * Adds a word and, where its capitalization requires it, a hidden
+      * capitalized variant (see addHiddenCapitalizedWord()).
+      *
+      * @param word  the word to add; null is rejected.
+      * @param affix affix flag string of the word, or null.
+      * @param desc  description stored with the word, or null.
+      * @return true if the word itself was stored; false for a null word.
+      */
+     public function addWord( word:String, affix:String = null, desc:String = null ) :Boolean {
+         if ( word == null ) return false;
+         var captype:int = StringUtils.getCapType(word);
+         var res:Boolean = addWordWithAffix(word,affix,desc,false );
+         addHiddenCapitalizedWord(word,captype, affix,desc);
+         return res;
+     }
+
+     /**
+      * Adds an inner capitalized form so that all-caps input can be matched:
+      *   Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
+      *   Allcaps with suffixes: CIA's -> CIA'S
+      *
+      * The hidden entry is the word lower-cased with its first character
+      * upper-cased, flagged with InternalConstants.ONLYUPCASEFLAG in addition
+      * to the given flags. Words carrying the forbidden flag get no hidden entry.
+      *
+      * @return always true.
+      */
+     private function addHiddenCapitalizedWord( word:String, captype:int, affix:String=null, desc:String=null ) :Boolean {
+         var needsHiddenForm:Boolean =
+             (captype == InternalConstants.HUHCAP) ||
+             (captype == InternalConstants.HUHINITCAP) ||
+             ((captype == InternalConstants.ALLCAP) && (affix != null));
+         var forbidden:Boolean = (affix != null) && HashEntry.TESTAFF(affix, _forbiddenword);
+
+         if ( needsHiddenForm && !forbidden ) {
+             // Concatenating onto a null String would produce the text "null"
+             // followed by the flag, i.e. bogus flags 'n', 'u' and 'l'.
+             var hiddenAffix:String = ((affix != null) ? affix : "") + String.fromCharCode(InternalConstants.ONLYUPCASEFLAG);
+             word = word.toLocaleLowerCase();
+             word = word.charAt(0).toLocaleUpperCase() + word.substr(1);
+             addWordWithAffix(word,hiddenAffix,desc,true);
+         }
+         return true;
+     }
+
+     /**
+      * Stores one word form in the hash table.
+      *
+      * A new word gets a fresh HashEntry. For a word that is already present:
+      *  - a hidden (onlyupcase) form is not added and false is returned;
+      *  - a regular form replaces the first homonym that carries
+      *    ONLYUPCASEFLAG, or is appended to the chain when there is none.
+      *
+      * @param onlyupcase true when the form is a hidden capitalized variant.
+      * @return true if the form was stored, false if it was dropped.
+      */
+     private function addWordWithAffix( word:String, affix:String, desc:String, onlyupcase:Boolean ):Boolean {
+         if (_ignoredCharSet != null) {
+             word = StringUtils.removeIgnoredChars(word, _ignoredCharSet);
+         }
+//ToDo: the following comment should be removed after we have complex-affix support.
+//
+// if (complexprefixes) {
+// reverseword(word);
+// }
+// hp->var = H_OPT;
+// if (aliasm) {
+// hp->var += H_OPT_ALIASM;
+// store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
+// } else {
+// strcpy(hpw + wbl + 1, desc);
+// if (complexprefixes) {
+// if (utf8) reverseword_utf(HENTRY_DATA(hp));
+// else reverseword(HENTRY_DATA(hp));
+// }
+// }
+// if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
+
+         if ( !_hashtbl.containsKey(word) ) {
+             _hashtbl.put(word, new HashEntry(affix,desc) );
+             return true;
+         }
+
+         // The word exists already, so a hidden upper-case homonym is not needed.
+         if ( onlyupcase ) return false;
+
+         var hentry:HashEntry = _hashtbl.getElement(word);
+         while ( true ) {
+             // remove hidden onlyupcase homonym: overwrite it with the real entry
+             if ( (hentry.affixFlagVector != null) && hentry.testAffix(InternalConstants.ONLYUPCASEFLAG) ) {
+                 hentry.affixFlagVector = affix;
+                 hentry.variableFields = desc; /* need a better implementation,refer the beginning of this function */
+                 return true;
+             }
+             if ( hentry.next == null ) break;
+             hentry = hentry.next;
+         }
+
+         // hentry is the last homonym of the chain: append the new one.
+         hentry.addEntry(affix,desc);
+         return true;
+     }
+
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as
----------------------------------------------------------------------
diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as
new file mode 100644
index 0000000..d7fdc5c
--- /dev/null
+++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as
@@ -0,0 +1,426 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+
+package com.adobe.linguistics.spelling.core
+{
+
+ import com.adobe.linguistics.spelling.core.env.ExternalConstants;
+ import com.adobe.linguistics.spelling.core.env.InternalConstants;
+ import com.adobe.linguistics.spelling.core.utils.*;
+
+ /**
+  * Front end of the spelling engine: checks single words (spell()) and
+  * produces suggestions for them (suggest()) using a LinguisticRule object
+  * and one or more SquigglyDictionary instances.
+  */
+ public class SquigglyEngine
+ {
+
+     private var _ignoreCappedWord:Boolean; // Hello is always correct
+     private var _ignoreAllUpperCase:Boolean; // HELLO is always correct
+     private var _ignoreWordWithNumber:Boolean; // win2003 is always correct
+     private const SPELL_COMPOUND:int = (1 << 0);
+     private const SPELL_FORBIDDEN:int = (1 << 1);
+     private const SPELL_ALLCAP:int = (1 << 2);
+     private const SPELL_NOCAP:int = (1 << 3);
+     private const SPELL_INITCAP:int = (1 << 4);
+
+     private const MAXDIC:int = 20;
+     private const MAXSHARPS:int = 5;
+
+     private var attributeMgr:LinguisticRule;
+     private var dictMgr:DictionaryManager;
+     private var sugestionMgr:SuggestionManager;
+     private var encoding:String;
+     private var wordbreak:Array;//an Array that holds the word breaks (BREAK rule)
+     private var langCode:int;
+     private var complexPrefixes:int;
+     private var maxWordLength:int;
+
+     /**
+      * @param rule linguistic rules to check against; must not be null.
+      * @param dict first dictionary to search; must not be null.
+      * @throws Error (id 200901) when either argument is null.
+      */
+     public function SquigglyEngine( rule:LinguisticRule, dict:SquigglyDictionary )
+     {
+         if ( rule == null ) throw new Error("illegal argument for constructor", 200901);
+         if ( dict == null ) throw new Error("illegal argument for constructor", 200901);
+
+         maxWordLength = InternalConstants.MAXWORDLEN;
+
+         dictMgr = new DictionaryManager();
+         dictMgr.addDictionary(dict);
+         attributeMgr = rule;
+         attributeMgr.dictionaryManager = dictMgr;
+         sugestionMgr = new SuggestionManager( rule, false);
+         this.wordbreak=attributeMgr.breakTable;
+         this.ignoreWordWithNumber = false;
+         this.ignoreCappedWord = false;
+         this.ignoreAllUpperCase = false;
+
+     }
+
+     /** When true, spell() accepts every word that contains a digit. */
+     public function set ignoreWordWithNumber( value:Boolean):void {
+         this._ignoreWordWithNumber =value;
+     }
+     public function get ignoreWordWithNumber():Boolean {
+         return this._ignoreWordWithNumber;
+     }
+
+     /** When true, spell() accepts every capitalized word that contains no digit. */
+     public function set ignoreCappedWord(value:Boolean):void {
+         this._ignoreCappedWord = value;
+     }
+     public function get ignoreCappedWord():Boolean {
+         return this._ignoreCappedWord;
+     }
+
+     /** When true, spell() accepts every all-upper-case word that contains no digit. */
+     public function set ignoreAllUpperCase(value:Boolean ):void {
+         this._ignoreAllUpperCase = value;
+     }
+     public function get ignoreAllUpperCase():Boolean {
+         return this._ignoreAllUpperCase;
+     }
+
+     /** Forwarded to the suggestion manager's fastMode property. */
+     public function set fastMode(value:Boolean ) :void {
+         this.sugestionMgr.fastMode = value;
+     }
+
+     public function get fastMode():Boolean {
+         return this.sugestionMgr.fastMode;
+     }
+
+     /** Adds a further dictionary to search; returns the dictionary manager's result. */
+     public function addDictionary( dict:SquigglyDictionary ) : Boolean {
+         return dictMgr.addDictionary(dict);
+     }
+
+     /**
+      * Checks the spelling of one word.
+      *
+      * Steps: normalization, input conversion (ICONV table), trimming,
+      * removal of trailing periods, acceptance of plain numbers, the
+      * ignore-options, dictionary/affix lookup in the capitalization variants
+      * that fit the word, and finally splitting at the BREAK strings with each
+      * part checked recursively.
+      *
+      * @param word the word to check.
+      * @return true if the word is accepted; false otherwise, including for
+      *         null, empty or over-long input and when no dictionary is loaded.
+      */
+     public function spell( word:String ) :Boolean {
+         if ( word == null ) return false;
+         if ( word.length > maxWordLength ) return false;
+
+         word = StringUtils.normalize(word);
+
+         var captype:int = InternalConstants.NOCAP;
+         var hasNumber:Boolean =false; //assuming that there are no numbers;
+         var abbv:int = 0;
+         var i:int;
+         var rv:HashEntry = null;
+         var info:SpellingInfo = new SpellingInfo(0);
+         var wspace:String;
+
+         // input conversion USING ICONV TABLE
+         var convWord:Array=new Array;
+         if(this.attributeMgr && this.attributeMgr.iconvFilterTable && this.attributeMgr.iconvFilterTable.length!=0){
+             this.attributeMgr.conv(word,convWord,InternalConstants.CONV_ICONV);
+             wspace=convWord.pop();
+             if(wspace) word=wspace;
+         }
+
+         // first skip over any leading or trailing blanks
+         word = StringUtils.trim( word );
+
+         // now strip off any trailing periods (recording their presence)
+         for ( i = word.length-1; (i>=0) && (word.charCodeAt(i) == 46) ; --i ) { // '.'
+             abbv++;
+         }
+         word = word.substr(0, word.length- abbv );
+         captype = StringUtils.getCapType(word);
+         hasNumber=StringUtils.getHasNumber(word);
+         if ( (dictMgr.isEmpty()) || (word.length == 0) ) return false;
+
+         // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
+         const NBEGIN:int = 0, NNUM:int=1, NSEP:int=2;
+         var nstate:int = NBEGIN;
+         var charCode:int;
+         for ( i=0 ; i < word.length ; ++i ) {
+             charCode = word.charCodeAt(i);
+             if ( (charCode <= 57 ) && ( charCode >= 48) ) { // '0' to '9'
+                 nstate = NNUM;
+             }else if ( (charCode==44) || (charCode==45) || (charCode==46) ) { //',' or '.' or '-'
+                 if ( (nstate == NSEP) || ( i==0 ) ) return false;
+                 nstate = NSEP;
+             }else break;
+         }
+         if ( (i==word.length) && ( nstate == NNUM ) ) return true;//checks if all are just numbers
+         // ignore word with Number.
+         if ( ignoreWordWithNumber && hasNumber)return true;//Ignore word with numbers!
+
+         // ignore capitalized words or all-upper-case words, but only when they contain no number.
+         // captype is one enumerated value (see the switch below), so it is compared
+         // with == ; a bitwise test would also match unrelated cap types whose
+         // numeric values merely share bits.
+         var isCapped:Boolean = (captype == InternalConstants.HUHINITCAP) || (captype == InternalConstants.INITCAP);
+         var isAllCaps:Boolean = (captype == InternalConstants.ALLCAP);
+         if ( !hasNumber && ( (ignoreCappedWord && isCapped) || (ignoreAllUpperCase && isAllCaps) ) ) return true;
+
+
+         switch(captype) {
+             case InternalConstants.HUHCAP:
+             case InternalConstants.HUHINITCAP:
+             case InternalConstants.NOCAP:
+                 rv = checkWord(word,info);
+                 if ( (abbv!=0) && (rv == null ) ) {
+                     word += ".";
+                     rv = checkWord(word,info);
+                 }
+                 break;
+             case InternalConstants.ALLCAP:
+                 rv = checkWord(word,info);
+                 if( rv ) break;
+                 if ( (abbv!=0 ) ) {
+                     word +=".";
+                     rv = checkWord(word,info);
+                     if ( rv ) break;
+                 }
+                 // ToDo: Spec. prefix handling for Catalan, French, Italian:
+                 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
+                 // need better understand...
+
+                 //sharps handle....
+
+                 word = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
+                 // deliberate fall-through: continue with the capitalized form
+
+             case InternalConstants.INITCAP:
+                 if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
+                 wspace = word.toLocaleLowerCase();
+                 rv = checkWord(word,info);
+                 if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;
+
+                 // forbid bad capitalization
+                 // (for example, ijs -> Ijs instead of IJs in Dutch)
+                 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
+                 if (info.Info & ExternalConstants.SPELL_FORBIDDEN) {
+                     rv = null;
+                 }
+
+                 rv = rejectKeepCaseForAllCaps(rv, captype);
+                 if ( rv) break;
+
+                 rv = checkWord(wspace,info);
+                 if ( !rv && abbv ) {
+                     wspace += ".";
+                     rv = checkWord(wspace,info);
+                     if ( !rv) {
+                         word += ".";
+                         if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
+                         rv = checkWord(word,info);
+                         if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;
+                         rv = rejectKeepCaseForAllCaps(rv, captype);
+                     }
+                 }
+                 rv = rejectKeepCaseForAllCaps(rv, captype);
+                 break;
+             default:
+         }
+
+         if ( rv ) return true;
+
+         //implementation break-table... recursive breaking at break points
+
+         if(wordbreak){
+             var nbr:int=0;
+             var parseArr:Array;
+             var searchIndex:int=0;
+             var breakStr:String;
+             for(i=0; i<wordbreak.length;i++){
+                 breakStr = wordbreak[i];
+                 // an empty break string would "occur" everywhere
+                 if ( !breakStr ) continue;
+                 //Search for number of break points in this word
+                 wspace=word;
+                 while (wspace && ((searchIndex=wspace.indexOf(breakStr)) != -1 )) {
+                     nbr++;
+                     if(nbr>InternalConstants.MAX_WORD_BREAKS) return false;//Limiting maximum Word breaks
+                     wspace=wspace.substr(searchIndex+1);
+                 }
+             }
+
+
+             for(var j:int=0; j<wordbreak.length;j++){
+                 breakStr = wordbreak[j];
+                 if ( !breakStr ) continue;
+
+                 // The test must be literal, like the split below. String.search()
+                 // treats its argument as a pattern, so it could report a match
+                 // where split() finds nothing to split; the word would then be
+                 // re-checked unchanged in an endless recursion.
+                 if(word.indexOf(breakStr)!=-1)
+                 {
+                     parseArr=word.split(breakStr);
+
+                     for(i=0;i<parseArr.length;i++)
+                         if(! spell(parseArr[i]) ) return false;//keep checking all parts of the input word. If any part is wrongly spelt send false
+
+                     return true;//no part is spelled wrong so send correct
+                 }
+
+             }
+
+         }
+
+         return false;
+     }
+
+     /**
+      * Discards a lookup result found for an all-upper-case input when the
+      * entry carries the keep-case flag of the rule set.
+      *
+      * @param rv      lookup result, may be null.
+      * @param captype capitalization type of the original input word.
+      * @return null if the entry must be rejected, rv otherwise.
+      */
+     private function rejectKeepCaseForAllCaps( rv:HashEntry, captype:int ):HashEntry {
+         if ( rv && (captype == InternalConstants.ALLCAP ) &&
+              attributeMgr && rv.affixFlagVector && attributeMgr.keepCase && rv.testAffix(attributeMgr.keepCase) ) {
+             return null;
+         }
+         return rv;
+     }
+
+     /**
+      * Produces spelling suggestions for a word.
+      *
+      * @param word the (presumably misspelled) word.
+      * @return the suggestions, or null when there are none, when the input is
+      *         null, empty or over-long, or when no dictionary is loaded.
+      */
+     public function suggest( word:String ) : Array {
+         if ( word == null ) return null;
+         if ( word.length > maxWordLength ) return null;
+         var captype:int = InternalConstants.NOCAP;
+         var capwords:int = 0;
+
+         var abbv:int = 0;
+         var i:int,ns:int;
+         var wspace:String;
+         var slst:Array = new Array();
+         var convWord:Array=new Array;
+         // input conversion USING ICONV TABLE (same guard as in spell(): the table may be null)
+         if(this.attributeMgr && this.attributeMgr.iconvFilterTable && this.attributeMgr.iconvFilterTable.length!=0){
+             this.attributeMgr.conv(word,convWord,InternalConstants.CONV_ICONV);
+             wspace=convWord.pop();
+             if(wspace)word=wspace;
+         }
+
+         // first skip over any leading or trailing blanks
+         word = StringUtils.trim( word );
+         // now strip off any trailing periods (recording their presence)
+         for ( i = word.length-1; (i>=0) && (word.charCodeAt(i) == 46) ; --i ) { // '.'
+             abbv++;
+         }
+         word = word.substr(0, word.length- abbv );
+         captype = StringUtils.getCapType(word);
+         if ( (dictMgr.isEmpty()) || (word.length == 0) ) return null;
+         switch(captype) {
+             case InternalConstants.NOCAP: {
+                 ns = sugestionMgr.suggest( slst, word, InternalConstants.NOCAP );
+                 break;
+             }
+             case InternalConstants.INITCAP:{
+                 capwords = 1;
+                 ns = sugestionMgr.suggest( slst, word, InternalConstants.INITCAP );
+                 if ( ns == -1) break;
+                 wspace = word.toLocaleLowerCase();
+                 ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
+                 break;
+             }
+             case InternalConstants.HUHINITCAP:{
+                 capwords = 1;
+                 // deliberate fall-through
+             }
+             case InternalConstants.HUHCAP: { // ToDo: still a lot of work...
+                 ns = sugestionMgr.suggest( slst, word, InternalConstants.HUHCAP );
+                 break;
+             }
+             case InternalConstants.ALLCAP: {
+                 wspace = word.toLocaleLowerCase();
+                 ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
+                 if ( ns == -1) break;
+                 if ( this.attributeMgr.keepCase && spell(word ) ) {
+                     //ns = insert_sug(slst, wspace, ns); ToDo
+                 }
+                 wspace = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
+                 ns = sugestionMgr.suggest( slst, wspace, InternalConstants.INITCAP );
+                 break;
+             }
+         }
+
+         // ngram approach; note that it runs whenever ngram suggestions are
+         // enabled, not only when the steps above found nothing
+         if ( this.attributeMgr && (this.attributeMgr.maxNgramSuggestions != 0)) {
+             ns = sugestionMgr.nsuggest(slst,word);
+         }
+
+         // try dash suggestion (Afo-American -> Afro-American)
+
+         // capitalize
+         if (capwords) {
+             for ( i=0;i<slst.length; ++i ) {
+                 slst[i] = slst[i].charAt(0).toUpperCase()+slst[i].slice(1);
+             }
+         }
+
+         // expand suggestions with dot(s)
+         if ( abbv && this.attributeMgr.suggestionsWithDots ) {
+             for ( i=0;i<slst.length; ++i ) {
+                 slst[i] += ".";
+             }
+
+         }
+
+         // remove bad capitalized and forbidden forms
+
+         // remove original one; walking backwards keeps the indices of the
+         // elements still to be visited valid after a splice
+         for ( i=slst.length-1; i>=0; --i ) {
+             if ( slst[i] == word )
+                 slst.splice(i,1);
+         }
+
+         // remove duplications
+
+         // output conversion
+
+         if(this.attributeMgr && this.attributeMgr.oconvFilterTable && this.attributeMgr.oconvFilterTable.length!=0){
+             for(i=0;i<slst.length;++i){
+                 if(this.attributeMgr.conv(slst[i],convWord,InternalConstants.CONV_OCONV))
+                 {
+                     wspace=convWord.pop();
+                     slst[i]=wspace;
+                 }
+             }
+         }
+
+         // if suggestions removed by nosuggest, onlyincompound parameters
+
+
+         return (slst.length!=0) ? slst :null;
+     }
+
+     /**
+      * Looks a word up in the loaded dictionaries and, failing that, through
+      * the affix rules.
+      *
+      * @param word the word form to look up.
+      * @param info spelling-information holder; not modified here.
+      * @return the matching entry, or null when the word is unknown or the
+      *         entry found carries the forbidden-word flag.
+      */
+     private function checkWord( word:String, info:SpellingInfo ):HashEntry {
+         var i:int;
+         var he:HashEntry = null;
+         if ( attributeMgr.ignoredChars ) {
+             word = StringUtils.removeIgnoredChars(word, attributeMgr.ignoredChars);
+         }
+         // word reversing wrapper for complex prefixes
+         /*
+         if(complexprefixes) {
+         word=reverseword(word);
+         }
+         */
+
+         // look word in hash table
+         for ( i=0; i < dictMgr.dictonaryList.length && !he; ++i ) {
+             he = dictMgr.dictonaryList[i].getElement(word);
+             // check forbidden and onlyincompound words
+             if ( he && (he.affixFlagVector != null) &&
+                 ((attributeMgr) && ( he.testAffix(attributeMgr.forbiddenWord)))
+
+             ) {
+                 // ToDo: LANG_hu section: set dash information for suggestions
+                 return null;
+             }
+             // ToDo: he = next not needaffix, onlyincompound homonym or onlyupcase word
+/* while (he && (he.affixFlagVector) &&
+ ((attributeMgr.needAffix && testAffix(he.affixFlagVector, attributeMgr.needAffix)) ||
+ (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+ (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
+ )) //he = he.next; should maintain a next homonym which is not being maintained as of now next_homonym;
+*/      }
+
+         // check with affixes
+         if ( !he && attributeMgr ) {
+             he = attributeMgr.affixCheck2(word,0,0);
+             //DO not allow affixed forms of forbidden words
+             if ( he && (he.affixFlagVector != null) && (attributeMgr) && he.testAffix(attributeMgr.forbiddenWord) ) {
+                 // ToDo: LANG_hu section: set dash information for suggestions
+                 return null;
+             }
+         }
+
+         return he;
+     }
+
+
+ }
+}
\ No newline at end of file