You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2013/11/07 20:54:07 UTC
svn commit: r1539783 - in /commons/proper/codec/trunk/src:
main/java/org/apache/commons/codec/language/bm/
test/java/org/apache/commons/codec/language/bm/
Author: ggregory
Date: Thu Nov 7 19:54:07 2013
New Revision: 1539783
URL: http://svn.apache.org/r1539783
Log:
[CODEC-174] Apply patch https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch. Thanks to Thomas Champagne.
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java
Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Thu Nov 7 19:54:07 2013
@@ -164,7 +164,7 @@ public class PhoneticEngine {
* @since 1.6
*/
private static final class RulesApplication {
- private final List<Rule> finalRules;
+ private final Map<String, List<Rule>> finalRules;
private final CharSequence input;
private PhonemeBuilder phonemeBuilder;
@@ -172,7 +172,7 @@ public class PhoneticEngine {
private final int maxPhonemes;
private boolean found;
- public RulesApplication(final List<Rule> finalRules, final CharSequence input,
+ public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
if (finalRules == null) {
throw new NullPointerException("The finalRules argument must not be null");
@@ -201,18 +201,18 @@ public class PhoneticEngine {
*/
public RulesApplication invoke() {
this.found = false;
- int patternLength = 0;
- for (final Rule rule : this.finalRules) {
- final String pattern = rule.getPattern();
- patternLength = pattern.length();
-
- if (!rule.patternAndContextMatches(this.input, this.i)) {
- continue;
- }
-
- this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
- this.found = true;
- break;
+ int patternLength = 1;
+ List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength));
+ if (rules != null) {
+ for (Rule rule : rules) {
+ final String pattern = rule.getPattern();
+ patternLength = pattern.length();
+ if (rule.patternAndContextMatches(this.input, this.i)) {
+ this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
+ this.found = true;
+ break;
+ }
+ }
}
if (!this.found) {
@@ -358,7 +358,7 @@ public class PhoneticEngine {
* @param finalRules the final rules to apply
* @return the resulting phonemes
*/
- private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final List<Rule> finalRules) {
+ private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final Map<String, List<Rule>> finalRules) {
if (finalRules == null) {
throw new NullPointerException("finalRules can not be null");
}
@@ -414,11 +414,11 @@ public class PhoneticEngine {
* of the input
*/
public String encode(String input, final Languages.LanguageSet languageSet) {
- final List<Rule> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
+ final Map<String, List<Rule>> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
// rules common across many (all) languages
- final List<Rule> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
+ final Map<String, List<Rule>> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
// rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
- final List<Rule> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
+ final Map<String, List<Rule>> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
// tidy the input
// lower case is a locale-dependent operation
Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java Thu Nov 7 19:54:07 2013
@@ -169,16 +169,16 @@ public class Rule {
private static final String HASH_INCLUDE = "#include";
- private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES =
- new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>(NameType.class);
+ private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
+ new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);
static {
for (final NameType s : NameType.values()) {
- final Map<RuleType, Map<String, List<Rule>>> rts =
- new EnumMap<RuleType, Map<String, List<Rule>>>(RuleType.class);
+ final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
+ new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);
for (final RuleType rt : RuleType.values()) {
- final Map<String, List<Rule>> rs = new HashMap<String, List<Rule>>();
+ final Map<String, Map<String, List<Rule>>> rs = new HashMap<String, Map<String, List<Rule>>>();
final Languages ls = Languages.getInstance(s);
for (final String l : ls.getLanguages()) {
@@ -258,7 +258,7 @@ public class Rule {
* the set of languages to consider
* @return a list of Rules that apply
*/
- public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
+ public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt,
final Languages.LanguageSet langs) {
return langs.isSingleton() ? getInstance(nameType, rt, langs.getAny()) :
getInstance(nameType, rt, Languages.ANY);
@@ -275,8 +275,8 @@ public class Rule {
* the language to consider
* @return a list rules for a combination of name type, rule type and a single language.
*/
- public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
- final List<Rule> rules = RULES.get(nameType).get(rt).get(lang);
+ public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt, final String lang) {
+ final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang);
if (rules == null) {
throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
@@ -323,8 +323,8 @@ public class Rule {
}
}
- private static List<Rule> parseRules(final Scanner scanner, final String location) {
- final List<Rule> lines = new ArrayList<Rule>();
+ private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
+ final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>();
int currentLine = 0;
boolean inMultilineComment = false;
@@ -361,7 +361,7 @@ public class Rule {
throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
location);
} else {
- lines.addAll(parseRules(createScanner(incl), location + "->" + incl));
+ lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
}
} else {
// rule
@@ -390,7 +390,13 @@ public class Rule {
return sb.toString();
}
};
- lines.add(r);
+ String patternKey = r.pattern.substring(0,1);
+ List<Rule> rules = lines.get(patternKey);
+ if (rules == null) {
+ rules = new ArrayList<Rule>();
+ lines.put(patternKey, rules);
+ }
+ rules.add(r);
} catch (final IllegalArgumentException e) {
throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
location, e);
Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java (original)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java Thu Nov 7 19:54:07 2013
@@ -21,19 +21,33 @@ import org.junit.Test;
/**
* Tests performance for {@link PhoneticEngine}.
* <p>
- * See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse encoder</a>.
+ * See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse
+ * encoder</a>.
* </p>
* <p>
- * Results for November 7, 2013, SVN revision 1539678.
+ * Results for November 7, 2013, project SVN revision 1539678.
* </p>
* <ol>
* <li>Time for encoding 80,000 times the input 'Angelo': 33,039 millis.</li>
* <li>Time for encoding 80,000 times the input 'Angelo': 32,297 millis.</li>
* <li>Time for encoding 80,000 times the input 'Angelo': 32,857 millis.</li>
- * <li>Time for encoding 80,000 times the input 'Angelo': 31,561 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': <b>31,561 millis.</b></li>
* <li>Time for encoding 80,000 times the input 'Angelo': 32,665 millis.</li>
* <li>Time for encoding 80,000 times the input 'Angelo': 32,215 millis.</li>
* </ol>
+ * <p>
+ * On this file's revision 1539678, with patch <a
+ * href="https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch"
+ * >CODEC-174-change-rules-storage-to-Map</a>:
+ * </p>
+ * <ol>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 18,196 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,858 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,644 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,591 millis.</b></li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,861 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,696 millis.</li>
+ * </ol>
*/
public class PhoneticEnginePerformanceTest {