You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2013/11/07 20:54:07 UTC

svn commit: r1539783 - in /commons/proper/codec/trunk/src: main/java/org/apache/commons/codec/language/bm/ test/java/org/apache/commons/codec/language/bm/

Author: ggregory
Date: Thu Nov  7 19:54:07 2013
New Revision: 1539783

URL: http://svn.apache.org/r1539783
Log:
[CODEC-174] Apply patch https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch. Thanks to Thomas Champagne.

Modified:
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java
    commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java

Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Thu Nov  7 19:54:07 2013
@@ -164,7 +164,7 @@ public class PhoneticEngine {
      * @since 1.6
      */
     private static final class RulesApplication {
-        private final List<Rule> finalRules;
+        private final Map<String, List<Rule>> finalRules;
         private final CharSequence input;
 
         private PhonemeBuilder phonemeBuilder;
@@ -172,7 +172,7 @@ public class PhoneticEngine {
         private final int maxPhonemes;
         private boolean found;
 
-        public RulesApplication(final List<Rule> finalRules, final CharSequence input,
+        public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input,
                                 final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) {
             if (finalRules == null) {
                 throw new NullPointerException("The finalRules argument must not be null");
@@ -201,18 +201,18 @@ public class PhoneticEngine {
          */
         public RulesApplication invoke() {
             this.found = false;
-            int patternLength = 0;
-            for (final Rule rule : this.finalRules) {
-                final String pattern = rule.getPattern();
-                patternLength = pattern.length();
-
-                if (!rule.patternAndContextMatches(this.input, this.i)) {
-                    continue;
-                }
-
-                this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
-                this.found = true;
-                break;
+            int patternLength = 1;
+            List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength));
+            if (rules != null) {
+            	for (Rule rule : rules) {
+	            	final String pattern = rule.getPattern();
+	                patternLength = pattern.length();
+	            	if (rule.patternAndContextMatches(this.input, this.i)) {
+		                this.phonemeBuilder = this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes);
+		                this.found = true;
+		                break;
+	            	}
+            	}
             }
 
             if (!this.found) {
@@ -358,7 +358,7 @@ public class PhoneticEngine {
      * @param finalRules the final rules to apply
      * @return the resulting phonemes
      */
-    private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final List<Rule> finalRules) {
+    private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, final Map<String, List<Rule>> finalRules) {
         if (finalRules == null) {
             throw new NullPointerException("finalRules can not be null");
         }
@@ -414,11 +414,11 @@ public class PhoneticEngine {
      *   of the input
      */
     public String encode(String input, final Languages.LanguageSet languageSet) {
-        final List<Rule> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
+        final Map<String, List<Rule>> rules = Rule.getInstance(this.nameType, RuleType.RULES, languageSet);
         // rules common across many (all) languages
-        final List<Rule> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
+        final Map<String, List<Rule>> finalRules1 = Rule.getInstance(this.nameType, this.ruleType, "common");
         // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
-        final List<Rule> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
+        final Map<String, List<Rule>> finalRules2 = Rule.getInstance(this.nameType, this.ruleType, languageSet);
 
         // tidy the input
         // lower case is a locale-dependent operation

Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/bm/Rule.java Thu Nov  7 19:54:07 2013
@@ -169,16 +169,16 @@ public class Rule {
 
     private static final String HASH_INCLUDE = "#include";
 
-    private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES =
-            new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>(NameType.class);
+    private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
+            new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);
 
     static {
         for (final NameType s : NameType.values()) {
-            final Map<RuleType, Map<String, List<Rule>>> rts =
-                    new EnumMap<RuleType, Map<String, List<Rule>>>(RuleType.class);
+            final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts =
+                    new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);
 
             for (final RuleType rt : RuleType.values()) {
-                final Map<String, List<Rule>> rs = new HashMap<String, List<Rule>>();
+                final Map<String, Map<String, List<Rule>>> rs = new HashMap<String, Map<String, List<Rule>>>();
 
                 final Languages ls = Languages.getInstance(s);
                 for (final String l : ls.getLanguages()) {
@@ -258,7 +258,7 @@ public class Rule {
      *            the set of languages to consider
      * @return a list of Rules that apply
      */
-    public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
+    public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt,
                                          final Languages.LanguageSet langs) {
         return langs.isSingleton() ? getInstance(nameType, rt, langs.getAny()) :
                                      getInstance(nameType, rt, Languages.ANY);
@@ -275,8 +275,8 @@ public class Rule {
      *            the language to consider
      * @return a list rules for a combination of name type, rule type and a single language.
      */
-    public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
-        final List<Rule> rules = RULES.get(nameType).get(rt).get(lang);
+    public static Map<String, List<Rule>> getInstance(final NameType nameType, final RuleType rt, final String lang) {
+        final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang);
 
         if (rules == null) {
             throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.",
@@ -323,8 +323,8 @@ public class Rule {
         }
     }
 
-    private static List<Rule> parseRules(final Scanner scanner, final String location) {
-        final List<Rule> lines = new ArrayList<Rule>();
+    private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) {
+        final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>();
         int currentLine = 0;
 
         boolean inMultilineComment = false;
@@ -361,7 +361,7 @@ public class Rule {
                             throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
                                                                location);
                         } else {
-                            lines.addAll(parseRules(createScanner(incl), location + "->" + incl));
+                            lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
                         }
                     } else {
                         // rule
@@ -390,7 +390,13 @@ public class Rule {
                                         return sb.toString();
                                     }
                                 };
-                                lines.add(r);
+                                String patternKey = r.pattern.substring(0,1);
+                                List<Rule> rules = lines.get(patternKey);
+                                if (rules == null) {
+                                	rules = new ArrayList<Rule>();
+                                	lines.put(patternKey, rules);
+                                }
+                                rules.add(r);
                             } catch (final IllegalArgumentException e) {
                                 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
                                                                 location, e);

Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java?rev=1539783&r1=1539782&r2=1539783&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java (original)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/bm/PhoneticEnginePerformanceTest.java Thu Nov  7 19:54:07 2013
@@ -21,19 +21,33 @@ import org.junit.Test;
 /**
  * Tests performance for {@link PhoneticEngine}.
  * <p>
- * See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse encoder</a>.
+ * See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse
+ * encoder</a>.
  * </p>
  * <p>
- * Results for November 7, 2013, SVN revision 1539678.
+ * Results for November 7, 2013, project SVN revision 1539678.
  * </p>
  * <ol>
  * <li>Time for encoding 80,000 times the input 'Angelo': 33,039 millis.</li>
  * <li>Time for encoding 80,000 times the input 'Angelo': 32,297 millis.</li>
  * <li>Time for encoding 80,000 times the input 'Angelo': 32,857 millis.</li>
- * <li>Time for encoding 80,000 times the input 'Angelo': 31,561 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': <b>31,561 millis.</b></li>
  * <li>Time for encoding 80,000 times the input 'Angelo': 32,665 millis.</li>
  * <li>Time for encoding 80,000 times the input 'Angelo': 32,215 millis.</li>
  * </ol>
+ * <p>
+ * On this file's revision 1539678, with patch <a
+ * href="https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch"
+ * >CODEC-174-change-rules-storage-to-Map</a>:
+ * </p>
+ * <ol>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 18,196 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,858 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,644 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,591 millis.</b></li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,861 millis.</li>
+ * <li>Time for encoding 80,000 times the input 'Angelo': 13,696 millis.</li>
+ * </ol>
  */
 public class PhoneticEnginePerformanceTest {