You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devicemap.apache.org by vy...@apache.org on 2015/07/20 21:48:58 UTC

svn commit: r1692010 - in /devicemap/trunk/clients/1.0/java/src: main/java/org/apache/devicemap/DeviceMapClient.java test/resources/uas.data

Author: vy
Date: Mon Jul 20 19:48:57 2015
New Revision: 1692010

URL: http://svn.apache.org/r1692010
Log:
DMAP-107 Performance optimizations for DeviceMapClient.classify().

Modified:
    devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java
    devicemap/trunk/clients/1.0/java/src/test/resources/uas.data

Modified: devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java?rev=1692010&r1=1692009&r2=1692010&view=diff
==============================================================================
--- devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java (original)
+++ devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java Mon Jul 20 19:48:57 2015
@@ -19,11 +19,7 @@
 package org.apache.devicemap;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -36,6 +32,7 @@ import org.apache.devicemap.loader.Loade
 public class DeviceMapClient {
 
     private final static Logger LOG = Logger.getLogger(DeviceMapClient.class.getName());
+    private final static java.util.regex.Pattern TEXT_SPLIT_PATTERN = java.util.regex.Pattern.compile(" |-|_|/|\\\\|\\[|\\]|\\(|\\)|;");
 
     private static long initCount = 0;
 
@@ -103,33 +100,28 @@ public class DeviceMapClient {
             throw new RuntimeException("Uninitialized device index");
         }
 
-        Map<String, List<DeviceType>> hits = new HashMap<String, List<DeviceType>>(100);
-        DeviceType winner = null;
-        Pattern winnerPattern = null;
-
         if (text == null) {
             return null;
         }
 
+        Set<String> hitPatterns = new HashSet<String>();
+        Set<DeviceType> hitDevices = new HashSet<DeviceType>();
+        DeviceType winner = null;
+        Pattern winnerPattern = null;
+
         LOG.log(Level.FINE, "classify: ''{0}''", text);
 
-        String[] parts = text.split(" |-|_|/|\\\\|\\[|\\]|\\(|\\)|;");
+        List<String> parts = split(text);
 
         //generate ngrams upto size 4
-        for (int i = 0; i < parts.length; i++) {
+        for (int i = 0; i < parts.size(); i++) {
             String pattern = "";
-            for (int j = 0; j < 4 && (j + i) < parts.length; j++) {
-                if (parts[i + j].isEmpty()) {
-                    continue;
-                }
-
-                pattern += Pattern.normalize(parts[i + j]);
-
+            for (int j = 0; j < 4 && (j + i) < parts.size(); j++) {
+                pattern += parts.get(i + j);
                 List<DeviceType> dlist = patterns.get(pattern);
-
                 if (dlist != null) {
-                    hits.put(pattern, dlist);
-
+                    hitPatterns.add(pattern);
+                    hitDevices.addAll(dlist);
                     for (DeviceType device : dlist) {
                         LOG.log(Level.FINER, "Hit found: ''{0}'' => id: ''{1}'' {2}", new Object[]{pattern, device.getId(), device.getPatternSet()});
                     }
@@ -138,19 +130,17 @@ public class DeviceMapClient {
         }
 
         //look for the strongest hit
-        for (String hit : hits.keySet()) {
-            for (DeviceType device : hits.get(hit)) {
-                Pattern pattern = device.getPatternSet().isValid(hits.keySet());
-                if (pattern == null) {
-                    continue;
-                }
+        for (DeviceType device : hitDevices) {
+            Pattern pattern = device.getPatternSet().isValid(hitPatterns);
+            if (pattern == null) {
+                continue;
+            }
 
-                LOG.log(Level.FINER, "Hit candidate: ''{0}'' => ''{1}'' ({2},{3})", new Object[]{hit, device.getId(), pattern.getType(), pattern.getRank()});
+            LOG.log(Level.FINER, "Hit candidate: ''{0}'' => ({1},{2})", new Object[]{device.getId(), pattern.getType(), pattern.getRank()});
 
-                if (winnerPattern == null || pattern.getRank() > winnerPattern.getRank()) {
-                    winner = device;
-                    winnerPattern = pattern;
-                }
+            if (winnerPattern == null || pattern.getRank() > winnerPattern.getRank()) {
+                winner = device;
+                winnerPattern = pattern;
             }
         }
 
@@ -162,6 +152,17 @@ public class DeviceMapClient {
         }
     }
 
+    private static List<String> split(String text) {
+        String[] parts = TEXT_SPLIT_PATTERN.split(text);
+        List<String> nonemptyParts = new ArrayList<String>();
+        for (String part : parts) {
+            String normalizedPart = Pattern.normalize(part);
+            if (normalizedPart != null && !normalizedPart.isEmpty())
+                nonemptyParts.add(normalizedPart);
+        }
+        return nonemptyParts;
+    }
+
     public Device classifyDevice(String text) {
         Map<String, String> m = classify(text);
         if (m == null) {

Modified: devicemap/trunk/clients/1.0/java/src/test/resources/uas.data
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/1.0/java/src/test/resources/uas.data?rev=1692010&r1=1692009&r2=1692010&view=diff
==============================================================================
--- devicemap/trunk/clients/1.0/java/src/test/resources/uas.data (original)
+++ devicemap/trunk/clients/1.0/java/src/test/resources/uas.data Mon Jul 20 19:48:57 2015
@@ -992,7 +992,6 @@ Mozilla/5.0 (Linux; U; Android 4.0.3; en
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; ADR6425LVW 4G Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||ADR6425LVW
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-N7000 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||GT-N7000
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-P5110 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P5110
-Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; HTC One X Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||HTC One X+
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; KFOT Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Silk/2.4 Mobile Safari/535.19 Silk-Accelerated=true||genericAndroid
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; SGH-T959 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||SGH-T959
 Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; SGH-T989 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||SGH-T989
@@ -1002,7 +1001,6 @@ Mozilla/5.0 (Linux; U; Android 4.0.3; sl
 Mozilla/5.0 (Linux; U; Android 4.0.3; zh-tw; Sony Tablet S Build/TISU0R0110) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||Sony Tablet S
 Mozilla/5.0 (Linux; U; Android 4.0.4; cs-cz; GT-P7510 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P7510
 Mozilla/5.0 (Linux; U; Android 4.0.4; de-de; GT-P5100 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P5100
-Mozilla/5.0 (Linux; U; Android 4.0.4; en-ae; HTC_One_X Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||HTC One X+
 Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-N7000 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||GT-N7000
 Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-P7500 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P7500
 Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; NOOK BNTV400 Build/ICS) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||genericAndroid