You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devicemap.apache.org by vy...@apache.org on 2015/07/20 21:48:58 UTC
svn commit: r1692010 - in /devicemap/trunk/clients/1.0/java/src:
main/java/org/apache/devicemap/DeviceMapClient.java test/resources/uas.data
Author: vy
Date: Mon Jul 20 19:48:57 2015
New Revision: 1692010
URL: http://svn.apache.org/r1692010
Log:
DMAP-107 Performance optimizations for DeviceMapClient.classify().
Modified:
devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java
devicemap/trunk/clients/1.0/java/src/test/resources/uas.data
Modified: devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java?rev=1692010&r1=1692009&r2=1692010&view=diff
==============================================================================
--- devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java (original)
+++ devicemap/trunk/clients/1.0/java/src/main/java/org/apache/devicemap/DeviceMapClient.java Mon Jul 20 19:48:57 2015
@@ -19,11 +19,7 @@
package org.apache.devicemap;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -36,6 +32,7 @@ import org.apache.devicemap.loader.Loade
public class DeviceMapClient {
private final static Logger LOG = Logger.getLogger(DeviceMapClient.class.getName());
+ private final static java.util.regex.Pattern TEXT_SPLIT_PATTERN = java.util.regex.Pattern.compile(" |-|_|/|\\\\|\\[|\\]|\\(|\\)|;");
private static long initCount = 0;
@@ -103,33 +100,28 @@ public class DeviceMapClient {
throw new RuntimeException("Uninitialized device index");
}
- Map<String, List<DeviceType>> hits = new HashMap<String, List<DeviceType>>(100);
- DeviceType winner = null;
- Pattern winnerPattern = null;
-
if (text == null) {
return null;
}
+ Set<String> hitPatterns = new HashSet<String>();
+ Set<DeviceType> hitDevices = new HashSet<DeviceType>();
+ DeviceType winner = null;
+ Pattern winnerPattern = null;
+
LOG.log(Level.FINE, "classify: ''{0}''", text);
- String[] parts = text.split(" |-|_|/|\\\\|\\[|\\]|\\(|\\)|;");
+ List<String> parts = split(text);
//generate ngrams upto size 4
- for (int i = 0; i < parts.length; i++) {
+ for (int i = 0; i < parts.size(); i++) {
String pattern = "";
- for (int j = 0; j < 4 && (j + i) < parts.length; j++) {
- if (parts[i + j].isEmpty()) {
- continue;
- }
-
- pattern += Pattern.normalize(parts[i + j]);
-
+ for (int j = 0; j < 4 && (j + i) < parts.size(); j++) {
+ pattern += parts.get(i + j);
List<DeviceType> dlist = patterns.get(pattern);
-
if (dlist != null) {
- hits.put(pattern, dlist);
-
+ hitPatterns.add(pattern);
+ hitDevices.addAll(dlist);
for (DeviceType device : dlist) {
LOG.log(Level.FINER, "Hit found: ''{0}'' => id: ''{1}'' {2}", new Object[]{pattern, device.getId(), device.getPatternSet()});
}
@@ -138,19 +130,17 @@ public class DeviceMapClient {
}
//look for the strongest hit
- for (String hit : hits.keySet()) {
- for (DeviceType device : hits.get(hit)) {
- Pattern pattern = device.getPatternSet().isValid(hits.keySet());
- if (pattern == null) {
- continue;
- }
+ for (DeviceType device : hitDevices) {
+ Pattern pattern = device.getPatternSet().isValid(hitPatterns);
+ if (pattern == null) {
+ continue;
+ }
- LOG.log(Level.FINER, "Hit candidate: ''{0}'' => ''{1}'' ({2},{3})", new Object[]{hit, device.getId(), pattern.getType(), pattern.getRank()});
+ LOG.log(Level.FINER, "Hit candidate: ''{0}'' => ({1},{2})", new Object[]{device.getId(), pattern.getType(), pattern.getRank()});
- if (winnerPattern == null || pattern.getRank() > winnerPattern.getRank()) {
- winner = device;
- winnerPattern = pattern;
- }
+ if (winnerPattern == null || pattern.getRank() > winnerPattern.getRank()) {
+ winner = device;
+ winnerPattern = pattern;
}
}
@@ -162,6 +152,17 @@ public class DeviceMapClient {
}
}
+ private static List<String> split(String text) {
+ String[] parts = TEXT_SPLIT_PATTERN.split(text);
+ List<String> nonemptyParts = new ArrayList<String>();
+ for (String part : parts) {
+ String normalizedPart = Pattern.normalize(part);
+ if (normalizedPart != null && !normalizedPart.isEmpty())
+ nonemptyParts.add(normalizedPart);
+ }
+ return nonemptyParts;
+ }
+
public Device classifyDevice(String text) {
Map<String, String> m = classify(text);
if (m == null) {
Modified: devicemap/trunk/clients/1.0/java/src/test/resources/uas.data
URL: http://svn.apache.org/viewvc/devicemap/trunk/clients/1.0/java/src/test/resources/uas.data?rev=1692010&r1=1692009&r2=1692010&view=diff
==============================================================================
--- devicemap/trunk/clients/1.0/java/src/test/resources/uas.data (original)
+++ devicemap/trunk/clients/1.0/java/src/test/resources/uas.data Mon Jul 20 19:48:57 2015
@@ -992,7 +992,6 @@ Mozilla/5.0 (Linux; U; Android 4.0.3; en
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; ADR6425LVW 4G Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||ADR6425LVW
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-N7000 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||GT-N7000
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; GT-P5110 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P5110
-Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; HTC One X Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||HTC One X+
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; KFOT Build/IML74K) AppleWebKit/535.19 (KHTML, like Gecko) Silk/2.4 Mobile Safari/535.19 Silk-Accelerated=true||genericAndroid
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; SGH-T959 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||SGH-T959
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; SGH-T989 Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||SGH-T989
@@ -1002,7 +1001,6 @@ Mozilla/5.0 (Linux; U; Android 4.0.3; sl
Mozilla/5.0 (Linux; U; Android 4.0.3; zh-tw; Sony Tablet S Build/TISU0R0110) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||Sony Tablet S
Mozilla/5.0 (Linux; U; Android 4.0.4; cs-cz; GT-P7510 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P7510
Mozilla/5.0 (Linux; U; Android 4.0.4; de-de; GT-P5100 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P5100
-Mozilla/5.0 (Linux; U; Android 4.0.4; en-ae; HTC_One_X Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||HTC One X+
Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-N7000 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30||GT-N7000
Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-P7500 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||GT-P7500
Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; NOOK BNTV400 Build/ICS) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30||genericAndroid