You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by si...@apache.org on 2012/05/08 14:18:25 UTC

svn commit: r1335455 - /incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java

Author: simonetripodi
Date: Tue May  8 12:18:25 2012
New Revision: 1335455

URL: http://svn.apache.org/viewvc?rev=1335455&view=rev
Log:
Improved the implementation of the normalize() method, which was failing randomly.

Modified:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java?rev=1335455&r1=1335454&r2=1335455&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java Tue May  8 12:18:25 2012
@@ -17,6 +17,8 @@
 
 package org.apache.any23.extractor.csv;
 
+import static java.lang.Character.toUpperCase;
+
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
@@ -39,6 +41,7 @@ import org.openrdf.model.vocabulary.XMLS
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
+import java.util.StringTokenizer;
 
 /**
  * This extractor produces <i>RDF</i> from a <i>CSV file</i> .
@@ -206,15 +209,18 @@ public class CSVExtractor implements Ext
     }
 
     private URI normalize(String toBeNormalized, URI documentURI) {
-        String candidate = toBeNormalized;
-        candidate = candidate.trim().toLowerCase().replace("?", "").replace("&", "");
-        String[] tokens = candidate.split(" ");
-        candidate = tokens[0];
-        for (int i = 1; i < tokens.length; i++) {
-            String firstChar = ("" + tokens[i].charAt(0)).toUpperCase();
-            candidate += firstChar + tokens[i].substring(1);
+        toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
+
+        StringBuilder result = new StringBuilder(documentURI.toString());
+
+        StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " ");
+        while (tokenizer.hasMoreTokens()) {
+            String current = tokenizer.nextToken();
+
+            result.append(toUpperCase(current.charAt(0))).append(current.substring(1));
         }
-        return new URIImpl(documentURI.toString() + candidate);
+
+        return new URIImpl(result.toString());
     }
 
     /**