You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by si...@apache.org on 2012/05/08 14:18:25 UTC
svn commit: r1335455 -
/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
Author: simonetripodi
Date: Tue May 8 12:18:25 2012
New Revision: 1335455
URL: http://svn.apache.org/viewvc?rev=1335455&view=rev
Log:
Improved the implementation of the normalize() method, which was failing randomly.
Modified:
incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java?rev=1335455&r1=1335454&r2=1335455&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java Tue May 8 12:18:25 2012
@@ -17,6 +17,8 @@
package org.apache.any23.extractor.csv;
+import static java.lang.Character.toUpperCase;
+
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
@@ -39,6 +41,7 @@ import org.openrdf.model.vocabulary.XMLS
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
+import java.util.StringTokenizer;
/**
* This extractor produces <i>RDF</i> from a <i>CSV file</i> .
@@ -206,15 +209,18 @@ public class CSVExtractor implements Ext
}
private URI normalize(String toBeNormalized, URI documentURI) {
- String candidate = toBeNormalized;
- candidate = candidate.trim().toLowerCase().replace("?", "").replace("&", "");
- String[] tokens = candidate.split(" ");
- candidate = tokens[0];
- for (int i = 1; i < tokens.length; i++) {
- String firstChar = ("" + tokens[i].charAt(0)).toUpperCase();
- candidate += firstChar + tokens[i].substring(1);
+ toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
+
+ StringBuilder result = new StringBuilder(documentURI.toString());
+
+ StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " ");
+ while (tokenizer.hasMoreTokens()) {
+ String current = tokenizer.nextToken();
+
+ result.append(toUpperCase(current.charAt(0))).append(current.substring(1));
}
- return new URIImpl(documentURI.toString() + candidate);
+
+ return new URIImpl(result.toString());
}
/**