You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@edgent.apache.org by dl...@apache.org on 2016/07/12 18:28:28 UTC

[1/2] incubator-quarks git commit: Quarks-220 help with using csv strings

Repository: incubator-quarks
Updated Branches:
  refs/heads/master 94a853418 -> e2a22b513


Quarks-220 help with using csv strings

Project: http://git-wip-us.apache.org/repos/asf/incubator-quarks/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quarks/commit/2f0ea64d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quarks/tree/2f0ea64d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quarks/diff/2f0ea64d

Branch: refs/heads/master
Commit: 2f0ea64d09323074ff98686287fb7fed248ae5da
Parents: 4356348
Author: Dale LaBossiere <dl...@us.ibm.com>
Authored: Fri Jul 1 12:55:03 2016 -0400
Committer: Dale LaBossiere <dl...@us.ibm.com>
Committed: Fri Jul 1 12:55:03 2016 -0400

----------------------------------------------------------------------
 build.xml                                       |   2 +
 connectors/.classpath                           |   2 +
 connectors/csv/build.gradle                     |  20 +++
 connectors/csv/build.xml                        |  47 +++++
 .../main/java/quarks/connectors/csv/Csv.java    | 176 +++++++++++++++++++
 .../quarks/connectors/csv/package-info.java     |  22 +++
 .../quarks/test/connectors/csv/CsvTest.java     | 129 ++++++++++++++
 7 files changed, 398 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 2b75546..8084878 100644
--- a/build.xml
+++ b/build.xml
@@ -117,6 +117,7 @@
 
         <ant dir="providers/direct" target="@{target}" useNativeBasedir="true"/>
         <ant dir="connectors/common" target="@{target}" useNativeBasedir="true"/>
+        <ant dir="connectors/csv" target="@{target}" useNativeBasedir="true"/>
         <ant dir="connectors/iot" target="@{target}" useNativeBasedir="true"/>
         <ant dir="connectors/serial" target="@{target}" useNativeBasedir="true"/>
         <ant dir="connectors/file" target="@{target}" useNativeBasedir="true"/>
@@ -228,6 +229,7 @@
       </classfiles>
     <sourcefiles encoding="UTF-8">
         <fileset dir="${basedir}/connectors/common/src/main/java" includes="**/*.java"/>
+        <fileset dir="${basedir}/connectors/csv/src/main/java" includes="**/*.java"/>
         <fileset dir="${basedir}/connectors/file/src/main/java" includes="**/*.java"/>
         <fileset dir="${basedir}/connectors/iot/src/main/java" includes="**/*.java"/>
         <fileset dir="${basedir}/connectors/iotf/src/main/java" includes="**/*.java"/>

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/.classpath
----------------------------------------------------------------------
diff --git a/connectors/.classpath b/connectors/.classpath
index 851e734..9eeb7d7 100644
--- a/connectors/.classpath
+++ b/connectors/.classpath
@@ -2,6 +2,8 @@
 <classpath>
 	<classpathentry kind="src" path="common/src/main/java"/>
 	<classpathentry kind="src" path="common/src/test/java"/>
+	<classpathentry kind="src" path="csv/src/main/java"/>
+	<classpathentry kind="src" path="csv/src/test/java"/>
 	<classpathentry kind="src" path="file/src/main/java"/>
 	<classpathentry kind="src" path="file/src/test/java"/>
 	<classpathentry kind="src" path="http/src/main/java"/>

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/csv/build.gradle
----------------------------------------------------------------------
diff --git a/connectors/csv/build.gradle b/connectors/csv/build.gradle
new file mode 100644
index 0000000..6924ae3
--- /dev/null
+++ b/connectors/csv/build.gradle
@@ -0,0 +1,20 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+dependencies {
+  compile project(':api:topology')
+  compile ext_classpath
+  testCompile project(':providers:direct')
+}
+
+addCompileTestDependencies ':api:topology', ':providers:direct', ':connectors:common'

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/csv/build.xml
----------------------------------------------------------------------
diff --git a/connectors/csv/build.xml b/connectors/csv/build.xml
new file mode 100644
index 0000000..7112922
--- /dev/null
+++ b/connectors/csv/build.xml
@@ -0,0 +1,47 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ -->
+<project name="quarks.connectors.csv" default="all"
+    xmlns:jacoco="antlib:org.jacoco.ant"
+    >
+    <description>
+        Build CSV connectors.
+    </description>
+
+  <property name="component.path" value="connectors/csv"/>
+  <import file="../../common-build.xml"/>
+
+  <path id="compile.classpath">
+    <path refid="quarks.ext.classpath" />
+  </path>
+
+
+  <path id="test.compile.classpath">
+    <pathelement location="${jar}" />
+    <pathelement location="${quarks.lib}/quarks.providers.direct.jar"/>
+    <pathelement location="../../api/topology/test.classes"/>
+    <pathelement location="../../providers/direct/test.classes"/>
+    <pathelement location="../../connectors/common/test.classes"/>
+    <path refid="compile.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <pathelement location="${test.classes}" />
+    <path refid="test.compile.classpath"/>
+    <path refid="test.common.classpath" />
+  </path>
+
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/csv/src/main/java/quarks/connectors/csv/Csv.java
----------------------------------------------------------------------
diff --git a/connectors/csv/src/main/java/quarks/connectors/csv/Csv.java b/connectors/csv/src/main/java/quarks/connectors/csv/Csv.java
new file mode 100644
index 0000000..ed9246f
--- /dev/null
+++ b/connectors/csv/src/main/java/quarks/connectors/csv/Csv.java
@@ -0,0 +1,176 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+package quarks.connectors.csv;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.google.gson.JsonObject;
+
+/**
+ * Utilities for working with CSV strings.
+ * <P>
+ * Basically, per IETF RFC-4180:
+ * <UL>
+ * <LI>Fields are separated by a single character.  The default field
+ * separator is a comma.  A different separator may be specified
+ * (e.g., a colon, tab).</LI>
+ * <LI>All whitespace between field separators is part of the field's parsed value.</LI>
+ * <LI>A field may be quoted using the double-quote character.
+ * A field containing the field separator must be quoted.
+ * The double-quotes surrounding a quoted field are omitted in the
+ * field's parsed value.</LI>
+ * <LI>A double-quote to be included in a field's parsed value must be
+ * represented by a pair of double quotes.</LI>
+ * </UL>
+ * 
+ * Sample use:
+ * <pre>{@code
+ * // Create a stream of "car" JsonObjects from a "cars.csv" file.
+ * String[] fieldNames = new String[]{"year", "make", "model"};
+ *
+ * TStream<String> pathnames = topology.strings("cars.csv");
+ * TStream<JsonObject> cars = FileStreams.textFileReader(topology, pathnames)
+ *    .map(csv -> toJson(parseCsv(csv), fieldNames));
+ * cars.print(); 
+ * }</pre>
+ */
+public class Csv {
+  private Csv() { }  // private: this class only exposes static helpers
+
+  /**
+   * Split a CSV string into its fields, treating comma as the field separator.
+   * <P>
+   * Equivalent to {@code parseCsv(csv, ',')}.
+   * </P>
+   *
+   * @param csv the csv string
+   * @return the fields
+   * @throws IllegalArgumentException if the csv is malformed
+   *
+   * @see #parseCsv(String, char)
+   */
+  public static List<String> parseCsv(String csv) {
+    final char defaultSeparator = ',';
+    return parseCsv(csv, defaultSeparator);
+  }
+  
+  /**
+   * Parse a CSV string into its fields using the specified field separator.
+   * <P>
+   * Quote handling:
+   * </P>
+   * <UL>
+   * <LI>Two adjacent double-quotes, wherever they occur, yield a single
+   * literal double-quote in the field's value.</LI>
+   * <LI>Any other double-quote opens or closes a quoted field.  An opening
+   * quote must be at the start of its field (only literal double-quotes
+   * may precede it) and a closing quote must be immediately followed by
+   * the separator or the end of the string.</LI>
+   * <LI>A separator inside a quoted field is part of the field's value.</LI>
+   * </UL>
+   * <P>
+   * An empty field following the final separator is included in the
+   * result, e.g., {@code "a,b,"} yields three fields.
+   * An empty {@code csv} yields an empty list.
+   * </P>
+   *
+   * @param csv the csv string
+   * @param separator the separator to use
+   * @return the fields
+   * @throws IllegalArgumentException if the csv is malformed
+   *
+   * @see #parseCsv(String)
+   */
+  public static List<String> parseCsv(String csv, char separator) {
+    final char QC = '"';
+    final int len = csv.length();
+    List<String> list = new ArrayList<>();
+    StringBuilder field = new StringBuilder();
+
+    boolean inQuote = false;
+    // true only when the last character consumed was a field-terminating
+    // separator, i.e. an (empty) field is still pending at the end of the string
+    boolean endsWithSeparator = false;
+    for (int i = 0; i < len; i++) {
+      char c = csv.charAt(i);
+      endsWithSeparator = false;
+      if (c == QC) {
+        if (i+1 < len && csv.charAt(i+1) == QC) {
+          // a quoted quote yields a quote. no effect on inQuote status.
+          i++;
+          field.append(QC);
+        }
+        else if (!inQuote) {
+          // An opening quote must be the start of a field.  Because quoted
+          // quotes are consumed first, the field may already hold literal
+          // quotes (e.g. """abc""") but nothing else.
+          for (int j = 0; j < field.length(); j++) {
+            if (field.charAt(j) != QC)
+              throw malformed(csv);
+          }
+          inQuote = true;
+        }
+        else {
+          // A closing quote must be the end of a field.
+          if (i+1 != len && csv.charAt(i+1) != separator)
+            throw malformed(csv);
+          inQuote = false;
+        }
+      }
+      else if (c == separator && !inQuote) {
+        list.add(field.toString());
+        field.setLength(0);
+        endsWithSeparator = true;
+      }
+      else {
+        // ordinary character, or a separator protected by quotes
+        field.append(c);
+      }
+    }
+    if (inQuote)
+      throw malformed(csv);
+
+    // Emit the last field; a trailing separator denotes a final empty field.
+    if (field.length() != 0 || endsWithSeparator) {
+      list.add(field.toString());
+    }
+
+    return list;
+  }
+
+  /** Build the exception reported for any malformed {@code csv} string. */
+  private static IllegalArgumentException malformed(String csv) {
+    return new IllegalArgumentException("malformed csv string: unbalanced quotes in csv: " + csv);
+  }
+
+  /**
+   * Create a {@link JsonObject} containing the specified {@code fieldNames}
+   * properties each with its corresponding value from {@code fields}.
+   * <P>
+   * Each property is set as a string value.
+   * The {@code JsonObject.getAsJsonPrimitive().getAs*()} methods allowing
+   * accessing the property as the requested type.
+   * </P>
+   * <P>
+   * A field is omitted from the JsonObject if its corresponding
+   * field name is null or the empty string.
+   *  
+   * @param fields the field values
+   * @param fieldNames the corresponding field value names
+   * @return the JsonObject
+   * @throws IllegalArgumentException if the number of fields and the number
+   * of fieldNames don't match
+   */
+  public static JsonObject toJson(List<String> fields, String... fieldNames) {
+    if (fields.size() != fieldNames.length) {
+      throw new IllegalArgumentException("Mismatched number of fields and names");
+    }
+    JsonObject jo = new JsonObject();
+    for (int i = 0; i < fieldNames.length; i++) {
+      String name = fieldNames[i];
+      
+      // skip the field if so indicated
+      if (name == null || name.isEmpty())
+        continue;
+      
+      jo.addProperty(name, fields.get(i));
+    }
+    return jo;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/csv/src/main/java/quarks/connectors/csv/package-info.java
----------------------------------------------------------------------
diff --git a/connectors/csv/src/main/java/quarks/connectors/csv/package-info.java b/connectors/csv/src/main/java/quarks/connectors/csv/package-info.java
new file mode 100644
index 0000000..49f587b
--- /dev/null
+++ b/connectors/csv/src/main/java/quarks/connectors/csv/package-info.java
@@ -0,0 +1,22 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+/**
+ * Utilities for handling Comma Separated Value strings.
+ */
+package quarks.connectors.csv;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/2f0ea64d/connectors/csv/src/test/java/quarks/test/connectors/csv/CsvTest.java
----------------------------------------------------------------------
diff --git a/connectors/csv/src/test/java/quarks/test/connectors/csv/CsvTest.java b/connectors/csv/src/test/java/quarks/test/connectors/csv/CsvTest.java
new file mode 100644
index 0000000..07b82f3
--- /dev/null
+++ b/connectors/csv/src/test/java/quarks/test/connectors/csv/CsvTest.java
@@ -0,0 +1,129 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+package quarks.test.connectors.csv;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Test;
+
+import com.google.gson.JsonObject;
+
+import quarks.connectors.csv.Csv;
+import quarks.test.providers.direct.DirectTestSetup;
+import quarks.test.topology.TopologyAbstractTest;
+
+public class CsvTest extends TopologyAbstractTest implements DirectTestSetup {
+
+  /**
+   * Exercise Csv.parseCsv() over plain fields, an alternate separator,
+   * empty fields, quoted fields, quoted (doubled) quotes and malformed input.
+   */
+  @Test
+  public void testParse() {
+    testParse("abc,1,def", new String[]{"abc", "1", "def"});
+    // spaces are part of the field's value
+    testParse("ab c, 1 ,d ef", new String[]{"ab c", " 1 ", "d ef"});
+    
+    // alternate separator: commas are then ordinary characters
+    testParse("ab,c;1;d,ef", ";", new String[]{"ab,c", "1", "d,ef"});
+    
+    // empty field between two separators
+    testParse("abc,,def", new String[]{"abc", "", "def"});
+    
+    // simple quoting - no quoted quotes;  with and w/o embedded separator
+    testParse("\"ab c\",\"d ef\"", new String[]{"ab c", "d ef"});
+    testParse("\"ab,,c\",\"d ef\"", new String[]{"ab,,c", "d ef"});
+
+    // simple quoted quotes - not embedded in a quoted field
+    // beginning
+    testParse("\"\"ab c,d ef", new String[]{"\"ab c", "d ef"});
+    testParse("\"\"\"\"ab c,d ef", new String[]{"\"\"ab c", "d ef"});
+    // middle
+    testParse("ab \"\"c,d ef", new String[]{"ab \"c", "d ef"});
+    testParse("ab \"\"\"\"c,d ef", new String[]{"ab \"\"c", "d ef"});
+    // end
+    testParse("ab c\"\",d ef", new String[]{"ab c\"", "d ef"});
+    testParse("ab c\"\"\"\",d ef", new String[]{"ab c\"\"", "d ef"});
+    // beginning, middle and end
+    testParse("\"\"ab \"\"c\"\",d ef", new String[]{"\"ab \"c\"", "d ef"});
+    testParse("\"\"\"\"ab \"\"\"\"c\"\"\"\",d ef", new String[]{"\"\"ab \"\"c\"\"", "d ef"});
+    
+    // quoted quotes in a quoted field
+    testParse("\"\"\"ab \"\"c\"\"\",d ef", new String[]{"\"ab \"c\"", "d ef"});
+    testParse("\"\"\"\"\"ab \"\"\"\"c\"\"\"\"\",d ef", new String[]{"\"\"ab \"\"c\"\"", "d ef"});
+    
+    // malformed input: each of these must raise IllegalArgumentException
+    testParseMalformed("\"ab c,d ef");  // non-escaped quote at start of field, or missing end of quoted field
+    testParseMalformed("ab \"c,d ef");  // non-escaped quote in middle of field
+    testParseMalformed("ab c\",d ef");  // non-escaped quote at end of first field, or missing start of quoted field
+    testParseMalformed("ab c,d ef\"");  // non-escaped quote at end of last field, or missing start of quoted field
+  }
+ 
+  private void testParse(String csv, String[] expected) {
+    testParse(csv, null, expected);
+  }
+
+  /**
+   * Check that {@code csv} parses to {@code expected}.
+   * A null {@code sep} selects the default-separator overload; otherwise
+   * the first character of {@code sep} is the separator.
+   */
+  private void testParse(String csv, String sep, String[] expected) {
+    List<String> wanted = Arrays.asList(expected);
+    List<String> actual = (sep != null)
+        ? Csv.parseCsv(csv, sep.charAt(0))
+        : Csv.parseCsv(csv);
+    assertEquals("csv: "+csv, wanted, actual);
+  }
+
+  /** Check that parsing {@code csv} is rejected with an IllegalArgumentException. */
+  private void testParseMalformed(String csv) {
+    List<String> parsed;
+    try {
+      parsed = Csv.parseCsv(csv);
+    }
+    catch(IllegalArgumentException expected) {
+      System.out.println("Got expected exception for malformed for csv: "+csv+"  :" + expected);
+      return;  // rejected, as required
+    }
+    fail("expected malformed for csv: "+csv+" but got fields: "+parsed);
+  }
+  
+  /**
+   * Exercise Csv.toJson(): all fields named, fields skipped via a null or
+   * empty name, and rejection of a fields/names count mismatch.
+   */
+  @Test 
+  public void testToJson() {
+    List<String> fields = Arrays.asList("one","two","three");
+    String[] names = new String[]{"fieldOne","fieldTwo","fieldThree"};
+    String[] names2 = new String[]{"fieldOne",null,"fieldThree"};
+    String[] names3 = new String[]{"fieldOne","fieldTwo",""};
+    
+    testToJson(fields, names);
+    testToJson(fields, names2);
+    testToJson(fields, names3);
+    try {
+      testToJson(fields, names[0], names[1]);  // field/name length mismatch
+      // without this the test would pass even if no exception were thrown
+      fail("expected IllegalArgumentException for field/name length mismatch");
+    }
+    catch(IllegalArgumentException e) {
+      System.out.println("Got expected exception: " + e);
+    }
+  }
+  
+  /**
+   * Convert {@code fields} with Csv.toJson() and check that every field with
+   * a non-null, non-empty name appears in the JsonObject with its value.
+   */
+  private void testToJson(List<String> fields, String... fieldNames) {
+    JsonObject json = Csv.toJson(fields, fieldNames);
+
+    int index = 0;
+    for (String name : fieldNames) {
+      String value = fields.get(index++);
+      if (name == null || name.isEmpty()) {
+        continue;  // skipped fields are not present in the JsonObject
+      }
+      assertEquals("name:"+name, value, jo(json, name));
+    }
+  }
+
+  /** Read property {@code name} of {@code json} as a string. */
+  private static String jo(JsonObject json, String name) {
+    return json.get(name).getAsString();
+  }
+}


[2/2] incubator-quarks git commit: Merge pull request #159

Posted by dl...@apache.org.
Merge pull request #159

This closes #159


Project: http://git-wip-us.apache.org/repos/asf/incubator-quarks/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quarks/commit/e2a22b51
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quarks/tree/e2a22b51
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quarks/diff/e2a22b51

Branch: refs/heads/master
Commit: e2a22b51354ee278cb63b6da92cd1fc32657455a
Parents: 94a8534 2f0ea64
Author: Dale LaBossiere <dl...@us.ibm.com>
Authored: Tue Jul 12 14:28:13 2016 -0400
Committer: Dale LaBossiere <dl...@us.ibm.com>
Committed: Tue Jul 12 14:28:13 2016 -0400

----------------------------------------------------------------------
 build.xml                                       |   2 +
 connectors/.classpath                           |   2 +
 connectors/csv/build.gradle                     |  20 +++
 connectors/csv/build.xml                        |  47 +++++
 .../main/java/quarks/connectors/csv/Csv.java    | 176 +++++++++++++++++++
 .../quarks/connectors/csv/package-info.java     |  22 +++
 .../quarks/test/connectors/csv/CsvTest.java     | 129 ++++++++++++++
 7 files changed, 398 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quarks/blob/e2a22b51/build.xml
----------------------------------------------------------------------