Posted to commits@geode.apache.org by ji...@apache.org on 2016/04/21 19:16:45 UTC
[10/50] [abbrv] incubator-geode git commit: GEODE-1244: Package,
directory, project and file rename for geode-spark-connector
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartition.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartition.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartition.scala
new file mode 100644
index 0000000..24fe72e
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartition.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.rdd
+
+import org.apache.spark.Partition
+
+/**
+ * This serializable class represents a GeodeRDD partition. Each partition is mapped
+ * to one or more buckets of the region. The GeodeRDD can materialize the data of a
+ * partition based solely on the information contained here.
+ * @param partitionId partition id, a 0-based number.
+ * @param bucketSet region bucket id set for this partition. Set.empty means the whole
+ * region (used for replicated regions)
+ * @param locations preferred location(s) for this partition
+ */
+case class GeodeRDDPartition (
+ partitionId: Int, bucketSet: Set[Int], locations: Seq[String] = Nil)
+ extends Partition {
+
+ override def index: Int = partitionId
+
+}
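
A minimal usage sketch (not part of the diff above; it assumes spark-core and this
connector are on the classpath, and the host name is made up). A partitioned region
gets one partition per bucket group, while a replicated region is covered by a single
partition with an empty bucket set:

    import io.pivotal.geode.spark.connector.internal.rdd.GeodeRDDPartition

    // partition 0 covers buckets 0-3 and prefers a hypothetical host "server1"
    val p0 = GeodeRDDPartition(0, Set(0, 1, 2, 3), Seq("server1"))

    // Set.empty means "the whole region", used for replicated regions
    val whole = GeodeRDDPartition(0, Set.empty)

    assert(p0.index == 0 && whole.bucketSet.isEmpty)
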
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitioner.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitioner.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitioner.scala
new file mode 100644
index 0000000..d960cab
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitioner.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.rdd
+
+import io.pivotal.geode.spark.connector.GeodeConnection
+import io.pivotal.geode.spark.connector.internal.RegionMetadata
+import org.apache.spark.{Logging, Partition}
+
+import scala.reflect.ClassTag
+
+/**
+ * A GeodeRDD partitioner is used to partition the region into multiple RDD partitions.
+ */
+trait GeodeRDDPartitioner extends Serializable {
+
+ def name: String
+
+ /** the function that generates partitions */
+ def partitions[K: ClassTag, V: ClassTag]
+ (conn: GeodeConnection, md: RegionMetadata, env: Map[String, String]): Array[Partition]
+}
+
+object GeodeRDDPartitioner extends Logging {
+
+ /** To add a new partitioner, just add it to the following list */
+ final val partitioners: Map[String, GeodeRDDPartitioner] =
+ List(OnePartitionPartitioner, ServerSplitsPartitioner).map(e => (e.name, e)).toMap
+
+ /**
+ * Get a partitioner by the given name; the default partitioner is returned if there
+ * is no partitioner with that name.
+ */
+ def apply(name: String = defaultPartitionedRegionPartitioner.name): GeodeRDDPartitioner = {
+ partitioners.getOrElse(name, {
+ logWarning(s"Invalid preferred partitioner name $name.")
+ defaultPartitionedRegionPartitioner
+ })
+ }
+
+ val defaultReplicatedRegionPartitioner = OnePartitionPartitioner
+
+ val defaultPartitionedRegionPartitioner = ServerSplitsPartitioner
+
+}
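
A sketch of the name-based lookup above (again assuming the connector is on the
classpath); an unknown name logs a warning and falls back to the default partitioner:

    import io.pivotal.geode.spark.connector.internal.rdd.GeodeRDDPartitioner

    val splits  = GeodeRDDPartitioner("ServerSplits")  // ServerSplitsPartitioner
    val one     = GeodeRDDPartitioner("OnePartition")  // OnePartitionPartitioner
    val unknown = GeodeRDDPartitioner("NoSuchName")    // warns, returns ServerSplitsPartitioner
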
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitionerImpl.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitionerImpl.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitionerImpl.scala
new file mode 100644
index 0000000..4606114
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDPartitionerImpl.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.rdd
+
+import io.pivotal.geode.spark.connector.GeodeConnection
+import io.pivotal.geode.spark.connector.internal.RegionMetadata
+import io.pivotal.geode.spark.connector.NumberPartitionsPerServerPropKey
+import org.apache.spark.Partition
+import scala.collection.JavaConversions._
+import scala.collection.immutable.SortedSet
+import scala.collection.mutable
+import scala.reflect.ClassTag
+
+/** This partitioner maps the whole region to a single GeodeRDDPartition */
+object OnePartitionPartitioner extends GeodeRDDPartitioner {
+
+ override val name = "OnePartition"
+
+ override def partitions[K: ClassTag, V: ClassTag]
+ (conn: GeodeConnection, md: RegionMetadata, env: Map[String, String]): Array[Partition] =
+ Array[Partition](new GeodeRDDPartition(0, Set.empty))
+}
+
+/**
+ * This partitioner maps the whole region to N * M Geode RDD partitions, where M is the
+ * number of Geode servers that contain data for the given region. The default value of
+ * N is 2 (see NumberPartitionsPerServerPropKey below).
+ */
+object ServerSplitsPartitioner extends GeodeRDDPartitioner {
+
+ override val name = "ServerSplits"
+
+ override def partitions[K: ClassTag, V: ClassTag]
+ (conn: GeodeConnection, md: RegionMetadata, env: Map[String, String]): Array[Partition] = {
+ if (md == null) throw new RuntimeException("RegionMetadata is null")
+ val n = try { env.getOrElse(NumberPartitionsPerServerPropKey, "2").toInt } catch { case e: NumberFormatException => 2 }
+ if (!md.isPartitioned || md.getServerBucketMap == null || md.getServerBucketMap.isEmpty)
+ Array[Partition](new GeodeRDDPartition(0, Set.empty))
+ else {
+ val map = mapAsScalaMap(md.getServerBucketMap)
+ .map { case (srv, set) => (srv, asScalaSet(set).map(_.toInt)) }.toList
+ .map { case (srv, set) => (srv.getHostName, set) }
+ doPartitions(map, md.getTotalBuckets, n)
+ }
+ }
+
+ /** Converts a list of (server, bucket ID set) pairs to an array of RDD partitions */
+ def doPartitions(serverBucketMap: List[(String, mutable.Set[Int])], totalBuckets: Int, n: Int)
+ : Array[Partition] = {
+
+ // method that calculates the group size for splitting "k" items into "g" groups
+ def groupSize(k: Int, g: Int): Int = scala.math.ceil(k / g.toDouble).toInt
+
+ // 1. convert the list of (server, bucket set) pairs to a list of (server, sorted bucket set) pairs
+ val srvToSortedBucketSet = serverBucketMap.map { case (srv, set) => (srv, SortedSet[Int]() ++ set) }
+
+ // 2. split each server's bucket set into n splits if possible, and wrap each server into Seq(server)
+ val srvToSplitBucketSet = srvToSortedBucketSet.flatMap { case (host, set) =>
+ if (set.isEmpty) Nil else set.grouped(groupSize(set.size, n)).toList.map(s => (Seq(host), s)) }
+
+ // 3. calculate empty bucket IDs by removing all bucket sets of all servers from the full bucket sets
+ val emptyIDs = SortedSet[Int]() ++ ((0 until totalBuckets).toSet /: srvToSortedBucketSet) {case (s1, (k, s2)) => s1 &~ s2}
+
+ // 4. distribute empty bucket IDs to all partitions evenly.
+ // The empty buckets do not contain data when partitions are created, but they may contain data
+ // when RDD is materialized, so need to include those bucket IDs in the partitions.
+ val srvToFinalBucketSet = if (emptyIDs.isEmpty) srvToSplitBucketSet
+ else srvToSplitBucketSet.zipAll(
+ emptyIDs.grouped(groupSize(emptyIDs.size, srvToSplitBucketSet.size)).toList, (Nil, Set.empty), Set.empty).map
+ { case ((server, set1), set2) => (server, SortedSet[Int]() ++ set1 ++ set2) }
+
+ // 5. create array of partitions w/ 0-based index
+ (0 until srvToFinalBucketSet.size).toList.zip(srvToFinalBucketSet).map
+ { case (i, (srv, set)) => new GeodeRDDPartition(i, set, srv) }.toArray
+ }
+}
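
The heart of ServerSplitsPartitioner is the grouping math in doPartitions. A
self-contained sketch with plain Scala collections (server names and bucket IDs here
are made up) shows how each server's buckets are split into n groups and how bucket
IDs owned by no server are identified before step 4 spreads them across the splits:

    import scala.collection.immutable.SortedSet

    // ceil(k / g), as in doPartitions above
    def groupSize(k: Int, g: Int): Int = math.ceil(k / g.toDouble).toInt

    // two servers own buckets 0-6; totalBuckets = 10, n = 2 splits per server
    val serverBuckets = List("s1" -> SortedSet(0, 1, 2, 3), "s2" -> SortedSet(4, 5, 6))

    val splits = serverBuckets.flatMap { case (host, set) =>
      set.grouped(groupSize(set.size, 2)).toList.map(s => (Seq(host), s))
    }
    // splits: (s1, {0,1}), (s1, {2,3}), (s2, {4,5}), (s2, {6})

    // buckets 7, 8, 9 belong to no server yet; they must still be assigned so
    // data arriving later is not lost when the RDD is materialized
    val emptyIDs = SortedSet(0 until 10: _*) -- serverBuckets.flatMap(_._2)
    assert(emptyIDs == SortedSet(7, 8, 9))
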
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDWriter.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDWriter.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDWriter.scala
new file mode 100644
index 0000000..dba15f3
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRDDWriter.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.rdd
+
+import com.gemstone.gemfire.cache.Region
+import io.pivotal.geode.spark.connector._
+import org.apache.spark.{Logging, TaskContext}
+
+import scala.collection.Iterator
+import java.util.{HashMap => JMap}
+
+/** This abstract class provides common code for the pair and non-pair RDD writers */
+private[rdd] abstract class GeodeRDDWriterBase (opConf: Map[String, String]) extends Serializable {
+
+ val batchSize = try { opConf.getOrElse(RDDSaveBatchSizePropKey, RDDSaveBatchSizeDefault.toString).toInt}
+ catch { case e: NumberFormatException => RDDSaveBatchSizeDefault }
+
+ def mapDump(map: Map[_, _], num: Int): String = {
+ val firstNum = map.take(num + 1)
+ if (firstNum.size > num) s"$firstNum ..." else s"$firstNum"
+ }
+}
+
+/**
+ * Writer object that provides a write function that saves non-pair RDD partitions to Geode.
+ * Those functions will be executed on Spark executors.
+ * @param regionPath the full path of the region where the data is written to
+ */
+class GeodeRDDWriter[T, K, V]
+ (regionPath: String, connConf: GeodeConnectionConf, opConf: Map[String, String] = Map.empty)
+ extends GeodeRDDWriterBase(opConf) with Serializable with Logging {
+
+ def write(func: T => (K, V))(taskContext: TaskContext, data: Iterator[T]): Unit = {
+ val region: Region[K, V] = connConf.getConnection.getRegionProxy[K, V](regionPath)
+ var count = 0
+ val chunks = data.grouped(batchSize)
+ chunks.foreach { chunk =>
+ val map = chunk.foldLeft(new JMap[K, V]()){case (m, t) => val (k, v) = func(t); m.put(k, v); m}
+ region.putAll(map)
+ count += chunk.length
+ }
+ logDebug(s"$count entries (batch.size = $batchSize) are saved to region $regionPath")
+ }
+}
+
+
+/**
+ * Writer object that provides a write function that saves pair RDD partitions to Geode.
+ * Those functions will be executed on Spark executors.
+ * @param regionPath the full path of the region where the data is written to
+ */
+class GeodePairRDDWriter[K, V]
+ (regionPath: String, connConf: GeodeConnectionConf, opConf: Map[String, String] = Map.empty)
+ extends GeodeRDDWriterBase(opConf) with Serializable with Logging {
+
+ def write(taskContext: TaskContext, data: Iterator[(K, V)]): Unit = {
+ val region: Region[K, V] = connConf.getConnection.getRegionProxy[K, V](regionPath)
+ var count = 0
+ val chunks = data.grouped(batchSize)
+ chunks.foreach { chunk =>
+ val map = chunk.foldLeft(new JMap[K, V]()){case (m, (k,v)) => m.put(k,v); m}
+ region.putAll(map)
+ count += chunk.length
+ }
+ logDebug(s"$count entries (batch.size = $batchSize) are saved to region $regionPath")
+ }
+}
+
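Both writers share the same batching scheme: the partition iterator is chunked into
groups of batchSize and each chunk becomes one region.putAll call. A standalone sketch
of that chunking (a plain list stands in for the region here):

    import java.util.{HashMap => JMap}

    val batchSize = 3
    val data = (1 to 8).iterator.map(i => (s"k$i", i))

    // each chunk is folded into one java.util.HashMap, as in write() above;
    // a real run would call region.putAll(map) once per chunk
    val batches = data.grouped(batchSize).map { chunk =>
      chunk.foldLeft(new JMap[String, Int]()) { case (m, (k, v)) => m.put(k, v); m }
    }.toList

    assert(batches.map(_.size) == List(3, 3, 2))
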
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRegionRDD.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRegionRDD.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRegionRDD.scala
new file mode 100644
index 0000000..6980c0f
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/internal/rdd/GeodeRegionRDD.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.rdd
+
+import scala.collection.Seq
+import scala.reflect.ClassTag
+import org.apache.spark.rdd.RDD
+import org.apache.spark.{TaskContext, Partition, SparkContext}
+import io.pivotal.geode.spark.connector.{GeodeConnectionConf, PreferredPartitionerPropKey}
+import io.pivotal.geode.spark.connector.internal.rdd.GeodeRDDPartitioner._
+
+/**
+ * This class exposes a Geode region as an RDD.
+ * @param sc the Spark Context
+ * @param regionPath the full path of the region
+ * @param connConf the GeodeConnectionConf to access the region
+ * @param opConf the parameters for this operation, such as preferred partitioner.
+ */
+class GeodeRegionRDD[K, V] private[connector]
+ (@transient sc: SparkContext,
+ val regionPath: String,
+ val connConf: GeodeConnectionConf,
+ val opConf: Map[String, String] = Map.empty,
+ val whereClause: Option[String] = None
+ ) (implicit ctk: ClassTag[K], ctv: ClassTag[V])
+ extends RDD[(K, V)](sc, Seq.empty) {
+
+ /** validate region existence when GeodeRDD object is created */
+ validate()
+
+ /** Validate region, and make sure it exists. */
+ private def validate(): Unit = connConf.getConnection.validateRegion[K, V](regionPath)
+
+ def kClassTag = ctk
+
+ def vClassTag = ctv
+
+ /**
+ * method `copy` is used by method `where`, which creates a new immutable
+ * GeodeRDD instance based on this instance.
+ */
+ private def copy(
+ regionPath: String = regionPath,
+ connConf: GeodeConnectionConf = connConf,
+ opConf: Map[String, String] = opConf,
+ whereClause: Option[String] = None
+ ): GeodeRegionRDD[K, V] = {
+
+ require(sc != null,
+ """RDD transformation requires a non-null SparkContext. Unfortunately
+ |SparkContext in this GeodeRDD is null. This can happen after
+ |GeodeRDD has been deserialized. SparkContext is not Serializable,
+ |therefore it deserializes to null. RDD transformations are not allowed
+ |inside lambdas used in other RDD transformations.""".stripMargin )
+
+ new GeodeRegionRDD[K, V](sc, regionPath, connConf, opConf, whereClause)
+ }
+
+ /** When a where clause is specified, the OQL query
+ * `select key, value from /<region-path>.entries where <where clause> `
+ * is used to filter the dataset.
+ */
+ def where(whereClause: Option[String]): GeodeRegionRDD[K, V] = {
+ if (whereClause.isDefined) copy(whereClause = whereClause)
+ else this
+ }
+
+ /** this version is for the Java API, which doesn't use scala.Option */
+ def where(whereClause: String): GeodeRegionRDD[K, V] = {
+ if (whereClause == null || whereClause.trim.isEmpty) this
+ else copy(whereClause = Option(whereClause.trim))
+ }
+
+ /**
+ * Use the preferred partitioner to generate partitions. `defaultReplicatedRegionPartitioner`
+ * is used if the region is replicated.
+ */
+ override def getPartitions: Array[Partition] = {
+ val conn = connConf.getConnection
+ val md = conn.getRegionMetadata[K, V](regionPath)
+ md match {
+ case None => throw new RuntimeException(s"region $regionPath was not found.")
+ case Some(data) =>
+ logInfo(s"""RDD id=${this.id} region=$regionPath conn=${connConf.locators.mkString(",")}, env=$opConf""")
+ val p = if (data.isPartitioned) preferredPartitioner else defaultReplicatedRegionPartitioner
+ val splits = p.partitions[K, V](conn, data, opConf)
+ logDebug(s"""RDD id=${this.id} region=$regionPath partitions=\n ${splits.mkString("\n ")}""")
+ splits
+ }
+ }
+
+ /**
+ * Provide the preferred location(s) (host name(s)) of the given partition.
+ * Only some partitioner implementations provide this info, which is
+ * useful when the Spark cluster and the Geode cluster share some hosts.
+ */
+ override def getPreferredLocations(split: Partition) =
+ split.asInstanceOf[GeodeRDDPartition].locations
+
+ /**
+ * Get the preferred partitioner. Return `defaultPartitionedRegionPartitioner` if no
+ * preference is specified.
+ */
+ private def preferredPartitioner =
+ GeodeRDDPartitioner(opConf.getOrElse(
+ PreferredPartitionerPropKey, GeodeRDDPartitioner.defaultPartitionedRegionPartitioner.name))
+
+ /** materialize an RDD partition */
+ override def compute(split: Partition, context: TaskContext): Iterator[(K, V)] = {
+ val partition = split.asInstanceOf[GeodeRDDPartition]
+ logDebug(s"compute RDD id=${this.id} partition $partition")
+ connConf.getConnection.getRegionData[K,V](regionPath, whereClause, partition)
+ // new InterruptibleIterator(context, split.asInstanceOf[GeodeRDDPartition[K, V]].iterator)
+ }
+}
+
+object GeodeRegionRDD {
+
+ def apply[K: ClassTag, V: ClassTag](sc: SparkContext, regionPath: String,
+ connConf: GeodeConnectionConf, opConf: Map[String, String] = Map.empty)
+ : GeodeRegionRDD[K, V] =
+ new GeodeRegionRDD[K, V](sc, regionPath, connConf, opConf)
+
+}
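
A usage sketch (not part of the diff; the region name is made up, and
spark.geode.locators must point at a running cluster for this to execute):

    import org.apache.spark.SparkContext
    import io.pivotal.geode.spark.connector.GeodeConnectionConf
    import io.pivotal.geode.spark.connector.internal.rdd.GeodeRegionRDD

    def dumpRegion(sc: SparkContext): Unit = {
      val connConf = GeodeConnectionConf(sc.getConf)
      val rdd = GeodeRegionRDD[String, Int](sc, "orders", connConf)
      // filtered via OQL: select key, value from /orders.entries where value > 100
      val big = rdd.where("value > 100")
      big.collect().foreach(println)
    }
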
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/GeodeJavaRegionRDD.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/GeodeJavaRegionRDD.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/GeodeJavaRegionRDD.scala
new file mode 100644
index 0000000..f859173
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/GeodeJavaRegionRDD.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.javaapi
+
+import io.pivotal.geode.spark.connector.internal.rdd.GeodeRegionRDD
+import org.apache.spark.api.java.JavaPairRDD
+
+class GeodeJavaRegionRDD[K, V](rdd: GeodeRegionRDD[K, V]) extends JavaPairRDD[K, V](rdd)(rdd.kClassTag, rdd.vClassTag) {
+
+ def where(whereClause: String): GeodeJavaRegionRDD[K, V] = new GeodeJavaRegionRDD(rdd.where(whereClause))
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/JavaAPIHelper.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/JavaAPIHelper.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/JavaAPIHelper.scala
new file mode 100644
index 0000000..ffa6195
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/javaapi/JavaAPIHelper.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.javaapi
+
+import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
+import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStream}
+
+import scala.reflect.ClassTag
+import scala.collection.JavaConversions._
+
+/**
+ * A helper class to make it possible to access components written in Scala from Java code.
+ */
+private[connector] object JavaAPIHelper {
+
+ /** Returns a `ClassTag` of a given runtime class. */
+ def getClassTag[T](clazz: Class[T]): ClassTag[T] = ClassTag(clazz)
+
+ /**
+ * Produces a ClassTag[T], which is actually just a casted ClassTag[AnyRef].
+ * see JavaSparkContext.fakeClassTag in Spark for more info.
+ */
+ def fakeClassTag[T]: ClassTag[T] = ClassTag.AnyRef.asInstanceOf[ClassTag[T]]
+
+ /** Converts a Java `Properties` to a Scala immutable `Map[String, String]`. */
+ def propertiesToScalaMap[K, V](props: java.util.Properties): Map[String, String] =
+ Map(props.toSeq: _*)
+
+ /** convert a JavaRDD[(K,V)] to JavaPairRDD[K,V] */
+ def toJavaPairRDD[K, V](rdd: JavaRDD[(K, V)]): JavaPairRDD[K, V] =
+ JavaPairRDD.fromJavaRDD(rdd)
+
+ /** convert a JavaDStream[(K,V)] to JavaPairDStream[K,V] */
+ def toJavaPairDStream[K, V](ds: JavaDStream[(K, V)]): JavaPairDStream[K, V] =
+ JavaPairDStream.fromJavaDStream(ds)
+
+ /** an empty Map[String, String] for default opConf */
+ val emptyStrStrMap: Map[String, String] = Map.empty
+}
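
A sketch of the two conversions most Java-facing classes rely on (standalone apart
from the Scala library; the locator value is made up):

    import scala.reflect.ClassTag
    import scala.collection.JavaConversions._

    // getClassTag vs. fakeClassTag, as defined above
    val real: ClassTag[String] = ClassTag(classOf[String])
    val fake: ClassTag[String] = ClassTag.AnyRef.asInstanceOf[ClassTag[String]]

    // propertiesToScalaMap: java.util.Properties -> immutable Map[String, String]
    val props = new java.util.Properties()
    props.setProperty("spark.geode.locators", "host1[10334]")
    val asMap: Map[String, String] = Map(props.toSeq: _*)
    assert(asMap("spark.geode.locators") == "host1[10334]")
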
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/package.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/package.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/package.scala
new file mode 100644
index 0000000..6f9a780
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/package.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark
+
+import io.pivotal.geode.spark.connector.internal.rdd.{ServerSplitsPartitioner, OnePartitionPartitioner}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SQLContext
+
+import scala.reflect.ClassTag
+
+/**
+ * The root package of the Geode connector for Apache Spark.
+ * Provides handy implicit conversions that add Geode-specific
+ * methods to `SparkContext` and `RDD`.
+ */
+package object connector {
+
+ /** constants */
+ final val GeodeLocatorPropKey = "spark.geode.locators"
+ // partitioner related keys and values
+ final val PreferredPartitionerPropKey = "preferred.partitioner"
+ final val NumberPartitionsPerServerPropKey = "number.partitions.per.server"
+ final val OnePartitionPartitionerName = OnePartitionPartitioner.name
+ final val ServerSplitsPartitionerName = ServerSplitsPartitioner.name
+
+ final val RDDSaveBatchSizePropKey = "rdd.save.batch.size"
+ final val RDDSaveBatchSizeDefault = 10000
+
+ /** implicits */
+
+ implicit def toSparkContextFunctions(sc: SparkContext): GeodeSparkContextFunctions =
+ new GeodeSparkContextFunctions(sc)
+
+ implicit def toSQLContextFunctions(sqlContext: SQLContext): GeodeSQLContextFunctions =
+ new GeodeSQLContextFunctions(sqlContext)
+
+ implicit def toGeodePairRDDFunctions[K: ClassTag, V: ClassTag]
+ (self: RDD[(K, V)]): GeodePairRDDFunctions[K, V] = new GeodePairRDDFunctions(self)
+
+ implicit def toGeodeRDDFunctions[T: ClassTag]
+ (self: RDD[T]): GeodeRDDFunctions[T] = new GeodeRDDFunctions(self)
+
+ /** utility implicits */
+
+ /** convert Map[String, String] to java.util.Properties */
+ implicit def map2Properties(map: Map[String,String]): java.util.Properties =
+ (new java.util.Properties /: map) {case (props, (k,v)) => props.put(k,v); props}
+
+ /** internal util methods */
+
+ private[connector] def getRddPartitionsInfo(rdd: RDD[_], sep: String = "\n "): String =
+ rdd.partitions.zipWithIndex.map{case (p,i) => s"$i: $p loc=${rdd.preferredLocations(p)}"}.mkString(sep)
+
+}
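
The `/:` in map2Properties is foldLeft. A standalone sketch of the same fold, plus a
note on the implicit entry points the package object provides (the saveToGeode call is
shown for shape only and assumes a configured cluster):

    // fold a Map into java.util.Properties, exactly as map2Properties does
    val props = (new java.util.Properties /: Map("a" -> "1", "b" -> "2")) {
      case (p, (k, v)) => p.put(k, v); p
    }
    assert(props.getProperty("b") == "2")

    // with `import io.pivotal.geode.spark.connector._` in scope, an
    // RDD[(K, V)] picks up GeodePairRDDFunctions, e.g.
    //   pairRdd.saveToGeode("regionPath")   // hypothetical region path
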
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/GeodeDStreamFunctions.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/GeodeDStreamFunctions.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/GeodeDStreamFunctions.scala
new file mode 100644
index 0000000..4d46429
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/GeodeDStreamFunctions.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.streaming
+
+import io.pivotal.geode.spark.connector.GeodeConnectionConf
+import io.pivotal.geode.spark.connector.internal.rdd.{GeodePairRDDWriter, GeodeRDDWriter}
+import org.apache.spark.Logging
+import org.apache.spark.api.java.function.PairFunction
+import org.apache.spark.streaming.dstream.DStream
+
+/**
+ * Extra Geode functions on DStreams of non-pair elements through an implicit conversion.
+ * Import `io.pivotal.geode.spark.connector.streaming._` at the top of your program to
+ * use these functions.
+ */
+class GeodeDStreamFunctions[T](val dstream: DStream[T]) extends Serializable with Logging {
+
+ /**
+ * Save the DStream of non-pair elements to the Geode key-value store.
+ * @param regionPath the full path of the region where the DStream is stored
+ * @param func the function that converts elements of the DStream to key/value pairs
+ * @param connConf the GeodeConnectionConf object that provides connection to Geode cluster
+ * @param opConf the optional parameters for this operation
+ */
+ def saveToGeode[K, V](
+ regionPath: String,
+ func: T => (K, V),
+ connConf: GeodeConnectionConf = defaultConnectionConf,
+ opConf: Map[String, String] = Map.empty): Unit = {
+ connConf.getConnection.validateRegion[K, V](regionPath)
+ val writer = new GeodeRDDWriter[T, K, V](regionPath, connConf, opConf)
+ logInfo(s"""Save DStream region=$regionPath conn=${connConf.locators.mkString(",")}""")
+ dstream.foreachRDD(rdd => rdd.sparkContext.runJob(rdd, writer.write(func) _))
+ }
+
+ /** this version of saveToGeode is just for the Java API */
+ def saveToGeode[K, V](
+ regionPath: String,
+ func: PairFunction[T, K, V],
+ connConf: GeodeConnectionConf,
+ opConf: Map[String, String] ): Unit = {
+ saveToGeode[K, V](regionPath, func.call _, connConf, opConf)
+ }
+
+ private[connector] def defaultConnectionConf: GeodeConnectionConf =
+ GeodeConnectionConf(dstream.context.sparkContext.getConf)
+}
+
+
+/**
+ * Extra Geode functions on DStreams of (key, value) pairs through an implicit conversion.
+ * Import `io.pivotal.geode.spark.connector.streaming._` at the top of your program to
+ * use these functions.
+ */
+class GeodePairDStreamFunctions[K, V](val dstream: DStream[(K,V)]) extends Serializable with Logging {
+
+ /**
+ * Save the DStream of pairs to the Geode key-value store without any conversion
+ * @param regionPath the full path of the region where the DStream is stored
+ * @param connConf the GeodeConnectionConf object that provides connection to Geode cluster
+ * @param opConf the optional parameters for this operation
+ */
+ def saveToGeode(
+ regionPath: String,
+ connConf: GeodeConnectionConf = defaultConnectionConf,
+ opConf: Map[String, String] = Map.empty): Unit = {
+ connConf.getConnection.validateRegion[K, V](regionPath)
+ val writer = new GeodePairRDDWriter[K, V](regionPath, connConf, opConf)
+ logInfo(s"""Save DStream region=$regionPath conn=${connConf.locators.mkString(",")}""")
+ dstream.foreachRDD(rdd => rdd.sparkContext.runJob(rdd, writer.write _))
+ }
+
+ private[connector] def defaultConnectionConf: GeodeConnectionConf =
+ GeodeConnectionConf(dstream.context.sparkContext.getConf)
+}
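
A usage sketch for the non-pair variant (the region path is made up, and
spark.geode.locators must be set on the SparkConf for defaultConnectionConf to work;
the implicit conversion comes from the streaming package object in the next file):

    import org.apache.spark.streaming.dstream.DStream
    import io.pivotal.geode.spark.connector.streaming._

    def save(words: DStream[String]): Unit =
      // each word becomes a (word, length) entry in the hypothetical region
      words.saveToGeode[String, Int]("str_int_region", word => (word, word.length))
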
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/package.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/package.scala b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/package.scala
new file mode 100644
index 0000000..0d1f1eb
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/main/scala/io/pivotal/geode/spark/connector/streaming/package.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector
+
+import org.apache.spark.streaming.dstream.DStream
+
+/**
+ * Provides handy implicit conversions that add Geode-specific methods to `DStream`.
+ */
+package object streaming {
+
+ implicit def toGeodeDStreamFunctions[T](ds: DStream[T]): GeodeDStreamFunctions[T] =
+ new GeodeDStreamFunctions[T](ds)
+
+ implicit def toGeodePairDStreamFunctions[K, V](ds: DStream[(K, V)]): GeodePairDStreamFunctions[K, V] =
+ new GeodePairDStreamFunctions[K, V](ds)
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/java/io/pivotal/geode/spark/connector/JavaAPITest.java
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/java/io/pivotal/geode/spark/connector/JavaAPITest.java b/geode-spark-connector/geode-spark-connector/src/test/java/io/pivotal/geode/spark/connector/JavaAPITest.java
new file mode 100644
index 0000000..142907e
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/java/io/pivotal/geode/spark/connector/JavaAPITest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector;
+
+import io.pivotal.geode.spark.connector.javaapi.*;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.rdd.RDD;
+import org.apache.spark.sql.SQLContext;
+//import org.apache.spark.sql.api.java.JavaSQLContext;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.dstream.DStream;
+import org.junit.Test;
+import org.scalatest.junit.JUnitSuite;
+import scala.Function1;
+import scala.Function2;
+import scala.Tuple2;
+import scala.Tuple3;
+import scala.collection.mutable.LinkedList;
+import scala.reflect.ClassTag;
+
+import static org.junit.Assert.*;
+import static io.pivotal.geode.spark.connector.javaapi.GeodeJavaUtil.*;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.*;
+
+public class JavaAPITest extends JUnitSuite {
+
+ @SuppressWarnings( "unchecked" )
+ public Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> createCommonMocks() {
+ SparkContext mockSparkContext = mock(SparkContext.class);
+ GeodeConnectionConf mockConnConf = mock(GeodeConnectionConf.class);
+ GeodeConnection mockConnection = mock(GeodeConnection.class);
+ when(mockConnConf.getConnection()).thenReturn(mockConnection);
+ when(mockConnConf.locators()).thenReturn(new LinkedList());
+ return new Tuple3<>(mockSparkContext, mockConnConf, mockConnection);
+ }
+
+ @Test
+ public void testSparkContextFunction() throws Exception {
+ Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> tuple3 = createCommonMocks();
+ GeodeJavaSparkContextFunctions wrapper = javaFunctions(tuple3._1());
+ assertTrue(tuple3._1() == wrapper.sc);
+ String regionPath = "testregion";
+ JavaPairRDD<String, String> rdd = wrapper.geodeRegion(regionPath, tuple3._2());
+ verify(tuple3._3()).validateRegion(regionPath);
+ }
+
+ @Test
+ public void testJavaSparkContextFunctions() throws Exception {
+ SparkContext mockSparkContext = mock(SparkContext.class);
+ JavaSparkContext mockJavaSparkContext = mock(JavaSparkContext.class);
+ when(mockJavaSparkContext.sc()).thenReturn(mockSparkContext);
+ GeodeJavaSparkContextFunctions wrapper = javaFunctions(mockJavaSparkContext);
+ assertTrue(mockSparkContext == wrapper.sc);
+ }
+
+ @Test
+ @SuppressWarnings( "unchecked" )
+ public void testJavaPairRDDFunctions() throws Exception {
+ JavaPairRDD<String, Integer> mockPairRDD = mock(JavaPairRDD.class);
+ RDD<Tuple2<String, Integer>> mockTuple2RDD = mock(RDD.class);
+ when(mockPairRDD.rdd()).thenReturn(mockTuple2RDD);
+ GeodeJavaPairRDDFunctions wrapper = javaFunctions(mockPairRDD);
+ assertTrue(mockTuple2RDD == wrapper.rddf.rdd());
+
+ Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> tuple3 = createCommonMocks();
+ when(mockTuple2RDD.sparkContext()).thenReturn(tuple3._1());
+ String regionPath = "testregion";
+ wrapper.saveToGeode(regionPath, tuple3._2());
+ verify(mockTuple2RDD, times(1)).sparkContext();
+ verify(tuple3._1(), times(1)).runJob(eq(mockTuple2RDD), any(Function2.class), any(ClassTag.class));
+ }
+
+ @Test
+ @SuppressWarnings( "unchecked" )
+ public void testJavaRDDFunctions() throws Exception {
+ JavaRDD<String> mockJavaRDD = mock(JavaRDD.class);
+ RDD<String> mockRDD = mock(RDD.class);
+ when(mockJavaRDD.rdd()).thenReturn(mockRDD);
+ GeodeJavaRDDFunctions wrapper = javaFunctions(mockJavaRDD);
+ assertTrue(mockRDD == wrapper.rddf.rdd());
+
+ Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> tuple3 = createCommonMocks();
+ when(mockRDD.sparkContext()).thenReturn(tuple3._1());
+ PairFunction<String, String, Integer> mockPairFunc = mock(PairFunction.class);
+ String regionPath = "testregion";
+ wrapper.saveToGeode(regionPath, mockPairFunc, tuple3._2());
+ verify(mockRDD, times(1)).sparkContext();
+ verify(tuple3._1(), times(1)).runJob(eq(mockRDD), any(Function2.class), any(ClassTag.class));
+ }
+
+ @Test
+ @SuppressWarnings( "unchecked" )
+ public void testJavaPairDStreamFunctions() throws Exception {
+ JavaPairDStream<String, String> mockJavaDStream = mock(JavaPairDStream.class);
+ DStream<Tuple2<String, String>> mockDStream = mock(DStream.class);
+ when(mockJavaDStream.dstream()).thenReturn(mockDStream);
+ GeodeJavaPairDStreamFunctions wrapper = javaFunctions(mockJavaDStream);
+ assertTrue(mockDStream == wrapper.dsf.dstream());
+
+ Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> tuple3 = createCommonMocks();
+ String regionPath = "testregion";
+ wrapper.saveToGeode(regionPath, tuple3._2());
+ verify(tuple3._2()).getConnection();
+ verify(tuple3._3()).validateRegion(regionPath);
+ verify(mockDStream).foreachRDD(any(Function1.class));
+ }
+
+ @Test
+ @SuppressWarnings( "unchecked" )
+ public void testJavaPairDStreamFunctionsWithTuple2DStream() throws Exception {
+ JavaDStream<Tuple2<String, String>> mockJavaDStream = mock(JavaDStream.class);
+ DStream<Tuple2<String, String>> mockDStream = mock(DStream.class);
+ when(mockJavaDStream.dstream()).thenReturn(mockDStream);
+ GeodeJavaPairDStreamFunctions wrapper = javaFunctions(toJavaPairDStream(mockJavaDStream));
+ assertTrue(mockDStream == wrapper.dsf.dstream());
+ }
+
+ @Test
+ @SuppressWarnings( "unchecked" )
+ public void testJavaDStreamFunctions() throws Exception {
+ JavaDStream<String> mockJavaDStream = mock(JavaDStream.class);
+ DStream<String> mockDStream = mock(DStream.class);
+ when(mockJavaDStream.dstream()).thenReturn(mockDStream);
+ GeodeJavaDStreamFunctions wrapper = javaFunctions(mockJavaDStream);
+ assertTrue(mockDStream == wrapper.dsf.dstream());
+
+ Tuple3<SparkContext, GeodeConnectionConf, GeodeConnection> tuple3 = createCommonMocks();
+ PairFunction<String, String, Integer> mockPairFunc = mock(PairFunction.class);
+ String regionPath = "testregion";
+ wrapper.saveToGeode(regionPath, mockPairFunc, tuple3._2());
+ verify(tuple3._2()).getConnection();
+ verify(tuple3._3()).validateRegion(regionPath);
+ verify(mockDStream).foreachRDD(any(Function1.class));
+ }
+
+ @Test
+ public void testSQLContextFunction() throws Exception {
+ SQLContext mockSQLContext = mock(SQLContext.class);
+ GeodeJavaSQLContextFunctions wrapper = javaFunctions(mockSQLContext);
+ assertTrue(wrapper.scf.getClass() == GeodeSQLContextFunctions.class);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/GeodeFunctionDeployerTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/GeodeFunctionDeployerTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/GeodeFunctionDeployerTest.scala
new file mode 100644
index 0000000..4e45dc2
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/GeodeFunctionDeployerTest.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector
+
+import org.mockito.Mockito._
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.{FunSuite, Matchers}
+import org.apache.commons.httpclient.HttpClient
+import java.io.File
+
+
+class GeodeFunctionDeployerTest extends FunSuite with Matchers with MockitoSugar {
+ val mockHttpClient: HttpClient = mock[HttpClient]
+
+ test("jmx url creation") {
+ val jmxHostAndPort = "localhost:7070"
+ val expectedUrlString = "http://" + jmxHostAndPort + "/gemfire/v1/deployed"
+ val gfd = new GeodeFunctionDeployer(mockHttpClient)
+ val urlString = gfd.constructURLString(jmxHostAndPort)
+ assert(urlString === expectedUrlString)
+ }
+
+ test("missing jar file") {
+ val missingJarFileLocation = "file:///somemissingjarfilethatdoesnot.exist"
+ val gfd = new GeodeFunctionDeployer(mockHttpClient)
+ intercept[RuntimeException] { gfd.jarFileHandle(missingJarFileLocation)}
+ }
+
+ test("deploy with missing jar") {
+ val missingJarFileLocation = "file:///somemissingjarfilethatdoesnot.exist"
+ val gfd = new GeodeFunctionDeployer(mockHttpClient)
+ intercept[RuntimeException] {(gfd.deploy("localhost:7070", missingJarFileLocation).contains("Deployed"))}
+ intercept[RuntimeException] {(gfd.deploy("localhost", 7070, missingJarFileLocation).contains("Deployed"))}
+ }
+
+ test("successful mocked deploy") {
+ val gfd = new GeodeFunctionDeployer(mockHttpClient)
+ val jar = new File("README.md")
+ assert(gfd.deploy("localhost:7070", jar).contains("Deployed"))
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/DefaultGemFireConnectionManagerTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/DefaultGemFireConnectionManagerTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/DefaultGemFireConnectionManagerTest.scala
new file mode 100644
index 0000000..798912c
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/DefaultGemFireConnectionManagerTest.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal
+
+import io.pivotal.geode.spark.connector.GeodeConnectionConf
+import org.mockito.Mockito._
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.{FunSuite, Matchers}
+
+class DefaultGeodeConnectionManagerTest extends FunSuite with Matchers with MockitoSugar {
+
+ test("DefaultGeodeConnectionFactory get/closeConnection") {
+ // note: the locators of connConf 1-4 overlap, so they share one cached connection
+ val connConf1 = new GeodeConnectionConf(Seq(("host1", 1234)))
+ val connConf2 = new GeodeConnectionConf(Seq(("host2", 5678)))
+ val connConf3 = new GeodeConnectionConf(Seq(("host1", 1234), ("host2", 5678)))
+ val connConf4 = new GeodeConnectionConf(Seq(("host2", 5678), ("host1", 1234)))
+ val connConf5 = new GeodeConnectionConf(Seq(("host5", 3333)))
+
+ val props: Map[String, String] = Map.empty
+ val mockConnFactory: DefaultGeodeConnectionFactory = mock[DefaultGeodeConnectionFactory]
+ val mockConn1 = mock[DefaultGeodeConnection]
+ val mockConn2 = mock[DefaultGeodeConnection]
+ when(mockConnFactory.newConnection(connConf3.locators, props)).thenReturn(mockConn1)
+ when(mockConnFactory.newConnection(connConf5.locators, props)).thenReturn(mockConn2)
+
+ assert(DefaultGeodeConnectionManager.getConnection(connConf3)(mockConnFactory) == mockConn1)
+ // note: following 3 lines do not trigger connFactory.newConnection(...)
+ assert(DefaultGeodeConnectionManager.getConnection(connConf1)(mockConnFactory) == mockConn1)
+ assert(DefaultGeodeConnectionManager.getConnection(connConf2)(mockConnFactory) == mockConn1)
+ assert(DefaultGeodeConnectionManager.getConnection(connConf4)(mockConnFactory) == mockConn1)
+ assert(DefaultGeodeConnectionManager.getConnection(connConf5)(mockConnFactory) == mockConn2)
+
+ // connFactory.newConnection(...) was invoked only twice
+ verify(mockConnFactory, times(1)).newConnection(connConf3.locators, props)
+ verify(mockConnFactory, times(1)).newConnection(connConf5.locators, props)
+ assert(DefaultGeodeConnectionManager.connections.size == 3)
+
+ DefaultGeodeConnectionManager.closeConnection(connConf1)
+ assert(DefaultGeodeConnectionManager.connections.size == 1)
+ DefaultGeodeConnectionManager.closeConnection(connConf5)
+ assert(DefaultGeodeConnectionManager.connections.isEmpty)
+ }
+
+ test("DefaultGeodeConnectionFactory newConnection(...) throws RuntimeException") {
+ val connConf1 = new GeodeConnectionConf(Seq(("host1", 1234)))
+ val props: Map[String, String] = Map.empty
+ val mockConnFactory: DefaultGeodeConnectionFactory = mock[DefaultGeodeConnectionFactory]
+ when(mockConnFactory.newConnection(connConf1.locators, props)).thenThrow(new RuntimeException())
+ intercept[RuntimeException] { DefaultGeodeConnectionManager.getConnection(connConf1)(mockConnFactory) }
+ verify(mockConnFactory, times(1)).newConnection(connConf1.locators, props)
+ }
+
+ test("DefaultGeodeConnectionFactory close() w/ non-exist connection") {
+ val props: Map[String, String] = Map.empty
+ val mockConnFactory: DefaultGeodeConnectionFactory = mock[DefaultGeodeConnectionFactory]
+ val connConf1 = new GeodeConnectionConf(Seq(("host1", 1234)))
+ val connConf2 = new GeodeConnectionConf(Seq(("host2", 5678)))
+ val mockConn1 = mock[DefaultGeodeConnection]
+ when(mockConnFactory.newConnection(connConf1.locators, props)).thenReturn(mockConn1)
+ assert(DefaultGeodeConnectionManager.getConnection(connConf1)(mockConnFactory) == mockConn1)
+ assert(DefaultGeodeConnectionManager.connections.size == 1)
+ // the connection does not exist in the connection manager
+ DefaultGeodeConnectionManager.closeConnection(connConf2)
+ assert(DefaultGeodeConnectionManager.connections.size == 1)
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/gemfirefunctions/StructStreamingResultSenderAndCollectorTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/gemfirefunctions/StructStreamingResultSenderAndCollectorTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/gemfirefunctions/StructStreamingResultSenderAndCollectorTest.scala
new file mode 100644
index 0000000..f2303e7
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/gemfirefunctions/StructStreamingResultSenderAndCollectorTest.scala
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.geodefunctions
+
+import com.gemstone.gemfire.DataSerializer
+import com.gemstone.gemfire.cache.execute.{ResultCollector, ResultSender}
+import com.gemstone.gemfire.cache.query.internal.types.{ObjectTypeImpl, StructTypeImpl}
+import com.gemstone.gemfire.cache.query.types.ObjectType
+import com.gemstone.gemfire.internal.{Version, ByteArrayDataInput, HeapDataOutputStream}
+import com.gemstone.gemfire.internal.cache.{CachedDeserializable, CachedDeserializableFactory}
+import org.scalatest.{BeforeAndAfter, FunSuite}
+import scala.collection.JavaConversions._
+import scala.concurrent.{Await, ExecutionContext, Future}
+import ExecutionContext.Implicits.global
+import scala.concurrent.duration._
+
+class StructStreamingResultSenderAndCollectorTest extends FunSuite with BeforeAndAfter {
+
+ /**
+ * A test ResultSender that connects the struct ResultSender and ResultCollector.
+ * Note: this test ResultSender has to copy the data (byte array) since the
+ * StructStreamingResultSender will reuse the byte array.
+ */
+ class LocalResultSender(collector: ResultCollector[Array[Byte], _], num: Int = 1) extends ResultSender[Object] {
+
+ var finishedNum = 0
+
+ override def sendResult(result: Object): Unit =
+ collector.addResult(null, result.asInstanceOf[Array[Byte]].clone())
+
+ /** exception should be sent via lastResult() */
+ override def sendException(throwable: Throwable): Unit =
+ throw new UnsupportedOperationException("sendException is not supported.")
+
+ override def lastResult(result: Object): Unit = {
+ collector.addResult(null, result.asInstanceOf[Array[Byte]].clone())
+ this.synchronized {
+ finishedNum += 1
+ if (finishedNum == num)
+ collector.endResults()
+ }
+ }
+ }
+
+ /** common variables */
+ var collector: StructStreamingResultCollector = _
+ var baseSender: LocalResultSender = _
+ /** common types */
+ val objType = new ObjectTypeImpl("java.lang.Object").asInstanceOf[ObjectType]
+ val TwoColType = new StructTypeImpl(Array("key", "value"), Array(objType, objType))
+ val OneColType = new StructTypeImpl(Array("value"), Array(objType))
+
+ before {
+ collector = new StructStreamingResultCollector
+ baseSender = new LocalResultSender(collector, 1)
+ }
+
+ test("transfer simple data") {
+ verifySimpleTransfer(sendDataType = true)
+ }
+
+ test("transfer simple data with no type info") {
+ verifySimpleTransfer(sendDataType = false)
+ }
+
+ def verifySimpleTransfer(sendDataType: Boolean): Unit = {
+ val iter = (0 to 9).map(i => Array(i.asInstanceOf[Object], (i.toString * 5).asInstanceOf[Object])).toIterator
+ val dataType = if (sendDataType) TwoColType else null
+ new StructStreamingResultSender(baseSender, dataType, iter).send()
+ // println("type: " + collector.getResultType.toString)
+ assert(TwoColType.equals(collector.getResultType))
+ val iter2 = collector.getResult
+ (0 to 9).foreach { i =>
+ assert(iter2.hasNext)
+ val o = iter2.next()
+ assert(o.size == 2)
+ assert(o(0).asInstanceOf[Int] == i)
+ assert(o(1).asInstanceOf[String] == i.toString * 5)
+ }
+ assert(! iter2.hasNext)
+ }
+
+
+ /**
+ * A test iterator that generates integer data
+ * @param start the first value
+ * @param n the number of integers generated
+ * @param genExcp if true, throw an exception after the last integer; used to test exception handling.
+ */
+ def intIterator(start: Int, n: Int, genExcp: Boolean): Iterator[Array[Object]] = {
+ new Iterator[Array[Object]] {
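+ // when genExcp is true, advertise one extra element so the final next() throws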
+ val max = if (genExcp) start + n else start + n - 1
+ var index: Int = start - 1
+
+ override def hasNext: Boolean = index < max
+
+ override def next(): Array[Object] =
+ if (index < (start + n - 1)) {
+ index += 1
+ Array(index.asInstanceOf[Object])
+ } else throw new RuntimeException("simulated error")
+ }
+ }
+
+ test("transfer data with 0 row") {
+ new StructStreamingResultSender(baseSender, OneColType, intIterator(1, 0, genExcp = false)).send()
+ // println("type: " + collector.getResultType.toString)
+ assert(collector.getResultType == null)
+ val iter = collector.getResult
+ assert(! iter.hasNext)
+ }
+
+ test("transfer data with 10K rows") {
+ new StructStreamingResultSender(baseSender, OneColType, intIterator(1, 10000, genExcp = false)).send()
+ // println("type: " + collector.getResultType.toString)
+ assert(OneColType.equals(collector.getResultType))
+ val iter = collector.getResult
+ // println(iter.toList.map(list => list.mkString(",")).mkString("; "))
+ (1 to 10000).foreach { i =>
+ assert(iter.hasNext)
+ val o = iter.next()
+ assert(o.size == 1)
+ assert(o(0).asInstanceOf[Int] == i)
+ }
+ assert(! iter.hasNext)
+ }
+
+ test("transfer data with 10K rows with 2 sender") {
+ baseSender = new LocalResultSender(collector, 2)
+ val total = 300
+ val sender1 = Future { new StructStreamingResultSender(baseSender, OneColType, intIterator(1, total/2, genExcp = false), "sender1").send()}
+ val sender2 = Future { new StructStreamingResultSender(baseSender, OneColType, intIterator(total/2+1, total/2, genExcp = false), "sender2").send()}
+ Await.result(sender1, 1.seconds)
+ Await.result(sender2, 1.seconds)
+
+ // println("type: " + collector.getResultType.toString)
+ assert(OneColType.equals(collector.getResultType))
+ val iter = collector.getResult
+ // println(iter.toList.map(list => list.mkString(",")).mkString("; "))
+ val set = scala.collection.mutable.Set[Int]()
+ (1 to total).foreach { i =>
+ assert(iter.hasNext)
+ val o = iter.next()
+ assert(o.size == 1)
+ assert(! set.contains(o(0).asInstanceOf[Int]))
+ set.add(o(0).asInstanceOf[Int])
+ }
+ assert(! iter.hasNext)
+ }
+
+ test("transfer data with 10K rows with 2 sender with error") {
+ baseSender = new LocalResultSender(collector, 2)
+ val total = 1000
+ val sender1 = Future { new StructStreamingResultSender(baseSender, OneColType, intIterator(1, total/2, genExcp = false), "sender1").send()}
+ val sender2 = Future { new StructStreamingResultSender(baseSender, OneColType, intIterator(total/2+1, total/2, genExcp = true), "sender2").send()}
+ Await.result(sender1, 1.seconds)
+ Await.result(sender2, 1.seconds)
+
+ // println("type: " + collector.getResultType.toString)
+ assert(OneColType.equals(collector.getResultType))
+ val iter = collector.getResult
+ // println(iter.toList.map(list => list.mkString(",")).mkString("; "))
+ val set = scala.collection.mutable.Set[Int]()
+ intercept[RuntimeException] {
+ (1 to total).foreach { i =>
+ assert(iter.hasNext)
+ val o = iter.next()
+ assert(o.size == 1)
+ assert(! set.contains(o(0).asInstanceOf[Int]))
+ set.add(o(0).asInstanceOf[Int])
+ }
+ }
+ // println(s"rows received: ${set.size}")
+ }
+
+ test("transfer data with Exception") {
+ new StructStreamingResultSender(baseSender, OneColType, intIterator(1, 200, genExcp = true)).send()
+ // println("type: " + collector.getResultType.toString)
+ val iter = collector.getResult
+ intercept[RuntimeException] ( iter.foreach(_.mkString(",")) )
+ }
+
+ def stringPairIterator(n: Int, genExcp: Boolean): Iterator[Array[Object]] =
+ intIterator(1, n, genExcp).map(x => Array(s"key-${x(0)}", s"value-${x(0)}"))
+
+ test("transfer string pair data with 200 rows") {
+ new StructStreamingResultSender(baseSender, TwoColType, stringPairIterator(1000, genExcp = false)).send()
+ // println("type: " + collector.getResultType.toString)
+ assert(TwoColType.equals(collector.getResultType))
+ val iter = collector.getResult
+ // println(iter.toList.map(list => list.mkString(",")).mkString("; "))
+ (1 to 1000).foreach { i =>
+ assert(iter.hasNext)
+ val o = iter.next()
+ assert(o.size == 2)
+ assert(o(0) == s"key-$i")
+ assert(o(1) == s"value-$i")
+ }
+ assert(! iter.hasNext)
+ }
+
+ /**
+ * Usage notes: There are 3 kinds of data to transfer:
+ * (1) an object, (2) a byte array of a serialized object, and (3) a plain byte array.
+ * This test shows how to handle all three.
+ */
+ test("DataSerializer usage") {
+ val outBuf = new HeapDataOutputStream(1024, null)
+ val inBuf = new ByteArrayDataInput()
+
+ // 1. a regular object
+ val hello = "Hello World!" * 30
+ // serialize the data
+ DataSerializer.writeObject(hello, outBuf)
+ val bytesHello = outBuf.toByteArray.clone()
+ // de-serialize the data
+ inBuf.initialize(bytesHello, Version.CURRENT)
+ val hello2 = DataSerializer.readObject(inBuf).asInstanceOf[Object]
+ assert(hello == hello2)
+
+ // 2. byte array of serialized object
+ // serialize: byte array from `CachedDeserializable`
+ val cd: CachedDeserializable = CachedDeserializableFactory.create(bytesHello)
+ outBuf.reset()
+ DataSerializer.writeByteArray(cd.getSerializedValue, outBuf)
+ // de-serialize the data in 2 steps
+ inBuf.initialize(outBuf.toByteArray.clone(), Version.CURRENT)
+ val bytesHello2: Array[Byte] = DataSerializer.readByteArray(inBuf)
+ inBuf.initialize(bytesHello2, Version.CURRENT)
+ val hello3 = DataSerializer.readObject(inBuf).asInstanceOf[Object]
+ assert(hello == hello3)
+
+ // 3. byte array
+ outBuf.reset()
+ DataSerializer.writeByteArray(bytesHello, outBuf)
+ inBuf.initialize(outBuf.toByteArray.clone(), Version.CURRENT)
+ val bytesHello3: Array[Byte] = DataSerializer.readByteArray(inBuf)
+ assert(bytesHello sameElements bytesHello3)
+ }
+}
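For context, the round trip these tests exercise can be reduced to the following minimal sketch (it reuses the LocalResultSender helper defined in the test above; the row type and values are illustrative):

    val collector = new StructStreamingResultCollector
    val sender = new LocalResultSender(collector, 1)
    val objType = new ObjectTypeImpl("java.lang.Object").asInstanceOf[ObjectType]
    val rowType = new StructTypeImpl(Array("value"), Array(objType))
    // the "function" side streams three one-column rows through the sender ...
    val rows = (1 to 3).iterator.map(i => Array(i.asInstanceOf[Object]))
    new StructStreamingResultSender(sender, rowType, rows).send()
    // ... and the "driver" side reads them back as an Iterator[Array[Object]]
    collector.getResult.foreach(row => println(row.mkString(",")))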
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/oql/QueryParserTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/oql/QueryParserTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/oql/QueryParserTest.scala
new file mode 100644
index 0000000..54394e8
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/io/pivotal/geode/spark/connector/internal/oql/QueryParserTest.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.pivotal.geode.spark.connector.internal.oql
+
+import org.scalatest.FunSuite
+
+class QueryParserTest extends FunSuite {
+
+ test("select * from /r1") {
+ val r = QueryParser.parseOQL("select * from /r1").get
+ assert(r == "List(/r1)")
+ }
+
+ test("select c2 from /r1") {
+ val r = QueryParser.parseOQL("select c2 from /r1").get
+ assert(r == "List(/r1)")
+ }
+
+ test("select key, value from /r1.entries") {
+ val r = QueryParser.parseOQL("select key, value from /r1.entries").get
+ assert(r == "List(/r1.entries)")
+ }
+
+ test("select c1, c2 from /r1 where col1 > 100 and col2 <= 120 or c3 = 2") {
+ val r = QueryParser.parseOQL("select c1, c2 from /r1 where col1 > 100 and col2 <= 120 or c3 = 2").get
+ assert(r == "List(/r1)")
+ }
+
+ test("select * from /r1/r2 where c1 >= 200") {
+ val r = QueryParser.parseOQL("select * from /r1/r2 where c1 >= 200").get
+ assert(r == "List(/r1/r2)")
+ }
+
+ test("import io.pivotal select c1, c2, c3 from /r1/r2, /r3/r4 where c1 <= 15 and c2 = 100") {
+ val r = QueryParser.parseOQL("import io.pivotal select c1, c2, c3 from /r1/r2, /r3/r4 where c1 <= 15 and c2 = 100").get
+ assert(r == "List(/r1/r2, /r3/r4)")
+ }
+
+ test("SELECT distinct f1, f2 FROM /r1/r2 WHere f = 100") {
+ val r = QueryParser.parseOQL("SELECT distinct f1, f2 FROM /r1/r2 WHere f = 100").get
+ assert(r == "List(/r1/r2)")
+ }
+
+ test("IMPORT io.pivotal.geode IMPORT com.mypackage SELECT key,value FROM /root/sub.entries WHERE status = 'active' ORDER BY id desc") {
+ val r = QueryParser.parseOQL("IMPORT io.pivotal.geode IMPORT com.mypackage SELECT key,value FROM /root/sub.entries WHERE status = 'active' ORDER BY id desc").get
+ assert(r == "List(/root/sub.entries)")
+ }
+
+ test("select distinct p.ID, p.status from /region p where p.ID > 5 order by p.status") {
+ val r = QueryParser.parseOQL("select distinct p.ID, p.status from /region p where p.ID > 5 order by p.status").get
+ assert(r == "List(/region)")
+ }
+
+ test("SELECT DISTINCT * FROM /QueryRegion1 r1, /QueryRegion2 r2 WHERE r1.ID = r2.ID") {
+ val r = QueryParser.parseOQL("SELECT DISTINCT * FROM /QueryRegion1 r1, /QueryRegion2 r2 WHERE r1.ID = r2.ID").get
+ assert(r == "List(/QueryRegion1, /QueryRegion2)")
+ }
+
+ test("SELECT id, \"type\", positions, status FROM /obj_obj_region WHERE status = 'active'") {
+ val r = QueryParser.parseOQL("SELECT id, \"type\", positions, status FROM /obj_obj_region WHERE status = 'active'").get
+ println("r.type=" + r.getClass.getName + " r=" + r)
+ assert(r == "List(/obj_obj_region)")
+ }
+
+ test("SELECT r.id, r.\"type\", r.positions, r.status FROM /obj_obj_region r, r.positions.values f WHERE r.status = 'active' and f.secId = 'MSFT'") {
+ val r = QueryParser.parseOQL("SELECT r.id, r.\"type\", r.positions, r.status FROM /obj_obj_region r, r.positions.values f WHERE r.status = 'active' and f.secId = 'MSFT'").get
+ assert(r == "List(/obj_obj_region, r.positions.values)")
+ }
+}
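As the assertions above show, QueryParser.parseOQL extracts the region paths named in an OQL query's FROM clause. A minimal usage sketch (the query is illustrative, and calling .get assumes the parse succeeds, as it does in these tests):

    import io.pivotal.geode.spark.connector.internal.oql.QueryParser

    val regions = QueryParser.parseOQL(
      "SELECT * FROM /customers c WHERE c.status = 'active'").get
    println(regions)  // List(/customers)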
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/ConnectorImplicitsTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/ConnectorImplicitsTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/ConnectorImplicitsTest.scala
new file mode 100644
index 0000000..b0464cc
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/ConnectorImplicitsTest.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package unittest.io.pivotal.geode.spark.connector
+
+import io.pivotal.geode.spark.connector._
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+import org.scalatest.FunSuite
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.Matchers
+
+class ConnectorImplicitsTest extends FunSuite with Matchers with MockitoSugar {
+
+ test("implicit map2Properties") {
+ verifyProperties(Map.empty)
+ verifyProperties(Map("One" -> "1", "Two" -> "2", "Three" ->"3"))
+ }
+
+ def verifyProperties(map: Map[String, String]): Unit = {
+ val props: java.util.Properties = map
+ assert(props.size() == map.size)
+ map.foreach(p => assert(props.getProperty(p._1) == p._2))
+ }
+
+ test("Test Implicit SparkContext Conversion") {
+ val mockSparkContext = mock[SparkContext]
+ val gfscf: GeodeSparkContextFunctions = mockSparkContext
+ assert(gfscf.isInstanceOf[GeodeSparkContextFunctions])
+ }
+
+ test("Test Implicit SQLContext Conversion") {
+ val mockSQLContext = mock[SQLContext]
+ val gfscf: GeodeSQLContextFunctions = mockSQLContext
+ assert(gfscf.isInstanceOf[GeodeSQLContextFunctions])
+ }
+}
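Outside of tests, these implicits let application code invoke Geode operations directly on a SparkContext. A hedged sketch (the locator address and region name are illustrative, and geodeRegion is assumed to be the read method that GeodeSparkContextFunctions exposes):

    import io.pivotal.geode.spark.connector._
    import org.apache.spark.{SparkConf, SparkContext}

    val sc = new SparkContext(new SparkConf()
      .setAppName("geode-demo")
      .set(GeodeLocatorPropKey, "localhost[10334]"))
    // the implicit conversion to GeodeSparkContextFunctions makes this compile
    val rdd = sc.geodeRegion[String, String]("str_str_region")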
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeConnectionConfTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeConnectionConfTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeConnectionConfTest.scala
new file mode 100644
index 0000000..a3076f4
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeConnectionConfTest.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package unittest.io.pivotal.geode.spark.connector
+
+import org.apache.spark.SparkConf
+import org.mockito.Mockito._
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.{Matchers, FunSuite}
+import io.pivotal.geode.spark.connector._
+
+class GeodeConnectionConfTest extends FunSuite with Matchers with MockitoSugar {
+
+ test("apply(SparkConf) w/ GeodeLocator property and empty geodeProps") {
+ val (host1, port1) = ("host1", 1234)
+ val (host2, port2) = ("host2", 5678)
+ val conf = new SparkConf().set(GeodeLocatorPropKey, s"$host1[$port1],$host2[$port2]")
+ val connConf = GeodeConnectionConf(conf)
+ assert(connConf.locators == Seq((host1, port1),(host2, port2)))
+ assert(connConf.geodeProps.isEmpty)
+ }
+
+ test("apply(SparkConf) w/ GeodeLocator property and geode properties") {
+ val (host1, port1) = ("host1", 1234)
+ val (host2, port2) = ("host2", 5678)
+ val (propK1, propV1) = ("ack-severe-alert-threshold", "1")
+ val (propK2, propV2) = ("ack-wait-threshold", "10")
+ val conf = new SparkConf().set(GeodeLocatorPropKey, s"$host1[$port1],$host2[$port2]")
+ .set(s"spark.geode.$propK1", propV1).set(s"spark.geode.$propK2", propV2)
+ val connConf = GeodeConnectionConf(conf)
+ assert(connConf.locators == Seq((host1, port1),(host2, port2)))
+ assert(connConf.geodeProps == Map(propK1 -> propV1, propK2 -> propV2))
+ }
+
+ test("apply(SparkConf) w/o GeodeLocator property") {
+ intercept[RuntimeException] { GeodeConnectionConf(new SparkConf()) }
+ }
+
+ test("apply(SparkConf) w/ invalid GeodeLocator property") {
+ val conf = new SparkConf().set(GeodeLocatorPropKey, "local^host:1234")
+ intercept[Exception] { GeodeConnectionConf(conf) }
+ }
+
+ test("apply(locatorStr, geodeProps) w/ valid locatorStr and non geodeProps") {
+ val (host1, port1) = ("host1", 1234)
+ val connConf = GeodeConnectionConf(s"$host1:$port1")
+ assert(connConf.locators == Seq((host1, port1)))
+ assert(connConf.geodeProps.isEmpty)
+ }
+
+ test("apply(locatorStr, geodeProps) w/ valid locatorStr and non-empty geodeProps") {
+ val (host1, port1) = ("host1", 1234)
+ val (host2, port2) = ("host2", 5678)
+ val (propK1, propV1) = ("ack-severe-alert-threshold", "1")
+ val (propK2, propV2) = ("ack-wait-threshold", "10")
+ val props = Map(propK1 -> propV1, propK2 -> propV2)
+ val connConf = GeodeConnectionConf(s"$host1:$port1,$host2:$port2", props)
+ assert(connConf.locators == Seq((host1, port1),(host2, port2)))
+ assert(connConf.geodeProps == props)
+ }
+
+ test("apply(locatorStr, geodeProps) w/ invalid locatorStr") {
+ intercept[Exception] { GeodeConnectionConf("local~host:4321") }
+ }
+
+ test("constructor w/ empty (host,port) pairs") {
+ intercept[IllegalArgumentException] { new GeodeConnectionConf(Seq.empty) }
+ }
+
+ test("getConnection() normal") {
+ implicit val mockFactory = mock[GeodeConnectionManager]
+ val mockConnection = mock[GeodeConnection]
+ when(mockFactory.getConnection(org.mockito.Matchers.any[GeodeConnectionConf])).thenReturn(mockConnection)
+ val connConf = GeodeConnectionConf("localhost:1234")
+ assert(connConf.getConnection == mockConnection)
+ verify(mockFactory).getConnection(connConf)
+ }
+
+ test("getConnection() failure") {
+ implicit val mockFactory = mock[GeodeConnectionManager]
+ when(mockFactory.getConnection(org.mockito.Matchers.any[GeodeConnectionConf])).thenThrow(new RuntimeException)
+ val connConf = GeodeConnectionConf("localhost:1234")
+ intercept[RuntimeException] { connConf.getConnection }
+ verify(mockFactory).getConnection(connConf)
+ }
+
+}
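The two construction paths these tests cover look like this in application code (a minimal sketch; host names, ports and property values are illustrative):

    import io.pivotal.geode.spark.connector._
    import org.apache.spark.SparkConf

    // path 1: derive the conf from a SparkConf carrying spark.geode.* properties
    val sparkConf = new SparkConf()
      .set(GeodeLocatorPropKey, "locator1[10334],locator2[10334]")
      .set("spark.geode.ack-wait-threshold", "10")
    val fromSparkConf = GeodeConnectionConf(sparkConf)

    // path 2: build it directly from a locator string plus geode properties
    val fromString = GeodeConnectionConf("locator1:10334",
      Map("ack-wait-threshold" -> "10"))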
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeDStreamFunctionsTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeDStreamFunctionsTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeDStreamFunctionsTest.scala
new file mode 100644
index 0000000..bcba7e1
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeDStreamFunctionsTest.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package unittest.io.pivotal.geode.spark.connector
+
+import com.gemstone.gemfire.cache.Region
+import io.pivotal.geode.spark.connector.{GeodeConnection, GeodeConnectionConf}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.dstream.DStream
+import org.mockito.Mockito._
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.{Matchers, FunSuite}
+import org.mockito.Matchers.{eq => mockEq, any => mockAny}
+
+import scala.reflect.ClassTag
+
+class GeodeDStreamFunctionsTest extends FunSuite with Matchers with MockitoSugar {
+
+ test("test GeodePairDStreamFunctions Implicit") {
+ import io.pivotal.geode.spark.connector.streaming._
+ val mockDStream = mock[DStream[(Int, String)]]
+ // the implicit conversion makes the following line valid
+ val pairDStream: GeodePairDStreamFunctions[Int, String] = mockDStream
+ pairDStream shouldBe a[GeodePairDStreamFunctions[_, _]]
+ }
+
+ test("test GeodeDStreamFunctions Implicit") {
+ import io.pivotal.geode.spark.connector.streaming._
+ val mockDStream = mock[DStream[String]]
+ // the implicit conversion makes the following line valid
+ val dstream: GeodeDStreamFunctions[String] = mockDStream
+ dstream shouldBe a[GeodeDStreamFunctions[_]]
+ }
+
+ def createMocks[K, V](regionPath: String)
+ (implicit kt: ClassTag[K], vt: ClassTag[V], m: Manifest[Region[K, V]])
+ : (String, GeodeConnectionConf, GeodeConnection, Region[K, V]) = {
+ val mockConnection = mock[GeodeConnection]
+ val mockConnConf = mock[GeodeConnectionConf]
+ val mockRegion = mock[Region[K, V]]
+ when(mockConnConf.getConnection).thenReturn(mockConnection)
+ when(mockConnConf.locators).thenReturn(Seq.empty)
+ (regionPath, mockConnConf, mockConnection, mockRegion)
+ }
+
+ test("test GeodePairDStreamFunctions.saveToGeode()") {
+ import io.pivotal.geode.spark.connector.streaming._
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[String, String]("test")
+ val mockDStream = mock[DStream[(String, String)]]
+ mockDStream.saveToGeode(regionPath, mockConnConf)
+ verify(mockConnConf).getConnection
+ verify(mockConnection).validateRegion[String, String](regionPath)
+ verify(mockDStream).foreachRDD(mockAny[(RDD[(String, String)]) => Unit])
+ }
+
+ test("test GeodeDStreamFunctions.saveToGeode()") {
+ import io.pivotal.geode.spark.connector.streaming._
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[String, Int]("test")
+ val mockDStream = mock[DStream[String]]
+ mockDStream.saveToGeode[String, Int](regionPath, (s: String) => (s, s.length), mockConnConf)
+ verify(mockConnConf).getConnection
+ verify(mockConnection).validateRegion[String, Int](regionPath)
+ verify(mockDStream).foreachRDD(mockAny[(RDD[String]) => Unit])
+ }
+
+}
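For reference, the two saveToGeode overloads verified here are used in a streaming job roughly as follows (a sketch; the streams, region paths and key-derivation function are illustrative):

    import io.pivotal.geode.spark.connector.GeodeConnectionConf
    import io.pivotal.geode.spark.connector.streaming._
    import org.apache.spark.streaming.dstream.DStream

    def saveStreams(pairs: DStream[(String, String)], words: DStream[String],
                    connConf: GeodeConnectionConf): Unit = {
      // pair DStream: each element is written as a (key, value) region entry
      pairs.saveToGeode("str_str_region", connConf)
      // non-pair DStream: a function derives the (key, value) pair per element
      words.saveToGeode[String, Int]("str_int_region", w => (w, w.length), connConf)
    }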
http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/760c6e22/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeRDDFunctionsTest.scala
----------------------------------------------------------------------
diff --git a/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeRDDFunctionsTest.scala b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeRDDFunctionsTest.scala
new file mode 100644
index 0000000..96e5f26
--- /dev/null
+++ b/geode-spark-connector/geode-spark-connector/src/test/scala/unittest/io/pivotal/geode/spark/connector/GeodeRDDFunctionsTest.scala
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package unittest.io.pivotal.geode.spark.connector
+
+import com.gemstone.gemfire.cache.Region
+import io.pivotal.geode.spark.connector._
+import io.pivotal.geode.spark.connector.internal.rdd.{GeodeRDDWriter, GeodePairRDDWriter}
+import org.apache.spark.{TaskContext, SparkContext}
+import org.apache.spark.rdd.RDD
+import org.mockito.Mockito._
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.{FunSuite, Matchers}
+import collection.JavaConversions._
+import scala.reflect.ClassTag
+import org.mockito.Matchers.{eq => mockEq, any => mockAny}
+
+class GeodeRDDFunctionsTest extends FunSuite with Matchers with MockitoSugar {
+
+ test("test PairRDDFunction Implicit") {
+ import io.pivotal.geode.spark.connector._
+ val mockRDD = mock[RDD[(Int, String)]]
+ // the implicit conversion makes the following line valid
+ val pairRDD: GeodePairRDDFunctions[Int, String] = mockRDD
+ pairRDD shouldBe a [GeodePairRDDFunctions[_, _]]
+ }
+
+ test("test RDDFunction Implicit") {
+ import io.pivotal.geode.spark.connector._
+ val mockRDD = mock[RDD[String]]
+ // the implicit conversion makes the following line valid
+ val nonPairRDD: GeodeRDDFunctions[String] = mockRDD
+ nonPairRDD shouldBe a [GeodeRDDFunctions[_]]
+ }
+
+ def createMocks[K, V](regionPath: String)
+ (implicit kt: ClassTag[K], vt: ClassTag[V], m: Manifest[Region[K, V]]): (String, GeodeConnectionConf, GeodeConnection, Region[K, V]) = {
+ val mockConnection = mock[GeodeConnection]
+ val mockConnConf = mock[GeodeConnectionConf]
+ val mockRegion = mock[Region[K, V]]
+ when(mockConnConf.getConnection).thenReturn(mockConnection)
+ when(mockConnection.getRegionProxy[K, V](regionPath)).thenReturn(mockRegion)
+ // mockRegion shouldEqual mockConn.getRegionProxy[K, V](regionPath)
+ (regionPath, mockConnConf, mockConnection, mockRegion)
+ }
+
+ test("test GeodePairRDDWriter") {
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[String, String]("test")
+ val writer = new GeodePairRDDWriter[String, String](regionPath, mockConnConf)
+ val data = List(("1", "one"), ("2", "two"), ("3", "three"))
+ writer.write(null, data.toIterator)
+ val expectedMap: Map[String, String] = data.toMap
+ verify(mockRegion).putAll(expectedMap)
+ }
+
+ test("test GeodeNonPairRDDWriter") {
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[Int, String]("test")
+ val writer = new GeodeRDDWriter[String, Int, String](regionPath, mockConnConf)
+ val data = List("a", "ab", "abc")
+ val f: String => (Int, String) = s => (s.length, s)
+ writer.write(f)(null, data.toIterator)
+ val expectedMap: Map[Int, String] = data.map(f).toMap
+ verify(mockRegion).putAll(expectedMap)
+ }
+
+ test("test PairRDDFunctions.saveToGeode") {
+ verifyPairRDDFunction(useOpConf = false)
+ }
+
+ test("test PairRDDFunctions.saveToGeode w/ opConf") {
+ verifyPairRDDFunction(useOpConf = true)
+ }
+
+ def verifyPairRDDFunction(useOpConf: Boolean): Unit = {
+ import io.pivotal.geode.spark.connector._
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[String, String]("test")
+ val mockRDD = mock[RDD[(String, String)]]
+ val mockSparkContext = mock[SparkContext]
+ when(mockRDD.sparkContext).thenReturn(mockSparkContext)
+ val result =
+ if (useOpConf)
+ mockRDD.saveToGeode(regionPath, mockConnConf, Map(RDDSaveBatchSizePropKey -> "5000"))
+ else
+ mockRDD.saveToGeode(regionPath, mockConnConf)
+ verify(mockConnection, times(1)).validateRegion[String, String](regionPath)
+ result === Unit
+ verify(mockSparkContext, times(1)).runJob[(String, String), Unit](
+ mockEq(mockRDD), mockAny[(TaskContext, Iterator[(String, String)]) => Unit])(mockAny(classOf[ClassTag[Unit]]))
+
+ // Note: the current implementation makes the following code uncompilable,
+ // so there is no negative test for this case:
+ // val rdd: RDD[(K, V)] = ...
+ // rdd.saveToGeode(regionPath, s => (s.length, s))
+ }
+
+ test("test RDDFunctions.saveToGeode") {
+ verifyRDDFunction(useOpConf = false)
+ }
+
+ test("test RDDFunctions.saveToGeode w/ opConf") {
+ verifyRDDFunction(useOpConf = true)
+ }
+
+ def verifyRDDFunction(useOpConf: Boolean): Unit = {
+ import io.pivotal.geode.spark.connector._
+ val (regionPath, mockConnConf, mockConnection, mockRegion) = createMocks[Int, String]("test")
+ val mockRDD = mock[RDD[(String)]]
+ val mockSparkContext = mock[SparkContext]
+ when(mockRDD.sparkContext).thenReturn(mockSparkContext)
+ val result =
+ if (useOpConf)
+ mockRDD.saveToGeode(regionPath, s => (s.length, s), mockConnConf, Map(RDDSaveBatchSizePropKey -> "5000"))
+ else
+ mockRDD.saveToGeode(regionPath, s => (s.length, s), mockConnConf)
+ verify(mockConnection, times(1)).validateRegion[Int, String](regionPath)
+ result === Unit
+ verify(mockSparkContext, times(1)).runJob[String, Unit](
+ mockEq(mockRDD), mockAny[(TaskContext, Iterator[String]) => Unit])(mockAny(classOf[ClassTag[Unit]]))
+
+ // Note: the current implementation makes the following code uncompilable,
+ // so there is no negative test for this case:
+ // val rdd: RDD[T] = ... // T is not a (K, V) tuple
+ // rdd.saveToGeode(regionPath)
+ }
+
+}
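Similarly, the RDD-side overloads exercised above translate to application code roughly as follows (a sketch; region paths are illustrative, and RDDSaveBatchSizePropKey is assumed to cap the number of entries per putAll batch):

    import io.pivotal.geode.spark.connector._
    import org.apache.spark.rdd.RDD

    def saveRdds(pairs: RDD[(String, String)], words: RDD[String],
                 connConf: GeodeConnectionConf): Unit = {
      // pair RDD: keys and values map straight onto region entries
      pairs.saveToGeode("str_str_region", connConf)
      // non-pair RDD: supply a function that builds each (key, value) pair
      words.saveToGeode("str_int_region", w => (w, w.length), connConf,
        Map(RDDSaveBatchSizePropKey -> "5000"))
    }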