You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by at...@apache.org on 2015/09/01 12:50:29 UTC

svn commit: r1700483 - in /labs: cms/trunk/content/events.xml cms/trunk/content/labs.html gravity/ gravity/doap.rdf gravity/src/ gravity/src/interval_partitioner.cpp gravity/src/reservoirsamp.cpp

Author: atri
Date: Tue Sep  1 10:50:29 2015
New Revision: 1700483

URL: http://svn.apache.org/r1700483
Log:
Added Gravity to Apache Labs

Added:
    labs/gravity/
    labs/gravity/doap.rdf
    labs/gravity/src/
    labs/gravity/src/interval_partitioner.cpp
    labs/gravity/src/reservoirsamp.cpp
Modified:
    labs/cms/trunk/content/events.xml
    labs/cms/trunk/content/labs.html

Modified: labs/cms/trunk/content/events.xml
URL: http://svn.apache.org/viewvc/labs/cms/trunk/content/events.xml?rev=1700483&r1=1700482&r2=1700483&view=diff
==============================================================================
--- labs/cms/trunk/content/events.xml (original)
+++ labs/cms/trunk/content/events.xml Tue Sep  1 10:50:29 2015
@@ -172,4 +172,8 @@
     <event start="Wed, 28 May 2014 13:00:36 GMT" title="Flexicon Lab Created">
       Flexicon Lab created with Gabriela Gibson as its PI.
     </event>
+
+    <event start="Wed, 1 September 2015 16:16:00 IST" title="Gravity Lab Created">
+      Gravity lab created with Atri Sharma as its PI.
+    </event>
 </data>

Modified: labs/cms/trunk/content/labs.html
URL: http://svn.apache.org/viewvc/labs/cms/trunk/content/labs.html?rev=1700483&r1=1700482&r2=1700483&view=diff
==============================================================================
--- labs/cms/trunk/content/labs.html (original)
+++ labs/cms/trunk/content/labs.html Tue Sep  1 10:50:29 2015
@@ -123,6 +123,11 @@
 	  Fluid extends Java Persistence Architetcure (JPA) for Service Data Objects (SDO).
 	  </li>
 
+          <li><a href="http://svn.apache.org/repos/asf/labs/gravity/" title="gravity">Gravity</a> - 
+	  Gravity is a high performance distributed sorting system with features like algorithms to
+          make resorting on different keys or adding satellite keys cheaper.
+	  </li>
+
       <li>
           <a href="http://svn.apache.org/repos/asf/labs/jaxmas/" title="JaxMas">JaxMas</a>
           - a poor mans implementation of JAXR, the Java API for XML registries, based

Added: labs/gravity/doap.rdf
URL: http://svn.apache.org/viewvc/labs/gravity/doap.rdf?rev=1700483&view=auto
==============================================================================
--- labs/gravity/doap.rdf (added)
+++ labs/gravity/doap.rdf Tue Sep  1 10:50:29 2015
@@ -0,0 +1,33 @@
+?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF
+   xmlns="http://usefulinc.com/ns/doap#"
+   xmlns:foaf="http://xmlns.com/foaf/0.1/"
+   xmlns:labs="http://labs.apache.org/doap-ext/1.0#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+>
+  <rdf:Description rdf:about="http://people.apache.org/~atri/#me">
+    <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
+    <foaf:mbox_sha1sum>b6e3794ae148a78d1f72a017e997490fdc298e99</foaf:mbox_sha1sum>
+    <foaf:homepage rdf:resource="http://people.apache.org/~atri/"/>
+    <foaf:name>Atri Sharma</foaf:name>
+  </rdf:Description>
+  <rdf:Description rdf:about="http://labs.apache.org/labs#gravity">
+    <homepage rdf:resource="http://labs.apache.org/gravity/"/>
+    <labs:status>active</labs:status>
+    <created>2015-8-28</created>
+    <license rdf:resource="http://usefulinc.com/doap/licenses/asl20"/>
+    <rdf:type rdf:resource="http://usefulinc.com/ns/doap#Project"/>
+    <description xml:lang="en">Gravity is a very high performance distributed sorting engine aimed at sorting large data in minimal time. The project uses novel ideas and algorithms and is an outcome of years of wanting a dedicated light weight distributed sorting engine for big data. Gravity aims at becoming de facto for distributed sorting for big data, with necessary statistics to estimate which algorithm to be used, indexes on potential keys to help resort on different keys etc</description>
+    <shortname>gravity</shortname>
+    <programming-language>C++</programming-language>
+    <shortdesc xml:lang="en">A high performance distributed sorting engine</shortdesc>
+    <name>Gravity</name>
+    <maintainer rdf:resource="http://people.apache.org/~atri/#me"/>
+    <repository rdf:nodeID="N644ba76339b94d9c8a56522284284c7f"/>
+  </rdf:Description>
+  <rdf:Description rdf:nodeID="N644ba76339b94d9c8a56522284284c7f">
+    <browse rdf:resource="http://svn.apache.org/viewvc/labs/gravity/"/>
+    <rdf:type rdf:resource="http://usefulinc.com/ns/doap#SVNRepository"/>
+    <location rdf:resource="http://svn.apache.org/repos/asf/labs/gravity/"/>
+  </rdf:Description>
+</rdf:RDF>

Added: labs/gravity/src/interval_partitioner.cpp
URL: http://svn.apache.org/viewvc/labs/gravity/src/interval_partitioner.cpp?rev=1700483&view=auto
==============================================================================
--- labs/gravity/src/interval_partitioner.cpp (added)
+++ labs/gravity/src/interval_partitioner.cpp Tue Sep  1 10:50:29 2015
@@ -0,0 +1,317 @@
+#include <iostream>
+#include <string>
+
+using namespace std;
+/* Node class for Interval tree */
+class IntervalTreeNode
+{
+	string lo;
+	string hi;
+	IntervalTreeNode *left;
+	IntervalTreeNode *right;
+	string max_subtree_hi_val;
+	bool lo_included;
+	bool hi_included;
+public:
+	string getlo()
+	{
+		return lo;
+	}
+	
+	string gethi()
+	{
+		return hi;
+	}
+	
+	IntervalTreeNode* getleft()
+	{
+		return left;
+	}
+	
+	IntervalTreeNode* getright()
+	{
+		return right;
+	}
+
+        IntervalTreeNode* setleft(IntervalTreeNode *value)
+        {
+	  left = value;
+	}
+
+        IntervalTreeNode* setRight(IntervalTreeNode *value)
+        {
+	  right = value;
+	}
+	
+	string getmaxhi()
+	{
+		return max_subtree_hi_val;
+	}
+	
+	IntervalTreeNode(string lo, string hi, bool lo_included, bool hi_included)
+	{
+		this->lo = lo;
+		this->hi = hi;
+		this->lo_included = lo_included;
+		this->hi_included = hi_included;
+		
+		left = NULL;
+		right = NULL;
+	}
+
+	IntervalTreeNode* search(string key, bool &isFound);
+	void insertInterval(string lo, string hi, bool lo_included, bool hi_included);
+        bool searchInterval(string lo, string hi);
+};
+
+class IntervalTree
+{
+public:
+        IntervalTreeNode *root;
+
+        IntervalTree()
+        {
+	  root = NULL;
+	}
+
+	bool isKeyPresent(string key);
+	void addInterval(string lo, string hi, bool lo_included, bool hi_included);
+	void deleteInterval(string lo, string hi);
+        void deleteNode(string lo);
+};
+
+/* Search if given interval overlaps with the interval in the current node */
+bool IntervalTreeNode::searchInterval(string lo, string hi)
+{
+
+  if (lo <= this->hi && this->lo <= hi)
+        return true;
+
+  return false;
+}
+
+/* Node deletion with given key */
+void IntervalTree::deleteNode(string deletelo)
+{
+  IntervalTreeNode *traverse = root;
+  IntervalTreeNode *parent_pointer = NULL;
+
+  if (root == NULL)
+    return;
+
+  while (traverse != NULL && (traverse->getlo()) != deletelo)
+  {
+    parent_pointer = traverse;
+
+    if ((traverse->getlo()) > deletelo)
+    {
+      traverse = traverse->getleft();
+    }
+    else
+    {
+      traverse = traverse->getright();
+    }
+
+  }
+
+  /* Check if the key wasnt found in the tree */
+  if (traverse == NULL)
+    return;
+
+  /* Check if only a left child exists. */
+  if ((traverse->getright()) == NULL)
+  {
+    if ((parent_pointer->getleft()) == traverse)
+    {
+      parent_pointer->setleft(traverse->getleft());
+    }
+    else
+    {
+      parent_pointer->setright(traverse->getleft());
+
+    }
+
+
+    
+	
+
+IntervalTreeNode* IntervalTreeNode::search(string key, bool &isFound)
+{
+
+	//pthread_mutex_lock( &mutex_node );
+	isFound = false;
+
+	if (lo_included && key == lo)
+	{
+		isFound = true;
+		//pthread_mutex_unlock( &mutex_node );
+		return this;
+	}
+
+	if (hi_included && key == hi)
+	{
+		isFound = true;
+		//pthread_mutex_unlock( &mutex_node );
+		return this;
+	}
+
+
+	if (lo < key && hi > key)
+	{
+		isFound = true;
+		//pthread_mutex_unlock( &mutex_node );
+		return this;
+	}
+
+	if (left != NULL && (left->getmaxhi()) > key)
+	{
+		//pthread_mutex_unlock( &mutex_node );
+		return left;
+	}
+
+	//pthread_mutex_unlock( &mutex_node );
+	return right;
+}
+bool IntervalTree::isKeyPresent(string key)
+{
+	IntervalTreeNode *traverse = root;
+	IntervalTreeNode *temp = NULL;
+	bool isFound = false;
+
+	/* While the value is not found and we still have nodes to search */
+	while (traverse != NULL && isFound != true)
+	{
+		temp = traverse->search(key, isFound);
+		traverse = temp;
+	}
+
+	return isFound;
+}
+
+void IntervalTreeNode::insertInterval(string insertlo, string inserthi, bool lo_included, bool hi_included)
+{
+	IntervalTreeNode *new_node = NULL;
+	
+	cout<<"in insertInterval"<<" "<<insertlo<<" "<<lo<<endl;
+
+	/* Since the BST is built only on the lo values of the intervals as key, we shall only take lo values into account here */
+	if (insertlo == lo) /* Key already exists in interval tree */
+	{
+		cout<<"case1"<<endl;
+		/* need to check if hi value needs to updated for the current interval */
+		if (inserthi > hi)
+		{
+		        cout<<"value updated1"<<endl;
+			hi = inserthi;
+			cout<<"value after updating1"<<" "<<hi<<endl;
+
+		}
+	}
+	else if (insertlo < lo)
+	{
+		cout<<"case2"<<endl;
+		/* Insertion in left subtree */
+		if (left == NULL)
+		{
+			new_node = new IntervalTreeNode(insertlo, inserthi, lo_included, hi_included);
+			left = new_node;
+		}
+		else
+		{
+			left->insertInterval(insertlo, inserthi, lo_included, hi_included);
+		}
+	}
+	else if(insertlo > lo)
+	{
+		cout<<"case3"<<endl;
+		/* Insertion in right subtree */
+		if (right == NULL)
+		{
+			new_node = new IntervalTreeNode(insertlo, inserthi, lo_included, hi_included);
+			right = new_node;
+		}
+		else
+		{
+			right->insertInterval(insertlo, inserthi, lo_included, hi_included);
+		}
+	}
+
+	/* Check if max hi val needs to be updated for the current node */
+	if (inserthi > max_subtree_hi_val)
+	{
+		max_subtree_hi_val = inserthi;
+	}
+
+}	
+
+	 
+void IntervalTree::addInterval(string insertlo, string inserthi, bool lo_included, bool hi_included)
+{
+	IntervalTreeNode *traverse = root;
+	bool isInserted = false;
+	IntervalTreeNode *new_node = NULL;
+
+	if (inserthi <= insertlo)
+	{
+		cout<<"Erratical range"<<endl;
+		return;
+	}
+
+	if (root == NULL)
+	{
+		new_node = new IntervalTreeNode(insertlo, inserthi, lo_included, hi_included);
+		root = new_node;
+		return;
+	}
+
+	cout<<"inserting2"<<" "<<insertlo<<endl;
+	root->insertInterval(insertlo, inserthi, lo_included, hi_included);
+}
+
+int main()
+{
+	IntervalTree tree1;
+	bool return_val = false;
+	
+	tree1.addInterval("AAA", "BaB", true, true);
+	
+	return_val = tree1.isKeyPresent("Aaab");
+	
+	if (return_val == true)
+	{
+		cout<<"true1"<<endl;
+	}
+	else
+	{
+		cout<<"false1"<<endl;
+	}
+	
+	//tree1.addInterval("DD", "Df", true, true);
+	tree1.addInterval("BBB", "DDD", true, true);
+	
+	return_val = tree1.isKeyPresent("CD");
+	
+	if (return_val == true)
+	{
+		cout<<"true2"<<endl;
+	}
+	else
+	{
+		cout<<"false2"<<endl;
+	}
+
+	return_val = tree1.root->searchInterval("EEE", "FFF");
+
+	if (return_val == true)
+	{
+		cout<<"true3"<<endl;
+	}
+	else
+	{
+		cout<<"false3"<<endl;
+	}
+
+	//tree1.addInterval("AAA","CCC", true, true);
+
+	cout<<((tree1.root)->getmaxhi())<<endl;
+}

Added: labs/gravity/src/reservoirsamp.cpp
URL: http://svn.apache.org/viewvc/labs/gravity/src/reservoirsamp.cpp?rev=1700483&view=auto
==============================================================================
--- labs/gravity/src/reservoirsamp.cpp (added)
+++ labs/gravity/src/reservoirsamp.cpp Tue Sep  1 10:50:29 2015
@@ -0,0 +1,45 @@
+#include <iostream>
+using namespace std;
+ 
+int reservoirSample(int sample, int* samples, int size, int count)
+{
+  if(count < size)
+    samples[count] = sample;
+  else
+    if((rand()%count) < size)
+      samples[rand()%size] = sample;
+   
+  return ++count;
+}
+ 
+#define SIZE 10
+// #define STREAM_AVERAGE 300
+#define STREAM_AVERAGE 10
+int main()
+{
+  int count = 0;
+  int samples[SIZE];
+  int sample;
+  int i = 0;
+ 
+  srand(time(NULL));
+ 
+  cout << "Sample Stream: " << endl;
+  while(
+        (count < SIZE) ||
+        (rand()%STREAM_AVERAGE > 0)
+        )
+    {
+      sample = rand()%1000;
+      cout << sample << " ";
+ 
+      count = reservoirSample(sample, samples, SIZE, count);
+    }
+  cout << endl;
+  cout << "Total samples: " << count << endl;
+ 
+  cout << "Output samples: " << endl;
+  for(i = 0;i < SIZE;i++)
+    cout << samples[i] << " ";
+  cout << endl;
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org