You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spot.apache.org by na...@apache.org on 2018/02/09 22:50:49 UTC

[01/11] incubator-spot git commit: added some docstrings

Repository: incubator-spot
Updated Branches:
  refs/heads/master 9215d8168 -> f722127ca


added some docstrings


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a99404b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a99404b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a99404b0

Branch: refs/heads/master
Commit: a99404b05045087bfd02d99f4764df1738959566
Parents: 9215d81
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:28:26 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:28:26 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a99404b0/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 31d89ca..1fe02a2 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -62,7 +62,10 @@ proxy_schema = StructType([
                                     StructField("h", StringType(), True)])
 
 def main():
-    
+    """
+    Handle commandline arguments and
+    start the collector.
+    """
     # input Parameters
     parser = argparse.ArgumentParser(description="Bluecoat Parser")
     parser.add_argument('-zk','--zookeeper',dest='zk',required=True,help='Zookeeper IP and port (i.e. 10.0.0.1:2181)',metavar='')
@@ -83,7 +86,12 @@ def spot_decoder(s):
     return s
 
 def split_log_entry(line):
+    """
+    Split the given line into its fields.
 
+    :param line: line to split
+    :returns: list
+    """
     lex = shlex.shlex(line)
     lex.quotes = '"'
     lex.whitespace_split = True
@@ -91,7 +99,12 @@ def split_log_entry(line):
     return list(lex)
 
 def proxy_parser(proxy_fields):
-    
+    """
+    Parse and normalize data.
+
+    :param proxy_fields: list with fields from log
+    :returns: list of str
+    """
     proxy_parsed_data = []
 
     if len(proxy_fields) > 1:
@@ -114,7 +127,9 @@ def proxy_parser(proxy_fields):
 
 
 def save_data(rdd,sqc,db,db_table,topic):
-
+    """
+    Create and save a data frame with the given data.
+    """
     if not rdd.isEmpty():
 
         df = sqc.createDataFrame(rdd,proxy_schema)        


[11/11] incubator-spot git commit: corrected data insertion

Posted by na...@apache.org.
corrected data insertion


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/f722127c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/f722127c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/f722127c

Branch: refs/heads/master
Commit: f722127ca410a88355cc3f6d952739845c7499ce
Parents: a166efc
Author: tpltnt <tp...@dropcut.net>
Authored: Mon Feb 5 19:05:13 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Mon Feb 5 19:05:13 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f722127c/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 01b9922..d476733 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -168,7 +168,7 @@ def save_data(rdd, sqc, db, db_table, topic):
         sqc.setConf("hive.exec.dynamic.partition", "true")
         sqc.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
         hive_table = "{0}.{1}".format(db, db_table)
-        df.write.format("parquet").mode("append").insertInto(hive_table)
+        df.write.format("parquet").mode("append").partitionBy('y', 'm', 'd', 'h').insertInto(hive_table)
 
     else:
         print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))


[08/11] incubator-spot git commit: fixed bluecoat_parse()

Posted by na...@apache.org.
fixed bluecoat_parse()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/6b79abbb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/6b79abbb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/6b79abbb

Branch: refs/heads/master
Commit: 6b79abbb079d99d283664382fba131864049f1fa
Parents: 2ea6b4e
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:40:38 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:42:22 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6b79abbb/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 597c13c..2f5da0d 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -170,21 +170,30 @@ def save_data(rdd, sqc, db, db_table, topic):
         print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))
 
 
-def bluecoat_parse(zk,topic,db,db_table,num_of_workers,batch_size):
-    
+def bluecoat_parse(zk, topic, db, db_table, num_of_workers, batch_size):
+    """
+    Parse and save bluecoat logs.
+
+    :param zk: Apache ZooKeeper quorum
+    :param topic: Apache Kafka topic (application name)
+    :param db: Apache Hive database to save into
+    :param db_table: table of `db` to save into
+    :param num_of_workers: number of Apache Kafka workers
+    :param batch_size: batch size for Apache Spark streaming context
+    """
     app_name = topic
     wrks = int(num_of_workers)
 
     # create spark context
     sc = SparkContext(appName=app_name)
-    ssc = StreamingContext(sc,int(batch_size))
+    ssc = StreamingContext(sc, int(batch_size))
     sqc = HiveContext(sc)
 
     tp_stream = KafkaUtils.createStream(ssc, zk, app_name, {topic: wrks}, keyDecoder=spot_decoder, valueDecoder=spot_decoder)
 
-    proxy_data = tp_stream.map(lambda row: row[1]).flatMap(lambda row: row.split("\n")).filter(lambda row: rex_date.match(row)).map(lambda row: row.strip("\n").strip("\r").replace("\t", " ").replace("  ", " ")).map(lambda row:  split_log_entry(row)).map(lambda row: proxy_parser(row))
-    saved_data = proxy_data.foreachRDD(lambda row: save_data(row,sqc,db,db_table,topic))
-    ssc.start();
+    proxy_data = tp_stream.map(lambda row: row[1]).flatMap(lambda row: row.split("\n")).filter(lambda row: rex_date.match(row)).map(lambda row: row.strip("\n").strip("\r").replace("\t", " ").replace("  ", " ")).map(lambda row: split_log_entry(row)).map(lambda row: proxy_parser(row))
+    saved_data = proxy_data.foreachRDD(lambda row: save_data(row, sqc, db, db_table, topic))
+    ssc.start()
     ssc.awaitTermination()
 
 


[06/11] incubator-spot git commit: PEP8 proxy_parser()

Posted by na...@apache.org.
PEP8 proxy_parser()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b9befd7b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b9befd7b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b9befd7b

Branch: refs/heads/master
Commit: b9befd7b0a882a327c31a691a57c07a86a64ff31
Parents: 8ff0e47
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:45:07 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:45:07 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b9befd7b/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 5667204..c2ddb04 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -118,25 +118,32 @@ def proxy_parser(proxy_fields):
     Parse and normalize data.
 
     :param proxy_fields: list with fields from log
-    :returns: list of str
+    :returns: list
     """
     proxy_parsed_data = []
 
     if len(proxy_fields) > 1:
 
         # create full URI.
-        proxy_uri_path =  proxy_fields[17] if  len(proxy_fields[17]) > 1 else ""
-        proxy_uri_qry =  proxy_fields[18] if  len(proxy_fields[18]) > 1 else ""
-        full_uri= "{0}{1}{2}".format(proxy_fields[15],proxy_uri_path,proxy_uri_qry)
+        proxy_uri_path = proxy_fields[17] if len(proxy_fields[17]) > 1 else ""
+        proxy_uri_qry = proxy_fields[18] if len(proxy_fields[18]) > 1 else ""
+        full_uri = "{0}{1}{2}".format(proxy_fields[15], proxy_uri_path, proxy_uri_qry)
         date = proxy_fields[0].split('-')
-        year =  date[0]
+        year = date[0]
         month = date[1].zfill(2)
         day = date[2].zfill(2)
         hour = proxy_fields[1].split(":")[0].zfill(2)
-        # re-order fields. 
-        proxy_parsed_data = [proxy_fields[0],proxy_fields[1],proxy_fields[3],proxy_fields[15],proxy_fields[12],proxy_fields[20],proxy_fields[13],int(proxy_fields[2]),proxy_fields[4],
-        proxy_fields[5],proxy_fields[6],proxy_fields[7],proxy_fields[8],proxy_fields[9],proxy_fields[10],proxy_fields[11],proxy_fields[14],proxy_fields[16],proxy_fields[17],proxy_fields[18],
-        proxy_fields[19],proxy_fields[21],int(proxy_fields[22]),int(proxy_fields[23]),proxy_fields[24],proxy_fields[25],proxy_fields[26],full_uri,year,month,day,hour ]
+        # re-order fields.
+        proxy_parsed_data = [proxy_fields[0], proxy_fields[1], proxy_fields[3],
+                             proxy_fields[15], proxy_fields[12], proxy_fields[20],
+                             proxy_fields[13], int(proxy_fields[2]), proxy_fields[4],
+                             proxy_fields[5], proxy_fields[6], proxy_fields[7],
+                             proxy_fields[8], proxy_fields[9], proxy_fields[10],
+                             proxy_fields[11], proxy_fields[14], proxy_fields[16],
+                             proxy_fields[17], proxy_fields[18], proxy_fields[19],
+                             proxy_fields[21], int(proxy_fields[22]), int(proxy_fields[23]),
+                             proxy_fields[24], proxy_fields[25], proxy_fields[26],
+                             full_uri, year, month, day, hour]
 
     return proxy_parsed_data
 


[02/11] incubator-spot git commit: fixed indentation

Posted by na...@apache.org.
fixed indentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/df86326b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/df86326b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/df86326b

Branch: refs/heads/master
Commit: df86326bf991a4f49c57e8aeeb4af0d8b059b21b
Parents: a99404b
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:30:12 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:30:12 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 64 ++++++++++++++--------------
 1 file changed, 32 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/df86326b/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 1fe02a2..5e36a4e 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -28,38 +28,38 @@ from pyspark.sql.types import *
 rex_date = re.compile("\d{4}-\d{2}-\d{2}")
 
 proxy_schema = StructType([
-                                    StructField("p_date", StringType(), True),
-                                    StructField("p_time", StringType(), True),
-                                    StructField("clientip", StringType(), True),
-                                    StructField("host", StringType(), True),
-                                    StructField("reqmethod", StringType(), True),
-                                    StructField("useragent", StringType(), True),
-                                    StructField("resconttype", StringType(), True),
-                                    StructField("duration", IntegerType(), True),
-                                    StructField("username", StringType(), True),
-                                    StructField("authgroup", StringType(), True),
-                                    StructField("exceptionid", StringType(), True),
-                                    StructField("filterresult", StringType(), True),
-                                    StructField("webcat", StringType(), True),
-                                    StructField("referer", StringType(), True),
-                                    StructField("respcode", StringType(), True),
-                                    StructField("action", StringType(), True),
-                                    StructField("urischeme", StringType(), True),
-                                    StructField("uriport", StringType(), True),
-                                    StructField("uripath", StringType(), True),
-                                    StructField("uriquery", StringType(), True),
-                                    StructField("uriextension", StringType(), True),
-                                    StructField("serverip", StringType(), True),
-                                    StructField("scbytes", IntegerType(), True),
-                                    StructField("csbytes", IntegerType(), True),
-                                    StructField("virusid", StringType(), True),
-                                    StructField("bcappname", StringType(), True),
-                                    StructField("bcappoper", StringType(), True),
-                                    StructField("fulluri", StringType(), True),
-                                    StructField("y", StringType(), True),
-                                    StructField("m", StringType(), True),
-                                    StructField("d", StringType(), True),
-                                    StructField("h", StringType(), True)])
+    StructField("p_date", StringType(), True),
+    StructField("p_time", StringType(), True),
+    StructField("clientip", StringType(), True),
+    StructField("host", StringType(), True),
+    StructField("reqmethod", StringType(), True),
+    StructField("useragent", StringType(), True),
+    StructField("resconttype", StringType(), True),
+    StructField("duration", IntegerType(), True),
+    StructField("username", StringType(), True),
+    StructField("authgroup", StringType(), True),
+    StructField("exceptionid", StringType(), True),
+    StructField("filterresult", StringType(), True),
+    StructField("webcat", StringType(), True),
+    StructField("referer", StringType(), True),
+    StructField("respcode", StringType(), True),
+    StructField("action", StringType(), True),
+    StructField("urischeme", StringType(), True),
+    StructField("uriport", StringType(), True),
+    StructField("uripath", StringType(), True),
+    StructField("uriquery", StringType(), True),
+    StructField("uriextension", StringType(), True),
+    StructField("serverip", StringType(), True),
+    StructField("scbytes", IntegerType(), True),
+    StructField("csbytes", IntegerType(), True),
+    StructField("virusid", StringType(), True),
+    StructField("bcappname", StringType(), True),
+    StructField("bcappoper", StringType(), True),
+    StructField("fulluri", StringType(), True),
+    StructField("y", StringType(), True),
+    StructField("m", StringType(), True),
+    StructField("d", StringType(), True),
+    StructField("h", StringType(), True)])
 
 def main():
     """


[07/11] incubator-spot git commit: fixes for save_data()

Posted by na...@apache.org.
fixes for save_data()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/2ea6b4ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/2ea6b4ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/2ea6b4ea

Branch: refs/heads/master
Commit: 2ea6b4eac7c11ef6084955179eb211b696737e9e
Parents: b9befd7
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:01:31 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:01:31 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/2ea6b4ea/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index c2ddb04..597c13c 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -148,21 +148,28 @@ def proxy_parser(proxy_fields):
     return proxy_parsed_data
 
 
-def save_data(rdd,sqc,db,db_table,topic):
+def save_data(rdd, sqc, db, db_table, topic):
     """
     Create and save a data frame with the given data.
+
+    :param rdd: collection of objects (Resilient Distributed Dataset) to store
+    :param sqc: Apache Hive context
+    :param db: Apache Hive database to save into
+    :param db_table: table of `db` to save into
+    :param topic: Apache Kafka topic to listen for (if `rdd` is empty)
     """
     if not rdd.isEmpty():
 
-        df = sqc.createDataFrame(rdd,proxy_schema)        
+        df = sqc.createDataFrame(rdd, proxy_schema)
         sqc.setConf("hive.exec.dynamic.partition", "true")
         sqc.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
-        hive_table = "{0}.{1}".format(db,db_table)
+        hive_table = "{0}.{1}".format(db, db_table)
         df.write.format("parquet").mode("append").insertInto(hive_table)
 
     else:
         print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))
 
+
 def bluecoat_parse(zk,topic,db,db_table,num_of_workers,batch_size):
     
     app_name = topic


[03/11] incubator-spot git commit: style fixes argparse

Posted by na...@apache.org.
style fixes argparse


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/51040a2c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/51040a2c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/51040a2c

Branch: refs/heads/master
Commit: 51040a2c112f81ec99873b882c65bf19ba45e1fb
Parents: df86326
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:32:25 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:33:03 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/51040a2c/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 5e36a4e..898ff2e 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -68,12 +68,18 @@ def main():
     """
     # input Parameters
     parser = argparse.ArgumentParser(description="Bluecoat Parser")
-    parser.add_argument('-zk','--zookeeper',dest='zk',required=True,help='Zookeeper IP and port (i.e. 10.0.0.1:2181)',metavar='')
-    parser.add_argument('-t','--topic',dest='topic',required=True,help='Topic to listen for Spark Streaming',metavar='')
-    parser.add_argument('-db','--database',dest='db',required=True,help='Hive database whete the data will be ingested',metavar='')
-    parser.add_argument('-dt','--db-table',dest='db_table',required=True,help='Hive table whete the data will be ingested',metavar='')
-    parser.add_argument('-w','--num_of_workers',dest='num_of_workers',required=True,help='Num of workers for Parallelism in Data Processing',metavar='')
-    parser.add_argument('-bs','--batch-size',dest='batch_size',required=True,help='Batch Size (Milliseconds)',metavar='')
+    parser.add_argument('-zk', '--zookeeper', dest='zk', required=True,
+                        help='Zookeeper IP and port (i.e. 10.0.0.1:2181)', metavar='')
+    parser.add_argument('-t', '--topic', dest='topic', required=True,
+                        help='Topic to listen for Spark Streaming', metavar='')
+    parser.add_argument('-db', '--database', dest='db', required=True,
+                        help='Hive database whete the data will be ingested', metavar='')
+    parser.add_argument('-dt', '--db-table', dest='db_table', required=True,
+                        help='Hive table whete the data will be ingested', metavar='')
+    parser.add_argument('-w', '--num_of_workers', dest='num_of_workers', required=True,
+                        help='Num of workers for Parallelism in Data Processing', metavar='')
+    parser.add_argument('-bs', '--batch-size', dest='batch_size', required=True,
+                        help='Batch Size (Milliseconds)', metavar='')
     args = parser.parse_args()
 
     # start collector based on data source type.


[05/11] incubator-spot git commit: fixed spot_decoder()

Posted by na...@apache.org.
fixed spot_decoder()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8ff0e473
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8ff0e473
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8ff0e473

Branch: refs/heads/master
Commit: 8ff0e4730dd05e10bf8908ed7691dd85a4cf35ff
Parents: b5cf634
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:40:54 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:40:54 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8ff0e473/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 54c3b28..5667204 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -88,11 +88,17 @@ def main():
 
 
 def spot_decoder(s):
+    """
+    Dummy decoder function.
 
+    :param s: input to decode
+    :returns: s
+    """
     if s is None:
         return None
     return s
 
+
 def split_log_entry(line):
     """
     Split the given line into its fields.
@@ -106,6 +112,7 @@ def split_log_entry(line):
     lex.commenters = ''
     return list(lex)
 
+
 def proxy_parser(proxy_fields):
     """
     Parse and normalize data.


[04/11] incubator-spot git commit: PEP8 fixes main()

Posted by na...@apache.org.
PEP8 fixes main()


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b5cf6344
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b5cf6344
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b5cf6344

Branch: refs/heads/master
Commit: b5cf6344a075889da041341cd4f5d1545ea5c379
Parents: 51040a2
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:36:10 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:36:10 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b5cf6344/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 898ff2e..54c3b28 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -61,6 +61,7 @@ proxy_schema = StructType([
     StructField("d", StringType(), True),
     StructField("h", StringType(), True)])
 
+
 def main():
     """
     Handle commandline arguments and
@@ -83,7 +84,8 @@ def main():
     args = parser.parse_args()
 
     # start collector based on data source type.
-    bluecoat_parse(args.zk,args.topic,args.db,args.db_table,args.num_of_workers,args.batch_size)
+    bluecoat_parse(args.zk, args.topic, args.db, args.db_table, args.num_of_workers, args.batch_size)
+
 
 def spot_decoder(s):
 


[10/11] incubator-spot git commit: added docstring for complete script

Posted by na...@apache.org.
added docstring for complete script


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a166efc3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a166efc3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a166efc3

Branch: refs/heads/master
Commit: a166efc306ca041f668c9af7dfd65a94f006e454
Parents: 4ebf4be
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:45:02 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:45:02 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a166efc3/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 541abb5..01b9922 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -1,3 +1,7 @@
+"""
+This script adds support for ingesting Bluecoat log files
+into Apache Spot.
+"""
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with


[09/11] incubator-spot git commit: whitespace fix

Posted by na...@apache.org.
whitespace fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/4ebf4be1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/4ebf4be1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/4ebf4be1

Branch: refs/heads/master
Commit: 4ebf4be1190da91bb23aab73db93dae0bc7f17ba
Parents: 6b79abb
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:42:57 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:42:57 2018 +0100

----------------------------------------------------------------------
 spot-ingest/pipelines/proxy/bluecoat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4ebf4be1/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 2f5da0d..541abb5 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -197,5 +197,5 @@ def bluecoat_parse(zk, topic, db, db_table, num_of_workers, batch_size):
     ssc.awaitTermination()
 
 
-if __name__ =='__main__':
+if __name__ == '__main__':
     main()