You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spot.apache.org by na...@apache.org on 2018/02/09 22:50:49 UTC
[01/11] incubator-spot git commit: added some docstrings
Repository: incubator-spot
Updated Branches:
refs/heads/master 9215d8168 -> f722127ca
added some docstrings
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a99404b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a99404b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a99404b0
Branch: refs/heads/master
Commit: a99404b05045087bfd02d99f4764df1738959566
Parents: 9215d81
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:28:26 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:28:26 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a99404b0/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 31d89ca..1fe02a2 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -62,7 +62,10 @@ proxy_schema = StructType([
StructField("h", StringType(), True)])
def main():
-
+ """
+ Handle commandline arguments and
+ start the collector.
+ """
# input Parameters
parser = argparse.ArgumentParser(description="Bluecoat Parser")
parser.add_argument('-zk','--zookeeper',dest='zk',required=True,help='Zookeeper IP and port (i.e. 10.0.0.1:2181)',metavar='')
@@ -83,7 +86,12 @@ def spot_decoder(s):
return s
def split_log_entry(line):
+ """
+ Split the given line into its fields.
+ :param line: line to split
+ :returns: list
+ """
lex = shlex.shlex(line)
lex.quotes = '"'
lex.whitespace_split = True
@@ -91,7 +99,12 @@ def split_log_entry(line):
return list(lex)
def proxy_parser(proxy_fields):
-
+ """
+ Parse and normalize data.
+
+ :param proxy_fields: list with fields from log
+ :returns: list of str
+ """
proxy_parsed_data = []
if len(proxy_fields) > 1:
@@ -114,7 +127,9 @@ def proxy_parser(proxy_fields):
def save_data(rdd,sqc,db,db_table,topic):
-
+ """
+ Create and save a data frame with the given data.
+ """
if not rdd.isEmpty():
df = sqc.createDataFrame(rdd,proxy_schema)
[11/11] incubator-spot git commit: corrected data insertion
Posted by na...@apache.org.
corrected data insertion
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/f722127c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/f722127c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/f722127c
Branch: refs/heads/master
Commit: f722127ca410a88355cc3f6d952739845c7499ce
Parents: a166efc
Author: tpltnt <tp...@dropcut.net>
Authored: Mon Feb 5 19:05:13 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Mon Feb 5 19:05:13 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f722127c/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 01b9922..d476733 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -168,7 +168,7 @@ def save_data(rdd, sqc, db, db_table, topic):
sqc.setConf("hive.exec.dynamic.partition", "true")
sqc.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
hive_table = "{0}.{1}".format(db, db_table)
- df.write.format("parquet").mode("append").insertInto(hive_table)
+ df.write.format("parquet").mode("append").partitionBy('y', 'm', 'd', 'h').insertInto(hive_table)
else:
print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))
[08/11] incubator-spot git commit: fixed bluecoat_parse()
Posted by na...@apache.org.
fixed bluecoat_parse()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/6b79abbb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/6b79abbb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/6b79abbb
Branch: refs/heads/master
Commit: 6b79abbb079d99d283664382fba131864049f1fa
Parents: 2ea6b4e
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:40:38 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:42:22 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6b79abbb/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 597c13c..2f5da0d 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -170,21 +170,30 @@ def save_data(rdd, sqc, db, db_table, topic):
print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))
-def bluecoat_parse(zk,topic,db,db_table,num_of_workers,batch_size):
-
+def bluecoat_parse(zk, topic, db, db_table, num_of_workers, batch_size):
+ """
+ Parse and save bluecoat logs.
+
+ :param zk: Apache ZooKeeper quorum
+ :param topic: Apache Kafka topic (application name)
+ :param db: Apache Hive database to save into
+ :param db_table: table of `db` to save into
+ :param num_of_workers: number of Apache Kafka workers
+ :param batch_size: batch size for Apache Spark streaming context
+ """
app_name = topic
wrks = int(num_of_workers)
# create spark context
sc = SparkContext(appName=app_name)
- ssc = StreamingContext(sc,int(batch_size))
+ ssc = StreamingContext(sc, int(batch_size))
sqc = HiveContext(sc)
tp_stream = KafkaUtils.createStream(ssc, zk, app_name, {topic: wrks}, keyDecoder=spot_decoder, valueDecoder=spot_decoder)
- proxy_data = tp_stream.map(lambda row: row[1]).flatMap(lambda row: row.split("\n")).filter(lambda row: rex_date.match(row)).map(lambda row: row.strip("\n").strip("\r").replace("\t", " ").replace(" ", " ")).map(lambda row: split_log_entry(row)).map(lambda row: proxy_parser(row))
- saved_data = proxy_data.foreachRDD(lambda row: save_data(row,sqc,db,db_table,topic))
- ssc.start();
+ proxy_data = tp_stream.map(lambda row: row[1]).flatMap(lambda row: row.split("\n")).filter(lambda row: rex_date.match(row)).map(lambda row: row.strip("\n").strip("\r").replace("\t", " ").replace(" ", " ")).map(lambda row: split_log_entry(row)).map(lambda row: proxy_parser(row))
+ saved_data = proxy_data.foreachRDD(lambda row: save_data(row, sqc, db, db_table, topic))
+ ssc.start()
ssc.awaitTermination()
[06/11] incubator-spot git commit: PEP8 proxy_parser()
Posted by na...@apache.org.
PEP8 proxy_parser()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b9befd7b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b9befd7b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b9befd7b
Branch: refs/heads/master
Commit: b9befd7b0a882a327c31a691a57c07a86a64ff31
Parents: 8ff0e47
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:45:07 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:45:07 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b9befd7b/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 5667204..c2ddb04 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -118,25 +118,32 @@ def proxy_parser(proxy_fields):
Parse and normalize data.
:param proxy_fields: list with fields from log
- :returns: list of str
+ :returns: list
"""
proxy_parsed_data = []
if len(proxy_fields) > 1:
# create full URI.
- proxy_uri_path = proxy_fields[17] if len(proxy_fields[17]) > 1 else ""
- proxy_uri_qry = proxy_fields[18] if len(proxy_fields[18]) > 1 else ""
- full_uri= "{0}{1}{2}".format(proxy_fields[15],proxy_uri_path,proxy_uri_qry)
+ proxy_uri_path = proxy_fields[17] if len(proxy_fields[17]) > 1 else ""
+ proxy_uri_qry = proxy_fields[18] if len(proxy_fields[18]) > 1 else ""
+ full_uri = "{0}{1}{2}".format(proxy_fields[15], proxy_uri_path, proxy_uri_qry)
date = proxy_fields[0].split('-')
- year = date[0]
+ year = date[0]
month = date[1].zfill(2)
day = date[2].zfill(2)
hour = proxy_fields[1].split(":")[0].zfill(2)
- # re-order fields.
- proxy_parsed_data = [proxy_fields[0],proxy_fields[1],proxy_fields[3],proxy_fields[15],proxy_fields[12],proxy_fields[20],proxy_fields[13],int(proxy_fields[2]),proxy_fields[4],
- proxy_fields[5],proxy_fields[6],proxy_fields[7],proxy_fields[8],proxy_fields[9],proxy_fields[10],proxy_fields[11],proxy_fields[14],proxy_fields[16],proxy_fields[17],proxy_fields[18],
- proxy_fields[19],proxy_fields[21],int(proxy_fields[22]),int(proxy_fields[23]),proxy_fields[24],proxy_fields[25],proxy_fields[26],full_uri,year,month,day,hour ]
+ # re-order fields.
+ proxy_parsed_data = [proxy_fields[0], proxy_fields[1], proxy_fields[3],
+ proxy_fields[15], proxy_fields[12], proxy_fields[20],
+ proxy_fields[13], int(proxy_fields[2]), proxy_fields[4],
+ proxy_fields[5], proxy_fields[6], proxy_fields[7],
+ proxy_fields[8], proxy_fields[9], proxy_fields[10],
+ proxy_fields[11], proxy_fields[14], proxy_fields[16],
+ proxy_fields[17], proxy_fields[18], proxy_fields[19],
+ proxy_fields[21], int(proxy_fields[22]), int(proxy_fields[23]),
+ proxy_fields[24], proxy_fields[25], proxy_fields[26],
+ full_uri, year, month, day, hour]
return proxy_parsed_data
[02/11] incubator-spot git commit: fixed indentation
Posted by na...@apache.org.
fixed indentation
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/df86326b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/df86326b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/df86326b
Branch: refs/heads/master
Commit: df86326bf991a4f49c57e8aeeb4af0d8b059b21b
Parents: a99404b
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:30:12 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:30:12 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 64 ++++++++++++++--------------
1 file changed, 32 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/df86326b/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 1fe02a2..5e36a4e 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -28,38 +28,38 @@ from pyspark.sql.types import *
rex_date = re.compile("\d{4}-\d{2}-\d{2}")
proxy_schema = StructType([
- StructField("p_date", StringType(), True),
- StructField("p_time", StringType(), True),
- StructField("clientip", StringType(), True),
- StructField("host", StringType(), True),
- StructField("reqmethod", StringType(), True),
- StructField("useragent", StringType(), True),
- StructField("resconttype", StringType(), True),
- StructField("duration", IntegerType(), True),
- StructField("username", StringType(), True),
- StructField("authgroup", StringType(), True),
- StructField("exceptionid", StringType(), True),
- StructField("filterresult", StringType(), True),
- StructField("webcat", StringType(), True),
- StructField("referer", StringType(), True),
- StructField("respcode", StringType(), True),
- StructField("action", StringType(), True),
- StructField("urischeme", StringType(), True),
- StructField("uriport", StringType(), True),
- StructField("uripath", StringType(), True),
- StructField("uriquery", StringType(), True),
- StructField("uriextension", StringType(), True),
- StructField("serverip", StringType(), True),
- StructField("scbytes", IntegerType(), True),
- StructField("csbytes", IntegerType(), True),
- StructField("virusid", StringType(), True),
- StructField("bcappname", StringType(), True),
- StructField("bcappoper", StringType(), True),
- StructField("fulluri", StringType(), True),
- StructField("y", StringType(), True),
- StructField("m", StringType(), True),
- StructField("d", StringType(), True),
- StructField("h", StringType(), True)])
+ StructField("p_date", StringType(), True),
+ StructField("p_time", StringType(), True),
+ StructField("clientip", StringType(), True),
+ StructField("host", StringType(), True),
+ StructField("reqmethod", StringType(), True),
+ StructField("useragent", StringType(), True),
+ StructField("resconttype", StringType(), True),
+ StructField("duration", IntegerType(), True),
+ StructField("username", StringType(), True),
+ StructField("authgroup", StringType(), True),
+ StructField("exceptionid", StringType(), True),
+ StructField("filterresult", StringType(), True),
+ StructField("webcat", StringType(), True),
+ StructField("referer", StringType(), True),
+ StructField("respcode", StringType(), True),
+ StructField("action", StringType(), True),
+ StructField("urischeme", StringType(), True),
+ StructField("uriport", StringType(), True),
+ StructField("uripath", StringType(), True),
+ StructField("uriquery", StringType(), True),
+ StructField("uriextension", StringType(), True),
+ StructField("serverip", StringType(), True),
+ StructField("scbytes", IntegerType(), True),
+ StructField("csbytes", IntegerType(), True),
+ StructField("virusid", StringType(), True),
+ StructField("bcappname", StringType(), True),
+ StructField("bcappoper", StringType(), True),
+ StructField("fulluri", StringType(), True),
+ StructField("y", StringType(), True),
+ StructField("m", StringType(), True),
+ StructField("d", StringType(), True),
+ StructField("h", StringType(), True)])
def main():
"""
[07/11] incubator-spot git commit: fixes for save_data()
Posted by na...@apache.org.
fixes for save_data()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/2ea6b4ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/2ea6b4ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/2ea6b4ea
Branch: refs/heads/master
Commit: 2ea6b4eac7c11ef6084955179eb211b696737e9e
Parents: b9befd7
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:01:31 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:01:31 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/2ea6b4ea/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index c2ddb04..597c13c 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -148,21 +148,28 @@ def proxy_parser(proxy_fields):
return proxy_parsed_data
-def save_data(rdd,sqc,db,db_table,topic):
+def save_data(rdd, sqc, db, db_table, topic):
"""
Create and save a data frame with the given data.
+
+ :param rdd: collection of objects (Resilient Distributed Dataset) to store
+ :param sqc: Apache Hive context
+ :param db: Apache Hive database to save into
+ :param db_table: table of `db` to save into
+ :param topic: Apache Kafka topic to listen for (if `rdd` is empty)
"""
if not rdd.isEmpty():
- df = sqc.createDataFrame(rdd,proxy_schema)
+ df = sqc.createDataFrame(rdd, proxy_schema)
sqc.setConf("hive.exec.dynamic.partition", "true")
sqc.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
- hive_table = "{0}.{1}".format(db,db_table)
+ hive_table = "{0}.{1}".format(db, db_table)
df.write.format("parquet").mode("append").insertInto(hive_table)
else:
print("------------------------LISTENING KAFKA TOPIC:{0}------------------------".format(topic))
+
def bluecoat_parse(zk,topic,db,db_table,num_of_workers,batch_size):
app_name = topic
[03/11] incubator-spot git commit: style fixes argparse
Posted by na...@apache.org.
style fixes argparse
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/51040a2c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/51040a2c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/51040a2c
Branch: refs/heads/master
Commit: 51040a2c112f81ec99873b882c65bf19ba45e1fb
Parents: df86326
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:32:25 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:33:03 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/51040a2c/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 5e36a4e..898ff2e 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -68,12 +68,18 @@ def main():
"""
# input Parameters
parser = argparse.ArgumentParser(description="Bluecoat Parser")
- parser.add_argument('-zk','--zookeeper',dest='zk',required=True,help='Zookeeper IP and port (i.e. 10.0.0.1:2181)',metavar='')
- parser.add_argument('-t','--topic',dest='topic',required=True,help='Topic to listen for Spark Streaming',metavar='')
- parser.add_argument('-db','--database',dest='db',required=True,help='Hive database whete the data will be ingested',metavar='')
- parser.add_argument('-dt','--db-table',dest='db_table',required=True,help='Hive table whete the data will be ingested',metavar='')
- parser.add_argument('-w','--num_of_workers',dest='num_of_workers',required=True,help='Num of workers for Parallelism in Data Processing',metavar='')
- parser.add_argument('-bs','--batch-size',dest='batch_size',required=True,help='Batch Size (Milliseconds)',metavar='')
+ parser.add_argument('-zk', '--zookeeper', dest='zk', required=True,
+ help='Zookeeper IP and port (i.e. 10.0.0.1:2181)', metavar='')
+ parser.add_argument('-t', '--topic', dest='topic', required=True,
+ help='Topic to listen for Spark Streaming', metavar='')
+ parser.add_argument('-db', '--database', dest='db', required=True,
+ help='Hive database whete the data will be ingested', metavar='')
+ parser.add_argument('-dt', '--db-table', dest='db_table', required=True,
+ help='Hive table whete the data will be ingested', metavar='')
+ parser.add_argument('-w', '--num_of_workers', dest='num_of_workers', required=True,
+ help='Num of workers for Parallelism in Data Processing', metavar='')
+ parser.add_argument('-bs', '--batch-size', dest='batch_size', required=True,
+ help='Batch Size (Milliseconds)', metavar='')
args = parser.parse_args()
# start collector based on data source type.
[05/11] incubator-spot git commit: fixed spot_decoder()
Posted by na...@apache.org.
fixed spot_decoder()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8ff0e473
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8ff0e473
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8ff0e473
Branch: refs/heads/master
Commit: 8ff0e4730dd05e10bf8908ed7691dd85a4cf35ff
Parents: b5cf634
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:40:54 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:40:54 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 7 +++++++
1 file changed, 7 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8ff0e473/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 54c3b28..5667204 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -88,11 +88,17 @@ def main():
def spot_decoder(s):
+ """
+ Dummy decoder function.
+ :param s: input to decode
+ :returns: s
+ """
if s is None:
return None
return s
+
def split_log_entry(line):
"""
Split the given line into its fields.
@@ -106,6 +112,7 @@ def split_log_entry(line):
lex.commenters = ''
return list(lex)
+
def proxy_parser(proxy_fields):
"""
Parse and normalize data.
[04/11] incubator-spot git commit: PEP8 fixes main()
Posted by na...@apache.org.
PEP8 fixes main()
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b5cf6344
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b5cf6344
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b5cf6344
Branch: refs/heads/master
Commit: b5cf6344a075889da041341cd4f5d1545ea5c379
Parents: 51040a2
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 11:36:10 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 11:36:10 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b5cf6344/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 898ff2e..54c3b28 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -61,6 +61,7 @@ proxy_schema = StructType([
StructField("d", StringType(), True),
StructField("h", StringType(), True)])
+
def main():
"""
Handle commandline arguments and
@@ -83,7 +84,8 @@ def main():
args = parser.parse_args()
# start collector based on data source type.
- bluecoat_parse(args.zk,args.topic,args.db,args.db_table,args.num_of_workers,args.batch_size)
+ bluecoat_parse(args.zk, args.topic, args.db, args.db_table, args.num_of_workers, args.batch_size)
+
def spot_decoder(s):
[10/11] incubator-spot git commit: added docstring for complete script
Posted by na...@apache.org.
added docstring for complete script
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a166efc3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a166efc3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a166efc3
Branch: refs/heads/master
Commit: a166efc306ca041f668c9af7dfd65a94f006e454
Parents: 4ebf4be
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:45:02 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:45:02 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a166efc3/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 541abb5..01b9922 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -1,3 +1,7 @@
+"""
+This script adds support for ingesting Bluecoat log files
+into Apache Spot.
+"""
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
[09/11] incubator-spot git commit: whitespace fix
Posted by na...@apache.org.
whitespace fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/4ebf4be1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/4ebf4be1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/4ebf4be1
Branch: refs/heads/master
Commit: 4ebf4be1190da91bb23aab73db93dae0bc7f17ba
Parents: 6b79abb
Author: tpltnt <tp...@dropcut.net>
Authored: Thu Jan 25 12:42:57 2018 +0100
Committer: tpltnt <tp...@dropcut.net>
Committed: Thu Jan 25 12:42:57 2018 +0100
----------------------------------------------------------------------
spot-ingest/pipelines/proxy/bluecoat.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4ebf4be1/spot-ingest/pipelines/proxy/bluecoat.py
----------------------------------------------------------------------
diff --git a/spot-ingest/pipelines/proxy/bluecoat.py b/spot-ingest/pipelines/proxy/bluecoat.py
index 2f5da0d..541abb5 100644
--- a/spot-ingest/pipelines/proxy/bluecoat.py
+++ b/spot-ingest/pipelines/proxy/bluecoat.py
@@ -197,5 +197,5 @@ def bluecoat_parse(zk, topic, db, db_table, num_of_workers, batch_size):
ssc.awaitTermination()
-if __name__ =='__main__':
+if __name__ == '__main__':
main()