Posted to commits@spot.apache.org by ev...@apache.org on 2017/03/29 16:51:25 UTC

[01/50] [abbrv] incubator-spot git commit: Adding date and time to GraphQL DnsScoredConnectionType

Repository: incubator-spot
Updated Branches:
  refs/heads/SPOT-35_graphql_api [created] b41e90491


Adding date and time to GraphQL DnsScoredConnectionType


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/af247a7d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/af247a7d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/af247a7d

Branch: refs/heads/SPOT-35_graphql_api
Commit: af247a7d0f1a6a18d8607a302c9217c957eecae7
Parents: c676feb
Author: Diego Ortiz <di...@intel.com>
Authored: Fri Mar 3 12:08:56 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:48:56 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/graphql/dns/query.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/af247a7d/spot-oa/api/graphql/dns/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/dns/query.py b/spot-oa/api/graphql/dns/query.py
index e8edf77..7b49421 100644
--- a/spot-oa/api/graphql/dns/query.py
+++ b/spot-oa/api/graphql/dns/query.py
@@ -214,9 +214,14 @@ IpDetailsType = GraphQLObjectType(
     }
 )
 
-ThreatType = GraphQLObjectType(
-    name='DnsThreatType',
+ScoredThreatType = GraphQLObjectType(
+    name='DnsScoredThreatType',
     fields={
+        'frameTime': GraphQLField(
+            type=SpotDatetimeType,
+            description='Date and time of user score',
+            resolver=lambda root, *_: datetime.utcfromtimestamp(int(root.get('unix_tstamp') or 0))
+        ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
             description='A dns query that has been scored as high risk (1)',
@@ -291,7 +296,7 @@ ThreatsInformationType = GraphQLObjectType(
     name='DnsThreats',
     fields={
         'list': GraphQLField(
-            type=GraphQLList(ThreatType),
+            type=GraphQLList(ScoredThreatType),
             description='List of dns queries or client ips that have been scored as high risk (1)',
             args={
                 'date': GraphQLArgument(
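
The new `frameTime` field resolves the row's epoch timestamp into a UTC datetime, falling back to 0 (1970-01-01) when `unix_tstamp` is missing or null. A minimal sketch of that behavior, using a hypothetical result row for illustration:

        from datetime import datetime

        # Hypothetical row shape; only 'unix_tstamp' matters to the resolver.
        row = {'unix_tstamp': 1488564536, 'dns_qry_name': 'example.com'}

        # Mirrors the resolver above: a missing or null timestamp becomes 0
        # instead of raising a TypeError.
        frame_time = datetime.utcfromtimestamp(int(row.get('unix_tstamp') or 0))
        print(frame_time)  # 2017-03-03 18:08:56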


[47/50] [abbrv] incubator-spot git commit: CSV removal documentation update

Posted by ev...@apache.org.
CSV removal documentation update


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/363c02d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/363c02d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/363c02d8

Branch: refs/heads/SPOT-35_graphql_api
Commit: 363c02d89b2f36d70b94d5e89b25018a58919f8c
Parents: 8bab8f0
Author: LedaLima <le...@apache.org>
Authored: Mon Mar 13 10:04:34 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/README.md                        | 118 ++++++------
 spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md  |  64 +++----
 .../dns/ipynb_templates/ThreatInvestigation.md  |  65 ++-----
 spot-oa/oa/flow/README.md                       | 123 +++++++------
 spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md |  69 +++----
 .../flow/ipynb_templates/ThreatInvestigation.md | 181 +++----------------
 spot-oa/oa/proxy/README.md                      | 126 ++++++-------
 7 files changed, 283 insertions(+), 463 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/dns/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/README.md b/spot-oa/oa/dns/README.md
index aab8673..0d37435 100644
--- a/spot-oa/oa/dns/README.md
+++ b/spot-oa/oa/dns/README.md
@@ -1,6 +1,6 @@
 # DNS
 
-DNS sub-module extracts and transforms DNS (Domain Name Service) data already ranked by spot-ml and will load into csv files for presentation layer.
+The DNS sub-module extracts and transforms DNS (Domain Name System) data already ranked by spot-ml and loads it into Impala tables for the presentation layer.
 
 ## DNS Components
 
@@ -15,30 +15,26 @@ DNS spot-oa main script executes the following steps:
 			ipython Notebooks: ipynb/dns/<date>/
 		
 		2. Creates a copy of the notebooks templates into the ipython Notebooks path and renames them removing the "_master" part from the name.
-		
+
 		3. Gets the dns_results.csv from the HDFS location according to the selected date, and copies it back to the corresponding data path.
-		 
+
 		4. Reads a given number of rows from the results file.
 
-		5. Gets the top level domain out of the dns_qry_name, and adds it in the new column 'tld' 
-		 
+		5. Gets the top level domain out of the dns_qry_name, and adds it in the new column 'tld'.
+
 		6. Checks reputation for the query_name of each connection.
-		 
+
 		7. Adds two new columns for the severity of the query_name and the client ip of each connection.
 
 		8. Adds a new column with the hour part of the frame_time.
-		 
-		9. Translates the 'dns_query_class', 'dns_query_type','dns_query_rcode' to human readable text according to the IANA specification. The translated values are stored in the dns_qry_class_name, dns_qry_type_name, dns_qry_rcode_name columns, respectively. 
-		 
+
+		9. Translates the 'dns_query_class', 'dns_query_type','dns_query_rcode' to human readable text according to the IANA specification. The translated values are stored in the dns_qry_class_name, dns_qry_type_name, dns_qry_rcode_name columns, respectively.
+
 		10. Adds Network Context.
-		
-		11. Saves dns_scores.csv file.
-		 
-		12. Creates a backup of dns_scores.csv file named dns_scores_bu.csv.
-		
-		13. Creates dns data details files.
-		
-		14. Creates dendrogram data files.
+
+		11. Saves results to the dns_scores table.
+
+		12. Generates details and dendrogram diagram data. These details include information about additional connections, used to display the details table in the UI.
 
 
 **Dependencies**
@@ -51,9 +47,8 @@ DNS spot-oa main script executes the following steps:
 - [components/data](/spot-oa/oa/components#data)
 - [components/nc](/spot-oa/oa/components#network-context-nc)
 - [components/reputation](/spot-oa/oa/components/reputation)
-- dns_conf.json
-
-
+- dns_conf.json 
+ 
     
 **Prerequisites**
 
@@ -70,12 +65,12 @@ Before running DNS OA users need to configure components for the first time. It
 
 **Output**
 
-- dns_scores.csv: Main results file for DNS OA. This file will contain suspicious connects information and it's limited to the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
- 
-		Schema with zero-indexed columns: 
-		
+- DNS suspicious connections. _dns\_scores_ table.
+
+Main results for DNS OA. The data stored in this table is limited by the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
+  
 		0.frame_time: string		
-		1.frame_len: int		
+		1.unix_tstamp: bigint		
 		2.ip_dst: string		
 		3.dns_qry_name: string		
 		4.dns_qry_class: string		
@@ -84,49 +79,60 @@ Before running DNS OA users need to configure components for the first time. It
 		7.score: double	
 		8.tld: string		
 		9.query_rep: string		
-		10.hh: string		
-		11.ip_sev: int		
-		12.dns_sev: int		
-		13.dns_qry_class_name: string		
-		14.dns_qry_type_name: string		
-		15.dns_qry_rcode_name: string		
-		16.network_context: string		
-		17.unix_tstamp: bigint
+		10.hh: string	
+		11.dns_qry_class_name: string		
+		12.dns_qry_type_name: string		
+		13.dns_qry_rcode_name: string		
+		14.network_context: string	 
 
-- dns_scores_bu.csv: The backup file of suspicious connects in case user wants to roll back any changes made during analysis. Schema is same as dns_scores.csv.
 
+- DNS details. _dns\_dendro_ table.  
 
-- dendro-\<DNS query name>.csv: One file for each source IP. This file includes information about all the queries made to a particular DNS query name. The number of retrieved rows is limited by the value of "\_details\_limit" parameter
-
-		Schema with zero-indexed columns:
-		
-		0.dns_a: string		
-		1.dns_qry_name: string		
-		2.ip_dst: string
+Contains one record set for each source IP, including information about all the queries made to a particular DNS query name. The number of retrieved rows is limited by the value of the "\_details\_limit" parameter.
+ 
+		0.unix_tstamp bigint 
+		1.dns_a string
+		2.dns_qry_name string
+		3.ip_dst string 
 
-- edge-\<DNS query name>_\<HH>_00.csv: One file for each DNS query name for each hour of the day. This file contains details for each
-connection between DNS and source IP.
 
-		Schema with zero-indexed columns:
-		
-		0.frame_time: string		
-		1.frame_len: int		
-		2.ip_dst: string		
-		3.ip_src: string		
-		4.dns_qry_name: string		
-		5.dns_qry_class_name: string		
-		6.dns_qry_type_name: string		
-		7.dns_qry_rcode_name: string		
-		8.dns_a: string
+- DNS details. _dns\_edge_ table.  
 
+Contains one record set for each DNS query name for each hour of the day, with details for each
+connection between the DNS query name and the source IP.
+ 
+		0.unix_tstamp bigint
+		1.frame_len bigint
+		2.ip_dst string
+		3.ip_src string
+		4.dns_qry_name string
+		5.dns_qry_class string
+		6.dns_qry_type int
+		7.dns_qry_rcode int
+		8.dns_a string
+		9.hh int
+		10.dns_qry_class_name string
+		11.dns_qry_type_name string
+		12.dns_qry_rcode_name string
+		13.network_context string
+
+
+- DNS Ingest summary. _dns\_ingest\_summary_ table.
+
+This table is populated with the number of connections ingested per minute during the day.
+
+        Table schema:
+        0. tdate:      string
+        1. total:      bigint 
+ 
 
 ###dns_conf.json
-This file is part of the initial configuration for the DNS pipeline. It will contain mapped all the columns included in the dns_results.csv and dns_scores.csv files.
+This file is part of the initial configuration for the DNS pipeline. It contains a mapping of all the columns included in the _dns\_edge_ and _dns\_dendro_ tables.
 
 This file contains three main arrays:
 
 	-  dns_results_fields: Reference of the column name and indexes in the dns_results.csv file.	 
-	-  dns_score_fields:  Reference of the column name and indexes in the dns_scores.csv file.	
+	-  dns_score_fields:  Reference of the column name and indexes in the _dns\_edge_ table.
 	-  add_reputation: According to the dns_results.csv file, this is the column index of the value which will be evaluated using the reputation services.
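
Since the DNS results now live in Impala tables instead of CSV files, a quick way to sanity-check the OA output is to query the _dns\_scores_ table directly. A minimal sketch using impyla (host and port are placeholders; real values come from the Impala keys in spot.conf, and the table may need to be qualified with your database name):

        from impala.dbapi import connect

        # Placeholder connection settings; see IMPALA_DEM / IMPALA_PORT in spot.conf.
        conn = connect(host='impala-daemon.example.com', port=21050)
        cursor = conn.cursor()

        # Order ascending by score; in spot-ml, lower scores are more suspicious.
        cursor.execute('SELECT frame_time, ip_dst, dns_qry_name, score '
                       'FROM dns_scores ORDER BY score LIMIT 10')
        for row in cursor.fetchall():
            print(row)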
 
 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md b/spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md
index 6b1579e..f9e8a4a 100644
--- a/spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md
+++ b/spot-oa/oa/dns/ipynb_templates/EdgeNotebook.md
@@ -7,70 +7,50 @@
 
 The following python modules will be imported for the notebook to work correctly:    
 
-        import urllib2  
-        import json  
-        import os  
-        import csv  
+        import urllib2
+        import json
+        import os 
+        import datetime  
+        import subprocess 
         import ipywidgets #For jupyter/ipython >= 1.4  
         from IPython.html import widgets # For jupyter/ipython < 1.4  
         from IPython.display import display, HTML, clear_output, Javascript   
-        import datetime  
-        import subprocess 
-
+        
 
 ###Pre-requisites
-- Execution of the spot-oa process for DNS
-- Correct setup the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration) 
-- Have a public key authentication between the current UI node and the ML node. [Read more](/wiki/Configure%20User%20Accounts#configure-user-accounts)
+- Execute the hdfs_setup.sh script to create OA tables and set up permissions
+- Correctly set up the spot.conf file. [Read more](http://spot.incubator.apache.org/doc/#configuration)
+- Execution of the spot-oa process for DNS
+- Correct installation of the UI. [Read more](/ui/INSTALL.md)
 
 
 ##Data source
-
-The whole process in this notebook depends entirely on the existence of `dns_scores.csv` file, which is generated at the OA process.  
-The data is directly manipulated on the .csv files, so a `dns_scores_bu.csv` is created as a backup to allow the user to restore the original data at any point, 
-and this can be performed executing the last cell on the notebook with the following command:  
-
-        !cp $sconnectbu $sconnect
+The whole process in this notebook depends entirely on the existence of the `dns_scores` table in the database.  
+The data is manipulated through the GraphQL API that is also included in the repository.
 
 
 **Input files**  
-All these paths should be relative to the main OA path.    
-Schema for these files can be found [here](/spot-oa/oa/dns)
-
-        data/dns/<date>/dns_scores.csv  
-        data/dns/<date>/dns_scores_bu.csv
+The data to be processed should be stored in the following tables:
 
-**Temporary Files**
+        dns_scores
+        dns
 
-        data/dns/<date>/score_tmp.csv
 
-**Output files**
+**Output**
+The following tables will be populated after the scoring process:
+        dns_threat_investigation
 
-        data/dns/<date>/dns_scores.csv  (Updated with severity values)
-        data/dns/<date>/dns_scores_fb.csv (File with scored connections that will be used for ML feedback)
 
 ###Functions
 **Widget configuration**
 This is not a function, but more like global code to set up styles and widgets to format the output of the notebook. 
 
-`data_loader():` - This function loads the source file into a csv dictionary reader with all suspicious unscored connections, creating separated lists for 
+`data_loader():` - This function calls the GraphQL API query *suspicious* to list all suspicious unscored connections, creating separate lists for 
 the 'client_ip' and 'dns_qry_name'.
  Also displays the widgets for the listboxes, textbox, radiobutton list and the 'Score' and 'Save' buttons.  
   
-`fill_list(list_control,source):` - This function loads the given dictionary into a listbox and appends an empty item at the top with the value '--Select--' (Just for design sake)
-
-` assign_score(b):` - This function is executed on the onclick event of the \u2018Score\u2019 button. The system will first try to get the value from the 'Quick search' textbox ignoring the selections from the listboxes; in case the textbox is empty, it will then
- get the selected values from the 'Client IP' and 'Query' listboxes to then search through the dns_scores.csv file to find matching values. 
-A linear search on the file is then performed:  
-The value in the 'Quick Scoring' textbox, will be compared against the `dns_qry_name` column. Partial matches will be considered as a positive match and the `dns_sev` column will be updated to the value selected from the radiobutton list.   
-The column `ip_dst` will be compared against the 'Client IP' selected value; if a match is found, the `ip_sev` column will be updated to the value selected from the radiobutton list.   
-The column `dns_qry_name` will be compared against the 'Query' selected value; if a match is found, the `dns_sev` column will be updated to the value selected from the radiobutton list.     
-Every row will be appended to the `dns_scores_tmp.csv` file. This file will replace the original `dns_scores.csv` at the end of the process.  
-
-Only the scored rows will also be appended to the `dns_scores_fb.csv` file, which will later be used for the ML feedback.
+`fill_list(list_control,source):` - This function loads the given dictionary into a listbox widget.
 
-`save(b):` - This event is triggered by the 'Save' button, and executes javascript functions to refresh the data on all the panels in Suspicious Connects. Since the data source file has been updated, the scored connections will be removed from all
-the panels, since those panels will only display connections where the `dns_sev` value is zero.
-This function also removes the widget panel and reloads it again to update the results, removing the need of a manual refresh, and calls the `ml_feedback():` function.
+` assign_score(b):` - This function is executed on the onclick event of the 'Score' button. The system will first try to get the value from the 'Quick search' textbox, ignoring the selections from the listboxes; if the textbox is empty, it will then get the selected values from the 'Client IP' and 'Query' listboxes and append them to a temporary list. 
 
-`ml_feedback():` - A shell script is executed, transferring thru secure copy the _proxy_scores_fb.csv_ file into ML Master node, where the destination path is defined at the spot.conf file.
+`save(b):` - This event is triggered by the 'Save' button, and executes JavaScript functions to refresh the data on all the panels in Suspicious Connects. This function calls the *score* mutation, which updates the score for the selected values in the database.
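
For reference, a hedged sketch of how a notebook cell might call this GraphQL API with `urllib2` (already imported above). The endpoint path and field names here are illustrative assumptions, not the exact schema:

        import json
        import urllib2

        # Placeholder endpoint; the deployed UI defines the real one.
        url = 'http://localhost:8889/graphql'
        query = '{ dns { suspicious(date:"2017-03-03") { clientIp dnsQuery score } } }'

        request = urllib2.Request(url, json.dumps({'query': query}),
                                  {'Content-Type': 'application/json'})
        response = json.loads(urllib2.urlopen(request).read())
        print(response)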

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/dns/ipynb_templates/ThreatInvestigation.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/ThreatInvestigation.md b/spot-oa/oa/dns/ipynb_templates/ThreatInvestigation.md
index 281ff07..db8bdbd 100644
--- a/spot-oa/oa/dns/ipynb_templates/ThreatInvestigation.md
+++ b/spot-oa/oa/dns/ipynb_templates/ThreatInvestigation.md
@@ -21,60 +21,31 @@ The following python modules will have to be imported for the notebook to work c
 
 ##Pre-requisites  
 - Execution of the spot-oa process for DNS 
-- Score a set connections in the Edge Investigation Notebook
-- Correct setup of the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration) 
-
+- Correct installation of the UI. [Read more](/ui/INSTALL.md)
+- Score a set of connections in the Edge Investigation Notebook 
+- Correctly set up the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration) 
 
 ##Additional Configuration  
 `top_results` - This value defines the number of rows that will be displayed onscreen after the expanded search. 
 
-
 ##Data source 
-The whole process in this notebook depends entirely on the existence of the scored `dns_scores.csv` file, which is generated at the OA process, and scored at the Edge Investigation Notebook.
- 
-**Input files**
-All these paths should be relative to the main OA path.       
-Schema for these files can be found [here](/spot-oa/oa/DNS)   
-
-        data/dns/<date>/dns_scores.csv  
-
-**Output files**
-
-- threats.csv : Pipe separated file containing the comments saved by the user. This file is updated every time the user adds comments for a new threat. 
-        
-        Schema with zero-indexed columns:
-        
-        0.ip_dst : string
-        0.dns_qry_name : string
-        1.title: string
-        2.description: string
-
-- threat-dendro-\<anchor>.csv : Comma separated file generated in base of the results from the expanded 
-search query. This file includes a list of connections involving the DNS or IP selected from the list. 
-These results are limited to the day under analysis. 
+Data should exist in the following tables:
+        *dns*
+        *dns_threat_investigation*
 
-        
-        Schema with zero-indexed columns:
-
-        0.total: int  
-        1.dns_qry_name: string
-        2.ip_dst: string
-        3.sev: int
-
-
-**HDFS tables consumed**  
-
-        dns
+**Output**
+The following tables will be populated after the threat investigation process:
+        *dns_storyboard*
+        *dns_threat_dendro*
 
 ##FUNCTIONS  
 
 **Widget configuration**
 This is not a function, but more like global code to set up styles and widgets to format the output of the notebook. 
 
-`start_investigation():` - This function cleans the notebook from previous executions, then calls the data_loader() function to obtain the data and afterwards displays the corresponding widgets
+`start_investigation():` - This function cleans the notebook from previous executions.
 
-`data_loader():` - This function loads the _dns_scores.csv_ file into a csv dictionary reader to find all `ip_dst` values where `ip_sev` = 1, and the `dns_qry_name` where `dns_sev` = 1, merging both 
-lists into a dictionary to populate the 'Suspicious DNS' listbox, through the _fill_list()_ function.
+`data_loader():` - This function calls the *threats* query to get the `ip_dst` and `dns_qry_name` values previously scored as high risk, merging both lists into a single dictionary that populates the 'Suspicious DNS' listbox through the _fill_list()_ function.
 
 `display_controls(ip_list):` - This function will only display the main widget box, containing:
 - "Suspicious URI" listbox
@@ -82,19 +53,15 @@ lists into a dictionary to populate the 'Suspicious DNS' listbox, through the _f
 - Container for the "Threat summary" and "Title" text boxes
 - Container for the "Top N results" HTML table
 
-`fill_list(list_control,source):` - This function populates a listbox widget with the given data dictionary and appends an empty item at the top with the value '--Select--' (Just for visualization  sake)
+`fill_list(list_control,source):` - This function populates a listbox widget with the given data dictionary and appends an empty item at the top with the value '--Select--' (just for visualization's sake).
 
-`search_ip(b):` - This function is triggered by the onclick event of the "Search" button. This will get the selected value from the listbox and perform a query to the _dns_ table to retrieve all comunication involving that IP/Domain during the day with any other IPs or Domains. 
-The output of the query will automatically be stored in the _threat-dendro-&lt;threat&gt;.csv_ file.  
-Afterwards it will read through the output file to display the HTML table, and the results displayed will be limited by the value set in the _top_results_ variable, 
-ordered by amount of connections, listing the most active connections first.
+`search_ip(b):` - This function is triggered by the onclick event of the "Search" button. It calls the GraphQL *threat / details* query to find additional connections involving the selected IP or query name. 
+The results will be displayed in the HTML table, ordered by number of connections, listing the most active connections first.
 Here the "display_threat_box()" function will be invoked. 
 
 `display_threat_box(ip):` - Generates and displays the widgets for "Title" and "Comments" textboxes and the "Save" button on the notebook.
 
 `save_threat_summary(b):` - This function is triggered by the _onclick_ event on the 'Save' button.
  This will take the contents of the form and create/update the _threats.csv_ file.
- 
-`file_is_empty(path):` - Performs a validation to check the file size to determine if it is empty.
- 
+
 `removeWidget(index):` - Javascript function that removes a specific widget from the notebook. 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/flow/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/README.md b/spot-oa/oa/flow/README.md
index 4b3dbc1..005da57 100644
--- a/spot-oa/oa/flow/README.md
+++ b/spot-oa/oa/flow/README.md
@@ -1,6 +1,6 @@
 # **Flow OA**
  
-Flow sub-module extracts and transforms Flow data already ranked by spot-ml and will load into csv files for presentation layer.
+The Flow sub-module extracts and transforms Flow data already ranked by spot-ml and loads it into Impala tables for the presentation layer.
 
 ## **Flow OA Components**
 
@@ -13,12 +13,16 @@ Flow spot-oa main script executes the following steps:
                 ipython Notebooks: ipynb/flow/<date>/
 
     2. Creates a copy of iPython notebooks out of templates in ipynb_templates folder into output folder.
+
     3. Reads Flow spot-ml results for a given date and loads only the requested limit.
+
     4. Add network context to source and destination IPs.
+
     5. Add geolocation to source and destination IPs.
-    6. Saves transformed data into a new csv file, this file is called flow_scores.csv.
-    7. Creates details, and chord diagram files. These details include information about each suspicious connection and some additional information
-       to draw chord diagrams.
+
+    6. Stores transformed data in the selected database.
+
+    7. Generates details and chord diagram data. These details include information about additional connections and the data needed to draw chord diagrams in the UI.
 
 **Dependencies**
 
@@ -48,37 +52,37 @@ Before running Flow OA users need to configure components for the first time. It
 
 **Output**
 
-- flow_scores.csv. Main results file for Flow OA. This file will contain suspicious connects information and it's limited to the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
+- Flow suspicious connections. _flow\_scores_ table.  
+
+Main results for Flow OA. The data stored in this table is limited by the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
        
-        Schema with zero-indexed columns:
-        0.   sev:            int
-        1.   tstart:         string
-        2.   srcIP:          string
-        3.   dstIP:          string
-        4.   sport:          int
-        5.   dport:          int
-        6.   proto:          string
-        7.   ipkt:           bigint
-        8.   ibyt:           bigint
-        9.   opkt:           bigint
-        10.  obyt:           bigint
-        11.  score:          double
-        12.  rank:           int
-        13.  srcIpInternal:  bit
-        14.  destIpInternal: bit
-        15.  srcGeo:         string
-        16.  dstGeo:         string
-        17.  srcDomain:      string
-        18.  dstDomain:      string
-        19.  srcIP_rep:      string
-        20.  dstIP_rep:      string
-
-
-- flow_scores_bu.csv. Backup file for flow_scores.csv in case user needs to roll back the scoring or the changes made during analysis. Schema it's same as flow_scores.csv.
-
-- edge-\<source IP>-\<destination IP>-\<HH>-\<MM>.tsv. Edge files. One for each suspicious connection containing the details for each comunication occurred during the same specific minute between source IP and destination IP.
-
-        Schema with zero-indexed columns:
+        Table schema:
+        0.   tstart:         string
+        1.   srcip:          string
+        2.   dstip:          string
+        3.   sport:          int
+        4.   dport:          int
+        5.   proto:          string
+        6.   ipkt:           int
+        7.   ibyt:           int
+        8.   opkt:           int
+        9.   obyt:           int
+        10.  score:          float
+        11.  rank:           int
+        12.  srcip_internal:  bit
+        13.  destip_internal: bit
+        14.  src_geoloc:     string
+        15.  dst_geoloc:     string
+        16.  src_domain:     string
+        17.  dst_domain:     string
+        18.  src_rep:        string
+        19.  dst_rep:        string
+
+-  Flow details. _flow\_edge_ table.
+
+A query will be executed for each suspicious connection detected, to find the details of every connection that occurred during the same minute between the given source IP and destination IP.
+
+        Table schema:
         0.  tstart:     string
         1.  srcip:      string
         2.  dstip:      string
@@ -87,30 +91,47 @@ Before running Flow OA users need to configure components for the first time. It
         5.  proto:      string
         6.  flags:      string
         7.  tos:        int
-        8.  bytes:      bigint
-        9.  pkts:       bigint
-        10. input:      int
-        11. output:     int
-        12. rip:        string
-
-- chord-\<client ip>.tsv. Chord files. One for each distinct client ip. These files contain the sum of input packets and bytes transferred between the client ip and every other IP it connected to.
-
-        Schema with zero-indexed columns:
-        0.  srcip:      string
-        1.  dstip:      string
-        2.  ibytes:     bigint
-        3.  ipkts:      double
-        
+        8.  ibyt:       bigint
+        9.  ipkt:       bigint
+        10. input:      int
+        11. output:     int
+        12. rip:        string
+        13. obyt:       bigint
+        14. opkt:       bigint
+        15. hh:         int
+        16. md:         int
+
+- Flow Chord Diagrams.  _flow\_chords_ table.
+
+A query will be executed for each distinct client IP that has connections to two or more other suspicious IPs. This query retrieves the sum of input packets and bytes transferred between the client IP and every other suspicious IP it connected to.
+
+        Table schema:
+        0. ip_threat:  string
+        1. srcip:      string
+        2. dstip:      string
+        3. ibyt:       bigint
+        4. ipkt:       bigint
+
+
+- Flow Ingest summary. _flow\_ingest\_summary_ table.
+
+This table is populated with the number of connections ingested per minute during the day.
+
+        Table schema:
+        0. tdate:      string
+        1. total:      bigint 
+
+
 ### flow_config.json
 
 Flow spot-oa configuration. Contains columns name and index for input and output files.
 This Json file contains 3 main arrays:
    
     - flow_results_fields: list of column name and index of ML flow_results.csv file. Flow OA uses this mapping to reference columns by name.
-    - column_indexes_filter: the list of indices to take out of flow_results_fields for OA process. 
+    - column_indexes_filter: the list of indices to take out of flow_results_fields for the OA process. 
     - flow_score_fields: list of column name and index for flow_scores.csv. After the OA process completes more columns are added.
-        
-
+    
 
 ### ipynb_templates
 Templates for iPython notebooks.
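
The _flow\_chords_ population described above amounts to an aggregation over the _flow_ table for each suspicious client IP. A hedged sketch of such a query through impyla (connection settings and the threat IP are placeholders; the actual query text lives in the OA code):

        from impala.dbapi import connect

        threat_ip = '10.0.0.1'  # placeholder suspicious client IP
        conn = connect(host='impala-daemon.example.com', port=21050)
        cursor = conn.cursor()

        # Sum bytes and packets between the client IP and every peer,
        # mirroring the flow_chords description above.
        cursor.execute("""
            SELECT srcip, dstip, SUM(ibyt) AS ibyt, SUM(ipkt) AS ipkt
            FROM flow
            WHERE srcip = '{0}' OR dstip = '{0}'
            GROUP BY srcip, dstip""".format(threat_ip))
        for row in cursor.fetchall():
            print(row)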

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md b/spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md
index 272e2bc..973a463 100644
--- a/spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md
+++ b/spot-oa/oa/flow/ipynb_templates/EdgeNotebook.md
@@ -8,56 +8,53 @@
 
 The following python modules will be imported for the notebook to work correctly:    
 
+        import datetime
         import struct, socket
         import shutil
         import numpy as np
         import pandas as pd
         import linecache, bisect
-        import csv
+        import csv, json
         import operator
-        import os, time, subprocess
+        import os, time, subprocess 
+        from collections import OrderedDict
         import ipywidgets #For jupyter/ipython >= 1.4  
         from IPython.html import widgets #For jupyter/ipython < 1.4  
-        from IPython.display import display, HTML, clear_output, Javascript   
+        from IPython.display import display, Javascript, clear_output   
 
 
 ###Pre-requisites
+- Execute the hdfs_setup.sh script to create OA tables and set up permissions
+- Correctly set up the spot.conf file. [Read more](http://spot.incubator.apache.org/doc/#configuration)
 - Execution of the spot-oa process for Flow
-- Correct setup the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration)
-- Have a public key created between the current UI node and the ML node. [Read more](/wiki/Configure%20User%20Accounts#configure-user-accounts)
+- Correct installation of the UI. [Read more](/ui/INSTALL.md)
 
 
-##Additional Configuration
+##Additional Configuration inside the notebook
 `coff` - This value defines the max number of records used to populate the listbox widgets. This value is set by default on 250.
 `nwloc` - File name of the custom network context.  
 
-###Data source
-The whole process in this notebook depends entirely on the existence of `flow_scores.csv` file, which is generated at the OA process at the path.  
-The data is directly manipulated on the .csv files, so a `flow_scores_bu.csv` on the same path is created as a backup to allow the user to restore the original data at any point, 
-and this can be performed executing the last cell on the notebook with the following command:  
-
-        !cp $sconnectbu $sconnect
 
+###Data source
+The whole process in this notebook depends entirely on the existence of the `flow_scores` table in the database.  
+The data is manipulated through the GraphQL API that is also included in the repository.
 
-**Input files**  
-All these paths should be relative to the main OA path.    
-Schema for these files can be found [here](/spot-oa/oa/flow)
 
-        data/flow/<date>/flow_scores.csv
-        data/flow/<date>/flow_scores_bu.csv
+**Input**  
+The data to be processed should be stored in the following tables:
 
-**Temporary Files**
+        flow_scores
+        flow
 
-        data/flow/<date>/flow_scores.csv.tmp
 
-**Output files**
+**Output**
+The following tables will be populated after the scoring process:
+        flow_threat_investigation
 
-        data/flow/<date>/flow_scores.csv (Updated with severity values)
-        data/flow/<date>/flow_scores_fb.csv (File with scored connections that will be used for ML feedback)
 
 ##Functions 
  
-`displaythis():` - This function reads the `flow_scores.csv` file to list all suspicious unscored connections, creating separated lists for:
+`data_loader():` - This function calls the GraphQL API query *suspicious* to list all suspicious unscored connections, creating separate lists for:
 - Source IP
 - Destination IP
 - Source port
@@ -69,29 +66,7 @@ Each of these lists will populate a listbox widget and then they will be display
 
 `update_sconnects(b):` -   
 This function is executed on the onclick event of the 'Assign' button. The notebook will first try to get the value from the 'Quick IP Scoring' textbox ignoring the selections from the listboxes; in case the textbox is empty, it will then
- get the selected values from each of the listboxes to look them up in the `flow_scores.csv` file. 
-A binary search on the file is then performed:  
-- The value in the 'Quick IP Scoring' textbox, will be compared against the `ip_src` and `ip_dst` columns; if either column is a match, the `sev` column will be updated with the value selected from the radiobutton list. 
-- The column `srcIP` will be compared against the 'Source IP' selected value.  
-- The column `dstIP` will be compared against the 'Dest IP' selected value. 
-- The column `sport` will be compared against the 'Src Port' selected value.
-- The column `dport` will be compared against the 'Dst Port' selected value.  
-
-Every row will be then appended to the `flow_scores.csv.tmp` file, which will replace the original `flow_scores.csv` at the end of the process.
-The scored rows will also be appended to the `flow_scores_fb.csv` file, which will later be used for the ML feedback.   
-
-`set_rules():` - Predefined function where the user can define custom rules to be initally applied to the dataset. By default this function is commented out.
-
-`create_feedback_file(scored_rows):` - Appends the updated rows to the _flow_scores_fb.csv_ everytime a connection is scored. This file is used as feedback for the ML process.
-
-`apply_rules(rops,rvals,risk):` - This function applies the rules defined by `set_rules()` and updates the `flow_scores.csv` file following a similar process to the `update_sconnects()` function. By default this function is commented out.
-
-`attack_heuristics():` - This function is executed at the start, and loads the data from `flow_scores.csv` into a pandas dataframe grouped by `srcIp` column,
-to then print only those IP's that connect to more than 20 other different IP's. By default this function is commented out.
+ get the selected values from each of the listboxes and append them to a temporary list. 
 
 `savesort(b):` - This event is triggered by the 'Save' button, and executes javascript functions to refresh the data on all the panels in Suspicious Connects.  
-This function also reorders the _flow_scores.csv_ file by moving all scored connections to the end of the file and sorting the remaining connections by `lda_score` column.    
-Finally, removes the widget panel and reloads it again to update the results, removing the need of a manual refresh, and calls the `ml_feedback():` function.    
-
-`ml_feedback():` - A shell script is executed, transferring thru secure copy the _flow_scores_fb.csv_ file into ML Master node, where the destination path is defined at the spot.conf file.
-   
\ No newline at end of file
+This function calls the *score* mutation, which updates the score for the selected values in the database.

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/flow/ipynb_templates/ThreatInvestigation.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/ThreatInvestigation.md b/spot-oa/oa/flow/ipynb_templates/ThreatInvestigation.md
index fdbcad0..787d955 100644
--- a/spot-oa/oa/flow/ipynb_templates/ThreatInvestigation.md
+++ b/spot-oa/oa/flow/ipynb_templates/ThreatInvestigation.md
@@ -7,6 +7,7 @@
 
 The following python modules will have to be imported for the notebook to work correctly:  
 
+        import datetime
         import struct, socket
         import numpy as np
         import linecache, bisect
@@ -14,6 +15,7 @@ The following python modules will have to be imported for the notebook to work c
         import operator
         import json
         import os
+        import pandas as pd
         import ipywidgets as widgets # For jupyter/ipython >= 1.4
         from IPython.html import widgets
         from IPython.display import display, Javascript, clear_output
@@ -21,149 +23,46 @@ The following python modules will have to be imported for the notebook to work c
 
 ##Pre-requisites  
 - Execution of the spot-oa process for Flow
+- Correct installation of the UI. [Read more](/ui/INSTALL.md)
 - Score a set of connections in the Edge Investigation Notebook 
 - Correctly set up the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration) 
 - Include a comma separated network context file. **Optional** [Schema](/spot-oa/oa/components/README.md#network-context-nc)
-- Include a geolocation database file. [Schema](/spot-oa/oa/components/README.md#geoloc)   
+- Include a geolocation database file.  **Optional** [Schema](/spot-oa/oa/components/README.md#geoloc)   
 
 
-##Additional Configuration
+##Additional Configuration inside the notebook
 `top_results` - This value defines the number of rows that will be displayed onscreen after the expanded search. 
 This also affects the number of IPs that will appear in the Timeline chart.
 
 
 ##Data source  
-The whole process in this notebook depends entirely on the existence of the scored _flow_scores.csv_ file, which is generated at the OA process, and scored at the Edge Investigation Notebook.
+Data should exist in the following tables:
+        *flow*
+        *flow_threat_investigation*
+
 
 **Input files**  
 All these paths should be relative to the main OA path.    
 Schema for these files can be found here:
-
-[flow_scores.csv](/spot-oa/oa/flow)  
+ 
 [iploc.csv](/spot-oa/oa/components/README.md#geoloc)  
 [networkcontext_1.csv](/spot-oa/oa/components/README.md#network-context-nc)  
   
-
-        data/flow/<date>/flow_scores.csv  
+ 
         context/iploc.csv
         context/networkcontext_1.csv
  
 
-**Output files**  
-- threats.csv : Pipe separated file containing the comments saved by the user. This file is updated every time the user adds comments for a new threat. 
-        
-        Schema with zero-indexed columns:
-        
-        0.ip: string
-        1.title: string
-        2.description: string
- 
-- sbdet-\<ip>.tsv : Tab separated file, this file lists all the client IP's that connected to the IP under investigation, including: 
-the duration of the connection, response code and exact date and time of each the connection.  
-        Schema with zero-indexed columns:
-        
-        0.tstart: string
-        1.tend: string
-        2.srcip	: string
-        3.dstip : string
-        4.proto : string
-        5.sport : string
-        6.dport : string
-        7.pkts : string
-        8.bytes : string
-
-
-- globe-\<ip>.json : Json file including the geolocation of all the suspicious connections related to the IP under investigation. 
-                Schema:
-
-                {
-                        "destips": [{
-                                "geometry": {
-                                        "type": "Point",
-                                        "coordinates": <[Lat, Long] values from the geolocation database>
-                                },
-                                "type": "Feature",
-                                "properties": {
-                                        "ip": "<dst IP>",
-                                        "type": <1 for Inbound, 2 for Outbound, 3 for Two way>,
-                                        "location": "<Host name provided by the geolocation db>"
-                                },
-                                ......
-                        }
-                        }],
-                        "type": "FeatureCollection",
-                        "sourceips": [{
-                                "geometry": {
-                                        "type": "Point",
-                                        "coordinates": <[Lat, Long] values from the geolocation database>
-                                },
-                                "type": "Feature",
-                                "properties": {
-                                        "ip": "<src ip>",
-                                        "type": <1 for Inbound, 2 for Outbound, 3 for Two way>,
-                                        "location": "<Host name provided by the geolocation db>"
-                                },
-                                ......
-                        }]
-                }
- 
-
-- stats-\<ip>.json: Json file containing the count of connections of each kind made to/from the suspicious IP.
-                Schema:
-
-                {
-                        "size": <total of connections>,
-                        "name": "<Name of suspicious IP, according to the network context",
-                        "children": [{
-                                "size": <Total number of Inbound connections>,
-                                "name": "Inbound Only", 
-                                "children": 
-                                        [{
-                                                "name": "<Context name>",
-                                                "size": <Number of connections>
-                                        }, ...
-                                        ]
-                                },
-                                {"size": <Total number of Outbound connections>,
-                                 "name": "Outbound Only", 
-                                 "children": 
-                                        [{
-                                                "name": "<Context name>",
-                                                "size": <Number of connections>
-                                        }, ...
-                                        ]
-                                }, 
-                                {"size": <Total number of Two way connections>,
-                                 "name": "two way",
-                                 "children":
-                                        [{
-                                                "name": "<Context name>",
-                                                "size": <Number of connections>
-                                        }, ...
-                                        ]
-                                }]
-                        }
-  
-
- - threat-dendro-\<ip>.json : Json file including the breakdown of the connections performed by the suspicious IP.  
-
-                Schema: 
-
-                {"time": "date in YYYYMMDD format>",
-                 "name": "<suspicious IP>",
-                 "children": [{
-                        "impact": 0,
-                        "name": "<Type of connections>", 
-                        "children": [
-                                <Individual connections named after the network context>
-                                ]
-                        }]
-                }
+**Output**  
+The following tables will be populated after the threat investigation process:
+        *flow_storyboard*
+        *flow_timeline*
 
-**HDFS tables consumed**  
-
-                flow
-   
+The following files will be created and stored in HDFS.
+ 
+        globe-\<ip>.json
+        stats-\<ip>.json
+        threat-dendro-\<ip>.json
 
 ##FUNCTIONS  
 
@@ -171,19 +70,13 @@ the duration of the connection, response code and exact date and time of each th
 
 This is not a function, but more like global code to set up styles and widgets to format the output of the notebook.   
 
-`start_investigation():` - This function cleans the notebook from previous executions, then loops through the _flow_scores.csv_ file to get the 'srcIp' and 'dstIP' values from connections scored as high risk (sev = 1), ignoring IPs
-already saved in the _threats.csv_ file. 
+`start_investigation():` - This function cleans the notebook from previous executions, and calls the *threats* query to get the source and destination IPs previously scored as high risk. 
 
 `display_controls(threat_list):` - This function will display the ipython widgets with the listbox of high risk IP's and the "Search" button.
 
-`search_ip()` - This function is triggered by the onclick event of the "Search" button after selecting an IP from the listbox. This will perform a query to the _flow_ table to find all connections involving the selected IP.
- The results are stored in the _ir-\<ip>.tsv_ file. If the file is not empty, this will immediately execute the following functions:  
- - get_in_out_and_twoway_conns()
- - add_geospatial_info()
- - add_network_context() 
- - display_threat_box()
+`search_ip()` - This function is triggered by the onclick event of the "Search" button after selecting an IP from the listbox. It calls the GraphQL *threat / details* query to find additional connections involving the selected IP. 
 
-`get_in_out_and_twoway_conns():` - With the data from the _ir-\<ip>.tsv_ file, this function will loop through each connection and store it into one of three dictionaries:
+`get_in_out_and_twoway_conns():` - With the data from the previous method, this function will loop through each connection and store it into one of three dictionaries:
- All unique 'inbound' connected IPs (where the internal sought IP appears only as destination, or the opposite if the IP is external)  
- All unique 'outbound' connected IPs (where the internal sought IP appears only as source, or the opposite if the IP is external)
- All unique 'two way' connected IPs (where the sought IP appears as both source and destination)
@@ -197,18 +90,7 @@ To aid on the analysis, this function displays four html tables each containing
 
 `display_threat_box(ip):` - Displays the widgets for "Title", "Comments" textboxes and the "Save" button on the notebook, so the user can add comments related to the threat and save them to continue with the analysis.  
 
-`add_network_context()` - This function depends on the existence of the _networkcontext\_1.csv_ file, otherwise this step will be skipped.
-This function will loop through all dictionaries updating each IP with its context depending on the ranges defined in the networkcontext.
-
-`add_geospatial_info()` - This function depends on the existence of the _iploc.csv_ file. This will read through the dictionaries created, looking for every IP and updating its geolocation data according to the iploc database. If the iploc file doesn't exist, this function will be skipped.
-
-`save_threat_summary()` - This function is triggered by the onclick event of the "Save" button. Removes the widgets and cleans the notebook from previous executions, removes the selected value from the listbox widget and 
- executes each of the following functions to create the data source files for the storyboard:
-- generate_attack_map_file()
-- generate_stats()
-- generate_dendro()
-- details_inbound()
-- add_threat() 
+`save_threat_summary()` - This function is triggered by the onclick event of the "Save" button. Removes the widgets and cleans the notebook from previous executions, removes the selected value from the listbox widget and executes the *createStoryboard* mutation to save the data for the storyboard.
 
 `display_results(cols, dataframe, top)` - 
 *cols*: List of columns to display from the dataframe
@@ -216,23 +98,8 @@ This function will loop through all dictionaries updating each IP with its conte
 *top*: Number of top rows to display.
 This function will create a formatted html table to display the provided dataframe.
 
-`generate_attack_map_file(ip, inbound, outbound, twoway): `- This function depends on the existence of the _iploc.csv_ file. Using the geospatial info previously added to the dictionaries, this function will create the _globe.json_ file. If the iploc file doesn't exist, this function will be skipped.
-
-`generate_stats(ip, inbound, outbound, twoway, threat_name):` - This function reads through each of the dictionaries to group the connections by type. The results are stored in the _stats-&lt;ip&gt;.json_ file. 
-
-`generate_dendro(ip, inbound, outbound, twoway, date):` - This function groups the results from all three dictionaries into a json file, adding additionals level if the dictionaries include network context for each IP. 
-The results are stored in the _threat-dendro-&lt;ip&gt;.json_ file.
-
-`details_inbound(anchor, inbound, outbond, twoway):` -  This function executes a query to the _flow_ table looking for all additional information between the shought IP (threat) and the IP's in the 'top_n' dictionaries. The results will be stored in the _sbdet-&lt;ip&gt;.tsv_ file.
- 
-`add_threat(ip,threat_title):`- Creates or updates the _threats.csv_ file, appending the IP and Title from the web form. This will serve as the menu for the Story Board.
-
 `get_top_bytes(conns_dict, top):` - Orders a dictionary descendent by number of bytes, returns a dictionary with the top 'n' values. This dictionary will be printed onscreen, listing the most active connections first.   
 
 `get_top_conns(conns_dict, top):` - Orders a dictionary descendent by number of connections executed, returns a dictionary with the top 'n' values. This dictionary will be printed onscreen, listing the most active connections first.   
 
-`file_is_empty(path):` - Performs a validation to check the file of a size to determine if it is empty.
- 
 `removeWidget(index):` - Javascript function that removes a specific widget from the notebook.
- 
-`get_ctx_name(full_context): ` **Deprecated**    
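
Both `get_top_bytes` and `get_top_conns` follow the same pattern: sort the connections dictionary in descending order by one metric and keep the top 'n' entries. A minimal sketch of that pattern (the dictionary layout here is an assumption for illustration):

        from collections import OrderedDict

        def get_top(conns_dict, top, metric_index):
            # Sort descending by the chosen metric (bytes or connection count)
            # and keep only the first 'top' entries.
            ranked = sorted(conns_dict.items(),
                            key=lambda item: item[1][metric_index],
                            reverse=True)
            return OrderedDict(ranked[:top])

        # Illustrative data: ip -> (bytes, connections)
        conns = {'10.0.0.1': (9000, 3), '10.0.0.2': (150, 42)}
        print(get_top(conns, 1, 0))  # top talker by bytes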

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/363c02d8/spot-oa/oa/proxy/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/README.md b/spot-oa/oa/proxy/README.md
index ac6f73c..110a209 100644
--- a/spot-oa/oa/proxy/README.md
+++ b/spot-oa/oa/proxy/README.md
@@ -1,6 +1,6 @@
 # PROXY
 
-Proxy sub-module will extract and transform Proxy data already ranked by spot-ml and will load into csv files for presentation layer.
+The Proxy sub-module extracts and transforms Proxy data already ranked by spot-ml and loads it into Impala tables for the presentation layer.
 
 ## Proxy Components
 
@@ -13,7 +13,6 @@ Proxy spot-oa main script executes the following steps:
 			data: data/proxy/<date>/
 			ipython Notebooks: ipynb/proxy/<date>/
 		
-		
 		2. Creates a copy of the notebooks templates into the ipython Notebooks path and renames them removing the "_master" part from the name.
 		
 		3. Gets the proxy_results.csv from the HDFS location according to the selected date, and copies it back to the corresponding data path.
@@ -30,11 +29,9 @@ Proxy spot-oa main script executes the following steps:
 		
 		9. Creates a hash for every full_uri + clientip pair to use as filename.  
 		 
-		10. Saves proxy_scores.tsv file.
-		 
-		11. Creates a backup of proxy_scores.tsv file.
+		10. Saves the data in the _proxy\_scores_ table. 
 		
-		12. Creates proxy data details files. 
+		11. Collects information about additional connections to display the details table in the UI.
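
Step 9 above derives a filename-safe identifier from each full_uri + clientip pair. A minimal sketch of that idea, assuming an MD5 digest (the hash function actually used by the pipeline may differ):

        import hashlib

        # Illustrative values; the pipeline reads these from proxy records.
        fulluri = 'http://example.com/some/path?q=1'
        clientip = '10.0.0.1'

        # Stable hex digest of the pair (on Python 3, encode the string first).
        pair_hash = hashlib.md5(fulluri + clientip).hexdigest()
        print(pair_hash)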
 
 
 **Dependencies**
@@ -60,63 +57,70 @@ Before running Proxy OA, users need to configure components for the first time.
 
 **Output**
 
-- proxy_scores.tsv: Main results file for Proxy OA. This file is tab separated and it's limited to the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
-
-		Schema with zero-indexed columns: 
-
-		0.p_date: string 
-		1.p_time: string 
-		2.clientip: string 
-		3.host: string 
-		4.reqmethod: string
-		5.useragent: string
-		6.resconttype: string
-		7.duration: int
-		8.username: string 
-		9.webcat: string 
-		10.referer: string 
-		11.respcode: string 
-		12.uriport: string 
-		13.uripath: string
-		14.uriquery: string 
-		15.serverip: string
-		16.scbytes: int
-		17.csbytes: int
-		18.fulluri: string
-		19.word: string
-		20.score: string 
-		21.uri_rep: string
-		22.uri_sev: string 
-		23.respcode_name: string 
-		24.network_context: string
-		25.hash: string
-
-
-- proxy_scores_bu.tsv: The backup file of suspicious connects in case user want to roll back any changes made during analysis. Schema is same as proxy_scores.tsv.
-     
-
-- edge-clientip-\<hash>HH.tsv: One file for each fulluri + clientip connection for each hour of the day.
-
-		Schema with zero-indexed columns:
-
-		0.p_date: string
-		1.p_time: string
-		2.clientip: string
-		3.host: string
-		4.webcat: string
-		5.respcode: string
-		6.reqmethod: string
-		7.useragent: string
-		8.resconttype: string
-		9.referer: string
-		10.uriport: string
-		11.serverip: string
-		12.scbytes: int
-		13.csbytes: int
-		14.fulluri: string
+- Proxy suspicious connections. _proxy\_scores_ table.
+
+Main results for Proxy OA. The data stored in this table is limited by the number of rows the user selected when running [oa/start_oa.py](/spot-oa/oa/INSTALL.md#usage).
+ 
+		0.tdate string
+		1.time string
+		2.clientip string
+		3.host string
+		4.reqmethod string
+		5.useragent string
+		6.resconttype string
+		7.duration int
+		8.username string
+		9.webcat string
+		10.referer string
+		11.respcode string
+		12.uriport string
+		13.uripath string
+		14.uriquery string
+		15.serverip string
+		16.scbytes int
+		17.csbytes int
+		18.fulluri string
+		19.word string
+		20.ml_score float
+		21.uri_rep string
+		22.respcode_name string
+		23.network_context string 
+
+
+- Proxy details. _proxy\_edge_ table.
+
+A query will be executed for each fulluri + clientip connection for each hour of the day.
+ 
+		0.tdate string
+		1.time string
+		2.clientip string
+		3.host string
+		4.webcat string
+		5.respcode string
+		6.reqmethod string
+		7.useragent string
+		8.resconttype string
+		9.referer string
+		10.uriport string
+		11.serverip string
+		12.scbytes int
+		13.csbytes int
+		14.fulluri string
+		15.hh int
+		16.respcode_name string
+
+
+- Proxy Ingest summary. _proxy\_ingest\_summary_ table.
+
+This table is populated with the number of connections ingested per minute during the day.
+
+        Table schema:
+        0. tdate:      string
+        1. total:      bigint 
+
 
 ###proxy_conf.json
-This file is part of the initial configuration for the proxy pipeline It will contain mapped all the columns included in the proxy_results.csv and proxy_scores.tsv files.
+This file is part of the initial configuration for the proxy pipeline. It maps all the columns included in the proxy_results.csv file and the proxy tables.
 
 This file contains three main arrays:
 


[23/50] [abbrv] incubator-spot git commit: Adding README and requirements for API Resources

Posted by ev...@apache.org.
Adding README and requirements for API Resources


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/ffc6dbc6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/ffc6dbc6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/ffc6dbc6

Branch: refs/heads/SPOT-35_graphql_api
Commit: ffc6dbc616a91485a34d2fff3917ab03fbcb3411
Parents: 3718d80
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.local>
Authored: Thu Mar 9 13:26:30 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/README.md | 50 ++++++++++++++++++++++++++++++++++++
 spot-oa/requirements.txt        |  6 +++++
 2 files changed, 56 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ffc6dbc6/spot-oa/api/resources/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/README.md b/spot-oa/api/resources/README.md
new file mode 100644
index 0000000..1d74e6f
--- /dev/null
+++ b/spot-oa/api/resources/README.md
@@ -0,0 +1,50 @@
+
+
+# API Resources 
+
+
+API Resources are the backend methods used by GraphQL to perform CRUD operations on Apache Spot (incubating), such as scoring connections, performing a threat investigation, or generating a storyboard.
+
+**Classes:**
+
+* Resources/flow.py
+* Resources/dns
+* Resources/proxy
+* Resources/configurator
+* Resources/hdfs_client
+* Resources/impala_engine.py
+
+
+
+## **Configuration Required (spot.conf):**
+
+API Resources use the WebHDFS REST API (https://hadoop.apache.org/docs/r1.0.4/webhdfs.html) and the Impala API; because of this, some new configuration is required.
+
+**_Keys in HDFS section:_**
+
+**NAME_NODE:** This key is required to set up the name node (full DNS name or IP) used to connect to the WebHDFS REST API.
+**WEB_PORT:** Web port for the WebHDFS REST API (default: 50070).
+
+**_Keys in Impala section:_**
+
+**IMPALA_DEM:** This key has been there since the last release, but now that Spot uses an API to connect, you need to provide either the Impala daemon's full DNS name or its server IP.
+**IMPALA_PORT:** Port on which HiveServer2 client requests are served by Impala Daemons.
+
+## **Prerequisites:**
+
+#### Python:
+* setuptools
+* thrift==0.9.3
+* impyla
+* hdfs
+
+**NOTE: all these requirements are already part of the requirements.txt file, so you don't need to install the Python prerequisites manually.**
+
+#### Hadoop:
+
+* Impala.
+* WebHDFS REST API.
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ffc6dbc6/spot-oa/requirements.txt
----------------------------------------------------------------------
diff --git a/spot-oa/requirements.txt b/spot-oa/requirements.txt
index 9d6f868..9f3afb8 100644
--- a/spot-oa/requirements.txt
+++ b/spot-oa/requirements.txt
@@ -18,3 +18,9 @@ flask
 flask-graphql
 graphql-core
 urllib3
+
+# API Resources
+setuptools>=3.4.4
+thrift==0.9.3
+impyla
+hdfs


[11/50] [abbrv] incubator-spot git commit: Removed csv files from OA and ipython notebooks

Posted by ev...@apache.org.
Removed csv files from OA and ipython notebooks


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/1904f2b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/1904f2b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/1904f2b4

Branch: refs/heads/SPOT-35_graphql_api
Commit: 1904f2b44cec2bd8cc7c6a22efad0009ddce1d3c
Parents: 2c951e9
Author: LedaLima <le...@apache.org>
Authored: Sun Mar 5 18:20:05 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_conf.json                    |  36 +-
 spot-oa/oa/dns/dns_oa.py                        | 318 ++++-----
 .../Edge_Investigation_master.ipynb             | 183 +++---
 .../Threat_Investigation_master.ipynb           | 186 +++---
 spot-oa/oa/flow/flow_conf.json                  |  27 +-
 spot-oa/oa/flow/flow_oa.py                      | 245 ++++---
 .../Edge_Investigation_master.ipynb             | 302 ++-------
 .../Threat_Investigation_master.ipynb           | 651 ++++---------------
 .../Edge_Investigation_master.ipynb             | 150 ++---
 .../Threat_Investigation_master.ipynb           | 306 ++++-----
 spot-oa/oa/proxy/proxy_conf.json                |  10 +-
 spot-oa/oa/proxy/proxy_oa.py                    | 232 ++++---
 spot-oa/oa/utils.py                             |  12 +
 13 files changed, 1035 insertions(+), 1623 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/dns/dns_conf.json
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_conf.json b/spot-oa/oa/dns/dns_conf.json
index aadd92c..0e23a38 100644
--- a/spot-oa/oa/dns/dns_conf.json
+++ b/spot-oa/oa/dns/dns_conf.json
@@ -11,27 +11,25 @@
         , "score" : 8  
     },
     "dns_score_fields": 
-    {
+    { 
         "frame_time" : 0
-        , "frame_len" : 1 
-        , "ip_dst": 2
-        , "dns_qry_name" : 3
-        , "dns_qry_class" : 4
-        , "dns_qry_type" : 5
-        , "dns_qry_rcode" : 6
-        , "score" : 7
-        , "tld" : 8
-        , "query_rep" : 9 
-        , "hh": 10
-        , "ip_sev" : 11
-        , "dns_sev" : 12
-        , "dns_qry_class_name" : 13
-        , "dns_qry_type_name" : 14
-        , "dns_qry_rcode_name" : 15
-        , "network_context" : 16
-        , "unix_tstamp": 17
+        ,"unix_tstamp" : 1
+        , "frame_len" : 2
+        , "ip_dst": 3
+        , "dns_qry_name" : 4
+        , "dns_qry_class" : 5
+        , "dns_qry_type" : 6
+        , "dns_qry_rcode" : 7
+        , "score" : 8
+        , "tld" : 9
+        , "query_rep" : 10
+        , "hh": 11
+        , "dns_qry_class_name" : 12
+        , "dns_qry_type_name" : 13
+        , "dns_qry_rcode_name" : 14
+        , "network_context" : 15
     },
     "add_reputation":{
         "query_rep":4   
     }
-}
\ No newline at end of file
+} 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index 2033c89..8d3ce80 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -23,7 +23,8 @@ import sys
 import datetime
 import csv, math
 from tld import get_tld
-
+import api.resources.impala_engine as impala
+import api.resources.hdfs_client as HDFSClient
 from collections import OrderedDict
 from utils import Util
 from components.data.data import Data
@@ -53,22 +54,20 @@ class OA(object):
         self._data_path = None
         self._ipynb_path = None
         self._ingest_summary_path = None
-        self._dns_scores = []
-        self._dns_scores_headers = []
+        self._dns_scores = [] 
         self._results_delimiter = '\t'
         self._details_limit = 250
 
         # get app configuration.
         self._spot_conf = Util.get_spot_conf()
 
-        # get scores fields conf
+        # get scores fields conf
         conf_file = "{0}/dns_conf.json".format(self._scrtip_path)
         self._conf = json.loads(open (conf_file).read(),object_pairs_hook=OrderedDict)
 
         # initialize data engine
         self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
-        self._engine = Data(self._db,self._table_name ,self._logger) 
-
+        
 
     def start(self):
 
@@ -76,15 +75,16 @@ class OA(object):
         start = time.time()
         ####################
 
+        self._clear_previous_executions()
         self._create_folder_structure()
         self._add_ipynb()
         self._get_dns_results()
         self._add_tld_column()
         self._add_reputation()
-        self._add_hh_and_severity()
+        self._add_hh_column()
         self._add_iana()
         self._add_network_context()
-        self._create_dns_scores_csv()
+        self._create_dns_scores()
         self._get_oa_details()
         self._ingest_summary()
 
@@ -93,12 +93,33 @@ class OA(object):
         print(end - start)
         ##################
 
-    def _create_folder_structure(self):
 
+    def _clear_previous_executions(self):
+        
+        self._logger.info("Cleaning data from previous executions for the day")       
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:]  
+        table_schema = []
+        HUSER = self._spot_conf.get('conf', 'HUSER').replace("'", "").replace('"', '')
+        table_schema=['suspicious', 'edge', 'dendro', 'threat_dendro', 'threat_investigation', 'storyboard' ]
+
+        for path in table_schema:
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+        #removes Feedback file
+        HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
+        #removes json files from the storyboard
+        HDFSClient.delete_folder("{0}/{1}/oa/{2}/{3}/{4}/{5}".format(HUSER,self._table_name,"storyboard",yr,mn,dy))
+
+
+    def _create_folder_structure(self):
+        
         # create date folder structure if it does not exist.
         self._logger.info("Creating folder structure for OA (data and ipynb)")       
         self._data_path,self._ingest_summary_path,self._ipynb_path = Util.create_oa_folders("dns",self._date)
     
+
     def _add_ipynb(self):
 
         if os.path.isdir(self._ipynb_path):
@@ -125,59 +146,59 @@ class OA(object):
         # get results file from hdfs.
         get_command = Util.get_ml_results_form_hdfs(hdfs_path,self._data_path)
         self._logger.info("{0}".format(get_command))
-
-         # validate files exists
+ 
         if os.path.isfile(dns_results):
-
+    
             # read number of results based in the limit specified.
             self._logger.info("Reading {0} dns results file: {1}".format(self._date,dns_results))
             self._dns_results = Util.read_results(dns_results,self._limit,self._results_delimiter)[:]
-            if len(self._dns_results) == 0: self._logger.error("There are not flow results.");sys.exit(1)
+            if len(self._dns_results) == 0: self._logger.error("There are no dns results.");sys.exit(1)
 
         else:
             self._logger.error("There was an error getting ML results from HDFS")
-            sys.exit(1)
-
-        # add headers.        
-        self._logger.info("Adding headers")
-        self._dns_scores_headers = [  str(key) for (key,value) in self._conf['dns_score_fields'].items() ]
+            sys.exit(1) 
 
         # add dns content.
-        self._dns_scores = [ conn[:]  for conn in self._dns_results][:]       
+        self._dns_scores = [ conn[:]  for conn in self._dns_results][:]    
 
-    def _move_time_stamp(self,dns_data):
-        
-        for dns in dns_data:
-            time_stamp = dns[1]
-            dns.remove(time_stamp)
-            dns.append(time_stamp)
+
+    def _move_time_stamp(self,dns_data): 
         
+        # timestamp reordering is no longer needed; the data is returned unchanged
         return dns_data        
 
-    def _create_dns_scores_csv(self):
+
+    def _create_dns_scores(self):
         
-        dns_scores_csv = "{0}/dns_scores.csv".format(self._data_path)
-        dns_scores_final =  self._move_time_stamp(self._dns_scores)
-        dns_scores_final.insert(0,self._dns_scores_headers)
-        Util.create_csv_file(dns_scores_csv,dns_scores_final)   
+        # get date parameters.
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:] 
+        value_string = ""
 
-        # create bk file
-        dns_scores_bu_csv = "{0}/dns_scores_bu.csv".format(self._data_path)
-        Util.create_csv_file(dns_scores_bu_csv,dns_scores_final)     
+        dns_scores_final = self._move_time_stamp(self._dns_scores)
+        self._dns_scores = dns_scores_final
+        for row in dns_scores_final:
+            value_string += str(tuple(Util.cast_val(item) for item in row)) + ","              
+    
+        load_into_impala = ("""
+             INSERT INTO {0}.dns_scores partition(y={2}, m={3}, d={4}) VALUES {1}
+        """).format(self._db, value_string[:-1], yr, mn, dy) 
+        impala.execute_query(load_into_impala)
 
 
     def _add_tld_column(self):
         qry_name_col = self._conf['dns_results_fields']['dns_qry_name'] 
-        self._dns_scores = [conn + [ get_tld("http://" + str(conn[qry_name_col]), fail_silently=True) if "http://" not in str(conn[qry_name_col]) else get_tld(str(conn[qry_name_col]), fail_silently=True)] for conn in self._dns_scores ] 
-  
+        self._dns_scores = [conn + [ get_tld("http://" + str(conn[qry_name_col]), fail_silently=True) if "http://" not in str(conn[qry_name_col]) else get_tld(str(conn[qry_name_col]), fail_silently=True)] for conn in self._dns_scores ]
+        
+
     def _add_reputation(self):
 
         # read configuration.
         reputation_conf_file = "{0}/components/reputation/reputation_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         self._logger.info("Reading reputation configuration file: {0}".format(reputation_conf_file))
         rep_conf = json.loads(open(reputation_conf_file).read())
-        
-        
+                
         # initialize reputation services.
         self._rep_services = []
         self._logger.info("Initializing reputation services.")
@@ -199,7 +220,6 @@ class OA(object):
         # get reputation per column.
         self._logger.info("Getting reputation for each service in config")        
         rep_services_results = []
-
  
         if self._rep_services :
             for key,value in rep_cols.items():
@@ -213,12 +233,11 @@ class OA(object):
             self._dns_scores = [ conn + [""]   for conn in self._dns_scores  ]
 
 
+    def _add_hh_column(self):
 
-    def _add_hh_and_severity(self):
-
-        # add hh value and sev columns.
+        # add hh value column.
         dns_date_index = self._conf["dns_results_fields"]["frame_time"]
-        self._dns_scores = [conn + [ filter(None,conn[dns_date_index].split(" "))[3].split(":")[0]] + [0] + [0] for conn in self._dns_scores  ]
+        self._dns_scores = [conn + [ filter(None,conn[dns_date_index].split(" "))[3].split(":")[0]] for conn in self._dns_scores  ]
 
 
     def _add_iana(self):
@@ -236,8 +255,8 @@ class OA(object):
         else:            
             self._dns_scores = [ conn + ["","",""] for conn in self._dns_scores ] 
 
-    def _add_network_context(self):
 
+    def _add_network_context(self):
         nc_conf_file = "{0}/components/nc/nc_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         if os.path.isfile(nc_conf_file):
             nc_conf = json.loads(open(nc_conf_file).read())["NC"]
@@ -245,15 +264,15 @@ class OA(object):
             ip_dst_index = self._conf["dns_results_fields"]["ip_dst"]
             self._dns_scores = [ conn + [dns_nc.get_nc(conn[ip_dst_index])] for conn in self._dns_scores ]
         else:
-            self._dns_scores = [ conn + [""] for conn in self._dns_scores ]
+            self._dns_scores = [ conn + [0] for conn in self._dns_scores ]
 
 
     def _get_oa_details(self):
         
-        self._logger.info("Getting OA DNS suspicious details/chord diagram")       
+        self._logger.info("Getting OA DNS suspicious details/dendro diagram")       
         # start suspicious connects details process.
         p_sp = Process(target=self._get_suspicious_details)
-        p_sp.start()        
+        p_sp.start()
 
         # start chord diagram process.            
         p_dn = Process(target=self._get_dns_dendrogram)
@@ -262,6 +281,7 @@ class OA(object):
         p_sp.join()
         p_dn.join()
 
+
     def _get_suspicious_details(self):
 
         iana_conf_file = "{0}/components/iana/iana_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -269,86 +289,87 @@ class OA(object):
             iana_config  = json.loads(open(iana_conf_file).read())
             dns_iana = IanaTransform(iana_config["IANA"])
         
-        for conn in self._dns_scores:
-            # get data to query
-            date=conn[self._conf["dns_score_fields"]["frame_time"]].split(" ")
-            date = filter(None,date)
-
-            if len(date) == 5:
-                year=date[2]
-                month=datetime.datetime.strptime(date[0], '%b').strftime('%m')
-                day=date[1]                
-                hh=conn[self._conf["dns_score_fields"]["hh"]]
-                dns_qry_name = conn[self._conf["dns_score_fields"]["dns_qry_name"]]
-                self._get_dns_details(dns_qry_name,year,month,day,hh,dns_iana)
+        for conn in self._dns_scores:       
+  
+            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]
+            full_date = datetime.datetime.utcfromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+  
+            date = full_date.split(" ")[0].split("-")
+            # get date parameters.
+            yr = date[0]
+            mn = date[1]
+            dy = date[2] 
+            time = full_date.split(" ")[1].split(":")
+            hh = int(time[0])
+
+            dns_qry_name = conn[self._conf["dns_score_fields"]["dns_qry_name"]]
+            self._get_dns_details(dns_qry_name,yr,mn,dy,hh,dns_iana)
 
     def _get_dns_details(self,dns_qry_name,year,month,day,hh,dns_iana):
+        value_string = ""
+        query_to_load =("""
+            SELECT unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,dns_qry_type,dns_qry_rcode,dns_a,h as hh
+            FROM {0}.{1} WHERE y={2} AND m={3} AND d={4} AND dns_qry_name LIKE '%{5}%' AND h={6} LIMIT {7};
+        """).format(self._db,self._table_name,year,month,day,dns_qry_name,hh,self._details_limit)
+        
+        try: 
+             dns_details = impala.execute_query(query_to_load) 
+        except:
+            self._logger.error("ERROR. Details couldn't be retreived for {0}, skipping this step".format(dns_qry_name))
+        else:
+            # add IANA to results.
+            update_rows = []
+            if dns_iana:
+                self._logger.info("Adding IANA translation to details results") 
                     
-        limit = self._details_limit
-        edge_file ="{0}/edge-{1}_{2}_00.csv".format(self._data_path,dns_qry_name.replace("/","-"),hh)
-        edge_tmp  ="{0}/edge-{1}_{2}_00.tmp".format(self._data_path,dns_qry_name.replace("/","-"),hh)
-
-        if not os.path.isfile(edge_file):
-    
-            dns_qry = ("SELECT frame_time,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,dns_qry_type,dns_qry_rcode,dns_a FROM {0}.{1} WHERE y={2} AND m={3} AND d={4} AND dns_qry_name LIKE '%{5}%' AND h={6} LIMIT {7};").format(self._db,self._table_name,year,month,day,dns_qry_name,hh,limit)
-            
-            # execute query
-	    try:
-                self._engine.query(dns_qry,edge_tmp)
-            except:
-		self._logger.error("ERROR. Edge file couldn't be created for {0}, skipping this step".format(dns_qry_name))
+                dns_details = [ conn + (str(dns_iana.get_name(conn[5],"dns_qry_class")),str(dns_iana.get_name(conn[6],"dns_qry_type")),str(dns_iana.get_name(conn[7],"dns_qry_rcode"))) for conn in dns_details ]
+            else: 
+                self._logger.info("WARNING: NO IANA configured.")
+                dns_details = [ conn + ("","","") for conn in dns_details ]
+
+            nc_conf_file = "{0}/components/nc/nc_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+            if os.path.isfile(nc_conf_file):
+                nc_conf = json.loads(open(nc_conf_file).read())["NC"]
+                dns_nc = NetworkContext(nc_conf,self._logger) 
+                dns_details = [ conn + (dns_nc.get_nc(conn[2]),) for conn in dns_details ]
             else:
-            # add IANA to results.
-                if dns_iana:
-                    update_rows = []
-                    self._logger.info("Adding IANA translation to details results")
-                    with open(edge_tmp) as dns_details_csv:
-                        rows = csv.reader(dns_details_csv, delimiter=',', quotechar='|')
-                        try:
-                            next(rows)
-                            update_rows = [[conn[0]] + [conn[1]] + [conn[2]] + [conn[3]] + [conn[4]] + [dns_iana.get_name(conn[5],"dns_qry_class")] + [dns_iana.get_name(conn[6],"dns_qry_type")] + [dns_iana.get_name(conn[7],"dns_qry_rcode")] + [conn[8]] for conn in rows]
-                            update_rows = filter(None, update_rows)
-                            header = [ "frame_time", "frame_len", "ip_dst","ip_src","dns_qry_name","dns_qry_class_name","dns_qry_type_name","dns_qry_rcode_name","dns_a" ]
-                            update_rows.insert(0,header)
-                        except IndexError:
-                            pass
-
-                else:
-                    self._logger.info("WARNING: NO IANA configured.")
-
-                    # create edge file.
-                self._logger.info("Creating edge file:{0}".format(edge_file))
-                with open(edge_file,'wb') as dns_details_edge:
-                    writer = csv.writer(dns_details_edge, quoting=csv.QUOTE_ALL)
-                    if update_rows:
-                        writer.writerows(update_rows)
-                    else:            
-                        shutil.copy(edge_tmp,edge_file)           
-                
-                os.remove(edge_tmp)
-
+                dns_details = [ conn + (0,) for conn in dns_details ]
+                         
+            for row in dns_details:
+                value_string += str(tuple(item for item in row)) + ","   
 
-    def _get_dns_dendrogram(self):
-        limit = self._details_limit
-        for conn in self._dns_scores:            
-            date=conn[self._conf["dns_score_fields"]["frame_time"]].split(" ")
-            date = filter(None,date)
-
-            if len(date) == 5:
-                year=date[2]
-                month=datetime.datetime.strptime(date[0], '%b').strftime('%m')
-                day=date[1]
-                ip_dst=conn[self._conf["dns_score_fields"]["ip_dst"]]
-                self._get_dendro(self._db,self._table_name,ip_dst,year,month,day, limit)
+            if value_string != "": 
+                
+                query_to_insert=("""
+                    INSERT INTO {0}.dns_edge PARTITION (y={1}, m={2}, d={3}) VALUES ({4});
+                """).format(self._db,year, month, day,  value_string[:-1])
 
+                impala.execute_query(query_to_insert) 
+ 
 
-    def _get_dendro(self,db,table,ip_dst,year,month,day,limit):
+    def _get_dns_dendrogram(self): 
 
-        dendro_file = "{0}/dendro-{1}.csv".format(self._data_path,ip_dst)
-        if not os.path.isfile(dendro_file):
-            dndro_qry = ("SELECT dns_a, dns_qry_name, ip_dst FROM (SELECT susp.ip_dst, susp.dns_qry_name, susp.dns_a FROM {0}.{1} as susp WHERE susp.y={2} AND susp.m={3} AND susp.d={4} AND susp.ip_dst='{5}' LIMIT {6}) AS tmp GROUP BY dns_a, dns_qry_name, ip_dst").format(db,table,year,month,day,ip_dst,limit)
-            # execute query
-            self._engine.query(dndro_qry,dendro_file)
+        for conn in self._dns_scores:   
+            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]
+            
+            full_date = datetime.datetime.utcfromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+            date = full_date.split(" ")[0].split("-")
+            # get date parameters.
+            
+            yr = date[0]
+            mn = date[1]
+            dy = date[2]
+            ip_dst=conn[self._conf["dns_score_fields"]["ip_dst"]]
+
+            query_to_load = ("""
+                INSERT INTO TABLE {0}.dns_dendro PARTITION (y={2}, m={3},d={4})
+                SELECT unix_tstamp, dns_a, dns_qry_name, ip_dst 
+                FROM (SELECT unix_tstamp, susp.ip_dst, susp.dns_qry_name, susp.dns_a
+                    FROM {0}.{1} as susp WHERE susp.y={2} AND susp.m={3} AND susp.d={4} AND susp.ip_dst='{5}' 
+                LIMIT {6}) AS tmp GROUP BY dns_a, dns_qry_name, ip_dst, unix_tstamp
+            """).format(self._db,self._table_name,yr,mn,dy,ip_dst,self._details_limit)
+           
+            impala.execute_query(query_to_load)
 
         
     def _ingest_summary(self):
@@ -363,48 +384,31 @@ class OA(object):
         result_rows = []        
         df_filtered =  pd.DataFrame()
 
-        ingest_summary_file = "{0}/is_{1}{2}.csv".format(self._ingest_summary_path,yr,mn)			
-        ingest_summary_tmp = "{0}.tmp".format(ingest_summary_file)
+        query_to_load = ("""
+            SELECT frame_time, COUNT(*) as total FROM {0}.{1}
+            WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL 
+            AND frame_time IS NOT NULL AND frame_len IS NOT NULL 
+            AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL 
+            AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL 
+            AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time;
+        """).format(self._db,self._table_name, yr, mn, dy)
 
-        if os.path.isfile(ingest_summary_file):
-        	df = pd.read_csv(ingest_summary_file, delimiter=',')
-            #discards previous rows from the same date
-        	df_filtered = df[df['date'].str.contains("{0}-{1}-{2}".format(yr, mn, dy)) == False] 
-        else:
-        	df = pd.DataFrame()
-            
-        # get ingest summary.
-        ingest_summary_qry = ("SELECT frame_time, COUNT(*) as total "
-                                    " FROM {0}.{1}"
-                                    " WHERE y={2} AND m={3} AND d={4} "
-                                    " AND unix_tstamp IS NOT NULL AND frame_time IS NOT NULL"
-                                    " AND frame_len IS NOT NULL AND dns_qry_name IS NOT NULL"
-                                    " AND ip_src IS NOT NULL " 
-                                    " AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL AND dns_qry_rcode IS NOT NULL ) "
-                                    " GROUP BY frame_time;") 
-
-        ingest_summary_qry = ingest_summary_qry.format(self._db,self._table_name, yr, mn, dy)
-        
-        results_file = "{0}/results_{1}.csv".format(self._ingest_summary_path,self._date)
-        self._engine.query(ingest_summary_qry,output_file=results_file,delimiter=",")
+        results = impala.execute_query_as_list(query_to_load)
+        df = pd.DataFrame(results)
 
+        # Forms a new dataframe splitting the minutes from the time column
+        df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy,val['frame_time'].split(" ")[3].split(":")[0].zfill(2),val['frame_time'].split(" ")[3].split(":")[1].zfill(2)), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df.iterrows()],columns = ingest_summary_cols)
 
-        if os.path.isfile(results_file):        
-            df_results = pd.read_csv(results_file, delimiter=',') 
+        #Groups the data by minute 
+        sf = df_new.groupby(by=['date'])['total'].sum()
+        df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
 
-            # Forms a new dataframe splitting the minutes from the time column
-            df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy,val['frame_time'].split(" ")[3].split(":")[0].zfill(2),val['frame_time'].split(" ")[3].split(":")[1].zfill(2)), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df_results.iterrows()],columns = ingest_summary_cols)
-    
-            #Groups the data by minute 
-            sf = df_new.groupby(by=['date'])['total'].sum()
-        
-            df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
-            
-            df_final = df_filtered.append(df_per_min, ignore_index=True)
-            df_final.to_csv(ingest_summary_tmp,sep=',', index=False)
+        df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
 
-            os.remove(results_file)
-            os.rename(ingest_summary_tmp,ingest_summary_file)
-        else:
-            self._logger.info("No data found for the ingest summary")
+        if len(df_final) > 0:
+            query_to_insert=("""
+                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
+            """).format(self._db, yr, mn, tuple(df_final))
+            
+            impala.execute_query(query_to_insert)  
         

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
index 5573c9a..88f047e 100644
--- a/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
@@ -17,19 +17,14 @@
    "source": [
     "import urllib2\n",
     "import json\n",
-    "import os\n",
-    "import csv\n",
+    "import os \n",
+    "import datetime\n",
     "\n",
     "# getting date from the parent path. \n",
     "path = os.getcwd().split(\"/\") \n",
     "date = path[len(path)-1]   \n",
     "dsource = path[len(path)-2]  \n",
-    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
-    "\n",
-    "sconnect = dpath + 'dns_scores.csv'\n",
-    "sconnectbu = dpath + 'dns_scores_bu.csv'\n",
-    "score_tmp = dpath + 'score_tmp.csv'  \n",
-    "score_fbk = dpath + 'dns_scores_fb.csv'  "
+    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/' "
    ]
   },
   {
@@ -56,7 +51,7 @@
     "from IPython.display import display, HTML, clear_output, Javascript \n",
     "\n",
     "def fill_list(list_control,source):\n",
-    "    options_list = ['--Select--'] \n",
+    "    options_list = ['- Select -'] \n",
     "    options_list.extend([s for s in source])\n",
     "    list_control.options = options_list\n",
     "\n",
@@ -101,18 +96,29 @@
     "    us_ips = []\n",
     "    us_dns = []\n",
     "\n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter=',')\n",
-    "        for row in reader:           \n",
-    "            if row['ip_dst'] not in us_ips and row['ip_sev'] == '0': \n",
-    "                us_ips.append(row['ip_dst'])\n",
-    "            if row['dns_qry_name'] not in us_dns and row['dns_sev'] == '0':\n",
-    "                us_dns.append(row['dns_qry_name']) \n",
-    "\n",
+    "    query=\"\"\"query($date:SpotDateType!) {\n",
+    "            dns{\n",
+    "                suspicious(date:$date){\n",
+    "                dnsQuery\n",
+    "                clientIp\n",
+    "            }\n",
+    "        }\n",
+    "    }\"\"\"\n",
+    "    variables={\n",
+    "        'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "    }\n",
+    "    response = GraphQLClient.request(query, variables)\n",
+    "  \n",
+    "    for row in response['data']['dns']['suspicious']:           \n",
+    "        if row['clientIp'] not in us_ips: \n",
+    "            us_ips.append(row['clientIp'])\n",
+    "        if row['dnsQuery'] not in us_dns:\n",
+    "            us_dns.append(row['dnsQuery'])  \n",
+    "            \n",
     "    fill_list(client_select,us_ips)\n",
     "    fill_list(query_select,us_dns)\n",
-    "    client_select.value = \"--Select--\"\n",
-    "    query_select.value = \"--Select--\"    \n",
+    "    client_select.value = \"- Select -\"\n",
+    "    query_select.value = \"- Select -\"    \n",
     "\n",
     "\n",
     "display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
@@ -138,110 +144,75 @@
     "import csv\n",
     "import datetime\n",
     "import subprocess \n",
+    "global score_values\n",
+    "score_values = []\n",
+    "\n",
     "\n",
     "def assign_score(b):\n",
-    "    score_values = []\n",
-    "    scored_threats = []\n",
-    "    ip_sev = int(rating_btn.selected_label) if not \"--Select--\" in client_select.value else \"\"\n",
-    "    dns_sev = int(rating_btn.selected_label) if not \"--Select--\" in query_select.value else \"\"    \n",
     "\n",
+    "    sev = int(rating_btn.selected_label) \n",
+    "    \n",
     "    if quick_text.value: \n",
     "        ip = \"\"\n",
     "        dns = quick_text.value\n",
-    "        dns_sev = int(rating_btn.selected_label) \n",
-    "        # Loop over every element in query_select widget\n",
-    "        score_values = []\n",
+    "        dns_sev = int(rating_btn.selected_label)  \n",
     "        for query in query_select.options:\n",
-    "            if query.endswith(dns):\n",
-    "                # Matching element, create one row\n",
-    "                score_values.append((ip,query,ip_sev,dns_sev))\n",
+    "            if query.endswith(dns): \n",
+    "                score_values.append((ip,query,sev))\n",
     "    else: \n",
-    "        ip = client_select.value if not \"--Select--\" in client_select.value else \"\"\n",
-    "        dns = query_select.value if not \"--Select--\" in query_select.value else \"\"\n",
-    "        score_values.append((ip,dns,ip_sev,dns_sev))\n",
-    "\n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter=',')\n",
-    "        rowct = 0\n",
-    "        with open(score_tmp, 'w') as score:\n",
-    "            wr = csv.DictWriter(score, delimiter=',', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "            wr.writeheader()\n",
-    "            for row in reader:   \n",
-    "                for value in score_values: \n",
-    "                    if row['ip_dst'] == value[0]:  \n",
-    "                        row['ip_sev'] = value[2]       \n",
-    "                        scored_threats.append(row)  \n",
-    "                        rowct += 1                  \n",
-    "                        break\n",
-    "                    if row['dns_qry_name'] == value[1]:  \n",
-    "                        row['dns_sev'] = value[3]                        \n",
-    "                        scored_threats.append(row)  \n",
-    "                        rowct += 1\n",
-    "                        break\n",
-    "                wr.writerow(row)     \n",
-    "                    \n",
-    "        if not os.path.exists(score_fbk):  \n",
-    "            with open(score_fbk, 'w') as feedback:\n",
-    "                wr = csv.DictWriter(feedback, delimiter='\\t', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "                wr.writeheader()\n",
-    "\n",
-    "        with open(score_fbk, 'a') as feedback:\n",
-    "            for row in scored_threats:\n",
-    "                wr = csv.DictWriter(feedback, delimiter='\\t', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "                wr.writerow(row)\n",
-    "\n",
-    "    clear_output()\n",
-    "    print \"{0} matching connections scored\".format(rowct)\n",
-    "    !mv $score_tmp $sconnect \n",
-    "\n",
-    "    if ip != \"--Select--\":\n",
+    "        ip = client_select.value if not \"- Select -\" in client_select.value else \"\"\n",
+    "        dns = query_select.value if not \"- Select -\" in query_select.value else \"\"\n",
+    "        score_values.append((ip,dns,sev))\n",
+    "        clear_output()\n",
+    "   \n",
+    "    if ip != \"- Select -\":\n",
     "        display(Javascript(\"$(\\\"option[data-value='\" + ip +\"']\\\").remove();\"))\n",
     "    if quick_text.value:\n",
     "        display(Javascript(\"$(\\\"option[data-value$='\" + quick_text.value +\"']\\\").remove();\"))\n",
-    "    elif dns != \"--Select--\":\n",
+    "    elif dns != \"- Select -\":\n",
     "        display(Javascript(\"$(\\\"option[data-value='\" + dns +\"']\\\").remove();\"))\n",
     "\n",
-    "    client_select.value = \"--Select--\"\n",
-    "    query_select.value = \"--Select--\"\n",
+    "    client_select.value = \"- Select -\"\n",
+    "    query_select.value = \"- Select -\"\n",
     "    quick_text.value = \"\"\n",
     "\n",
     "\n",
-    "def save(b):   \n",
-    "    clear_output()    \n",
-    "    display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
-    "    data_loader() \n",
-    "    display(scoring_form)\n",
-    "    display(Javascript('reloadParentData();'))\n",
-    "    ml_feedback() \n",
-    "    print \"Suspicious connects successfully updated\"\n",
-    "\n",
-    "\n",
-    "assign_btn.on_click(assign_score)\n",
-    "save_btn.on_click(save)\n",
+    "def save(b):    \n",
+    "    variables=[]\n",
+    "    global score_values\n",
+    "    mutation=\"\"\"mutation($input:[DnsScoreType!]!)\n",
+    "                {\n",
+    "                  dns{\n",
+    "                    score(input:$input)\n",
+    "                        {success}\n",
+    "                  }\n",
+    "                }\"\"\" \n",
+    "    \n",
+    "    for row in score_values:\n",
+    "        variables.append({\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "            'clientIp': row[0] if row[0] != \"\" else None,\n",
+    "            'dnsQuery': row[1] if row[1] != \"\" else None,\n",
+    "            'score': row[2] if row[2] != \"\" else None \n",
+    "            })\n",
+    "\n",
+    "    var = {'input':variables}\n",
+    "    response = GraphQLClient.request(mutation,var)\n",
+    "    \n",
+    "    score_values = []\n",
+    "    if not 'errors' in response:\n",
+    "        clear_output()    \n",
+    "        display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
+    "        data_loader() \n",
+    "        display(scoring_form)\n",
+    "        display(Javascript('reloadParentData();')) \n",
+    "        print \"Suspicious connects successfully updated\"\n",
+    "    else:\n",
+    "        print \"An error ocurred: \" + response['errors'][0]['message']\n",
     "        \n",
-    "\n",
-    "def ml_feedback():\n",
-    "    dst_name = os.path.basename(sconnect)\n",
-    "    str_fb=\"DSOURCE={0} &&\\\n",
-    "        FDATE={1} &&\\\n",
-    "        source /etc/spot.conf &&\\\n",
-    "        usr=$(echo $LUSER | cut -f3 -d'/') &&\\\n",
-    "        mlnode=$MLNODE &&\\\n",
-    "        lpath=$LPATH &&\\\n",
-    "        scp {2} $usr@$mlnode:$lpath/{3}\".format(dsource,date,score_fbk,dst_name) \n",
-    "\n",
-    "    subprocess.call(str_fb, shell=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# !cp $sconnectbu $sconnect"
+    "        \n",
+    "assign_btn.on_click(assign_score)\n",
+    "save_btn.on_click(save) "
    ]
   }
  ],

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
index 6eb9bad..cbaa2b7 100644
--- a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
@@ -11,7 +11,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -30,17 +30,9 @@
     "    from IPython.html import widgets\n",
     "from IPython.display import display, HTML, clear_output, Javascript \n",
     "\n",
-    "with open('/etc/spot.conf') as conf:\n",
-    "    for line in conf.readlines():\n",
-    "        if \"DBNAME=\" in line: DBNAME = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\");      \n",
-    "        elif \"IMPALA_DEM=\" in line: IMPALA_DEM = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\"); \n",
-    "\n",
     "path = os.getcwd().split(\"/\") \n",
     "t_date = path[len(path)-1]   \n",
-    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
-    "t_date = path[len(path)-1] \n",
-    "sconnect = dpath + 'dns_scores.csv' \n",
-    "threat_f = dpath + \"threats.csv\"\n",
+    "\n",
     "anchor = ''\n",
     "anchor_type = ''\n",
     "top_results = 20\n",
@@ -51,7 +43,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -127,7 +119,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -144,29 +136,39 @@
     "    ips_query = {} \n",
     "    ip_sev={}\n",
     "    dns_sev={}\n",
-    "    c_ips=[]\n",
-    "    c_dns=[]\n",
-    "\n",
-    "    if os.path.isfile(threat_f) and not file_is_empty(threat_f):\n",
-    "        with open(threat_f, 'r') as th:\n",
-    "            t_read = csv.reader(th, delimiter='|')\n",
-    "            t_read.next()\n",
-    "            for row in t_read: \n",
-    "                if row[0] != '' : c_ips.append(row[0])\n",
-    "                if row[1] != '' : c_dns.append(row[1])\n",
-    "            \n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter=',')\n",
-    "        for row in reader:\n",
-    "            if row['ip_dst'] not in ips_query and row['ip_dst'] not in c_ips and row['ip_sev'] == '1': \n",
-    "                    ips_query[row['ip_dst']]='i'\n",
-    "            if row['dns_qry_name'] not in ips_query and row['dns_qry_name'] not in c_dns and row['dns_sev'] == '1':\n",
-    "                    ips_query[row['dns_qry_name']]='q' \n",
+    "      \n",
+    "    response = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                dns{\n",
+    "                    threats{\n",
+    "                        list(date:$date) {\n",
+    "                            dnsScore\n",
+    "                            clientIpScore\n",
+    "                            clientIp\n",
+    "                            dnsQuery\n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(t_date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )  \n",
+    "    \n",
+    "    if not 'errors' in response: \n",
+    "        for row in response['data']['dns']['threats']['list']:        \n",
+    "            if row['clientIp'] not in ips_query and row['clientIpScore'] == 1: \n",
+    "                    ips_query[row['clientIp']]='i'\n",
+    "            if row['dnsQuery'] not in ips_query and row['dnsScore'] == 1: \n",
+    "                    ips_query[row['dnsQuery']]='q' \n",
     "            \n",
-    "            if row['ip_dst'] not in ip_sev: \n",
-    "                ip_sev[row['ip_dst']] = row['score']\n",
-    "            if row['dns_qry_name'] not in dns_sev: \n",
-    "                dns_sev[row['dns_qry_name']] =row['score']\n",
+    "            if row['clientIp'] not in ip_sev: \n",
+    "                ip_sev[row['clientIp']] = row['clientIpScore']\n",
+    "            if row['dnsQuery'] not in dns_sev: \n",
+    "                dns_sev[row['dnsQuery']] =row['dnsScore']\n",
+    "    else:\n",
+    "        print \"An error ocurred: \" + response[\"errors\"][0][\"message\"]\n",
+    " \n",
     "                \n",
     "    threat_title.value =\"<h4>Suspicious DNS</h4>\"\n",
     "                       \n",
@@ -192,7 +194,8 @@
     "  \n",
     "    def search_ip(b):  \n",
     "        global anchor \n",
-    "        global anchor_type\n",
+    "        global anchor_type \n",
+    "        global expanded_results\n",
     "        anchor = ''\n",
     "        anchor_type = ''\n",
     "        anchor = susp_select.selected_label  \n",
@@ -200,42 +203,40 @@
     "        removeWidget(2)\n",
     "        removeWidget(1) \n",
     "        clear_output()\n",
-    "        \n",
-    "        global ir_f\n",
-    "        ir_f = dpath + 'threat-dendro-' + anchor + \".csv\"             \n",
-    "        table = \"<table><th>IP</th><th>QUERY</th><th>TOTAL</th>\"\n",
-    "        \n",
-    "        if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):\n",
-    "            if anchor_type == 'i':\n",
-    "                imp_query = \"\\\" SELECT COUNT(dns_qry_name) as total, dns_qry_name, ip_dst, 0 as sev FROM {0}.dns \\\n",
-    "                        WHERE y={1} AND m={2} AND d={3} AND ip_dst='{4}' GROUP BY dns_qry_name, ip_dst \\\n",
-    "                        ORDER BY total DESC LIMIT {5}\\\"\"\n",
-    "            elif anchor_type == 'q':\n",
-    "                imp_query = \"\\\" SELECT COUNT(ip_dst) as total, dns_qry_name, ip_dst, 0 as sev FROM {0}.dns \\\n",
-    "                        WHERE y={1} AND m={2} AND d={3} AND dns_qry_name='{4}'\\\n",
-    "                        GROUP BY ip_dst, dns_qry_name ORDER BY total DESC LIMIT {5}\\\"\"\n",
     "\n",
-    "            imp_query=imp_query.format(DBNAME, yy, mm, dd, anchor, details_limit)\n",
-    "            !impala-shell -i $IMPALA_DEM --quiet -q \"INVALIDATE METADATA\"\n",
-    "            !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter=',' -q $imp_query -o $ir_f\n",
+    "        \n",
+    "        expanded_results = GraphQLClient.request(\n",
+    "            query=\"\"\"query($date:SpotDateType,$dnsQuery:String, $clientIp:SpotIpType){\n",
+    "                      dns{\n",
+    "                        threat{details(date:$date,dnsQuery:$dnsQuery,clientIp:$clientIp){\n",
+    "                          total \n",
+    "                          clientIp\n",
+    "                          dnsQuery \n",
+    "                        }}\n",
+    "                      }\n",
     "\n",
+    "                }\"\"\",\n",
+    "            variables={\n",
+    "                'date': datetime.datetime.strptime(t_date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                'dnsQuery': anchor if anchor_type == 'q' else None,\n",
+    "                'clientIp': anchor if anchor_type == 'i' else None  \n",
+    "            }\n",
+    "        )  \n",
+    "        \n",
     "        clear_output() \n",
-    "#       total, dns_qry_name, ip_dst, sev\n",
-    "        with open(ir_f, 'r') as f:\n",
-    "            try:\n",
-    "                reader = itertools.islice(csv.reader(f, delimiter=','), top_results) \n",
-    "                if reader!= '':\n",
-    "                    reader.next()\n",
-    "                    for row in reader:  \n",
-    "                        table += \"<tr><td class='spot-text-wrapper' data-toggle='tooltip'>\"+row[2]+\"</td>\\\n",
-    "                            <td class='spot-text-wrapper' data-toggle='tooltip'>\"+row[1]+\"</td>\\\n",
-    "                            <td align='center'>\"+str(row[0])+\"</td></tr>\"  \n",
+    "        \n",
+    "        if not 'errors' in expanded_results:\n",
+    "            table = \"<table><th>IP</th><th>QUERY</th><th>TOTAL</th>\"\n",
+    "        \n",
+    "            for row in expanded_results[\"data\"][\"dns\"][\"threat\"][\"details\"]: \n",
+    "                table += \"<tr><td class='spot-text-wrapper' data-toggle='tooltip'>\"+row[\"clientIp\"]+\"</td>\\\n",
+    "                    <td class='spot-text-wrapper' data-toggle='tooltip'>\"+row[\"dnsQuery\"]+\"</td>\\\n",
+    "                            <td align='center'>\"+str(row[\"total\"])+\"</td></tr>\"  \n",
     "\n",
-    "                table += \"</table>\"                  \n",
-    "                result_html_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)      \n",
-    "            except:\n",
-    "                table = \"<table></table>\"\n",
-    "                result_html_title.value='<h4>No results were found.</h4>'\n",
+    "            table += \"</table>\"                  \n",
+    "            result_html_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)  \n",
+    "        else:\n",
+    "            print \"An error ocurred: \" + response[\"errors\"][0][\"message\"]\n",
     "\n",
     "        result_html.value=table\n",
     "        result_html_box.children = [result_html]\n",
@@ -263,24 +264,49 @@
     "    result_summary_box.children = [result_summary_container, result_button_container]\n",
     "    resultSummaryBox.children = [result_title,result_summary_box]\n",
     "    \n",
-    "    \n",
+    "\n",
     "    def save_threat_summary(b):\n",
     "        global anchor\n",
     "        anchor_ip =''\n",
     "        anchor_dns ='' \n",
-    "        if anchor != '':             \n",
-    "            if anchor_type == 'i':\n",
-    "                anchor_ip = anchor\n",
-    "            elif anchor_type == 'q':\n",
-    "                anchor_dns = anchor\n",
-    "                \n",
-    "            if not os.path.exists(threat_f):  \n",
-    "                with open(threat_f, 'w') as comment:\n",
-    "                    comment.write('ip_dst|dns_qry_name|title|summary\\n')\n",
+    "        \n",
+    "        if anchor_type == 'i':\n",
+    "            anchor_ip = anchor\n",
+    "        elif anchor_type == 'q':\n",
+    "            anchor_dns = anchor\n",
     "            \n",
-    "            with open(threat_f, 'a') as comment:\n",
-    "                comment.write(anchor_ip + '|' + anchor_dns + '|' + tc_txt_title.value + '|' +\n",
-    "                                  tc_txa_summary.value.replace('\\n', '\\\\n') + '\\n') \n",
+    "        if anchor != '':\n",
+    "            mutation=\"\"\"mutation(\n",
+    "                        $date: SpotDateType, \n",
+    "                        $dnsQuery:String, \n",
+    "                        $clientIp:SpotIpType,\n",
+    "                        $text: String!, \n",
+    "                        $title: String!,\n",
+    "                        $threatDetails: [DnsThreatDetailsInputType!]!) \n",
+    "                        {\n",
+    "                          dns{\n",
+    "                            createStoryboard(input:{\n",
+    "                                threatDetails: $threatDetails,\n",
+    "                                date: $date, \n",
+    "                                dnsQuery: $dnsQuery, \n",
+    "                                clientIp: $clientIp,\n",
+    "                                title: $title, \n",
+    "                                text: $text\n",
+    "                                })\n",
+    "                            {success}\n",
+    "                          }\n",
+    "                        }\"\"\"\n",
+    "\n",
+    "            variables={\n",
+    "                'date': datetime.datetime.strptime(t_date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                'dnsQuery': anchor_dns if anchor_type == 'q' else None, \n",
+    "                'clientIp': anchor_ip if anchor_type == 'i' else None, \n",
+    "                'title': tc_txt_title.value,\n",
+    "                'text': tc_txa_summary.value.replace('\\n', '\\\\n'),\n",
+    "                'threatDetails': expanded_results['data']['dns']['threat']['details']  \n",
+    "            }\n",
+    "\n",
+    "            response = GraphQLClient.request(mutation, variables)\n",
     "\n",
     "            display(Javascript(\"$(\\\"option[data-value='\" + anchor +\"']\\\").remove();\"))   \n",
     "            display(Javascript(\"$('.widget-area > .widget-subarea > .widget-box:gt(0)').remove();\"))\n",

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/flow/flow_conf.json
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_conf.json b/spot-oa/oa/flow/flow_conf.json
index 18e76a7..cd96b08 100644
--- a/spot-oa/oa/flow/flow_conf.json
+++ b/spot-oa/oa/flow/flow_conf.json
@@ -21,20 +21,19 @@
 	    ,"score":17
     },
 	"column_indexes_filter": [0,8,9,10,11,12,13,14,15,16,17],	
-	"flow_score_fields": {
-		"sev": 0
-		,"tstart": 1
-		,"srcIP": 2
-		,"dstIP":3
-		,"sport":4
-		,"dport":5
-		,"proto":6
-		,"ipkt":7
-		,"ibyt":8
-		,"opkt":9
-		,"obyt":10
-		,"score":11
-		,"rank":12
+	"flow_score_fields": { 
+		"tstart": 0
+		,"srcIP": 1
+		,"dstIP":2
+		,"sport":3
+		,"dport":4
+		,"proto":5
+		,"ipkt":6
+		,"ibyt":7
+		,"opkt":8
+		,"obyt":9
+		,"score":10
+		,"rank":11
 	},
 	"flow_feedback_fields": {
 		"sev":0

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 027b54f..26e224b 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -24,25 +24,27 @@ import numpy as np
 import linecache, bisect
 import csv
 import pandas as pd
+import subprocess
+import numbers
+import api.resources.hdfs_client as HDFSClient
+import api.resources.impala_engine as impala
 
 from collections import OrderedDict
 from multiprocessing import Process
-from utils import Util,ProgressBar
+from utils import Util, ProgressBar
 from components.data.data import Data
 from components.geoloc.geoloc import GeoLocalization
 from components.reputation.gti import gti
-
 import time
 
 
 class OA(object):
 
-    def __init__(self,date,limit=500,logger=None):       
-       
-       self._initialize_members(date,limit,logger)
-       
-    def _initialize_members(self,date,limit,logger):
-        
+    def __init__(self,date,limit=500,logger=None):
+        self._initialize_members(date,limit,logger)
+
+    def _initialize_members(self,date,limit,logger): 
+
         # get logger if exists. if not, create new instance.
         self._logger = logging.getLogger('OA.Flow') if logger else Util.get_logger('OA.Flow',create_file=False)
 
@@ -57,31 +59,35 @@ class OA(object):
         self._ingest_summary_path = None
         self._flow_scores = []
         self._results_delimiter = '\t'
+        
 
         # get app configuration.
         self._spot_conf = Util.get_spot_conf()
 
-        # get scores fields conf
+        # get scores fields conf
         conf_file = "{0}/flow_conf.json".format(self._scrtip_path)
-        self._conf = json.loads(open (conf_file).read(),object_pairs_hook=OrderedDict)     
- 
+        self._conf = json.loads(open (conf_file).read(),object_pairs_hook=OrderedDict)
+
         # initialize data engine
         self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
         self._engine = Data(self._db, self._table_name,self._logger)
-                      
+        
+        
+                
     def start(self):       
         
         ####################
         start = time.time()
-        ####################
+        ####################         
 
         self._create_folder_structure()
+        self._clear_previous_executions()        
         self._add_ipynb()  
         self._get_flow_results()
         self._add_network_context()
         self._add_geo_localization()
         self._add_reputation()        
-        self._create_flow_scores_csv()
+        self._create_flow_scores()
         self._get_oa_details()
         self._ingest_summary()
 
@@ -89,12 +95,34 @@ class OA(object):
         end = time.time()
         print(end - start)
         ##################
-       
-    def _create_folder_structure(self):
+        
+
+    def _clear_previous_executions(self):
+        
+        self._logger.info("Cleaning data from previous executions for the day")       
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:]  
+        table_schema = []
+        HUSER = self._spot_conf.get('conf', 'HUSER').replace("'", "").replace('"', '')
+        table_schema=['suspicious', 'edge','chords','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
+
+        for path in table_schema:
+            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,mn,dy),user="impala")
+        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"",yr,mn),user="impala")
+        #removes Feedback file
+        HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
+        #removes json files from the storyboard
+        HDFSClient.delete_folder("{0}/{1}/oa/{2}/{3}/{4}/{5}".format(HUSER,self._table_name,"storyboard",yr,mn,dy))
+
+        
+
+
+    def _create_folder_structure(self):   
 
-        # create date folder structure if it does not exist.
         self._logger.info("Creating folder structure for OA (data and ipynb)")       
         self._data_path,self._ingest_summary_path,self._ipynb_path = Util.create_oa_folders("flow",self._date)
+ 
 
     def _add_ipynb(self):     
 
@@ -109,6 +137,7 @@ class OA(object):
         else:
             self._logger.error("There was a problem adding the IPython Notebooks, please check the directory exists.")
             
+            
     def _get_flow_results(self):
                
         self._logger.info("Getting {0} Machine Learning Results from HDFS".format(self._date))
@@ -117,8 +146,8 @@ class OA(object):
         # get hdfs path from conf file 
         HUSER = self._spot_conf.get('conf', 'HUSER').replace("'", "").replace('"', '')
         hdfs_path = "{0}/flow/scored_results/{1}/scores/flow_results.csv".format(HUSER,self._date)
-               
-        # get results file from hdfs
+        
+        # get results file from hdfs
         get_command = Util.get_ml_results_form_hdfs(hdfs_path,self._data_path)
         self._logger.info("{0}".format(get_command))
 
@@ -134,36 +163,36 @@ class OA(object):
             self._logger.error("There was an error getting ML results from HDFS")
             sys.exit(1)
 
-        # add headers.        
-        self._logger.info("Adding headers based on configuration file: score_fields.json")
-        self._flow_scores = [ [ str(key) for (key,value) in self._conf['flow_score_fields'].items()] ]
-
-        # filter results add sev and rank.
+        # filter results add rank.
         self._logger.info("Filtering required columns based on configuration")
-        self._flow_scores.extend([ [0] +  [ conn[i] for i in self._conf['column_indexes_filter'] ] + [n] for n, conn in enumerate(self._flow_results) ])
+
+        self._flow_scores.extend([ [ conn[i] for i in self._conf['column_indexes_filter'] ] + [n] for n, conn in enumerate(self._flow_results) ])
      
-    def _create_flow_scores_csv(self):
 
-        flow_scores_csv = "{0}/flow_scores.csv".format(self._data_path)
-        Util.create_csv_file(flow_scores_csv,self._flow_scores)
+    def _create_flow_scores(self):
+
+        # get date parameters.
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:] 
+        value_string = ""
 
-        # create bk file
-        flow_scores_bu_csv = "{0}/flow_scores_bu.csv".format(self._data_path)
-        Util.create_csv_file(flow_scores_bu_csv,self._flow_scores)  
+        for row in self._flow_scores:
+            value_string += str(tuple(Util.cast_val(item) for item in row)) + ","              
+    
+        load_into_impala = ("""
+             INSERT INTO {0}.flow_scores partition(y={2}, m={3}, d={4}) VALUES {1}
+        """).format(self._db, value_string[:-1], yr, mn, dy) 
+        impala.execute_query(load_into_impala)
+ 
 
     def _add_network_context(self):
 
         # use ipranges to see if the IPs are internals.         
         ip_ranges_file = "{0}/context/ipranges.csv".format(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 
-        # add new headers (srcIpInternal/destIpInternal).
-        self._logger.info("Adding network context headers")
-        flow_headers = self._flow_scores[0]
-        flow_headers.extend(["srcIpInternal","destIpInternal"])
-
         # add values to srcIpInternal and destIpInternal.
         flow_scores = iter(self._flow_scores)
-        next(flow_scores)
 
         if os.path.isfile(ip_ranges_file):
 
@@ -184,11 +213,9 @@ class OA(object):
             self._flow_scores = [ conn + [ self._is_ip_internal(conn[src_ip_index],ip_internal_ranges)]+[ self._is_ip_internal(conn[dst_ip_index],ip_internal_ranges)] for conn in flow_scores]
            
         else:
-
-            self._flow_scores = [ conn + ["",""] for conn in flow_scores ]            
+            self._flow_scores = [ conn + [0,0] for conn in flow_scores ]            
             self._logger.info("WARNING: Network context was not added because the file ipranges.csv does not exist.")
         
-        self._flow_scores.insert(0,flow_headers)
 
     def _is_ip_internal(self,ip, ranges):
         result = 0
@@ -204,14 +231,10 @@ class OA(object):
         # use ipranges to see if the IPs are internals.         
         iploc_file = "{0}/context/iploc.csv".format(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 
-        # add new headers (srcIpInternal/destIpInternal).     
         self._logger.info("Adding geo localization headers")
-        flow_headers = self._flow_scores[0]
-        flow_headers.extend(["srcGeo","dstGeo","srcDomain","dstDomain"]) 
 
         # add values to srcIpInternal and destIpInternal.
         flow_scores = iter(self._flow_scores)
-        next(flow_scores)
 
         if os.path.isfile(iploc_file):
 
@@ -241,17 +264,11 @@ class OA(object):
             self._flow_scores = [ conn + ["","","",""] for conn in flow_scores ]   
             self._logger.info("WARNING: IP location was not added because the file {0} does not exist.".format(iploc_file))
 
-        self._flow_scores.insert(0,flow_headers)       
-
+        
     def _add_reputation(self):
         
         reputation_conf_file = "{0}/components/reputation/reputation_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         
-        # add new headers (gtiSrcRep/gtiDstRep).
-        self._logger.info("Adding reputation headers")
-        flow_headers_rep = self._flow_scores[0]
-        flow_headers_rep.extend(["srcIP_rep","dstIP_rep"])
-        
         # read configuration.
         self._logger.info("Reading reputation configuration file: {0}".format(reputation_conf_file))
         rep_conf = json.loads(open(reputation_conf_file).read())
@@ -268,7 +285,6 @@ class OA(object):
 
             self._logger.info("Getting GTI reputation for src IPs")
             flow_scores_src = iter(self._flow_scores)
-            next(flow_scores_src)
 
             # getting reputation for src IPs
             src_ips = [ conn[src_ip_index] for conn in flow_scores_src ]            
@@ -276,30 +292,25 @@ class OA(object):
 
             self._logger.info("Getting GTI reputation for dst IPs")
             flow_scores_dst = iter(self._flow_scores)
-            next(flow_scores_dst)
 
             # getting reputation for dst IPs            
             dst_ips = [  conn[dst_ip_index] for conn in flow_scores_dst ]
             dst_rep_results = flow_gti.check(dst_ips)
 
             flow_scores_final = iter(self._flow_scores)
-            next(flow_scores_final)
 
             self._flow_scores = []
-            flow_scores = [conn + [src_rep_results[conn[src_ip_index]]] + [dst_rep_results[conn[dst_ip_index]]]  for conn in  flow_scores_final ]
+            flow_scores = [conn + [src_rep_results[conn[src_ip_index]]] + [dst_rep_results[conn[dst_ip_index]]] for conn in flow_scores_final ]
             self._flow_scores = flow_scores           
             
         else:
             # add values to gtiSrcRep and gtiDstRep.
             flow_scores = iter(self._flow_scores)
-            next(flow_scores)
 
             self._flow_scores = [ conn + ["",""] for conn in flow_scores ]   
             self._logger.info("WARNING: IP reputation was not added. No refclient configured")  
 
 
-        self._flow_scores.insert(0,flow_headers_rep)       
-
     def _get_oa_details(self):
 
         self._logger.info("Getting OA Flow suspicious details/chord diagram")
@@ -319,8 +330,6 @@ class OA(object):
         
         # skip header
         sp_connections = iter(self._flow_scores)
-        next(sp_connections)
-      
         # loop connections.
         connections_added = [] 
         for conn in sp_connections:
@@ -330,7 +339,7 @@ class OA(object):
                 continue
             else:
                 connections_added.append(conn)
-           
+            
             src_ip_index = self._conf["flow_score_fields"]["srcIP"]
             dst_ip_index = self._conf["flow_score_fields"]["dstIP"]
 
@@ -340,34 +349,32 @@ class OA(object):
             dip = conn[dst_ip_index]
 
             # get hour and date  (i.e. 2014-07-08 10:10:40)
-            date_array = conn[1].split(' ')
+            
+            date_array = conn[0].split(' ')
             date_array_1 = date_array[0].split('-')
             date_array_2 = date_array[1].split(':')
-
+	    
             yr = date_array_1[0]                   
             dy = date_array_1[2]
             mh = date_array_1[1]
 
             hr = date_array_2[0]
             mm = date_array_2[1]
-        
-            # connection details query.
-            sp_query = ("SELECT treceived as tstart,sip as srcip,dip as dstip,sport as sport,dport as dport,proto as proto,flag as flags,stos as TOS,ibyt as ibytes,ipkt as ipkts,input as input, output as output,rip as rip, obyt as obytes, opkt as opkts from {0}.{1} where ((sip='{2}' AND dip='{3}') or (sip='{3}' AND dip='{2}')) AND y={8} AND m={4} AND d={5} AND h={6} AND trminute={7} order by tstart limit 100")
-                 
-            # sp query.
-            sp_query = sp_query.format(self._db,self._table_name,sip,dip,mh,dy,hr,mm,yr)
-
-            # output file.
-            edge_file = "{0}/edge-{1}-{2}-{3}-{4}.tsv".format(self._data_path,sip.replace(".","_"),dip.replace(".","_"),hr,mm)
+            
+            query_to_load = ("""
+                INSERT INTO TABLE {0}.flow_edge PARTITION (y={2}, m={3}, d={4})
+                SELECT treceived as tstart,sip as srcip,dip as dstip,sport as sport,dport as dport,proto as proto,flag as flags,
+                stos as tos,ibyt as ibyt,ipkt as ipkt, input as input, output as output,rip as rip, obyt as obyt, 
+                opkt as opkt, h as hh, trminute as mn from {0}.{1} where ((sip='{7}' AND dip='{8}') or (sip='{8}' AND dip='{7}')) 
+                AND y={2} AND m={3} AND d={4} AND h={5} AND trminute={6};
+                """).format(self._db,self._table_name,yr, mh, dy, hr, mm, sip,dip)
+            impala.execute_query(query_to_load)
+            
 
-            # execute query
-            self._engine.query(sp_query,output_file=edge_file,delimiter="\\t")
-    
     def _get_chord_details(self,bar=None):
 
          # skip header
         sp_connections = iter(self._flow_scores)
-        next(sp_connections) 
 
         src_ip_index = self._conf["flow_score_fields"]["srcIP"]
         dst_ip_index = self._conf["flow_score_fields"]["dstIP"] 
@@ -389,69 +396,51 @@ class OA(object):
             if n > 1:
                 ip_list = []                
                 sp_connections = iter(self._flow_scores)
-                next(sp_connections)
                 for row in sp_connections:                    
-                    if ip == row[2] : ip_list.append(row[3])
-                    if ip == row[3] :ip_list.append(row[2])    
+                    if ip == row[1] : ip_list.append(row[2])
+                    if ip == row[2] :ip_list.append(row[1])    
                 ips = list(set(ip_list))
              
                 if len(ips) > 1:
                     ips_filter = (",".join(str("'{0}'".format(ip)) for ip in ips))
-                    chord_file = "{0}/chord-{1}.tsv".format(self._data_path,ip.replace(".","_"))                     
-                    ch_query = ("SELECT sip as srcip, dip as dstip, SUM(ibyt) as ibytes, SUM(ipkt) as ipkts from {0}.{1} where y={2} and m={3} \
-                        and d={4} and ( (sip='{5}' and dip IN({6})) or (sip IN({6}) and dip='{5}') ) group by sip,dip")
-                    self._engine.query(ch_query.format(self._db,self._table_name,yr,mn,dy,ip,ips_filter),chord_file,delimiter="\\t")
+ 
+                    query_to_load = ("""
+                        INSERT INTO TABLE {0}.flow_chords PARTITION (y={2}, m={3}, d={4})
+                        SELECT '{5}' as ip_threat, sip as srcip, dip as dstip, SUM(ibyt) as ibyt, SUM(ipkt) as ipkt from {0}.{1} where y={2} and m={3}
+                        and d={4} and ((sip='{5}' and dip IN({6})) or (sip IN({6}) and dip='{5}')) group by sip,dip,m,d;
+                        """).format(self._db,self._table_name,yr,mn,dy,ip,ips_filter)
+
+                    impala.execute_query(query_to_load)
+ 
 
-     
     def _ingest_summary(self): 
         # get date parameters.
         yr = self._date[:4]
         mn = self._date[4:6]
         dy = self._date[6:]
 
-        self._logger.info("Getting ingest summary data for the day")
+        self._logger.info("Getting ingest summary data for the day") 
+
+        query_to_load = ("""
+            INSERT INTO TABLE {0}.flow_ingest_summary PARTITION (y={2}, m={3})
+            SELECT treceived,tryear, trmonth, trday, trhour, trminute, COUNT(*) total
+            FROM {0}.{1}
+            WHERE y={2} AND m={3} AND d={4}
+            AND unix_tstamp IS NOT NULL
+            AND sip IS NOT NULL
+            AND sport IS NOT NULL
+            AND dip IS NOT NULL
+            AND dport IS NOT NULL
+            AND ibyt IS NOT NULL
+            AND ipkt IS NOT NULL
+            AND cast(treceived as timestamp) IS NOT NULL
+            GROUP BY treceived,tryear, trmonth, trday, trhour, trminute;
+            """).format(self._db,self._table_name,yr,mn,dy) 
         
-        ingest_summary_cols = ["date","total"]		
-        result_rows = []       
-        df_filtered =  pd.DataFrame() 
-
-        ingest_summary_file = "{0}/is_{1}{2}.csv".format(self._ingest_summary_path,yr,mn)			
-        ingest_summary_tmp = "{0}.tmp".format(ingest_summary_file)
-        if os.path.isfile(ingest_summary_file):
-            df = pd.read_csv(ingest_summary_file, delimiter=',',names=ingest_summary_cols, skiprows=1)
-            df_filtered = df[df['date'].str.contains("{0}-{1}-{2}".format(yr, mn, dy)) == False] 
-        else:
-            df = pd.DataFrame()
-        
-        # get ingest summary.           
-        ingest_summary_qry = ("SELECT tryear, trmonth, trday, trhour, trminute, COUNT(*) total"
-                            " FROM {0}.{1} "
-                            " WHERE "
-                            " y={2} "
-                            " AND m={3} "
-                            " AND d={4} "
-                            " AND unix_tstamp IS NOT NULL AND sip IS NOT NULL "
-                            " AND sport IS NOT NULL AND dip IS NOT NULL "
-                            " AND dport IS NOT NULL AND ibyt IS NOT NULL "
-                            " AND ipkt IS NOT NULL "
-                            " GROUP BY tryear, trmonth, trday, trhour, trminute;")
-
-
-        ingest_summary_qry = ingest_summary_qry.format(self._db,self._table_name, yr, mn, dy)
-        results_file = "{0}/results_{1}.csv".format(self._ingest_summary_path,self._date)
-        self._engine.query(ingest_summary_qry,output_file=results_file,delimiter=",")
-
-        if os.path.isfile(results_file):
-            result_rows = pd.read_csv(results_file, delimiter=',') 
-
-            df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy, str(val['trhour']).zfill(2), str(val['trminute']).zfill(2)), int(val[5])] for key,val in result_rows.iterrows()],columns = ingest_summary_cols)						
-
-            df_filtered = df_filtered.append(df_new, ignore_index=True)
-            df_filtered.to_csv(ingest_summary_tmp,sep=',', index=False)
-
-            os.remove(results_file)
-            os.rename(ingest_summary_tmp,ingest_summary_file)
-        else:
-            self._logger.info("No data found for the ingest summary")
+        impala.execute_query(query_to_load)
+
+
 
-        
\ No newline at end of file
+ 
+
+        
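
To make the CSV-to-Impala change above concrete, here is a hedged sketch of the VALUES-string construction in _create_flow_scores; cast_val is a stand-in for Util.cast_val (whose exact rules are not shown in this diff), and the database name and partition values are illustrative.

    def cast_val(item):
        # Stand-in: keep numerics unquoted for Impala, quote everything else.
        for caster in (int, float):
            try:
                return caster(item)
            except (ValueError, TypeError):
                pass
        return str(item)

    flow_scores = [
        ["2017-03-06 10:00:00", "10.0.0.1", "10.0.0.2", "1024", "80",
         "TCP", "10", "4096", "8", "2048", "0.0001", "0"],
    ]

    # Each row becomes one SQL tuple; rows are joined into a single VALUES list.
    value_string = ",".join(str(tuple(cast_val(i) for i in row)) for row in flow_scores)
    print("INSERT INTO spotdb.flow_scores PARTITION (y=2017, m=3, d=6) "
          "VALUES {0}".format(value_string))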

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
index f4536a3..ab41963 100644
--- a/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
@@ -40,21 +40,16 @@
     "dsource = path[len(path)-2]  \n",
     "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
     "cpath = '/'.join(['context' if var == 'ipynb' else var for var in path][:len(path)-2]) + '/'\n",
-    "opath = '/'.join(['oa' if var == 'ipynb' else var for var in path][:len(path)-1]) + '/'\n",
-    "sconnect = dpath + 'flow_scores.csv' \n",
-    "sconnectbu = dpath + 'flow_scores_bu.csv'\n",
-    "score_fbk = dpath + 'flow_scores_fb.csv'\n",
-    "tmpconnect = sconnect +'.tmp'\n",
-    "stemp = sconnect + '.new'\n",
-    "file_schemas = opath + dsource + '_conf.json'\n",
-    "#gets feedback columns from config file\n",
-    "feedback_cols = json.loads(open (file_schemas).read(),object_pairs_hook=OrderedDict)['flow_feedback_fields']\n",
+    "opath = '/'.join(['oa' if var == 'ipynb' else var for var in path][:len(path)-1]) + '/'  \n",
+    "\n",
     "coff = 250;\n",
     "nwloc = cpath + 'networkcontext.csv' \n",
     "srcdict,srclist = {},[]\n",
     "dstdict,dstlist = {},[]\n",
     "sportdict,sportlist = {},[]\n",
-    "dportdict,dportlist = {},[]"
+    "dportdict,dportlist = {},[]\n",
+    "global svals\n",
+    "svals = []"
    ]
   },
   {
@@ -87,8 +82,7 @@
     "    srclist.append('- Select -')\n",
     "    dstlist.append('- Select -')\n",
     "    sportlist.append('- Select -')\n",
-    "    dportlist.append('- Select -')\n",
-    "    set_rules()\n",
+    "    dportlist.append('- Select -')\n", 
     "    \n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
@@ -98,7 +92,6 @@
     "                    sport: srcPort\n",
     "                    dstIP: dstIp\n",
     "                    dport: dstPort\n",
-    "                    sev\n",
     "                }\n",
     "            }\n",
     "        }\"\"\",\n",
@@ -108,22 +101,24 @@
     "    )\n",
     "\n",
     "    rowct = 1\n",
-    "    for row in response['data']['flow']['suspicious']:\n",
-    "        if row['srcIP'] not in srcdict and row['sev'] == 0:\n",
-    "            srclist.append(row['srcIP'])\n",
-    "            srcdict[row['srcIP']] = struct.unpack(\"!L\", socket.inet_aton(row['srcIP']))[0]\n",
-    "        if row['dstIP'] not in dstdict and row['sev'] == 0:\n",
-    "            dstlist.append(row['dstIP'])\n",
-    "            dstdict[row['dstIP']] = struct.unpack(\"!L\", socket.inet_aton(row['dstIP']))[0]\n",
-    "        if row['sport'] not in sportdict and row['sev'] == 0:\n",
-    "            sportlist.append(row['sport'])\n",
-    "            sportdict[row['sport']] = row['sport']\n",
-    "        if row['dport'] not in dportdict and row['sev'] == 0:\n",
-    "            dportlist.append(row['dport'])\n",
-    "            dportdict[row['dport']] = row['dport']\n",
-    "        if rowct == coff:\n",
-    "            break;\n",
-    "        rowct += 1\n",
+    "    if not 'errors' in response:\n",
+    "        for row in response['data']['flow']['suspicious']:\n",
+    "            if row['srcIP'] not in srcdict:\n",
+    "                srclist.append(row['srcIP'])\n",
+    "                srcdict[row['srcIP']] = struct.unpack(\"!L\", socket.inet_aton(row['srcIP']))[0]\n",
+    "            if row['dstIP'] not in dstdict:\n",
+    "                dstlist.append(row['dstIP'])\n",
+    "                dstdict[row['dstIP']] = struct.unpack(\"!L\", socket.inet_aton(row['dstIP']))[0]\n",
+    "            if row['sport'] not in sportdict:\n",
+    "                sportlist.append(str(row['sport']))\n",
+    "                sportdict[row['sport']] = row['sport']\n",
+    "            if row['dport'] not in dportdict:\n",
+    "                dportlist.append(str(row['dport']))\n",
+    "                dportdict[row['dport']] = row['dport']\n",
+    "            if rowct == coff:\n",
+    "                break;\n",
+    "            rowct += 1\n",
+    "     \n",
     "    \n",
     "    # Source IP box\n",
     "    scrIpLalbel = widgets.HTML(value=\"Source IP:\", height='10%', width='100%')\n",
@@ -178,217 +173,64 @@
     "    \n",
     "    def update_sconnects(b):\n",
     "        clear_output()\n",
-    "        time.sleep(.25)\n",
-    "        dvals,svals = [], [] \n",
-    "        scored_threats =[]\n",
-    "        #define logic based on combo of input\n",
     "        #Gets input values\n",
+    "        global svals\n",
     "        if srctext.value != '':\n",
-    "            svals = [srctext.value,dstselect.value,sportselect.value,dportselect.value]\n",
-    "            dvals = [srcselect.value,srctext.value,sportselect.value,dportselect.value] \n",
+    "            svals.append([srctext.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
+    "            svals.append([srcselect.value,srctext.value,sportselect.value,dportselect.value, ratingbut.value])\n",
     "        else:\n",
-    "            svals = [srcselect.value,dstselect.value,sportselect.value,dportselect.value]\n",
-    "            dvals = [] \n",
-    "        risk = ratingbut.value \n",
-    "        shash, dhash = 0, 0\n",
-    "        fhash = ['srcIP','dstIP','sport','dport'] \n",
-    "        \n",
-    "        for k in xrange(len(svals)):\n",
-    "            if svals[k] == '- Select -': svals[k] = ''\n",
-    "            if svals[k] != '': shash += 2**k    \n",
-    "            if len(dvals) > 0:\n",
-    "                if dvals[k] == '- Select -': dvals[k] = ''\n",
-    "                if dvals[k] != '': dhash += 2**k    \n",
-    "        \n",
-    "        rowct = 0\n",
-    "        threat = []\n",
-    "        if shash > 0 or dhash > 0:            \n",
-    "            with open(tmpconnect,'w') as g:\n",
-    "                with open(sconnect, 'r') as f:\n",
-    "                    reader = csv.DictReader(f,delimiter=',')\n",
-    "                    riter = csv.DictWriter(g,delimiter=',', fieldnames=reader.fieldnames)\n",
-    "                    riter.writeheader()\n",
-    "                    \n",
-    "                    for row in reader: \n",
-    "                        result, resultd = 0,0\n",
-    "                        for n in xrange(0,len(svals)):\n",
-    "                            if (2**n & shash) > 0:  \n",
-    "                                if row[fhash[n]] == svals[n]:\n",
-    "                                    result += 2**n \n",
-    "                        if result == shash:\n",
-    "                            row['sev'] = risk \n",
-    "                            scored_threats.append({col:row[col] for col in feedback_cols.keys()})\n",
-    "                            rowct += 1\n",
-    "\n",
-    "                        if len(dvals) > 0:\n",
-    "                            for n in xrange(0,len(dvals)):\n",
-    "                                if (2**n & dhash) > 0:  \n",
-    "                                    if row[fhash[n]] == dvals[n]:\n",
-    "                                        resultd += 2**n \n",
-    "                            if resultd == dhash:\n",
-    "                                row['sev'] = risk\n",
-    "                                scored_threats.append({col:row[col] for col in feedback_cols.keys()})\n",
-    "                                rowct += 1\n",
-    "                                \n",
-    "                        riter.writerow(row) \n",
+    "            svals.append([srcselect.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
+    "     \n",
+    "        if srcselect.value != \"- Select -\":\n",
+    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
+    "        if dstselect.value != \"- Select -\":\n",
+    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
+    "        if sportselect.value != \"- Select -\":\n",
+    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
+    "        if dportselect.value != \"- Select -\":\n",
+    "            display(Javascript(\"$(\\\"option[data-value='\" + dportselect.value +\"']\\\").remove();\"))\n",
     "\n",
-    "            create_feedback_file(scored_threats)\n",
-    "            shutil.copyfile(tmpconnect,sconnect)\n",
-    "            \n",
-    "        print \"{0} matching connections scored\".format(rowct)\n",
     "        \n",
     "            \n",
     "    def savesort(b):\n",
+    "        global svals\n",
     "        clear_output()\n",
-    "        with open(stemp,'w') as g:\n",
-    "            reader = csv.DictReader(open(sconnect), delimiter=\",\")\n",
-    "            riter = csv.DictWriter(g,fieldnames=reader.fieldnames, delimiter=',')\n",
-    "            srtlist = sorted(reader, key=lambda x: (int(x[\"sev\"]), float(x[\"score\"])))\n",
-    "            riter.writeheader()\n",
-    "            riter.writerows(srtlist)\n",
-    "                \n",
-    "        shutil.copyfile(stemp,sconnect)\n",
-    "        print \"Suspicious connects successfully updated\"        \n",
-    "        display(Javascript('reloadParentData();')) \n",
-    "        bigBox.close()\n",
-    "        # Rebuild widgets form\n",
-    "        displaythis()\n",
-    "        ml_feedback()\n",
+    "        variables = []\n",
+    "        mutation=\"\"\"mutation($input:[NetflowScoreInputType!]!)\n",
+    "                {\n",
+    "                  flow{\n",
+    "                    score(input:$input)\n",
+    "                        {success}\n",
+    "                  }\n",
+    "                }\"\"\"\n",
+    "        \n",
+    "        for row in svals:\n",
+    "            variables.append({\n",
+    "                'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                'score': row[4],\n",
+    "                'srcIp': row[0] if row[0] != '- Select -' else None,\n",
+    "                'dstIp': row[1] if row[1] != '- Select -' else None,\n",
+    "                'srcPort': row[2] if row[2] != '- Select -' else None,\n",
+    "                'dstPort': row[3]  if row[3] != '- Select -' else None\n",
+    "                })\n",
+    "        \n",
+    "        var = {'input':variables}\n",
+    "        response = GraphQLClient.request(mutation,var)\n",
+    "         \n",
+    "        svals = []\n",
+    "        if not 'errors' in response :\n",
+    "            print \"Suspicious connects successfully updated\"        \n",
+    "            display(Javascript('reloadParentData();')) \n",
+    "            bigBox.close()\n",
+    "            # Rebuild widgets form\n",
+    "            displaythis() \n",
+    "        else:\n",
+    "            print \"An error ocurred whith the scoring process\"\n",
+    "            print response['errors'][0]['message']\n",
+    "        \n",
     "    assignbut.on_click(update_sconnects)\n",
     "    updatebut.on_click(savesort)\n",
-    "\n",
-    "    \n",
-    "def create_feedback_file(scored_rows):\n",
-    "#     #works on the feedback tab-separated file\n",
-    "    if not os.path.exists(score_fbk):  \n",
-    "        with open(score_fbk, 'w') as feedback:\n",
-    "            wr = csv.DictWriter(feedback, fieldnames=feedback_cols, delimiter='\\t', quoting=csv.QUOTE_NONE)   \n",
-    "            wr.writeheader()\n",
-    "\n",
-    "    wr = csv.DictWriter(open(score_fbk, 'a'), delimiter='\\t', fieldnames=feedback_cols, quoting=csv.QUOTE_NONE)\n",
-    "    for row in scored_rows:\n",
-    "        wr.writerow(row)\n",
-    "\n",
-    "\n",
-    "def set_rules():\n",
-    "    rops = ['leq','leq','leq','leq','leq','leq']\n",
-    "    rvals = ['','','',1024,'',54]\n",
-    "    risk = 2\n",
-    "    apply_rules(rops,rvals,risk)\n",
-    "    rops = ['leq','leq','leq','leq','eq','eq']\n",
-    "    rvals = ['','','',1024,3,152]\n",
-    "    risk = 2\n",
-    "    apply_rules(rops,rvals,risk)\n",
-    "    rops = ['leq','leq','leq','leq','eq','eq']\n",
-    "    rvals = ['','','',1024,2,104]\n",
-    "    risk = 2\n",
-    "    rops = ['leq','leq','eq','leq','leq','leq']\n",
-    "    rvals = ['','',0,1023,'','']\n",
-    "    risk = 2\n",
-    "    apply_rules(rops,rvals,risk)\n",
-    "\n",
-    "    \n",
-    "    \n",
-    "def apply_rules(rops,rvals,risk):\n",
-    "    #define logic based on combo of input\n",
-    "    rhash = 0\n",
-    "    rfhash = ['srcIP','dstIP','sport','dport', 'ipkt', 'ibyt']\n",
-    "    scored_threats=[]\n",
-    "    \n",
-    "    for k in xrange(len(rvals)):\n",
-    "        if rvals[k] != '':                \n",
-    "            rhash += 2**k\n",
-    "            \n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        with open(tmpconnect,'w') as g:\n",
-    "            reader = csv.DictReader(f,delimiter=',')\n",
-    "            riter = csv.DictWriter(g,fieldnames=reader.fieldnames,delimiter=',')\n",
-    "            riter.writeheader()\n",
-    "            for row in reader: \n",
-    "                result = 0\n",
-    "                for n in xrange(0,len(rvals)):\n",
-    "                    if (2**n & rhash) > 0:\n",
-    "                        if rops[n] == 'leq':\n",
-    "                            if int(row[rfhash[n]]) <= int(rvals[n]):\n",
-    "                                result += 2**n                           \n",
-    "                        if rops[n] == 'eq':\n",
-    "                            if int(row[rfhash[n]]) == int(rvals[n]):\n",
-    "                                result += 2**n                           \n",
-    "                if result == rhash:\n",
-    "                    row['sev'] = risk\n",
-    "                    scored_threats.append({col:row[col] for col in feedback_cols.keys()})\n",
-    "                riter.writerow(row)  \n",
-    "                \n",
-    "    create_feedback_file(scored_threats)\n",
-    "    shutil.copyfile(tmpconnect,sconnect)\n",
-    "    \n",
-    "    \n",
-    "def attack_heuristics():\n",
-    "    with open(sconnect, 'rb') as f:\n",
-    "        reader = csv.DictReader(f,delimiter=',') \n",
-    "        reader.next();\n",
-    "        rowct = 1\n",
-    "        for row in reader:\n",
-    "            if row['srcIP'] not in srcdict:\n",
-    "                srcdict[row['srcIP']] = row['srcIP']\n",
-    "            if row['dstIP'] not in dstdict:\n",
-    "                 dstdict[row['dstIP']] = row['dstIP']\n",
-    "            if row['sport'] not in sportdict:\n",
-    "                sportdict[row['sport']] = row['sport']\n",
-    "            if row['dport'] not in dportdict:\n",
-    "                dportdict[row['dport']] = row['dport']\n",
-    "\n",
-    "    df = pd.read_csv(sconnect)   \n",
-    "    gb = df.groupby([u'srcIP'])      \n",
-    "  \n",
-    "    for srcip in srcdict:\n",
-    "        try:\n",
-    "            if len(gb.get_group(srcip)) > 20:\n",
-    "                print srcip,'connects:',len(gb.get_group(srcip))\n",
-    "        except:\n",
-    "            print \"Key Error for ip: \" + srcip\n",
-    "               \n",
-    "            \n",
-    "def ml_feedback():\n",
-    "    dst_name = os.path.basename(sconnect)\n",
-    "    str_fb=\"DSOURCE={0} &&\\\n",
-    "        FDATE={1} &&\\\n",
-    "        source /etc/spot.conf &&\\\n",
-    "        usr=$(echo $LUSER | cut -f3 -d'/') &&\\\n",
-    "        mlnode=$MLNODE &&\\\n",
-    "        lpath=$LPATH &&\\\n",
-    "        scp {2} $usr@$mlnode:$lpath/{3}\".format(dsource,date,score_fbk,dst_name)  \n",
-    "    \n",
-    "    subprocess.call(str_fb, shell=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Run attack heuristics."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# set_rules()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# attack_heuristics()"
+    "    "
    ]
   },
   {
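
Outside the notebook, the same scoring mutation can be exercised directly. The sketch below is a bare-requests stand-in for the notebook's GraphQLClient helper; the endpoint URL, date, and input values are assumptions for illustration.

    import requests

    mutation = """mutation($input:[NetflowScoreInputType!]!) {
      flow { score(input:$input) { success } }
    }"""
    variables = {"input": [{
        "date": "2017-03-06", "score": 1,
        "srcIp": "10.0.0.1", "dstIp": None, "srcPort": None, "dstPort": None
    }]}

    # POST the mutation the way a bare-bones GraphQL client would.
    response = requests.post("http://localhost:8889/graphql",  # assumed endpoint
                             json={"query": mutation, "variables": variables})
    print(response.json())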


[05/50] [abbrv] incubator-spot git commit: remove files from HDFS/HIVE

Posted by ev...@apache.org.
remove files from HDFS/HIVE


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b85e3270
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b85e3270
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b85e3270

Branch: refs/heads/SPOT-35_graphql_api
Commit: b85e32701c20dabc776627e463fd8e278447ecf7
Parents: 1904f2b
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 12:27:40 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py     | 20 ++++++++++++--------
 spot-oa/oa/flow/flow_oa.py   | 14 +++++---------
 spot-oa/oa/proxy/proxy_oa.py | 10 +++++-----
 3 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index 8d3ce80..f72f5a4 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -105,8 +105,10 @@ class OA(object):
         table_schema=['suspicious', 'edge', 'dendro', 'threat_dendro', 'threat_investigation', 'storyboard' ]
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
+
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
@@ -334,9 +336,10 @@ class OA(object):
                 dns_details = [ conn + (dns_nc.get_nc(conn[2]),) for conn in dns_details ]
             else:
                 dns_details = [ conn + (0,) for conn in dns_details ]
-                         
+            
+            # value_string += str(tuple(row) for row in dns_details) + ","              
             for row in dns_details:
-                value_string += str(tuple(item for item in row)) + ","   
+                value_string += str(tuple(item for item in row)) + ","
 
             if value_string != "": 
                 
@@ -350,9 +353,10 @@ class OA(object):
     def _get_dns_dendrogram(self): 
 
         for conn in self._dns_scores:   
-            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]
-            
+            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]         
+
             full_date = datetime.datetime.utcfromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+
             date = full_date.split(" ")[0].split("-")
             # get date parameters.
             
@@ -408,7 +412,7 @@ class OA(object):
         if len(df_final) > 0:
             query_to_insert=("""
                 INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
-            """).format(self._db, yr, mn, tuple(df_final))
-            
+            """).format(self._db, yr, mn, tuple(df_final))            
             impala.execute_query(query_to_insert)  
+
         

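For context, HDFSClient.delete_folder presumably wraps a WebHDFS DELETE call like the hedged sketch below; the name node host, port, and path are illustrative. The int() casts mirror the fix above: they drop zero padding so paths match Hive partition folders (m=3, not m=03).

    import requests

    def delete_folder(path, user="impala",
                      name_node="namenode.example.com", port=50070):
        # WebHDFS REST call: DELETE /webhdfs/v1/<path>?op=DELETE&recursive=true
        url = "http://{0}:{1}/webhdfs/v1{2}".format(name_node, port, path)
        resp = requests.delete(url, params={"op": "DELETE", "recursive": "true",
                                            "user.name": user})
        return resp.json().get("boolean", False)

    path = "/user/spot/dns/hive/oa/suspicious/y=2017/m={0}/d={1}".format(
        int("03"), int("06"))
    print(delete_folder(path))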
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 26e224b..0eb3e22 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -69,10 +69,7 @@ class OA(object):
         self._conf = json.loads(open (conf_file).read(),object_pairs_hook=OrderedDict)
 
         # initialize data engine
-        self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
-        self._engine = Data(self._db, self._table_name,self._logger)
-        
-        
+        self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')        
                 
     def start(self):       
         
@@ -108,16 +105,15 @@ class OA(object):
         table_schema=['suspicious', 'edge','chords','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,mn,dy),user="impala")
-        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,int(mn),int(dy)),user="impala")
+       
+        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
         HDFSClient.delete_folder("{0}/{1}/oa/{2}/{3}/{4}/{5}".format(HUSER,self._table_name,"storyboard",yr,mn,dy))
 
-        
-
-
     def _create_folder_structure(self):   
 
         self._logger.info("Creating folder structure for OA (data and ipynb)")       

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index 02a9297..d54219e 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -72,7 +72,6 @@ class OA(object):
 
         # initialize data engine
         self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
-        self._engine = Data(self._db, self._table_name,self._logger)
 
 
     def start(self):
@@ -116,9 +115,10 @@ class OA(object):
         table_schema=['suspicious', 'edge','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
-        
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")        
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
+
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
@@ -354,4 +354,4 @@ class OA(object):
                 
         else:
             self._logger.info("No data found for the ingest summary")
-        
\ No newline at end of file
+        


[06/50] [abbrv] incubator-spot git commit: Flow GraphQL fixes

Posted by ev...@apache.org.
Flow GraphQL fixes


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/6c1f3001
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/6c1f3001
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/6c1f3001

Branch: refs/heads/SPOT-35_graphql_api
Commit: 6c1f300165429af9f7434fd0b8a0e3d41d980455
Parents: f37bb40
Author: Diego Ortiz <di...@intel.com>
Authored: Mon Mar 6 10:53:59 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/graphql/flow/mutation.py | 2 +-
 spot-oa/api/graphql/flow/query.py    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6c1f3001/spot-oa/api/graphql/flow/mutation.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/flow/mutation.py b/spot-oa/api/graphql/flow/mutation.py
index ce016d2..d4f5d7f 100644
--- a/spot-oa/api/graphql/flow/mutation.py
+++ b/spot-oa/api/graphql/flow/mutation.py
@@ -149,7 +149,7 @@ def _create_storyboard(args):
 
     result = Flow.create_storyboard(date=_date, ip=ip, title=title, text=text, expanded_search=threat_details, top_results=first)
 
-    return {'sucess': result}
+    return {'success': result}
 
 MutationType = GraphQLObjectType(
     name='NetflowMutationType',

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6c1f3001/spot-oa/api/graphql/flow/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/flow/query.py b/spot-oa/api/graphql/flow/query.py
index 91395ee..138ee98 100644
--- a/spot-oa/api/graphql/flow/query.py
+++ b/spot-oa/api/graphql/flow/query.py
@@ -624,8 +624,7 @@ QueryType = GraphQLObjectType(
                     description='End date'
                 )
             },
-            resolver=lambda root, args, *
-            _: Flow.ingest_summary(start_date=args.get('startDate'), end_date=args.get('endDate'))
+            resolver=lambda root, args, *_: Flow.ingest_summary(start_date=args.get('startDate'), end_date=args.get('endDate'))
         )
     }
 )

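The restored one-line lambda follows graphql-core's resolver signature. As a hedged illustration, the stub below wires the same pattern, assuming the graphql-core 1.x API this module appears to use; ingest_summary here stands in for api.resources.flow.Flow.ingest_summary.

    from graphql import (GraphQLArgument, GraphQLField, GraphQLObjectType,
                         GraphQLString)

    def ingest_summary(start_date, end_date):
        # Stub standing in for Flow.ingest_summary.
        return "summary for {0}..{1}".format(start_date, end_date)

    QueryType = GraphQLObjectType(
        name='NetflowQueryType',
        fields={
            'ingestSummary': GraphQLField(
                type=GraphQLString,
                args={
                    'startDate': GraphQLArgument(type=GraphQLString),
                    'endDate': GraphQLArgument(type=GraphQLString)
                },
                # Kept on one line, as in the fix above.
                resolver=lambda root, args, *_: ingest_summary(start_date=args.get('startDate'), end_date=args.get('endDate'))
            )
        }
    )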

[19/50] [abbrv] incubator-spot git commit: Include parent packages in oa module

Posted by ev...@apache.org.
Include parent packages in oa module


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b165f5ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b165f5ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b165f5ee

Branch: refs/heads/SPOT-35_graphql_api
Commit: b165f5eecb2073bad1020a8f7ac32f8e825a6eb1
Parents: 3386849
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.local>
Authored: Tue Mar 7 09:50:22 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/start_oa.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b165f5ee/spot-oa/oa/start_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/start_oa.py b/spot-oa/oa/start_oa.py
index b604aab..660814f 100644
--- a/spot-oa/oa/start_oa.py
+++ b/spot-oa/oa/start_oa.py
@@ -20,6 +20,7 @@
 import argparse
 import os
 import sys
+sys.path.append("../")
 import logging
 
 from utils import Util
@@ -39,43 +40,43 @@ def main():
     start_oa(args)
 
 def start_oa(args):
-    
-    # setup the main logger for all the OA process.    
+
+    # setup the main logger for all the OA process.
     logger = Util.get_logger('OA',create_file=False)
 
-    logger.info("-------------------- STARTING OA ---------------------")   
-    validate_parameters_values(args,logger)   
+    logger.info("-------------------- STARTING OA ---------------------")
+    validate_parameters_values(args,logger)
 
     # create data type instance.
     module = __import__("{0}.{0}_oa".format(args.type),fromlist=['OA'])
-   
-    # start OA.   
+
+    # start OA.
     oa_process = module.OA(args.date,args.limit,logger)
     oa_process.start()
-  
+
 def validate_parameters_values(args,logger):
-    
+
     logger.info("Validating input parameter values")
 
     #date.
-    is_date_ok = True if len(args.date) == 8 else False    
+    is_date_ok = True if len(args.date) == 8 else False
 
     # type
     dirs = os.walk(script_path).next()[1]
     is_type_ok = True if args.type in dirs else False
-   
-    #limit    
+
+    #limit
     try:
         int(args.limit)
         is_limit_ok = True
     except ValueError:
         is_limit_ok = False
-      
-    if not is_date_ok: logger.error("date parameter is not correct, please validate it") 
+
+    if not is_date_ok: logger.error("date parameter is not correct, please validate it")
     if not is_type_ok: logger.error("type parameter is not supported, please select a valid type")
     if not is_limit_ok: logger.error("limit parameter is not correct, please select a valid limit")
     if not is_date_ok or not is_type_ok or not is_limit_ok: sys.exit(1)
-   
+
 
 if __name__=='__main__':
-    main()
\ No newline at end of file
+    main()
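
The path append exists because start_oa.py loads pipeline modules dynamically from the parent oa package. A minimal sketch of that pattern, using a standard-library module so the snippet runs without the oa package installed:

    import importlib

    def load_pipeline(pipeline_type):
        # start_oa.py equivalent:
        # __import__("{0}.{0}_oa".format(pipeline_type), fromlist=['OA'])
        return importlib.import_module("{0}.{0}_oa".format(pipeline_type))

    # Same mechanism demonstrated against the standard library:
    decoder = importlib.import_module("json.decoder")
    print(decoder.JSONDecoder)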


[41/50] [abbrv] incubator-spot git commit: fix DNS dendrogram data

Posted by ev...@apache.org.
fix DNS dendrogram data


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8bab8f03
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8bab8f03
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8bab8f03

Branch: refs/heads/SPOT-35_graphql_api
Commit: 8bab8f03fab41562beb7f79396ee1f7e66a95ede
Parents: 03639dd
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.local>
Authored: Mon Mar 13 10:02:35 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8bab8f03/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index ed63c84..a2a8fea 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -85,7 +85,7 @@ def incident_progression(date, query,ip):
         return None
 
     db = Configuration.db()
-    return_value = "dns_qry_name" if query else "ip_dst"
+    return_value = "dns_qry_name" if ip else "ip_dst"
     dns_threat_query = ("""
             SELECT
                 anchor,total,{0}
@@ -96,7 +96,7 @@ def incident_progression(date, query,ip):
                 AND anchor = '{5}'
             """).format(return_value,db,date.year,date.month,date.day,\
             query if query else ip)
-
+                
     return ImpalaEngine.execute_query_as_list(dns_threat_query)
 
 """


[15/50] [abbrv] incubator-spot git commit: Updated clear_previous_executions method to delete data from summary by day partition

Posted by ev...@apache.org.
Updated clear_previous_executions method to delete data from summary by day partition


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/92cdccae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/92cdccae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/92cdccae

Branch: refs/heads/SPOT-35_graphql_api
Commit: 92cdccae54ef6df37b35b118e12701a42eabe3ea
Parents: 8841ffd
Author: LedaLima <le...@apache.org>
Authored: Tue Mar 7 17:27:12 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py     | 3 +--
 spot-oa/oa/flow/flow_oa.py   | 1 -
 spot-oa/oa/proxy/proxy_oa.py | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/92cdccae/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index 06da51d..b636b92 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -102,11 +102,10 @@ class OA(object):
         dy = self._date[6:]  
         table_schema = []
         HUSER = self._spot_conf.get('conf', 'HUSER').replace("'", "").replace('"', '')
-        table_schema=['suspicious', 'edge', 'dendro', 'threat_dendro', 'threat_investigation', 'storyboard' ]
+        table_schema=['suspicious', 'edge', 'dendro', 'threat_dendro', 'threat_investigation', 'storyboard', 'summary' ]
 
         for path in table_schema:
             HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
         impala.execute_query("invalidate metadata")
 
         #removes Feedback file

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/92cdccae/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 4f8bcb2..95ca44e 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -107,7 +107,6 @@ class OA(object):
 
         for path in table_schema:
             HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")        
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")        
         impala.execute_query("invalidate metadata")
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/92cdccae/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index f01d8ff..99e4561 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -116,7 +116,6 @@ class OA(object):
 
         for path in table_schema:
             HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")        
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
         impala.execute_query("invalidate metadata")
 
         #removes Feedback file

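The "invalidate metadata" call matters because the partition folders are removed behind Impala's back, straight from HDFS. A hedged sketch of what impala.execute_query likely amounts to, using the impyla client; the host and port are illustrative.

    from impala.dbapi import connect

    conn = connect(host="impala-daemon.example.com", port=21050)
    cursor = conn.cursor()
    # Folders were deleted directly from HDFS, so Impala's cached table
    # metadata is stale; invalidating it forces a reload on the next query.
    cursor.execute("INVALIDATE METADATA")
    cursor.close()
    conn.close()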

[36/50] [abbrv] incubator-spot git commit: Removed proxy scored connections from the list

Posted by ev...@apache.org.
Removed proxy scored connections from the list


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/c1be09ef
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/c1be09ef
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/c1be09ef

Branch: refs/heads/SPOT-35_graphql_api
Commit: c1be09efc2249be257c60be26644b2b9cc3d2a92
Parents: bb8847a
Author: LedaLima <le...@apache.org>
Authored: Fri Mar 10 11:35:53 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:50:20 2017 -0700

----------------------------------------------------------------------
 .../oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c1be09ef/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index 50041af..b173fe8 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -102,7 +102,7 @@
     "    \n",
     "    scored = []\n",
     "    for item in score_values:\n",
-    "        scored.append(urllib.quote_plus(item[0]))\n",
+    "	     scored.append(urllib.quote_plus(item[0]))\n",
     "        \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",
@@ -141,9 +141,7 @@
     "def assign_score(b): \n",
     "    clear_output()\n",
     "    uri = quick_text.value or uri_select.value\n",
-    "    uri_sev = int(rating_btn.selected_label) if not \"- Select -\" in uri_select.value else \"\"\n",
-    "    \n",
-    "    clear_output() \n",
+    "    uri_sev = int(rating_btn.selected_label) \n",
     "    \n",
     "    global score_values\n",
     "    \n",
@@ -151,6 +149,8 @@
     "        \n",
     "    if uri_select.value != \"- Select -\":\n",
     "        display(Javascript(\"$(\\\"option[data-value='\" + urllib.quote_plus(uri_select.value) +\"']\\\").remove();\"))\n",
+    "    else:\n",
+    "        display(Javascript(\"$(\\\"option[data-value$='\" + uri +\"']\\\").remove();\"))\n",
     "      \n",
     "    clear_output()\n",
     "    data_loader()\n",

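The quote_plus escaping is what lets a raw URI be matched against a widget option's data-value attribute. A small Python 2 illustration (the notebooks are Python 2); the sample URI is made up.

    import urllib

    uri = "http://example.com/path?a=1&b=two words"
    print(urllib.quote_plus(uri))
    # -> http%3A%2F%2Fexample.com%2Fpath%3Fa%3D1%26b%3Dtwo+words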

[22/50] [abbrv] incubator-spot git commit: Update README.md

Posted by ev...@apache.org.
Update README.md

Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/19af01b6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/19af01b6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/19af01b6

Branch: refs/heads/SPOT-35_graphql_api
Commit: 19af01b65f20a1ed2f4a5b61498675d8b8081d3d
Parents: ffc6dbc
Author: Everardo Lopez Sandoval <ev...@intel.com>
Authored: Thu Mar 9 13:29:55 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/19af01b6/spot-oa/api/resources/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/README.md b/spot-oa/api/resources/README.md
index 1d74e6f..ccfafb1 100644
--- a/spot-oa/api/resources/README.md
+++ b/spot-oa/api/resources/README.md
@@ -18,17 +18,17 @@ API Resources are the backend methods used by GraphQL to perform CRUD operations
 
 ## **Configuration Required (spot.conf):**
 
-API Resources use WebHDFS REST API (https://hadoop.apache.org/docs/r1.0.4/webhdfs.html) and Impala API based on that some new configuration is required.
+API Resources use [WebHDFS REST API] (https://hadoop.apache.org/docs/r1.0.4/webhdfs.html) and Impala API, based on that some new configuration is required.
 
 **_Keys in HDFS section:_**
 
-**NAME_NODE:** this key is required to setup the name node (full DNS domain or IP) to get connected to WebHDFS REST API.
-**WEB_PORT:** Web port to WebHDFS REST API (default=50070)
+* **NAME_NODE:** this key is required to setup the name node (full DNS domain or IP) to get connected to WebHDFS REST API.
+* **WEB_PORT:** Web port to WebHDFS REST API (default=50070)
 
 **_Keys in Impala section:_**
 
-**IMPALA_DEM:** This key has been there since the last release ,but now that we spot uses an API to get connected you need to either put the impala daemon full DNS or Server IP.
-**IMPALA_PORT:** Port on which HiveServer2 client requests are served by Impala Daemons.
+* **IMPALA_DEM:** This key has been there since the last release ,but now that we spot uses an API to get connected you need to either put the impala daemon full DNS or Server IP.
+* **IMPALA_PORT:** Port on which HiveServer2 client requests are served by Impala Daemons.
 
 ## **Prerequisites:**
 


[24/50] [abbrv] incubator-spot git commit: Adding link to OA installation

Posted by ev...@apache.org.
Adding link to OA installation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/56d6e885
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/56d6e885
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/56d6e885

Branch: refs/heads/SPOT-35_graphql_api
Commit: 56d6e885aea35fb569c846ca3274a8e0f30862ed
Parents: 19af01b
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.local>
Authored: Thu Mar 9 15:28:30 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/README.md | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/56d6e885/spot-oa/api/resources/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/README.md b/spot-oa/api/resources/README.md
index ccfafb1..1a9587b 100644
--- a/spot-oa/api/resources/README.md
+++ b/spot-oa/api/resources/README.md
@@ -3,31 +3,30 @@
 # API Resources 
 
 
-API Resources are the backend methods used by GraphQL to perform CRUD operations to Apache Spot (incubating) like score connections, performa a threat investigation, generate a storyboard, etc.
+API Resources are the backend methods used by GraphQL to perform CRUD operations in Apache Spot (incubating), like scoring connections, performing a threat investigation, generating a storyboard, etc.
 
 **Classes:**
 
-* Resources/flow.py
-* Resources/dns
-* Resources/proxy
-* Resources/configurator
-* Resources/hdfs_client
-* Resources/impala_engine.py
+* resources/flow.py
+* resources/dns
+* resources/proxy
+* resources/configurator
+* resources/hdfs_client
+* resources/impala_engine.py
 
 
 
 ## **Configuration Required (spot.conf):**
-
-API Resources use [WebHDFS REST API] (https://hadoop.apache.org/docs/r1.0.4/webhdfs.html) and Impala API, based on that some new configuration is required.
+API Resources use the [WebHDFS REST API](https://hadoop.apache.org/docs/r1.0.4/webhdfs.html); accordingly, spot.conf has been updated to include new keys (variables or entries).
 
 **_Keys in HDFS section:_**
 
 * **NAME_NODE:** this key is required to setup the name node (full DNS domain or IP) to get connected to WebHDFS REST API.
-* **WEB_PORT:** Web port to WebHDFS REST API (default=50070)
+* **WEB_PORT:** Web port for WebHDFS REST API (default=50070)
 
 **_Keys in Impala section:_**
 
-* **IMPALA_DEM:** This key has been there since the last release ,but now that we spot uses an API to get connected you need to either put the impala daemon full DNS or Server IP.
+* **IMPALA_DEM:** This key was part of the previous release. Now that Spot connects through an API, set it to the Impala daemon's full DNS name or server IP address.
 * **IMPALA_PORT:** Port on which HiveServer2 client requests are served by Impala Daemons.
 
 ## **Prerequisites:**
@@ -38,7 +37,7 @@ API Resources use [WebHDFS REST API] (https://hadoop.apache.org/docs/r1.0.4/webh
 * impyla
 * hdfs
 
-**NOTE: all this requirements are already part of requiremets.txt file, you dont need to install the python prerequisites manually.**
+**NOTE:** all these requirements are already part of the requirements.txt file; you don't need to install the Python prerequisites manually. See [OA install](../../../oa/INSTALL.md).
 
 #### Hadoop:
 

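For context, a minimal sketch of what these keys feed, assuming placeholder host names; the hdfs and impyla packages are the ones listed under Prerequisites:

    from hdfs import InsecureClient
    from impala.dbapi import connect

    # WebHDFS client built from NAME_NODE and WEB_PORT
    hdfs_client = InsecureClient('http://{0}:{1}'.format('namenode.example.com', 50070))

    # Impala connection built from IMPALA_DEM and IMPALA_PORT
    impala_conn = connect(host='impala-daemon.example.com', port=21050)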

[16/50] [abbrv] incubator-spot git commit: Added day partition to the ingest summary

Posted by ev...@apache.org.
Added day partition to the ingest summary


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8841ffd8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8841ffd8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8841ffd8

Branch: refs/heads/SPOT-35_graphql_api
Commit: 8841ffd853318cd2de0eecb7695df56097b34bf4
Parents: 50fbe61
Author: LedaLima <le...@apache.org>
Authored: Tue Mar 7 17:20:10 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py     | 4 ++--
 spot-oa/oa/flow/flow_oa.py   | 4 ++--
 spot-oa/oa/proxy/proxy_oa.py | 7 +++----
 3 files changed, 7 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8841ffd8/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index a154c35..06da51d 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -410,6 +410,6 @@ class OA(object):
 
         if len(df_final) > 0:
             query_to_insert=("""
-                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
-            """).format(self._db, yr, mn, tuple(df_final))            
+                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
+            """).format(self._db, yr, mn, dy, tuple(df_final))            
             impala.execute_query(query_to_insert)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8841ffd8/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index bf2d301..4f8bcb2 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -454,8 +454,8 @@ class OA(object):
             df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
             if len(df_final) > 0:
                 query_to_insert=("""
-                    INSERT INTO {0}.flow_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
-                """).format(self._db, yr, mn, tuple(df_final))
+                    INSERT INTO {0}.flow_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
+                """).format(self._db, yr, mn, dy, tuple(df_final))
 
                 impala.execute_query(query_to_insert)
                 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8841ffd8/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index 07313eb..f01d8ff 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -307,8 +307,7 @@ class OA(object):
             """).format(self._db,year, month, day, value_string[:-1])
 
             impala.execute_query(query_to_insert) 
-
-   
+ 
 
     def _ingest_summary(self): 
         # get date parameters.
@@ -347,8 +346,8 @@ class OA(object):
             df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
             if len(df_final) > 0:
                 query_to_insert=("""
-                    INSERT INTO {0}.proxy_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
-                """).format(self._db, yr, mn, tuple(df_final))
+                    INSERT INTO {0}.proxy_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
+                """).format(self._db, yr, mn, dy, tuple(df_final))
 
                 impala.execute_query(query_to_insert) 
                 

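To make the change above concrete, this is roughly the string the new code produces, with placeholder values for the database name, date, and rows:

    db, yr, mn, dy = 'spotdb', 2017, 3, 7
    df_final = [('2017-03-07 00:01:00', 10), ('2017-03-07 00:02:00', 12)]
    query_to_insert = ("""
        INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
    """).format(db, yr, mn, dy, tuple(df_final))
    # -> INSERT INTO spotdb.dns_ingest_summary PARTITION (y=2017, m=3, d=7)
    #    VALUES (('2017-03-07 00:01:00', 10), ('2017-03-07 00:02:00', 12));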

[39/50] [abbrv] incubator-spot git commit: Quick fix

Posted by ev...@apache.org.
Quick fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/550ba313
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/550ba313
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/550ba313

Branch: refs/heads/SPOT-35_graphql_api
Commit: 550ba313d0ecf7cbaf3137ab61cbba8ee46ef88a
Parents: b78e1df
Author: LedaLima <le...@apache.org>
Authored: Thu Mar 9 18:00:23 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:22 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 21 ++++----------------
 1 file changed, 4 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/550ba313/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index e2a89c2..b20c65e 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -29,24 +29,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "application/javascript": [
-       "$('.widget-area > .widget-subarea > *').remove();"
-      ],
-      "text/plain": [
-       "<IPython.core.display.Javascript object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
     "    import ipywidgets as widgets # For jupyter/ipython >= 1.4\n",
@@ -141,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },

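The commit above clears execution counts and stored outputs by editing the notebook JSON directly; a repeatable way to get the same result, assuming the nbformat package is available, would be:

    import nbformat

    nb = nbformat.read('Edge_Investigation_master.ipynb', as_version=4)
    for cell in nb.cells:
        if cell.cell_type == 'code':
            cell.outputs = []            # drop stored widget/JS output
            cell.execution_count = None  # serialize "execution_count" as null
    nbformat.write(nb, 'Edge_Investigation_master.ipynb')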

[02/50] [abbrv] incubator-spot git commit: Adding date and time to GraphQL NetflowScoredConnectionType

Posted by ev...@apache.org.
Adding date and time to GraphQL NetflowScoredConnectionType


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/c676feb0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/c676feb0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/c676feb0

Branch: refs/heads/SPOT-35_graphql_api
Commit: c676feb08a159607431eb9b71d48b113a7dea85c
Parents: ed90fd8
Author: Diego Ortiz <di...@intel.com>
Authored: Fri Mar 3 11:51:49 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:48:56 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/graphql/flow/query.py | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c676feb0/spot-oa/api/graphql/flow/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/flow/query.py b/spot-oa/api/graphql/flow/query.py
index 4fc8899..91395ee 100644
--- a/spot-oa/api/graphql/flow/query.py
+++ b/spot-oa/api/graphql/flow/query.py
@@ -191,6 +191,10 @@ IpConnectionDetailsType = GraphQLObjectType(
 ScoredConnectionType = GraphQLObjectType(
     name='NetflowScoredConnectionType',
     fields={
+        'tstart': GraphQLField(
+            type=SpotDatetimeType,
+            resolver=lambda root, *_: root.get('tstart')
+        ),
         'srcIp': GraphQLField(
             type=SpotIpType,
             description='Source Ip',

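A minimal sketch of the field pattern added above, using graphql-core the way the rest of query.py does; GraphQLString stands in here for the project's SpotDatetimeType scalar:

    from graphql import GraphQLObjectType, GraphQLField, GraphQLString

    ExampleType = GraphQLObjectType(
        name='ExampleScoredConnectionType',
        fields={
            'tstart': GraphQLField(
                type=GraphQLString,  # the real code uses SpotDatetimeType
                resolver=lambda root, *_: root.get('tstart')
            )
        }
    )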

[17/50] [abbrv] incubator-spot git commit: Updated api queries for ingest summary

Posted by ev...@apache.org.
Updated api queries for ingest summary


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/c35a4e9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/c35a4e9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/c35a4e9f

Branch: refs/heads/SPOT-35_graphql_api
Commit: c35a4e9ffb7748ac5efa1c8c160f601d7633751a
Parents: 92cdcca
Author: LedaLima <le...@apache.org>
Authored: Tue Mar 7 17:37:25 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py   | 5 +++--
 spot-oa/api/resources/flow.py  | 9 +++++----
 spot-oa/api/resources/proxy.py | 5 +++--
 3 files changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c35a4e9f/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index ae2076b..6000fa6 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -337,8 +337,9 @@ def ingest_summary(start_date,end_date):
                 FROM {0}.dns_ingest_summary
                 WHERE
                     ( y >= {1} and y <= {2}) AND
-                    ( m >= {3} and m <= {4})
+                    ( m >= {3} and m <= {4}) AND
+                    ( d >= {5} and d <= {6})
                 """)\
-                .format(db,start_date.year,end_date.year,start_date.month,end_date.month)
+                .format(db,start_date.year,end_date.year,start_date.month,end_date.month, start_date.day, end_date.day)
 
     return ImpalaEngine.execute_query_as_list(is_query)

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c35a4e9f/spot-oa/api/resources/flow.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/flow.py b/spot-oa/api/resources/flow.py
index d904b61..211932c 100755
--- a/spot-oa/api/resources/flow.py
+++ b/spot-oa/api/resources/flow.py
@@ -90,12 +90,13 @@ def ingest_summary(start_date,end_date):
                 tdate,total
             FROM {0}.flow_ingest_summary
             WHERE
-                ( y >= {1} AND y <= {2})
-                AND
-                ( m >= {3} AND m <= {4})
+                ( y >= {1} AND y <= {2}) AND
+                ( m >= {3} AND m <= {4}) AND
+                ( d >= {5} AND d <= {6})
             ORDER BY tdate
             """).format(db,start_date.year,end_date.year, \
-                        start_date.month,end_date.month)
+                        start_date.month,end_date.month, \
+                        start_date.day, end_date.day)
 
     return ImpalaEngine.execute_query_as_list(is_query)
 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c35a4e9f/spot-oa/api/resources/proxy.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/proxy.py b/spot-oa/api/resources/proxy.py
index 6f86aae..cbfb368 100644
--- a/spot-oa/api/resources/proxy.py
+++ b/spot-oa/api/resources/proxy.py
@@ -394,8 +394,9 @@ def ingest_summary(start_date,end_date):
                 FROM {0}.proxy_ingest_summary
                 WHERE
                     ( y >= {1} and y <= {2}) AND
-                    ( m >= {3} and m <= {4})
+                    ( m >= {3} and m <= {4}) AND
+                    ( d >= {5} and d <= {6})
                 """)\
-                .format(db,start_date.year,end_date.year,start_date.month,end_date.month)
+                .format(db,start_date.year,end_date.year,start_date.month,end_date.month, start_date.day, end_date.day)
 
     return ImpalaEngine.execute_query_as_list(is_query)

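Rendered with sample dates, the new filter looks like this. Note that the y, m, and d bounds are applied independently, so the pattern assumes start and end fall in the same month: a range such as Feb 25 to Mar 5 yields d >= 25 AND d <= 5, which matches nothing.

    import datetime

    start_date = datetime.date(2017, 3, 1)
    end_date = datetime.date(2017, 3, 15)
    where = ("( y >= {0} and y <= {1}) AND "
             "( m >= {2} and m <= {3}) AND "
             "( d >= {4} and d <= {5})").format(
                 start_date.year, end_date.year,
                 start_date.month, end_date.month,
                 start_date.day, end_date.day)
    # -> ( y >= 2017 and y <= 2017) AND ( m >= 3 and m <= 3) AND ( d >= 1 and d <= 15)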

[27/50] [abbrv] incubator-spot git commit: Bug fixing for Proxy Edge notebook

Posted by ev...@apache.org.
Bug fixing for Proxy Edge notebook


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/41c41325
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/41c41325
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/41c41325

Branch: refs/heads/SPOT-35_graphql_api
Commit: 41c41325325cce665acedaaa9cbc0af89e593873
Parents: 7abbab2
Author: LedaLima <le...@apache.org>
Authored: Thu Mar 9 17:58:03 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 23 +++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/41c41325/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index 1251960..cf638b7 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -29,11 +29,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "$('.widget-area > .widget-subarea > *').remove();"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "try:\n",
     "    import ipywidgets as widgets # For jupyter/ipython >= 1.4\n",
@@ -102,7 +115,7 @@
     "    \n",
     "    scored = []\n",
     "    for item in score_values:\n",
-    "        scored.append(item[0])\n",
+    "        scored.append(urllib.quote_plus(item[0]))\n",
     "        \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",
@@ -128,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "collapsed": false
    },


[03/50] [abbrv] incubator-spot git commit: Fix IANA code translation

Posted by ev...@apache.org.
Fix IANA code translation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8b5b32c3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8b5b32c3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8b5b32c3

Branch: refs/heads/SPOT-35_graphql_api
Commit: 8b5b32c35b4ffa05337492c9f1f1883a87fbf3bb
Parents: 93b2fdc
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 13:35:17 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py     | 4 ++--
 spot-oa/oa/proxy/proxy_oa.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8b5b32c3/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index a8dd2a5..1117583 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -251,8 +251,8 @@ class OA(object):
 
             dns_qry_class_index = self._conf["dns_results_fields"]["dns_qry_class"]
             dns_qry_type_index = self._conf["dns_results_fields"]["dns_qry_type"]
-            dns_qry_rcode_index = self._conf["dns_results_fields"]["dns_qry_rcode"]
-            self._dns_scores = [ conn + [ dns_iana.get_name(conn[dns_qry_class_index],"dns_qry_class")] + [dns_iana.get_name(conn[dns_qry_type_index],"dns_qry_type")] + [ dns_iana.get_name(conn[dns_qry_rcode_index],"dns_qry_rcode") ] for conn in self._dns_scores ]
+            dns_qry_rcode_index = self._conf["dns_results_fields"]["dns_qry_rcode"]            
+            self._dns_scores = [ conn + [ str(dns_iana.get_name(conn[dns_qry_class_index],"dns_qry_class"))] + [str(dns_iana.get_name(conn[dns_qry_type_index],"dns_qry_type"))] + [str(dns_iana.get_name(conn[dns_qry_rcode_index],"dns_qry_rcode"))] for conn in self._dns_scores ]
             
         else:            
             self._dns_scores = [ conn + ["","",""] for conn in self._dns_scores ] 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8b5b32c3/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index d54219e..ea46114 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -230,8 +230,8 @@ class OA(object):
         if os.path.isfile(iana_conf_file):
             iana_config  = json.loads(open(iana_conf_file).read())
             proxy_iana = IanaTransform(iana_config["IANA"])
-            proxy_rcode_index = self._conf["proxy_score_fields"]["respcode"]
-            self._proxy_scores = [ conn + [ proxy_iana.get_name(conn[proxy_rcode_index],"proxy_http_rcode")] for conn in self._proxy_scores ]
+            proxy_rcode_index = self._conf["proxy_score_fields"]["respcode"]            
+            self._proxy_scores = [ conn + [str(proxy_iana.get_name(conn[proxy_rcode_index],"proxy_http_rcode"))] for conn in self._proxy_scores ]
         else:
             self._proxy_scores = [ conn + [""] for conn in self._proxy_scores ]
 

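The str() cast added here guards the scored rows against non-string lookup results; a toy illustration (IanaTransform's behavior is assumed from how the OA code calls it):

    # stand-in for dns_iana.get_name(code, "dns_qry_rcode") on an unknown code
    name = None
    row = ['rawfield', str(name)]   # 'None' instead of a TypeError
    line = ','.join(row)            # joining would fail on a raw None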

[40/50] [abbrv] incubator-spot git commit: Show comments of the threats already captured

Posted by ev...@apache.org.
Show comments of the threats already captured


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/6427f645
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/6427f645
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/6427f645

Branch: refs/heads/SPOT-35_graphql_api
Commit: 6427f64501200c3cd1a82446539756b730741bb4
Parents: 9f4a5af
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Fri Mar 10 17:43:31 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py                    |  6 +--
 spot-oa/api/resources/hdfs_client.py            |  1 -
 .../Threat_Investigation_master.ipynb           | 53 ++++++++++++++++----
 .../Threat_Investigation_master.ipynb           | 36 ++++++++++---
 .../Threat_Investigation_master.ipynb           | 36 ++++++++++---
 5 files changed, 105 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6427f645/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index 767454e..9ac4a26 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -85,7 +85,7 @@ def incident_progression(date, query,ip):
         return None
 
     db = Configuration.db()
-    return_value = "dns_qry_name" if ip else "ip_dst"
+    return_value = "dns_qry_name" if query else "ip_dst"
     dns_threat_query = ("""
             SELECT
                 anchor,total,{0}
@@ -258,7 +258,7 @@ def create_storyboard(expanded_search,date,ip,query,title,text):
 
     anchor = ip if ip else query
     create_dendro(expanded_search,date,anchor)
-    save_comments(ip,query,title,text,date)
+    save_comments(anchor,ip,query,title,text,date)
 
 """
 --------------------------------------------------------------------------
@@ -299,7 +299,7 @@ def  save_comments(ip,query,title,text,date):
     # find value if already exists.
     saved = False
     for item in sb_data:
-        if item["ip_threat"] == ip or item["dns_threat"]== query:
+        if item["ip_threat"] == anchor or item["dns_threat"]== anchor:
             item["title"] = title
             item["text"] = text
             saved = True

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6427f645/spot-oa/api/resources/hdfs_client.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/hdfs_client.py b/spot-oa/api/resources/hdfs_client.py
index 41d4214..a6da531 100644
--- a/spot-oa/api/resources/hdfs_client.py
+++ b/spot-oa/api/resources/hdfs_client.py
@@ -8,7 +8,6 @@ import api.resources.configurator as Config
 def _get_client(user=None):
     hdfs_nm,hdfs_port,hdfs_user = Config.hdfs()
     client = InsecureClient('http://{0}:{1}'.format(hdfs_nm,hdfs_port), user= user if user else hdfs_user)
-    print hdfs_nm,hdfs_port,hdfs_user
     return client
 
 def get_file(hdfs_file):

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6427f645/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
index cbaa2b7..0c61ee4 100644
--- a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
@@ -36,7 +36,8 @@
     "anchor = ''\n",
     "anchor_type = ''\n",
     "top_results = 20\n",
-    "details_limit = 1000"
+    "details_limit = 1000\n",
+    "query_comments = {}\n"
    ]
   },
   {
@@ -136,7 +137,8 @@
     "    ips_query = {} \n",
     "    ip_sev={}\n",
     "    dns_sev={}\n",
-    "      \n",
+    "    global query_comments\n",
+    "    \n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
     "                dns{\n",
@@ -153,7 +155,30 @@
     "        variables={\n",
     "            'date': datetime.datetime.strptime(t_date, '%Y%m%d').strftime('%Y-%m-%d')\n",
     "        }\n",
-    "    )  \n",
+    "    )\n",
+    "    \n",
+    "    query_comments = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                dns{\n",
+    "                    threats{\n",
+    "                        comments(date:$date) {                           \n",
+    "                            title\n",
+    "                            text\n",
+    "                            ... on DnsQueryCommentType {\n",
+    "                            dnsQuery\n",
+    "                            }\n",
+    "                            ... on DnsClientIpCommentType {\n",
+    "                            clientIp\n",
+    "                            }\n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(t_date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )\n",
+    "    query_comments = query_comments['data']['dns']['threats']['comments']    \n",
     "    \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['dns']['threats']['list']:        \n",
@@ -241,17 +266,25 @@
     "        result_html.value=table\n",
     "        result_html_box.children = [result_html]\n",
     "\n",
-    "        display_threat_box(anchor)\n",
+    "        display_threat_box(anchor,anchor_type)\n",
     "        resultTableBox.children = [result_html_title, result_html_box]\n",
     "        display(bottomBox)\n",
     "    \n",
     "    search_btn.on_click(search_ip)\n",
     "\n",
     "        \n",
-    "def display_threat_box(ip):    \n",
+    "def display_threat_box(ip,anchor_type): \n",
+    "    \n",
+    "    global query_comments \n",
+    "    title =\"\" \n",
+    "    text = \"\"\n",
+    "    data_filter = \"\"\n",
+    "    data_filter = \"dnsQuery\" if anchor_type == 'q' else \"clientIp\"\n",
+    "    title = next((item['title'] for item in query_comments if item.get(data_filter) == ip), \"\")\n",
+    "    text = next((item['text'] for item in query_comments if item.get(data_filter) == ip), \"\") \n",
     "    result_title.value=\"<h4 class='spot-text-wrapper spot-text-xlg' data-toggle='tooltip'>Threat summary for \" + anchor +\"</h4>\"\n",
-    "    tc_txt_title = widgets.Text(value='', placeholder='Threat Title', width='100%')\n",
-    "    tc_txa_summary = widgets.Textarea(value='', height=100, width='95%')\n",
+    "    tc_txt_title = widgets.Text(value=title, placeholder='Threat Title', width='100%')\n",
+    "    tc_txa_summary = widgets.Textarea(value=text, height=100, width='95%')\n",
     "    tc_btn_save = widgets.Button(description='Save', width='65px', layout='width:100%')\n",
     "    tc_btn_save.button_style = 'primary'\n",
     "    \n",
@@ -306,9 +339,7 @@
     "                'threatDetails': expanded_results['data']['dns']['threat']['details']  \n",
     "            }\n",
     "\n",
-    "            response = GraphQLClient.request(mutation, variables)\n",
-    "\n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + anchor +\"']\\\").remove();\"))   \n",
+    "            response = GraphQLClient.request(mutation, variables)            \n",
     "            display(Javascript(\"$('.widget-area > .widget-subarea > .widget-box:gt(0)').remove();\"))\n",
     "            \n",
     "            response = \"Summary successfully saved\"\n",
@@ -335,7 +366,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": false
+    "collapsed": false   
    },
    "outputs": [],
    "source": [

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6427f645/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
index 761a434..9860580 100644
--- a/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
@@ -34,7 +34,8 @@
     "ir_f = ''\n",
     "threat_name = ''\n",
     "iplist = ''\n",
-    "top_results = 20"
+    "top_results = 20\n",
+    "ip_comments = {}"
    ]
   },
   {
@@ -131,7 +132,8 @@
     "def start_investigation(): \n",
     "    display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))   \n",
     "    external_ips = []\n",
-    "    clear_output()  \n",
+    "    clear_output()\n",
+    "    global ip_comments\n",
     "    \n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
@@ -152,6 +154,23 @@
     "        }\n",
     "    ) \n",
     "     \n",
+    "    ip_comments = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                flow{\n",
+    "                    threats{\n",
+    "                        comments(date:$date) {\n",
+    "                            ip\n",
+    "                            title\n",
+    "                            text\n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )\n",
+    "    ip_comments = ip_comments['data']['flow']['threats']['comments']\n",
     "    if not 'errors' in response : \n",
     "        for row in response['data']['flow']['threats']['list']:\n",
     "            if row['score'] == 1: \n",
@@ -236,9 +255,14 @@
     "\n",
     "def display_threat_box(ip):\n",
     "    clear_output()\n",
+    "    global ip_comments    \n",
+    "    title =\"\" \n",
+    "    text = \"\"\n",
+    "    title = next((item['title'] for item in ip_comments if item.get(\"ip\") == ip), \"\")\n",
+    "    text = next((item['text'] for item in ip_comments if item.get(\"ip\") == ip), \"\")  \n",
     "    result_title.value=\"<h4 class='spot-text-wrapper spot-text-xlg' data-toggle='tooltip'>Threat summary for \" + anchor +\"</h4>\"\n",
-    "    tc_txt_title = widgets.Text(value='', placeholder='Threat Title', width='100%')\n",
-    "    tc_txa_summary = widgets.Textarea(value='', height=100, width='95%')\n",
+    "    tc_txt_title = widgets.Text(value=title, placeholder='Threat Title', width='100%')\n",
+    "    tc_txa_summary = widgets.Textarea(value=text, height=100, width='95%')\n",
     "    tc_btn_save = widgets.Button(description='Save', width='65px', layout='width:100%')\n",
     "    tc_btn_save.button_style = 'primary'\n",
     "     \n",
@@ -383,8 +407,8 @@
    "execution_count": null,
    "metadata": {
     "collapsed": false
-   },
-   "outputs": [],
+     },
+     "outputs": [],     
    "source": [
     "start_investigation()"
    ]

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/6427f645/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
index f68f5c9..8372bbe 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
@@ -39,7 +39,8 @@
     "refered    = defaultdict(int)\n",
     "requests = []\n",
     "top_results = 20\n",
-    "details_limit = 1000  "
+    "details_limit = 1000\n",
+    "proxy_comments = {}"
    ]
   },
   {
@@ -141,7 +142,8 @@
     "    clear_output() \n",
     "    c_uri = []\n",
     "    uri_sev=[]\n",
-    "          \n",
+    "    global proxy_comments\n",
+    "    \n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
     "                 proxy{\n",
@@ -157,8 +159,24 @@
     "        variables={\n",
     "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
     "        }\n",
-    "    )  \n",
-    "     \n",
+    "    )\n",
+    "    proxy_comments = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                 proxy{\n",
+    "                    threats{\n",
+    "                        comments(date:$date) {                            \n",
+    "                            uri\n",
+    "                            title\n",
+    "                            text\n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )\n",
+    "    proxy_comments = proxy_comments['data']['proxy']['threats']['comments']\n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['threats']['list']:        \n",
     "            if row['uri'] not in uri_sev and row['score'] == 1: \n",
@@ -268,9 +286,15 @@
     "        \n",
     "def display_threat_box(ip):   \n",
     "    global expanded_results\n",
+    "    global proxy_comments\n",
+    "    \n",
+    "    title =\"\" \n",
+    "    comments = \"\"\n",
+    "    title = next((item['title'] for item in proxy_comments if item.get(\"uri\") == ip), \"\")\n",
+    "    comments = next((item['text'] for item in proxy_comments if item.get(\"uri\") == ip), \"\")\n",
     "    result_title.value=\"<h4 class='spot-text-wrapper spot-text-xlg' data-toggle='tooltip'>Threat summary for \" + anchor +\"</h4>\"\n",
-    "    tc_txt_title = widgets.Text(value='', placeholder='Threat Title', width='100%')\n",
-    "    tc_txa_summary = widgets.Textarea(value='', height=100, width='95%')\n",
+    "    tc_txt_title = widgets.Text(value=title, placeholder='Threat Title', width='100%')\n",
+    "    tc_txa_summary = widgets.Textarea(value=comments, height=100, width='95%')\n",
     "    tc_btn_save = widgets.Button(description='Save', width='65px', layout='width:100%')\n",
     "    tc_btn_save.button_style = 'primary'\n",
     "    \n",

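The three notebooks above all reuse one small lookup pattern to pre-fill the title and summary widgets; in isolation it looks like this (sample data, not the real API payload):

    query_comments = [
        {'dnsQuery': 'bad.example.com', 'title': 'Suspicious domain',
         'text': 'Investigated on 2017-03-10.'}
    ]
    data_filter = 'dnsQuery'   # or 'clientIp', depending on the anchor type
    anchor = 'bad.example.com'

    # first matching comment wins; empty string when nothing was saved yet
    title = next((item['title'] for item in query_comments
                  if item.get(data_filter) == anchor), "")
    text = next((item['text'] for item in query_comments
                 if item.get(data_filter) == anchor), "")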

[26/50] [abbrv] incubator-spot git commit: Quick fix

Posted by ev...@apache.org.
Quick fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/85431c64
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/85431c64
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/85431c64

Branch: refs/heads/SPOT-35_graphql_api
Commit: 85431c64c044bcc2f6016956dacf242664588f0a
Parents: 41c4132
Author: LedaLima <le...@apache.org>
Authored: Thu Mar 9 18:00:23 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 21 ++++----------------
 1 file changed, 4 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/85431c64/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index cf638b7..50041af 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -29,24 +29,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "application/javascript": [
-       "$('.widget-area > .widget-subarea > *').remove();"
-      ],
-      "text/plain": [
-       "<IPython.core.display.Javascript object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
     "    import ipywidgets as widgets # For jupyter/ipython >= 1.4\n",
@@ -141,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },


[20/50] [abbrv] incubator-spot git commit: Ingest summary link bug fixed, added the necessary element to show the Ingest Summary link on ui/ingest-summary.html

Posted by ev...@apache.org.
Ingest summary link bug fixed, added the necessary element to show the Ingest Summary link on ui/ingest-summary.html


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/3718d809
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/3718d809
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/3718d809

Branch: refs/heads/SPOT-35_graphql_api
Commit: 3718d8095e70ddd3b2735c7702e4d66befb38a55
Parents: 2c3f3d3
Author: bryanmontesv <br...@gmail.com>
Authored: Wed Mar 8 16:23:58 2017 -0800
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/ui/ingest-summary.html | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/3718d809/spot-oa/ui/ingest-summary.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/ingest-summary.html b/spot-oa/ui/ingest-summary.html
index bbc9877..bc910a9 100755
--- a/spot-oa/ui/ingest-summary.html
+++ b/spot-oa/ui/ingest-summary.html
@@ -157,6 +157,9 @@
                             </li>
                         </ul>
                     </li>
+                    <li>
+                        <a>Ingest Summary</a>
+                    </li>
                 </ul>
             </div>
             <div id="search-box" class="row text-right">


[43/50] [abbrv] incubator-spot git commit: DNS OA - Fixed bug where the IANA codes were not being translated for the details

Posted by ev...@apache.org.
DNS OA - Fixed bug where the IANA codes were not being translated for the details


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/9f4a5afb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/9f4a5afb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/9f4a5afb

Branch: refs/heads/SPOT-35_graphql_api
Commit: 9f4a5afb0bb6e67b9eceacbbb74841c38c59551f
Parents: 548eb17
Author: LedaLima <le...@apache.org>
Authored: Fri Mar 10 12:02:20 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/9f4a5afb/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index b636b92..71de7ea 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -324,7 +324,7 @@ class OA(object):
             if dns_iana:
                 self._logger.info("Adding IANA translation to details results") 
                     
-                dns_details = [ conn + (str(dns_iana.get_name(conn[5],"dns_qry_class")),str(dns_iana.get_name(conn[6],"dns_qry_type")),str(dns_iana.get_name(conn[7],"dns_qry_rcode"))) for conn in dns_details ]
+                dns_details = [ conn + (dns_iana.get_name(str(conn[5]),"dns_qry_class"),dns_iana.get_name(str(conn[6]),"dns_qry_type"),dns_iana.get_name(str(conn[7]),"dns_qry_rcode")) for conn in dns_details ]
             else: 
                 self._logger.info("WARNING: NO IANA configured.")
                 dns_details = [ conn + ("","","") for conn in dns_details ]

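The fix moves the str() cast from the lookup result to the lookup key: the IANA mapping is keyed by string codes, so an integer column value must be cast before the lookup, not after. A toy illustration (the real mapping comes from the IANA config file):

    iana_map = {'1': 'IN'}              # dns_qry_class code -> name
    code = 1                            # raw column value, an int
    name = iana_map.get(str(code), '')  # 'IN'; iana_map.get(code) would miss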

[14/50] [abbrv] incubator-spot git commit: use bulk insert for scored connections

Posted by ev...@apache.org.
use bulk insert for scored connections


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a32d3ed3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a32d3ed3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a32d3ed3

Branch: refs/heads/SPOT-35_graphql_api
Commit: a32d3ed36398bd4faeff12ac24fda5e2f1e2cf14
Parents: c35a4e9
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Tue Mar 7 18:22:06 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py         | 19 ++++++++++++-------
 spot-oa/api/resources/flow.py        | 19 +++++++++++--------
 spot-oa/api/resources/hdfs_client.py |  8 ++++++--
 spot-oa/api/resources/proxy.py       | 14 +++++++++-----
 4 files changed, 38 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a32d3ed3/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index 6000fa6..767454e 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -153,7 +153,14 @@ def  score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
     connections = ImpalaEngine.execute_query(sq_query + connections_filter)
 
     # add score to connections
+
+    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
+                        PARTITION (y={1},m={2},d={3})
+                        VALUES (""") \
+                        .format(db,date.year,date.month,date.day)
+
     fb_data =  []
+    first = True
     for row in connections:
         # insert into dns_threat_investigation.
         threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
@@ -163,13 +170,11 @@ def  score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
         row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
         row[15],row[1]])
 
-        insert_command = ("""
-            INSERT INTO {0}.dns_threat_investigation
-            PARTITION (y={1},m={2},d={3})
-            VALUES {4}
-            """).format(db,date.year,date.month,date.day,threat_data)
+        insert_command += "{0}{1}".format("," if not first else "", threat_data)
+        first = False
 
-        ImpalaEngine.execute_query(insert_command)
+    insert_command += ")"
+    ImpalaEngine.execute_query(insert_command)
 
     # create feedback file.
     app_path = Configuration.spot()
@@ -324,7 +329,7 @@ def  save_comments(ip,query,title,text,date):
 
 """
 --------------------------------------------------------------------------
-Return a list(dict) with all the data ingested during the timeframe 
+Return a list(dict) with all the data ingested during the timeframe
 provided.
 --------------------------------------------------------------------------
 """

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a32d3ed3/spot-oa/api/resources/flow.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/flow.py b/spot-oa/api/resources/flow.py
index 211932c..14d7132 100755
--- a/spot-oa/api/resources/flow.py
+++ b/spot-oa/api/resources/flow.py
@@ -228,24 +228,27 @@ def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=N
 
     connections_filter += " AND dport = {0}" \
     .format(str(dst_port)) if dst_port else ""
-
     connections = ImpalaEngine.execute_query(connections_query + connections_filter)
 
 
     # add score to connections
+    insert_command = ("""
+        INSERT INTO {0}.flow_threat_investigation
+        PARTITION (y={1},m={2},d={3})
+        VALUES (""")
+        .format(db,date.year,date.month,date.day)
+
     fb_data =  []
+    first = True
     for row in connections:
         # insert into flow_threat_investigation.
         threat_data = (row[0],row[1],row[2],row[3],row[4],score)
         fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
+        insert_command += "{0}{1}".format("," if not first else "", threat_data)
+        first = False
 
-        insert_command = ("""
-            INSERT INTO {0}.flow_threat_investigation
-            PARTITION (y={1},m={2},d={3})
-            VALUES {4}
-            """).format(db,date.year,date.month,date.day,threat_data)
-
-        ImpalaEngine.execute_query(insert_command)
+    insert_command += ")"
+    ImpalaEngine.execute_query(insert_command)
 
     # create feedback file.
     app_path = Configuration.spot()

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a32d3ed3/spot-oa/api/resources/hdfs_client.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/hdfs_client.py b/spot-oa/api/resources/hdfs_client.py
index d47d16c..41d4214 100644
--- a/spot-oa/api/resources/hdfs_client.py
+++ b/spot-oa/api/resources/hdfs_client.py
@@ -1,5 +1,6 @@
 
 from hdfs import InsecureClient
+from hdfs.util import HdfsError
 from json import dump
 import api.resources.configurator as Config
 
@@ -39,8 +40,11 @@ def delete_folder(hdfs_file,user=None):
     client.delete(hdfs_file,recursive=True)
 
 def list_dir(hdfs_path):
-    client = _get_client()
-    return client.list(hdfs_path)
+    try:
+        client = _get_client()
+        return client.list(hdfs_path)
+    except HdfsError:
+        return {}
 
 def file_exists(hdfs_path,file_name):
     files = list_dir(hdfs_path)

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a32d3ed3/spot-oa/api/resources/proxy.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/proxy.py b/spot-oa/api/resources/proxy.py
index cbfb368..5edafe2 100644
--- a/spot-oa/api/resources/proxy.py
+++ b/spot-oa/api/resources/proxy.py
@@ -89,8 +89,13 @@ def score_request(date,score,uri):
     connections = ImpalaEngine.execute_query(p_query)
 
     # add score to connections
-    fb_data =  []
+    insert_command = ("""
+		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
+		VALUES (""") \
+        .format(db,date.year,date.month,date.day)
 
+    fb_data =  []
+    first = True
     for row in connections:
         cip_index = row[2]
         uri_index = row[18]
@@ -103,11 +108,10 @@ def score_request(date,score,uri):
 			,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
 			,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
 			row[23],hash_field])
+        insert_command += "{0}{1}".format("," if not first else "", threat_data)
+        first = False
 
-	insert_command = ("""
-		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
-		VALUES {4}
-		""").format(db,date.year,date.month,date.day,threat_data)
+    insert_command += ")"
 	ImpalaEngine.execute_query(insert_command)
 
     # create feedback file.

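Mirroring the assembly above with placeholder rows: the loop now builds one statement instead of issuing one INSERT per connection, cutting the Impala round trips to a single execute_query() call per scoring action.

    db = 'spotdb'
    rows = [(1, '10.0.0.1', '10.0.0.2', 80, 443, 1),
            (1, '10.0.0.3', '10.0.0.4', 22, 22, 1)]

    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""").format(db, 2017, 3, 7)

    first = True
    for threat_data in rows:
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
    insert_command += ")"
    # one execute_query(insert_command) call for the whole batch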

[46/50] [abbrv] incubator-spot git commit: CSV Removal documentation update for proxy notebooks

Posted by ev...@apache.org.
CSV Removal documentation update for proxy notebooks


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/dbb5174d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/dbb5174d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/dbb5174d

Branch: refs/heads/SPOT-35_graphql_api
Commit: dbb5174dfefd97236781e214d539443821821ad0
Parents: 363c02d
Author: LedaLima <le...@apache.org>
Authored: Mon Mar 13 12:19:18 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 .../oa/proxy/ipynb_templates/EdgeNotebook.md    | 53 +++++--------
 .../ipynb_templates/ThreatInvestigation.md      | 78 ++++----------------
 2 files changed, 33 insertions(+), 98 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/dbb5174d/spot-oa/oa/proxy/ipynb_templates/EdgeNotebook.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/EdgeNotebook.md b/spot-oa/oa/proxy/ipynb_templates/EdgeNotebook.md
index 2f9472c..d75ec3f 100644
--- a/spot-oa/oa/proxy/ipynb_templates/EdgeNotebook.md
+++ b/spot-oa/oa/proxy/ipynb_templates/EdgeNotebook.md
@@ -19,56 +19,37 @@ The following python modules will be imported for the notebook to work correctly
 
 
 ###Pre-requisites
-- Execution of the spot-oa process for Proxy
+- Execute the hdfs_setup.sh script to create OA tables and set up permissions
 - Correct setup the spot.conf file [Read more](/wiki/Edit%20Solution%20Configuration)
-- Have a public key created between the current UI node and the ML node. [Read more](/wiki/Configure%20User%20Accounts#configure-user-accounts)
-
-
-###Data
-The whole process in this notebook depends entirely on the existence of `proxy_scores.tsv` file, which is generated at the OA process.  
-The data is directly manipulated on the .tsv files, so a `proxy_scores_bu.tsv` is created as a backup to allow the user to restore the original data at any point, 
-and this can be performed executing the last cell on the notebook with the following command.
-
-        !cp $sconnectbu $sconnect
-
-
-**Input files**
-All these paths should be relative to the main OA path.    
-Schema for these files can be found [here](/spot-oa/oa/proxy)
+- Execution of the spot-oa process for Proxy
+- Correct installation of the UI [Read more](/ui/INSTALL.md)
 
-        data/proxy/<date>/proxy_scores.tsv  
-        data/proxy/<date>/proxy_scores_bu.tsv
 
-**Temporary Files**
+###Data source 
+The whole process in this notebook depends entirely on the existence of the `proxy_scores` table in the database, which is generated by the OA process.  
+The data is manipulated through the GraphQL API, also included in the repository.
 
-        data/proxy/<date>/proxy_scores_tmp.tsv
+**Input**  
+The data to be processed should be stored in the following tables:
 
-**Output files**
+        proxy_scores
+        proxy
 
-        data/proxy/<date>/proxy_scores.tsv (Updated with severity values)
-        data/proxy/<date>/proxy_scores_fb.csv (File with scored connections that will be used for ML feedback)
+**Output**
+The following tables will be populated after the scoring process:
+        proxy_threat_investigation
 
 
 ###Functions
 **Widget configuration**
 This is not a function, but more like global code to set up styles and widgets to format the output of the notebook. 
 
-`data_loader():` - This function loads the source file into a csv dictionary reader to create a list with all disctinct full_uri values. 
+`data_loader():` - This function calls the GraphQL API query *suspicious* to list all suspicious unscored connections.
   
 `fill_list(list_control,source):` - This function loads the given dictionary into a listbox and appends an empty item at the top with the value '--Select--' (Just for design sake)
    
 ` assign_score(b):` - This event is executed when the user clicks the 'Score' button. 
-If the 'Quick scoring' textbox is not empty, the notebook will read that value and ignore any selection made in the listbox, otherwise the sought value will be obtained from the listbox.
-A linear search will be performed in the `proxy_scores.tsv` file to find all `full_uri` values matching the sought .
-In every matching row found, the `uri_sev` value will be updated according to the 'Rating' value selected in the radio button list. 
-All of the rows will then be appended to the `proxy_scores_tmp.tsv` file. At the end of this process, this file will replace the original `proxy_scores.tsv`.  
-
-Only the scored rows will also be appended to the `proxy_scores_fb.csv` file, which will later be used for the ML feedback.
-
-`save(b):` -This event is triggered by the 'Save' button, first it will remove the widget area and call the `load_data()` function to start the loading process again, this will 
-refresh the listbox removing all scored URIs.
-A javascript function is also executed to refresh the other panels in the suspicious connects page removing the need of a manual refresh.
-Afterwards the `ml_feedback()` function will be invoqued. 
+If the 'Quick scoring' textbox is not empty, the notebook will read that value and ignore any selection made in the listbox; otherwise the sought values will be obtained from the listbox and each value will be appended to a temporary list. 
 
-`ml_feedback():` - A shell script is executed, transferring thru secure copy the _proxy_scores_fb.csv_ file into ML Master node, where the destination path is defined at the spot.conf file.
-   
\ No newline at end of file
+`save(b):` - This event is triggered by the 'Save' button. First it removes the widget area and calls the `load_data()` function to start the loading process again, refreshing the listbox and removing all scored URIs.
+This function calls the *score* mutation, which updates the score for the selected values in the database (a minimal request sketch follows this diff).
\ No newline at end of file
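
For reference, the *score* mutation that `save(b)` triggers boils down to a single HTTP POST against the /graphql endpoint introduced later in this patch set. A minimal sketch in Python, assuming the server runs locally; the host/port, the top-level proxy wrapper, the uri input field and the success output field are all assumptions, since this diff only names the mutation:

    import requests

    # Everything below that is not named in the diff above is an assumption:
    # host/port, the 'proxy' wrapper, the 'uri' input field and the 'success'
    # field of the operation result.
    mutation = '''
    mutation {
        proxy {
            score(input: [{
                score: 1,
                uri: "http://example.com/suspicious/path"
            }]) {
                success
            }
        }
    }
    '''

    resp = requests.post('http://localhost:8889/graphql',
                         json={'query': mutation})
    print(resp.json())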

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/dbb5174d/spot-oa/oa/proxy/ipynb_templates/ThreatInvestigation.md
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/ThreatInvestigation.md b/spot-oa/oa/proxy/ipynb_templates/ThreatInvestigation.md
index 0681461..cb17619 100644
--- a/spot-oa/oa/proxy/ipynb_templates/ThreatInvestigation.md
+++ b/spot-oa/oa/proxy/ipynb_templates/ThreatInvestigation.md
@@ -22,7 +22,8 @@ The following python modules will have to be imported for the notebook to work c
 
 ##Pre-requisites  
 - Execution of the spot-oa process for Proxy
-- Score a set connections at the Edge Investigation Notebook
+- Correct installation of the UI [Read more](/ui/INSTALL.md)
+- Score a set of connections at the Edge Investigation Notebook 
 - Correct setup of the spot.conf file. [Read more](/wiki/Edit%20Solution%20Configuration) 
 
 
@@ -30,62 +31,20 @@ The following python modules will have to be imported for the notebook to work c
 `top_results` - This value defines the number of rows that will be displayed onscreen after the expanded search. 
 This also affects the number of IPs that will appear in the Timeline chart.
 
-##Data
-The whole process in this notebook depends entirely on the existence of the scored _proxy_scores.tsv_ file, which is generated at the OA process, and scored at the Edge Investigation Notebook.
-
-**Input files**
-Schema for these files can be found [here](/spot-oa/oa/proxy)
-
-        ~/spot-oa/data/proxy/<date>/proxy_scores.tsv  
-
-**Output files**  
-- threats.csv : Pipe separated file containing the comments saved by the user. This file is updated every time the user adds comments for a new threat. 
-        
-        Schema with zero-indexed columns:
-        
-        0.hash: string
-        1.title: string
-        2.description: string
-
-- incident-progression-\<anchor hash>.json : Json file generated in base of the results from the expanded 
-search. This file includes a list of all requests performed to and from the URI under analysis, as well as the request methods used and the response content type. 
-These results are limited to the day under analysis. 
-this file will serve as datasource for the Incident Progression chart at the storyboard.
-        
-        Schema with zero-indexed columns:
-
-        {
-            'fulluri':<URI under investigation>, 
-            'requests': [{
-                'clientip':<client IP>,
-                'referer':<referer for the URI under analysis>,
-                'reqmethod':<method used to connect to the URI>,
-                'resconttype':<content type of the response>
-                }, ...
-                ],
-            'referer_for':[
-                         <List of unique URIs refered by the URI under investigation> 
-            ]
-        }
-
-- timeline-\<anchor hash>.tsv : Tab separated file, this file lists all the client IP's that connected to the URI under investigation, including: 
-the duration of the connection, response code and exact date and time of the connections.
-
-        Schema with zero-indexed columns:
-        
-        0.tstart: string
-        1.tend: string
-        2.duration: string
-        3.clientip: string
-        4.respcode: string
- 
-- es-\<anchor hash>.tsv : (Expanded Search). Tab separated file, this is formed with the results from the Expanded Search query. Includes all connections where the investigated URI matches the `referer` or the `full_uri` columns.  
+##Data source
+Data should exist in the following tables:
+        *proxy*
+        *proxy_threat_investigation*
 
 
-**HDFS tables consumed**
+**Output**  
+The following tables will be populated after the threat investigation process:
+        *proxy_storyboard*
+        *proxy_timeline*
 
-        proxy
+The following files will be created and stored in HDFS.
 
+        incident-progression-\<anchor hash>.json
 
 ##Functions  
 **Widget configuration**
@@ -94,9 +53,8 @@ This is not a function, but more like global code to set up styles and widgets t
 
 `start_investigation():` - This function cleans the notebook from previous executions, then calls the data_loader() function to obtain the data and afterwards displays the corresponding widgets
 
-`data_loader():` - This function loads the source _proxy_scores.tsv_ file into a csv dictionary reader to create a list with all disctinct `full_uri` values 
-where `uri_sev` = 1. This function will also read through the _threats.tsv_ file to discard all URIs that have already been investigated. 
-  
+`data_loader():` - This function calls the *threats* query to get the source and destination IPs previously scored as high risk, to create a list with all distinct `full_uri` values.
+
 `fill_list(list_control,source):` - This function populates a listbox widget with the given data list and appends an empty item at the top with the value '--Select--' (Just for visualization  sake)
 
 `display_controls():` - This function will only display the main widget box, containing:
@@ -106,9 +64,7 @@ where `uri_sev` = 1. This function will also read through the _threats.tsv_ file
 - Container for the "Top N results" HTML table
 
 `search_ip(b):` - This function is triggered by the _onclick_ event of the "Search" button.
-This will get the selected value from the listbox and perform a query to the _proxy_ table to retrieve all comunication involving the selected URI.
-Using MD5 algorythm, the URI will be hashed and use it in the name of the output files (anchor hash)
-The output of the query will automatically fill the es-/<anchor hash>.tsv file. 
+This calls the GraphQL *threat / details* query to find additional connections involving the selected full URI. 
 Afterwards it will read through the output file to display the HTML table, this will be limited to the value set in the _top_results_ variable. At the same time, four dictionaries will be filled:
 - clientips
 - reqmethods * 
@@ -119,8 +75,6 @@ Afterwards it will read through the output file to display the HTML table, this
 This function will also display the 'Threat summary' and 'title' textboxes, along with the 'Save' button.
 
 `save_threat_summary(b):` - This function is triggered by the _onclick_ event on the 'Save' button.
- This will take the contents of the form and create/update the _threats.csv_ file.
- 
-`file_is_empty(path):` - Performs a validation to check the file size to determine if it is empty.
+Removes the widgets, cleans the notebook from previous executions, removes the selected value from the listbox widget, and executes the *createStoryboard* mutation to save the data for the storyboard (a minimal request sketch follows this diff).
  
 `removeWidget(index):` - Javascript function that removes a specific widget from the notebook. 
\ No newline at end of file
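
As with the Edge Investigation notebook, the *createStoryboard* mutation described above is one HTTP POST. A hedged sketch; the host/port, the top-level proxy wrapper, the proxy-specific uri field and the success output field are assumptions modeled on the dns/flow input types shown elsewhere in this patch set:

    import requests

    # Hypothetical request body: only the mutation name and the general
    # date/title/text/threatDetails shape are confirmed by this patch set.
    mutation = '''
    mutation {
        proxy {
            createStoryboard(input: {
                date: "2017-03-15",
                uri: "http://example.com/suspicious/path",
                title: "Suspicious beaconing",
                text: "Periodic requests to a low-reputation URI.",
                threatDetails: []
            }) {
                success
            }
        }
    }
    '''

    print(requests.post('http://localhost:8889/graphql',
                        json={'query': mutation}).json())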


[21/50] [abbrv] incubator-spot git commit: fix indentation problems

Posted by ev...@apache.org.
fix indentation problems


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/2c3f3d3b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/2c3f3d3b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/2c3f3d3b

Branch: refs/heads/SPOT-35_graphql_api
Commit: 2c3f3d3b11443c5b1516fde576b30ec303aac806
Parents: a32d3ed
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Wed Mar 8 17:16:35 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/flow.py  | 2 +-
 spot-oa/api/resources/proxy.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/2c3f3d3b/spot-oa/api/resources/flow.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/flow.py b/spot-oa/api/resources/flow.py
index 14d7132..665d4cd 100755
--- a/spot-oa/api/resources/flow.py
+++ b/spot-oa/api/resources/flow.py
@@ -235,7 +235,7 @@ def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=N
     insert_command = ("""
         INSERT INTO {0}.flow_threat_investigation
         PARTITION (y={1},m={2},d={3})
-        VALUES (""")
+        VALUES (""") \
         .format(db,date.year,date.month,date.day)
 
     fb_data =  []

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/2c3f3d3b/spot-oa/api/resources/proxy.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/proxy.py b/spot-oa/api/resources/proxy.py
index 5edafe2..49f78a1 100644
--- a/spot-oa/api/resources/proxy.py
+++ b/spot-oa/api/resources/proxy.py
@@ -91,7 +91,7 @@ def score_request(date,score,uri):
     # add score to connections
     insert_command = ("""
 		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
-		VALUES (""")
+		VALUES (""") \
         .format(db,date.year,date.month,date.day)
 
     fb_data =  []
@@ -112,7 +112,7 @@ def score_request(date,score,uri):
         first = False
 
     insert_command += ")"
-	ImpalaEngine.execute_query(insert_command)
+    ImpalaEngine.execute_query(insert_command)
 
     # create feedback file.
     app_path = Configuration.spot()
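
The root cause is worth spelling out: once the parenthesis around the triple-quoted string closes, the assignment statement is complete, so the indented `.format(...)` on the next line raises an IndentationError (a SyntaxError subclass) at compile time. The trailing backslash continues the logical line so the call chains onto the string. A minimal sketch, with a placeholder database name:

    # With the backslash the statement spans both lines; without it, the
    # dangling '.format(...)' line fails to compile.
    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format('spotdb', 2017, 3, 15)  # 'spotdb' is a placeholder

    print(insert_command)

An equivalent fix would be to move the `.format(...)` call inside the parentheses, which avoids the fragile trailing backslash altogether.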


[12/50] [abbrv] incubator-spot git commit: Update dns.py

Posted by ev...@apache.org.
Update dns.py

Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/2c951e95
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/2c951e95
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/2c951e95

Branch: refs/heads/SPOT-35_graphql_api
Commit: 2c951e9581b06d013e17d1ef48a6a88c5a746565
Parents: 8f151f5
Author: Everardo Lopez Sandoval <ev...@intel.com>
Authored: Mon Mar 6 10:59:15 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/2c951e95/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index 8924470..ae2076b 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -321,8 +321,12 @@ def  save_comments(ip,query,title,text,date):
         ImpalaEngine.execute_query(insert_query)
 
     return True
+
 """
-Return a list(dict) with all the data ingested during the time frame provided.
+--------------------------------------------------------------------------
+Return a list(dict) with all the data ingested during the timeframe 
+provided.
+--------------------------------------------------------------------------
 """
 def ingest_summary(start_date,end_date):
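
The reworked comment reads as a small contract: a date range in, a list of dicts out. A hypothetical call, assuming spot-oa is on the Python path and that the arguments are date objects (this hunk does not show their expected type):

    from datetime import date

    # Module path mirrors the file's location under spot-oa/api/resources;
    # whether it imports this way depends on the local setup.
    from api.resources import dns

    # Returns a list(dict) of records ingested during the range.
    for row in dns.ingest_summary(date(2017, 3, 1), date(2017, 3, 15)):
        print(row)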
 


[34/50] [abbrv] incubator-spot git commit: Improving GraphQL documentation

Posted by ev...@apache.org.
Improving GraphQL documentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/4734f4f6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/4734f4f6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/4734f4f6

Branch: refs/heads/SPOT-35_graphql_api
Commit: 4734f4f6f59e585a4e2d3429243c1e48355cec39
Parents: b165f5e
Author: Diego Ortiz Huerta <di...@intel.com>
Authored: Tue Mar 7 08:16:21 2017 -0800
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/graphql/README.md          |  13 +++
 spot-oa/api/graphql/common.py          |   1 +
 spot-oa/api/graphql/dns/mutation.py    |  28 +++---
 spot-oa/api/graphql/dns/query.py       | 111 ++++++++++++-----------
 spot-oa/api/graphql/flow/mutation.py   |  38 ++++----
 spot-oa/api/graphql/flow/query.py      | 133 ++++++++++++++++++----------
 spot-oa/api/graphql/proxy/mutation.py  |  56 +++++++-----
 spot-oa/api/graphql/proxy/query.py     | 107 +++++++++++++---------
 spot-oa/api/graphql/schema.py          |   9 +-
 spot-oa/api/resources/configurator.pyc | Bin 1706 -> 0 bytes
 spot-oa/ui/README.md                   |   1 +
 11 files changed, 308 insertions(+), 189 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/README.md b/spot-oa/api/graphql/README.md
new file mode 100644
index 0000000..17fd7f6
--- /dev/null
+++ b/spot-oa/api/graphql/README.md
@@ -0,0 +1,13 @@
+# Apache Spot (incubating) - GraphQL API
+
+Provides an endpoint to send GraphQL queries to access and modify data.
+
+1. Install Spot OA. Follow this [guide](../../README.md#installation).
+2. Start Spot OA Web Server.
+    1. From spot-oa dir, run ./runIpython.sh
+3. Apache Spot will deploy a GraphQL endpoint under /graphql URL
+
+## Development mode and GraphiQL UI
+
+When Spot OA Web server is started in [development mode](../../ui/README.md#developmentdebugging-process), a GraphiQL UI will be enabled at /graphql URL. Use
+this tool to play with our GraphQL API and to explore the entities we have made available.
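
Outside of GraphiQL, any HTTP client can exercise the endpoint. A minimal sketch using Python's requests library and GraphQL's standard introspection query; the host and port are assumptions, use whatever the web server binds to:

    import requests

    # Introspection is part of the GraphQL spec, so this works against any
    # conforming endpoint, including the one deployed under /graphql.
    resp = requests.post(
        'http://localhost:8889/graphql',  # assumed host:port
        json={'query': '{ __schema { queryType { name } } }'},
    )
    print(resp.json())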

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/common.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/common.py b/spot-oa/api/graphql/common.py
index 2c2e9bd..1e34674 100644
--- a/spot-oa/api/graphql/common.py
+++ b/spot-oa/api/graphql/common.py
@@ -112,6 +112,7 @@ def create_spot_node_type(name, extra_fields={}):
 
 IngestSummaryType = GraphQLObjectType(
     name='SpotIngestSummaryType',
+    description='Number of ingested records',
     fields={
         'datetime': GraphQLField(
             type=SpotDatetimeType,

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/dns/mutation.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/dns/mutation.py b/spot-oa/api/graphql/dns/mutation.py
index de7395f..da93ba6 100644
--- a/spot-oa/api/graphql/dns/mutation.py
+++ b/spot-oa/api/graphql/dns/mutation.py
@@ -27,11 +27,11 @@ ScoreInputType = GraphQLInputObjectType(
         ),
         'dnsQuery': GraphQLInputObjectField(
             type=GraphQLString,
-            description='Dns query to score'
+            description='Dns query name to score'
         ),
         'clientIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Client\'s ip to score'
+            description='Client IP to score'
         )
     }
 )
@@ -40,13 +40,16 @@ ThreatDetailsInputType = GraphQLInputObjectType(
     name='DnsThreatDetailsInputType',
     fields={
         'total': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='The number of times an IP sent a DNS query'
         ),
         'dnsQuery': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='DNS query name'
         ),
         'clientIp': GraphQLInputObjectField(
-            type=SpotIpType
+            type=SpotIpType,
+            description='Client IP address'
         )
     }
 )
@@ -56,26 +59,27 @@ CreateStoryboardInputType = GraphQLInputObjectType(
     fields={
         'date': GraphQLInputObjectField(
             type=SpotDateType,
-            description='A reference date for the add comment process. Defaults to today'
+            description='A reference date for the storyboard being created. Defaults to today'
         ),
         'dnsQuery': GraphQLInputObjectField(
             type=GraphQLString,
-            description='Reference dns query for the comment'
+            description='Threat dns query name'
         ),
         'clientIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Reference client ip for the comment'
+            description='Threat client IP'
         ),
         'title': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A title for the comment'
+            description='Threat title'
         ),
         'text': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A description text for the comment'
+            description='Threat description'
         ),
         'threatDetails': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ThreatDetailsInputType))),
+            description='Threat details. See DnsThreatInformation.details'
         )
     }
 )
@@ -118,6 +122,7 @@ MutationType = GraphQLObjectType(
     fields={
         'score': GraphQLField(
             type=GraphQLList(SpotOperationOutputType),
+            description='Sets a score value to connections',
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ScoreInputType))),
@@ -128,10 +133,11 @@ MutationType = GraphQLObjectType(
         ),
         'createStoryboard': GraphQLField(
             type=SpotOperationOutputType,
+            description='Request Spot to create an entry on storyboard for a particular threat',
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(CreateStoryboardInputType),
-                    description='Generates every data needed to move a threat to the storyboard'
+                    description='Threat information'
                 )
             },
             resolver=lambda root, args, *_: _create_storyboard(args)
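
The input types above compose directly into a request. A sketch of a createStoryboard call using only field names visible in this diff; the top-level dns wrapper, the host/port and the success output field remain assumptions:

    import requests

    mutation = '''
    mutation {
        dns {
            createStoryboard(input: {
                date: "2017-03-15",
                dnsQuery: "malicious.example.com",
                title: "Possible DNS tunneling",
                text: "High query volume to a single external name.",
                threatDetails: [
                    {total: 1234,
                     dnsQuery: "malicious.example.com",
                     clientIp: "10.0.0.5"}
                ]
            }) {
                success
            }
        }
    }
    '''

    print(requests.post('http://localhost:8889/graphql',  # assumed host:port
                        json={'query': mutation}).json())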

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/dns/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/dns/query.py b/spot-oa/api/graphql/dns/query.py
index 7b49421..db55a13 100644
--- a/spot-oa/api/graphql/dns/query.py
+++ b/spot-oa/api/graphql/dns/query.py
@@ -20,7 +20,7 @@ SuspiciousType = GraphQLObjectType(
     fields={
         'frameTime': GraphQLField(
             type=SpotDatetimeType,
-            description='Date and time of the frame',
+            description='Frame time',
             resolver=lambda root, *_: datetime.utcfromtimestamp(int(root.get('unix_tstamp') or 0))
         ),
         'frameLength': GraphQLField(
@@ -30,32 +30,32 @@ SuspiciousType = GraphQLObjectType(
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Client\'s ip',
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('ip_dst')
         ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
-            description='Dns query sent by client',
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         ),
         'dnsQueryClass': GraphQLField(
             type=GraphQLInt,
-            description='Class of dns query sent by client',
+            description='DNS query class',
             resolver=lambda root, *_: int(root.get('dns_qry_class') or '0x0', 16)
         ),
         'dnsQueryType': GraphQLField(
             type=GraphQLInt,
-            description='Type of dns query send by client',
+            description='DNS query type',
             resolver=lambda root, *_: root.get('dns_qry_type') or 0
         ),
         'dnsQueryRcode': GraphQLField(
             type=GraphQLInt,
-            description='Return code sent to client',
+            description='DNS query response code',
             resolver=lambda root, *_: root.get('dns_qry_rcode') or 0
         ),
         'score': GraphQLField(
             type=GraphQLFloat,
-            description='Machine learning score value',
+            description='Spot ML score value',
             resolver=lambda root, *_: root.get('ml_score') or 0
         ),
         'tld': GraphQLField(
@@ -65,42 +65,42 @@ SuspiciousType = GraphQLObjectType(
         ),
         'dnsQueryRep': GraphQLField(
             type=GraphQLString,
-            description='Reputation of dns query',
+            description='DNS query name reputation metadata',
             resolver=lambda root, *_: root.get('query_rep')
         ),
         'clientIpSev': GraphQLField(
             type=GraphQLInt,
-            description='User\'s score value for client ip',
+            description='Client IP user risk score',
             resolver=lambda root, *_: root.get('ip_sev') or 0
         ),
         'dnsQuerySev': GraphQLField(
             type=GraphQLInt,
-            description='User\'s score value for dns query',
+            description='@deprecated DNS query name user risk score',
             resolver=lambda root, *_: root.get('dns_sev') or 0
         ),
         'dnsQueryClassLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryClass value',
+            description='DNS query class name',
             resolver=lambda root, *_: root.get('dns_qry_class_name')
         ),
         'dnsQueryTypeLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryType value',
+            description='DNS query type name',
             resolver=lambda root, *_: root.get('dns_qry_type_name')
         ),
         'dnsQueryRcodeLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryRcode value',
+            description='DNS query response code name',
             resolver=lambda root, *_: root.get('dns_qry_rcode_name')
         ),
         'networkContext': GraphQLField(
             type=GraphQLString,
-            description='Network context for client ip',
+            description='@deprecated Network context for client ip',
             resolver=lambda root, *_: root.get('network_context')
         ),
         'unixTimestamp': GraphQLField(
             type=GraphQLInt,
-            description='Unix timestamp for this frame',
+            description='Frame unix timestamp',
             resolver=lambda root, *_: root.get('unix_tstamp') or 0
         )
     }
@@ -111,7 +111,7 @@ EdgeDetailsType = GraphQLObjectType(
     fields={
         'frameTime': GraphQLField(
             type=SpotDatetimeType,
-            description='Date and time of the frame',
+            description='Frame time',
             resolver=lambda root, *_: datetime.utcfromtimestamp(int(root.get('unix_tstamp') or 0))
         ),
         'frameLength': GraphQLField(
@@ -121,57 +121,57 @@ EdgeDetailsType = GraphQLObjectType(
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Client\'s ip',
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('ip_dst')
         ),
         'serverIp': GraphQLField(
             type=SpotIpType,
-            description='Dns server\'s ip',
+            description='DNS server IP address',
             resolver=lambda root, *_: root.get('ip_src')
         ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
-            description='Dns query sent by client',
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         ),
         'dnsQueryClass': GraphQLField(
             type=GraphQLInt,
-            description='Class of dns query sent by client',
+            description='DNS query class',
             resolver=lambda root, *_: int(root.get('dns_qry_class') or '0x0', 16)
         ),
         'dnsQueryType': GraphQLField(
             type=GraphQLInt,
-            description='Type of dns query send by client',
+            description='DNS query type',
             resolver=lambda root, *_: root.get('dns_qry_type') or 0
         ),
         'dnsQueryRcode': GraphQLField(
             type=GraphQLInt,
-            description='Return code sent to client',
+            description='DNS query response code',
             resolver=lambda root, *_: root.get('dns_qry_rcode') or 0
         ),
         'dnsQueryClassLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryClass value',
+            description='DNS query class name',
             resolver=lambda root, *_: root.get('dns_qry_class_name')
         ),
         'dnsQueryTypeLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryType value',
+            description='DNS query type name',
             resolver=lambda root, *_: root.get('dns_qry_type_name')
         ),
         'dnsQueryRcodeLabel': GraphQLField(
             type=GraphQLString,
-            description='Human readable representation of dnsQueryRcode value',
+            description='DNS query response code name',
             resolver=lambda root, *_: root.get('dns_qry_rcode_name')
         ),
         'dnsQueryAnswers': GraphQLField(
             type=GraphQLList(GraphQLString),
-            description='Dns server\'s answers to query sent by client',
+            description='DNS answers',
             resolver=lambda root, *_: root.get('dns_a', '').split('|')
         ),
         'unixTimestamp': GraphQLField(
             type=GraphQLInt,
-            description='Unix timestamp for this frame',
+            description='Frame unix timestamp',
             resolver=lambda root, *_: root.get('unix_tstamp') or 0
         )
     }
@@ -182,17 +182,17 @@ ThreatDetailsType = GraphQLObjectType(
     fields={
         'total': GraphQLField(
             type=GraphQLInt,
-            description='Total threats',
+            description='The number of times an IP sent a DNS query',
             resolver=lambda root, *_: root.get('total')
         ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
-            description='Dns Threats',
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Ip Threats',
+            description='Client IP address',
             resolver=lambda root, *_: root.get('ip_dst')
         )
     }
@@ -203,12 +203,12 @@ IpDetailsType = GraphQLObjectType(
     fields={
         'dnsQuery': GraphQLField(
             type=GraphQLString,
-            description='',
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         ),
         'dnsQueryAnswers': GraphQLField(
             type=GraphQLList(GraphQLString),
-            description='Dns server\'s answers to query sent by client',
+            description='DNS answers',
             resolver=lambda root, *_: root.get('dns_a', '').split('|')
         )
     }
@@ -217,27 +217,29 @@ IpDetailsType = GraphQLObjectType(
 ScoredThreatType = GraphQLObjectType(
     name='DnsScoredThreatType',
     fields={
-        'frameTime': GraphQLField(
+        'datetime': GraphQLField(
             type=SpotDatetimeType,
             description='Date and time of user score',
             resolver=lambda root, *_: datetime.utcfromtimestamp(int(root.get('unix_tstamp') or 0))
         ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
-            description='A dns query that has been scored as high risk (1)',
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='A client ip that has been scored as high risk (1)',
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('ip_dst')
         ),
         'dnsScore': GraphQLField(
             type=GraphQLInt,
+            description='DNS query name risk score value. 1->High, 2->Medium, 3->Low',
             resolver=lambda root, *_: root.get('dns_sev') or 0
         ),
         'clientIpScore': GraphQLField(
             type=GraphQLInt,
+            description='Client IP address risk score value. 1->High, 2->Medium, 3->Low',
             resolver=lambda root, *_: root.get('ip_sev') or 0
         )
     }
@@ -258,16 +260,17 @@ QueryCommentType = GraphQLObjectType(
     fields={
         'dnsQuery': GraphQLField(
             type=GraphQLString,
+            description='High risk DNS query name',
             resolver=lambda root, *_: root.get('dns_threat')
         ),
         'title': GraphQLField(
             type=GraphQLString,
-            description='A title for the comment',
+            description='Threat title',
             resolver=lambda root, *_: root.get('title')
         ),
         'text': GraphQLField(
             type=GraphQLString,
-            description='A title for the comment',
+            description='Threat description',
             resolver=lambda root, *_: root.get('text')
         )
     }
@@ -279,14 +282,17 @@ ClientIpCommentType = GraphQLObjectType(
     fields={
         'clientIp': GraphQLField(
             type=SpotIpType,
+            description='High risk client IP address',
             resolver=lambda root, *_: root.get('ip_threat')
         ),
         'title': GraphQLField(
             type=GraphQLString,
+            description='Threat title',
             resolver=lambda root, *_: root.get('title')
         ),
         'text': GraphQLField(
             type=GraphQLString,
+            description='Threat description',
             resolver=lambda root, *_: root.get('text')
         )
     }
@@ -297,7 +303,7 @@ ThreatsInformationType = GraphQLObjectType(
     fields={
         'list': GraphQLField(
             type=GraphQLList(ScoredThreatType),
-            description='List of dns queries or client ips that have been scored as high risk (1)',
+            description='List of DNS query names or client IPs that have been scored',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -340,6 +346,7 @@ IncidentProgressionClientIpType = GraphQLObjectType(
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('ip_dst')
         )
     }
@@ -355,6 +362,7 @@ IncidentProgressionQueryType = GraphQLObjectType(
         ),
         'dnsQuery': GraphQLField(
             type=GraphQLString,
+            description='DNS query name',
             resolver=lambda root, *_: root.get('dns_qry_name')
         )
     }
@@ -365,7 +373,7 @@ ThreatInformationType = GraphQLObjectType(
     fields={
         'incidentProgression': GraphQLField(
             type=GraphQLList(IncidentProgressionInterface),
-            description='Incident progression information',
+            description='Details the type of connections that make up the activity related to the threat',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -392,14 +400,15 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'dnsQuery': GraphQLArgument(
                     type=GraphQLString,
-                    description='DNS query of interest'
+                    description='DNS query name of interest'
                 ),
                 'clientIp': GraphQLArgument(
                     type=SpotIpType,
-                    description='Ip of interest'
+                    description='Client IP address of interest'
                 ),
                 'first': GraphQLArgument(
-                    type=GraphQLInt
+                    type=GraphQLInt,
+                    description='The number of records to return'
                 )
             },
             resolver=lambda root, args, *_: Dns.expanded_search(
@@ -416,26 +425,26 @@ QueryType = GraphQLObjectType(
     fields={
         'suspicious': GraphQLField(
             type=GraphQLList(SuspiciousType),
-            description='Suspicious dns queries',
+            description='Suspicious DNS query names',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as a reference for suspicous connections. Defaults to today'
+                    description='A date to use as a reference for suspicious connections. Defaults to today'
                 ),
                 'clientIp': GraphQLArgument(
                     type=SpotIpType,
-                    description='Ip of interest'
+                    description='Client IP of interest'
                 ),
                 'dnsQuery': GraphQLArgument(
                     type=GraphQLString,
-                    description='Partial query of interest'
+                    description='Partial query name of interest'
                 )
             },
             resolver=lambda root, args, *_: Dns.suspicious_queries(date=args.get('date', date.today()), ip=args.get('clientIp'), query=args.get('dnsQuery'))
         ),
         'edgeDetails': GraphQLField(
             type=GraphQLList(EdgeDetailsType),
-            description='Dns queries between client and dns server around a particular moment in time',
+            description='DNS queries between client and DNS server around a particular moment in time',
             args={
                 'frameTime': GraphQLArgument(
                     type=GraphQLNonNull(SpotDatetimeType),
@@ -443,14 +452,14 @@ QueryType = GraphQLObjectType(
                 ),
                 'dnsQuery': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLString),
-                    description='Dns query of interest'
+                    description='DNS query name of interest'
                 )
             },
             resolver=lambda root, args, *_: Dns.details(frame_time=args.get('frameTime'), query=args.get('dnsQuery'))
         ),
         'ipDetails': GraphQLField(
             type=GraphQLList(IpDetailsType),
-            description='Queries made by client',
+            description='Query names made by client',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -458,7 +467,7 @@ QueryType = GraphQLObjectType(
                 ),
                 'clientIp': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Client\'s ip'
+                    description='Client\'s IP address'
                 )
             },
             resolver=lambda root, args, *_: Dns.client_details(date=args.get('date', date.today()), ip=args.get('clientIp'))
@@ -475,7 +484,7 @@ QueryType = GraphQLObjectType(
         ),
         'ingestSummary': GraphQLField(
             type=GraphQLList(IngestSummaryType),
-            description='Total of ingested dns queries',
+            description='Summary of ingested DNS records in range',
             args={
                 'startDate': GraphQLArgument(
                     type=GraphQLNonNull(SpotDateType),
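
The renamed descriptions double as a reference for building a selection set. A sketch of the suspicious query; the argument and field names come straight from this diff, while the top-level dns wrapper and the host/port are assumptions:

    import requests

    query = '''
    {
        dns {
            suspicious(date: "2017-03-15", clientIp: "10.0.0.5") {
                frameTime
                dnsQuery
                dnsQueryTypeLabel
                score
            }
        }
    }
    '''

    print(requests.post('http://localhost:8889/graphql',  # assumed host:port
                        json={'query': query}).json())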

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/flow/mutation.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/flow/mutation.py b/spot-oa/api/graphql/flow/mutation.py
index d4f5d7f..7650f7e 100644
--- a/spot-oa/api/graphql/flow/mutation.py
+++ b/spot-oa/api/graphql/flow/mutation.py
@@ -28,19 +28,19 @@ ScoreInputType = GraphQLInputObjectType(
         ),
         'srcIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Source ip'
+            description='Source IP to score'
         ),
         'dstIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Destination ip'
+            description='Destination IP to score'
         ),
         'srcPort': GraphQLInputObjectField(
             type=GraphQLInt,
-            description='Source port'
+            description='Source port to score'
         ),
         'dstPort': GraphQLInputObjectField(
             type=GraphQLInt,
-            description='Destination port'
+            description='Destination port to score'
         )
     }
 )
@@ -50,19 +50,19 @@ ThreatDetailsInputType = GraphQLInputObjectType(
     fields={
         'firstSeen': GraphQLInputObjectField(
             type=SpotDatetimeType,
-            description='First time two ips were seen on one day data of network traffic'
+            description='First time two IPs were seen on a particular day of flow traffic data'
         ),
         'lastSeen': GraphQLInputObjectField(
             type=SpotDatetimeType,
-            description='Last time two ips were seen on one day data of network trafic'
+            description='Last time two IPs were seen on a particular day of flow traffic data'
         ),
         'srcIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Source ip'
+            description='Source IP address'
         ),
         'dstIp': GraphQLInputObjectField(
             type=SpotIpType,
-            description='Destination ip'
+            description='Destination IP address'
         ),
         'srcPort': GraphQLInputObjectField(
             type=GraphQLInt,
@@ -74,7 +74,7 @@ ThreatDetailsInputType = GraphQLInputObjectType(
         ),
         'connections': GraphQLInputObjectField(
             type=GraphQLInt,
-            description='Number of connections on one day of network traffic'
+            description='Number of connections on a particular day of flow traffic data'
         ),
         'maxPkts': GraphQLInputObjectField(
             type=GraphQLInt,
@@ -82,7 +82,7 @@ ThreatDetailsInputType = GraphQLInputObjectType(
         ),
         'avgPkts': GraphQLInputObjectField(
             type=GraphQLInt,
-            description='Average number of packets transferred bwteen ips'
+            description='Average number of packets transferred between IPs'
         ),
         'maxBytes': GraphQLInputObjectField(
             type=GraphQLInt,
@@ -90,7 +90,7 @@ ThreatDetailsInputType = GraphQLInputObjectType(
         ),
         'avgBytes': GraphQLInputObjectField(
             type=GraphQLInt,
-            description='Average number of bytes transferred bwteen ips'
+            description='Average number of bytes transferred between IPs'
         )
     }
 )
@@ -100,25 +100,27 @@ CreateStoryboardInputType = GraphQLInputObjectType(
     fields={
         'date': GraphQLInputObjectField(
             type=SpotDateType,
-            description='A reference date for the add comment process. Defaults to today'
+            description='A reference date for the storyboard being created. Defaults to today'
         ),
         'ip': GraphQLInputObjectField(
             type=GraphQLNonNull(SpotIpType),
-            description='Reference IP for the comment'
+            description='High risk IP address'
         ),
         'title': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A title for the comment'
+            description='Threat title'
         ),
         'text': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A description text for the comment'
+            description='Threat description'
         ),
         'threatDetails': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLList(ThreatDetailsInputType)),
+            description='Threat details. See NetflowThreatInformation.details'
         ),
         'first': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='The number of records to return'
         )
     }
 )
@@ -156,6 +158,7 @@ MutationType = GraphQLObjectType(
     fields={
         'score': GraphQLField(
             type=GraphQLList(SpotOperationOutputType),
+            description='Sets a score value to connections',
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ScoreInputType))),
@@ -166,10 +169,11 @@ MutationType = GraphQLObjectType(
         ),
         'createStoryboard': GraphQLField(
             type=SpotOperationOutputType,
+            description='Request Spot to create an entry on storyboard for a particular threat',
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(CreateStoryboardInputType),
-                    description='Generates every data needed to move a threat to the storyboard'
+                    description='Threat information'
                 )
             },
             resolver=lambda root, args, *_: _create_storyboard(args)
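
For netflow, the score mutation takes the srcIp/dstIp/srcPort/dstPort fields documented above. A sketch; the score and date input fields, the top-level flow wrapper, the host/port and the success output field are assumptions not visible in this hunk:

    import requests

    mutation = '''
    mutation {
        flow {
            score(input: [{
                score: 1,
                srcIp: "10.0.0.5",
                dstIp: "93.184.216.34",
                srcPort: 49152,
                dstPort: 80
            }]) {
                success
            }
        }
    }
    '''

    print(requests.post('http://localhost:8889/graphql',  # assumed host:port
                        json={'query': mutation}).json())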

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/flow/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/flow/query.py b/spot-oa/api/graphql/flow/query.py
index 138ee98..3806de9 100644
--- a/spot-oa/api/graphql/flow/query.py
+++ b/spot-oa/api/graphql/flow/query.py
@@ -19,82 +19,102 @@ SuspiciousType = GraphQLObjectType(
     fields={
         'tstart': GraphQLField(
             type=SpotDatetimeType,
+            description='Time the flow was received by the flow collector',
             resolver=lambda root, *_: root.get('tstart')
         ),
         'srcIp': GraphQLField(
             type=GraphQLString,
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'dstIp': GraphQLField(
             type=GraphQLString,
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'srcPort': GraphQLField(
             type=GraphQLInt,
+            description='Source port',
             resolver=lambda root, *_: root.get('sport') or 0
         ),
         'dstPort': GraphQLField(
             type=GraphQLInt,
+            description='Destination port',
             resolver=lambda root, *_: root.get('dport') or 0
         ),
         'protocol': GraphQLField(
             type=GraphQLString,
+            description='IP protocol',
             resolver=lambda root, *_: root.get('proto')
         ),
         'inPkts': GraphQLField(
             type=GraphQLInt,
+            description='Input packets',
             resolver=lambda root, *_: root.get('ipkt') or 0
         ),
         'inBytes': GraphQLField(
             type=GraphQLInt,
+            description='Input bytes',
             resolver=lambda root, *_: root.get('ibyt') or 0
         ),
         'outPkts': GraphQLField(
             type=GraphQLInt,
+            description='Output packets',
             resolver=lambda root, *_: root.get('opkt') or 0
         ),
         'outBytes': GraphQLField(
             type=GraphQLInt,
+            description='Output bytes',
             resolver=lambda root, *_: root.get('obyt') or 0
         ),
         'score': GraphQLField(
             type=GraphQLFloat,
+            description='Spot ML score',
             resolver=lambda root, *_: root.get('ml_score') or 0
         ),
         'rank': GraphQLField(
             type=GraphQLInt,
+            description='Spot ML rank',
             resolver=lambda root, *_: root.get('rank') or 0
         ),
         'srcIp_isInternal': GraphQLField(
             type=GraphQLBoolean,
+            description='Internal source IP address context flag',
             resolver=lambda root, *_: root.get('srcip_internal') == '1'
         ),
         'dstIp_isInternal': GraphQLField(
             type=GraphQLBoolean,
+            description='Internal destination IP address context flag',
             resolver=lambda root, *_: root.get('dstip_internal') == '1'
         ),
         'srcIp_geoloc': GraphQLField(
             type=GraphQLString,
+            description='Source IP geolocation',
             resolver=lambda root, *_: root.get('src_geoloc')
         ),
         'dstIp_geoloc': GraphQLField(
             type=GraphQLString,
+            description='Destination IP geolocation',
             resolver=lambda root, *_: root.get('dst_geoloc')
         ),
         'srcIp_domain': GraphQLField(
             type=GraphQLString,
+            description='Source IP domain',
             resolver=lambda root, *_: root.get('src_domain')
         ),
         'dstIp_domain': GraphQLField(
             type=GraphQLString,
+            description='Destination IP domain',
             resolver=lambda root, *_: root.get('dst_domain')
         ),
         'srcIp_rep': GraphQLField(
             type=GraphQLString,
+            description='Source IP reputation metadata',
             resolver=lambda root, *_: root.get('src_rep')
         ),
         'dstIp_rep': GraphQLField(
             type=GraphQLString,
+            description='Destination IP reputation metadata',
             resolver=lambda root, *_: root.get('dst_rep')
         )
     }
@@ -105,62 +125,77 @@ EdgeDetailsType = GraphQLObjectType(
     fields={
         'tstart': GraphQLField(
             type=SpotDatetimeType,
+            description='Time the flow was received by the flow collector',
             resolver=lambda root, *_: root.get('tstart')
         ),
         'srcIp': GraphQLField(
             type=GraphQLString,
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'dstIp': GraphQLField(
             type=GraphQLString,
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'srcPort': GraphQLField(
             type=GraphQLString,
+            description='Source port',
             resolver=lambda root, *_: root.get('sport')
         ),
         'dstPort': GraphQLField(
             type=GraphQLString,
+            description='Destination port',
             resolver=lambda root, *_: root.get('dport')
         ),
         'protocol': GraphQLField(
             type=GraphQLString,
+            description='IP protocol',
             resolver=lambda root, *_: root.get('proto')
         ),
         'flags': GraphQLField(
             type=GraphQLString,
+            description='TCP flags',
             resolver=lambda root, *_: root.get('flags')
         ),
         'tos': GraphQLField(
             type=GraphQLString,
+            description='DSCP value',
             resolver=lambda root, *_: root.get('tos')
         ),
         'inBytes': GraphQLField(
             type=GraphQLInt,
+            description='Input bytes',
             resolver=lambda root, *_: root.get('ibyt') or 0
         ),
         'inPkts': GraphQLField(
             type=GraphQLInt,
+            description='Input packets',
             resolver=lambda root, *_: root.get('ipkt') or 0
         ),
         'inIface': GraphQLField(
             type=GraphQLString,
+            description='SNMP input interface id index',
             resolver=lambda root, *_: root.get('input')
         ),
         'outIface': GraphQLField(
             type=GraphQLString,
+            description='SNMP output interface id index',
             resolver=lambda root, *_: root.get('output')
         ),
         'routerIp': GraphQLField(
             type=GraphQLString,
+            description='Reporting router IP address',
             resolver=lambda root, *_: root.get('rip')
         ),
         'outBytes': GraphQLField(
             type=GraphQLInt,
+            description='Output bytes',
             resolver=lambda root, *_: root.get('obyt') or 0
         ),
         'outPkts': GraphQLField(
             type=GraphQLInt,
+            description='Output packets',
             resolver=lambda root, *_: root.get('opkt') or 0
         )
     }
@@ -171,18 +206,22 @@ IpConnectionDetailsType = GraphQLObjectType(
     fields={
         'srcIp': GraphQLField(
             type=GraphQLString,
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'dstIp': GraphQLField(
             type=GraphQLString,
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'inBytes': GraphQLField(
             type=GraphQLInt,
+            description='Input bytes',
             resolver=lambda root, *_: root.get('ibyt') or 0
         ),
         'inPkts': GraphQLField(
             type=GraphQLInt,
+            description='Input packets',
             resolver=lambda root, *_: root.get('ipkt') or 0
         )
     }
@@ -193,11 +232,12 @@ ScoredConnectionType = GraphQLObjectType(
     fields={
         'tstart': GraphQLField(
             type=SpotDatetimeType,
+            description='Time the flow was received by the flow collector',
             resolver=lambda root, *_: root.get('tstart')
         ),
         'srcIp': GraphQLField(
             type=SpotIpType,
-            description='Source Ip',
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'srcPort': GraphQLField(
@@ -207,7 +247,7 @@ ScoredConnectionType = GraphQLObjectType(
         ),
         'dstIp': GraphQLField(
             type=SpotIpType,
-            description='Destination Ip',
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'dstPort': GraphQLField(
@@ -217,7 +257,7 @@ ScoredConnectionType = GraphQLObjectType(
         ),
         'score': GraphQLField(
             type=GraphQLInt,
-            description='Score value. 1->High, 2->Medium, 3->Low',
+            description='Risk score value. 1->High, 2->Medium, 3->Low',
             resolver=lambda root, *_: root.get('score') or 0
         )
     }
@@ -228,22 +268,22 @@ ThreatDetailsType = GraphQLObjectType(
     fields={
         'firstSeen': GraphQLField(
             type=SpotDatetimeType,
-            description='First time two ips were seen on one day data of network traffic',
+            description='First time two IPs were seen on a particular day of flow traffic data',
             resolver=lambda root, *_: root.get('firstseen')
         ),
         'lastSeen': GraphQLField(
             type=SpotDatetimeType,
-            description='Last time two ips were seen on one day data of network trafic',
+            description='Last time two IPs were seen on a particular day of flow traffic data',
             resolver=lambda root, *_: root.get('lastseen')
         ),
         'srcIp': GraphQLField(
             type=SpotIpType,
-            description='Source ip',
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'dstIp': GraphQLField(
             type=SpotIpType,
-            description='Destination ip',
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'srcPort': GraphQLField(
@@ -258,7 +298,7 @@ ThreatDetailsType = GraphQLObjectType(
         ),
         'connections': GraphQLField(
             type=GraphQLInt,
-            description='Number of connections on one day of network traffic',
+            description='Number of connections on a particular day of flow traffic data',
             resolver=lambda root, *_: root.get('conns')
         ),
         'maxPkts': GraphQLField(
@@ -268,7 +308,7 @@ ThreatDetailsType = GraphQLObjectType(
         ),
         'avgPkts': GraphQLField(
             type=GraphQLInt,
-            description='Average number of packets transferred bwteen ips',
+            description='Average number of packets transferred between IPs',
             resolver=lambda root, *_: root.get('avgpkts')
         ),
         'maxBytes': GraphQLField(
@@ -278,7 +318,7 @@ ThreatDetailsType = GraphQLObjectType(
         ),
         'avgBytes': GraphQLField(
             type=GraphQLInt,
-            description='Average number of bytes transferred bwteen ips',
+            description='Average number of bytes transferred between IPs',
             resolver=lambda root, *_: root.get('avgbyts')
         )
     }
@@ -289,14 +329,17 @@ CommentType = GraphQLObjectType(
     fields={
         'ip': GraphQLField(
             type=SpotIpType,
+            description='High risk IP address',
             resolver=lambda root, *_: root.get('ip_threat')
         ),
         'title': GraphQLField(
             type=GraphQLString,
+            description='Threat title',
             resolver=lambda root, *_: root.get('title')
         ),
         'text': GraphQLField(
             type=GraphQLString,
+            description='Threat description',
             resolver=lambda root, *_: root.get('text')
         )
     }
@@ -307,11 +350,11 @@ ThreatsInformationType = GraphQLObjectType(
     fields={
         'list': GraphQLField(
             type=GraphQLList(ScoredConnectionType),
-            description='List of connections that have been scored',
+            description='List of suspicious IPs that have been scored',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as reference to retrieve the list of scored connections. Defaults to today'
+                    description='A date to use as reference to retrieve the list of scored IPs. Defaults to today'
                 )
             },
             resolver=lambda root, args, *
@@ -338,7 +381,7 @@ IncidentProgressionNodeType = create_spot_node_type(
 ImpactAnalysisNodeType = create_spot_node_type('NetflowImpactAnalysisNodeType', {
     'size': GraphQLField(
         type=GraphQLInt,
-        description='Node size',
+        description='Number of inbound, outbound and two-way connections',
         resolver=lambda root, *_: root.get('size') or 0
     )
 })
@@ -348,7 +391,7 @@ MapViewGeometryType = GraphQLObjectType(
     fields={
         'coordinates': GraphQLField(
             type=GraphQLList(GraphQLFloat),
-            description='Geo Latitude and longitude',
+            description='Geo latitude and longitude',
             resolver=lambda root, *_: root.get('coordinates')
         )
     }
@@ -359,12 +402,12 @@ MapViewPropertiesType = GraphQLObjectType(
     fields={
         'ip': GraphQLField(
             type=SpotIpType,
-            description='Ip',
+            description='IP',
             resolver=lambda root, *_: root.get('ip')
         ),
         'location': GraphQLField(
             type=GraphQLString,
-            description='Name of the ip\'s location',
+            description='Name of the IP\'s location',
             resolver=lambda root, *_: root.get('location')
         ),
         'type': GraphQLField(
@@ -396,12 +439,12 @@ MapViewType = GraphQLObjectType(
     fields={
         'srcIps': GraphQLField(
             type=GraphQLList(MapViewIpType),
-            description='A list of source ips',
+            description='A list of source IPs',
             resolver=lambda root, *_: root.get('sourceips', [])
         ),
         'dstIps': GraphQLField(
             type=GraphQLList(MapViewIpType),
-            description='A list of destination ips',
+            description='A list of destination IPs',
             resolver=lambda root, *_: root.get('destips', [])
         )
     }
@@ -422,12 +465,12 @@ TimelineType = GraphQLObjectType(
         ),
         'srcIp': GraphQLField(
             type=GraphQLNonNull(SpotIpType),
-            description='Source ip',
+            description='Source IP address',
             resolver=lambda root, *_: root.get('srcip')
         ),
         'dstIp': GraphQLField(
             type=GraphQLNonNull(SpotIpType),
-            description='Destination ip',
+            description='Destination IP address',
             resolver=lambda root, *_: root.get('dstip')
         ),
         'protocol': GraphQLField(
@@ -442,17 +485,17 @@ TimelineType = GraphQLObjectType(
         ),
         'dstPort': GraphQLField(
             type=GraphQLNonNull(GraphQLInt),
-            description='Destionation port',
+            description='Destination port',
             resolver=lambda root, *_: root.get('dport')
         ),
         'pkts': GraphQLField(
             type=GraphQLNonNull(GraphQLInt),
-            description='Packets tranferred between ips',
+            description='Packets transferred between IPs',
             resolver=lambda root, *_: root.get('ipkt')
         ),
         'bytes': GraphQLField(
             type=GraphQLNonNull(GraphQLInt),
-            description='Bytes tranferred between ips',
+            description='Bytes transferred between IPs',
             resolver=lambda root, *_: root.get('ibyt')
         )
     }
@@ -463,15 +506,15 @@ ThreatInformationType = GraphQLObjectType(
     fields={
         'details': GraphQLField(
             type=GraphQLList(ThreatDetailsType),
-            description='Detailed information about a high risk threat',
+            description='Detailed information about a high risk IP',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as reference for incident progression information. Defaults to today'
+                    description='A date to use as reference for high risk IP information. Defaults to today'
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Threat\'s Ip'
+                    description='Suspicious IP'
                 )
             },
             resolver=lambda root, args, *
@@ -479,7 +522,7 @@ ThreatInformationType = GraphQLObjectType(
         ),
         'incidentProgression': GraphQLField(
             type=IncidentProgressionNodeType,
-            description='Incident progression information',
+            description='Details the types of connections that make up the activity related to the threat',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -487,7 +530,7 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Threat\'s Ip'
+                    description='Suspicious IP'
                 )
             },
             resolver=lambda root, args, *
@@ -495,7 +538,7 @@ ThreatInformationType = GraphQLObjectType(
         ),
         'impactAnalysis': GraphQLField(
             type=ImpactAnalysisNodeType,
-            description='Impact analysis information',
+            description='Contains the number of inbound, outbound and two-way connections related to the suspicious IP',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -503,7 +546,7 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Threat\'s Ip'
+                    description='Suspicious IP'
                 )
             },
             resolver=lambda root, args, *
@@ -511,7 +554,7 @@ ThreatInformationType = GraphQLObjectType(
         ),
         'geoLocalization': GraphQLField(
             type=MapViewType,
-            description='Gelocalization info about the ips related to this threat',
+            description='Geolocalization info about the IPs related to this threat',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -519,7 +562,7 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Threat\'s Ip'
+                    description='Suspicious IP'
                 )
             },
             resolver=lambda root, args, *
@@ -527,7 +570,7 @@ ThreatInformationType = GraphQLObjectType(
         ),
         'timeline': GraphQLField(
             type=GraphQLList(TimelineType),
-            description='Time based information about this threat',
+            description='Lists \'clusters\' of inbound connections to the IP, grouped by time, giving an overall picture of the times of day with the most activity',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -535,7 +578,7 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Threat\'s Ip'
+                    description='Suspicious IP'
                 )
             },
             resolver=lambda root, args, *
@@ -549,7 +592,7 @@ QueryType = GraphQLObjectType(
     fields={
         'suspicious': GraphQLField(
             type=GraphQLList(SuspiciousType),
-            description='Netflow Suspicious connections',
+            description='Flow suspicious connections',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -557,7 +600,7 @@ QueryType = GraphQLObjectType(
                 ),
                 'ip': GraphQLArgument(
                     type=SpotIpType,
-                    description='Ip of interest'
+                    description='IP of interest'
                 )
             },
             resolver=lambda root, args, *
@@ -565,7 +608,7 @@ QueryType = GraphQLObjectType(
         ),
         'edgeDetails': GraphQLField(
             type=GraphQLList(EdgeDetailsType),
-            description='Network acitvity between two ips around a particular moment in time',
+            description='Flow activity between two IPs around a particular moment in time',
             args={
                 'tstart': GraphQLArgument(
                     type=GraphQLNonNull(SpotDatetimeType),
@@ -573,11 +616,11 @@ QueryType = GraphQLObjectType(
                 ),
                 'srcIp': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Source ip'
+                    description='Source IP address'
                 ),
                 'dstIp': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Destination ip'
+                    description='Destination IP address'
                 )
             },
             resolver=lambda root, args, *_: Flow.details(
@@ -587,15 +630,15 @@ QueryType = GraphQLObjectType(
         ),
         'ipDetails': GraphQLField(
             type=GraphQLList(IpConnectionDetailsType),
-            description='Ip network activity details',
+            description='Flow activity details between the IP of interest and other suspicious IPs',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as reference for ip network activity details. Defaults to today'
+                    description='A date to use as reference for IP network activity details. Defaults to today'
                 ),
                 'ip': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Ip of interest'
+                    description='IP address of interest'
                 )
             },
             resolver=lambda root, args, *
@@ -603,17 +646,17 @@ QueryType = GraphQLObjectType(
         ),
         'threats': GraphQLField(
             type=ThreatsInformationType,
-            description='Advanced inforamtion about threats',
+            description='Advanced information about threats',
             resolver=lambda *_: {}
         ),
         'threat': GraphQLField(
             type=ThreatInformationType,
-            description='Advanced inforamtion about a single threat',
+            description='Advanced information about a single threat',
             resolver=lambda *_: {}
         ),
         'ingestSummary': GraphQLField(
             type=GraphQLList(IngestSummaryType),
-            description='Total of ingested netflows',
+            description='Summary of ingested flows in range',
             args={
                 'startDate': GraphQLArgument(
                     type=GraphQLNonNull(SpotDateType),

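As a point of reference for the description overhaul above, the reworked flow
threat fields can be exercised with a plain HTTP POST. The sketch below is
illustrative only: the endpoint URL is an assumption that depends on the local
spot-oa deployment, while the query's field names come straight from the
TimelineType documented in this diff.

    import requests

    # Hypothetical endpoint; adjust to the local spot-oa deployment.
    GRAPHQL_ENDPOINT = 'http://localhost:8889/graphql'

    # Fetch the timeline for a suspicious IP; fields match TimelineType above.
    TIMELINE_QUERY = """
    query($date: SpotDateType!, $ip: SpotIpType!) {
        flow {
            threat {
                timeline(date: $date, ip: $ip) {
                    srcIp
                    dstIp
                    protocol
                    dstPort
                    pkts
                    bytes
                }
            }
        }
    }
    """

    response = requests.post(
        GRAPHQL_ENDPOINT,
        json={'query': TIMELINE_QUERY,
              'variables': {'date': '2017-03-15', 'ip': '10.0.0.1'}})
    print response.json()
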
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/proxy/mutation.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/proxy/mutation.py b/spot-oa/api/graphql/proxy/mutation.py
index 09e9fe4..1d08186 100644
--- a/spot-oa/api/graphql/proxy/mutation.py
+++ b/spot-oa/api/graphql/proxy/mutation.py
@@ -36,51 +36,64 @@ ThreatDetailsInputType = GraphQLInputObjectType(
     name='ProxyThreatDetailsInputType',
     fields={
         'datetime': GraphQLInputObjectField(
-            type=SpotDatetimeType
+            type=SpotDatetimeType,
+            description='Start time of the request'
         ),
         'clientIp': GraphQLInputObjectField(
-            type=SpotIpType
+            type=SpotIpType,
+            description='Client\'s IP address'
         ),
         'username': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='Username used for authentication'
         ),
         'duration': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='Connection duration'
         ),
         'uri': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='The original URI requested'
         ),
         'webCategory': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='Web content categories'
         ),
         'responseCode': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='HTTP response code'
         ),
         'requestMethod': GraphQLInputObjectField(
             type=GraphQLString,
-            description='Http Method'
+            description='HTTP request method'
         ),
         'userAgent': GraphQLInputObjectField(
             type=GraphQLString,
             description='Client\'s user agent'
         ),
         'responseContentType': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='HTTP response content type (MIME)'
         ),
         'referer': GraphQLInputObjectField(
-            type=GraphQLString
+            type=GraphQLString,
+            description='The address of the webpage that linked to the resource being requested'
         ),
         'uriPort': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='URI port'
         ),
         'serverIp': GraphQLInputObjectField(
-            type=SpotIpType
+            type=SpotIpType,
+            description='Server/Proxy IP'
         ),
         'serverToClientBytes': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='Number of bytes sent from appliance to client'
         ),
         'clientToServerBytes': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='Number of bytes sent from client to appliance'
         )
     }
 )
@@ -90,25 +103,27 @@ CreateStoryboardInputType = GraphQLInputObjectType(
     fields={
         'date': GraphQLInputObjectField(
             type=SpotDateType,
-            description='A reference date for the add comment process. Defaults to today'
+            description='A reference date for the storyboard being created. Defaults to today'
         ),
         'uri': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='Reference URI for the comment'
+            description='Threat URI'
         ),
         'title': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A title for the comment'
+            description='Threat title'
         ),
         'text': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
-            description='A description text for the comment'
+            description='Threat description'
         ),
         'threatDetails': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ThreatDetailsInputType))),
+            description='Threat details. See ProxyThreatInformation.details'
         ),
         'first': GraphQLInputObjectField(
-            type=GraphQLInt
+            type=GraphQLInt,
+            description='The number of records to return'
         )
     }
 )
@@ -146,6 +161,7 @@ MutationType = GraphQLObjectType(
     fields={
         'score': GraphQLField(
             type=GraphQLList(SpotOperationOutputType),
+            description='Assigns a score value to requests',
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ScoreInputType))),
@@ -159,7 +175,7 @@ MutationType = GraphQLObjectType(
             args={
                 'input': GraphQLArgument(
                     type=GraphQLNonNull(CreateStoryboardInputType),
-                    description='Generates every data needed to move a threat to the storyboard'
+                    description='Requests Spot to create an entry on the storyboard for a particular threat'
                 )
             },
             resolver=lambda root, args, *_: _create_storyboard(args)

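For readers tracking the new input descriptions, the storyboard input above
maps onto a Python payload like the following sketch. Only field names
documented in this diff are used; the values are made up, and the mutation
field that consumes this input lives in an unchanged part of mutation.py, so
it is not reproduced here.

    # Sketch of a CreateStoryboardInputType payload; all values are
    # illustrative. threatDetails entries follow ProxyThreatDetailsInputType.
    storyboard_input = {
        'date': '2017-03-15',                 # reference date, defaults to today
        'uri': 'http://example.com/bad',      # threat URI (hypothetical)
        'title': 'Suspicious download site',  # threat title
        'text': 'Multiple clients fetched an executable from this URI.',
        'first': 20,                          # number of records to return
        'threatDetails': [{
            'datetime': '2017-03-15 10:22:30',  # start time of the request
            'clientIp': '10.0.0.5',
            'uri': 'http://example.com/bad/payload.exe',
            'requestMethod': 'GET',
            'responseCode': 200,
        }],
    }
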
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/proxy/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/proxy/query.py b/spot-oa/api/graphql/proxy/query.py
index d75a1df..697606c 100644
--- a/spot-oa/api/graphql/proxy/query.py
+++ b/spot-oa/api/graphql/proxy/query.py
@@ -22,17 +22,17 @@ SuspiciousType = GraphQLObjectType(
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Clent\'s ip',
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('clientip')
         ),
         'host': GraphQLField(
             type=GraphQLString,
-            description='Target host of the request',
+            description='Host name from the client request URL',
             resolver=lambda root, *_: root.get('host')
         ),
         'requestMethod': GraphQLField(
             type=GraphQLString,
-            description='HTTP method',
+            description='HTTP request method',
             resolver=lambda root, *_: root.get('reqmethod')
         ),
         'userAgent': GraphQLField(
@@ -42,12 +42,12 @@ SuspiciousType = GraphQLObjectType(
         ),
         'responseContentType': GraphQLField(
             type=GraphQLString,
-            description='HTTP response content type',
+            description='HTTP response content type (MIME)',
             resolver=lambda root, *_: root.get('resconttype')
         ),
         'duration': GraphQLField(
             type=GraphQLInt,
-            description='Duration of the request',
+            description='Connection duration',
             resolver=lambda root, *_: root.get('duration')
         ),
         'username': GraphQLField(
@@ -57,12 +57,12 @@ SuspiciousType = GraphQLObjectType(
         ),
         'webCategory': GraphQLField(
             type=GraphQLString,
-            description='Web category',
+            description='Web content categories',
             resolver=lambda root, *_: root.get('webcat')
         ),
         'referer': GraphQLField(
             type=GraphQLString,
-            description='Request\'s referer',
+            description='The address of the webpage that linked to the resource being requested',
             resolver=lambda root, *_: root.get('referer')
         ),
         'responseCode': GraphQLField(
@@ -72,22 +72,22 @@ SuspiciousType = GraphQLObjectType(
         ),
         'uriPort': GraphQLField(
             type=GraphQLInt,
-            description='URI\'s port',
+            description='URI port',
             resolver=lambda root, *_: root.get('uriport')
         ),
         'uriPath': GraphQLField(
             type=GraphQLString,
-            description='URI\'s path',
+            description='URI path',
             resolver=lambda root, *_: root.get('uripath')
         ),
         'uriQuery': GraphQLField(
             type=GraphQLString,
-            description='URI\'s query',
+            description='URI query',
             resolver=lambda root, *_: root.get('uriquery')
         ),
         'serverIp': GraphQLField(
             type=SpotIpType,
-            description='Server\'s ip',
+            description='Server/Proxy IP',
             resolver=lambda root, *_: root.get('serverip')
         ),
         'serverToClientBytes': GraphQLField(
@@ -102,26 +102,27 @@ SuspiciousType = GraphQLObjectType(
         ),
         'uri': GraphQLField(
             type=GraphQLString,
-            description='The original URL requested',
+            description='The original URI requested',
             resolver=lambda root, *_: root.get('fulluri')
         ),
         'score': GraphQLField(
             type=GraphQLInt,
-            description='Score value assigned by machine learning algorithm',
+            description='Spot ML score value',
             resolver=lambda root, *_: root.get('ml_score') or 0
         ),
         'uriRep': GraphQLField(
             type=GraphQLString,
-            description='URI\'s reputation',
+            description='URI reputation metadata',
             resolver=lambda root, *_: root.get('uri_rep')
         ),
         'responseCodeLabel': GraphQLField(
             type=GraphQLString,
-            description='HTTP response code label',
+            description='HTTP response code name',
             resolver=lambda root, *_: root.get('respcode_name')
         ),
         'networkContext': GraphQLField(
             type=GraphQLString,
+            description='@deprecated',
             resolver=lambda root, *_: root.get('network_context')
         )
     }
@@ -137,17 +138,17 @@ EdgeDetailsType = GraphQLObjectType(
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Clent\'s ip',
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('clientip')
         ),
         'host': GraphQLField(
             type=GraphQLString,
-            description='Target host of the request',
+            description='Host name from the client request URL',
             resolver=lambda root, *_: root.get('host')
         ),
         'webCategory': GraphQLField(
             type=GraphQLString,
-            description='Web category',
+            description='Web content categories',
             resolver=lambda root, *_: root.get('webcat')
         ),
         'responseCode': GraphQLField(
@@ -157,12 +158,12 @@ EdgeDetailsType = GraphQLObjectType(
         ),
         'responseCodeLabel': GraphQLField(
             type=GraphQLString,
-            description='HTTP response code label',
+            description='HTTP response code name',
             resolver=lambda root, *_: root.get('respcode_name')
         ),
         'requestMethod': GraphQLField(
             type=GraphQLString,
-            description='HTTP method',
+            description='HTTP request method',
             resolver=lambda root, *_: root.get('reqmethod')
         ),
         'userAgent': GraphQLField(
@@ -172,22 +173,22 @@ EdgeDetailsType = GraphQLObjectType(
         ),
         'responseContentType': GraphQLField(
             type=GraphQLString,
-            description='HTTP response content type',
+            description='HTTP response content type (MIME)',
             resolver=lambda root, *_: root.get('resconttype')
         ),
         'referer': GraphQLField(
             type=GraphQLString,
-            description='Request\'s referer',
+            description='The address of the webpage that linked to the resource being requested',
             resolver=lambda root, *_: root.get('referer')
         ),
         'uriPort': GraphQLField(
             type=GraphQLInt,
-            description='URI\'s port',
+            description='URI port',
             resolver=lambda root, *_: root.get('uriport')
         ),
         'serverIp': GraphQLField(
             type=SpotIpType,
-            description='Server\'s ip',
+            description='Server/Proxy IP',
             resolver=lambda root, *_: root.get('serverip')
         ),
         'serverToClientBytes': GraphQLField(
@@ -202,7 +203,7 @@ EdgeDetailsType = GraphQLObjectType(
         ),
         'uri': GraphQLField(
             type=GraphQLString,
-            description='The original URL requested',
+            description='The original URI requested',
             resolver=lambda root, *_: root.get('fulluri')
         )
     }
@@ -223,7 +224,7 @@ ScoredRequestType = GraphQLObjectType(
         ),
         'score': GraphQLField(
             type=GraphQLInt,
-            description='Score value. 1->High, 2->Medium, 3->Low',
+            description='URI risk score value. 1->High, 2->Medium, 3->Low',
             resolver=lambda root, *_: root.get('uri_sev') or 0
         )
     }
@@ -234,14 +235,17 @@ CommentType = GraphQLObjectType(
     fields={
         'uri': GraphQLField(
             type=GraphQLString,
+            description='High risk URI',
             resolver=lambda root, *_: root.get('p_threat')
         ),
         'title': GraphQLField(
             type=GraphQLString,
+            description='Threat title',
             resolver=lambda root, *_: root.get('title')
         ),
         'text': GraphQLField(
             type=GraphQLString,
+            description='Threat description',
             resolver=lambda root, *_: root.get('text')
         )
     }
@@ -252,11 +256,11 @@ ThreatsInformationType = GraphQLObjectType(
     fields={
         'list': GraphQLField(
             type=GraphQLList(ScoredRequestType),
-            description='List of requests that have been scored',
+            description='List of URIs that have been scored',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as reference to retrieve the list of scored requests. Defaults to today'
+                    description='A date to use as reference to retrieve the list of scored URIs. Defaults to today'
                 )
             },
             resolver=lambda root, args, *_: Proxy.get_scored_requests(date=args.get('date', date.today()))
@@ -280,35 +284,42 @@ ThreatDetailsType = GraphQLObjectType(
     fields={
         'datetime': GraphQLField(
             type=SpotDatetimeType,
+            description='Start time of the request',
             resolver=lambda root, *_: '{} {}'.format(root.get('p_date') or '1970-01-01', root.get('p_time') or '00:00:00')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('clientip')
         ),
         'username': GraphQLField(
             type=GraphQLString,
+            description='Username used for authentication',
             resolver=lambda root, *_: root.get('username')
         ),
         'duration': GraphQLField(
             type=GraphQLInt,
+            description='Connection duration',
             resolver=lambda root, *_: root.get('duration')
         ),
         'uri': GraphQLField(
             type=GraphQLString,
+            description='The original URI requested',
             resolver=lambda root, *_: root.get('fulluri')
         ),
         'webCategory': GraphQLField(
             type=GraphQLString,
+            description='Web content categories',
             resolver=lambda root, *_: root.get('webcat')
         ),
         'responseCode': GraphQLField(
             type=GraphQLInt,
+            description='HTTP response code',
             resolver=lambda root, *_: root.get('respcode')
         ),
         'requestMethod': GraphQLField(
             type=GraphQLString,
-            description='Http Method',
+            description='HTTP request method',
             resolver=lambda root, *_: root.get('reqmethod')
         ),
         'userAgent': GraphQLField(
@@ -318,26 +329,32 @@ ThreatDetailsType = GraphQLObjectType(
         ),
         'responseContentType': GraphQLField(
             type=GraphQLString,
+            description='HTTP response content type (MIME)',
             resolver=lambda root, *_: root.get('resconttype')
         ),
         'referer': GraphQLField(
             type=GraphQLString,
+            description='The address of the webpage that linked to the resource being requested',
             resolver=lambda root, *_: root.get('referer')
         ),
         'uriPort': GraphQLField(
             type=GraphQLInt,
+            description='URI port',
             resolver=lambda root, *_: root.get('uriport')
         ),
         'serverIp': GraphQLField(
             type=SpotIpType,
+            description='Server/Proxy IP',
             resolver=lambda root, *_: root.get('serverip')
         ),
         'serverToClientBytes': GraphQLField(
             type=GraphQLInt,
+            description='Number of bytes sent from appliance to client',
             resolver=lambda root, *_: root.get('scbytes')
         ),
         'clientToServerBytes': GraphQLField(
             type=GraphQLInt,
+            description='Number of bytes sent from client to appliance',
             resolver=lambda root, *_: root.get('csbytes')
         )
     }
@@ -348,12 +365,12 @@ IncidentProgressionRequestType = GraphQLObjectType(
     fields={
         'clientIp': GraphQLField(
             type=SpotIpType,
-            description='Client\'s ip',
+            description='Client\'s IP',
             resolver=lambda root, *_: root.get('clientip')
         ),
         'referer': GraphQLField(
             type=GraphQLString,
-            description='URI that refers to Threat\'s URI',
+            description='The address of the webpage that linked to the resource being requested',
             resolver=lambda root, *_: root.get('referer')
         ),
         'requestMethod': GraphQLField(
@@ -363,7 +380,7 @@ IncidentProgressionRequestType = GraphQLObjectType(
         ),
         'responseContentType': GraphQLField(
             type=GraphQLString,
-            description='Response Content Type',
+            description='HTTP response content type (MIME)',
             resolver=lambda root, *_: root.get('resconttype')
         )
     }
@@ -374,7 +391,7 @@ IncidentProgressionType = GraphQLObjectType(
     fields={
         'uri': GraphQLField(
             type=GraphQLString,
-            description='Threat\'s URI',
+            description='Threat URI',
             resolver=lambda root, *_: root.get('fulluri')
         ),
         'refererFor': GraphQLField(
@@ -395,26 +412,32 @@ TimelineType = GraphQLObjectType(
     fields={
         'startDatetime': GraphQLField(
             type=SpotDatetimeType,
+            description='Connection\'s start time',
             resolver=lambda root, *_: root.get('tstart') or '1970-01-01 00:00:00'
         ),
         'endDatetime': GraphQLField(
             type=SpotDatetimeType,
+            description='Connection\'s end time',
             resolver=lambda root, *_: root.get('tend') or '1970-01-01 00:00:00'
         ),
         'duration': GraphQLField(
             type=GraphQLInt,
+            description='Connection duration',
             resolver=lambda root, *_: root.get('duration')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
+            description='Client\'s IP address',
             resolver=lambda root, *_: root.get('clientip')
         ),
         'responseCode': GraphQLField(
             type=GraphQLInt,
+            description='HTTP response code',
             resolver=lambda root, *_: root.get('respcode')
         ),
         'responseCodeLabel': GraphQLField(
             type=GraphQLString,
+            description='HTTP response code name',
             resolver=lambda root, *_: root.get('respcode_name')
         )
     }
@@ -429,7 +452,7 @@ ThreatInformationType = GraphQLObjectType(
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
-                    description='A date to use as reference for incident progression information. Defaults to today'
+                    description='A date to use as reference for detailed information. Defaults to today'
                 ),
                 'uri': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLString),
@@ -440,7 +463,7 @@ ThreatInformationType = GraphQLObjectType(
         ),
         'incidentProgression': GraphQLField(
             type=IncidentProgressionType,
-            description='Incident progression information',
+            description='Details the types of connections that make up the activity related to the threat',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -448,14 +471,14 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'uri': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLString),
-                    description='Threat\'s uri'
+                    description='Threat URI'
                 )
             },
             resolver=lambda root, args, *_: Proxy.incident_progression(date=args.get('date', date.today()), uri=args.get('uri'))
         ),
         'timeline': GraphQLField(
             type=GraphQLList(TimelineType),
-            description='Time based information about this threat',
+            description='Lists \'clusters\' of connections to the URI, grouped by time, giving an overall picture of the times of day with the most activity',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -463,7 +486,7 @@ ThreatInformationType = GraphQLObjectType(
                 ),
                 'uri': GraphQLArgument(
                     type=GraphQLNonNull(GraphQLString),
-                    description='Threat\'s URI'
+                    description='Threat URI'
                 )
             },
             resolver=lambda root, args, *_: Proxy.time_line(date=args.get('date', date.today()), uri=args.get('uri'))
@@ -476,7 +499,7 @@ QueryType = GraphQLObjectType(
     fields={
         'suspicious': GraphQLField(
             type=GraphQLList(SuspiciousType),
-            description='Proxy Suspicious requests',
+            description='Proxy suspicious requests',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -495,7 +518,7 @@ QueryType = GraphQLObjectType(
         ),
         'edgeDetails': GraphQLField(
             type=GraphQLList(EdgeDetailsType),
-            description='HTTP requests to a particular uri',
+            description='HTTP requests to a particular URI',
             args={
                 'date': GraphQLArgument(
                     type=SpotDateType,
@@ -507,7 +530,7 @@ QueryType = GraphQLObjectType(
                 ),
                 'clientIp': GraphQLArgument(
                     type=GraphQLNonNull(SpotIpType),
-                    description='Client\'s ip'
+                    description='Client\'s IP'
                 )
             },
             resolver=lambda root, args, *_: Proxy.details(date=args.get('date', date.today()), uri=args.get('uri'), ip=args.get('clientIp'))
@@ -524,7 +547,7 @@ QueryType = GraphQLObjectType(
         ),
         'ingestSummary': GraphQLField(
             type=GraphQLList(IngestSummaryType),
-            description='Total of ingested http requests',
+            description='Summary of ingested proxy records in range',
             args={
                 'startDate': GraphQLArgument(
                     type=GraphQLNonNull(SpotDateType),

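As with the flow pipeline, the reworked proxy query type can be driven from
the GraphQLClient helper that the updated ipython notebooks rely on. A minimal
sketch, using only fields shown in SuspiciousType above; the helper's import
is assumed to be available as in the notebook templates.

    # Minimal sketch: list proxy suspicious requests for one day.
    response = GraphQLClient.request(
        query="""query($date: SpotDateType!) {
            proxy {
                suspicious(date: $date) {
                    clientIp
                    host
                    uri
                    score
                    responseCodeLabel
                }
            }
        }""",
        variables={'date': '2017-03-15'})

    if 'errors' not in response:
        for row in response['data']['proxy']['suspicious']:
            print row['clientIp'], row['score'], row['uri']
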
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/graphql/schema.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/schema.py b/spot-oa/api/graphql/schema.py
index 3975b6c..488f310 100644
--- a/spot-oa/api/graphql/schema.py
+++ b/spot-oa/api/graphql/schema.py
@@ -14,17 +14,17 @@ SpotSchema = GraphQLSchema(
     fields={
       'flow': GraphQLField(
         type=NetflowQueryType,
-        description='Flow information',
+        description='Flow is a network protocol that collects IP traffic information for network traffic monitoring',
         resolver=lambda *_: {}
       ),
       'dns': GraphQLField(
         type=DnsQueryType,
-        description='Dns information',
+        description='Domain Name System (DNS) log records contain the requests between clients and DNS servers',
         resolver=lambda *_: {}
       ),
       'proxy': GraphQLField(
         type=ProxyQueryType,
-        description='Proxy Information',
+        description='Proxy logs contain the requests between clients and proxy servers',
         resolver=lambda *_: {}
       )
     }
@@ -34,14 +34,17 @@ SpotSchema = GraphQLSchema(
     fields={
         'flow': GraphQLField(
             type=NetflowMutationType,
+            description='Flow related mutation operations',
             resolver=lambda *_: {}
         ),
         'dns': GraphQLField(
             type=DnsMutationType,
+            description='DNS related mutation operations',
             resolver=lambda *_: {}
         ),
         'proxy': GraphQLField(
             type=ProxyMutationType,
+            description='Proxy related mutation operations',
             resolver=lambda *_: {}
         )
     }

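Because schema.py now documents each entry point, it can be useful to run a
query against SpotSchema directly, bypassing the HTTP layer. A minimal sketch
using graphql-core's top-level graphql() helper; the import path for
SpotSchema assumes spot-oa/ is on sys.path.

    from graphql import graphql

    from api.graphql.schema import SpotSchema  # assumes spot-oa/ on sys.path

    # Execute straight against the schema object; the flow/dns/proxy
    # resolvers return {} and nested fields resolve from there.
    result = graphql(SpotSchema, """
    {
        flow {
            threats {
                list(date: "2017-03-15") {
                    srcIp
                    dstIp
                    score
                }
            }
        }
    }
    """)

    if result.errors:
        print result.errors
    else:
        print result.data
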
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/api/resources/configurator.pyc
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/configurator.pyc b/spot-oa/api/resources/configurator.pyc
deleted file mode 100644
index 04505e0..0000000
Binary files a/spot-oa/api/resources/configurator.pyc and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4734f4f6/spot-oa/ui/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/ui/README.md b/spot-oa/ui/README.md
index 4b35c86..53c008b 100755
--- a/spot-oa/ui/README.md
+++ b/spot-oa/ui/README.md
@@ -110,6 +110,7 @@ For every path found on this document, "SPOT" refers to the path where Spot UI i
 
 1. Install Spot UI. Follow this [guide](INSTALL.md#install-spot-ui).
 2. Run Spot UI. Follow this [guide](INSTALL.md#how-to-run-spot-ui).
+    1. Set the SPOT_DEV env var to 1 to enable the GraphiQL UI and run ipython in debug mode.
 3. Start watching for code changes
     1. $ cd SPOT/ui/PIPELINE/
     2. Watch one of the following modules

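The SPOT_DEV flag mentioned in the README change above is a plain environment
variable. A sketch of how such a flag is typically read on the Python side;
the variable name comes from this README, the rest is illustrative.

    import os

    # SPOT_DEV=1 enables development-only features such as the GraphiQL UI.
    SPOT_DEV_ENABLED = os.environ.get('SPOT_DEV') == '1'
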

[10/50] [abbrv] incubator-spot git commit: Removed csv files from OA and ipython notebooks

Posted by ev...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
index a0a7d26..761a434 100644
--- a/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/flow/ipynb_templates/Threat_Investigation_master.ipynb
@@ -8,6 +8,7 @@
    },
    "outputs": [],
    "source": [
+    "import datetime\n",
     "import struct, socket\n",
     "import numpy as np\n",
     "import linecache, bisect\n",
@@ -16,37 +17,24 @@
     "import json\n",
     "import os\n",
     "import pandas as pd\n",
+    "\n",
     "try:\n",
     "    import ipywidgets as widgets # For jupyter/ipython >= 1.4\n",
     "except ImportError:\n",
     "    from IPython.html import widgets\n",
     "from IPython.display import display, Javascript, clear_output\n",
     "\n",
-    "with open('/etc/spot.conf') as conf:\n",
-    "    for line in conf.readlines():            \n",
-    "        if \"DBNAME=\" in line: DBNAME = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\");      \n",
-    "        elif \"IMPALA_DEM=\" in line: IMPALA_DEM = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\"); \n",
-    "\n",
     "spath = os.getcwd()\n",
     "path = spath.split(\"/\") \n",
     "date = path[len(path)-1]   \n",
     "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
     "cpath = '/'.join(['context' if var == 'ipynb' else var for var in path][:len(path)-2]) + '/'\n",
     "\n",
-    "sconnect = dpath + 'flow_scores.csv'\n",
-    "threats_file = dpath + 'threats.csv'\n",
-    "iploc = cpath + 'iploc.csv'\n",
-    "nwloc = cpath + 'networkcontext_1.csv'\n",
     "anchor = ''\n",
     "ir_f = ''\n",
     "threat_name = ''\n",
     "iplist = ''\n",
-    "top_results = 20\n",
-    "details_limit = 1000\n",
-    "if os.path.isfile(iploc):\n",
-    "    iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0}, converters={0: lambda s: np.uint32(s.replace('\"',''))})\n",
-    "else:\n",
-    "    print \"No iploc.csv file was found, Map View map won't be created\""
+    "top_results = 20"
    ]
   },
   {
@@ -123,7 +111,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
@@ -143,37 +131,50 @@
     "def start_investigation(): \n",
     "    display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))   \n",
     "    external_ips = []\n",
-    "    c_ips=[]\n",
-    "    clear_output() \n",
+    "    clear_output()  \n",
     "    \n",
-    "    if os.path.isfile(threats_file) and not file_is_empty(threats_file):\n",
-    "        with open(threats_file, 'r') as th:\n",
-    "            t_read = csv.DictReader(th, delimiter='|') \n",
-    "            for row in t_read: \n",
-    "                if row['ip'] != '' : c_ips.append(row['ip']) \n",
-    "\n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter=',') \n",
-    "        #Internal Netflows use case:\n",
-    "        for row in reader: \n",
-    "            if row['sev'] == '1':\n",
+    "    response = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                flow{\n",
+    "                    threats{\n",
+    "                        list(date:$date) {\n",
+    "                           srcIp\n",
+    "                           dstPort\n",
+    "                           dstIp\n",
+    "                           srcPort\n",
+    "                           score \n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    ) \n",
+    "     \n",
+    "    if not 'errors' in response : \n",
+    "        for row in response['data']['flow']['threats']['list']:\n",
+    "            if row['score'] == 1: \n",
     "                srcIP = ''\n",
     "                dstIP = '' \n",
-    "                if row['srcIP'] not in external_ips and row['srcIP'] not in c_ips: \n",
-    "                    external_ips.append(row['srcIP'])\n",
-    "                if row['dstIP'] not in external_ips and row['dstIP'] not in c_ips: \n",
-    "                    external_ips.append(row['dstIP'])\n",
-    "\n",
-    "    if len(external_ips) == 0:\n",
-    "        display(widgets.Box((widgets.HTML(value=\"There are no connections scored as High risk.\\\n",
-    "            You can score some connections at the 'Suspicious' panel.\", width='100%'),)))\n",
-    "    else:  \n",
-    "        sorted_dict = sorted(external_ips, key=operator.itemgetter(0))      \n",
-    "        display_controls(sorted_dict)   \n",
+    "                if row['srcIp'] not in external_ips: \n",
+    "                    external_ips.append(row['srcIp'])\n",
+    "                if row['dstIp'] not in external_ips: \n",
+    "                    external_ips.append(row['dstIp'])\n",
+    "\n",
+    "        if len(external_ips) == 0:\n",
+    "            display(widgets.Box((widgets.HTML(value=\"There are no connections scored as High risk.\\\n",
+    "                You can score some connections at the 'Suspicious' panel.\", width='100%'),)))\n",
+    "        else:  \n",
+    "            sorted_dict = sorted(external_ips, key=operator.itemgetter(0))      \n",
+    "            display_controls(sorted_dict)  \n",
+    "    else:   \n",
     "        \n",
+    "        display(widgets.Box((widgets.HTML(value=\"An error occurred while trying to get the results:\" \n",
+    "                                          + response['errors'][0]['message'], width='100%'),)))\n",
     "        \n",
     "\n",
-    "def display_controls(threat_list):        \n",
+    "def display_controls(threat_list):\n",
     "    threat_title.value =\"<h4>Suspicious Connections</h4>\"    \n",
     "    susp_select.options = threat_list\n",
     "    susp_select.height=150\n",
@@ -186,36 +187,49 @@
     "    def search_ip(b):\n",
     "        global anchor \n",
     "        global top_inbound_b\n",
+    "        global expanded_results\n",
     "        anchor = susp_select.value   \n",
     "        if anchor != \"\":\n",
     "            clear_output()        \n",
     "            removeWidget(1)\n",
-    "            print \"Searching for ip: \" + anchor\n",
-    "            global ir_f \n",
-    "            ir_f = dpath + \"ir-\" + anchor + \".tsv\"\n",
-    "\n",
-    "            if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):  \n",
-    "                imp_query = (\" \\\"SELECT min(treceived) as firstSeen, max(treceived) as lastSeen, sip as srcIP, dip as dstIP, \" + \n",
-    "                \"sport as SPort, dport AS Dport, count(sip) as conns, max(ipkt) as maxPkts, avg(ipkt) \" + \n",
-    "                \"as avgPkts, max(ibyt) as maxBytes, avg(ibyt) as avgBytes FROM \"+DBNAME+\".flow WHERE \" + \n",
-    "                \"y=\"+ date[0:4] +\" AND m=\"+ date[4:6] +\" AND d=\"+ date[6:] +\" \" + \n",
-    "                \" AND (sip =\\'\" + anchor + \"\\'  OR dip=\\'\" + anchor + \"\\') GROUP BY sip, dip,sport,dport\\\" \") \n",
-    "                !impala-shell -i $IMPALA_DEM --quiet -q \"INVALIDATE METADATA\"\n",
-    "                !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\\t' -q $imp_query -o $ir_f\n",
-    "            clear_output()\n",
     "            \n",
-    "            if not file_is_empty(ir_f):               \n",
+    "            expanded_results = GraphQLClient.request(\n",
+    "                        query=\"\"\"query($date:SpotDateType!,$ip:SpotIpType!){\n",
+    "                                  flow{\n",
+    "                                    threat{\n",
+    "                                      details(date:$date,ip:$ip) {\n",
+    "                                        srcIp\n",
+    "                                        maxBytes\n",
+    "                                        connections\n",
+    "                                        maxPkts\n",
+    "                                        avgPkts\n",
+    "                                        lastSeen\n",
+    "                                        srcPort\n",
+    "                                        firstSeen \n",
+    "                                        dstIp\n",
+    "                                        avgBytes\n",
+    "                                        dstPort\n",
+    "                                      }\n",
+    "                                    }\n",
+    "                                  }  \n",
+    "                                }\n",
+    "                                \"\"\",\n",
+    "                            variables={\n",
+    "                            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                            'ip': anchor\n",
+    "                            }\n",
+    "                        ) \n",
+    "             \n",
+    "            if not 'errors' in expanded_results :  \n",
     "                print \"\\n Looking for additional details...\"\n",
     "                display_threat_box(anchor)\n",
     "\n",
     "                get_in_out_and_twoway_conns() \n",
-    "                add_geospatial_info()\n",
-    "                add_network_context() \n",
-    "                \n",
+    "                 \n",
     "                display(bottomBox)     \n",
-    "            else:\n",
+    "            else: \n",
     "                display(widgets.Box((widgets.HTML(value=\"Something went wrong. \\\n",
-    "                    The expanded search couldn't be performed\", width='100%'),)))\n",
+    "                    The expanded search couldn't be performed\" + expanded_results['errors'][0]['message'], width='100%'),)))\n",
     "                        \n",
     "    search_btn.on_click(search_ip)\n",
     "\n",
@@ -241,10 +255,7 @@
     "        clear_output()        \n",
     "        removeWidget(1) \n",
     "        response = \"\"\n",
-    "        response += generate_attack_map_file(anchor, top_inbound_b, top_outbound_b, top_twoway_b)\n",
-    "        response += generate_stats(anchor, inbound, outbound, twoway, threat_name)\n",
-    "        response += generate_dendro(anchor, inbound, outbound, twoway, date)\n",
-    "        response += details_inbound(anchor, top_inbound_b, top_outbound_b, top_twoway_b)\n",
+    "        \n",
     "        response += add_threat(anchor, tc_txt_title.value, tc_txa_summary.value.replace('\\n', '\\\\n'))\n",
     "        response += \"Story board successfully created for {0}\".format(anchor)\n",
     "        start_investigation()\n",
@@ -262,234 +273,46 @@
    },
    "outputs": [],
    "source": [
-    "def details_inbound(anchor, inbound, outbound, twoway):\n",
-    "    top_keys = []\n",
-    "    if len(twoway) > 0: top_keys.extend(twoway.keys())\n",
-    "    if len(outbound) > 0: top_keys.extend(outbound.keys()) \n",
-    "    if len(inbound) > 0: top_keys.extend(inbound.keys())\n",
-    "    sbdet_f = dpath + \"sbdet-\" + anchor + \".tsv\"\n",
-    "    if not os.path.isfile(sbdet_f):\n",
-    "        imp_query = (\"\\\"SELECT min(treceived) as tstart, max(treceived) as tend, sip as srcIP, \"\n",
-    "            + \"dip as dstIP, proto as Proto, sport as SPort, dport AS Dport,ipkt as \"\n",
-    "            + \"Pkts, ibyt as Bytes  FROM \"+DBNAME+\".flow WHERE \"\n",
-    "            + \"y=\"+ date[0:4] +\" AND m=\"+ date[4:6] +\" AND d=\"+ date[6:]\n",
-    "            + \" AND ((dip IN({0}) AND sip ='{1}') OR \"\n",
-    "            + \"(sip IN({0}) \"\n",
-    "            + \"AND dip ='{1}')) GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt ORDER BY tstart \"\n",
-    "            + \"LIMIT {2}\\\" \")  \n",
-    "        ips = \"'\" + \"','\".join(top_keys) + \"'\"\n",
-    "        imp_query = imp_query.format(ips,anchor,details_limit)\n",
-    "        !impala-shell -i $IMPALA_DEM --quiet -q \"INVALIDATE METADATA\"\n",
-    "        !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\\t' -q $imp_query -o $sbdet_f\n",
-    "\n",
-    "        clear_output()\n",
-    "        return \"Timeline successfully created <br/>\"\n",
-    "    else:\n",
-    "        return \"Timeline file already existed <br/>\"\n",
-    "\n",
-    "        \n",
-    "def generate_dendro(ip, inbound, outbound, twoway, date):  \n",
-    "    dendro_fpath = dpath + 'threat-dendro-' + anchor + \".json\"\n",
-    "    \n",
-    "    obj = {\n",
-    "        'name':ip,\n",
-    "        'children': [],\n",
-    "        'time': date        \n",
-    "    }\n",
-    "    \n",
-    "    #----- Add Inbound Connections-------#\n",
-    "    if len(inbound) > 0:\n",
-    "        obj[\"children\"].append({'name': 'Inbound Only', 'children': [], 'impact': 0})    \n",
-    "        in_ctxs = {}\n",
-    "        for ip in inbound:\n",
-    "            if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:\n",
-    "                ctx = inbound[ip]['nwloc'][2] # get the machine type Only for vast Data\n",
-    "                if ctx not in in_ctxs:\n",
-    "                    in_ctxs[ctx] = 1\n",
-    "                else:\n",
-    "                    in_ctxs[ctx] += 1\n",
-    "        for ctx in in_ctxs:\n",
-    "            obj[\"children\"][0]['children'].append({\n",
-    "                    'name': ctx,\n",
-    "                    'impact': in_ctxs[ctx]\n",
-    "                })         \n",
-    "    \n",
-    "    #------ Add Outbound ----------------#\n",
-    "    if len(outbound) > 0:\n",
-    "        obj[\"children\"].append({'name': 'Outbound Only', 'children': [], 'impact': 0})\n",
-    "        out_ctxs = {}\n",
-    "        for ip in outbound:       \n",
-    "            if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:\n",
-    "                ctx = outbound[ip]['nwloc'][2] # get the machine type Only for vast Data\n",
-    "                if ctx not in out_ctxs:\n",
-    "                    out_ctxs[ctx] = 1\n",
-    "                else:\n",
-    "                    out_ctxs[ctx] += 1\n",
-    "        for ctx in out_ctxs:\n",
-    "            obj[\"children\"][1]['children'].append({\n",
-    "                    'name': ctx,\n",
-    "                    'impact': out_ctxs[ctx]\n",
-    "                }) \n",
-    "    \n",
-    "    #------ Add TwoWay ----------------#\n",
-    "    if len(twoway) > 0:\n",
-    "        obj[\"children\"].append({'name': 'two way', 'children': [], 'impact': 0})\n",
-    "        tw_ctxs = {}\n",
-    "        for ip in twoway:\n",
-    "            if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:\n",
-    "                ctx = twoway[ip]['nwloc'][2] # get the machine type Only for vast Data\n",
-    "                if ctx not in tw_ctxs:\n",
-    "                    tw_ctxs[ctx] = 1\n",
-    "                else:\n",
-    "                    tw_ctxs[ctx] += 1\n",
-    "\n",
-    "        for ctx in tw_ctxs:\n",
-    "            obj[\"children\"][2]['children'].append({\n",
-    "                    'name': ctx,\n",
-    "                    'impact': tw_ctxs[ctx]\n",
-    "                })\n",
-    "    \n",
-    "    with open(dendro_fpath, 'w') as dendro_f:\n",
-    "        dendro_f.write(json.dumps(obj))\n",
-    "    return \"Incident progression successfully created <br/>\"\n",
-    "\n",
-    "    \n",
-    "def generate_stats(ip, inbound, outbound, twoway, threat_name):\n",
-    "    stats_fpath = dpath + 'stats-' + anchor + \".json\"\n",
-    "    \n",
-    "    obj = {\n",
-    "        'name':threat_name,\n",
-    "        'children': [],\n",
-    "        'size': len(inbound) + len(outbound) + len(twoway)\n",
-    "    }\n",
-    "    \n",
-    "    #----- Add Inbound Connections-------#\n",
-    "    obj[\"children\"].append({'name': 'Inbound Only', 'children': [], 'size': len(inbound)})    \n",
-    "    in_ctxs = {}\n",
-    "    for ip in inbound:\n",
-    "        full_ctx = ''\n",
-    "        if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:\n",
-    "            full_ctx = inbound[ip]['nwloc'][2].split('.')[0]\n",
-    "        ctx = get_ctx_name(full_ctx) # get the machine type Only for vast Data\n",
-    "        if ctx not in in_ctxs:\n",
-    "            in_ctxs[ctx] = 1\n",
-    "        else:\n",
-    "            in_ctxs[ctx] += 1\n",
-    "    for ctx in in_ctxs:\n",
-    "        obj[\"children\"][0]['children'].append({\n",
-    "                'name': ctx,\n",
-    "                'size': in_ctxs[ctx]\n",
-    "            })        \n",
-    "        \n",
-    "    \n",
-    "    #------ Add Outbound ----------------#\n",
-    "    obj[\"children\"].append({'name': 'Outbound Only', 'children': [], 'size': len(outbound)})\n",
-    "    out_ctxs = {}\n",
-    "    for ip in outbound:\n",
-    "        full_ctx = ''\n",
-    "        if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:\n",
-    "            full_ctx = outbound[ip]['nwloc'][2].split('.')[0]\n",
-    "        ctx = get_ctx_name(full_ctx) # get the machine type Only for vast Data\n",
-    "        if ctx not in out_ctxs:\n",
-    "            out_ctxs[ctx] = 1\n",
-    "        else:\n",
-    "            out_ctxs[ctx] += 1\n",
-    "    for ctx in out_ctxs:\n",
-    "        obj[\"children\"][1]['children'].append({\n",
-    "                'name': ctx,\n",
-    "                'size': out_ctxs[ctx]\n",
-    "            }) \n",
-    "    \n",
-    "    #------ Add Twoway ----------------#\n",
-    "    obj[\"children\"].append({'name': 'two way', 'children': [], 'size': len(twoway)})\n",
-    "    tw_ctxs = {}\n",
-    "    for ip in twoway:\n",
-    "        full_ctx = ''\n",
-    "        if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:\n",
-    "            full_ctx = twoway[ip]['nwloc'][2].split('.')[0]\n",
-    "        ctx = get_ctx_name(full_ctx) # get the machine type Only for vast Data\n",
-    "        if ctx not in tw_ctxs:\n",
-    "            tw_ctxs[ctx] = 1\n",
-    "        else:\n",
-    "            tw_ctxs[ctx] += 1\n",
-    "    \n",
-    "    for ctx in tw_ctxs:\n",
-    "        obj[\"children\"][2]['children'].append({\n",
-    "                'name': ctx,\n",
-    "                'size': tw_ctxs[ctx]\n",
-    "            })\n",
-    "    \n",
-    "    json_str = json.dumps(obj)\n",
-    "    with open(stats_fpath, 'w') as stats_f:\n",
-    "        stats_f.write(json_str)\n",
-    "    return \"Stats file successfully created <br/>\"\n",
-    "\n",
-    "    \n",
-    "def get_ctx_name(full_context):    \n",
-    "    ctx= 'DMZ'\n",
-    "    if \"VPN\" in full_context:\n",
-    "        ctx = \"VPN\" \n",
-    "    elif \"DMZ\" in full_context:\n",
-    "        ctx = \"DMZ\"\n",
-    "    elif \"Proxy\" in full_context:\n",
-    "        ctx = \"Proxy\" \n",
-    "    elif \"FW\" in full_context:\n",
-    "        ctx = \"FW\"     \n",
-    "    return ctx\n",
     "\n",
-    "\n",
-    "# calculate number of inbound only, two-way, and outbound only\n",
-    "# build dict of IP addresses\n",
-    "# firstSeen,lastSeen,srcIP, dstIP, sport,dport,conns, maxPkts, avgPkts,maxBytes, avgBytes\n",
     "def get_in_out_and_twoway_conns():\n",
-    "    global inbound\n",
-    "    inbound = {}\n",
-    "    global outbound\n",
-    "    outbound = {}\n",
-    "    global twoway\n",
-    "    twoway = {}\n",
     "    srcdict = {}\n",
     "    dstdict = {}\n",
     "    conns_dict= {} \n",
-    "    rowct = 0\n",
-    "    if os.path.isfile(ir_f):         \n",
-    "        df = pd.read_csv(ir_f,sep='\\t') \n",
-    "        with open(ir_f, 'r') as f:\n",
-    "            reader = csv.reader(f,delimiter='\\t')\n",
-    "            reader.next() #skip headers \n",
-    "            for row in reader:  \n",
-    "                if row != []:\n",
-    "                    srcdict[row[2]] = {\n",
-    "                        'ip_int': struct.unpack(\"!L\", socket.inet_aton(row[2]))[0],\n",
-    "                        'dst_ip': row[3],\n",
-    "                        'dst_ip_int': struct.unpack(\"!L\", socket.inet_aton(row[3]))[0],\n",
-    "                        'conns': int(row[6]),\n",
-    "                        'maxbytes': int(row[9])\n",
-    "                    }\n",
-    "                    dstdict[row[3]] = {\n",
-    "                         'ip_int': struct.unpack(\"!L\", socket.inet_aton(row[3]))[0],\n",
-    "                         'src_ip': row[2],\n",
-    "                         'src_ip_int': struct.unpack(\"!L\", socket.inet_aton(row[2]))[0],\n",
-    "                         'conns': int(row[6]),\n",
-    "                         'maxbytes': int(row[9])\n",
-    "                        }       \n",
-    "                    rowct +=1  \n",
+    "    rowct = 0 \n",
+    "    if not 'errors' in expanded_results :         \n",
+    "        df = pd.DataFrame(expanded_results['data']['flow']['threat']['details']) \n",
+    "        for row in expanded_results['data']['flow']['threat']['details']:  \n",
+    "            srcdict[row['srcIp']] = {\n",
+    "                'ip_int': struct.unpack(\"!L\", socket.inet_aton(str(row['srcIp'])))[0],\n",
+    "                'dst_ip': row['dstIp'],\n",
+    "                'dst_ip_int': struct.unpack(\"!L\", socket.inet_aton(str(row['dstIp'])))[0],\n",
+    "                'conns': int(row['connections']),\n",
+    "                'maxbytes': int(row['maxBytes'])\n",
+    "            }\n",
+    "            dstdict[row['dstIp']] = {\n",
+    "                 'ip_int': struct.unpack(\"!L\", socket.inet_aton(str(row['dstIp'])))[0],\n",
+    "                 'src_ip': row['srcIp'],\n",
+    "                 'src_ip_int': struct.unpack(\"!L\", socket.inet_aton(str(row['srcIp'])))[0],\n",
+    "                 'conns': int(row['connections']),\n",
+    "                 'maxbytes': int(row['maxBytes'])\n",
+    "                }       \n",
+    "            rowct +=1  \n",
     "        \n",
-    "        src = df.loc[df['dstip'] == anchor]\n",
-    "        src_per_conns = src.sort_values('conns',0,False)\n",
-    "        src_per_bytes = src.sort_values('maxbytes',0,False)\n",
-    "        dst = df.loc[df['srcip'] == anchor]\n",
-    "        dst_per_conns = dst.sort_values('conns',0,False)\n",
-    "        dst_per_bytes = dst.sort_values('maxbytes',0,False)\n",
+    "        src = df.loc[df['dstIp'] == anchor]\n",
+    "        src_per_conns = src.sort_values('connections',0,False)\n",
+    "        src_per_bytes = src.sort_values('maxBytes',0,False)\n",
+    "        dst = df.loc[df['srcIp'] == anchor]\n",
+    "        dst_per_conns = dst.sort_values('connections',0,False)\n",
+    "        dst_per_bytes = dst.sort_values('maxBytes',0,False)\n",
     "\n",
     "        children = []\n",
-    "        children += (display_results(['srcip','conns','sport','dport'], src_per_conns, \n",
+    "        children += (display_results(['srcIp','connections','srcPort','dstPort'], src_per_conns, \n",
     "                                          top_results),)\n",
-    "        children += (display_results(['dstip','conns','sport','dport'], dst_per_conns, \n",
+    "        children += (display_results(['dstIp','connections','srcPort','dstPort'], dst_per_conns, \n",
     "                                          top_results),)\n",
-    "        children += (display_results(['srcip','maxbytes','sport','dport'], src_per_bytes, \n",
+    "        children += (display_results(['srcIp','maxBytes','srcPort','dstPort'], src_per_bytes, \n",
     "                                           top_results),)\n",
-    "        children += (display_results(['dstip','maxbytes','sport','dport'], dst_per_bytes, \n",
+    "        children += (display_results(['dstIp','maxBytes','srcPort','dstPort'], dst_per_bytes, \n",
     "                                           top_results),)\n",
     "\n",
     "        result_tabs = widgets.Accordion(children=children, width='100%', selected_index=-1)\n",
@@ -505,245 +328,51 @@
     "\n",
     "        resultTableBox.children = [result_tabs,]\n",
     "         \n",
-    "    if rowct > 0:\n",
-    "        for result in srcdict:\n",
-    "            if result in dstdict:\n",
-    "                twoway[result] = srcdict[result]\n",
-    "            else:\n",
-    "                outbound[result] = srcdict[result]\n",
-    "\n",
-    "        for result in dstdict:\n",
-    "            if result not in srcdict:\n",
-    "                inbound[result] = dstdict[result]   \n",
-    "                \n",
-    "    global top_inbound_b\n",
-    "    global top_outbound_b\n",
-    "    global top_twoway_b\n",
-    "    if len(inbound) > 0:\n",
-    "        top_inbound_b = get_top_bytes(inbound,top_results)\n",
-    "        top_inbound_conns = get_top_conns(inbound,top_results)\n",
-    "        top_inbound_b.update(top_inbound_conns) # merge the two dictionaries\n",
-    "    if len(outbound) > 0:\n",
-    "        top_outbound_b = get_top_bytes(outbound,top_results)\n",
-    "        top_outbound_conns = get_top_conns(outbound,top_results)\n",
-    "        top_outbound_b.update(top_outbound_conns) # merge the two dictionaries\n",
-    "    if len(twoway) > 0:        \n",
-    "        top_twoway_b = get_top_bytes(twoway,top_results)\n",
-    "        top_twoway_conns = get_top_conns(twoway,top_results)\n",
-    "        top_twoway_b.update(top_twoway_conns) # merge the two dictionaries\n",
     "\n",
     "def display_results(cols, dataframe, top):    \n",
     "    table = dataframe[:top].to_html(classes='table table-striped table-bordered table-hover', columns=cols, index=False)\n",
     "    return widgets.HTML(value=table, width='100%')\n",
-    "            \n",
-    "#=========== Adds GEO IP information to the outbound, inbound and twoway connections==============================# \n",
-    "def add_geospatial_info():\n",
-    "    # get geospatial info, only when iplocation file is available\n",
-    "    if iplist != '':\n",
-    "        for srcip in outbound:\n",
-    "            reader = csv.reader([linecache.getline(iploc, bisect.bisect(iplist,outbound[srcip]['ip_int'])).replace('\\n','')])\n",
-    "            outbound[srcip]['geo'] = reader.next()\n",
-    "            reader = csv.reader([linecache.getline(iploc, bisect.bisect(iplist,outbound[srcip]['dst_ip_int'])).replace('\\n','')])\n",
-    "            outbound[srcip]['geo_dst'] = reader.next()\n",
-    "\n",
-    "        for dstip in twoway:\n",
-    "            reader = csv.reader([linecache.getline(iploc, bisect.bisect(iplist,twoway[dstip]['ip_int'])).replace('\\n','')])\n",
-    "            twoway[dstip]['geo'] = reader.next()\n",
-    "\n",
-    "        for srcip in inbound:\n",
-    "            reader = csv.reader([linecache.getline(iploc, bisect.bisect(iplist,inbound[srcip]['ip_int'])).replace('\\n','')])\n",
-    "            inbound[srcip]['geo'] = reader.next()\n",
-    "            reader = csv.reader([linecache.getline(iploc, bisect.bisect(iplist,inbound[srcip]['src_ip_int'])).replace('\\n','')])\n",
-    "            inbound[srcip]['geo_src'] = reader.next()\n",
-    "     \n",
-    "             \n",
-    "              \n",
-    "# need some way to combine timelines of outbound and two-way with big picture inbound only\n",
-    "# get network context - get start and end ranges\n",
-    "def add_network_context():\n",
-    "    nwdict = {}\n",
-    "    if os.path.isfile(nwloc) : \n",
-    "        with open(nwloc, 'r') as f:\n",
-    "            reader = csv.reader(f,delimiter=',')\n",
-    "            reader.next()\n",
-    "            #address range, description\n",
-    "            for row in reader:\n",
     "\n",
-    "                if '/' in row[0]: \n",
-    "                    #Range in subnet\n",
-    "                    iprange = row[0].split('/')\n",
-    "                    if len(iprange) < 2:\n",
-    "                        ipend = 0\n",
-    "                    else:\n",
-    "                        ipend = int(iprange[1])\n",
-    "                    nwdict[row[0]] = [struct.unpack(\"!L\", socket.inet_aton(iprange[0]))[0],\n",
-    "                                      struct.unpack(\"!L\", socket.inet_aton(iprange[0]))[0]+2**(32-ipend)-1, row[1]]  \n",
-    "                elif '-' in row[0]: \n",
-    "                    #IP Range \n",
-    "                    iprange = row[0].split('-')   \n",
-    "                    nwdict[row[0]] = [struct.unpack(\"!L\", socket.inet_aton(iprange[0].replace(\" \", \"\")))[0],\n",
-    "                                      struct.unpack(\"!L\", socket.inet_aton(iprange[1].replace(\" \", \"\")))[0], row[1]]\n",
-    "                else:\n",
-    "                    #Exact match  \n",
-    "                    nwdict[row[0]] = [struct.unpack(\"!L\", socket.inet_aton(row[0]))[0],\n",
-    "                                      struct.unpack(\"!L\", socket.inet_aton(row[0]))[0], row[1]] \n",
-    "\n",
-    "        for srcip in outbound:  \n",
-    "            temp_ip = struct.unpack(\"!L\", socket.inet_aton(srcip))[0]\n",
-    "            if srcip in nwdict:\n",
-    "                inbound[srcip]['nwloc'] = nwdict[srcip]\n",
-    "            else:\n",
-    "                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]\n",
-    "                outbound[srcip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''      \n",
-    "\n",
-    "        for dstip in twoway:  \n",
-    "            temp_ip = struct.unpack(\"!L\", socket.inet_aton(dstip))[0]\n",
-    "            if dstip in nwdict:\n",
-    "                twoway[dstip]['nwloc'] = nwdict[dstip]\n",
-    "            else:\n",
-    "                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]\n",
-    "                twoway[dstip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''\n",
-    "\n",
-    "        for srcip in inbound:\n",
-    "            temp_ip = struct.unpack(\"!L\", socket.inet_aton(srcip))[0]\n",
-    "            if srcip in nwdict:\n",
-    "                inbound[srcip]['nwloc'] = nwdict[srcip]\n",
-    "            else:\n",
-    "                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]\n",
-    "                inbound[srcip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''\n",
-    "\n",
-    "                \n",
-    "def generate_attack_map_file(ip, inbound, outbound, twoway): \n",
-    "    if iplist != '':\n",
-    "        globe_fpath = dpath + 'globe-' + ip + \".json\"\n",
-    "        globe_json = {}\n",
-    "        globe_json['type'] = \"FeatureCollection\"\n",
-    "        globe_json['sourceips'] = []\n",
-    "        globe_json['destips'] = [] \n",
-    "        for srcip in twoway:\n",
-    "            try:\n",
-    "                row =  twoway[srcip]['geo']                \n",
-    "                globe_json['destips'].append({\n",
-    "                        'type': 'Feature',\n",
-    "                        'properties': {\n",
-    "                            'location':row[8],\n",
-    "                            'ip':srcip,\n",
-    "                            'type':1\n",
-    "                        },\n",
-    "                        'geometry': {\n",
-    "                            'type': 'Point',\n",
-    "                            'coordinates': [float(row[7]), float(row[6])]\n",
-    "                        }\n",
-    "                    })\n",
-    "            except ValueError:\n",
-    "                pass\n",
-    "\n",
-    "\n",
-    "        for dstip in outbound:\n",
-    "            try:\n",
-    "                row =  outbound[dstip]['geo']\n",
-    "                dst_geo = outbound[dstip]['geo_dst']\n",
-    "                globe_json['sourceips'].append({\n",
-    "                        'type': 'Feature',\n",
-    "                        'properties': {\n",
-    "                            'location':row[8],\n",
-    "                            'ip':dstip,\n",
-    "                            'type':3\n",
-    "                        },\n",
-    "                        'geometry': {\n",
-    "                            'type': 'Point',\n",
-    "                            'coordinates': [float(row[7]), float(row[6])]\n",
-    "                        }\n",
-    "                    })\n",
-    "                globe_json['destips'].append({\n",
-    "                        'type': 'Feature',\n",
-    "                        'properties': {\n",
-    "                            'location':row[8],\n",
-    "                            'ip':outbound[dstip]['dst_ip'],\n",
-    "                            'type':3\n",
-    "                        },\n",
-    "                        'geometry': {\n",
-    "                            'type': 'Point',\n",
-    "                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]\n",
-    "                        }\n",
-    "                    }) \n",
-    "\n",
-    "            except ValueError:\n",
-    "                pass\n",
-    "\n",
-    "        for dstip in inbound:\n",
-    "            try:\n",
-    "                row =  inbound[dstip]['geo']\n",
-    "                dst_geo = inbound[dstip]['geo_src']\n",
-    "                globe_json['sourceips'].append({\n",
-    "                        'type': 'Feature',\n",
-    "                        'properties': {\n",
-    "                            'location':row[8],\n",
-    "                            'ip':dstip,\n",
-    "                            'type':2\n",
-    "                        },\n",
-    "                        'geometry': {\n",
-    "                            'type': 'Point',\n",
-    "                            'coordinates': [float(row[7]), float(row[6])]\n",
-    "                        }\n",
-    "                    })\n",
-    "                globe_json['destips'].append({\n",
-    "                        'type': 'Feature',\n",
-    "                        'properties': {\n",
-    "                            'location':row[8],\n",
-    "                            'ip':inbound[dstip]['src_ip'],\n",
-    "                            'type':2\n",
-    "                        },\n",
-    "                        'geometry': {\n",
-    "                            'type': 'Point',\n",
-    "                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]\n",
-    "                        }\n",
-    "                    })\n",
-    "            except ValueError:\n",
-    "                pass\n",
-    "\n",
-    "        json_str = json.dumps(globe_json)\n",
-    "        with open(globe_fpath, 'w') as globe_f:\n",
-    "            globe_f.write(json_str)\n",
-    "        response = \"Geolocation map successfully created <br/>\"\n",
-    "    else:\n",
-    "        response = \"The map can't be created without an iploc file <br/>\"        \n",
-    "        \n",
-    "    return response\n",
     "\n",
     "    \n",
     "def add_threat(ip,threat_title, threat_comment):\n",
-    "    content = ''\n",
-    "    try:\n",
-    "        threat_f = open(threats_file, 'r')\n",
-    "        content = threat_f.read()\n",
-    "        if '{0}|{1}|{2}\\n'.format(ip,threat_title,threat_comment) not in content:\n",
-    "            content += '{0}|{1}|{2}\\n'.format(ip,threat_title,threat_comment)\n",
-    "        threat_f.close() \n",
-    "    except:\n",
-    "        content = 'ip|title|summary\\n'\n",
-    "        content += '{0}|{1}|{2}\\n'.format(ip,threat_title,threat_comment)\n",
-    "    \n",
-    "    threat_fw = open(threats_file, 'w')\n",
-    "    threat_fw.write(content)\n",
-    "    threat_fw.close()\n",
-    "    return \"\"\n",
-    "\n",
+    "       \n",
+    "    mutation=\"\"\"mutation(\n",
+    "                $date: SpotDateType, \n",
+    "                $ip: SpotIpType!, \n",
+    "                $text: String!, \n",
+    "                $title: String!,\n",
+    "                $threatDetails: [NetflowThreatDetailsInputType]!,\n",
+    "                $topResults:Int) \n",
+    "                {\n",
+    "                  flow {\n",
+    "                    createStoryboard(input:{\n",
+    "                        threatDetails: $threatDetails,\n",
+    "                        date: $date, \n",
+    "                        ip: $ip, \n",
+    "                        title: $title, \n",
+    "                        text: $text,\n",
+    "                        first:$topResults})\n",
+    "                    {success}\n",
+    "                  }\n",
+    "                }\"\"\"\n",
+    " \n",
+    "    variables={\n",
+    "        'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "        'ip': ip,\n",
+    "        'title': threat_title,\n",
+    "        'text': threat_comment,\n",
+    "        'threatDetails': expanded_results['data']['flow']['threat']['details'],\n",
+    "        'first':top_results\n",
+    "\n",
+    "        }\n",
     "    \n",
-    "def get_top_bytes(conns_dict, top):\n",
-    "    topbytes = sorted(conns_dict.iteritems(), key=lambda (x,y): y['maxbytes'], reverse=True)\n",
-    "    topbytes = topbytes[0:top]\n",
-    "    return dict(topbytes)\n",
-    "\n",
-    "\n",
-    "def get_top_conns(conns_dict, top):    \n",
-    "    topconns = sorted(conns_dict.iteritems(), key=lambda (x,y): y['conns'], reverse=True)\n",
-    "    topconns = topconns[0:top]\n",
-    "    return dict(topconns)\n",
+    "    response = GraphQLClient.request(mutation,variables)\n",
+    "    if not 'errors' in response:\n",
+    "        return \"Story board successfully created\"\n",
+    "    else:\n",
+    "        return response['errors'][0]['message']\n",
     "    \n",
-    "def file_is_empty(path):\n",
-    "    return os.stat(path).st_size==0\n",
-    "\n",
     "def removeWidget(index):\n",
     "    js_command = \"$('.widget-area > .widget-subarea > .widget-box:eq({0})').remove();\".format(index)    \n",
     "    display(Javascript(js_command))"
@@ -777,7 +406,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.10"
+   "version": "2.7.5"
   }
  },
  "nbformat": 4,

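Each of the notebooks in this change calls a GraphQLClient helper whose
definition sits outside this diff. A rough stand-in honoring the same
request(query, variables) contract could look like the sketch below; the
endpoint URL is an assumption, not the helper's actual configuration:

    # Minimal GraphQL client sketch compatible with the notebooks'
    # GraphQLClient.request(query, variables) calls (Python 2).
    import json
    import urllib2

    class GraphQLClient(object):
        ENDPOINT = 'http://localhost:8889/graphql'  # assumed endpoint

        @classmethod
        def request(cls, query, variables=None):
            payload = json.dumps({'query': query, 'variables': variables or {}})
            req = urllib2.Request(cls.ENDPOINT, payload,
                                  {'Content-Type': 'application/json'})
            return json.loads(urllib2.urlopen(req).read())
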
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index 0cfc28b..8eedc53 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -18,18 +18,14 @@
     "import urllib2\n",
     "import json\n",
     "import os\n",
+    "import datetime\n",
     "import csv \n",
     "\n",
     "# getting date from the parent path. \n",
     "path = os.getcwd().split(\"/\") \n",
     "date = path[len(path)-1]   \n",
     "dsource = path[len(path)-2]  \n",
-    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
-    "\n",
-    "sconnect = dpath + 'proxy_scores.tsv'\n",
-    "sconnectbu = dpath + 'proxy_scores_bu.tsv'\n",
-    "score_tmp = dpath + 'proxy_tmp.tsv'  \n",
-    "score_fbk = dpath + 'proxy_scores_fb.csv'"
+    "score_values = []"
    ]
   },
   {
@@ -47,7 +43,7 @@
     "from IPython.display import display, HTML, clear_output, Javascript \n",
     "\n",
     "def fill_list(list_control,source):\n",
-    "    options_list = ['--Select--'] \n",
+    "    options_list = ['- Select -'] \n",
     "    options_list.extend([s for s in source])\n",
     "    list_control.options = options_list\n",
     "\n",
@@ -90,16 +86,28 @@
     "\n",
     "def data_loader(): \n",
     "    us_uris = []\n",
-    "\n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter='\\t')\n",
+    "    \n",
+    "    response = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                proxy{\n",
+    "                    suspicious(date:$date){\n",
+    "                    uri\n",
+    "                }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )\n",
+    "    \n",
+    "    if not 'errors' in response:\n",
+    "        for row in response['data']['proxy']['suspicious']:\n",
+    "            us_uris.append(row['uri'])\n",
+    "    else:\n",
+    "        print 'An error occured : '+ response['errors'][0]['message']\n",
     "        \n",
-    "        for row in reader: \n",
-    "            if row['fulluri'] not in us_uris and row['uri_sev'] == '0': \n",
-    "                us_uris.append(row['fulluri'])\n",
-    "\n",
     "    fill_list(uri_select,us_uris)\n",
-    "    uri_select.value = \"--Select--\"   \n",
+    "    uri_select.value = \"- Select -\"   \n",
     "\n",
     "display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
     "data_loader()\n",
@@ -125,83 +133,61 @@
     "import datetime\n",
     "import subprocess \n",
     "\n",
-    "def assign_score(b):\n",
-    "    scored_threats = []\n",
+    "def assign_score(b): \n",
     "    clear_output()\n",
     "    uri = quick_text.value or uri_select.value\n",
-    "    uri_sev = int(rating_btn.selected_label) if not \"--Select--\" in uri_select.value else \"\"\n",
-    "\n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.DictReader(f, delimiter='\\t')\n",
-    "        rowct = 0\n",
-    "        with open(score_tmp, 'w') as score:\n",
-    "            wr = csv.DictWriter(score, delimiter='\\t', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "            wr.writeheader()\n",
-    "                \n",
-    "            for row in reader:\n",
-    "                if row['fulluri'] == uri:\n",
-    "                    row['uri_sev'] = uri_sev\n",
-    "                    scored_threats.append(row)                    \n",
-    "                    rowct += 1\n",
-    "                try:\n",
-    "                    wr.writerow(row)\n",
-    "                except:\n",
-    "                    print str(row)\n",
-    "                    \n",
-    "        #works on the feedback tab-separated file\n",
-    "        if not os.path.exists(score_fbk):  \n",
-    "            with open(score_fbk, 'w') as feedback:\n",
-    "                wr = csv.DictWriter(feedback, delimiter='\\t', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "                wr.writeheader()\n",
-    "            \n",
-    "        with open(score_fbk, 'a') as feedback:\n",
-    "            for row in scored_threats:\n",
-    "                wr = csv.DictWriter(feedback, delimiter='\\t', quoting=csv.QUOTE_NONE, fieldnames=reader.fieldnames)            \n",
-    "                wr.writerow(row)\n",
-    "  \n",
+    "    uri_sev = int(rating_btn.selected_label) if not \"- Select -\" in uri_select.value else \"\"\n",
+    "    \n",
+    "    clear_output()\n",
+    "    #Gets input values\n",
+    "    global score_values\n",
+    "    \n",
+    "    score_values.append((uri, uri_sev))\n",
+    "    \n",
+    "    if uri_select.value != \"- Select -\":\n",
+    "        display(Javascript(\"$(\\\"option[data-value='\" + uri_select.value +\"']\\\").remove();\"))\n",
+    "      \n",
     "    clear_output()\n",
-    "    print \"{0} matching requests scored\".format(rowct)\n",
-    "    !mv $score_tmp $sconnect\n",
     "    data_loader()\n",
-    "    uri_select.value = \"--Select--\"\n",
+    "    uri_select.value = \"- Select -\"\n",
     "    quick_text.value = \"\"\n",
     "\n",
     "\n",
     "def save(b):   \n",
-    "    clear_output()\n",
-    "    display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
-    "    data_loader()\n",
-    "    display(scoring_form)\n",
-    "    display(Javascript('reloadParentData();')) \n",
-    "    ml_feedback()\n",
-    "    print \"Suspicious requests successfully updated\"\n",
-    "\n",
+    "    variables=[]\n",
+    "    global score_values\n",
+    "    mutation=\"\"\"mutation($input:[ProxyScoreInputType!]!)\n",
+    "                {\n",
+    "                  proxy{\n",
+    "                    score(input:$input)\n",
+    "                        {success}\n",
+    "                  }\n",
+    "                }\"\"\" \n",
+    "    \n",
+    "    for row in score_values:\n",
+    "        variables.append({\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "            'uri': row[0] if row[0] != \"\" else None,\n",
+    "            'score': row[1] if row[1] != \"\" else None  \n",
+    "            })\n",
+    "\n",
+    "    var = {'input':variables}\n",
+    "    response = GraphQLClient.request(mutation,var)\n",
+    "    \n",
+    "    score_values = []\n",
+    "    if not 'errors' in response:\n",
+    "        clear_output()    \n",
+    "        display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
+    "        data_loader() \n",
+    "        display(scoring_form)\n",
+    "        display(Javascript('reloadParentData();')) \n",
+    "        print \"Suspicious connects successfully updated\"\n",
+    "    else:\n",
+    "        print \"An error ocurred: \" + response['errors'][0]['message']\n",
     "\n",
-    "assign_btn.on_click(assign_score)\n",
-    "save_btn.on_click(save)\n",
     "        \n",
-    "\n",
-    "def ml_feedback():\n",
-    "    dst_name = os.path.basename(sconnect)\n",
-    "    str_fb=\"DSOURCE={0} &&\\\n",
-    "        FDATE={1} &&\\\n",
-    "        source /etc/spot.conf &&\\\n",
-    "        usr=$(echo $LUSER | cut -f3 -d'/') &&\\\n",
-    "        mlnode=$MLNODE &&\\\n",
-    "        lpath=$LPATH &&\\\n",
-    "        scp {2} $usr@$mlnode:$lpath/{3}\".format(dsource,date,score_fbk,dst_name)\n",
-    "    subprocess.call(str_fb, shell=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "# !cp $sconnectbu $sconnect"
+    "assign_btn.on_click(assign_score)\n",
+    "save_btn.on_click(save)"
    ]
   }
  ],

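For reference, the score mutation introduced above takes a batch of
ProxyScoreInputType objects, so several URIs can be rated in one round trip;
variables for two illustrative URIs would be shaped like this:

    # Illustrative variables for the proxy score mutation above.
    var = {'input': [
        {'date': '2017-03-13', 'uri': 'http://example.com/a', 'score': 1},
        {'date': '2017-03-13', 'uri': 'http://example.com/b', 'score': 3}]}
    response = GraphQLClient.request(mutation, var)
    if not 'errors' in response:
        print 'scored', len(var['input']), 'URIs'
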
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
index 5cd89db..f68f5c9 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Threat_Investigation_master.ipynb
@@ -26,17 +26,10 @@
     "except ImportError:\n",
     "    from IPython.html import widgets\n",
     "from IPython.display import display, HTML, clear_output, Javascript \n",
-    "\n",
-    "with open('/etc/spot.conf') as conf:\n",
-    "    for line in conf.readlines():\n",
-    "        if \"DBNAME=\" in line: DBNAME = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\");      \n",
-    "        elif \"IMPALA_DEM=\" in line: IMPALA_DEM = line.split(\"=\")[1].strip('\\n').replace(\"'\",\"\"); \n",
     "            \n",
     "path = os.getcwd().split(\"/\") \n",
     "date = path[len(path)-1]   \n",
     "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
-    "sconnect = dpath + 'proxy_scores.tsv' \n",
-    "threat_f = dpath + \"threats.csv\"\n",
     "anchor = ''\n",
     "anchor_hash = ''\n",
     "clientips  = defaultdict(int)\n",
@@ -148,28 +141,31 @@
     "    clear_output() \n",
     "    c_uri = []\n",
     "    uri_sev=[]\n",
-    "\n",
-    "    #discards threats already commented\n",
-    "    if os.path.isfile(threat_f) and not file_is_empty(threat_f):\n",
-    "        with open(threat_f, 'r') as th:\n",
-    "            t_read = csv.reader(th, delimiter='|')\n",
-    "            t_read.next()\n",
-    "            for row in t_read: \n",
-    "                if row[0] != '' : c_uri.append(row[0])\n",
-    "            \n",
-    "    with open(sconnect, 'r') as f:\n",
-    "        reader = csv.reader(f, delimiter='\\t')\n",
-    "        reader.next()\n",
-    "        for row in reader:\n",
-    "        #   \"p_date\":0 , \"p_time\":1, \"clientip\":2 , \"host\":3, \"reqmethod\":4 , \"useragent\":5 , \"resconttype\":6\n",
-    "        # , \"duration\":7, \"username\":8 , \"webcat\":9, \"referer\":10, \"respcode\":11, \"uriport\":12, \"uripath\":13\n",
-    "        # , \"uriquery\":14, \"serverip\":15, \"scbytes\":16 , \"csbytes\":17, \"fulluri\":18, \"word\":19\n",
-    "            #Forms a hash out of the anchor to use it as the file name\n",
-    "            if row[22] == '1': \n",
-    "                row_hash = md5.new(str(row[18])).hexdigest()\n",
-    "                if row[18] not in uri_sev and row_hash not in c_uri:\n",
-    "                    uri_sev.append(row[18])\n",
-    "\n",
+    "          \n",
+    "    response = GraphQLClient.request(\n",
+    "        query=\"\"\"query($date:SpotDateType!) {\n",
+    "                 proxy{\n",
+    "                    threats{\n",
+    "                        list(date:$date) {\n",
+    "                            score\n",
+    "                            uri\n",
+    "                            datetime\n",
+    "                        }\n",
+    "                    }\n",
+    "            }\n",
+    "        }\"\"\",\n",
+    "        variables={\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
+    "        }\n",
+    "    )  \n",
+    "     \n",
+    "    if not 'errors' in response: \n",
+    "        for row in response['data']['proxy']['threats']['list']:        \n",
+    "            if row['uri'] not in uri_sev and row['score'] == 1: \n",
+    "                uri_sev.append(row['uri'])\n",
+    "    else:\n",
+    "        print \"An error ocurred: \" + response[\"errors\"][0][\"message\"]\n",
+    " \n",
     "    threat_title.value =\"<h4>Suspicious URI</h4>\"\n",
     "                       \n",
     "    if len(uri_sev) == 0:\n",
@@ -191,92 +187,87 @@
     "    display(topBox) \n",
     "  \n",
     "    def search_ip(b):  \n",
-    "        global anchor  \n",
-    "        global anchor_hash\n",
-    "        global ir_f\n",
+    "        global anchor\n",
+    "        global expanded_results\n",
     "        anchor='' \n",
-    "        anchor_hash = ''\n",
     "        anchor = susp_select.value   \n",
-    "        anchor_hash = md5.new(str(anchor)).hexdigest()\n",
     "        removeWidget(3)\n",
     "        removeWidget(2)\n",
     "        removeWidget(1) \n",
-    "        height=80        \n",
-    "        ir_f = dpath + 'es-' + anchor_hash + \".csv\" \n",
-    "        table = \"<table><th>TIME</th><th>CLIENT IP</th><th>USERNAME</th><th>DURATION</th> \\\n",
-    "        <th>FULL URI</th><th>WEB CATEGORY</th><th>RESPONSE CODE</th><th>REQUEST METHOD</th><th>USER AGENT</th> \\\n",
-    "        <th>MIME TYPE</th><th>REFERER</th><th>URI PORT</th><th>PROXY IP</th><th>SERVER BYTES</th><th>CLIENT BYTES</th>\"\n",
+    "        height=80   \n",
     "        \n",
-    "        if not os.path.isfile(ir_f) or (os.path.isfile(ir_f) and file_is_empty(ir_f)):\n",
-    "            # time:0, clientip:1, username:2, duration:3, fullURI:4, webcat:5, respcode:6, reqmethod:7\n",
-    "            # useragent:8, resconttype: 9, referer: 10, uriport:11, serverip:12, scbytes:13, csbytes:14\n",
-    "            imp_query = (\"\\\"SELECT p_time, clientip, username, duration, fulluri, webcat, respcode, reqmethod,\\\n",
-    "                 useragent, resconttype, referer, uriport, serverip, scbytes, csbytes FROM {0}.proxy\\\n",
-    "                 WHERE y='{1}' AND m='{2}' AND d='{3}' AND (fulluri='{4}' OR referer ='{4}') ORDER BY p_time\\\"\")   \n",
-    "            \n",
-    "            imp_query = imp_query.format(DBNAME,yy,mm,dd,anchor) \n",
-    "            !impala-shell -i $IMPALA_DEM --quiet -q \"INVALIDATE METADATA\"\n",
-    "            !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\\t' -q $imp_query -o $ir_f\n",
-    "           \n",
-    "        clear_output() \n",
-    "        req_method = {}\n",
+    "        expanded_results = GraphQLClient.request(\n",
+    "            query=\"\"\"query($date:SpotDateType!,$uri:String!){\n",
+    "                              proxy{\n",
+    "                                threat{\n",
+    "                                  details(date:$date,uri:$uri) {\n",
+    "                                    username\n",
+    "                                    webCategory\n",
+    "                                    responseContentType\n",
+    "                                    datetime\n",
+    "                                    referer\n",
+    "                                    clientToServerBytes\n",
+    "                                    duration\n",
+    "                                    userAgent\n",
+    "                                    uri\n",
+    "                                    serverIp\n",
+    "                                    requestMethod\n",
+    "                                    responseCode\n",
+    "                                    uriPort\n",
+    "                                    clientIp\n",
+    "                                    serverToClientBytes\n",
+    "                                  }\n",
+    "                                }\n",
+    "                              }  \n",
+    "                            }\n",
+    "                            \"\"\",\n",
+    "                        variables={\n",
+    "                        'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                        'uri': anchor\n",
+    "                        }\n",
+    "                    )\n",
+    "        \n",
+    "        \n",
+    "        if not 'errors' in expanded_results: \n",
+    "            i = 0\n",
+    "            table = \"<table><th>TIME</th><th>CLIENT IP</th><th>USERNAME</th><th>DURATION</th> \\\n",
+    "            <th>FULL URI</th><th>WEB CATEGORY</th><th>RESPONSE CODE</th><th>REQUEST METHOD</th><th>USER AGENT</th> \\\n",
+    "            <th>MIME TYPE</th><th>REFERER</th><th>URI PORT</th><th>PROXY IP</th><th>SERVER BYTES</th><th>CLIENT BYTES</th>\"\n",
+    "            for row in expanded_results['data']['proxy']['threat']['details']:\n",
+    "                if i < top_results:\n",
+    "                    table += \"<tr><td>\"+ str(row['datetime'])+\"</td><td>\"+str(row['clientIp'])+\"</td>\\\n",
+    "                    <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+str(row['username'])+\"\\\n",
+    "                    </div></td><td>\"+str(row['duration'])+\"</td>\\\n",
+    "                    <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+str(row['uri'])+\"</div>\\\n",
+    "                    </td><td>\"+str(row['webCategory'])+\"</td>\\\n",
+    "                    <td>\"+str(row['responseCode'])+\"</td><td>\"+str(row['requestMethod'])+\"</td>\\\n",
+    "                    <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+str(row['userAgent'])+\"</div></td>\\\n",
+    "                    <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+str(row['responseContentType'])+\"</div></td>\\\n",
+    "                    <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+str(row['referer'])+\"</div></td>\\\n",
+    "                    <td>\"+str(row['uriPort'])+\"</td><td>\"+str(row['serverIp'])+\"</td><td>\\\n",
+    "                    \"+str(row['serverToClientBytes'])+\"</td><td>\"+str(row['clientToServerBytes'])+\"</td></tr>\"\n",
+    "\n",
+    "                height += 20\n",
+    "                i+=1\n",
+    "            table += \"</table>\"                \n",
+    "            result_html_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)\n",
+    "        else:\n",
+    "            table = \"<table></table>\"\n",
+    "            result_html_title.value='<h4>No results were found.</h4>'\n",
     "\n",
-    "        with open(ir_f, 'r') as f:\n",
-    "            #Creates default dictionaries\n",
-    "            global reqmethods\n",
-    "            global rescontype\n",
-    "            global referers\n",
-    "            global refered\n",
-    "            global requests\n",
-    "            global clientips\n",
-    "            clientips  = defaultdict(int)\n",
-    "            reqmethods = defaultdict(int)\n",
-    "            rescontype = defaultdict(int)\n",
-    "            referers   = defaultdict(int)\n",
-    "            refered    = defaultdict(int)\n",
-    "            try:\n",
-    "                reader = csv.reader(f, delimiter='\\t')\n",
-    "                reader.next() # Skip headers\n",
-    "                i=0         \n",
-    "                for row in reader:\n",
-    "                    clientips[row[1]]+=1\n",
-    "                    reqmethods[row[7]]+=1\n",
-    "                    rescontype[row[9]]+=1\n",
-    "                    if row[10] != anchor:\n",
-    "                        #Source URI's that refered the user to the threat\n",
-    "                        referers[row[10]]+=1\n",
-    "                        if({'clientip':row[1],'referer':row[10],'reqmethod':row[7],'resconttype':row[9]}) not in requests:\n",
-    "                            requests.append({'clientip':row[1],'referer':row[10],'reqmethod':row[7],'resconttype':row[9]})\n",
-    "                        if i < top_results:\n",
-    "                            table += \"<tr><td>\"+row[0]+\"</td><td>\"+row[1]+\"</td>\\\n",
-    "                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+row[2]+\"</div></td><td>\"+row[3]+\"</td>\\\n",
-    "                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+row[4]+\"</div></td><td>\"+row[5]+\"</td>\\\n",
-    "                            <td>\"+row[6]+\"</td><td>\"+row[7]+\"</td>\\\n",
-    "                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+row[8]+\"</div></td>\\\n",
-    "                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+row[9]+\"</div></td>\\\n",
-    "                            <td><div class='spot-text-wrapper' data-toggle='tooltip'>\"+row[10]+\"</div></td>\\\n",
-    "                            <td>\"+row[11]+\"</td><td>\"+row[12]+\"</td><td>\"+row[13]+\"</td><td>\"+row[14]+\"</td></tr>\"\n",
-    "                    else:\n",
-    "                        #Destination URI's refered by the threat\n",
-    "                        refered[row[4]]+=1\n",
-    "                    height += 20\n",
-    "                    i+=1\n",
-    "                table += \"</table>\"                \n",
-    "                result_html_title.value='<h4>Displaying top {0} search results</h4>'.format(top_results)\n",
-    "            except:\n",
-    "                table = \"<table></table>\"\n",
-    "                result_html_title.value='<h4>No results were found.</h4>'\n",
-    "                \n",
-    "            result_html.value=table\n",
-    "            result_html_box.children = [result_html]\n",
+    "        result_html.value=table\n",
+    "        result_html_box.children = [result_html]\n",
     " \n",
-    "            display_threat_box(anchor)\n",
-    "            resultTableBox.children = [result_html_title, result_html_box]\n",
-    "            display(bottomBox)\n",
+    "        display_threat_box(anchor)\n",
+    "        resultTableBox.children = [result_html_title, result_html_box]\n",
+    "        display(bottomBox)\n",
+    "        \n",
+    "        \n",
     "    search_btn.on_click(search_ip)\n",
     "\n",
     "        \n",
-    "def display_threat_box(ip):    \n",
+    "def display_threat_box(ip):   \n",
+    "    global expanded_results\n",
     "    result_title.value=\"<h4 class='spot-text-wrapper spot-text-xlg' data-toggle='tooltip'>Threat summary for \" + anchor +\"</h4>\"\n",
     "    tc_txt_title = widgets.Text(value='', placeholder='Threat Title', width='100%')\n",
     "    tc_txa_summary = widgets.Textarea(value='', height=100, width='95%')\n",
@@ -293,75 +284,56 @@
     "    resultSummaryBox.children = [result_title,result_summary_box]\n",
     "    \n",
     "    def save_threat_summary(b):\n",
-    "        global anchor \n",
-    "        global anchor_hash \n",
-    "        if anchor != '':      \n",
-    "            global threat_f\n",
-    "            if not os.path.exists(threat_f):  \n",
-    "                with open(threat_f, 'w') as comment:\n",
-    "                    comment.write('hash|title|summary\\n')\n",
-    "            \n",
-    "            with open(threat_f, 'a') as comment:\n",
-    "                comment.write(anchor_hash + '|' + tc_txt_title.value + '|' +\n",
-    "                                  tc_txa_summary.value.replace('\\n', '\\\\n') + '\\n') \n",
-    "            \n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + anchor +\"']\\\").remove();\"))   \n",
-    "            display(Javascript(\"$('.widget-area > .widget-subarea > .widget-box:gt(0)').remove();\"))\n",
-    "            \n",
-    "            response = \"Summary successfully saved\"\n",
-    "            incident_progression(anchor, anchor_hash)\n",
-    "            timeline(anchor, anchor_hash)\n",
+    "        result_msg = \"\"\n",
+    "        threat_title = tc_txt_title.value \n",
+    "        threat_comment = tc_txa_summary.value\n",
+    "\n",
+    "        \n",
+    "        if anchor != '':   \n",
+    "            mutation=\"\"\"mutation(\n",
+    "                        $date: SpotDateType, \n",
+    "                        $uri: String!, \n",
+    "                        $text: String!, \n",
+    "                        $title: String!,\n",
+    "                        $threatDetails: [ProxyThreatDetailsInputType!]!,\n",
+    "                        $first:Int) \n",
+    "                        {\n",
+    "                          proxy {\n",
+    "                            createStoryboard(input:{\n",
+    "                                threatDetails: $threatDetails,\n",
+    "                                date: $date, \n",
+    "                                uri: $uri, \n",
+    "                                title: $title, \n",
+    "                                text: $text,\n",
+    "                                first:$first})\n",
+    "                            {success}\n",
+    "                          }\n",
+    "                        }\"\"\"\n",
+    "\n",
+    "            variables={\n",
+    "                'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "                'uri': anchor,\n",
+    "                'title': threat_title,\n",
+    "                'text': threat_comment,\n",
+    "                'threatDetails': expanded_results['data']['proxy']['threat']['details'],\n",
+    "                'first':top_results\n",
+    "                }\n",
+    "\n",
+    "            response = GraphQLClient.request(mutation,variables)\n",
+    "            if not 'errors' in response:\n",
+    "                start_investigation()\n",
+    "                result_msg = \"Story board successfully created\"\n",
+    "            else:\n",
+    "                result_msg = response['errors'][0]['message'] \n",
     "        else:\n",
-    "            response = \"No data selected\" \n",
-    "       \n",
+    "            result_msg = \"No data selected\" \n",
+    "\n",
     "        susp_select.selected_label = susp_select.options[0]\n",
-    "        display(widgets.Box((widgets.HTML(value=response, width='100%'),)))\n",
+    "        display(widgets.Box((widgets.HTML(value=result_msg, width='100%'),)))\n",
     "    \n",
     "    tc_btn_save.on_click(save_threat_summary)\n",
-    "    \n",
-    "    \n",
-    "def incident_progression(anchor, anchor_hash):\n",
-    "    file_name = dpath + 'incident-progression-'+anchor_hash+'.json'\n",
-    "    jsonstring = json.dumps({'fulluri':anchor, 'requests':requests,'referer_for':referers.keys()})\n",
-    "    if not os.path.exists(file_name):\n",
-    "         with open(file_name, 'w') as f:\n",
-    "            f.write(jsonstring)        \n",
-    "    response = \"Incident progression successfuly created\"\n",
-    "    display(widgets.Box((widgets.HTML(value=response, width='100%'),)))\n",
-    "    \n",
-    "    \n",
-    "def timeline(anchor, anchor_hash): \n",
-    "    response = \"\"\n",
-    "    susp_ips = []\n",
-    "    if clientips:\n",
-    "        srtlist = sorted(list(clientips.items()), key=lambda x: x[1], reverse=True)\n",
-    "        for val in srtlist[:top_results]:\n",
-    "            susp_ips.append(val[0])  \n",
-    "            \n",
-    "    if anchor != \"\":\n",
-    "        sbdet_f = dpath + \"timeline-\"+anchor_hash+\".tsv\"\n",
-    "        if not os.path.isfile(sbdet_f) or (os.path.isfile(sbdet_f) and file_is_empty(sbdet_f)):  \n",
-    "            imp_query = \"\\\"SELECT concat(cast(p_date as string), ' ', cast(MIN(p_time) as string)) AS tstart,\\\n",
-    "            concat(cast(p_date as string), ' ', cast(MAX(p_time) as string)) AS tend, SUM(duration) AS duration,\\\n",
-    "            clientip, respcode from {0}.proxy WHERE fulluri='{1}' AND clientip IN ({5}) \\\n",
-    "            AND y='{2}' AND m='{3}' AND d='{4}' GROUP BY clientip, p_time, respcode, p_date ORDER BY clientip\\\n",
-    "            LIMIT {6}\\\"\"\n",
     "\n",
-    "            imp_query=imp_query.format(DBNAME,anchor,yy,mm,dd,(\"'\" + \"','\".join(susp_ips) + \"'\"), details_limit)  \n",
-    "            !impala-shell -i $IMPALA_DEM --quiet -q \"INVALIDATE METADATA\"\n",
-    "            !impala-shell -i $IMPALA_DEM --quiet --print_header -B --output_delimiter='\\t' -q $imp_query -o $sbdet_f\n",
-    "            clear_output()\n",
-    "            \n",
-    "            response = \"Timeline successfully saved\"\n",
-    "    else:\n",
-    "        response = \"Timeline couldn't be created\"\n",
     "    \n",
-    "    display(widgets.Box((widgets.HTML(value=response, width='100%'),)))\n",
-    "    data_loader()\n",
-    "    \n",
-    "def file_is_empty(path):\n",
-    "    return os.stat(path).st_size==0\n",
-    "\n",
     "def removeWidget(index):\n",
     "    js_command = \"$('.widget-area > .widget-subarea > .widget-box:eq({0})').remove();\".format(index)    \n",
     "    display(Javascript(js_command)) "

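The investigation loader above keeps only URIs scored as high risk (1). A
successful threats-list response carries the shape sketched below (sample
values only), which reduces the filter to a list comprehension:

    # Illustrative response shape for the proxy threats list query above.
    response = {'data': {'proxy': {'threats': {'list': [
        {'score': 1, 'uri': 'http://example.com/suspicious',
         'datetime': '2017-03-13 10:45:00'}]}}}}
    uri_sev = [row['uri']
               for row in response['data']['proxy']['threats']['list']
               if row['score'] == 1]
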
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/proxy/proxy_conf.json
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_conf.json b/spot-oa/oa/proxy/proxy_conf.json
index 5378939..2228e80 100644
--- a/spot-oa/oa/proxy/proxy_conf.json
+++ b/spot-oa/oa/proxy/proxy_conf.json
@@ -42,14 +42,12 @@
         , "serverip":15
         , "scbytes":16
         , "csbytes":17
-        , "fulluri":18
+        , "fulluri":18 
         , "word":19
-        , "score":20
+        , "ml_score":20
         , "uri_rep":21
-        , "uri_sev":22
-        , "respcode_name":23
-        , "network_context":24
-        , "hash":25
+        , "respcode_name":22
+        , "network_context":23 
     },
     "add_reputation":{
         "fulluri":18   

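With score, uri_sev and hash dropped, the remaining positional indices shift,
which is why downstream code resolves columns through this map instead of
hard-coding positions. A short sketch (the config path and sample row are
illustrative):

    # Sketch: resolving a column by name via the proxy_score_fields map.
    import json

    conf = json.loads(open('proxy_conf.json').read())  # path assumed
    clientip_idx = conf['proxy_score_fields']['clientip']  # 2 in this config
    row = ['2017-03-13', '10:45:00', '10.0.0.1']  # truncated sample row
    print row[clientip_idx]  # -> '10.0.0.1'
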
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index 1324f1a..02a9297 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -27,8 +27,12 @@ from utils import Util
 from components.data.data import Data
 from components.iana.iana_transform import IanaTransform
 from components.nc.network_context import NetworkContext
+
+import api.resources.hdfs_client as HDFSClient
+import api.resources.impala_engine as impala
 from multiprocessing import Process
 import pandas as pd 
+from impala.util import as_pandas
 
 import time
 import md5
@@ -78,13 +82,12 @@ class OA(object):
         ####################
 
         self._create_folder_structure()
+        self._clear_previous_executions()   
         self._add_ipynb()
         self._get_proxy_results()
-        self._add_reputation()
-        self._add_severity()
+        self._add_reputation() 
         self._add_iana()
-        self._add_network_context()
-        self._add_hash()
+        self._add_network_context() 
         self._create_proxy_scores_csv()
         self._get_oa_details()
         self._ingest_summary()
@@ -102,6 +105,27 @@ class OA(object):
         self._data_path,self._ingest_summary_path,self._ipynb_path = Util.create_oa_folders("proxy",self._date)
 
 
+    def _clear_previous_executions(self):
+        
+        self._logger.info("Cleaning data from previous executions for the day")       
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:]  
+        table_schema = []
+        HUSER = self._spot_conf.get('conf', 'HUSER').replace("'", "").replace('"', '')
+        table_schema=['suspicious', 'edge','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
+
+        for path in table_schema:
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
+        
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+        #removes Feedback file
+        HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
+        #removes json files from the storyboard
+        HDFSClient.delete_folder("{0}/{1}/oa/{2}/{3}/{4}/{5}".format(HUSER,self._table_name,"storyboard",yr,mn,dy))
+
+
+
     def _add_ipynb(self):
 
         if os.path.isdir(self._ipynb_path):
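
The new _clear_previous_executions step makes re-runs for a given day
idempotent: that day's OA partitions are dropped before anything is
re-inserted. A hypothetical helper showing how the partition paths above
derive from the pipeline date:

    # Hypothetical helper mirroring the partition layout used by
    # _clear_previous_executions; the path template comes from the code above.
    def oa_partition_path(huser, table, schema, date):
        yr, mn, dy = date[:4], date[4:6], date[6:]
        return "{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(
            huser, table, schema, yr, mn, dy)

    # oa_partition_path('/user/spot', 'proxy', 'suspicious', '20170313')
    # -> '/user/spot/proxy/hive/oa/suspicious/y=2017/m=03/d=13'
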
@@ -140,23 +164,23 @@ class OA(object):
             self._logger.error("There was an error getting ML results from HDFS")
             sys.exit(1)
 
-        # add headers.
-        self._logger.info("Adding headers")
-        self._proxy_scores_headers = [  str(key) for (key,value) in self._conf['proxy_score_fields'].items() ]
-
         self._proxy_scores = self._proxy_results[:]
 
 
     def _create_proxy_scores_csv(self):
-
-        proxy_scores_csv = "{0}/proxy_scores.tsv".format(self._data_path)
-        proxy_scores_final = self._proxy_scores[:];
-        proxy_scores_final.insert(0,self._proxy_scores_headers)
-        Util.create_csv_file(proxy_scores_csv,proxy_scores_final, self._results_delimiter)
-
-        # create bk file
-        proxy_scores_bu_csv = "{0}/proxy_scores_bu.tsv".format(self._data_path)
-        Util.create_csv_file(proxy_scores_bu_csv,proxy_scores_final, self._results_delimiter)
+        # get date parameters.
+        yr = self._date[:4]
+        mn = self._date[4:6]
+        dy = self._date[6:] 
+        value_string = ""
+ 
+        for row in self._proxy_scores:
+            value_string += str(tuple(Util.cast_val(item) for item in row)) + ","              
+    
+        load_into_impala = ("""
+             INSERT INTO {0}.proxy_scores partition(y={2}, m={3}, d={4}) VALUES {1}
+        """).format(self._db, value_string[:-1], yr, mn, dy) 
+        impala.execute_query(load_into_impala)
 
 
     def _add_reputation(self):
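
_create_proxy_scores_csv now bulk-loads the scored rows into Impala by
concatenating row tuples into a single VALUES clause, with value_string[:-1]
trimming the trailing comma. Assuming Util.cast_val quotes strings and passes
numbers through (an assumption about that helper), two rows serialize roughly
as follows:

    # Illustrative: building the VALUES clause used by the INSERT above.
    rows = [('2017-03-13', '10.0.0.1', 250), ('2017-03-13', '10.0.0.2', 80)]
    value_string = ""
    for row in rows:
        value_string += str(tuple(item for item in row)) + ","
    print value_string[:-1]
    # ('2017-03-13', '10.0.0.1', 250),('2017-03-13', '10.0.0.2', 80)
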
@@ -200,12 +224,6 @@ class OA(object):
             self._proxy_scores = [ conn + [""] for conn in self._proxy_scores  ]
 
 
-
-    def _add_severity(self):
-        # Add severity column
-        self._proxy_scores = [conn + [0] for conn in self._proxy_scores]
-
-
     def _add_iana(self):
 
         iana_conf_file = "{0}/components/iana/iana_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -225,96 +243,72 @@ class OA(object):
             nc_conf = json.loads(open(nc_conf_file).read())["NC"]
             proxy_nc = NetworkContext(nc_conf,self._logger)
             ip_dst_index = self._conf["proxy_score_fields"]["clientip"]
-            self._proxy_scores = [ conn + [proxy_nc.get_nc(conn[ip_dst_index])] for conn in self._proxy_scores ]
-
+            self._proxy_scores = [ conn + [proxy_nc.get_nc(conn[ip_dst_index])] for conn in self._proxy_scores ] 
         else:
             self._proxy_scores = [ conn + [""] for conn in self._proxy_scores ]
 
 
-    def _add_hash(self):
-        #A hash string is generated to be used as the file name for the edge files.
-        #These fields are used for the hash creation, so this combination of values is treated as
-        #a 'unique' connection
-        cip_index = self._conf["proxy_score_fields"]["clientip"]
-        uri_index = self._conf["proxy_score_fields"]["fulluri"]
-        tme_index = self._conf["proxy_score_fields"]["p_time"]
-
-        self._proxy_scores = [conn + [str( md5.new(str(conn[cip_index]) + str(conn[uri_index])).hexdigest() + str((conn[tme_index].split(":"))[0]) )] for conn in self._proxy_scores]
-
-
     def _get_oa_details(self):
 
         self._logger.info("Getting OA Proxy suspicious details")
         # start suspicious connects details process.
         p_sp = Process(target=self._get_suspicious_details)
         p_sp.start()
-
-        # p_sp.join()
+ 
 
     def _get_suspicious_details(self):
-        hash_list = []
+        uri_list = []
         iana_conf_file = "{0}/components/iana/iana_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         if os.path.isfile(iana_conf_file):
             iana_config  = json.loads(open(iana_conf_file).read())
             proxy_iana = IanaTransform(iana_config["IANA"])
 
         for conn in self._proxy_scores:
-            conn_hash = conn[self._conf["proxy_score_fields"]["hash"]]
-            if conn_hash not in hash_list:
-                hash_list.append(conn_hash)
-                clientip = conn[self._conf["proxy_score_fields"]["clientip"]]
-                fulluri = conn[self._conf["proxy_score_fields"]["fulluri"]]
-                date=conn[self._conf["proxy_score_fields"]["p_date"]].split('-')
-                if len(date) == 3:
-                    year=date[0]
-                    month=date[1].zfill(2)
-                    day=date[2].zfill(2)
-                    hh=(conn[self._conf["proxy_score_fields"]["p_time"]].split(":"))[0]
-                    self._get_proxy_details(fulluri,clientip,conn_hash,year,month,day,hh,proxy_iana)
-
-
-    def _get_proxy_details(self,fulluri,clientip,conn_hash,year,month,day,hh,proxy_iana):
-
-        limit = 250
-        output_delimiter = '\t'
-        edge_file ="{0}/edge-{1}-{2}.tsv".format(self._data_path,clientip,conn_hash)
-        edge_tmp  ="{0}/edge-{1}-{2}.tmp".format(self._data_path,clientip,conn_hash)
-
-        if not os.path.isfile(edge_file):
-            proxy_qry = ("SELECT p_date, p_time, clientip, host, webcat, respcode, reqmethod, useragent, resconttype, \
-                referer, uriport, serverip, scbytes, csbytes, fulluri FROM {0}.{1} WHERE y=\'{2}\' AND m=\'{3}\' AND d=\'{4}\' AND \
-                h=\'{5}\' AND fulluri =\'{6}\' AND clientip = \'{7}\' LIMIT {8};").format(self._db,self._table_name, year,month,day,hh,fulluri,clientip,limit)
-
-            # execute query
-            self._engine.query(proxy_qry,edge_tmp,output_delimiter)
-            # add IANA to results.
+            clientip = conn[self._conf["proxy_score_fields"]["clientip"]]
+            fulluri = conn[self._conf["proxy_score_fields"]["fulluri"]]
+            date=conn[self._conf["proxy_score_fields"]["p_date"]].split('-')
+            if len(date) == 3:
+                year=date[0]
+                month=date[1].zfill(2)
+                day=date[2].zfill(2)
+                hh=(conn[self._conf["proxy_score_fields"]["p_time"]].split(":"))[0]
+                self._get_proxy_details(fulluri,clientip,year,month,day,hh,proxy_iana)
+
+
+
+    def _get_proxy_details(self,fulluri,clientip,year,month,day,hh,proxy_iana):
+        limit = 250 
+        value_string = ""
+        
+        query_to_load =("""
+            SELECT p_date, p_time, clientip, host, webcat, respcode, reqmethod, useragent, resconttype,
+            referer, uriport, serverip, scbytes, csbytes, fulluri, {5} as hh
+            FROM {0}.{1} WHERE y='{2}' AND m='{3}' AND d='{4}' AND
+            h='{5}' AND fulluri='{6}' AND clientip='{7}' LIMIT {8};
+        """).format(self._db,self._table_name, year,month,day,hh,fulluri,clientip,limit)
+
+        detail_results = impala.execute_query(query_to_load)
+ 
+        if proxy_iana:
+            # add IANA to results.
             self._logger.info("Adding IANA translation to details results")
-            with open(edge_tmp) as proxy_details_csv:
-                rows = csv.reader(proxy_details_csv, delimiter=output_delimiter,quotechar='"')
-                next(proxy_details_csv)
-                update_rows = [[conn[0]] + [conn[1]] + [conn[2]] + [conn[3]] + [conn[4]] + [proxy_iana.get_name(conn[5],"proxy_http_rcode") if proxy_iana else conn[5]] + [conn[6]] + [conn[7]] + [conn[8]] + [conn[9]] + [conn[10]] + [conn[11]] + [conn[12]] + [conn[13]] + [conn[14]] if len(conn) > 0 else [] for conn in rows]
-                update_rows = filter(None, update_rows)
-                header = ["p_date","p_time","clientip","host","webcat","respcode","reqmethod","useragent","resconttype","referer","uriport","serverip","scbytes","csbytes","fulluri"]
-                update_rows.insert(0,header)
-
-		# due an issue with the output of the query.
-		update_rows = [ [ w.replace('"','') for w in l ] for l in update_rows ]
-	
-
-            # create edge file.
-            self._logger.info("Creating edge file:{0}".format(edge_file))
-            with open(edge_file,'wb') as proxy_details_edge:
-                writer = csv.writer(proxy_details_edge, quoting=csv.QUOTE_NONE, delimiter=output_delimiter)
-                if update_rows:
-                    writer.writerows(update_rows)
-                else:
-                    shutil.copy(edge_tmp,edge_file)
-
-            try:
-                os.remove(edge_tmp)
-            except OSError:
-                pass
+ 
+            updated_rows = [conn + (proxy_iana.get_name(conn[5],"proxy_http_rcode"),) for conn in detail_results]
+            updated_rows = filter(None, updated_rows)            
+        else:
+            updated_rows = [conn + ("",) for conn in detail_results]  # ("",) is a 1-tuple; conn + "" would raise TypeError
+ 
+        for row in updated_rows:
+            value_string += str(tuple(item for item in row)) + ","     
+        
+        if value_string != "":  
+            query_to_insert=("""
+                INSERT INTO {0}.proxy_edge PARTITION (y={1}, m={2}, d={3}) VALUES {4};
+            """).format(self._db,year, month, day, value_string[:-1])
+
+            impala.execute_query(query_to_insert) 
 
+   
 
     def _ingest_summary(self): 
         # get date parameters.
@@ -328,44 +322,36 @@ class OA(object):
         result_rows = []        
         df_filtered =  pd.DataFrame()
 
-        ingest_summary_file = "{0}/is_{1}{2}.csv".format(self._ingest_summary_path,yr,mn)			
-        ingest_summary_tmp = "{0}.tmp".format(ingest_summary_file)
-
-        if os.path.isfile(ingest_summary_file):
-        	df = pd.read_csv(ingest_summary_file, delimiter=',')
-            #discards previous rows from the same date
-        	df_filtered = df[df['date'].str.contains("{0}-{1}-{2}".format(yr, mn, dy)) == False] 
-        else:
-        	df = pd.DataFrame()
-            
         # get ingest summary.
-        ingest_summary_qry = ("SELECT p_date, p_time, COUNT(*) as total "
-                                    " FROM {0}.{1}"
-                                    " WHERE y='{2}' AND m='{3}' AND d='{4}' "
-                                    " AND p_date IS NOT NULL AND p_time IS NOT NULL " 
-                                    " AND clientip IS NOT NULL AND p_time != '' "
-                                    " AND host IS NOT NULL AND fulluri IS NOT NULL "
-                                    " GROUP BY p_date, p_time;") 
-
-        ingest_summary_qry = ingest_summary_qry.format(self._db,self._table_name, yr, mn, dy)
-        results_file = "{0}/results_{1}.csv".format(self._ingest_summary_path,self._date)        
-        self._engine.query(ingest_summary_qry,output_file=results_file,delimiter=",")
+
+        query_to_load=("""
+                SELECT p_date, p_time, COUNT(*) as total
+                FROM {0}.{1} WHERE y='{2}' AND m='{3}' AND d='{4}'
+                AND p_date IS NOT NULL AND p_time IS NOT NULL
+                AND clientip IS NOT NULL AND p_time != ''
+                AND host IS NOT NULL AND fulluri IS NOT NULL
+                GROUP BY p_date, p_time;
+        """).format(self._db,self._table_name, yr, mn, dy)
         
-        if os.path.isfile(results_file):
-            df_results = pd.read_csv(results_file, delimiter=',')  
-            
+        results = impala.execute_query(query_to_load) 
+ 
+        if results:
+            df_results = as_pandas(results)
             #Forms a new dataframe splitting the minutes from the time column/
             df_new = pd.DataFrame([["{0} {1}:{2}".format(val['p_date'], val['p_time'].split(":")[0].zfill(2), val['p_time'].split(":")[1].zfill(2)), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df_results.iterrows()],columns = ingest_summary_cols)
-            
+            value_string = ''
             #Groups the data by minute 
             sf = df_new.groupby(by=['date'])['total'].sum()
             df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
             
-            df_final = df_filtered.append(df_per_min, ignore_index=True)
-            df_final.to_csv(ingest_summary_tmp,sep=',', index=False)
-
-            os.remove(results_file)
-            os.rename(ingest_summary_tmp,ingest_summary_file)
+            df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
+            if len(df_final) > 0:
+                query_to_insert=("""
+                    INSERT INTO {0}.proxy_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
+                """).format(self._db, yr, mn, tuple(df_final))
+
+                impala.execute_query(query_to_insert) 
+                
         else:
             self._logger.info("No data found for the ingest summary")
         
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/1904f2b4/spot-oa/oa/utils.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/utils.py b/spot-oa/oa/utils.py
index 99b006e..2bed10e 100644
--- a/spot-oa/oa/utils.py
+++ b/spot-oa/oa/utils.py
@@ -121,6 +121,18 @@ class Util(object):
 			writer.writerows(content)
 
 
+	@classmethod
+	def cast_val(cls, value):
+		# Cast to int first, then float, falling back to the raw string.
+		try:
+			val = int(value)
+		except (ValueError, TypeError):
+			try:
+				val = float(value)
+			except (ValueError, TypeError):
+				val = str(value)
+		return val
+
+
 class SecHead(object):
     def __init__(self, fp):
         self.fp = fp
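
For reference, the cast-and-quote pattern this commit introduces (Util.cast_val feeding the Impala VALUES string in proxy_oa.py), reduced to a self-contained sketch. The sample rows, database name and partition values below are illustrative only, not data from the commit:

    def cast_val(value):
        # Mirror of Util.cast_val: try int, then float, else keep the string
        try:
            return int(value)
        except (ValueError, TypeError):
            try:
                return float(value)
            except (ValueError, TypeError):
                return str(value)

    rows = [["10.0.0.1", "0.25", "example.com"],
            ["10.0.0.2", "n/a", "test.com"]]
    value_string = ",".join(str(tuple(cast_val(i) for i in r)) for r in rows)
    query = ("INSERT INTO {0}.proxy_scores PARTITION (y={1}, m={2}, d={3}) "
             "VALUES {4}").format("spot", 2017, 3, 15, value_string)
    print(query)

Each row is rendered as a Python tuple literal, which doubles as a SQL row constructor, so the whole batch lands in a single INSERT statement.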


[29/50] [abbrv] incubator-spot git commit: Added pipeline on ingest summary link to show pipelines across pages

Posted by ev...@apache.org.
Added pipeline on ingest summary link to show pipelines across pages


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/5af8419f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/5af8419f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/5af8419f

Branch: refs/heads/SPOT-35_graphql_api
Commit: 5af8419f948c2b5ba4880d0e63c353fdf7e633fa
Parents: 70db6ee
Author: Montes Velazquez <jo...@intel.com>
Authored: Thu Mar 9 18:31:21 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/ui/dns/storyboard.html             | 2 +-
 spot-oa/ui/dns/suspicious.html             | 2 +-
 spot-oa/ui/dns/threat-investigation.html   | 2 +-
 spot-oa/ui/flow/storyboard.html            | 2 +-
 spot-oa/ui/flow/suspicious.html            | 2 +-
 spot-oa/ui/flow/threat-investigation.html  | 2 +-
 spot-oa/ui/js/actions/SpotActions.js       | 6 ++++++
 spot-oa/ui/js/constants/SpotConstants.js   | 1 +
 spot-oa/ui/js/ingest-summary.js            | 9 ++++++---
 spot-oa/ui/js/stores/IngestSummaryStore.js | 3 +++
 spot-oa/ui/proxy/storyboard.html           | 2 +-
 spot-oa/ui/proxy/suspicious.html           | 2 +-
 spot-oa/ui/proxy/threat-investigation.html | 2 +-
 13 files changed, 25 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/dns/storyboard.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/dns/storyboard.html b/spot-oa/ui/dns/storyboard.html
index c6a2567..251a289 100755
--- a/spot-oa/ui/dns/storyboard.html
+++ b/spot-oa/ui/dns/storyboard.html
@@ -123,7 +123,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=dns">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/dns/suspicious.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/dns/suspicious.html b/spot-oa/ui/dns/suspicious.html
index 31a82c3..54b1edf 100755
--- a/spot-oa/ui/dns/suspicious.html
+++ b/spot-oa/ui/dns/suspicious.html
@@ -133,7 +133,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=dns">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/dns/threat-investigation.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/dns/threat-investigation.html b/spot-oa/ui/dns/threat-investigation.html
index f7f66d0..804a988 100755
--- a/spot-oa/ui/dns/threat-investigation.html
+++ b/spot-oa/ui/dns/threat-investigation.html
@@ -121,7 +121,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=dns">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/flow/storyboard.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/flow/storyboard.html b/spot-oa/ui/flow/storyboard.html
index 0ebf859..48121c2 100755
--- a/spot-oa/ui/flow/storyboard.html
+++ b/spot-oa/ui/flow/storyboard.html
@@ -123,7 +123,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=flow">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/flow/suspicious.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/flow/suspicious.html b/spot-oa/ui/flow/suspicious.html
index f475f4b..b591445 100755
--- a/spot-oa/ui/flow/suspicious.html
+++ b/spot-oa/ui/flow/suspicious.html
@@ -127,7 +127,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=flow">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/flow/threat-investigation.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/flow/threat-investigation.html b/spot-oa/ui/flow/threat-investigation.html
index 97d70f3..0b80392 100755
--- a/spot-oa/ui/flow/threat-investigation.html
+++ b/spot-oa/ui/flow/threat-investigation.html
@@ -121,7 +121,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=flow">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/js/actions/SpotActions.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/js/actions/SpotActions.js b/spot-oa/ui/js/actions/SpotActions.js
index a70f2a9..c32201f 100755
--- a/spot-oa/ui/js/actions/SpotActions.js
+++ b/spot-oa/ui/js/actions/SpotActions.js
@@ -35,6 +35,12 @@ var SpotActions = {
             name: name
         });
     },
+    setPipeline(pipeline) {
+       SpotDispatcher.dispatch({
+           actionType: SpotConstants.UPDATE_PIPELINE,
+           pipeline
+       });
+   },
     expandPanel: function (panel) {
         SpotDispatcher.dispatch({
             actionType: SpotConstants.EXPAND_PANEL,

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/js/constants/SpotConstants.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/js/constants/SpotConstants.js b/spot-oa/ui/js/constants/SpotConstants.js
index 204840f..90b03cc 100755
--- a/spot-oa/ui/js/constants/SpotConstants.js
+++ b/spot-oa/ui/js/constants/SpotConstants.js
@@ -4,6 +4,7 @@ const SpotConstants = {
   PIPELINE_NETFLOW: 'flow',
   PIPELINE_DNS: 'dns',
   PIPELINE_PROXY: 'proxy',
+  UPDATE_PIPELINE: 'UPDATE_PIPELINE',
   // Search Actions
   UPDATE_FILTER: 'UPDATE_FILTER',
   UPDATE_DATE: 'UPDATE_DATE',

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/js/ingest-summary.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/js/ingest-summary.js b/spot-oa/ui/js/ingest-summary.js
index f907477..f374860 100755
--- a/spot-oa/ui/js/ingest-summary.js
+++ b/spot-oa/ui/js/ingest-summary.js
@@ -44,12 +44,12 @@ if (endDate < startDate)
   startDate = endDate;
   endDate = today;
 }
-
 const PIPELINES = IngestSummaryStore.PIPELINES;
-const DEFAULT_PIPELINE = Object.keys(PIPELINES)[0];
+// Check whether a pipeline is set in the URL; if not, fall back to the first element of PIPELINES
+const DEFAULT_PIPELINE = SpotUtils.getUrlParam('pipeline') || Object.keys(PIPELINES)[0];
 
 const loadPipeline = function loadPipeline(pipeline) {
-    IngestSummaryStore.setPipeline(pipeline);
+    SpotActions.setPipeline(pipeline);
     InSumActions.reloadSummary();
 }
 
@@ -106,9 +106,12 @@ ReactDOM.render(
   document.getElementById('spot-content-wrapper')
 );
 
+
+
 // Set period
 SpotActions.setDate(startDate, SpotConstants.START_DATE);
 SpotActions.setDate(endDate, SpotConstants.END_DATE);
+SpotActions.setPipeline(DEFAULT_PIPELINE);
 
 // Load data
 loadPipeline(DEFAULT_PIPELINE);

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/js/stores/IngestSummaryStore.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/js/stores/IngestSummaryStore.js b/spot-oa/ui/js/stores/IngestSummaryStore.js
index 83e713f..955c451 100755
--- a/spot-oa/ui/js/stores/IngestSummaryStore.js
+++ b/spot-oa/ui/js/stores/IngestSummaryStore.js
@@ -92,6 +92,9 @@ SpotDispatcher.register(function (action) {
         case SpotConstants.RELOAD_INGEST_SUMMARY:
             iss.sendQuery();
             break;
+        case SpotConstants.UPDATE_PIPELINE:
+            iss.setPipeline(action.pipeline);
+            break;
     }
 });
 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/proxy/storyboard.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/proxy/storyboard.html b/spot-oa/ui/proxy/storyboard.html
index cabd387..8d23ebb 100755
--- a/spot-oa/ui/proxy/storyboard.html
+++ b/spot-oa/ui/proxy/storyboard.html
@@ -201,7 +201,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=proxy">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/proxy/suspicious.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/proxy/suspicious.html b/spot-oa/ui/proxy/suspicious.html
index 1d77322..0216982 100755
--- a/spot-oa/ui/proxy/suspicious.html
+++ b/spot-oa/ui/proxy/suspicious.html
@@ -191,7 +191,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=proxy">Ingest Summary</a>
                     </li>
                 </ul>
             </div>

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5af8419f/spot-oa/ui/proxy/threat-investigation.html
----------------------------------------------------------------------
diff --git a/spot-oa/ui/proxy/threat-investigation.html b/spot-oa/ui/proxy/threat-investigation.html
index 98c3ca4..57b34eb 100755
--- a/spot-oa/ui/proxy/threat-investigation.html
+++ b/spot-oa/ui/proxy/threat-investigation.html
@@ -120,7 +120,7 @@
                         </ul>
                     </li>
                     <li>
-                        <a data-href="../ingest-summary.html#end-date=${date}">Ingest Summary</a>
+                        <a data-href="../ingest-summary.html#end-date=${date}|pipeline=proxy">Ingest Summary</a>
                     </li>
                 </ul>
             </div>


[25/50] [abbrv] incubator-spot git commit: Update README.md

Posted by ev...@apache.org.
Update README.md

Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/a2026e12
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/a2026e12
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/a2026e12

Branch: refs/heads/SPOT-35_graphql_api
Commit: a2026e12b6350a5c8dceb358fae2e484aa4ad33f
Parents: 56d6e88
Author: Everardo Lopez Sandoval <ev...@intel.com>
Authored: Thu Mar 9 15:32:26 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/a2026e12/spot-oa/api/resources/README.md
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/README.md b/spot-oa/api/resources/README.md
index 1a9587b..ce8b311 100644
--- a/spot-oa/api/resources/README.md
+++ b/spot-oa/api/resources/README.md
@@ -37,7 +37,7 @@ API Resources use [WebHDFS REST API] (https://hadoop.apache.org/docs/r1.0.4/webh
 * impyla
 * hdfs
 
-**NOTE:** all these requirements are already part of requirements.txt file, you don't need to install the python prerequisites manually.[OA install](../../../oa/INSTALL.md)
+**NOTE:** all these requirements are already part of the requirements.txt file; you don't need to install the Python prerequisites manually. For more information, go to [install python requirements](../../README.md)
 
 #### Hadoop:
 


[13/50] [abbrv] incubator-spot git commit: Adding proxy ingest summary method

Posted by ev...@apache.org.
Adding proxy ingest summary method


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/8f151f54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/8f151f54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/8f151f54

Branch: refs/heads/SPOT-35_graphql_api
Commit: 8f151f544a706707f0c9acc0b934235fd8ff38b7
Parents: 6c1f300
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 10:56:02 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py   |  8 +++-----
 spot-oa/api/resources/proxy.py | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8f151f54/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index ba26ae9..8924470 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -322,9 +322,7 @@ def  save_comments(ip,query,title,text,date):
 
     return True
 """
---------------------------------------------------------------------------
 Return a list(dict) with all the data ingested during the time frame provided.
---------------------------------------------------------------------------
 """
 def ingest_summary(start_date,end_date):
 
@@ -334,9 +332,9 @@ def ingest_summary(start_date,end_date):
                     tdate,total
                 FROM {0}.dns_ingest_summary
                 WHERE
-                    ( y <= {1} and y >= {2}) AND
-                    ( m <= {3} and m >= {4})
+                    ( y >= {1} and y <= {2}) AND
+                    ( m >= {3} and m <= {4})
                 """)\
-                .format(start_date.year,end_date.year,start_date.month,end_date.month)
+                .format(db,start_date.year,end_date.year,start_date.month,end_date.month)
 
     return ImpalaEngine.execute_query_as_list(is_query)

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/8f151f54/spot-oa/api/resources/proxy.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/proxy.py b/spot-oa/api/resources/proxy.py
index a2a8e46..6f86aae 100644
--- a/spot-oa/api/resources/proxy.py
+++ b/spot-oa/api/resources/proxy.py
@@ -381,3 +381,21 @@ def incident_progression(date,uri):
         .format(hdfs_path,file_name)))
     else:
         return {}
+
+"""
+Return a list(dict) with all the data ingested during the time frame provided.
+"""
+def ingest_summary(start_date,end_date):
+
+    db = Configuration.db()
+    is_query = ("""
+                SELECT
+                    tdate,total
+                FROM {0}.proxy_ingest_summary
+                WHERE
+                    ( y >= {1} and y <= {2}) AND
+                    ( m >= {3} and m <= {4})
+                """)\
+                .format(db,start_date.year,end_date.year,start_date.month,end_date.month)
+
+    return ImpalaEngine.execute_query_as_list(is_query)
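
For reference, a minimal usage sketch for the new method, assuming the interpreter runs from spot-oa against a configured cluster; the dates are illustrative and the printed columns follow the SELECT above:

    import datetime
    import api.resources.proxy as proxy

    start_date = datetime.date(2017, 3, 1)
    end_date = datetime.date(2017, 3, 31)

    # execute_query_as_list presumably returns one dict per row,
    # keyed by column name (tdate, total)
    for row in proxy.ingest_summary(start_date, end_date):
        print("{0}\t{1}".format(row["tdate"], row["total"]))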


[45/50] [abbrv] incubator-spot git commit: Fixed json error in proxy edge notebook

Posted by ev...@apache.org.
Fixed json error in proxy edge notebook


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/dcb4224b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/dcb4224b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/dcb4224b

Branch: refs/heads/SPOT-35_graphql_api
Commit: dcb4224b1cae8330286ccc57fb554ece26347e66
Parents: 4ff1937
Author: LedaLima <le...@apache.org>
Authored: Fri Mar 10 17:56:34 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/dcb4224b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index 0fad18f..b20c65e 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -102,7 +102,7 @@
     "    \n",
     "    scored = []\n",
     "    for item in score_values:\n",
-    "	     scored.append(urllib.quote_plus(item[0]))\n",
+    "        scored.append(urllib.quote_plus(item[0]))\n",
     "        \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",


[48/50] [abbrv] incubator-spot git commit: Flow: fixed bug that didn't remove values from lists after scoring

Posted by ev...@apache.org.
Flow: fixed bug that didn't remove values from lists after scoring


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/4ff19372
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/4ff19372
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/4ff19372

Branch: refs/heads/SPOT-35_graphql_api
Commit: 4ff19372a325f82021a378e16b20450a27d45dbe
Parents: 189acce
Author: LedaLima <le...@apache.org>
Authored: Fri Mar 10 17:52:25 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 296 ++++++++++---------
 1 file changed, 163 insertions(+), 133 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/4ff19372/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
index ab41963..6720de1 100644
--- a/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/flow/ipynb_templates/Edge_Investigation_master.ipynb
@@ -38,9 +38,7 @@
     "path = os.getcwd().split(\"/\") \n",
     "date = path[len(path)-1]   \n",
     "dsource = path[len(path)-2]  \n",
-    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/'\n",
     "cpath = '/'.join(['context' if var == 'ipynb' else var for var in path][:len(path)-2]) + '/'\n",
-    "opath = '/'.join(['oa' if var == 'ipynb' else var for var in path][:len(path)-1]) + '/'  \n",
     "\n",
     "coff = 250;\n",
     "nwloc = cpath + 'networkcontext.csv' \n",
@@ -49,7 +47,7 @@
     "sportdict,sportlist = {},[]\n",
     "dportdict,dportlist = {},[]\n",
     "global svals\n",
-    "svals = []"
+    "svals = [] "
    ]
   },
   {
@@ -69,21 +67,76 @@
     "        ('select', 'margin', 0)\n",
     "    )\n",
     "\n",
+    "# Source IP box\n",
+    "scrIpLalbel = widgets.HTML(value=\"Source IP:\", height='10%', width='100%')\n",
+    "# srcselect = widgets.Select(options=srclist, height='90%')\n",
+    "srcselect = widgets.Select(height='100%')\n",
+    "apply_css_to_select(srcselect)\n",
+    "srcIpBox = widgets.Box(width='25%', height='100%')\n",
+    "srcIpBox.children = (scrIpLalbel, srcselect)\n",
     "\n",
-    "#load LDA scores#update lda doc with scores - either one edge,# or ALL that contain x IP and y port\n",
-    "def displaythis() :\n",
+    "# Destination IP box\n",
+    "dstIpLalbel = widgets.HTML(value=\"Dest IP:\", height='10%', width='100%')\n",
+    "dstselect = widgets.Select(height='100%')\n",
+    "# dstselect = widgets.Select(options=dstlist, height='90%')\n",
+    "apply_css_to_select(dstselect)\n",
+    "dstIpBox = widgets.Box(width='25%', height='100%')\n",
+    "dstIpBox.children = (dstIpLalbel, dstselect)\n",
+    "\n",
+    "# Source Port box\n",
+    "scrPortLalbel = widgets.HTML(value=\"Src Port:\", height='10%', width='100%')\n",
+    "sportselect = widgets.Select(height='100%')\n",
+    "# sportselect = widgets.Select(options=sportlist, height='90%')\n",
+    "apply_css_to_select(sportselect)\n",
+    "srcPortBox = widgets.Box(width='20%', height='100%')\n",
+    "srcPortBox.children = (scrPortLalbel, sportselect)\n",
+    "\n",
+    "# Destination Port box\n",
+    "dstPortLalbel = widgets.HTML(value=\"Dst Port:\", height='10%', width='100%')\n",
+    "dportselect = widgets.Select(height='100%')\n",
+    "# dportselect = widgets.Select(options=dportlist,height='90%')\n",
+    "apply_css_to_select(dportselect)\n",
+    "dstPortBox = widgets.Box(width='20%', height='100%')\n",
+    "dstPortBox.children = (dstPortLalbel, dportselect)\n",
+    "\n",
+    "# Quick Search and Actions Box\n",
+    "emptyLalbel = widgets.HTML(value=\"&nbsp;\")\n",
+    "srctext = widgets.Text(value='', width='100%', placeholder='Quick IP scoring')\n",
+    "srctext._css = (\n",
+    "    (None, 'width', '100%'),\n",
+    ")\n",
+    "ratingbut = widgets.RadioButtons(description='Rating:',options=['1', '2', '3'], width='100%')\n",
+    "assignbut = widgets.Button(description='Score', width='45%')\n",
+    "assignbut.button_style = 'primary'\n",
+    "updatebut = widgets.Button(description='Save', width='45%')\n",
+    "updatebut.button_style = 'primary'\n",
+    "updatebut._css = (\n",
+    "    (None, 'margin-left', '10%'),\n",
+    ")\n",
+    "actionsBox = widgets.Box(width='20%', height='100%')\n",
+    "actionsBox.children = (emptyLalbel, srctext,ratingbut,assignbut,updatebut)\n",
+    "\n",
+    "# Container Box\n",
+    "bigBox = widgets.HBox(width='90%', height=250)\n",
+    "bigBox.children = (srcIpBox, dstIpBox, srcPortBox, dstPortBox, actionsBox)\n",
+    "\n",
+    "\n",
+    "def fill_list(list_control,source):\n",
+    "    options_list = ['- Select -'] \n",
+    "    options_list.extend([s for s in source])\n",
+    "    list_control.options = options_list\n",
+    "\n",
+    "\n",
+    "def data_loader():\n",
     "    # build dict of IP addresses\n",
     "    #sev,score, tstart,srcIP,dstIP,sport,dport,proto,ipkt,ibyt\n",
-    "    display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
     "    srcdict,srclist = {},[]\n",
     "    dstdict,dstlist = {},[]\n",
     "    sportdict,sportlist = {},[]\n",
-    "    dportdict,dportlist = {},[]\n",
-    "    srclist.append('- Select -')\n",
-    "    dstlist.append('- Select -')\n",
-    "    sportlist.append('- Select -')\n",
-    "    dportlist.append('- Select -')\n", 
-    "    \n",
+    "    dportdict,dportlist = {},[] \n",
+    "    global svals \n",
+    "    srcips,srcports = [],[]\n",
+    "    dstips,dstports = [],[]\n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
     "                flow{\n",
@@ -99,138 +152,44 @@
     "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
     "        }\n",
     "    )\n",
-    "\n",
+    "    \n",
+    "    \n",
+    "    for item in svals:\n",
+    "        srcips.append(item[0])\n",
+    "        srcports.append(item[2]) \n",
+    "        dstips.append(item[1]) \n",
+    "        dstports.append(item[3])\n",
+    "        \n",
     "    rowct = 1\n",
+    "     \n",
+    "    \n",
     "    if not 'errors' in response:\n",
     "        for row in response['data']['flow']['suspicious']:\n",
-    "            if row['srcIP'] not in srcdict:\n",
+    "            if row['srcIP'] not in srcdict and row['srcIP'] not in srcips:\n",
     "                srclist.append(row['srcIP'])\n",
     "                srcdict[row['srcIP']] = struct.unpack(\"!L\", socket.inet_aton(row['srcIP']))[0]\n",
-    "            if row['dstIP'] not in dstdict:\n",
+    "            if row['dstIP'] not in dstdict and row['dstIP'] not in dstips:\n",
     "                dstlist.append(row['dstIP'])\n",
     "                dstdict[row['dstIP']] = struct.unpack(\"!L\", socket.inet_aton(row['dstIP']))[0]\n",
-    "            if row['sport'] not in sportdict:\n",
+    "            if row['sport'] not in sportdict and str(row['sport']) not in srcports:\n",
     "                sportlist.append(str(row['sport']))\n",
     "                sportdict[row['sport']] = row['sport']\n",
-    "            if row['dport'] not in dportdict:\n",
+    "            if row['dport'] not in dportdict and str(row['dport']) not in dstports:\n",
     "                dportlist.append(str(row['dport']))\n",
-    "                dportdict[row['dport']] = row['dport']\n",
+    "                dportdict[row['dport']] = row['dport']                \n",
     "            if rowct == coff:\n",
     "                break;\n",
     "            rowct += 1\n",
-    "     \n",
-    "    \n",
-    "    # Source IP box\n",
-    "    scrIpLalbel = widgets.HTML(value=\"Source IP:\", height='10%', width='100%')\n",
-    "    srcselect = widgets.Select(options=srclist, height='90%')\n",
-    "    apply_css_to_select(srcselect)\n",
-    "    srcIpBox = widgets.Box(width='25%', height='100%')\n",
-    "    srcIpBox.children = (scrIpLalbel, srcselect)\n",
-    "    \n",
-    "    # Destination IP box\n",
-    "    dstIpLalbel = widgets.HTML(value=\"Dest IP:\", height='10%', width='100%')\n",
-    "    dstselect = widgets.Select(options=dstlist, height='90%')\n",
-    "    apply_css_to_select(dstselect)\n",
-    "    dstIpBox = widgets.Box(width='25%', height='100%')\n",
-    "    dstIpBox.children = (dstIpLalbel, dstselect)\n",
-    "    \n",
-    "    # Source Port box\n",
-    "    scrPortLalbel = widgets.HTML(value=\"Src Port:\", height='10%', width='100%')\n",
-    "    sportselect = widgets.Select(options=sportlist, height='90%')\n",
-    "    apply_css_to_select(sportselect)\n",
-    "    srcPortBox = widgets.Box(width='20%', height='100%')\n",
-    "    srcPortBox.children = (scrPortLalbel, sportselect)\n",
-    "    \n",
-    "    # Destionation Port box\n",
-    "    dstPortLalbel = widgets.HTML(value=\"Dst Port:\", height='10%', width='100%')\n",
-    "    dportselect = widgets.Select(options=dportlist,height='90%')\n",
-    "    apply_css_to_select(dportselect)\n",
-    "    dstPortBox = widgets.Box(width='20%', height='100%')\n",
-    "    dstPortBox.children = (dstPortLalbel, dportselect)\n",
-    "    \n",
-    "    # Quick Search and Actions Box\n",
-    "    emptyLalbel = widgets.HTML(value=\"&nbsp;\")\n",
-    "    srctext = widgets.Text(value='', width='100%', placeholder='Quick IP scoring')\n",
-    "    srctext._css = (\n",
-    "        (None, 'width', '100%'),\n",
-    "    )\n",
-    "    ratingbut = widgets.RadioButtons(description='Rating:',options=['1', '2', '3'], width='100%')\n",
-    "    assignbut = widgets.Button(description='Score', width='45%')\n",
-    "    assignbut.button_style = 'primary'\n",
-    "    updatebut = widgets.Button(description='Save', width='45%')\n",
-    "    updatebut.button_style = 'primary'\n",
-    "    updatebut._css = (\n",
-    "        (None, 'margin-left', '10%'),\n",
-    "    )\n",
-    "    actionsBox = widgets.Box(width='20%', height='100%')\n",
-    "    actionsBox.children = (emptyLalbel, srctext,ratingbut,assignbut,updatebut)\n",
-    "    \n",
-    "    # Container Box\n",
-    "    bigBox = widgets.HBox(width='90%', height=250)\n",
-    "    bigBox.children = (srcIpBox, dstIpBox, srcPortBox, dstPortBox, actionsBox)\n",
-    "    \n",
-    "    display(bigBox)\n",
-    "    \n",
-    "    def update_sconnects(b):\n",
-    "        clear_output()\n",
-    "        #Gets input values\n",
-    "        global svals\n",
-    "        if srctext.value != '':\n",
-    "            svals.append([srctext.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
-    "            svals.append([srcselect.value,srctext.value,sportselect.value,dportselect.value, ratingbut.value])\n",
-    "        else:\n",
-    "            svals.append([srcselect.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
-    "     \n",
-    "        if srcselect.value != \"- Select -\":\n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
-    "        if dstselect.value != \"- Select -\":\n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
-    "        if sportselect.value != \"- Select -\":\n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + srcselect.value +\"']\\\").remove();\"))\n",
-    "        if dportselect.value != \"- Select -\":\n",
-    "            display(Javascript(\"$(\\\"option[data-value='\" + dportselect.value +\"']\\\").remove();\"))\n",
-    "\n",
-    "        \n",
     "            \n",
-    "    def savesort(b):\n",
-    "        global svals\n",
-    "        clear_output()\n",
-    "        variables = []\n",
-    "        mutation=\"\"\"mutation($input:[NetflowScoreInputType!]!)\n",
-    "                {\n",
-    "                  flow{\n",
-    "                    score(input:$input)\n",
-    "                        {success}\n",
-    "                  }\n",
-    "                }\"\"\"\n",
-    "        \n",
-    "        for row in svals:\n",
-    "            variables.append({\n",
-    "                'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
-    "                'score': row[4],\n",
-    "                'srcIp': row[0] if row[0] != '- Select -' else None,\n",
-    "                'dstIp': row[1] if row[1] != '- Select -' else None,\n",
-    "                'srcPort': row[2] if row[2] != '- Select -' else None,\n",
-    "                'dstPort': row[3]  if row[3] != '- Select -' else None\n",
-    "                })\n",
-    "        \n",
-    "        var = {'input':variables}\n",
-    "        response = GraphQLClient.request(mutation,var)\n",
-    "         \n",
-    "        svals = []\n",
-    "        if not 'errors' in response :\n",
-    "            print \"Suspicious connects successfully updated\"        \n",
-    "            display(Javascript('reloadParentData();')) \n",
-    "            bigBox.close()\n",
-    "            # Rebuild widgets form\n",
-    "            displaythis() \n",
-    "        else:\n",
-    "            print \"An error ocurred whith the scoring process\"\n",
-    "            print response['errors'][0]['message']\n",
-    "        \n",
-    "    assignbut.on_click(update_sconnects)\n",
-    "    updatebut.on_click(savesort)\n",
-    "    "
+    "    fill_list(srcselect,srclist)\n",
+    "    fill_list(dstselect,dstlist)\n",
+    "    fill_list(sportselect,sportlist)\n",
+    "    fill_list(dportselect,dportlist)\n",
+    "            \n",
+    "    srcselect.value = \"- Select -\"\n",
+    "    dstselect.value = \"- Select -\"    \n",
+    "    sportselect.value = \"- Select -\"    \n",
+    "    dportselect.value = \"- Select -\""
    ]
   },
   {
@@ -241,7 +200,9 @@
    },
    "outputs": [],
    "source": [
-    "displaythis()"
+    "display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
+    "data_loader() \n",
+    "display(bigBox)"
    ]
   },
   {
@@ -252,7 +213,76 @@
    },
    "outputs": [],
    "source": [
-    "# !cp $sconnectbu $sconnect"
+    "def assign_score(b):\n",
+    "    clear_output()\n",
+    "    #Gets input values\n",
+    "    global svals\n",
+    "    if srctext.value != '':\n",
+    "        svals.append([srctext.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
+    "        svals.append([srcselect.value,srctext.value,sportselect.value,dportselect.value, ratingbut.value])\n",
+    "        display(Javascript(\"$(\\\"option[data-value='\" + srctext.value +\"']\\\").remove();\"))\n",
+    "        dstselect.value = \"- Select -\"        \n",
+    "        srcselect.value = \"- Select -\"        \n",
+    "    else:\n",
+    "        svals.append([srcselect.value,dstselect.value,sportselect.value,dportselect.value, ratingbut.value])\n",
+    "    \n",
+    "    if srcselect.value != \"- Select -\":  \n",
+    "        display(Javascript(\"$(\\\"select.widget-listbox:eq(0) option[data-value='\" + srcselect.value +\"']\\\").remove();\"))  \n",
+    "        srcselect.value = \"- Select -\"\n",
+    "    if dstselect.value != \"- Select -\":\n",
+    "        display(Javascript(\"$(\\\"select.widget-listbox:eq(1) option[data-value='\" + dstselect.value +\"']\\\").remove();\"))  \n",
+    "        dstselect.value = \"- Select -\"\n",
+    "    if sportselect.value != \"- Select -\": \n",
+    "        display(Javascript(\"$(\\\"select.widget-listbox:eq(2) option[data-value='\" + sportselect.value +\"']\\\").remove();\"))  \n",
+    "        sportselect.value = \"- Select -\"\n",
+    "    if dportselect.value != \"- Select -\": \n",
+    "        display(Javascript(\"$(\\\"select.widget-listbox:eq(3) option[data-value='\" + dportselect.value +\"']\\\").remove();\"))  \n",
+    "        dportselect.value = \"- Select -\"\n",
+    "    srctext.value = \"\"\n",
+    "    \n",
+    "    data_loader()\n",
+    "    \n",
+    "    print \"Click the 'Save' button when you're finished scoring\" \n",
+    "\n",
+    "\n",
+    "def savesort(b):\n",
+    "    global svals\n",
+    "    clear_output()    \n",
+    "\n",
+    "    variables = []\n",
+    "    mutation=\"\"\"mutation($input:[NetflowScoreInputType!]!)\n",
+    "            {\n",
+    "              flow{\n",
+    "                score(input:$input)\n",
+    "                    {success}\n",
+    "              }\n",
+    "            }\"\"\"\n",
+    "\n",
+    "    for row in svals:\n",
+    "        variables.append({\n",
+    "            'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d'),\n",
+    "            'score': row[4],\n",
+    "            'srcIp': row[0] if row[0] != '- Select -' else None,\n",
+    "            'dstIp': row[1] if row[1] != '- Select -' else None,\n",
+    "            'srcPort': row[2] if row[2] != '- Select -' else None,\n",
+    "            'dstPort': row[3]  if row[3] != '- Select -' else None\n",
+    "            })\n",
+    "\n",
+    "    var = {'input':variables}\n",
+    "    response = GraphQLClient.request(mutation,var)\n",
+    "\n",
+    "    svals = []\n",
+    "    if not 'errors' in response :\n",
+    "        display(Javascript(\"$('.widget-area > .widget-subarea > *').remove();\"))\n",
+    "        data_loader() \n",
+    "        display(bigBox)\n",
+    "        display(Javascript('reloadParentData();'))         \n",
+    "        print \"Suspicious connects successfully updated\"        \n",
+    "    else:\n",
+    "        print \"An error occurred: \" + response['errors'][0]['message']\n",
+    "\n",
+    "assignbut.on_click(assign_score)\n",
+    "updatebut.on_click(savesort)"
    ]
   }
  ],
@@ -272,7 +302,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "version": "2.7.5"
   }
  },
  "nbformat": 4,


[07/50] [abbrv] incubator-spot git commit: Include configuration for HDFS and Impala APIs

Posted by ev...@apache.org.
Include configuration for HDFS and Impala APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/52f34f48
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/52f34f48
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/52f34f48

Branch: refs/heads/SPOT-35_graphql_api
Commit: 52f34f48f67fa307104b532a5bfc05540a7a7ae8
Parents: 5b75b41
Author: Everardo Lopez Sandoval (Intel) <el...@jmoren4x-mobl2.amr.corp.intel.com>
Authored: Sun Mar 5 18:16:27 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-setup/spot.conf | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/52f34f48/spot-setup/spot.conf
----------------------------------------------------------------------
diff --git a/spot-setup/spot.conf b/spot-setup/spot.conf
index 65fbd74..cdff372 100755
--- a/spot-setup/spot.conf
+++ b/spot-setup/spot.conf
@@ -6,6 +6,8 @@ DBNAME='spot'
 
 #hdfs - base user and data source config
 HUSER='/user/spot'
+NAME_NODE=''
+WEB_PORT=50070
 DNS_PATH=${HUSER}/${DSOURCE}/hive/y=${YR}/m=${MH}/d=${DY}/
 PROXY_PATH=${HUSER}/${DSOURCE}/hive/y=${YR}/m=${MH}/d=${DY}/
 FLOW_PATH=${HUSER}/${DSOURCE}/hive/y=${YR}/m=${MH}/d=${DY}/
@@ -13,6 +15,7 @@ HPATH=${HUSER}/${DSOURCE}/scored_results/${FDATE}
 
 #impala config
 IMPALA_DEM='node04'
+IMPALA_PORT=21050
 
 #kerberos config
 KRB_AUTH=false
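
For reference, a sketch of how these values might be consumed by the HDFS and Impala API layers through the hdfs and impyla client libraries; the host names, user and database below are placeholders, not values from the commit:

    from hdfs import InsecureClient
    from impala.dbapi import connect

    # Stand-ins for NAME_NODE, WEB_PORT, IMPALA_DEM and IMPALA_PORT
    hdfs_client = InsecureClient("http://namenode-host:50070", user="spot")
    print(hdfs_client.list("/user/spot"))

    conn = connect(host="node04", port=21050)
    cursor = conn.cursor()
    cursor.execute("SHOW TABLES IN spot")
    print(cursor.fetchall())

Note that WEB_PORT (50070) is the NameNode's WebHDFS port, while IMPALA_PORT (21050) is the HiveServer2-compatible port impyla connects to.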


[04/50] [abbrv] incubator-spot git commit: Remove commented code

Posted by ev...@apache.org.
Remove commented code


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/93b2fdc4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/93b2fdc4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/93b2fdc4

Branch: refs/heads/SPOT-35_graphql_api
Commit: 93b2fdc4c7232960fd6021f84609071c7968c784
Parents: b85e327
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 12:40:07 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/93b2fdc4/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index f72f5a4..a8dd2a5 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -336,8 +336,7 @@ class OA(object):
                 dns_details = [ conn + (dns_nc.get_nc(conn[2]),) for conn in dns_details ]
             else:
                 dns_details = [ conn + (0,) for conn in dns_details ]
-            
-            # value_string += str(tuple(row) for row in dns_details) + ","              
+                          
             for row in dns_details:
                 value_string += str(tuple(item for item in row)) + ","
 
@@ -353,8 +352,7 @@ class OA(object):
     def _get_dns_dendrogram(self): 
 
         for conn in self._dns_scores:   
-            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]         
-
+            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]
             full_date = datetime.datetime.utcfromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
 
             date = full_date.split(" ")[0].split("-")
@@ -413,6 +411,4 @@ class OA(object):
             query_to_insert=("""
                 INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
             """).format(self._db, yr, mn, tuple(df_final))            
-            impala.execute_query(query_to_insert)  
-
-        
+            impala.execute_query(query_to_insert)
\ No newline at end of file


[30/50] [abbrv] incubator-spot git commit: Updating setup documentation

Posted by ev...@apache.org.
Updating setup documentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/70db6eec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/70db6eec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/70db6eec

Branch: refs/heads/SPOT-35_graphql_api
Commit: 70db6eec3b7346fed4c8920e7786e9320dd7d666
Parents: 03e6319
Author: Moises Valdovinos <mv...@mvaldovi-mac01.amr.corp.intel.com>
Authored: Thu Mar 9 01:50:20 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-setup/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/70db6eec/spot-setup/README.md
----------------------------------------------------------------------
diff --git a/spot-setup/README.md b/spot-setup/README.md
index ad72eb2..e0125b3 100644
--- a/spot-setup/README.md
+++ b/spot-setup/README.md
@@ -18,7 +18,7 @@ To collaborate and run spot-setup, it is required the following prerequisites:
 
 ## General Description
 
-The main script in the repository is **hdfs_setup.sh** which is responsible of loading environment variables, creating folders in Hadoop for the different use cases (flow, DNS or Proxy), create the Hive database, and finally execute hive query scripts that creates Hive tables needed to access netflow, dns and proxy data.
+The main script in the repository is **hdfs_setup.sh**, which is responsible for loading environment variables, creating folders in Hadoop for the different use cases (flow, DNS or Proxy), creating the Impala database, and finally executing the Impala query scripts that create the Impala tables needed to access netflow, dns and proxy data.
 
 ## Environment Variables
 
@@ -32,7 +32,7 @@ To read more about these variables, please review the [documentation] (http://sp
 
 spot-setup contains a script per use case, as of today, there is a table creation script for each DNS, flow and Proxy data.
 
-These HQL scripts are intended to be executed as a Hive statement and must comply HQL standards.
+These HQL scripts are intended to be executed as Impala statements and must comply with HQL standards.
 
 We create tables using Parquet format to get a faster query performance. This format is an industry standard and you can find more information about it on:
 - Parquet is a columnar storage format - https://parquet.apache.org/


[37/50] [abbrv] incubator-spot git commit: Adding permissions to Impala and fixing Proxy table partitions

Posted by ev...@apache.org.
Adding permissions to Impala and fixing Proxy table partitions


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/bb8847ac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/bb8847ac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/bb8847ac

Branch: refs/heads/SPOT-35_graphql_api
Commit: bb8847acb06df80d0d231df21331537bf6c2749c
Parents: 5af8419
Author: Moises Valdovinos <mv...@mvaldovi-mac01.amr.corp.intel.com>
Authored: Thu Mar 9 18:37:37 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:50:20 2017 -0700

----------------------------------------------------------------------
 spot-setup/create_dns_avro_parquet.hql   |  37 ------
 spot-setup/create_dns_parquet.hql        | 147 +++++++++++++++++++++
 spot-setup/create_flow_avro_parquet.hql  |  72 -----------
 spot-setup/create_flow_parquet.hql       | 179 ++++++++++++++++++++++++++
 spot-setup/create_proxy_avro_parquet.hql |  76 -----------
 spot-setup/create_proxy_parquet.hql      | 161 +++++++++++++++++++++++
 spot-setup/hdfs_setup.sh                 |  46 ++++---
 7 files changed, 518 insertions(+), 200 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_dns_avro_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_dns_avro_parquet.hql b/spot-setup/create_dns_avro_parquet.hql
deleted file mode 100755
index 105c386..0000000
--- a/spot-setup/create_dns_avro_parquet.hql
+++ /dev/null
@@ -1,37 +0,0 @@
-SET hiveconf:huser;
-SET hiveconf:dbname;
-
-CREATE EXTERNAL TABLE IF NOT EXISTS ${hiveconf:dbname}.dns (
- frame_time STRING,
- unix_tstamp BIGINT,
- frame_len INT,
- ip_dst STRING,
- ip_src STRING,
- dns_qry_name STRING,
- dns_qry_class STRING,
- dns_qry_type INT,
- dns_qry_rcode INT,
- dns_a STRING
-)
-PARTITIONED BY (y INT, m INT, d INT, h int)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
-STORED AS PARQUET
-LOCATION '${hiveconf:huser}/dns/hive'
-TBLPROPERTIES ('avro.schema.literal'='{
-    "type":   "record"
-  , "name":   "DnsRecord"
-  , "namespace" : "com.cloudera.accelerators.dns.avro"
-  , "fields": [
-        {"name": "frame_time",                  "type":["string",   "null"]}
-     ,  {"name": "unix_tstamp",                    "type":["bigint",   "null"]}
-     ,  {"name": "frame_len",                    "type":["int",   "null"]}
-     ,  {"name": "ip_dst",                    "type":["string",   "null"]}
-     ,  {"name": "ip_src",                    "type":["string",   "null"]}
-     ,  {"name": "dns_qry_name",              "type":["string",   "null"]}
-     ,  {"name": "dns_qry_class",             "type":["string",   "null"]}
-     ,  {"name": "dns_qry_type",              "type":["int",   "null"]}
-     ,  {"name": "dns_qry_rcode",             "type":["int",   "null"]}
-     ,  {"name": "dns_a",                 "type":["string",   "null"]}
-  ]
-}');
-

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_dns_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_dns_parquet.hql b/spot-setup/create_dns_parquet.hql
new file mode 100755
index 0000000..4e01d26
--- /dev/null
+++ b/spot-setup/create_dns_parquet.hql
@@ -0,0 +1,147 @@
+
+CREATE EXTERNAL TABLE IF NOT EXISTS ${var:dbname}.dns (
+frame_time STRING, 
+unix_tstamp BIGINT,
+frame_len INT,
+ip_dst STRING,
+ip_src STRING,
+dns_qry_name STRING,
+dns_qry_class STRING,
+dns_qry_type INT,
+dns_qry_rcode INT,
+dns_a STRING
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT,
+h TINYINT
+)
+STORED AS PARQUET 
+LOCATION '${var:huser}/dns/hive';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_dendro (
+unix_tstamp BIGINT,
+dns_a STRING,
+dns_qry_name STRING,
+ip_dst STRING
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/dendro';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_edge ( 
+unix_tstamp BIGINT,
+frame_len BIGINT,
+ip_dst STRING,
+ip_src STRING,
+dns_qry_name STRING,
+dns_qry_class STRING,
+dns_qry_type INT,
+dns_qry_rcode INT,
+dns_a STRING,
+hh INT,
+dns_qry_class_name STRING,
+dns_qry_type_name STRING,
+dns_qry_rcode_name STRING,
+network_context STRING
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/edge';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_ingest_summary ( 
+tdate STRING,
+total BIGINT
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/summary';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_scores ( 
+frame_time STRING, 
+unix_tstamp BIGINT,
+frame_len BIGINT,
+ip_dst STRING, 
+dns_qry_name STRING, 
+dns_qry_class STRING,
+dns_qry_type INT,
+dns_qry_rcode INT, 
+ml_score FLOAT,
+tld STRING,
+query_rep STRING,
+hh INT,
+dns_qry_class_name STRING, 
+dns_qry_type_name STRING,
+dns_qry_rcode_name STRING, 
+network_context STRING 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/suspicious';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_storyboard ( 
+ip_threat STRING,
+dns_threat STRING, 
+title STRING,
+text STRING
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/storyboard';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_threat_dendro (
+anchor STRING, 
+total BIGINT,
+dns_qry_name STRING, 
+ip_dst STRING
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/threat_dendro';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_threat_investigation ( 
+unix_tstamp BIGINT,
+ip_dst STRING, 
+dns_qry_name STRING, 
+ip_sev INT,
+dns_sev INT
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/dns/hive/oa/threat_investigation';

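The tables above are plain Parquet tables partitioned by y/m/d (and h for the raw table), so any Impala client can query them directly. As a minimal sketch, not part of the commit, using impyla (already imported elsewhere in spot-oa) with a hypothetical daemon endpoint and database name:

    # Sketch: read the day's suspicious DNS records from the new table.
    # Host, port and database name are placeholders; the real values
    # come from /etc/spot.conf (IMPALA_DEM, DBNAME).
    from impala.dbapi import connect
    from impala.util import as_pandas

    conn = connect(host='impala-daemon.example.com', port=21050)
    cursor = conn.cursor()
    cursor.execute("""
        SELECT frame_time, ip_dst, dns_qry_name, ml_score
        FROM spot.dns_scores
        WHERE y=2017 AND m=3 AND d=15
        ORDER BY ml_score
        LIMIT 10
    """)
    print as_pandas(cursor).head()
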
http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_flow_avro_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_flow_avro_parquet.hql b/spot-setup/create_flow_avro_parquet.hql
deleted file mode 100755
index 34aa63b..0000000
--- a/spot-setup/create_flow_avro_parquet.hql
+++ /dev/null
@@ -1,72 +0,0 @@
-SET hiveconf:huser;
-SET hiveconf:dbname;
-
-CREATE EXTERNAL TABLE IF NOT EXISTS ${hiveconf:dbname}.flow (
-  treceived STRING,
-  unix_tstamp BIGINT,
-  tryear INT,
-  trmonth INT,
-  trday INT,
-  trhour INT,
-  trminute INT,
-  trsec INT,
-  tdur FLOAT,
-  sip  STRING,
-  dip STRING,
-  sport INT,
-  dport INT,
-  proto STRING,
-  flag STRING,
-  fwd INT,
-  stos INT,
-  ipkt BIGINT,
-  ibyt BIGINT,
-  opkt BIGINT,
-  obyt BIGINT,
-  input INT,
-  output INT,
-  sas INT,
-  das INT,
-  dtos INT,
-  dir INT,
-  rip STRING
-  )
-PARTITIONED BY (y INT, m INT, d INT, h int)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
-STORED AS PARQUET
-LOCATION '${hiveconf:huser}/flow/hive'
-TBLPROPERTIES ('avro.schema.literal'='{
-    "type":   "record"
-  , "name":   "FlowRecord"
-  , "namespace" : "com.cloudera.accelerators.flows.avro"
-  , "fields": [
-        {"name": "treceived",                  "type":["string",   "null"]}
-     ,  {"name": "unix_tstamp",                 "type":["long",     "null"]}
-     ,  {"name": "tryear",                    "type":["int",   "null"]}
-     ,  {"name": "trmonth",                    "type":["int",   "null"]}
-     ,  {"name": "trday",                    "type":["int",   "null"]}
-     ,  {"name": "trhour",                    "type":["int",   "null"]}
-     ,  {"name": "trminute",                    "type":["int",   "null"]}
-     ,  {"name": "trsec",                    "type":["int",   "null"]}
-     ,  {"name": "tdur",                    "type":["float",   "null"]}
-     ,  {"name": "sip",              "type":["string",   "null"]}
-     ,  {"name": "sport",                 "type":["int",   "null"]}
-     ,  {"name": "dip",         "type":["string",   "null"]}
-     ,  {"name": "dport",        "type":["int",   "null"]}
-     ,  {"name": "proto",            "type":["string",   "null"]}
-     ,  {"name": "flag",            "type":["string",   "null"]}
-     ,  {"name": "fwd",                 "type":["int",   "null"]}
-     ,  {"name": "stos",                 "type":["int",   "null"]}
-     ,  {"name": "ipkt",                 "type":["bigint",   "null"]}
-     ,  {"name": "ibytt",                 "type":["bigint",   "null"]}
-     ,  {"name": "opkt",                 "type":["bigint",   "null"]}
-     ,  {"name": "obyt",                 "type":["bigint",   "null"]}
-     ,  {"name": "input",                 "type":["int",   "null"]}
-     ,  {"name": "output",                 "type":["int",   "null"]}
-     ,  {"name": "sas",                 "type":["int",   "null"]}
-     ,  {"name": "das",                 "type":["int",   "null"]}
-     ,  {"name": "dtos",                 "type":["int",   "null"]}
-     ,  {"name": "dir",                 "type":["int",   "null"]}
-     ,  {"name": "rip",                    "type":["string",   "null"]}
-  ]
-}');

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_flow_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_flow_parquet.hql b/spot-setup/create_flow_parquet.hql
new file mode 100755
index 0000000..c1da27f
--- /dev/null
+++ b/spot-setup/create_flow_parquet.hql
@@ -0,0 +1,179 @@
+
+CREATE EXTERNAL TABLE IF NOT EXISTS ${var:dbname}.flow (
+treceived STRING,
+unix_tstamp BIGINT,
+tryear INT,
+trmonth INT,
+trday INT,
+trhour INT,
+trminute INT,
+trsec INT,
+tdur FLOAT,
+sip STRING,
+dip STRING,
+sport INT,
+dport INT,
+proto STRING,
+flag STRING,
+fwd INT,
+stos INT,
+ipkt BIGINT,
+ibyt BIGINT,
+opkt BIGINT, 
+obyt BIGINT,
+input INT,
+output INT,
+sas INT,
+das INT,
+dtos INT,
+dir INT,
+rip STRING
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT,
+h TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_chords (
+ip_threat STRING,
+srcip STRING,
+dstip STRING,
+ibyt BIGINT, 
+ipkt BIGINT
+)
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/chords';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_edge (
+tstart STRING, 
+srcip STRING,
+dstip STRING,
+sport INT, 
+dport INT, 
+proto STRING,
+flags STRING,
+tos INT, 
+ibyt BIGINT, 
+ipkt BIGINT, 
+input BIGINT,
+output BIGINT, 
+rip STRING,
+obyt BIGINT, 
+opkt BIGINT, 
+hh INT,
+mn INT 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/edge';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_ingest_summary (
+tdate STRING,
+total BIGINT 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/summary';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_scores (
+tstart STRING, 
+srcip STRING,
+dstip STRING,
+sport INT, 
+dport INT, 
+proto STRING,
+ipkt INT,
+ibyt INT,
+opkt INT,
+obyt INT,
+ml_score FLOAT,
+rank INT,
+srcip_internal INT,
+dstip_internal INT,
+src_geoloc STRING, 
+dst_geoloc STRING, 
+src_domain STRING, 
+dst_domain STRING, 
+src_rep STRING,
+dst_rep STRING 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/suspicious';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_storyboard (
+ip_threat STRING,
+title STRING,
+text STRING
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/storyboard';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_threat_investigation ( 
+tstart STRING,
+srcip STRING, 
+dstip STRING, 
+srcport INT,
+dstport INT,
+score INT 
+) 
+PARTITIONED BY (
+y SMALLINT,
+m TINYINT,
+d TINYINT
+) 
+STORED AS PARQUET 
+LOCATION '${var:huser}/flow/hive/oa/threat_investigation';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_timeline (
+ip_threat STRING,
+tstart STRING, 
+tend STRING, 
+srcip STRING,
+dstip STRING,
+proto STRING,
+sport INT, 
+dport INT, 
+ipkt BIGINT, 
+ibyt BIGINT
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/flow/hive/oa/timeline';

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_proxy_avro_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_proxy_avro_parquet.hql b/spot-setup/create_proxy_avro_parquet.hql
deleted file mode 100755
index 3fd7ea8..0000000
--- a/spot-setup/create_proxy_avro_parquet.hql
+++ /dev/null
@@ -1,76 +0,0 @@
-SET hiveconf:huser;
-SET hiveconf:dbname;
-
-CREATE EXTERNAL TABLE IF NOT EXISTS ${hiveconf:dbname}.proxy (
-p_date                string,
-p_time                string,
-clientip              string,
-host                  string,
-reqmethod             string,
-useragent             string,
-resconttype           string,
-duration              int,
-username              string,
-authgroup             string,
-exceptionid           string,
-filterresult          string,
-webcat                string,
-referer               string,
-respcode              string,
-action                string,
-urischeme             string,
-uriport               string,
-uripath               string,
-uriquery              string,
-uriextension          string,
-serverip              string,
-scbytes               int,
-csbytes               int,
-virusid               string,
-bcappname             string,
-bcappoper             string,
-fulluri               string
-)
-PARTITIONED BY (
-y string, 
-m string, 
-d string, 
-h string)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
-STORED AS PARQUET
-LOCATION '${hiveconf:huser}/proxy/hive'
-TBLPROPERTIES ('avro.schema.literal'='{
-    "type":   "record"
-  , "name":   "ProxyRecord"
-  , "namespace" : "com.cloudera.accelerators.proxy.avro"
-  , "fields": [
-     {"name": "p_date", "type":["string",  "null"]}
-    , {"name": "p_time", "type":["string",  "null"]}
-    , {"name": "clientip", "type":["string",  "null"]}
-    , {"name": "host", "type":["string",  "null"]}
-    , {"name": "reqmethod", "type":["string",  "null"]}
-    , {"name": "useragent", "type":["string",  "null"]}
-    , {"name": "resconttype", "type":["string",  "null"]}
-    , {"name": "duration", "type":["int",  "null"]}
-    , {"name": "username",  "type":["string",  "null"]}
-    , {"name": "authgroup", "type":["string",  "null"]}
-    , {"name": "exceptionid", "type":["string",  "null"]}
-    , {"name": "filterresult", "type":["string",  "null"]}
-    , {"name": "webcat", "type":["string",  "null"]}
-    , {"name": "referer", "type":["string",  "null"]}
-    , {"name": "respcode", "type":["string",  "null"]}
-    , {"name": "action", "type":["string",  "null"]}
-    , {"name": "urischeme", "type":["string",  "null"]}
-    , {"name": "uriport", "type":["string",  "null"]}
-    , {"name": "uripath", "type":["string",  "null"]}
-    , {"name": "uriquery", "type":["string",  "null"]}
-    , {"name": "uriextension", "type":["string",  "null"]}
-    , {"name": "serverip", "type":["string",  "null"]}
-    , {"name": "scbytes", "type":["int",  "null"]}
-    , {"name": "csbytes", "type":["int",  "null"]}
-    , {"name": "virusid", "type":["string",  "null"]}
-    , {"name": "bcappname", "type":["string",  "null"]}
-    , {"name": "bcappoper", "type":["string",  "null"]}
-    , {"name": "fulluri", "type":["string",  "null"]}
-  ]
-}');

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/create_proxy_parquet.hql
----------------------------------------------------------------------
diff --git a/spot-setup/create_proxy_parquet.hql b/spot-setup/create_proxy_parquet.hql
new file mode 100755
index 0000000..f28b831
--- /dev/null
+++ b/spot-setup/create_proxy_parquet.hql
@@ -0,0 +1,161 @@
+
+CREATE EXTERNAL TABLE IF NOT EXISTS ${var:dbname}.proxy (
+p_date STRING,
+p_time STRING,
+clientip STRING,
+host STRING,
+reqmethod STRING,
+useragent STRING,
+resconttype STRING,
+duration INT,
+username STRING,
+authgroup STRING,
+exceptionid STRING,
+filterresult STRING,
+webcat STRING,
+referer STRING,
+respcode STRING,
+action STRING,
+urischeme STRING,
+uriport STRING,
+uripath STRING,
+uriquery STRING,
+uriextension STRING,
+serverip STRING,
+scbytes INT,
+csbytes INT,
+virusid STRING,
+bcappname STRING,
+bcappoper STRING,
+fulluri STRING
+)
+PARTITIONED BY (
+y STRING,
+m STRING,
+d STRING,
+h STRING
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_edge ( 
+tdate STRING,
+time STRING, 
+clientip STRING, 
+host STRING, 
+webcat STRING, 
+respcode STRING, 
+reqmethod STRING,
+useragent STRING,
+resconttype STRING,
+referer STRING,
+uriport STRING,
+serverip STRING, 
+scbytes INT, 
+csbytes INT, 
+fulluri STRING,
+hh INT,
+respcode_name STRING 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/edge';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_ingest_summary ( 
+tdate STRING,
+total BIGINT 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/summary';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_scores ( 
+tdate STRING,
+time STRING, 
+clientip STRING, 
+host STRING, 
+reqmethod STRING,
+useragent STRING,
+resconttype STRING,
+duration INT,
+username STRING, 
+webcat STRING, 
+referer STRING,
+respcode INT,
+uriport INT, 
+uripath STRING,
+uriquery STRING, 
+serverip STRING, 
+scbytes INT, 
+csbytes INT, 
+fulluri STRING,
+word STRING, 
+ml_score FLOAT,
+uri_rep STRING,
+respcode_name STRING,
+network_context STRING 
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/suspicious';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_storyboard ( 
+p_threat STRING, 
+title STRING,
+text STRING
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/storyboard';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_threat_investigation ( 
+tdate STRING,
+fulluri STRING,
+uri_sev INT
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/threat_investigation';
+
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_timeline ( 
+p_threat STRING, 
+tstart STRING, 
+tend STRING, 
+duration BIGINT, 
+clientip STRING, 
+respcode STRING, 
+respcodename STRING
+)
+PARTITIONED BY ( 
+y SMALLINT,
+m TINYINT,
+d TINYINT
+)
+STORED AS PARQUET
+LOCATION '${var:huser}/proxy/hive/oa/timeline';

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/bb8847ac/spot-setup/hdfs_setup.sh
----------------------------------------------------------------------
diff --git a/spot-setup/hdfs_setup.sh b/spot-setup/hdfs_setup.sh
index 8eb9ae3..9c557ba 100755
--- a/spot-setup/hdfs_setup.sh
+++ b/spot-setup/hdfs_setup.sh
@@ -18,35 +18,51 @@
 #
 
 DSOURCES=('flow' 'dns' 'proxy')
-DFOLDERS=('binary' 'hive' 'stage')
+DFOLDERS=('binary' 
+'stage'
+'hive'
+'hive/oa'
+'hive/oa/chords'
+'hive/oa/edge'
+'hive/oa/summary'
+'hive/oa/suspicious'
+'hive/oa/storyboard'
+'hive/oa/threat_investigation'
+'hive/oa/timeline'
+'hive/oa/dendro'
+'hive/oa/threat_dendro'
+)
+
+# Sourcing spot configuration variables
 source /etc/spot.conf
 
-#
-# creating HDFS user's folder
-#
-hadoop fs -mkdir ${HUSER}
-hadoop fs -chown ${USER}:supergroup ${HUSER}
+# Creating HDFS user's folder
+sudo -u hdfs hdfs dfs -mkdir ${HUSER}
+sudo -u hdfs hdfs dfs -chown ${USER}:supergroup ${HUSER}
+sudo -u hdfs hdfs dfs -chmod 775 ${HUSER}
 
+# Creating HDFS paths for each use case
 for d in "${DSOURCES[@]}" 
 do 
 	echo "creating /$d"
-	hadoop fs -mkdir ${HUSER}/$d 
+	hdfs dfs -mkdir ${HUSER}/$d 
 	for f in "${DFOLDERS[@]}" 
 	do 
 		echo "creating $d/$f"
-		hadoop fs -mkdir ${HUSER}/$d/$f
+		hdfs dfs -mkdir ${HUSER}/$d/$f
 	done
 done
 
-#
-# create hive tables
-#
-#configure / create catalog
-hive -e "CREATE DATABASE ${DBNAME}"
+# Modifying permission on HDFS folders to allow Impala to read/write
+hdfs dfs -chmod -R 775 ${HUSER}
+hdfs dfs -setfacl -R -m user:impala:rwx ${HUSER}
+
+# Creating Spot Database
+impala-shell -i ${IMPALA_DEM} -q "CREATE DATABASE IF NOT EXISTS ${DBNAME};"
 
+# Creating Impala tables
 for d in "${DSOURCES[@]}" 
 do 
-	hive -hiveconf huser=${HUSER} -hiveconf dbname=${DBNAME} -f create_${d}_avro_parquet.hql
+	impala-shell -i ${IMPALA_DEM} --var=huser=${HUSER} --var=dbname=${DBNAME} -f create_${d}_parquet.hql
 done
 
-

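For reference, the loop above feeds ${HUSER} and ${DBNAME} into each HQL file through impala-shell's --var substitution, which the ${var:huser}/${var:dbname} placeholders in the new scripts pick up. A minimal Python sketch of the same invocation, with placeholder configuration values:

    import subprocess

    IMPALA_DEM = 'impala-daemon.example.com'  # placeholder; read from /etc/spot.conf
    HUSER = '/user/spot'                      # placeholder HDFS root
    DBNAME = 'spot'                           # placeholder database name

    for source in ('flow', 'dns', 'proxy'):
        # Same call hdfs_setup.sh makes once per data source
        subprocess.check_call([
            'impala-shell', '-i', IMPALA_DEM,
            '--var=huser={0}'.format(HUSER),
            '--var=dbname={0}'.format(DBNAME),
            '-f', 'create_{0}_parquet.hql'.format(source),
        ])
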

[32/50] [abbrv] incubator-spot git commit: Improved ingest summary query

Posted by ev...@apache.org.
Improved ingest summary query


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/3386849d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/3386849d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/3386849d

Branch: refs/heads/SPOT-35_graphql_api
Commit: 3386849d4a628ef6c02fc818ba342d58a0f95f59
Parents: c526716
Author: LedaLima <le...@apache.org>
Authored: Mon Mar 6 18:30:19 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/flow/flow_oa.py | 70 ++++++++++++++++++++++++++++-------------
 1 file changed, 49 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/3386849d/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 63de068..bf2d301 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -22,7 +22,7 @@ import sys
 import json
 import numpy as np
 import linecache, bisect
-import csv
+import csv, math
 import pandas as pd
 import subprocess
 import numbers
@@ -35,6 +35,7 @@ from utils import Util, ProgressBar
 from components.data.data import Data
 from components.geoloc.geoloc import GeoLocalization
 from components.reputation.gti import gti
+from impala.util import as_pandas
 import time
 
 
@@ -407,32 +408,59 @@ class OA(object):
 
                     impala.execute_query(query_to_load)
  
-
-    def _ingest_summary(self): 
+ 
+    def _ingest_summary(self):
         # get date parameters.
         yr = self._date[:4]
         mn = self._date[4:6]
         dy = self._date[6:]
 
-        self._logger.info("Getting ingest summary data for the day") 
-
-        query_to_load = ("""
-            INSERT INTO TABLE {0}.flow_ingest_summary PARTITION (y={2}, m={3})
-            SELECT treceived,tryear, trmonth, trday, trhour, trminute, COUNT(*) total
-            FROM {0}.{1}
-            WHERE y={2} AND m={3} AND d={4}
-            AND unix_tstamp IS NOT NULL
-            AND sip IS NOT NULL
-            AND sport IS NOT NULL
-            AND dip IS NOT NULL
-            AND dport IS NOT NULL
-            AND ibyt IS NOT NULL
-            AND ipkt IS NOT NULL
-            AND cast(treceived as timestamp) IS NOT NULL
-            GROUP BY treceived,tryear, trmonth, trday, trhour, trminute;
-            """).format(self._db,self._table_name,yr,mn,dy) 
+        self._logger.info("Getting ingest summary data for the day")
+        
+        ingest_summary_cols = ["date","total"]		
+        result_rows = []        
+        df_filtered =  pd.DataFrame()
+
+        # get ingest summary.
+
+        query_to_load=("""
+                SELECT tryear, trmonth, trday, trhour, trminute, COUNT(*) as total
+                FROM {0}.{1} WHERE y={2} AND m={3} AND d={4}
+                AND unix_tstamp IS NOT NULL
+                AND sip IS NOT NULL
+                AND sport IS NOT NULL
+                AND dip IS NOT NULL
+                AND dport IS NOT NULL
+                AND ibyt IS NOT NULL
+                AND ipkt IS NOT NULL
+                AND tryear={2}
+                AND cast(treceived as timestamp) IS NOT NULL
+                GROUP BY tryear, trmonth, trday, trhour, trminute;
+        """).format(self._db,self._table_name, yr, mn, dy)
         
-        impala.execute_query(query_to_load)
+        results = impala.execute_query(query_to_load) 
+ 
+        if results:
+            df_results = as_pandas(results) 
+            
+            #Forms a new dataframe splitting the minutes from the time column
+            df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(val['tryear'],val['trmonth'],val['trday'], val['trhour'], val['trminute']), int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df_results.iterrows()],columns = ingest_summary_cols)
+            value_string = ''
+            #Groups the data by minute 
+
+            sf = df_new.groupby(by=['date'])['total'].sum()
+            df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})
+            
+            df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False) 
+            if len(df_final) > 0:
+                query_to_insert=("""
+                    INSERT INTO {0}.flow_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
+                """).format(self._db, yr, mn, tuple(df_final))
+
+                impala.execute_query(query_to_insert)
+                
+        else:
+            self._logger.info("No data found for the ingest summary")
 
 
 
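The rewritten _ingest_summary builds a 'year-month-day hour:minute' key for each result row, then lets pandas collapse duplicate minutes before issuing a single INSERT. A self-contained sketch of that grouping step, with made-up sample rows:

    import pandas as pd

    # Rows shaped like the Impala result set: one count per minute tuple
    rows = [
        {'tryear': 2017, 'trmonth': 3, 'trday': 6, 'trhour': 18, 'trminute': 30, 'total': 12},
        {'tryear': 2017, 'trmonth': 3, 'trday': 6, 'trhour': 18, 'trminute': 30, 'total': 5},
        {'tryear': 2017, 'trmonth': 3, 'trday': 6, 'trhour': 18, 'trminute': 31, 'total': 7},
    ]

    df = pd.DataFrame(
        [['{0}-{1}-{2} {3}:{4}'.format(r['tryear'], r['trmonth'], r['trday'],
                                       r['trhour'], r['trminute']), r['total']]
         for r in rows],
        columns=['date', 'total'])

    # Collapse duplicate minutes, as the patch does before inserting
    sf = df.groupby(by=['date'])['total'].sum()
    per_min = pd.DataFrame({'date': sf.index, 'total': sf.values})
    # Two rows remain: ('2017-3-6 18:30', 17) and ('2017-3-6 18:31', 7)
    print per_min.to_records(False, False)
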


[31/50] [abbrv] incubator-spot git commit: Updating setup documentation

Posted by ev...@apache.org.
Updating setup documentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/03e6319f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/03e6319f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/03e6319f

Branch: refs/heads/SPOT-35_graphql_api
Commit: 03e6319f0d8fe109c27b51d106838d23930c8d36
Parents: 85431c6
Author: Moises Valdovinos <mv...@mvaldovi-mac01.amr.corp.intel.com>
Authored: Thu Mar 9 01:47:20 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-setup/README.md | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/03e6319f/spot-setup/README.md
----------------------------------------------------------------------
diff --git a/spot-setup/README.md b/spot-setup/README.md
index 1ac02f2..ad72eb2 100644
--- a/spot-setup/README.md
+++ b/spot-setup/README.md
@@ -8,7 +8,7 @@ This document is intended for any developer or sysadmin in learning the technica
 
 This information will help you to get started on contributing to the Apache Spot Setup repository. For information about installing and running Apache Spot go to our [Installation Guide](http://spot.apache.org/doc/).
 
-Spot-setup contains the scripts to setup HDFS for Apache Spot solution. It will create the folder and database structure needed to run Apache Spot on HDFS and HIVE respectively. Spot-setup is a component of Apache Spot and is executed in the initial configuration after Linux user creation and before Ingest installation.
+Spot-setup contains the scripts to set up HDFS for the Apache Spot solution. It will create the folder and database structure needed to run Apache Spot on HDFS and Impala respectively. Spot-setup is a component of Apache Spot and is executed in the initial configuration after Linux user creation and before Ingest installation.
 
 ## Prerequisites
 
@@ -26,7 +26,7 @@ The main script in the repository is **hdfs_setup.sh** which is responsible of l
 
 This file also contains sources desired to be installed as part of Apache Spot, general paths for HDFS folders, Kerberos information and local paths in the Linux filesystem for the user as well as for machine learning, ipython, lda and ingest processes.
 
-To read more about these variables, please review the [wiki] (https://github.com/Open-Network-Insight/open-network-insight/wiki/Edit%20Solution%20Configuration).
+To read more about these variables, please review the [documentation] (http://spot.incubator.apache.org/doc/#configuration).
 
 ## Database Query Scripts
 
@@ -34,22 +34,42 @@ spot-setup contains a script per use case, as of today, there is a table creatio
 
 These HQL scripts are intended to be executed as Hive statements and must comply with HQL standards.
 
-We want to create tables in Avro/Parquet format to get a faster query performance. This format is an industry standard and you can find more information about it on:
-- Avro is a data serialization system - https://avro.apache.org/
+We create tables using the Parquet format to get faster query performance. This format is an industry standard and you can find more information about it on:
 - Parquet is a columnar storage format - https://parquet.apache.org/
 
-To get to Avro/parquet format we need a staging table to store CSV data temporarily for Flow and DNS. Then, run a Hive query statement to insert these text-formatted records into the Avro/parquet table. Hive will manage to convert the text data into the desired format. The staging table must be cleaned after loading data to Avro/parquet table for the next batch cycle. For Flow and DNS, a set of a staging (CSV) and a final (Avro/parquet) tables are needed for each data entity. For Proxy, only the Avro/parquet table is needed.
+To get data into Parquet format we need a staging table that temporarily stores CSV data for Flow and DNS. Then, an Impala query statement inserts these text-formatted records into the Parquet table; Impala handles converting the text data into the desired format. The staging table must be cleaned after loading data into the Parquet table, ready for the next batch cycle. For Flow and DNS, a pair of staging (CSV) and final (Parquet) tables is needed for each data entity; for Proxy, only the Parquet table is needed. (A minimal sketch of the insert step follows this diff.)
 
 #### Flow Tables
-- flow - Avro/parquet final table to store flow records
-- flow_tmp - Text table to store temporarily flow records in CSV format
+- flow
+- flow_tmp
+- flow_chords
+- flow_edge
+- flow_ingest_summary
+- flow_scores
+- flow_storyboard
+- flow_threat_investigation
+- flow_timeline
 
 #### DNS Tables
-- dns - Avro/parquet final table to store DNS records
-- dns_tmp - Text table to store temporarily DNS records in CSV format
+- dns
+- dns_tmp
+- dns_dendro
+- dns_edge
+- dns_ingest_summary
+- dns_scores
+- dns_storyboard
+- dns_threat_dendro
+- dns_threat_investigation
 
 #### Proxy Tables
-- proxy - Avro/parquet final table to store Proxy records
+- proxy
+- proxy_edge
+- proxy_ingest_summary
+- proxy_scores
+- proxy_storyboard
+- proxy_threat_investigation
+- proxy_timeline
+
 
 ## Licensing
 
@@ -61,7 +81,8 @@ Create a pull request and contact the maintainers.
 
 ## Issues
 
-Report issues at the Apache Spot [issues] (https://github.com/Open-Network-Insight/open-network-insight/issues) page.
+- Create an [issue] (https://issues.apache.org/jira/browse/SPOT-20?jql=project%20%3D%20SPOT).
+- Go to our Slack [channel] (https://apachespot.slack.com/messages/general).
 
 ## Maintainers
 

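The staging-to-Parquet step described in this README amounts to a single INSERT ... SELECT per batch. A minimal sketch with impyla, assuming a flow_tmp staging table with the same column layout as flow and placeholder connection details:

    from impala.dbapi import connect

    conn = connect(host='impala-daemon.example.com', port=21050)  # placeholder
    cursor = conn.cursor()

    # Move one hour of CSV-staged records into the Parquet table; the
    # staging table is then cleared for the next batch cycle.
    cursor.execute("""
        INSERT INTO spot.flow PARTITION (y=2017, m=3, d=9, h=1)
        SELECT treceived, unix_tstamp, tryear, trmonth, trday, trhour,
               trminute, trsec, tdur, sip, dip, sport, dport, proto, flag,
               fwd, stos, ipkt, ibyt, opkt, obyt, input, output, sas, das,
               dtos, dir, rip
        FROM spot.flow_tmp
    """)
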

[18/50] [abbrv] incubator-spot git commit: use table variable in HDFS remove command

Posted by ev...@apache.org.
use table variable in HDFS remove command


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/c5267160
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/c5267160
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/c5267160

Branch: refs/heads/SPOT-35_graphql_api
Commit: c5267160eb71cd3649aa3f4dc07b5cb176b6c1e2
Parents: 8b5b32c
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 13:45:16 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/flow/flow_oa.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/c5267160/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 0eb3e22..63de068 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -105,9 +105,8 @@ class OA(object):
         table_schema=['suspicious', 'edge','chords','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,int(mn),int(dy)),user="impala")
-       
-        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"summary",yr,int(mn)),user="impala")
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")        
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")        
         impala.execute_query("invalidate metadata")
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))


[49/50] [abbrv] incubator-spot git commit: Removed proxy scored connections from the list

Posted by ev...@apache.org.
Removed proxy scored connections from the list


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/548eb17b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/548eb17b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/548eb17b

Branch: refs/heads/SPOT-35_graphql_api
Commit: 548eb17b65e7ca769379604af8862fc12bbf6664
Parents: 550ba31
Author: LedaLima <le...@apache.org>
Authored: Fri Mar 10 11:35:53 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/548eb17b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index b20c65e..0fad18f 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -102,7 +102,7 @@
     "    \n",
     "    scored = []\n",
     "    for item in score_values:\n",
-    "        scored.append(urllib.quote_plus(item[0]))\n",
+    "	     scored.append(urllib.quote_plus(item[0]))\n",
     "        \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",


[33/50] [abbrv] incubator-spot git commit: US1508 - csv cleanup ML documentation change.

Posted by ev...@apache.org.
US1508 - csv cleanup ML documentation change.

ML documentation now mentions where the CSVs for user feedback are stored on HDFS.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/50fbe610
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/50fbe610
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/50fbe610

Branch: refs/heads/SPOT-35_graphql_api
Commit: 50fbe610377678abb11b64cfa75826f3a09a0758
Parents: 7693ad1
Author: nlsegerl <na...@intel.com>
Authored: Tue Mar 7 14:54:03 2017 -0800
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-ml/README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/50fbe610/spot-ml/README.md
----------------------------------------------------------------------
diff --git a/spot-ml/README.md b/spot-ml/README.md
index 3200c06..00c3e2c 100644
--- a/spot-ml/README.md
+++ b/spot-ml/README.md
@@ -136,6 +136,21 @@ spot-ml output will be found under the ``HPATH`` at one of
 
 It is a CSV file in which network events are annotated with estimated probabilities and sorted in ascending order.
 
+## User Feedback
+
+The spot front end allows users to mark individual logged events as high, medium or low risk. 
+
+The risk score is stored as 1 for high risk, 2 for medium risk and 3 for low risk.
+
+At present, the model inflates the scores of events similar to low-risk items; events flagged medium or high risk are left unchanged.
+
+
+This information is stored in a tab-separated text file on HDFS at:
+
+
+	/user/<user_name>/<data source>/scored_results/<date>/feedback/ml_feedback.csv
+
+
 ## Licensing
 
 spot-ml is licensed under Apache Version 2.0

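Despite the .csv extension, the feedback file is tab-separated, so a reader has to pass the delimiter explicitly. A minimal sketch, assuming the file has been pulled out of HDFS and that the score column is named 'score' (the actual header comes from the scored_results output):

    import pandas as pd

    # First fetch the file, e.g.:
    #   hdfs dfs -get /user/<user_name>/<data source>/scored_results/<date>/feedback/ml_feedback.csv
    feedback = pd.read_csv('ml_feedback.csv', sep='\t')

    # 1 = high risk, 2 = medium risk, 3 = low risk
    high_risk = feedback[feedback['score'] == 1]
    print len(high_risk), 'events were marked high risk'
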

[44/50] [abbrv] incubator-spot git commit: Add anchor to save comments definition

Posted by ev...@apache.org.
Add anchor to save comments definition


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/03639dde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/03639dde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/03639dde

Branch: refs/heads/SPOT-35_graphql_api
Commit: 03639dde3ecac8817baf215b4d1d72d0f9014ecc
Parents: dcb4224
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Fri Mar 10 18:20:50 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/dns.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/03639dde/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
index 9ac4a26..ed63c84 100644
--- a/spot-oa/api/resources/dns.py
+++ b/spot-oa/api/resources/dns.py
@@ -283,7 +283,7 @@ def create_dendro(expanded_search,date,anchor):
 Create save comments for StoryBoard.
 --------------------------------------------------------------------------
 """
-def  save_comments(ip,query,title,text,date):
+def  save_comments(anchor,ip,query,title,text,date):
 
     db = Configuration.db()
     sb_query = ("""

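For reference, a hypothetical call with the new signature, anchor first; all values below are made up for illustration:

    # Sketch only: shows the extra anchor argument added above.
    import api.resources.dns as Dns

    Dns.save_comments(anchor='suspicious.example.net', ip='10.0.0.1',
                      query='suspicious.example.net', title='C2 candidate',
                      text='Flagged during storyboard triage',
                      date='2017-03-10')
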

[09/50] [abbrv] incubator-spot git commit: Proxy migrated to GraphQL

Posted by ev...@apache.org.
Proxy migrated to GraphQL


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/f37bb403
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/f37bb403
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/f37bb403

Branch: refs/heads/SPOT-35_graphql_api
Commit: f37bb403e5e3a55f88a3c291f4bc2c652d6bf505
Parents: 52f34f4
Author: Diego Ortiz <di...@intel.com>
Authored: Mon Mar 6 10:53:13 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/graphql/common.py                 |   4 +-
 spot-oa/api/graphql/proxy/mutation.py         | 113 ++++++++++++++++-----
 spot-oa/api/graphql/proxy/query.py            |  38 ++++---
 spot-oa/api/graphql/schema.py                 |   4 +
 spot-oa/ui/proxy/js/stores/SuspiciousStore.js |  10 +-
 spot-oa/ui/proxy/js/stores/TimelineStore.js   |   2 +-
 6 files changed, 122 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/api/graphql/common.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/common.py b/spot-oa/api/graphql/common.py
index 02568c2..2c2e9bd 100644
--- a/spot-oa/api/graphql/common.py
+++ b/spot-oa/api/graphql/common.py
@@ -23,7 +23,7 @@ def coerce_date(value):
         return datetime.strptime(str(value), '%Y-%m-%d').date()
 
 def serialize_date(value):
-    return date.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
+    return datetime.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
 
 def parse_date_literal(ast):
     return datetime.strptime(ast.value, '%Y-%m-%d')
@@ -115,7 +115,7 @@ IngestSummaryType = GraphQLObjectType(
     fields={
         'datetime': GraphQLField(
             type=SpotDatetimeType,
-            resolver=lambda root, *_: root.get('tdate')
+            resolver=lambda root, *_: '{}:00'.format(root.get('tdate'))
         ),
         'total': GraphQLField(
             type=GraphQLInt,

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/api/graphql/proxy/mutation.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/proxy/mutation.py b/spot-oa/api/graphql/proxy/mutation.py
index ffd8ea1..09e9fe4 100644
--- a/spot-oa/api/graphql/proxy/mutation.py
+++ b/spot-oa/api/graphql/proxy/mutation.py
@@ -3,6 +3,7 @@ from graphql import (
     GraphQLObjectType,
     GraphQLField,
     GraphQLArgument,
+    GraphQLList,
     GraphQLString,
     GraphQLInt,
     GraphQLNonNull,
@@ -10,7 +11,7 @@ from graphql import (
     GraphQLInputObjectField
 )
 
-from api.graphql.common import SpotDateType, SpotIpType, SpotOperationOutputType
+from api.graphql.common import SpotDateType, SpotDatetimeType, SpotIpType, SpotOperationOutputType
 import api.resources.proxy as Proxy
 
 ScoreInputType = GraphQLInputObjectType(
@@ -25,18 +26,67 @@ ScoreInputType = GraphQLInputObjectType(
             description='A score value, 1->High, 2->Medium, 3->Low'
         ),
         'uri': GraphQLInputObjectField(
-            type=GraphQLString,
-            description='Requested URI'
+            type=GraphQLNonNull(GraphQLString),
+            description='Full URI'
+        )
+    }
+)
+
+ThreatDetailsInputType = GraphQLInputObjectType(
+    name='ProxyThreatDetailsInputType',
+    fields={
+        'datetime': GraphQLInputObjectField(
+            type=SpotDatetimeType
         ),
         'clientIp': GraphQLInputObjectField(
-            type=SpotIpType,
-            description='Client\'s ip'
+            type=SpotIpType
+        ),
+        'username': GraphQLInputObjectField(
+            type=GraphQLString
+        ),
+        'duration': GraphQLInputObjectField(
+            type=GraphQLInt
+        ),
+        'uri': GraphQLInputObjectField(
+            type=GraphQLString
+        ),
+        'webCategory': GraphQLInputObjectField(
+            type=GraphQLString
+        ),
+        'responseCode': GraphQLInputObjectField(
+            type=GraphQLInt
+        ),
+        'requestMethod': GraphQLInputObjectField(
+            type=GraphQLString,
+            description='Http Method'
+        ),
+        'userAgent': GraphQLInputObjectField(
+            type=GraphQLString,
+            description='Client\'s user agent'
+        ),
+        'responseContentType': GraphQLInputObjectField(
+            type=GraphQLString
+        ),
+        'referer': GraphQLInputObjectField(
+            type=GraphQLString
+        ),
+        'uriPort': GraphQLInputObjectField(
+            type=GraphQLInt
+        ),
+        'serverIp': GraphQLInputObjectField(
+            type=SpotIpType
+        ),
+        'serverToClientBytes': GraphQLInputObjectField(
+            type=GraphQLInt
+        ),
+        'clientToServerBytes': GraphQLInputObjectField(
+            type=GraphQLInt
         )
     }
 )
 
-AddCommentInputType = GraphQLInputObjectType(
-    name='ProxyAddCommentInputType',
+CreateStoryboardInputType = GraphQLInputObjectType(
+    name='ProxyCreateStoryboardInputType',
     fields={
         'date': GraphQLInputObjectField(
             type=SpotDateType,
@@ -53,53 +103,66 @@ AddCommentInputType = GraphQLInputObjectType(
         'text': GraphQLInputObjectField(
             type=GraphQLNonNull(GraphQLString),
             description='A description text for the comment'
+        ),
+        'threatDetails': GraphQLInputObjectField(
+            type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ThreatDetailsInputType))),
+        ),
+        'first': GraphQLInputObjectField(
+            type=GraphQLInt
         )
     }
 )
 
-def _score_connection(args):
+def _score_connections(args):
+    results = []
+
     _input = args.get('input')
-    _date = _input.get('date', date.today())
-    score = _input.get('score')
-    uri = _input.get('uri')
-    clientIp = _input.get('clientIp')
+    for cmd in _input:
+        _date = cmd.get('date', date.today())
+        score = cmd.get('score')
+        uri = cmd.get('uri')
 
-    return {'success': Proxy.score_request(date=_date, score=score, uri=uri, cllientip=clientIp)}
+        result = Proxy.score_request(date=_date, score=score, uri=uri)
 
-def _add_comment(args):
+        results.append({'success': result})
+
+    return results
+
+def _create_storyboard(args):
     _input = args.get('input')
     _date = _input.get('date', date.today())
     uri = _input.get('uri')
     title = _input.get('title')
     text = _input.get('text')
+    threat_details = _input.get('threatDetails')
+    first = _input.get('first')
 
-    if Proxy.save_comment(date=_date, uri=uri, title=title, text=text) is None:
-        return {'success':True}
-    else:
-        return {'success':False}
+    result = Proxy.create_storyboard(date=_date, uri=uri, title=title, text=text, expanded_search=threat_details, top_results=first)
+
+    return {'success': result}
 
 MutationType = GraphQLObjectType(
     name='ProxyMutationType',
     fields={
         'score': GraphQLField(
-            type=SpotOperationOutputType,
+            type=GraphQLList(SpotOperationOutputType),
             args={
                 'input': GraphQLArgument(
-                    type=GraphQLNonNull(ScoreInputType),
+                    type=GraphQLNonNull(GraphQLList(GraphQLNonNull(ScoreInputType))),
                     description='Score criteria'
                 )
             },
-            resolver=lambda root, args, *_: _score_connection(args)
+            resolver=lambda root, args, *_: _score_connections(args)
         ),
-        'addComment': GraphQLField(
+        'createStoryboard': GraphQLField(
             type=SpotOperationOutputType,
             args={
                 'input': GraphQLArgument(
-                    type=GraphQLNonNull(AddCommentInputType),
-                    description='Comment info'
+                    type=GraphQLNonNull(CreateStoryboardInputType),
+                    description='Generates all the data needed to move a threat to the storyboard'
                 )
             },
-            resolver=lambda root, args, *_: _add_comment(args)
+            resolver=lambda root, args, *_: _create_storyboard(args)
         )
     }
 )

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/api/graphql/proxy/query.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/proxy/query.py b/spot-oa/api/graphql/proxy/query.py
index aa16fc8..d75a1df 100644
--- a/spot-oa/api/graphql/proxy/query.py
+++ b/spot-oa/api/graphql/proxy/query.py
@@ -18,7 +18,7 @@ SuspiciousType = GraphQLObjectType(
         'datetime': GraphQLField(
             type=SpotDatetimeType,
             description='Start time of the request',
-            resolver=lambda root, *_: '{} {}'.format(root.get('p_date', ''), root.get('p_time', ''))
+            resolver=lambda root, *_: '{} {}'.format(root.get('tdate') or '1970-01-01', root.get('time') or '00:00:00')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
@@ -107,7 +107,8 @@ SuspiciousType = GraphQLObjectType(
         ),
         'score': GraphQLField(
             type=GraphQLInt,
-            resolver=lambda root, *_: root.get('score')
+            description='Score value assigned by machine learning algorithm',
+            resolver=lambda root, *_: root.get('ml_score') or 0
         ),
         'uriRep': GraphQLField(
             type=GraphQLString,
@@ -132,7 +133,7 @@ EdgeDetailsType = GraphQLObjectType(
         'datetime': GraphQLField(
             type=GraphQLString,
             description='Start time of the request',
-            resolver=lambda root, *_: '{} {}'.format(root.get('p_date'), root.get('p_time'))
+            resolver=lambda root, *_: '{} {}'.format(root.get('tdate') or '1970-01-01', root.get('time') or '00:00:00')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
@@ -210,15 +211,20 @@ EdgeDetailsType = GraphQLObjectType(
 ScoredRequestType = GraphQLObjectType(
     name='ProxyScoredRequestType',
     fields={
+        'datetime': GraphQLField(
+            type=SpotDateType,
+            description='Date and time of user score',
+            resolver=lambda root, *_: root.get('tdate') or '1970-01-01'
+        ),
         'uri': GraphQLField(
             type=SpotIpType,
             description='Requested URI',
-            resolver=lambda root, *_: root.get('uri')
+            resolver=lambda root, *_: root.get('fulluri')
         ),
         'score': GraphQLField(
             type=GraphQLInt,
             description='Score value. 1->High, 2->Medium, 3->Low',
-            resolver=lambda root, *_: root.get('score') or 0
+            resolver=lambda root, *_: root.get('uri_sev') or 0
         )
     }
 )
@@ -228,7 +234,7 @@ CommentType = GraphQLObjectType(
     fields={
         'uri': GraphQLField(
             type=GraphQLString,
-            resolver=lambda root, *_: root.get('uri_threat')
+            resolver=lambda root, *_: root.get('p_threat')
         ),
         'title': GraphQLField(
             type=GraphQLString,
@@ -253,7 +259,7 @@ ThreatsInformationType = GraphQLObjectType(
                     description='A date to use as reference to retrieve the list of scored requests. Defaults to today'
                 )
             },
-            resolver=lambda root, args, *_: Proxy.get_scored_request(date=args.get('date', date.today()))
+            resolver=lambda root, args, *_: Proxy.get_scored_requests(date=args.get('date', date.today()))
         ),
         'comments': GraphQLField(
             type=GraphQLList(CommentType),
@@ -269,12 +275,12 @@ ThreatsInformationType = GraphQLObjectType(
     }
 )
 
-ExpandedSearchType = GraphQLObjectType(
-    name='DnsExpandedSearchType',
+ThreatDetailsType = GraphQLObjectType(
+    name='ProxyThreatDetailsType',
     fields={
         'datetime': GraphQLField(
             type=SpotDatetimeType,
-            resolver=lambda root, *_: root.get('p_time')
+            resolver=lambda root, *_: '{} {}'.format(root.get('p_date') or '1970-01-01', root.get('p_time') or '00:00:00')
         ),
         'clientIp': GraphQLField(
             type=SpotIpType,
@@ -389,11 +395,11 @@ TimelineType = GraphQLObjectType(
     fields={
         'startDatetime': GraphQLField(
             type=SpotDatetimeType,
-            resolver=lambda root, *_: root.get('tstart')
+            resolver=lambda root, *_: root.get('tstart') or '1970-01-01 00:00:00'
         ),
         'endDatetime': GraphQLField(
             type=SpotDatetimeType,
-            resolver=lambda root, *_: root.get('tend')
+            resolver=lambda root, *_: root.get('tend') or '1970-01-01 00:00:00'
         ),
         'duration': GraphQLField(
             type=GraphQLInt,
@@ -406,6 +412,10 @@ TimelineType = GraphQLObjectType(
         'responseCode': GraphQLField(
             type=GraphQLInt,
             resolver=lambda root, *_: root.get('respcode')
+        ),
+        'responseCodeLabel': GraphQLField(
+            type=GraphQLString,
+            resolver=lambda root, *_: root.get('respcode_name')
         )
     }
 )
@@ -414,7 +424,7 @@ ThreatInformationType = GraphQLObjectType(
     name='ProxyThreatInformation',
     fields={
         'details': GraphQLField(
-            type=GraphQLList(ExpandedSearchType),
+            type=GraphQLList(ThreatDetailsType),
             description='Detailed information about a high risk threat',
             args={
                 'date': GraphQLArgument(
@@ -500,7 +510,7 @@ QueryType = GraphQLObjectType(
                     description='Client\'s ip'
                 )
             },
-            resolver=lambda root, args, *_: Proxy.details(date=args.get('date', date.today()), uri=args.get('uri'), clientip=args.get('clientIp'))
+            resolver=lambda root, args, *_: Proxy.details(date=args.get('date', date.today()), uri=args.get('uri'), ip=args.get('clientIp'))
         ),
         'threats': GraphQLField(
             type=ThreatsInformationType,

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/api/graphql/schema.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/graphql/schema.py b/spot-oa/api/graphql/schema.py
index 06a238b..3975b6c 100644
--- a/spot-oa/api/graphql/schema.py
+++ b/spot-oa/api/graphql/schema.py
@@ -39,6 +39,10 @@ SpotSchema = GraphQLSchema(
         'dns': GraphQLField(
             type=DnsMutationType,
             resolver=lambda *_: {}
+        ),
+        'proxy': GraphQLField(
+            type=ProxyMutationType,
+            resolver=lambda *_: {}
         )
     }
   ),

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/ui/proxy/js/stores/SuspiciousStore.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/proxy/js/stores/SuspiciousStore.js b/spot-oa/ui/proxy/js/stores/SuspiciousStore.js
index 1b3141e..422eb1a 100755
--- a/spot-oa/ui/proxy/js/stores/SuspiciousStore.js
+++ b/spot-oa/ui/proxy/js/stores/SuspiciousStore.js
@@ -15,10 +15,6 @@ const HIGHLIGHT_THREAT_EVENT = 'hightlight_thread';
 const UNHIGHLIGHT_THREAT_EVENT = 'unhightlight_thread';
 const SELECT_THREAT_EVENT = 'select_treath';
 
-var filterName = '';
-var highlightedThread = null;
-var selectedThread = null;
-
 class SuspiciousStore extends ObservableWithHeadersGraphQLStore {
     constructor() {
         super();
@@ -87,8 +83,8 @@ class SuspiciousStore extends ObservableWithHeadersGraphQLStore {
             this.unsetVariable(CLIENT_IP_VAR);
         }
         else if (SpotUtils.IP_V4_REGEX.test(filter)) {
-            this.setVariable(URI_VAR, filter);
-            this.unsetVariable(CLIENT_IP_VAR, filter);
+            this.unsetVariable(URI_VAR, filter);
+            this.setVariable(CLIENT_IP_VAR, filter);
         }
         else {
             this.unsetVariable(CLIENT_IP_VAR);
@@ -99,7 +95,7 @@ class SuspiciousStore extends ObservableWithHeadersGraphQLStore {
     }
 
     getFilter() {
-        return this.getVariable(CLIENT_IP_VAR) || this.getVariable(URI_VAR);
+        return this.getVariable(CLIENT_IP_VAR) || this.getVariable(URI_VAR) || '';
     }
 
     addChangeFilterListener(callback) {

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/f37bb403/spot-oa/ui/proxy/js/stores/TimelineStore.js
----------------------------------------------------------------------
diff --git a/spot-oa/ui/proxy/js/stores/TimelineStore.js b/spot-oa/ui/proxy/js/stores/TimelineStore.js
index 952de48..54d7b48 100755
--- a/spot-oa/ui/proxy/js/stores/TimelineStore.js
+++ b/spot-oa/ui/proxy/js/stores/TimelineStore.js
@@ -18,7 +18,7 @@ class TimelineStore extends ObservableGraphQLStore {
                             duration
                             clientip: clientIp
                             tend: endDatetime
-                            respcode: responseCode
+                            respcode: responseCodeLabel
                             tstart: startDatetime
                         }
                     }

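Taken together, the mutation now scores a batch of URIs in one round trip. A sketch of the client side, in the style of the OA notebooks' GraphQLClient helper; the input type name and the example values are assumptions:

    # Hypothetical batch-scoring request against the new proxy mutation.
    # The notebooks already have GraphQLClient in scope; this import is a
    # placeholder for standalone use.
    from graphql_client import GraphQLClient

    mutation = """mutation($input: [ProxyScoreInputType!]!) {
        proxy {
            score(input: $input) {
                success
            }
        }
    }"""

    variables = {
        'input': [
            {'date': '2017-03-06', 'score': 1, 'uri': 'http://malicious.example.com/a'},
            {'date': '2017-03-06', 'score': 3, 'uri': 'http://benign.example.com/b'},
        ]
    }

    response = GraphQLClient.request(mutation, variables)
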

[50/50] [abbrv] incubator-spot git commit: Show comments of the threats already captured

Posted by ev...@apache.org.
Show comments of the threats already captured


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/189acce4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/189acce4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/189acce4

Branch: refs/heads/SPOT-35_graphql_api
Commit: 189acce4f992af136064868c0f59fbe9b6edce8f
Parents: 6427f64
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Fri Mar 10 17:43:31 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/189acce4/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
index 0c61ee4..fbfa976 100644
--- a/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
+++ b/spot-oa/oa/dns/ipynb_templates/Threat_Investigation_master.ipynb
@@ -395,4 +395,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}


[28/50] [abbrv] incubator-spot git commit: Bug fixing for DNS and Proxy Edge Notebooks

Posted by ev...@apache.org.
Bug fixing for DNS and Proxy Edge Notebooks


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/7abbab2e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/7abbab2e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/7abbab2e

Branch: refs/heads/SPOT-35_graphql_api
Commit: 7abbab2ed82b8782210561618af20e20563ce161
Parents: a2026e1
Author: LedaLima <le...@apache.org>
Authored: Thu Mar 9 17:04:28 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 42 ++++++++++++--------
 .../Edge_Investigation_master.ipynb             | 23 +++++++----
 2 files changed, 41 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7abbab2e/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
index 88f047e..8cca233 100644
--- a/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/dns/ipynb_templates/Edge_Investigation_master.ipynb
@@ -23,8 +23,7 @@
     "# getting date from the parent path. \n",
     "path = os.getcwd().split(\"/\") \n",
     "date = path[len(path)-1]   \n",
-    "dsource = path[len(path)-2]  \n",
-    "dpath = '/'.join(['data' if var == 'ipynb' else var for var in path]) + '/' "
+    "dsource = path[len(path)-2]"
    ]
   },
   {
@@ -91,11 +90,16 @@
     "scoring_form = widgets.HBox(width='90%', height=250)\n",
     "scoring_form.children = [client_box,query_box,actions_box]\n",
     "\n",
+    "global score_values \n",
+    "\n",
+    "score_values = []\n",
     "\n",
     "def data_loader(): \n",
     "    us_ips = []\n",
     "    us_dns = []\n",
-    "\n",
+    "    scored = []\n",
+    "    \n",
+    "    global score_values\n",
     "    query=\"\"\"query($date:SpotDateType!) {\n",
     "            dns{\n",
     "                suspicious(date:$date){\n",
@@ -107,12 +111,17 @@
     "    variables={\n",
     "        'date': datetime.datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')\n",
     "    }\n",
+    "    \n",
+    "    for item in score_values:\n",
+    "        scored.append(item[0])\n",
+    "        scored.append(item[1])\n",
+    "    \n",
     "    response = GraphQLClient.request(query, variables)\n",
     "  \n",
     "    for row in response['data']['dns']['suspicious']:           \n",
-    "        if row['clientIp'] not in us_ips: \n",
+    "        if row['clientIp'] not in us_ips and row['clientIp'] not in scored: \n",
     "            us_ips.append(row['clientIp'])\n",
-    "        if row['dnsQuery'] not in us_dns:\n",
+    "        if row['dnsQuery'] not in us_dns and row['dnsQuery'] not in scored:\n",
     "            us_dns.append(row['dnsQuery'])  \n",
     "            \n",
     "    fill_list(client_select,us_ips)\n",
@@ -144,14 +153,12 @@
     "import csv\n",
     "import datetime\n",
     "import subprocess \n",
-    "global score_values\n",
-    "score_values = []\n",
     "\n",
     "\n",
     "def assign_score(b):\n",
-    "\n",
+    "    clear_output()\n",
     "    sev = int(rating_btn.selected_label) \n",
-    "    \n",
+    "\n",
     "    if quick_text.value: \n",
     "        ip = \"\"\n",
     "        dns = quick_text.value\n",
@@ -164,17 +171,20 @@
     "        dns = query_select.value if not \"- Select -\" in query_select.value else \"\"\n",
     "        score_values.append((ip,dns,sev))\n",
     "        clear_output()\n",
-    "   \n",
-    "    if ip != \"- Select -\":\n",
-    "        display(Javascript(\"$(\\\"option[data-value='\" + ip +\"']\\\").remove();\"))\n",
+    "        \n",
+    "    if client_select.value != \"- Select -\":   \n",
+    "        display(Javascript(\"$(\\\"option:nth-of-type(0)[data-value='\" + client_select.value +\"']\\\").remove();\"))\n",
     "    if quick_text.value:\n",
-    "        display(Javascript(\"$(\\\"option[data-value$='\" + quick_text.value +\"']\\\").remove();\"))\n",
-    "    elif dns != \"- Select -\":\n",
-    "        display(Javascript(\"$(\\\"option[data-value='\" + dns +\"']\\\").remove();\"))\n",
+    "        display(Javascript(\"$(\\\"option:nth-of-type(1)[data-value$='\" + dns +\"']\\\").remove();\"))\n",
+    "    elif query_select.value != \"- Select -\":\n",
+    "        display(Javascript(\"$(\\\"option:nth-of-type(1)[data-value='\" + query_select.value +\"']\\\").remove();\"))\n",
     "\n",
     "    client_select.value = \"- Select -\"\n",
     "    query_select.value = \"- Select -\"\n",
-    "    quick_text.value = \"\"\n",
+    "    quick_text.value = \"\"    \n",
+    "    \n",
+    "    data_loader() \n",
+    "    print \"Click the 'Save' button when you're finished scoring\" \n",
     "\n",
     "\n",
     "def save(b):    \n",

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7abbab2e/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index 8eedc53..1251960 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -15,8 +15,7 @@
    },
    "outputs": [],
    "source": [
-    "import urllib2\n",
-    "import json\n",
+    "import urllib\n",
     "import os\n",
     "import datetime\n",
     "import csv \n",
@@ -86,6 +85,7 @@
     "\n",
     "def data_loader(): \n",
     "    us_uris = []\n",
+    "    global score_values\n",
     "    \n",
     "    response = GraphQLClient.request(\n",
     "        query=\"\"\"query($date:SpotDateType!) {\n",
@@ -100,9 +100,14 @@
     "        }\n",
     "    )\n",
     "    \n",
-    "    if not 'errors' in response:\n",
+    "    scored = []\n",
+    "    for item in score_values:\n",
+    "        scored.append(item[0])\n",
+    "        \n",
+    "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",
-    "            us_uris.append(row['uri'])\n",
+    "            if not row['uri'] in scored:\n",
+    "                us_uris.append(row['uri'])\n",
     "    else:\n",
     "        print 'An error occured : '+ response['errors'][0]['message']\n",
     "        \n",
@@ -138,19 +143,21 @@
     "    uri = quick_text.value or uri_select.value\n",
     "    uri_sev = int(rating_btn.selected_label) if not \"- Select -\" in uri_select.value else \"\"\n",
     "    \n",
-    "    clear_output()\n",
-    "    #Gets input values\n",
+    "    clear_output() \n",
+    "    \n",
     "    global score_values\n",
     "    \n",
     "    score_values.append((uri, uri_sev))\n",
-    "    \n",
+    "        \n",
     "    if uri_select.value != \"- Select -\":\n",
-    "        display(Javascript(\"$(\\\"option[data-value='\" + uri_select.value +\"']\\\").remove();\"))\n",
+    "        display(Javascript(\"$(\\\"option[data-value='\" + urllib.quote_plus(uri_select.value) +\"']\\\").remove();\"))\n",
     "      \n",
     "    clear_output()\n",
     "    data_loader()\n",
     "    uri_select.value = \"- Select -\"\n",
     "    quick_text.value = \"\"\n",
+    "    \n",
+    "    print \"Click the 'Save' button when you're finished scoring\"  \n",
     "\n",
     "\n",
     "def save(b):   \n",


[38/50] [abbrv] incubator-spot git commit: Bug fixing for Proxy Edge notebook

Posted by ev...@apache.org.
Bug fixing for Proxy Edge notebook


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b78e1dfb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b78e1dfb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b78e1dfb

Branch: refs/heads/SPOT-35_graphql_api
Commit: b78e1dfbf53b9f362addd5363e8c648b8e183f66
Parents: c1be09e
Author: LedaLima <le...@apache.org>
Authored: Thu Mar 9 17:58:03 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:22 2017 -0700

----------------------------------------------------------------------
 .../Edge_Investigation_master.ipynb             | 25 +++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b78e1dfb/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
index b173fe8..e2a89c2 100644
--- a/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
+++ b/spot-oa/oa/proxy/ipynb_templates/Edge_Investigation_master.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -29,11 +29,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "$('.widget-area > .widget-subarea > *').remove();"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "try:\n",
     "    import ipywidgets as widgets # For jupyter/ipython >= 1.4\n",
@@ -102,7 +115,7 @@
     "    \n",
     "    scored = []\n",
     "    for item in score_values:\n",
-    "	     scored.append(urllib.quote_plus(item[0]))\n",
+    "        scored.append(urllib.quote_plus(item[0]))\n",
     "        \n",
     "    if not 'errors' in response: \n",
     "        for row in response['data']['proxy']['suspicious']:\n",
@@ -128,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "collapsed": false
    },
@@ -219,4 +232,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}


[42/50] [abbrv] incubator-spot git commit: Adding Exception handler for HDFS errors

Posted by ev...@apache.org.
Adding Exception handler for HDFS errors


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b41e9049
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b41e9049
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b41e9049

Branch: refs/heads/SPOT-35_graphql_api
Commit: b41e9049130b6016fe2b1dbcfced80f69d65924d
Parents: dbb5174
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Tue Mar 14 14:51:05 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:51:23 2017 -0700

----------------------------------------------------------------------
 spot-oa/api/resources/hdfs_client.py | 36 +++++++++++++++++++------------
 1 file changed, 22 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b41e9049/spot-oa/api/resources/hdfs_client.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/hdfs_client.py b/spot-oa/api/resources/hdfs_client.py
index a6da531..ec7fc9f 100644
--- a/spot-oa/api/resources/hdfs_client.py
+++ b/spot-oa/api/resources/hdfs_client.py
@@ -17,22 +17,30 @@ def get_file(hdfs_file):
         return results
 
 def put_file_csv(hdfs_file_content,hdfs_path,hdfs_file_name,append_file=False,overwrite_file=False):
-    client = _get_client()
-    hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
-    with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file) as writer:
-        for item in hdfs_file_content:
-            data = ','.join(str(d) for d in item)
-            writer.write("{0}\n".format(data))
-
-    return True
+    
+    try:
+        client = _get_client()
+        hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
+        with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file) as writer:
+            for item in hdfs_file_content:
+                data = ','.join(str(d) for d in item)
+                writer.write("{0}\n".format(data))
+        return True
+        
+    except HdfsError:
+        return False
 
 def put_file_json(hdfs_file_content,hdfs_path,hdfs_file_name,append_file=False,overwrite_file=False):
-    client = _get_client()
-    hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
-    with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file,encoding='utf-8') as writer:
-	    dump(hdfs_file_content, writer)
-
-    return True
+    
+    try:
+        client = _get_client()
+        hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
+        with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file,encoding='utf-8') as writer:
+            dump(hdfs_file_content, writer)
+        return True
+    except HdfsError:
+        return False
+    
 
 def delete_folder(hdfs_file,user=None):
     client = _get_client(user)
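
With the handler above, callers can test the boolean result instead of crashing on a failed write. A minimal sketch, assuming HdfsError is imported from the hdfs package at the top of the module (not shown in this hunk) and using an illustrative feedback path:

    import api.resources.hdfs_client as HDFSClient

    rows = [["sev", "tstart", "sip", "dip"],
            [1, "2017-03-14 14:51:05", "10.0.0.1", "10.0.0.2"]]
    if not HDFSClient.put_file_csv(rows, "/user/spot/flow/feedback",
                                   "ml_feedback.csv", append_file=True):
        print "Feedback could not be written to HDFS"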


[08/50] [abbrv] incubator-spot git commit: Adding backend methods for GraphQL API

Posted by ev...@apache.org.
Adding backend methods for GraphQL API


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/5b75b418
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/5b75b418
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/5b75b418

Branch: refs/heads/SPOT-35_graphql_api
Commit: 5b75b418990f15b4b8dfdf04720e2edd9b5588f4
Parents: 415c7f5
Author: Everardo Lopez Sandoval (Intel) <el...@jmoren4x-mobl2.amr.corp.intel.com>
Authored: Sun Mar 5 18:14:45 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/__init__.py                    |   0
 spot-oa/api/resources/__init__.py      |   0
 spot-oa/api/resources/configurator.py  |  43 ++
 spot-oa/api/resources/configurator.pyc | Bin 0 -> 1706 bytes
 spot-oa/api/resources/dns.py           | 342 +++++++++++
 spot-oa/api/resources/flow.py          | 906 ++++++++++++++++++++++++++++
 spot-oa/api/resources/hdfs_client.py   |  50 ++
 spot-oa/api/resources/impala_engine.py |  34 ++
 spot-oa/api/resources/proxy.py         | 383 ++++++++++++
 9 files changed, 1758 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/__init__.py
----------------------------------------------------------------------
diff --git a/spot-oa/__init__.py b/spot-oa/__init__.py
new file mode 100755
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/__init__.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/__init__.py b/spot-oa/api/resources/__init__.py
new file mode 100755
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/configurator.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/configurator.py b/spot-oa/api/resources/configurator.py
new file mode 100644
index 0000000..1cc470e
--- /dev/null
+++ b/spot-oa/api/resources/configurator.py
@@ -0,0 +1,43 @@
+import ConfigParser
+import os
+
+def configuration():
+
+    conf_file = "/etc/spot.conf"
+    config = ConfigParser.ConfigParser()
+    config.readfp(SecHead(open(conf_file)))
+    return config
+
+def db():
+    conf = configuration()
+    return conf.get('conf', 'DBNAME').replace("'","").replace('"','')
+
+def impala():
+    conf = configuration()
+    return conf.get('conf', 'IMPALA_DEM'),conf.get('conf', 'IMPALA_PORT')
+
+def hdfs():
+    conf = configuration()
+    name_node = conf.get('conf',"NAME_NODE")
+    web_port = conf.get('conf',"WEB_PORT")
+    hdfs_user = conf.get('conf',"HUSER")
+    hdfs_user = hdfs_user.split("/")[-1].replace("'","").replace('"','')
+    return name_node,web_port,hdfs_user
+
+def spot():
+    conf = configuration()
+    return conf.get('conf',"HUSER").replace("'","").replace('"','')
+
+class SecHead(object):
+    def __init__(self, fp):
+        self.fp = fp
+        self.sechead = '[conf]\n'
+
+    def readline(self):
+        if self.sechead:
+            try:
+                return self.sechead
+            finally:
+                self.sechead = None
+        else:
+            return self.fp.readline()
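
/etc/spot.conf is a flat KEY='value' file with no section header, which ConfigParser cannot parse on its own; the SecHead wrapper fakes a leading [conf] section on the first readline() so every key lands under one section, and the helpers strip the quoting. Typical use then reduces to (a sketch):

    import api.resources.configurator as Configuration

    db_name = Configuration.db()                        # DBNAME, quotes stripped
    impala_daemon, impala_port = Configuration.impala()
    name_node, web_port, hdfs_user = Configuration.hdfs()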

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/configurator.pyc
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/configurator.pyc b/spot-oa/api/resources/configurator.pyc
new file mode 100644
index 0000000..04505e0
Binary files /dev/null and b/spot-oa/api/resources/configurator.pyc differ

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/dns.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/dns.py b/spot-oa/api/resources/dns.py
new file mode 100644
index 0000000..ba26ae9
--- /dev/null
+++ b/spot-oa/api/resources/dns.py
@@ -0,0 +1,342 @@
+import api.resources.impala_engine as ImpalaEngine
+import api.resources.hdfs_client as HDFSClient
+import api.resources.configurator as Configuration
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the suspicious dns connections in one day.
+--------------------------------------------------------------------------
+"""
+def suspicious_queries(date, ip=None, query=None,limit=250):
+
+    db = Configuration.db()
+    sq_query = ("""
+            SELECT
+                ds.unix_tstamp,frame_len,ds.ip_dst,ds.dns_qry_name,
+                dns_qry_class,dns_qry_type,dns_qry_rcode,ml_score,tld,
+                query_rep,hh,dns_qry_class_name,dns_qry_type_name,
+                dns_qry_rcode_name,network_context
+            FROM {0}.dns_scores ds
+            LEFT JOIN {0}.dns_threat_investigation dt
+                ON  (ds.dns_qry_name = dt.dns_qry_name)
+            WHERE
+                ds.y={1} AND ds.m={2} AND ds.d={3}
+                AND (dt.dns_qry_name is NULL)
+            """).format(db,date.year,date.month,date.day)
+
+    sq_filter = ""
+    sq_filter += " AND ds.ip_dst = '{0}'".format(ip) if ip else ""
+    sq_filter += " AND ds.dns_qry_name LIKE '%{0}%'".format(query) if query else ""
+    sq_filter += " ORDER BY ds.ml_score limit {0}".format(limit)
+
+    sq_query = sq_query + sq_filter
+    return ImpalaEngine.execute_query_as_list(sq_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the connections related to a query name in one hour
+--------------------------------------------------------------------------
+"""
+def details(frame_time, query):
+
+    db = Configuration.db()
+    details_query = ("""
+            SELECT
+		unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,
+		dns_qry_type,dns_qry_rcode,dns_a,dns_qry_type_name,
+		dns_qry_rcode_name,dns_qry_class_name
+            FROM
+                {0}.dns_edge
+            WHERE
+                y={1} AND m={2} AND d={3} AND hh={4} AND dns_qry_name = '{5}'
+            """).format(db,frame_time.year,frame_time.month,frame_time.day,\
+            frame_time.hour,query)
+
+    return ImpalaEngine.execute_query_as_list(details_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the connections with a single client.
+--------------------------------------------------------------------------
+"""
+def client_details(date, ip):
+
+    db = Configuration.db()
+    client_query =("""
+            SELECT
+                ip_dst,dns_a,dns_qry_name,ip_dst
+            FROM
+                {0}.dns_dendro
+            WHERE
+                y={1} AND m={2} AND d={3}
+                AND ip_dst='{4}'
+            """).format(db,date.year,date.month,date.day,ip)
+
+    return ImpalaEngine.execute_query_as_list(client_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the detected suspicious connections in one day.
+--------------------------------------------------------------------------
+"""
+def incident_progression(date, query,ip):
+
+    if not ip and not query:
+        return None
+
+    db = Configuration.db()
+    return_value = "dns_qry_name" if ip else "ip_dst"
+    dns_threat_query = ("""
+            SELECT
+                anchor,total,{0}
+            FROM
+                {1}.dns_threat_dendro
+            WHERE
+                y={2} AND m={3} AND d={4}
+                AND anchor = '{5}'
+            """).format(return_value,db,date.year,date.month,date.day,\
+            query if query else ip)
+
+    return ImpalaEngine.execute_query_as_list(dns_threat_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all queries saved in threat investigation.
+--------------------------------------------------------------------------
+"""
+def comments(date):
+
+    db = Configuration.db()
+    comments_query = ("""
+            SELECT
+                ip_threat,dns_threat,title,text
+            FROM
+                {0}.dns_storyboard
+            WHERE
+                y={1} AND m={2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    results = ImpalaEngine.execute_query_as_list(comments_query)
+    for row in results:
+        row["text"] = row["text"].replace("\n","\\n")
+    return results
+
+"""
+--------------------------------------------------------------------------
+Score connections.
+--------------------------------------------------------------------------
+"""
+def score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
+
+    if (not ip and not ip_sev) and (not dns and not dns_sev):
+        return False
+
+    db = Configuration.db()
+    sq_query = ("""
+		SELECT
+    	    frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
+		    dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
+		    hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
+		    network_context
+		FROM
+		    {0}.dns_scores
+		WHERE
+		    y={1} and m={2} and d={3}
+            AND (
+		""").format(db,date.year,date.month,date.day)
+
+    connections_filter = ""
+    connections_filter += "ip_dst = '{0}' ".format(ip) if ip else ""
+    connections_filter += " OR " if ip and dns else ""
+    connections_filter += "dns_qry_name = '{0}' ".format(dns) if dns else ""
+    connections_filter += ")"
+    connections = ImpalaEngine.execute_query(sq_query + connections_filter)
+
+    # add score to connections
+    fb_data =  []
+    for row in connections:
+        # insert into dns_threat_investigation.
+        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
+        dns_sev if dns == row[4] else 0)
+
+        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],\
+        row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
+        row[15],row[1]])
+
+        insert_command = ("""
+            INSERT INTO {0}.dns_threat_investigation
+            PARTITION (y={1},m={2},d={3})
+            VALUES {4}
+            """).format(db,date.year,date.month,date.day,threat_data)
+
+        ImpalaEngine.execute_query(insert_command)
+
+    # create feedback file.
+    app_path = Configuration.spot()
+    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
+    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
+    ap_file = True
+
+    if len(HDFSClient.list_dir(feedback_path)) == 0:
+        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",\
+        "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",\
+        "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",\
+        "dns_qry_rcode_name","network_context","unix_tstamp"])
+        ap_file = False
+
+    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
+    return True
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the scored connections in one day.
+--------------------------------------------------------------------------
+"""
+def get_scored_connections(date):
+
+    db = Configuration.db()
+    sc_query =  ("""
+                SELECT
+                    unix_tstamp,ip_dst,dns_qry_name,ip_sev,dns_sev
+                FROM
+                    {0}.dns_threat_investigation
+                WHERE
+                    y={1} AND m={2} AND d={3}
+                """).format(db,date.year,date.month,date.day)
+
+    return ImpalaEngine.execute_query_as_list(sc_query)
+
+"""
+--------------------------------------------------------------------------
+Get expanded search from raw data table.
+--------------------------------------------------------------------------
+"""
+def expanded_search(date,query=None,ip=None,limit=20):
+
+    if not ip and not query:
+        return False
+
+    db = Configuration.db()
+    if ip:
+        count = "dns_qry_name"
+        filter_param = "ip_dst"
+        filter_value = ip
+    else:
+        count = "ip_dst"
+        filter_param = "dns_qry_name"
+        filter_value = query
+
+    expanded_query = ("""
+   		SELECT
+    		    COUNT({0}) as total,dns_qry_name,ip_dst
+		FROM
+		    {1}.dns
+		WHERE y={2} AND m={3} AND d={4}
+		AND {5} = '{6}'
+		GROUP BY {0},{5}
+		ORDER BY total DESC
+		LIMIT {7}
+    """).format(count,db,date.year,date.month,date.day,\
+    filter_param,filter_value,limit if limit else 20)
+
+    return ImpalaEngine.execute_query_as_list(expanded_query)
+
+"""
+--------------------------------------------------------------------------
+Create StoryBoard.
+--------------------------------------------------------------------------
+"""
+def create_storyboard(expanded_search,date,ip,query,title,text):
+
+    if not ip and not query:
+        return False
+
+    anchor = ip if ip else query
+    create_dendro(expanded_search,date,anchor)
+    save_comments(ip,query,title,text,date)
+
+"""
+--------------------------------------------------------------------------
+Create dendrogram for StoryBoard.
+--------------------------------------------------------------------------
+"""
+def create_dendro(expanded_search,date,anchor):
+
+    db = Configuration.db()
+    for row in expanded_search:
+        dendro_query = ("""
+            INSERT INTO {0}.dns_threat_dendro PARTITION (y={1}, m={2},d={3})
+            VALUES ( '{4}',{5},'{6}','{7}')
+            """)\
+        .format(db,date.year,date.month,date.day,anchor,\
+        row["total"],row["dnsQuery"],row["clientIp"])
+
+        ImpalaEngine.execute_query(dendro_query)
+
+"""
+--------------------------------------------------------------------------
+Save comments for StoryBoard.
+--------------------------------------------------------------------------
+"""
+def save_comments(ip,query,title,text,date):
+
+    db = Configuration.db()
+    sb_query = ("""
+            SELECT
+                ip_threat,dns_threat,title,text
+            FROM
+                {0}.dns_storyboard
+            WHERE
+                y = {1} AND m= {2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+    sb_data = ImpalaEngine.execute_query_as_list(sb_query)
+
+    # find value if already exists.
+    saved = False
+    for item in sb_data:
+        if item["ip_threat"] == ip or item["dns_threat"]== query:
+            item["title"] = title
+            item["text"] = text
+            saved = True
+
+    if not saved:
+        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title,'dns_threat':query})
+
+
+    #remove old file.
+    app_path = Configuration.spot()
+    old_file = "{0}/dns/hive/oa/storyboard/y={1}/m={2}/d={3}/"\
+    .format(app_path,date.year,date.month,date.day)
+
+    HDFSClient.delete_folder(old_file,"impala")
+    ImpalaEngine.execute_query("invalidate metadata")
+
+    for item in sb_data:
+        insert_query = ("""
+            INSERT INTO {0}.dns_storyboard PARTITION(y={1} , m={2} ,d={3})
+            VALUES ( '{4}', '{5}', '{6}','{7}')
+            """)\
+                .format(db,date.year,date.month,date.day,\
+                item["ip_threat"],item["dns_threat"],item["title"],item["text"])
+        ImpalaEngine.execute_query(insert_query)
+
+    return True
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) with all the data ingested during the time frame provided.
+--------------------------------------------------------------------------
+"""
+def ingest_summary(start_date,end_date):
+
+    db = Configuration.db()
+    is_query = ("""
+                SELECT
+                    tdate,total
+                FROM {0}.dns_ingest_summary
+                WHERE
+                    ( y >= {1} and y <= {2}) AND
+                    ( m >= {3} and m <= {4})
+                """)\
+                .format(db,start_date.year,end_date.year,start_date.month,end_date.month)
+
+    return ImpalaEngine.execute_query_as_list(is_query)
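
Taken together, the DNS methods above support the scoring round-trip: read the day's suspicious queries, write analyst scores to dns_threat_investigation plus an ml_feedback.csv for the ML pipeline, then read the scored list back. A minimal sketch of that flow, assuming execute_query_as_list returns dicts keyed by column name (the date value is illustrative):

    import datetime
    import api.resources.dns as dns

    day = datetime.date(2017, 3, 5)
    suspicious = dns.suspicious_queries(day, limit=10)
    if suspicious:
        # flag the first query name as high risk (1)
        dns.score_connection(day, dns=suspicious[0]['dns_qry_name'], dns_sev=1)
    print dns.get_scored_connections(day)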

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/flow.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/flow.py b/spot-oa/api/resources/flow.py
new file mode 100755
index 0000000..d904b61
--- /dev/null
+++ b/spot-oa/api/resources/flow.py
@@ -0,0 +1,906 @@
+import api.resources.impala_engine as ImpalaEngine
+import api.resources.hdfs_client as HDFSClient
+import api.resources.configurator as Configuration
+import os
+import csv
+import struct, socket
+import json
+import numpy as np
+
+"""
+--------------------------------------------------------------------------
+Return a list (dict) of all the suspicious connections that happened
+during the specified timeframe.
+--------------------------------------------------------------------------
+"""
+def suspicious_connections(date,ip=None,limit=250):
+
+    db = Configuration.db()
+    sc_query = ("""
+                SELECT
+                    fs.tstart,fs.srcip,fs.dstip,fs.sport,fs.dport,proto,
+                    ipkt,ibyt,opkt,obyt,ml_score,rank,srcip_internal,
+                    dstip_internal,src_geoloc,dst_geoloc,src_domain,
+                    dst_domain,src_rep,dst_rep
+                FROM {0}.flow_scores fs
+                LEFT JOIN {0}.flow_threat_investigation ft
+                    ON (( fs.srcip = ft.srcip) OR ( fs.dstip = ft.dstip))
+                WHERE fs.y={1} AND fs.m={2} and fs.d={3}
+                    AND ( ft.srcip is NULL AND ft.dstip is NULL )
+                """).format(db,date.year,date.month,date.day)
+
+    sc_filter = ""
+    if ip:
+        sc_filter = " AND ( fs.srcip='{0}' OR fs.dstip='{0}')".format(ip)
+
+    sc_filter += " ORDER BY rank  limit {0}".format(limit)
+    sc_query = sc_query + sc_filter
+    return ImpalaEngine.execute_query_as_list(sc_query)
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) of all the connections that happened
+between 2 IPs in one minute.
+--------------------------------------------------------------------------
+"""
+def details(src_ip,dst_ip,date):
+
+    db = Configuration.db()
+    details_query = ("""
+            SELECT
+                tstart,srcip,dstip,sport,dport,proto,flags,
+                tos,ibyt,ipkt,input,output,rip,obyt,opkt
+            FROM {0}.flow_edge
+            WHERE
+                y={1} AND m={2} AND d={3} AND hh={4} AND mn={5}
+                AND ((srcip='{6}' AND dstip='{7}')
+                OR  (srcip='{7}' AND dstip='{6}'))
+            ORDER BY tstart
+            """).format(db,date.year,date.month,date.day,date.hour, \
+                        date.minute,src_ip,dst_ip)
+
+    return ImpalaEngine.execute_query_as_list(details_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of all the connections related with a single IP.
+--------------------------------------------------------------------------
+"""
+def chord_details(ip,date):
+
+    db = Configuration.db()
+    chord_query =  ("""
+            SELECT
+                srcip,dstip,ibyt,ipkt
+            FROM {0}.flow_chords
+            WHERE  y={1} AND m={2} AND d={3} AND ip_threat='{4}'
+            """).format(db,date.year,date.month,date.day,ip)
+
+    return ImpalaEngine.execute_query_as_list(chord_query)
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) with all the data ingested during
+the timeframe provided.
+--------------------------------------------------------------------------
+"""
+def ingest_summary(start_date,end_date):
+
+    db = Configuration.db()
+    is_query = ("""
+            SELECT
+                tdate,total
+            FROM {0}.flow_ingest_summary
+            WHERE
+                ( y >= {1} AND y <= {2})
+                AND
+                ( m >= {3} AND m <= {4})
+            ORDER BY tdate
+            """).format(db,start_date.year,end_date.year, \
+                        start_date.month,end_date.month)
+
+    return ImpalaEngine.execute_query_as_list(is_query)
+
+"""
+--------------------------------------------------------------------------
+Return list(dict) of connections that happened between two IPs
+grouped by second.
+--------------------------------------------------------------------------
+"""
+def time_line(ip,date):
+
+    db = Configuration.db()
+    time_line_query = ("""
+        SELECT
+            ip_threat,tstart,tend,srcip,dstip,proto,
+		    sport,dport,ipkt,ibyt
+        FROM {0}.flow_timeline
+        WHERE
+            y={1} AND m={2} AND d={3}
+            AND ip_threat = '{4}'
+        """).format(db,date.year,date.month,date.day,ip)
+
+    return ImpalaEngine.execute_query_as_list(time_line_query)
+
+"""
+--------------------------------------------------------------------------
+Return json file with all the  geo localization information.
+--------------------------------------------------------------------------
+"""
+def sc_geo(ip,date):
+
+    app_path = Configuration.spot()
+    file_name = "globe-{0}.json".format(ip.replace(".","_"))
+    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+    .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
+
+    if HDFSClient.file_exists(hdfs_path,file_name):
+        return json.loads(HDFSClient.get_file("{0}/{1}" \
+        .format(hdfs_path,file_name)))
+    else:
+        return {}
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) with the ip threat information captured by the
+security expert.
+--------------------------------------------------------------------------
+"""
+def story_board(date):
+
+    db = Configuration.db()
+    sb_query= ("""
+            SELECT
+                ip_threat,title,text
+            FROM
+                {0}.flow_storyboard
+            WHERE
+                y={1} AND m={2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    results = ImpalaEngine.execute_query_as_list(sb_query)
+    for row in results:
+        row["text"] = row["text"].replace("\n","\\n")
+    return results
+
+"""
+--------------------------------------------------------------------------
+Return a json file with the impact analysis information.
+--------------------------------------------------------------------------
+"""
+def impact_analysis(ip,date):
+
+    app_path = Configuration.spot()
+    file_name = "stats-{0}.json".format(ip.replace(".","_"))
+    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+    .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
+
+    if HDFSClient.file_exists(hdfs_path,file_name):
+        return json.loads(HDFSClient.get_file("{0}/{1}" \
+        .format(hdfs_path,file_name)))
+    else:
+        return {}
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) with all the inbound, outbound and twoway connections.
+--------------------------------------------------------------------------
+"""
+def incident_progression(ip,date):
+
+    app_path = Configuration.spot()
+    file_name = "threat-dendro-{0}.json".format(ip.replace(".","_"))
+
+    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+    .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
+
+    if HDFSClient.file_exists(hdfs_path,file_name):
+        return json.loads(HDFSClient.get_file("{0}/{1}" \
+        .format(hdfs_path,file_name)))
+    else:
+        return {}
+
+"""
+--------------------------------------------------------------------------
+Save scored connection into Threat investigation table.
+--------------------------------------------------------------------------
+"""
+def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
+
+    if not src_ip and not dst_ip and not src_port and not dst_port:
+        return False
+
+    db = Configuration.db()
+    # get connections to score
+    connections_query = ("""
+            SELECT
+                tstart,srcip,dstip,sport,dport, ibyt,ipkt
+            FROM {0}.flow_scores
+            WHERE
+                y = {1} AND m={2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    connections_filter = ""
+    connections_filter += " AND srcip = '{0}'".format(src_ip) if src_ip else ""
+    connections_filter += " AND dstip = '{0}'".format(dst_ip) if dst_ip else ""
+
+    connections_filter += " AND sport = {0}" \
+    .format(str(src_port)) if src_port else ""
+
+    connections_filter += " AND dport = {0}" \
+    .format(str(dst_port)) if dst_port else ""
+
+    connections = ImpalaEngine.execute_query(connections_query + connections_filter)
+
+
+    # add score to connections
+    fb_data =  []
+    for row in connections:
+        # insert into flow_threat_investigation.
+        threat_data = (row[0],row[1],row[2],row[3],row[4],score)
+        fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
+
+        insert_command = ("""
+            INSERT INTO {0}.flow_threat_investigation
+            PARTITION (y={1},m={2},d={3})
+            VALUES {4}
+            """).format(db,date.year,date.month,date.day,threat_data)
+
+        ImpalaEngine.execute_query(insert_command)
+
+    # create feedback file.
+    app_path = Configuration.spot()
+    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback" \
+    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
+
+    append_file = True
+    if len(HDFSClient.list_dir(feedback_path)) == 0:
+        fb_data.insert(0,["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
+        append_file = False
+
+    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",\
+    append_file=append_file)
+    return True
+
+"""
+--------------------------------------------------------------------------
+Save connections details to flow_storyboard table.
+--------------------------------------------------------------------------
+"""
+def save_comment(ip,title,text,date):
+
+    #Get current table info.
+    db = Configuration.db()
+    sb_query = ("""
+            SELECT
+                ip_threat,title,text
+            FROM
+                {0}.flow_storyboard
+            WHERE
+                y = {1} AND m= {2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    sb_data = ImpalaEngine.execute_query_as_list(sb_query)
+
+    # find value if already exists.
+    saved = False
+    for item in sb_data:
+        if item["ip_threat"] == ip:
+            item["title"] = title
+            item["text"] = text
+            saved = True
+
+    if not saved:
+        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title})
+
+    #remove old file.
+    app_path = Configuration.spot()
+    old_file = "{0}/flow/hive/oa/storyboard/y={1}/m={2}/d={3}/" \
+    .format(app_path,date.year,date.month,date.day)
+
+    # remove file manually to allow the comments update.
+    HDFSClient.delete_folder(old_file,"impala")
+    ImpalaEngine.execute_query("invalidate metadata")
+
+    for item in sb_data:
+        insert_query = ("""
+            INSERT INTO {0}.flow_storyboard PARTITION(y={1} , m={2} ,d={3})
+            VALUES ( '{4}', '{5}','{6}')
+            """) \
+                .format(db,date.year,date.month,date.day, \
+                item["ip_threat"],item["title"],item["text"])
+
+        ImpalaEngine.execute_query(insert_query)
+    return True
+
+"""
+--------------------------------------------------------------------------
+Get scored connections from threat investigation table.
+--------------------------------------------------------------------------
+"""
+def get_scored_connections(date):
+
+
+    db = Configuration.db()
+    scored_query = ("""
+            SELECT
+                tstart,srcip,dstip,srcport,dstport,score
+            FROM
+                {0}.flow_threat_investigation
+            WHERE
+                y={1} AND m={2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    return ImpalaEngine.execute_query_as_list(scored_query)
+
+"""
+--------------------------------------------------------------------------
+Get expanded search data from raw data table.
+--------------------------------------------------------------------------
+"""
+def expanded_search(date,ip):
+
+    db = Configuration.db()
+    expanded_query = ("""
+		SELECT
+		    min(treceived) as firstseen, max(treceived) as lastseen,
+            sip as srcip, dip as dstip, sport as sport,
+            dport as dport, count(sip) as conns, max(ipkt) as maxpkts,
+		    avg(ipkt) as avgpkts, max(ibyt) as maxbyts, avg(ibyt) as avgbyts
+	    FROM
+		    {0}.flow
+        WHERE
+	        y={1} AND m={2} AND d={3}
+        AND (sip ='{4}'  OR dip='{4}')
+        GROUP BY
+		    sip, dip,sport,dport
+		""").format(db,date.year,date.month,date.day,ip)
+
+    return ImpalaEngine.execute_query_as_list(expanded_query)
+
+"""
+--------------------------------------------------------------------------
+Generates all the required data for StoryBoard.
+--------------------------------------------------------------------------
+"""
+def create_storyboard(expanded_search,date,ip,title,text,top_results=20):
+
+
+    cpath = "{0}/context/" \
+    .format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+    iploc = "{0}/{1}".format(cpath,'iploc.csv')
+    nwloc = "{0}/{1}".format(cpath,'networkcontext_1.csv')
+
+    connections = get_in_out_and_twoway_conns(expanded_search,top_results)
+    inbound,outbound,twoway = add_network_context(nwloc,connections["inbound"] \
+    ,connections["outbound"],connections["twoway"])
+
+    inbound,outbound,twoway = add_geospatial_info(iploc,inbound,outbound,twoway)
+    create_impact_analysis(ip, inbound,outbound,twoway, "",date)
+    create_map_view(ip,inbound,outbound,twoway,date,iploc)
+    create_incident_progression(ip,inbound,outbound,twoway,date)
+    create_time_line(ip,inbound,outbound,twoway,date)
+    save_comment(ip,title,text,date)
+    return True
+
+"""
+--------------------------------------------------------------------------
+Calculate number of inbound only, two-way, and outbound only.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def get_in_out_and_twoway_conns(expanded_search,top_results=20):
+
+    inbound = {}
+    outbound = {}
+    twoway = {}
+    srcdict = {}
+    dstdict = {}
+    conns_dict= {}
+    rowct = 0
+
+    for row in expanded_search:
+        srcdict[row['srcIp']] = {
+            'ip_int': struct.unpack("!L", socket.inet_aton(str(row['srcIp'])))[0],
+            'dst_ip': row['dstIp'],
+            'dst_ip_int': struct.unpack("!L", socket.inet_aton(str(row['dstIp'])))[0],
+            'conns': int(row['connections']),
+            'maxbytes': int(row['maxBytes'])
+        }
+        dstdict[row['dstIp']] = {
+            'ip_int': struct.unpack("!L", socket.inet_aton(str(row['dstIp'])))[0],
+            'src_ip': row['srcIp'],
+            'src_ip_int': struct.unpack("!L", socket.inet_aton(str(row['srcIp'])))[0],
+            'conns': int(row['connections']),
+            'maxbytes': int(row['maxBytes'])
+        }
+        rowct +=1
+
+    if rowct > 0:
+        for result in srcdict:
+            if result in dstdict:
+                twoway[result] = srcdict[result]
+            else:
+                outbound[result] = srcdict[result]
+
+        for result in dstdict:
+            if result not in srcdict:
+                inbound[result] = dstdict[result]
+
+    top_inbound_b = {}
+    top_outbound_b = {}
+    top_twoway_b = {}
+
+    if len(inbound) > 0:
+        top_inbound_b = get_top_bytes(inbound,top_results)
+        top_inbound_conns = get_top_conns(inbound,top_results)
+        top_inbound_b.update(top_inbound_conns) # merge the two dictionaries
+    if len(outbound) > 0:
+        top_outbound_b = get_top_bytes(outbound,top_results)
+        top_outbound_conns = get_top_conns(outbound,top_results)
+        top_outbound_b.update(top_outbound_conns) # merge the two dictionaries
+    if len(twoway) > 0:
+        top_twoway_b = get_top_bytes(twoway,top_results)
+        top_twoway_conns = get_top_conns(twoway,top_results)
+        top_twoway_b.update(top_twoway_conns) # merge the two dictionaries
+
+
+    result = {}
+    result["inbound"] = top_inbound_b
+    result["outbound"] = top_outbound_b
+    result["twoway"] = top_twoway_b
+
+    return result
+
+"""
+--------------------------------------------------------------------------
+Create incident progression file.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def create_incident_progression(anchor, inbound, outbound, twoway, date):
+
+    dendro_fpath = 'threat-dendro-' + anchor.replace('.','_') + ".json"
+    obj = {
+        'name':anchor,
+        'children': [],
+        'time': ""
+    }
+
+    #----- Add Inbound Connections-------#
+    if len(inbound) > 0:
+        obj["children"].append({'name': 'Inbound Only', 'children': [], 'impact': 0})
+        in_ctxs = {}
+        for ip in inbound:
+            if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:
+                ctx = inbound[ip]['nwloc'][2]
+                if ctx not in in_ctxs:
+                    in_ctxs[ctx] = 1
+                else:
+                    in_ctxs[ctx] += 1
+        for ctx in in_ctxs:
+            obj["children"][0]['children'].append({
+                    'name': ctx,
+                    'impact': in_ctxs[ctx]
+                })
+
+    #------ Add Outbound ----------------#
+    if len(outbound) > 0:
+        obj["children"].append({'name':'Outbound Only','children':[],'impact':0})
+        out_ctxs = {}
+        for ip in outbound:
+            if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:
+                ctx = outbound[ip]['nwloc'][2]
+                if ctx not in out_ctxs:
+                    out_ctxs[ctx] = 1
+                else:
+                    out_ctxs[ctx] += 1
+        for ctx in out_ctxs:
+            obj["children"][1]['children'].append({
+                    'name': ctx,
+                    'impact': out_ctxs[ctx]
+                })
+
+    #------ Add TwoWay ----------------#
+    if len(twoway) > 0:
+        obj["children"].append({'name':'two way','children': [], 'impact': 0})
+        tw_ctxs = {}
+        for ip in twoway:
+            if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:
+                ctx = twoway[ip]['nwloc'][2]
+                if ctx not in tw_ctxs:
+                    tw_ctxs[ctx] = 1
+                else:
+                    tw_ctxs[ctx] += 1
+
+        for ctx in tw_ctxs:
+            obj["children"][2]['children'].append({
+                    'name': ctx,
+                    'impact': tw_ctxs[ctx]
+                })
+
+    app_path = Configuration.spot()
+    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))
+
+    if HDFSClient.put_file_json(obj,hdfs_path,dendro_fpath,overwrite_file=True):
+        return "Incident progression successfully created \n"
+    else:
+        return "Incident progression couldn't be created \n"
+
+"""
+--------------------------------------------------------------------------
+Create map view for StoryBoard.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def create_map_view(ip, inbound, outbound, twoway,date,iploc):
+
+    iplist = ''
+    if os.path.isfile(iploc):
+        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
+        converters={0: lambda s: np.uint32(s.replace('"',''))})
+    else:
+        print "No iploc.csv file was found, Map View map won't be created"
+
+    response = ""
+    if iplist != '':
+        globe_fpath = 'globe-' + ip.replace('.','_') + ".json"
+        globe_json = {}
+        globe_json['type'] = "FeatureCollection"
+        globe_json['sourceips'] = []
+        globe_json['destips'] = []
+        for srcip in twoway:
+            try:
+                row =  twoway[srcip]['geo']
+                globe_json['destips'].append({
+                        'type': 'Feature',
+                        'properties': {
+                            'location':row[8],
+                            'ip':srcip,
+                            'type':1
+                        },
+                        'geometry': {
+                            'type': 'Point',
+                            'coordinates': [float(row[7]), float(row[6])]
+                        }
+                    })
+            except ValueError:
+                pass
+        for dstip in outbound:
+            try:
+                row =  outbound[dstip]['geo']
+                dst_geo = outbound[dstip]['geo_dst']
+                globe_json['sourceips'].append({
+                        'type': 'Feature',
+                        'properties': {
+                            'location':row[8],
+                            'ip':dstip,
+                            'type':3
+                        },
+                        'geometry': {
+                            'type': 'Point',
+                            'coordinates': [float(row[7]), float(row[6])]
+                        }
+                    })
+                globe_json['destips'].append({
+                        'type': 'Feature',
+                        'properties': {
+                            'location':row[8],
+                            'ip':outbound[dstip]['dst_ip'],
+                            'type':3
+                        },
+                        'geometry': {
+                            'type': 'Point',
+                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
+                        }
+                    })
+            except ValueError:
+                pass
+        for dstip in inbound:
+            try:
+                row =  inbound[dstip]['geo']
+                dst_geo = inbound[dstip]['geo_src']
+                globe_json['sourceips'].append({
+                        'type': 'Feature',
+                        'properties': {
+                            'location':row[8],
+                            'ip':dstip,
+                            'type':2
+                        },
+                        'geometry': {
+                            'type': 'Point',
+                            'coordinates': [float(row[7]), float(row[6])]
+                        }
+                    })
+                globe_json['destips'].append({
+                        'type': 'Feature',
+                        'properties': {
+                            'location':row[8],
+                            'ip':inbound[dstip]['src_ip'],
+                            'type':2
+                        },
+                        'geometry': {
+                            'type': 'Point',
+                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
+                        }
+                    })
+            except ValueError:
+                pass
+        json_str = json.dumps(globe_json)
+        app_path = Configuration.spot()
+        hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+        .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
+
+        if HDFSClient.put_file_json(globe_json,hdfs_path,globe_fpath,overwrite_file=True) :
+            response = "Geolocation map successfully created \n"
+        else:
+            response = "Geolocation map couldn't be created \n"
+
+    return response
+
+"""
+--------------------------------------------------------------------------
+Create timeline for storyboard.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def create_time_line(anchor,inbound, outbound, twoway,date):
+
+    top_keys = []
+    if len(twoway) > 0: top_keys.extend(twoway.keys())
+    if len(outbound) > 0: top_keys.extend(outbound.keys())
+    if len(inbound) > 0: top_keys.extend(inbound.keys())
+
+
+    db = Configuration.db()
+
+    imp_query =("""
+        INSERT INTO TABLE {0}.flow_timeline PARTITION (y={4}, m={5},d={6})
+        SELECT
+            '{7}' ,min(treceived) as tstart, max(treceived) as tend,
+            sip as srcIP,dip as dstip, proto as proto, sport as sport,
+            dport AS dport, ipkt as ipkt, ibyt as ibyt
+        FROM
+            {0}.flow
+        WHERE y={4} AND m={5} AND d={6}
+        AND ((dip IN({1}) AND sip ='{2}') OR (sip IN({1}) AND dip ='{2}'))
+        GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt
+        ORDER BY tstart
+        LIMIT {3}
+    """)
+
+    ips = "'" + "','".join(top_keys) + "'"
+    imp_query = imp_query.format(db,ips,anchor,1000,date.year,date.month, date.day,anchor)
+
+    if ImpalaEngine.execute_query(imp_query):
+        return "Timeline successfully created \n"
+    else:
+        return "Timeline couldn't be created \n"
+
+"""
+--------------------------------------------------------------------------
+Create Impact Analysis for StoryBoard.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def create_impact_analysis(anchor, inbound, outbound, twoway, threat_name,date):
+
+    stats_fpath = 'stats-' + anchor.replace('.','_') + ".json"
+
+    obj = {
+        'name':threat_name,
+        'children': [],
+        'size': len(inbound) + len(outbound) + len(twoway)
+    }
+
+    #----- Add Inbound Connections-------#
+    obj["children"].append({'name': 'Inbound Only', 'children': [], 'size': len(inbound)})
+    in_ctxs = {}
+    for ip in inbound:
+        full_ctx = ''
+        if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:
+            full_ctx = inbound[ip]['nwloc'][2].split('.')[0]
+        ctx = get_ctx_name(full_ctx)
+        if ctx not in in_ctxs:
+            in_ctxs[ctx] = 1
+        else:
+            in_ctxs[ctx] += 1
+    for ctx in in_ctxs:
+        obj["children"][0]['children'].append({
+                'name': ctx,
+                'size': in_ctxs[ctx]
+            })
+
+
+    #------ Add Outbound ----------------#
+    obj["children"].append({'name':'Outbound Only','children':[],'size':len(outbound)})
+    out_ctxs = {}
+    for ip in outbound:
+        full_ctx = ''
+        if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:
+            full_ctx = outbound[ip]['nwloc'][2].split('.')[0]
+        ctx = get_ctx_name(full_ctx)
+        if ctx not in out_ctxs:
+            out_ctxs[ctx] = 1
+        else:
+            out_ctxs[ctx] += 1
+    for ctx in out_ctxs:
+        obj["children"][1]['children'].append({
+                'name': ctx,
+                'size': out_ctxs[ctx]
+            })
+
+    #------ Add Twoway ----------------#
+    obj["children"].append({'name': 'two way', 'children': [], 'size': len(twoway)})
+    tw_ctxs = {}
+    for ip in twoway:
+        full_ctx = ''
+        if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:
+            full_ctx = twoway[ip]['nwloc'][2].split('.')[0]
+        ctx = get_ctx_name(full_ctx)
+        if ctx not in tw_ctxs:
+            tw_ctxs[ctx] = 1
+        else:
+            tw_ctxs[ctx] += 1
+
+    for ctx in tw_ctxs:
+        obj["children"][2]['children'].append({
+                'name': ctx,
+                'size': tw_ctxs[ctx]
+            })
+
+    app_path = Configuration.spot()
+    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
+    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))
+
+    data = json.dumps(obj)
+    if HDFSClient.put_file_json(obj,hdfs_path,stats_fpath,overwrite_file=True):
+        return "Stats file successfully created \n"
+    else:
+        return "Stats file couldn't be created \n"
+
+"""
+--------------------------------------------------------------------------
+Get top bytes list.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def get_top_bytes(conns_dict, top):
+    topbytes = sorted(conns_dict.iteritems(),key=lambda (x,y): y['maxbytes'],reverse=True)
+    topbytes = topbytes[0:top]
+    return dict(topbytes)
+
+"""
+--------------------------------------------------------------------------
+Get top connections.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def get_top_conns(conns_dict, top):
+    topconns = sorted(conns_dict.iteritems(), key=lambda (x,y): y['conns'], reverse=True)
+    topconns = topconns[0:top]
+    return dict(topconns)
+
+"""
+--------------------------------------------------------------------------
+Get network context - get start and end ranges.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def add_network_context(nwloc,inbound,outbound,twoway):
+    nwdict = {}
+    if os.path.isfile(nwloc) :
+        with open(nwloc, 'r') as f:
+            reader = csv.reader(f,delimiter=',')
+            reader.next()
+            #address range, description
+            for row in reader:
+
+                if '/' in row[0]:
+                    #Range in subnet
+                    iprange = row[0].split('/')
+                    if len(iprange) < 2:
+                        ipend = 0
+                    else:
+                        ipend = int(iprange[1])
+                    nwdict[row[0]] = [struct.unpack("!L", \
+                    socket.inet_aton(iprange[0]))[0],\
+                    struct.unpack("!L",socket.inet_aton(iprange[0]))[0]+2**(32-ipend)-1, row[1]]
+                elif '-' in row[0]:
+                    #IP Range
+                    iprange = row[0].split('-')
+                    nwdict[row[0]] = [struct.unpack("!L",\
+                    socket.inet_aton(iprange[0].replace(" ", "")))[0],\
+                    struct.unpack("!L", socket.inet_aton(iprange[1].replace(" ", "")))[0], row[1]]
+                else:
+                    #Exact match
+                    nwdict[row[0]] = [struct.unpack("!L",\
+                    socket.inet_aton(row[0]))[0],struct.unpack("!L",\
+                    socket.inet_aton(row[0]))[0], row[1]]
+
+        for srcip in outbound:
+            temp_ip = struct.unpack("!L", socket.inet_aton(srcip))[0]
+            if srcip in nwdict:
+                outbound[srcip]['nwloc'] = nwdict[srcip]
+            else:
+                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]
+                outbound[srcip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''
+
+        for dstip in twoway:
+            temp_ip = struct.unpack("!L", socket.inet_aton(dstip))[0]
+            if dstip in nwdict:
+                twoway[dstip]['nwloc'] = nwdict[dstip]
+            else:
+                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]
+                twoway[dstip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''
+
+        for srcip in inbound:
+            temp_ip = struct.unpack("!L", socket.inet_aton(srcip))[0]
+            if srcip in nwdict:
+                inbound[srcip]['nwloc'] = nwdict[srcip]
+            else:
+                matchingVals = [x for x in nwdict if nwdict[x][1] >= temp_ip and nwdict[x][0] <= temp_ip]
+                inbound[srcip]['nwloc'] = nwdict[matchingVals[0]] if len(matchingVals) > 0 else ''
+
+    return inbound,outbound,twoway
+
+
+"""
+--------------------------------------------------------------------------
+Add Geo spatial info
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def add_geospatial_info(iploc,inbound,outbound,twoway):
+    iplist = ''
+    if os.path.isfile(iploc):
+        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
+        converters={0: lambda s: np.uint32(s.replace('"',''))})
+    else:
+        print "No iploc.csv file was found, Map View map won't be created"
+
+
+    # get geospatial info, only when iplocation file is available
+    if len(iplist) > 0:
+        for srcip in outbound:
+            reader = csv.reader([linecache.getline(\
+            iploc, bisect.bisect(iplist,outbound[srcip]['ip_int'])).replace('\n','')])
+
+            outbound[srcip]['geo'] = reader.next()
+            reader = csv.reader([linecache.getline(\
+            iploc, bisect.bisect(iplist,outbound[srcip]['dst_ip_int'])).replace('\n','')])
+            outbound[srcip]['geo_dst'] = reader.next()
+
+        for dstip in twoway:
+            reader = csv.reader([linecache.getline(\
+            iploc,bisect.bisect(iplist,twoway[dstip]['ip_int'])).replace('\n','')])
+            twoway[dstip]['geo'] = reader.next()
+
+        for srcip in inbound:
+            reader = csv.reader([linecache.getline(\
+            iploc, bisect.bisect(iplist,inbound[srcip]['ip_int'])).replace('\n','')])
+
+            inbound[srcip]['geo'] = reader.next()
+            reader = csv.reader([linecache.getline(\
+            iploc, bisect.bisect(iplist,inbound[srcip]['src_ip_int'])).replace('\n','')])
+            inbound[srcip]['geo_src'] = reader.next()
+
+    return inbound,outbound,twoway
+
+"""
+--------------------------------------------------------------------------
+Get context name.
+Migrated from IPython NoteBooks.
+--------------------------------------------------------------------------
+"""
+def get_ctx_name(full_context):
+    ctx= 'DMZ'
+    if "VPN" in full_context:
+        ctx = "VPN"
+    elif "DMZ" in full_context:
+        ctx = "DMZ"
+    elif "Proxy" in full_context:
+        ctx = "Proxy"
+    elif "FW" in full_context:
+        ctx = "FW"
+    return ctx
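
For reference, a minimal standalone sketch (Python 2, hypothetical values) of the
building blocks the functions above rely on: the IPv4-to-integer conversion used by
add_network_context and add_geospatial_info, the top-N selection behind
get_top_conns/get_top_bytes, and the shape of the stats JSON that
create_impact_analysis writes.

import socket
import struct

# IPv4 address -> unsigned 32-bit int, for range checks and bisect lookups
ip_int = struct.unpack("!L", socket.inet_aton("10.0.1.2"))[0]

# top-N selection over a connections dict (sample data)
conns = {'10.0.0.1': {'conns': 5, 'maxbytes': 900},
         '10.0.0.2': {'conns': 9, 'maxbytes': 100}}
top = dict(sorted(conns.iteritems(), key=lambda (x, y): y['conns'], reverse=True)[:1])
print top.keys()   # ['10.0.0.2']

# shape of the stats file written by create_impact_analysis (hypothetical counts)
stats = {'name': 'threat-name', 'size': 3, 'children': [
    {'name': 'Inbound Only', 'size': 1, 'children': [{'name': 'DMZ', 'size': 1}]},
    {'name': 'Outbound Only', 'size': 1, 'children': [{'name': 'VPN', 'size': 1}]},
    {'name': 'two way', 'size': 1, 'children': [{'name': 'FW', 'size': 1}]}]}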

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/hdfs_client.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/hdfs_client.py b/spot-oa/api/resources/hdfs_client.py
new file mode 100644
index 0000000..d47d16c
--- /dev/null
+++ b/spot-oa/api/resources/hdfs_client.py
@@ -0,0 +1,50 @@
+
+from hdfs import InsecureClient
+from json import dump
+import api.resources.configurator as Config
+
+
+def _get_client(user=None):
+    hdfs_nm,hdfs_port,hdfs_user = Config.hdfs()
+    client = InsecureClient('http://{0}:{1}'.format(hdfs_nm,hdfs_port), user= user if user else hdfs_user)
+    return client
+
+def get_file(hdfs_file):
+    client = _get_client()
+    with client.read(hdfs_file) as reader:
+        results = reader.read()
+        return results
+
+def put_file_csv(hdfs_file_content,hdfs_path,hdfs_file_name,append_file=False,overwrite_file=False):
+    client = _get_client()
+    hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
+    with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file) as writer:
+        for item in hdfs_file_content:
+            data = ','.join(str(d) for d in item)
+            writer.write("{0}\n".format(data))
+
+    return True
+
+def put_file_json(hdfs_file_content,hdfs_path,hdfs_file_name,append_file=False,overwrite_file=False):
+    client = _get_client()
+    hdfs_full_name = "{0}/{1}".format(hdfs_path,hdfs_file_name)
+    with client.write(hdfs_full_name,append=append_file,overwrite=overwrite_file,encoding='utf-8') as writer:
+        dump(hdfs_file_content, writer)
+
+    return True
+
+def delete_folder(hdfs_file,user=None):
+    client = _get_client(user)
+    client.delete(hdfs_file,recursive=True)
+
+def list_dir(hdfs_path):
+    client = _get_client()
+    return client.list(hdfs_path)
+
+def file_exists(hdfs_path,file_name):
+    files = list_dir(hdfs_path)
+    # membership test against the directory listing
+    return str(file_name) in files
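
A minimal usage sketch for this client (assumes a reachable WebHDFS endpoint and a
populated api.resources.configurator; the path below is hypothetical):

import api.resources.hdfs_client as HDFSClient

path = '/user/spot/flow/oa/storyboard/2017/3/15'   # hypothetical
HDFSClient.put_file_json({'name': 'threat'}, path, 'stats.json', overwrite_file=True)
if HDFSClient.file_exists(path, 'stats.json'):
    print HDFSClient.get_file('{0}/stats.json'.format(path))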

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/impala_engine.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/impala_engine.py b/spot-oa/api/resources/impala_engine.py
new file mode 100644
index 0000000..d6b4c55
--- /dev/null
+++ b/spot-oa/api/resources/impala_engine.py
@@ -0,0 +1,34 @@
+from impala.dbapi import connect
+import api.resources.configurator as Config
+
+def create_connection():
+
+    impala_host, impala_port =  Config.impala()
+    db = Config.db()
+    conn = connect(host=impala_host, port=int(impala_port),database=db)
+    return conn.cursor()
+
+def execute_query(query,fetch=False):
+
+    impala_cursor = create_connection()
+    impala_cursor.execute(query)
+
+    return impala_cursor if not fetch else impala_cursor.fetchall()
+
+def execute_query_as_list(query):
+
+    query_results = execute_query(query)
+    results = []
+
+    # build one dict per row, keyed by the column names in the cursor description
+    for row in query_results:
+        row_result = {}
+        for x, header in enumerate(query_results.description):
+            row_result[header[0]] = row[x]
+        results.append(row_result)
+
+    return results
+
+
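
A sketch of how callers consume this module (the query and table are hypothetical;
assumes the Impala daemon and database named in configurator are reachable):

import api.resources.impala_engine as ImpalaEngine

# each row comes back as a dict keyed by the column names of the result set
rows = ImpalaEngine.execute_query_as_list("SELECT sip, dip FROM flow LIMIT 2")
for r in rows:
    print r['sip'], r['dip']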

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/5b75b418/spot-oa/api/resources/proxy.py
----------------------------------------------------------------------
diff --git a/spot-oa/api/resources/proxy.py b/spot-oa/api/resources/proxy.py
new file mode 100644
index 0000000..a2a8e46
--- /dev/null
+++ b/spot-oa/api/resources/proxy.py
@@ -0,0 +1,383 @@
+import md5
+import api.resources.impala_engine as ImpalaEngine
+import api.resources.hdfs_client as HDFSClient
+import api.resources.configurator as Configuration
+from collections import defaultdict
+import json
+import os
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) of all the connections related to a request name within one hour.
+--------------------------------------------------------------------------
+"""
+def suspicious_requests(date,uri=None,ip=None,limit=250):
+
+    db = Configuration.db()
+    proxy_query = ("""
+	SELECT
+	    ps.tdate,ps.time,ps.clientip,ps.host,ps.reqmethod,ps.useragent,
+        ps.resconttype,ps.duration,ps.username,ps.webcat,ps.referer,
+        ps.respcode,ps.uriport,ps.uripath,ps.uriquery,ps.serverip,ps.scbytes,
+        ps.csbytes,ps.fulluri,ps.ml_score,ps.uri_rep,ps.respcode_name,
+        ps.network_context
+	FROM
+	    {0}.proxy_scores ps
+	LEFT JOIN
+	    {0}.proxy_threat_investigation pt
+	    ON (ps.fulluri = pt.fulluri)
+	WHERE
+	    ps.y={1} AND ps.m={2} AND ps.d={3}
+	    AND (pt.fulluri is NULL)
+    """).format(db,date.year,date.month,date.day)
+
+
+    p_filter = ""
+    p_filter += " AND ps.fulluri LIKE '%{0}%'".format(uri) if uri else ""
+    p_filter += " AND ps.clientip = '{0}'".format(ip) if ip else ""
+    p_filter += " ORDER BY ps.ml_score limit {0}".format(limit)
+    proxy_query = proxy_query + p_filter
+    return ImpalaEngine.execute_query_as_list(proxy_query)
+
+"""
+--------------------------------------------------------------------------
+Return a list(dict) of all the connection details for one request.
+--------------------------------------------------------------------------
+"""
+def details(date,uri,ip):
+
+    if not uri and not ip:
+        return None
+
+    db = Configuration.db()
+    p_details = ("""
+		SELECT
+		    tdate,time,clientIp,host,webcat,respcode,respcode_name
+		    ,reqmethod,useragent,resconttype,referer,uriport,serverip
+		    ,scbytes,csbytes,fulluri,hh
+		FROM
+		    {0}.proxy_edge
+		WHERE
+		    y={1} AND m={2} AND d={3}
+		""").format(db,date.year,date.month,date.day)
+    return ImpalaEngine.execute_query_as_list(p_details)
+
+"""
+--------------------------------------------------------------------------
+Score a request
+--------------------------------------------------------------------------
+"""
+def score_request(date,score,uri):
+
+    if not score and not uri:
+        return None
+
+    db = Configuration.db()
+    p_query = ("""
+		SELECT
+		    tdate,time,clientip,host,reqmethod,useragent,resconttype
+		    ,duration,username,webcat,referer,respcode,uriport
+		    ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
+		    ,word,ml_score,uri_rep,respcode_name,network_context
+		FROM
+		    {0}.proxy_scores
+		WHERE
+		    y={1} and m={2} and d={3}
+		    AND fulluri = '{4}'
+		""").format(db,date.year,date.month,date.day,uri)
+
+    connections = ImpalaEngine.execute_query(p_query)
+
+    # add score to connections
+    fb_data =  []
+
+    for row in connections:
+        cip_index = row[2]
+        uri_index = row[18]
+        tme_index = row[1]  # time column; its hour feeds the dedup hash below
+        hash_field = [str( md5.new(str(cip_index) + str(uri_index)).hexdigest() \
+        + str((tme_index.split(":"))[0]) )]
+
+        threat_data = (row[0],row[18],score)
+        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7] \
+			,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
+			,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
+			row[23],hash_field])
+
+        insert_command = ("""
+            INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
+            VALUES {4}
+            """).format(db,date.year,date.month,date.day,threat_data)
+        ImpalaEngine.execute_query(insert_command)
+
+    # create feedback file.
+    app_path = Configuration.spot()
+    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
+    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
+
+    ap_file = True
+    if len(HDFSClient.list_dir(feedback_path)) == 0:
+        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",\
+        "useragent","resconttype","duration","username","webcat","referer",\
+        "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",\
+        "fulluri","word","score","uri_rep","uri_sev","respcode_name",\
+        "network_context","hash"])
+        ap_file = False
+
+    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
+    return True
+
+"""
+--------------------------------------------------------------------------
+Get expanded search from raw table.
+--------------------------------------------------------------------------
+"""
+def expanded_search(date,uri):
+
+    db = Configuration.db()
+    expanded_query = ("""
+			SELECT p_date, p_time, clientip, username, duration, fulluri,\
+			    webcat, respcode, reqmethod,useragent, resconttype,\
+			    referer, uriport, serverip, scbytes, csbytes
+			FROM {0}.proxy
+			WHERE y='{1}' AND m='{2}' AND d='{3}'
+			AND (fulluri='{4}' OR referer ='{4}')
+			ORDER BY p_time
+			""")\
+            .format(db,date.year,str(date.month).zfill(2),str(date.day).zfill(2),uri)
+    return ImpalaEngine.execute_query_as_list(expanded_query)
+
+"""
+--------------------------------------------------------------------------
+Get scored request from threat investigation.
+--------------------------------------------------------------------------
+"""
+def get_scored_requests(date):
+
+    db = Configuration.db()
+    sc_query =  ("""
+                SELECT
+                    tdate,fulluri,uri_sev
+                FROM
+                    {0}.proxy_threat_investigation
+                WHERE
+                    y={1} AND m={2} AND d={3}
+                """).format(db,date.year,date.month,date.day)
+
+    return ImpalaEngine.execute_query_as_list(sc_query)
+
+"""
+--------------------------------------------------------------------------
+Create storyboard.
+Migrated from IPython Notebooks
+--------------------------------------------------------------------------
+"""
+def create_storyboard(uri,date,title,text,expanded_search,top_results):
+
+
+    clientips  = defaultdict(int)
+    reqmethods = defaultdict(int)
+    rescontype = defaultdict(int)
+    referers   = defaultdict(int)
+    refered    = defaultdict(int)
+    requests   = []
+
+    for row in expanded_search:
+        clientips[row['clientip']]+=1
+        reqmethods[row['reqmethod']]+=1
+        rescontype[row['resconttype']]+=1
+
+        if row['fulluri'] != uri:
+            #Source URIs that referred the user to the threat
+            referers[row['referer']]+=1
+            request = {'clientip':row['clientip'],'referer':row['referer'],\
+            'reqmethod':row['reqmethod'],'resconttype':row['resconttype']}
+            if request not in requests:
+                requests.append(request)
+        else:
+            #Destination URIs referred to by the threat
+            refered[row['fulluri']]+=1
+
+    create_incident_progression(uri,requests,referers,date)
+    create_timeline(uri,clientips,date,top_results)
+    save_comments(uri,title,text,date)
+
+    return True
+
+"""
+--------------------------------------------------------------------------
+Create timeline for storyboard
+--------------------------------------------------------------------------
+"""
+def create_timeline(anchor,clientips,date,top_results):
+
+    response = ""
+    susp_ips = []
+
+
+    if clientips:
+        srtlist = sorted(list(clientips.items()), key=lambda x: x[1], reverse=True)
+        for val in srtlist[:top_results]:
+            susp_ips.append(val[0])
+
+    if anchor != "":
+        db = Configuration.db()
+        imp_query = ("""
+                        INSERT INTO TABLE {0}.proxy_timeline
+                        PARTITION (y={2}, m={3},d={4})
+                        SELECT
+                            '{7}' as p_threat,  concat(cast(p_date as string),
+                            ' ', cast(MIN(p_time) as string)) AS tstart,
+                            concat(cast(p_date as string), ' ',
+                            cast(MAX(p_time) as string)) AS tend,
+                            SUM(duration) AS duration,
+                            clientip, respcode,"respCodeName" as respCodeName
+                        FROM {0}.proxy
+                        WHERE fulluri='{1}' AND clientip IN ({5})
+                        AND y='{2}' AND m='{3}' AND d='{4}'
+                        GROUP BY clientip, p_time, respcode, p_date
+                        LIMIT {6}
+                    """)\
+                    .format(db,anchor,date.year,str(date.month).zfill(2),\
+                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
+                    ,top_results,anchor)
+
+        app_path = Configuration.spot()
+        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
+        .format(app_path,date.year,date.month,date.day)
+
+        HDFSClient.delete_folder(old_file,"impala")
+        ImpalaEngine.execute_query("invalidate metadata")
+        ImpalaEngine.execute_query(imp_query)
+        response = "Timeline successfully saved"
+    else:
+        response = "Timeline couldn't be created"
+
+"""
+--------------------------------------------------------------------------
+Create incident progression for storyboard.
+--------------------------------------------------------------------------
+"""
+def create_incident_progression(anchor,requests,referers,date):
+
+    hash_name = md5.new(str(anchor)).hexdigest()
+    file_name = "incident-progression-{0}.json".format(hash_name)
+    app_path = Configuration.spot()
+    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}"\
+    .format(app_path,date.year,date.month,date.day)
+
+    data = {'fulluri':anchor, 'requests':requests,'referer_for':referers.keys()}
+    if HDFSClient.put_file_json(data,hdfs_path,file_name,overwrite_file=True):
+        return "Incident progression successfully created"
+    else:
+        return False
+
+"""
+--------------------------------------------------------------------------
+Save comments for storyboard.
+--------------------------------------------------------------------------
+"""
+def save_comments(uri,title,text,date):
+
+    db = Configuration.db()
+    sb_query = ("""
+            SELECT
+               p_threat,title,text
+            FROM
+                {0}.proxy_storyboard
+            WHERE
+                y = {1} AND m= {2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+    sb_data = ImpalaEngine.execute_query_as_list(sb_query)
+
+
+    # find value if already exists.
+    saved = False
+    for item in sb_data:
+        if item["p_threat"] == uri:
+            item["title"] = title
+            item["text"] = text
+            saved = True
+
+    if not saved:
+        sb_data.append({'text': text, 'p_threat': str(uri), 'title': title})
+
+    #remove old file.
+    app_path = Configuration.spot()
+    old_file = "{0}/proxy/hive/oa/storyboard/y={1}/m={2}/d={3}/"\
+    .format(app_path,date.year,date.month,date.day)
+    HDFSClient.delete_folder(old_file,"impala")
+    ImpalaEngine.execute_query("invalidate metadata")
+
+    for item in sb_data:
+        insert_query = ("""
+                INSERT INTO {0}.proxy_storyboard PARTITION(y={1} , m={2} ,d={3})
+                VALUES ( '{4}', '{5}', '{6}')
+                """)\
+                .format(db,date.year,date.month,date.day,\
+                item["p_threat"],item["title"],item["text"])
+
+        ImpalaEngine.execute_query(insert_query)
+
+"""
+--------------------------------------------------------------------------
+Get storyboard comments.
+--------------------------------------------------------------------------
+"""
+def story_board(date):
+
+    db = Configuration.db()
+    sb_query= ("""
+            SELECT
+                p_threat,title,text
+            FROM
+                {0}.proxy_storyboard
+            WHERE
+                y={1} AND m={2} AND d={3}
+            """).format(db,date.year,date.month,date.day)
+
+    results = ImpalaEngine.execute_query_as_list(sb_query)
+    for row in results:
+        row["text"] = row["text"].replace("\n","\\n")
+    return results
+
+"""
+--------------------------------------------------------------------------
+Get timeline for storyboard.
+--------------------------------------------------------------------------
+"""
+def time_line(date,uri):
+
+    db = Configuration.db()
+    time_line_query = ("""
+            SELECT
+		p_threat,tstart,tend,duration,clientip,respcode,respcodename
+            FROM {0}.proxy_timeline
+            WHERE
+                y={1} AND m={2} AND d={3}
+                AND p_threat = '{4}'
+            """).format(db,date.year,date.month,date.day,uri)
+
+    return ImpalaEngine.execute_query_as_list(time_line_query)
+
+"""
+--------------------------------------------------------------------------
+Get incident progression for storyboard.
+--------------------------------------------------------------------------
+"""
+def incident_progression(date,uri):
+
+    app_path = Configuration.spot()
+    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}".format(app_path,\
+        date.year,date.month,date.day)
+
+    hash_name = md5.new(str(uri)).hexdigest()
+    file_name = "incident-progression-{0}.json".format(hash_name)
+
+    if HDFSClient.file_exists(hdfs_path,file_name):
+        return json.loads(HDFSClient.get_file("{0}/{1}"\
+        .format(hdfs_path,file_name)))
+    else:
+        return {}
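
A consumer-side sketch of this API (assumes configured Impala and HDFS backends;
the date and URI below are hypothetical):

from datetime import date
import api.resources.proxy as proxy

day = date(2017, 3, 15)
for threat in proxy.get_scored_requests(day):
    print threat['fulluri'], threat['uri_sev']
print proxy.incident_progression(day, 'http://bad.example.com/a')  # {} when no storyboard exists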


[35/50] [abbrv] incubator-spot git commit: Fixed issue with iana codes resolution

Posted by ev...@apache.org.
Fixed issue with iana codes resolution


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/7693ad10
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/7693ad10
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/7693ad10

Branch: refs/heads/SPOT-35_graphql_api
Commit: 7693ad10ca23ad3c561139aaf1e1ee9ad2cb92a5
Parents: 4734f4f
Author: LedaLima <le...@apache.org>
Authored: Tue Mar 7 12:05:03 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:48 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/components/iana/iana_transform.py | 8 ++++----
 spot-oa/oa/components/reputation/fb/fb.py    | 2 +-
 spot-oa/oa/dns/dns_oa.py                     | 9 +++++----
 spot-oa/oa/proxy/proxy_oa.py                 | 2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7693ad10/spot-oa/oa/components/iana/iana_transform.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/components/iana/iana_transform.py b/spot-oa/oa/components/iana/iana_transform.py
index 97a1c28..34730bd 100644
--- a/spot-oa/oa/components/iana/iana_transform.py
+++ b/spot-oa/oa/components/iana/iana_transform.py
@@ -82,20 +82,20 @@ class IanaTransform(object):
             if key in self._qclass_dict:
                 return self._qclass_dict[key]
             else:
-                return key
+                return ""
         if column == COL_QTYPE:
             if key in self._qtype_dict:
                 return self._qtype_dict[key]
             else:
-                return key
+                return ""
         if column == COL_RCODE:
             if key in self._rcode_dict:
                 return self._rcode_dict[key]
             else:
-                return key
+                return ""
         if column == COL_PRESP: 
             if key in self._http_rcode_dict:
                 return self._http_rcode_dict[key]
             else:
-                return key
+                return ""
 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7693ad10/spot-oa/oa/components/reputation/fb/fb.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/components/reputation/fb/fb.py b/spot-oa/oa/components/reputation/fb/fb.py
index e9d997c..b33ec7d 100644
--- a/spot-oa/oa/components/reputation/fb/fb.py
+++ b/spot-oa/oa/components/reputation/fb/fb.py
@@ -92,7 +92,7 @@ class Reputation(object):
             str_response = urllib2.urlopen(request).read()
             response = json.loads(str_response)
         except urllib2.HTTPError as e:
-            self._logger.error("Error calling ThreatExchange in module fb: " + e.message)
+            self._logger.info("Error calling ThreatExchange in module fb: " + e.message)
             reputation_dict[name] = self._get_reputation_label('UNKNOWN')
             return reputation_dict
 

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7693ad10/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index 1117583..a154c35 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -180,6 +180,7 @@ class OA(object):
 
         dns_scores_final = self._move_time_stamp(self._dns_scores)
         self._dns_scores = dns_scores_final
+    
         for row in dns_scores_final:
             value_string += str(tuple(Util.cast_val(item) for item in row)) + ","              
     
@@ -243,16 +244,15 @@ class OA(object):
 
 
     def _add_iana(self):
-
         iana_conf_file = "{0}/components/iana/iana_config.json".format(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-        if os.path.isfile(iana_conf_file):
+        if os.path.isfile(iana_conf_file):            
             iana_config  = json.loads(open(iana_conf_file).read())
             dns_iana = IanaTransform(iana_config["IANA"])
 
             dns_qry_class_index = self._conf["dns_results_fields"]["dns_qry_class"]
             dns_qry_type_index = self._conf["dns_results_fields"]["dns_qry_type"]
             dns_qry_rcode_index = self._conf["dns_results_fields"]["dns_qry_rcode"]            
-            self._dns_scores = [ conn + [ str(dns_iana.get_name(conn[dns_qry_class_index],"dns_qry_class"))] + [str(dns_iana.get_name(conn[dns_qry_type_index],"dns_qry_type"))] + [str(dns_iana.get_name(conn[dns_qry_rcode_index],"dns_qry_rcode"))] for conn in self._dns_scores ]
+            self._dns_scores = [ conn + [ dns_iana.get_name(conn[dns_qry_class_index],"dns_qry_class")] + [dns_iana.get_name(conn[dns_qry_type_index],"dns_qry_type")] + [dns_iana.get_name(conn[dns_qry_rcode_index],"dns_qry_rcode")] for conn in self._dns_scores ]
             
         else:            
             self._dns_scores = [ conn + ["","",""] for conn in self._dns_scores ] 
@@ -307,6 +307,7 @@ class OA(object):
             dns_qry_name = conn[self._conf["dns_score_fields"]["dns_qry_name"]]
             self._get_dns_details(dns_qry_name,yr,mn,dy,hh,dns_iana)
 
+
     def _get_dns_details(self,dns_qry_name,year,month,day,hh,dns_iana):
         value_string = ""
         query_to_load =("""
@@ -317,7 +318,7 @@ class OA(object):
         try: 
              dns_details = impala.execute_query(query_to_load) 
         except:
-            self._logger.error("ERROR. Details couldn't be retreived for {0}, skipping this step".format(dns_qry_name))
+            self._logger.info("WARNING. Details couldn't be retreived for {0}, skipping this step".format(dns_qry_name))
         else:
         # add IANA to results. 
             update_rows = []

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/7693ad10/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index ea46114..07313eb 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -231,7 +231,7 @@ class OA(object):
             iana_config  = json.loads(open(iana_conf_file).read())
             proxy_iana = IanaTransform(iana_config["IANA"])
             proxy_rcode_index = self._conf["proxy_score_fields"]["respcode"]            
-            self._proxy_scores = [ conn + [str(proxy_iana.get_name(conn[proxy_rcode_index],"proxy_http_rcode"))] for conn in self._proxy_scores ]
+            self._proxy_scores = [ conn + [proxy_iana.get_name(conn[proxy_rcode_index],"proxy_http_rcode")] for conn in self._proxy_scores ]
         else:
             self._proxy_scores = [ conn + [""] for conn in self._proxy_scores ]