You are viewing a plain-text version of this content; the canonical link was not preserved in this copy.
Posted to commits@spot.apache.org by ev...@apache.org on 2017/03/29 16:51:29 UTC

[05/50] [abbrv] incubator-spot git commit: remove files from HDFS/HIVE

Remove files from HDFS/Hive: build the deleted partition paths with integer month/day values, then invalidate Impala metadata.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/b85e3270
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/b85e3270
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/b85e3270

Branch: refs/heads/SPOT-35_graphql_api
Commit: b85e32701c20dabc776627e463fd8e278447ecf7
Parents: 1904f2b
Author: Everardo Lopez Sandoval (Intel) <el...@elopezsa-mac02.zpn.intel.com>
Authored: Mon Mar 6 12:27:40 2017 -0600
Committer: Diego Ortiz Huerta <di...@intel.com>
Committed: Wed Mar 15 11:49:47 2017 -0700

----------------------------------------------------------------------
 spot-oa/oa/dns/dns_oa.py     | 20 ++++++++++++--------
 spot-oa/oa/flow/flow_oa.py   | 14 +++++---------
 spot-oa/oa/proxy/proxy_oa.py | 10 +++++-----
 3 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/dns/dns_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/dns/dns_oa.py b/spot-oa/oa/dns/dns_oa.py
index 8d3ce80..f72f5a4 100644
--- a/spot-oa/oa/dns/dns_oa.py
+++ b/spot-oa/oa/dns/dns_oa.py
@@ -105,8 +105,10 @@ class OA(object):
         table_schema=['suspicious', 'edge', 'dendro', 'threat_dendro', 'threat_investigation', 'storyboard' ]
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
+
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
@@ -334,9 +336,10 @@ class OA(object):
                 dns_details = [ conn + (dns_nc.get_nc(conn[2]),) for conn in dns_details ]
             else:
                 dns_details = [ conn + (0,) for conn in dns_details ]
-                         
+            
+            # value_string += str(tuple(row) for row in dns_details) + ","              
             for row in dns_details:
-                value_string += str(tuple(item for item in row)) + ","   
+                value_string += str(tuple(item for item in row)) + ","
 
             if value_string != "": 
                 
@@ -350,9 +353,10 @@ class OA(object):
     def _get_dns_dendrogram(self): 
 
         for conn in self._dns_scores:   
-            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]
-            
+            timestamp = conn[self._conf["dns_score_fields"]["unix_tstamp"]]         
+
             full_date = datetime.datetime.utcfromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+
             date = full_date.split(" ")[0].split("-")
             # get date parameters.
             
@@ -408,7 +412,7 @@ class OA(object):
         if len(df_final) > 0:
             query_to_insert=("""
                 INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}) VALUES {3};
-            """).format(self._db, yr, mn, tuple(df_final))
-            
+            """).format(self._db, yr, mn, tuple(df_final))            
             impala.execute_query(query_to_insert)  
+
         

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/flow/flow_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/flow/flow_oa.py b/spot-oa/oa/flow/flow_oa.py
index 26e224b..0eb3e22 100644
--- a/spot-oa/oa/flow/flow_oa.py
+++ b/spot-oa/oa/flow/flow_oa.py
@@ -69,10 +69,7 @@ class OA(object):
         self._conf = json.loads(open (conf_file).read(),object_pairs_hook=OrderedDict)
 
         # initialize data engine
-        self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
-        self._engine = Data(self._db, self._table_name,self._logger)
-        
-        
+        self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')        
                 
     def start(self):       
         
@@ -108,16 +105,15 @@ class OA(object):
         table_schema=['suspicious', 'edge','chords','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,mn,dy),user="impala")
-        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}/d={4}".format(HUSER,path,yr,int(mn),int(dy)),user="impala")
+       
+        HDFSClient.delete_folder("{0}/flow/hive/oa/{1}/y={2}/m={3}".format(HUSER,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
         HDFSClient.delete_folder("{0}/{1}/oa/{2}/{3}/{4}/{5}".format(HUSER,self._table_name,"storyboard",yr,mn,dy))
 
-        
-
-
     def _create_folder_structure(self):   
 
         self._logger.info("Creating folder structure for OA (data and ipynb)")       

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/b85e3270/spot-oa/oa/proxy/proxy_oa.py
----------------------------------------------------------------------
diff --git a/spot-oa/oa/proxy/proxy_oa.py b/spot-oa/oa/proxy/proxy_oa.py
index 02a9297..d54219e 100644
--- a/spot-oa/oa/proxy/proxy_oa.py
+++ b/spot-oa/oa/proxy/proxy_oa.py
@@ -72,7 +72,6 @@ class OA(object):
 
         # initialize data engine
         self._db = self._spot_conf.get('conf', 'DBNAME').replace("'", "").replace('"', '')
-        self._engine = Data(self._db, self._table_name,self._logger)
 
 
     def start(self):
@@ -116,9 +115,10 @@ class OA(object):
         table_schema=['suspicious', 'edge','threat_investigation', 'timeline', 'storyboard', 'summary' ] 
 
         for path in table_schema:
-            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,mn,dy),user="impala")
-        
-        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,mn),user="impala")
+            HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}/d={5}".format(HUSER,self._table_name,path,yr,int(mn),int(dy)),user="impala")        
+        HDFSClient.delete_folder("{0}/{1}/hive/oa/{2}/y={3}/m={4}".format(HUSER,self._table_name,"summary",yr,int(mn)),user="impala")
+        impala.execute_query("invalidate metadata")
+
         #removes Feedback file
         HDFSClient.delete_folder("{0}/{1}/scored_results/{2}{3}{4}/feedback/ml_feedback.csv".format(HUSER,self._table_name,yr,mn,dy))
         #removes json files from the storyboard
@@ -354,4 +354,4 @@ class OA(object):
                 
         else:
             self._logger.info("No data found for the ingest summary")
-        
\ No newline at end of file
+