You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by le...@apache.org on 2017/09/11 15:32:17 UTC

[1/3] climate git commit: optimizing traversing

Repository: climate
Updated Branches:
  refs/heads/master 9a30e195b -> cf4fb57fd


optimizing traversing

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c800e48c
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c800e48c
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c800e48c

Branch: refs/heads/master
Commit: c800e48cd2750f8fe2bbeb77750efe55c62fd05a
Parents: cb93fce
Author: 象道 <w....@gmail.com>
Authored: Tue May 30 10:55:31 2017 -0400
Committer: GitHub <no...@github.com>
Committed: Tue May 30 10:55:31 2017 -0400

----------------------------------------------------------------------
 mccsearch/code/mccSearch.py | 45 ++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/c800e48c/mccsearch/code/mccSearch.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/mccSearch.py b/mccsearch/code/mccSearch.py
index 15ce0c6..3a84dd5 100644
--- a/mccsearch/code/mccSearch.py
+++ b/mccsearch/code/mccSearch.py
@@ -644,17 +644,10 @@ def findPrecipRate(TRMMdirName, timelist):
 		
 		brightnesstemp = np.squeeze(brightnesstemp1, axis=0)
 		
-		if int(fileHr1) % temporalRes == 0:
-			fileHr = fileHr1
-		else:
-			fileHr = (int(fileHr1)/temporalRes) * temporalRes
-		
-		if fileHr < 10:
-			fileHr = '0'+str(fileHr)
-		else:
-			str(fileHr)
+		fileHr = fileHr1 if int(fileHr1) % temporalRes == 0 else (int(fileHr1) / temporalRes) * temporalRes
+		fileHr = '0' + str(fileHr) if fileHr < 10 else str(fileHr)
 
-		TRMMfileName = TRMMdirName+"/3B42."+ str(fileDate) + "."+str(fileHr)+".7A.nc"
+		TRMMfileName = TRMMdirName+"/3B42."+ str(fileDate) + "."+ fileHr +".7A.nc"
 		TRMMData = Dataset(TRMMfileName,'r', format='NETCDF4')
 		precipRate = TRMMData.variables['pcp'][:,:,:]
 		latsrawTRMMData = TRMMData.variables['latitude'][:]
@@ -724,12 +717,10 @@ def findPrecipRate(TRMMdirName, timelist):
 		finalCETRMMvalues = ma.zeros((brightnesstemp1.shape))
 		#-----------End most of NETCDF file stuff ------------------------------------	
 		for index,value in np.ndenumerate(brightnesstemp):
-			lat_index, lon_index = index
-			currTimeValue = 0
 			if value > 0:
-
+				lat_index, lon_index = index
+				currTimeValue = 0
 				finalCETRMMvalues[0,lat_index,lon_index] = regriddedTRMM[int(np.where(LAT[:,0]==LAT[lat_index,0])[0]), int(np.where(LON[0,:]==LON[0,lon_index])[0])]
-				
 
 		rainFallacc[:] = finalCETRMMvalues
 		currNetCDFTRMMData.close()
@@ -918,7 +909,7 @@ def findMCC (prunedGraph):
 
 			#Do the first part of checking for the MCC feature
 			#find the path
-			treeTraversalList = traverseTree(aSubGraph, orderedPath[0],[],[])
+			treeTraversalList = traverseTree(aSubGraph, orderedPath[0],[], set(), [])
 			#print "treeTraversalList is ", treeTraversalList
 			#check the nodes to determine if a MCC on just the area criteria (consecutive nodes meeting the area and temp requirements)
 			MCCList = checkedNodesMCC(prunedGraph, treeTraversalList)
@@ -1004,7 +995,7 @@ def findMCC (prunedGraph):
 		
 	return definiteMCC, definiteMCS
 #******************************************************************
-def traverseTree(subGraph,node, stack, checkedNodes=None):
+def traverseTree(subGraph,node, stack, bookkeeper_stack, checkedNodes=None):
 	'''
 	Purpose:: 
 		To traverse a tree using a modified depth-first iterative deepening (DFID) search algorithm 
@@ -1029,30 +1020,30 @@ def traverseTree(subGraph,node, stack, checkedNodes=None):
 
 	if not checkedNodes:
 		stack =[]
+		bookkeeper_stack = set()
 		checkedNodes.append(node)
 		
 	#check one level infront first...if something does exisit, stick it at the front of the stack
 	upOneLevel = subGraph.predecessors(node)
 	downOneLevel = subGraph.successors(node)
 	for parent in upOneLevel:
-		if parent not in checkedNodes and parent not in stack:
+		if parent not in checkedNodes and parent not in bookkeeper_stack:
 			for child in downOneLevel:
-				if child not in checkedNodes and child not in stack:
+				if child not in checkedNodes and child not in bookkeeper_stack:
 					stack.insert(0,child)
-		
-			stack.insert(0,parent)	
+					bookkeeper_stack.add(child)
+			stack.insert(0,parent)
+			bookkeeper_stack.add(parent)
 
 	for child in downOneLevel:
-		if child not in checkedNodes and child not in stack:
-			if len(subGraph.predecessors(child)) > 1 or node in checkedNodes:
-				stack.insert(0,child)
-			else:
-				stack.append(child)		
-	
+		if child not in checkedNodes and child not in bookkeeper_stack:
+			stack.insert(0,child)
+			bookkeeper_stack.add(child)
+
 	for eachNode in stack:
 		if eachNode not in checkedNodes:
 			checkedNodes.append(eachNode)
-			return traverseTree(subGraph, eachNode, stack, checkedNodes)
+			return traverseTree(subGraph, eachNode, stack, bookkeeper_stack, checkedNodes)
 	
 	return checkedNodes 
 #******************************************************************


[2/3] climate git commit: CLIMATE-922 Optimize traversing in mccSearch

Posted by le...@apache.org.
http://git-wip-us.apache.org/repos/asf/climate/blob/cf4fb57f/mccsearch/code/mccSearch.py
----------------------------------------------------------------------
diff --cc mccsearch/code/mccSearch.py
index c63b243,3a84dd5..0d3b669
--- a/mccsearch/code/mccSearch.py
+++ b/mccsearch/code/mccSearch.py
@@@ -44,2626 -44,2114 +44,2891 @@@ import ocw.plotter as plotte
  
  #----------------------- GLOBAL VARIABLES --------------------------
  # --------------------- User defined variables ---------------------
 -#FYI the lat lon values are not necessarily inclusive of the points given. These are the limits
 -#the first point closest the the value (for the min) from the MERG data is used, etc.
 -LATMIN = '5.0' #min latitude; -ve values in the SH e.g. 5S = -5
 -LATMAX = '19.0' #max latitude; -ve values in the SH e.g. 5S = -5 20.0
 -LONMIN = '-5.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 -30
 -LONMAX = '5.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8  30
 -XRES = 4.0				#x direction spatial resolution in km
 -YRES = 4.0				#y direction spatial resolution in km
 -TRES = 1 				#temporal resolution in hrs
 -LAT_DISTANCE = 111.0 	#the avg distance in km for 1deg lat for the region being considered 
 -LON_DISTANCE = 111.0	#the avg distance in km for 1deg lon for the region being considered
 -STRUCTURING_ELEMENT = [[0,1,0],[1,1,1],[0,1,0]] #the matrix for determining the pattern for the contiguous boxes and must
 -												#have same rank of the matrix it is being compared against 
 -#criteria for determining cloud elements and edges
 -T_BB_MAX = 243  #warmest temp to allow (-30C to -55C according to Morel and Sensi 2002)
 -T_BB_MIN = 218  #cooler temp for the center of the system
 -CONVECTIVE_FRACTION = 0.90 #the min temp/max temp that would be expected in a CE.. this is highly conservative (only a 10K difference)
 -MIN_MCS_DURATION = 3	#minimum time for a MCS to exist
 -AREA_MIN = 2400.0		#minimum area for CE criteria in km^2 according to Vila et al. (2008) is 2400
 -MIN_OVERLAP= 10000.00   #km^2  from Williams and Houze 1987, indir ref in Arnaud et al 1992
 +# FYI the lat lon values are not necessarily inclusive of the points given. These are the limits
 +# the first point closest to the value (for the min) from the MERG data
 +# is used, etc.
 +LATMIN = '5.0'  # min latitude; -ve values in the SH e.g. 5S = -5
 +LATMAX = '19.0'  # max latitude; -ve values in the SH e.g. 5S = -5 20.0
 +LONMIN = '-5.0'  # min longitude; -ve values in the WH e.g. 59.8W = -59.8 -30
 +LONMAX = '5.0'  # min longitude; -ve values in the WH e.g. 59.8W = -59.8  30
 +XRES = 4.0  # x direction spatial resolution in km
 +YRES = 4.0  # y direction spatial resolution in km
 +TRES = 1  # temporal resolution in hrs
 +LAT_DISTANCE = 111.0  # the avg distance in km for 1deg lat for the region being considered
 +LON_DISTANCE = 111.0  # the avg distance in km for 1deg lon for the region being considered
 +# the matrix for determining the pattern for the contiguous boxes and must
 +STRUCTURING_ELEMENT = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
 +# have same rank of the matrix it is being compared against
 +# criteria for determining cloud elements and edges
 +# warmest temp to allow (-30C to -55C according to Morel and Sensi 2002)
 +T_BB_MAX = 243
 +T_BB_MIN = 218  # cooler temp for the center of the system
 +# the min temp/max temp that would be expected in a CE.. this is highly
 +# conservative (only a 10K difference)
 +CONVECTIVE_FRACTION = 0.90
 +MIN_MCS_DURATION = 3  # minimum time for a MCS to exist
 +# minimum area for CE criteria in km^2 according to Vila et al. (2008) is 2400
 +AREA_MIN = 2400.0
 +# km^2  from Williams and Houze 1987, indir ref in Arnaud et al 1992
 +MIN_OVERLAP = 10000.00
  
  #---the MCC criteria
 -ECCENTRICITY_THRESHOLD_MAX = 1.0  #tending to 1 is a circle e.g. hurricane, 
 -ECCENTRICITY_THRESHOLD_MIN = 0.50 #tending to 0 is a linear e.g. squall line
 -OUTER_CLOUD_SHIELD_AREA = 80000.0 #km^2
 -INNER_CLOUD_SHIELD_AREA = 30000.0 #km^2
 -OUTER_CLOUD_SHIELD_TEMPERATURE = 233 #in K
 -INNER_CLOUD_SHIELD_TEMPERATURE = 213 #in K
 -MINIMUM_DURATION = 6 #min number of frames the MCC must exist for (assuming hrly frames, MCCs is 6hrs)
 -MAXIMUM_DURATION = 24#max number of framce the MCC can last for 
 +ECCENTRICITY_THRESHOLD_MAX = 1.0  # tending to 1 is a circle e.g. hurricane,
 +ECCENTRICITY_THRESHOLD_MIN = 0.50  # tending to 0 is a linear e.g. squall line
 +OUTER_CLOUD_SHIELD_AREA = 80000.0  # km^2
 +INNER_CLOUD_SHIELD_AREA = 30000.0  # km^2
 +OUTER_CLOUD_SHIELD_TEMPERATURE = 233  # in K
 +INNER_CLOUD_SHIELD_TEMPERATURE = 213  # in K
 +# min number of frames the MCC must exist for (assuming hrly frames, MCCs
 +# is 6hrs)
 +MINIMUM_DURATION = 6
 +MAXIMUM_DURATION = 24  # max number of frames the MCC can last for
  #------------------- End user defined Variables -------------------
 -edgeWeight = [1,2,3] #weights for the graph edges
 -#graph object fo the CEs meeting the criteria
 +edgeWeight = [1, 2, 3]  # weights for the graph edges
 +# graph object of the CEs meeting the criteria
  CLOUD_ELEMENT_GRAPH = nx.DiGraph()
 -#graph meeting the CC criteria
 +# graph meeting the CC criteria
  PRUNED_GRAPH = nx.DiGraph()
  #------------------------ End GLOBAL VARS -------------------------
 +
  #************************ Begin Functions *************************
  #******************************************************************
 -def readMergData(dirname, filelist = None):
 -	'''
 -	Purpose::
 -		Read MERG data into RCMES format
 -	
 -	Input::
 -		dirname: a string representing the directory to the MERG files in NETCDF format
 -		filelist (optional): a list of strings representing the filenames betweent the start and end dates provided
 -	
 -	Output::
 -		A 3D masked array (t,lat,lon) with only the variables which meet the minimum temperature 
 -		criteria for each frame
 -
 -	Assumptions::
 -		The MERG data has been converted to NETCDF using LATS4D
 -		The data has the same lat/lon format
 -
 -	TODO:: figure out how to use netCDF4 to do the clipping tmp = netCDF4.Dataset(filelist[0])
 -
 -	'''
 -
 -	global LAT
 -	global LON
 -
 -	# these strings are specific to the MERG data
 -	mergVarName = 'ch4'
 -	mergTimeVarName = 'time'
 -	mergLatVarName = 'latitude'
 -	mergLonVarName = 'longitude'
 -	
 -	filelistInstructions = dirname + '/*'
 -	if filelist == None:
 -		filelist = glob.glob(filelistInstructions)
 -
 -	
 -	#sat_img is the array that will contain all the masked frames
 -	mergImgs = []
 -	#timelist of python time strings
 -	timelist = [] 
 -	time2store = None
 -	tempMaskedValueNp =[]
 -	
 -
 -	filelist.sort()
 -	nfiles = len(filelist)
 -
 -	# Crash nicely if there are no netcdf files
 -	if nfiles == 0:
 -		print 'Error: no files in this directory! Exiting elegantly'
 -		sys.exit()
 -	else:
 -		# Open the first file in the list to read in lats, lons and generate the  grid for comparison
 -		tmp = Dataset(filelist[0], 'r+',format='NETCDF4')
 -
 -		alllatsraw = tmp.variables[mergLatVarName][:]
 -		alllonsraw = tmp.variables[mergLonVarName][:]
 -		alllonsraw[alllonsraw > 180] = alllonsraw[alllonsraw > 180] - 360.  # convert to -180,180 if necessary
 -		
 -		#get the lat/lon info data (different resolution)
 -		latminNETCDF = find_nearest(alllatsraw, float(LATMIN))
 -		latmaxNETCDF = find_nearest(alllatsraw, float(LATMAX))
 -		lonminNETCDF = find_nearest(alllonsraw, float(LONMIN))
 -		lonmaxNETCDF = find_nearest(alllonsraw, float(LONMAX))
 -		latminIndex = (np.where(alllatsraw == latminNETCDF))[0][0]
 -		latmaxIndex = (np.where(alllatsraw == latmaxNETCDF))[0][0]
 -		lonminIndex = (np.where(alllonsraw == lonminNETCDF))[0][0]
 -		lonmaxIndex = (np.where(alllonsraw == lonmaxNETCDF))[0][0]
 -		
 -		#subsetting the data
 -		latsraw = alllatsraw[latminIndex: latmaxIndex]
 -		lonsraw = alllonsraw[lonminIndex:lonmaxIndex]
 -		
 -		LON, LAT = np.meshgrid(lonsraw, latsraw)
 -		#clean up
 -		latsraw =[]
 -		lonsraw = []
 -		nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 -		tmp.close
 -	
 -	for files in filelist:
 -		try:
 -			thisFile = Dataset(files, format='NETCDF4')
 -			#clip the dataset according to user lat,lon coordinates
 -			tempRaw = thisFile.variables[mergVarName][:,latminIndex:latmaxIndex,lonminIndex:lonmaxIndex].astype('int16')
 -			tempMask = ma.masked_array(tempRaw, mask=(tempRaw > T_BB_MAX), fill_value=0) 
 -			
 -			#get the actual values that the mask returned
 -			tempMaskedValue = ma.zeros((tempRaw.shape)).astype('int16')
 -			for index, value in maenumerate(tempMask): 
 -				time_index, lat_index, lon_index = index			
 -				tempMaskedValue[time_index,lat_index, lon_index]=value	
 -			
 -			xtimes = thisFile.variables[mergTimeVarName]
 -			#convert this time to a python datastring
 -			time2store, _ = getModelTimes(xtimes, mergTimeVarName)
 -			#extend instead of append because getModelTimes returns a list already and we don't 
 -			#want a list of list
 -			timelist.extend(time2store)
 -			mergImgs.extend(tempMaskedValue) 
 -			thisFile.close
 -			thisFile = None
 -			
 -		except:
 -			print "bad file! ", files
 -
 -	mergImgs = ma.array(mergImgs)
 -
 -	return mergImgs, timelist
 +
 +
 +def readMergData(dirname, filelist=None):
 +    '''
 +    Purpose::
 +            Read MERG data into RCMES format
 +
 +    Input::
 +            dirname: a string representing the directory to the MERG files in NETCDF format
 +            filelist (optional): a list of strings representing the filenames between the start and end dates provided
 +
 +    Output::
 +            A 3D masked array (t,lat,lon) with only the variables which meet the minimum temperature
 +            criteria for each frame
 +
 +    Assumptions::
 +            The MERG data has been converted to NETCDF using LATS4D
 +            The data has the same lat/lon format
 +
 +    TODO:: figure out how to use netCDF4 to do the clipping tmp = netCDF4.Dataset(filelist[0])
 +
 +    '''
 +
 +    global LAT
 +    global LON
 +
 +    # these strings are specific to the MERG data
 +    mergVarName = 'ch4'
 +    mergTimeVarName = 'time'
 +    mergLatVarName = 'latitude'
 +    mergLonVarName = 'longitude'
 +    filelistInstructions = dirname + '/*'
 +    if filelist is None:
 +        filelist = glob.glob(filelistInstructions)
 +
 +    # sat_img is the array that will contain all the masked frames
 +    mergImgs = []
 +    # timelist of python time strings
 +    timelist = []
 +    time2store = None
 +    tempMaskedValueNp = []
 +
 +    filelist.sort()
 +    nfiles = len(filelist)
 +
 +    # Crash nicely if there are no netcdf files
 +    if nfiles == 0:
 +        print("Error: no files in this directory! Exiting elegantly")
 +        sys.exit()
 +    else:
 +        # Open the first file in the list to read in lats, lons and generate
 +        # the  grid for comparison
 +        tmp = Dataset(filelist[0], 'r+', format='NETCDF4')
 +
 +        alllatsraw = tmp.variables[mergLatVarName][:]
 +        alllonsraw = tmp.variables[mergLonVarName][:]
 +        alllonsraw[alllonsraw > 180] = alllonsraw[alllonsraw >
 +                                                  180] - 360.  # convert to -180,180 if necessary
 +
 +        # get the lat/lon info data (different resolution)
 +        latminNETCDF = find_nearest(alllatsraw, float(LATMIN))
 +        latmaxNETCDF = find_nearest(alllatsraw, float(LATMAX))
 +        lonminNETCDF = find_nearest(alllonsraw, float(LONMIN))
 +        lonmaxNETCDF = find_nearest(alllonsraw, float(LONMAX))
 +        latminIndex = (np.where(alllatsraw == latminNETCDF))[0][0]
 +        latmaxIndex = (np.where(alllatsraw == latmaxNETCDF))[0][0]
 +        lonminIndex = (np.where(alllonsraw == lonminNETCDF))[0][0]
 +        lonmaxIndex = (np.where(alllonsraw == lonmaxNETCDF))[0][0]
 +
 +        # subsetting the data
 +        latsraw = alllatsraw[latminIndex: latmaxIndex]
 +        lonsraw = alllonsraw[lonminIndex:lonmaxIndex]
 +
 +        LON, LAT = np.meshgrid(lonsraw, latsraw)
 +        # clean up
 +        latsraw = []
 +        lonsraw = []
 +        nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 +        tmp.close
 +
 +    for files in filelist:
 +        try:
 +            thisFile = Dataset(files, format='NETCDF4')
 +            # clip the dataset according to user lat,lon coordinates
 +            tempRaw = thisFile.variables[mergVarName][:,
 +                                                      latminIndex:latmaxIndex,
 +                                                      lonminIndex:lonmaxIndex].astype('int16')
 +            tempMask = ma.masked_array(
 +                tempRaw, mask=(
 +                    tempRaw > T_BB_MAX), fill_value=0)
 +
 +            # get the actual values that the mask returned
 +            tempMaskedValue = ma.zeros((tempRaw.shape)).astype('int16')
 +            for index, value in maenumerate(tempMask):
 +                time_index, lat_index, lon_index = index
 +                tempMaskedValue[time_index, lat_index, lon_index] = value
 +
 +            xtimes = thisFile.variables[mergTimeVarName]
 +            # convert this time to a python datastring
 +            time2store, _ = getModelTimes(xtimes, mergTimeVarName)
 +            # extend instead of append because getModelTimes returns a list already and we don't
 +            # want a list of list
 +            timelist.extend(time2store)
 +            mergImgs.extend(tempMaskedValue)
 +            thisFile.close
 +            thisFile = None
 +
 +        except BaseException:
 +            print(("bad file! %s" % files))
 +
 +    mergImgs = ma.array(mergImgs)
 +
 +    return mergImgs, timelist
  #******************************************************************
 -def findCloudElements(mergImgs,timelist,TRMMdirName=None):
 -	'''
 -	Purpose::
 -		Determines the contiguous boxes for a given time of the satellite images i.e. each frame
 -		using scipy ndimage package
 -	
 -	Input::	
 -		mergImgs: masked numpy array in (time,lat,lon),T_bb representing the satellite data. This is masked based on the
 -		maximum acceptable temperature, T_BB_MAX
 -		timelist: a list of python datatimes
 -		TRMMdirName (optional): string representing the path where to find the TRMM datafiles
 -		
 -	Output::
 -		CLOUD_ELEMENT_GRAPH: a Networkx directed graph where each node contains the information in cloudElementDict
 -		The nodes are determined according to the area of contiguous squares. The nodes are linked through weighted edges.
 -
 -		cloudElementDict = {'uniqueID': unique tag for this CE, 
 -							'cloudElementTime': time of the CE,
 -							'cloudElementLatLon': (lat,lon,value) of MERG data of CE, 
 -							'cloudElementCenter':list of floating-point [lat,lon] representing the CE's center 
 -							'cloudElementArea':floating-point representing the area of the CE, 
 -							'cloudElementEccentricity': floating-point representing the shape of the CE, 
 -							'cloudElementTmax':integer representing the maximum Tb in CE, 
 -							'cloudElementTmin': integer representing the minimum Tb in CE, 
 -							'cloudElementPrecipTotal':floating-point representing the sum of all rainfall in CE if TRMMdirName entered,
 -							'cloudElementLatLonTRMM':(lat,lon,value) of TRMM data in CE if TRMMdirName entered, 
 -							'TRMMArea': floating-point representing the CE if TRMMdirName entered,
 -							'CETRMMmax':floating-point representing the max rate in the CE if TRMMdirName entered, 
 -							'CETRMMmin':floating-point representing the min rate in the CE if TRMMdirName entered}
 -	Assumptions::
 -		Assumes we are dealing with MERG data which is 4kmx4km resolved, thus the smallest value 
 -		required according to Vila et al. (2008) is 2400km^2 
 -		therefore, 2400/16 = 150 contiguous squares
 -	'''
 -
 -	frame = ma.empty((1,mergImgs.shape[1],mergImgs.shape[2]))
 -	CEcounter = 0
 -	frameCEcounter = 0
 -	frameNum = 0
 -	cloudElementEpsilon = 0.0
 -	cloudElementDict = {} 
 -	cloudElementCenter = []		#list with two elements [lat,lon] for the center of a CE
 -	prevFrameCEs = []			#list for CEs in previous frame
 -	currFrameCEs = []			#list for CEs in current frame
 -	cloudElementLat = []		#list for a particular CE's lat values
 -	cloudElementLon = []		#list for a particular CE's lon values
 -	cloudElementLatLons = []	#list for a particular CE's (lat,lon) values
 -	
 -	prevLatValue = 0.0
 -	prevLonValue = 0.0
 -	TIR_min = 0.0
 -	TIR_max = 0.0
 -	temporalRes = 3 # TRMM data is 3 hourly
 -	precipTotal = 0.0
 -	CETRMMList =[]
 -	precip =[]
 -	TRMMCloudElementLatLons =[]
 -
 -	minCELatLimit = 0.0
 -	minCELonLimit = 0.0
 -	maxCELatLimit = 0.0
 -	maxCELonLimit = 0.0
 -	
 -	nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 -	
 -	#openfile for storing ALL cloudElement information 
 -	cloudElementsFile = open((MAINDIRECTORY+'/textFiles/cloudElements.txt'),'wb')
 -	#openfile for storing cloudElement information meeting user criteria i.e. MCCs in this case
 -	cloudElementsUserFile = open((MAINDIRECTORY+'/textFiles/cloudElementsUserFile.txt'),'w')
 -	
 -	#NB in the TRMM files the info is hours since the time thus 00Z file has in 01, 02 and 03 times
 -	for t in xrange(mergImgs.shape[0]):
 -		#-------------------------------------------------
 -		# #textfile name for saving the data for arcgis
 -		# thisFileName = MAINDIRECTORY+'/' + (str(timelist[t])).replace(" ", "_") + '.txt'
 -		# cloudElementsTextFile = open(thisFileName,'w')
 -		#-------------------------------------------------
 -
 -		#determine contiguous locations with temeperature below the warmest temp i.e. cloudElements in each frame
 -	   	frame, CEcounter = ndimage.measurements.label(mergImgs[t,:,:], structure=STRUCTURING_ELEMENT)
 -	   	frameCEcounter=0
 -		frameNum += 1
 -
 -		#for each of the areas identified, check to determine if it a valid CE via an area and T requirement
 -	   	for count in xrange(CEcounter):
 -	   		#[0] is time dimension. Determine the actual values from the data
 -	   		#loc is a masked array
 -	   		try:
 -	   			loc = ndimage.find_objects(frame==(count+1))[0]
 -	   		except Exception, e:
 -	   			print "Error is ", e
 -	   			continue
 -
 -
 -	   		cloudElement = mergImgs[t,:,:][loc]
 -	   		labels, lcounter = ndimage.label(cloudElement)
 -	   		
 -	   		#determine the true lats and lons for this particular CE
 -   			cloudElementLat = LAT[loc[0],0]
 -   			cloudElementLon = LON[0,loc[1]] 
 -   			
 -	   		#determine number of boxes in this cloudelement
 -	   		numOfBoxes = np.count_nonzero(cloudElement)
 -	   		cloudElementArea = numOfBoxes*XRES*YRES
 -
 -	   		#If the area is greater than the area required, or if the area is smaller than the suggested area, check if it meets a convective fraction requirement
 -	   		#consider as CE
 -
 -	   		if cloudElementArea >= AREA_MIN or (cloudElementArea < AREA_MIN and ((ndimage.minimum(cloudElement, labels=labels))/float((ndimage.maximum(cloudElement, labels=labels)))) < CONVECTIVE_FRACTION ):
 -
 -	   			#get some time information and labeling info
 -	   			frameTime = str(timelist[t])
 -	   			frameCEcounter +=1
 -	   			CEuniqueID = 'F'+str(frameNum)+'CE'+str(frameCEcounter) 
 -
 -	   			#-------------------------------------------------
 -				#textfile name for accesing CE data using MATLAB code
 -				# thisFileName = MAINDIRECTORY+'/' + (str(timelist[t])).replace(" ", "_") + CEuniqueID +'.txt'
 -				# cloudElementsTextFile = open(thisFileName,'w')
 -				#-------------------------------------------------
 -
 -				# ------ NETCDF File stuff for brightness temp stuff ------------------------------------
 -				thisFileName = MAINDIRECTORY +'/MERGnetcdfCEs/cloudElements'+ (str(timelist[t])).replace(" ", "_") + CEuniqueID +'.nc'
 -				currNetCDFCEData = Dataset(thisFileName, 'w', format='NETCDF4')
 -				currNetCDFCEData.description = 'Cloud Element '+CEuniqueID + ' temperature data'
 -				currNetCDFCEData.calendar = 'standard'
 -				currNetCDFCEData.conventions = 'COARDS'
 -				# dimensions
 -				currNetCDFCEData.createDimension('time', None)
 -				currNetCDFCEData.createDimension('lat', len(LAT[:,0]))
 -				currNetCDFCEData.createDimension('lon', len(LON[0,:]))
 -				# variables
 -				tempDims = ('time','lat', 'lon',)
 -				times = currNetCDFCEData.createVariable('time', 'f8', ('time',))
 -				times.units = 'hours since '+ str(timelist[t])[:-6]
 -				latitudes = currNetCDFCEData.createVariable('latitude', 'f8', ('lat',))
 -				longitudes = currNetCDFCEData.createVariable('longitude', 'f8', ('lon',))
 -				brightnesstemp = currNetCDFCEData.createVariable('brightnesstemp', 'i16',tempDims )
 -				brightnesstemp.units = 'Kelvin'
 -				# NETCDF data
 -				dates=[timelist[t]+timedelta(hours=0)]
 -				times[:] =  date2num(dates,units=times.units)
 -				longitudes[:] = LON[0,:]
 -				longitudes.units = "degrees_east" 
 -				longitudes.long_name = "Longitude" 
 -
 -				latitudes[:] =  LAT[:,0]
 -				latitudes.units = "degrees_north"
 -				latitudes.long_name ="Latitude"
 -				
 -				#generate array of zeros for brightness temperature
 -				brightnesstemp1 = ma.zeros((1,len(latitudes), len(longitudes))).astype('int16')
 -				#-----------End most of NETCDF file stuff ------------------------------------
 -
 -				#if other dataset (TRMM) assumed to be a precipitation dataset was entered
 -				if TRMMdirName:
 -					#------------------TRMM stuff -------------------------------------------------
 -					fileDate = ((str(timelist[t])).replace(" ", "")[:-8]).replace("-","")
 -					fileHr1 = (str(timelist[t])).replace(" ", "")[-8:-6]
 -					
 -					if int(fileHr1) % temporalRes == 0:
 -						fileHr = fileHr1
 -					else:
 -						fileHr = (int(fileHr1)/temporalRes) * temporalRes
 -					if fileHr < 10:
 -						fileHr = '0'+str(fileHr)
 -					else:
 -						str(fileHr)
 -
 -					#open TRMM file for the resolution info and to create the appropriate sized grid
 -					TRMMfileName = TRMMdirName+'/3B42.'+ fileDate + "."+str(fileHr)+".7A.nc"
 -					
 -					TRMMData = Dataset(TRMMfileName,'r', format='NETCDF4')
 -					precipRate = TRMMData.variables['pcp'][:,:,:]
 -					latsrawTRMMData = TRMMData.variables['latitude'][:]
 -					lonsrawTRMMData = TRMMData.variables['longitude'][:]
 -					lonsrawTRMMData[lonsrawTRMMData > 180] = lonsrawTRMMData[lonsrawTRMMData>180] - 360.
 -					LONTRMM, LATTRMM = np.meshgrid(lonsrawTRMMData, latsrawTRMMData)
 -
 -					nygrdTRMM = len(LATTRMM[:,0]); nxgrdTRMM = len(LONTRMM[0,:])
 -					precipRateMasked = ma.masked_array(precipRate, mask=(precipRate < 0.0))
 -					#---------regrid the TRMM data to the MERG dataset ----------------------------------
 -					#regrid using the do_regrid stuff from the Apache OCW 
 -					regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
 -					#regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 -					regriddedTRMM = do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 -					#----------------------------------------------------------------------------------
 -		
 -					# #get the lat/lon info from cloudElement
 -					#get the lat/lon info from the file
 -					latCEStart = LAT[0][0]
 -					latCEEnd = LAT[-1][0]
 -					lonCEStart = LON[0][0]
 -					lonCEEnd = LON[0][-1]
 -					
 -					#get the lat/lon info for TRMM data (different resolution)
 -					latStartT = find_nearest(latsrawTRMMData, latCEStart)
 -					latEndT = find_nearest(latsrawTRMMData, latCEEnd)
 -					lonStartT = find_nearest(lonsrawTRMMData, lonCEStart)
 -					lonEndT = find_nearest(lonsrawTRMMData, lonCEEnd)
 -					latStartIndex = np.where(latsrawTRMMData == latStartT)
 -					latEndIndex = np.where(latsrawTRMMData == latEndT)
 -					lonStartIndex = np.where(lonsrawTRMMData == lonStartT)
 -					lonEndIndex = np.where(lonsrawTRMMData == lonEndT)
 -
 -					#get the relevant TRMM info 
 -					CEprecipRate = precipRate[:,(latStartIndex[0][0]-1):latEndIndex[0][0],(lonStartIndex[0][0]-1):lonEndIndex[0][0]]
 -					TRMMData.close()
 -					
 -					# ------ NETCDF File info for writing TRMM CE rainfall ------------------------------------
 -					thisFileName = MAINDIRECTORY+'/TRMMnetcdfCEs/TRMM' + (str(timelist[t])).replace(" ", "_") + CEuniqueID +'.nc'
 -					currNetCDFTRMMData = Dataset(thisFileName, 'w', format='NETCDF4')
 -					currNetCDFTRMMData.description = 'Cloud Element '+CEuniqueID + ' precipitation data'
 -					currNetCDFTRMMData.calendar = 'standard'
 -					currNetCDFTRMMData.conventions = 'COARDS'
 -					# dimensions
 -					currNetCDFTRMMData.createDimension('time', None)
 -					currNetCDFTRMMData.createDimension('lat', len(LAT[:,0]))
 -					currNetCDFTRMMData.createDimension('lon', len(LON[0,:]))
 -					
 -					# variables
 -					TRMMprecip = ('time','lat', 'lon',)
 -					times = currNetCDFTRMMData.createVariable('time', 'f8', ('time',))
 -					times.units = 'hours since '+ str(timelist[t])[:-6]
 -					latitude = currNetCDFTRMMData.createVariable('latitude', 'f8', ('lat',))
 -					longitude = currNetCDFTRMMData.createVariable('longitude', 'f8', ('lon',))
 -					rainFallacc = currNetCDFTRMMData.createVariable('precipitation_Accumulation', 'f8',TRMMprecip )
 -					rainFallacc.units = 'mm'
 -
 -					longitude[:] = LON[0,:]
 -					longitude.units = "degrees_east" 
 -					longitude.long_name = "Longitude" 
 -
 -					latitude[:] =  LAT[:,0]
 -					latitude.units = "degrees_north"
 -					latitude.long_name ="Latitude"
 -
 -					finalCETRMMvalues = ma.zeros((brightnesstemp.shape))
 -					#-----------End most of NETCDF file stuff ------------------------------------
 -
 -	   			#populate cloudElementLatLons by unpacking the original values from loc to get the actual value for lat and lon
 -				#TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this
 -				# 			as cloudElement is masked
 -				for index,value in np.ndenumerate(cloudElement):
 -					if value != 0 : 
 -						lat_index,lon_index = index
 -						lat_lon_tuple = (cloudElementLat[lat_index], cloudElementLon[lon_index],value)
 -
 -						#generate the comma separated file for GIS
 -						cloudElementLatLons.append(lat_lon_tuple)
 -
 -						#temp data for CE NETCDF file
 -						brightnesstemp1[0,int(np.where(LAT[:,0]==cloudElementLat[lat_index])[0]),int(np.where(LON[0,:]==cloudElementLon[lon_index])[0])] = value
 -						
 -						if TRMMdirName:
 -							finalCETRMMvalues[0,int(np.where(LAT[:,0]==cloudElementLat[lat_index])[0]),int(np.where(LON[0,:]==cloudElementLon[lon_index])[0])] = regriddedTRMM[int(np.where(LAT[:,0]==cloudElementLat[lat_index])[0]),int(np.where(LON[0,:]==cloudElementLon[lon_index])[0])]
 -							CETRMMList.append((cloudElementLat[lat_index], cloudElementLon[lon_index], finalCETRMMvalues[0,cloudElementLat[lat_index], cloudElementLon[lon_index]]))
 -
 -
 -				brightnesstemp[:] = brightnesstemp1[:]
 -				currNetCDFCEData.close()
 -
 -				if TRMMdirName:
 -
 -					#calculate the total precip associated with the feature
 -					for index, value in np.ndenumerate(finalCETRMMvalues):
 -						precipTotal += value 
 -						precip.append(value)
 -			
 -					rainFallacc[:] = finalCETRMMvalues[:]
 -					currNetCDFTRMMData.close()
 -					TRMMnumOfBoxes = np.count_nonzero(finalCETRMMvalues)
 -					TRMMArea = TRMMnumOfBoxes*XRES*YRES
 -					try:
 -						maxCEprecipRate = np.max(finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 -						minCEprecipRate = np.min(finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 -					except:
 -						pass
 -
 -				#sort cloudElementLatLons by lats
 -				cloudElementLatLons.sort(key=lambda tup: tup[0])	
 -
 -				#determine if the cloud element the shape 
 -				cloudElementEpsilon = eccentricity (cloudElement)
 -				cloudElementsUserFile.write("\n\nTime is: %s" %(str(timelist[t])))
 -				cloudElementsUserFile.write("\nCEuniqueID is: %s" %CEuniqueID)
 -				latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels)
 -				
 -				#latCenter and lonCenter are given according to the particular array defining this CE
 -				#so you need to convert this value to the overall domain truth
 -				latCenter = cloudElementLat[round(latCenter)]
 -				lonCenter = cloudElementLon[round(lonCenter)]
 -				cloudElementsUserFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter))
 -				cloudElementCenter.append(latCenter)
 -				cloudElementCenter.append(lonCenter)
 -				cloudElementsUserFile.write("\nNumber of boxes are: %d" %numOfBoxes)
 -				cloudElementsUserFile.write("\nArea is: %.4f km^2" %(cloudElementArea))
 -				cloudElementsUserFile.write("\nAverage brightness temperature is: %.4f K" %ndimage.mean(cloudElement, labels=labels))
 -				cloudElementsUserFile.write("\nMin brightness temperature is: %.4f K" %ndimage.minimum(cloudElement, labels=labels))
 -				cloudElementsUserFile.write("\nMax brightness temperature is: %.4f K" %ndimage.maximum(cloudElement, labels=labels))
 -				cloudElementsUserFile.write("\nBrightness temperature variance is: %.4f K" %ndimage.variance(cloudElement, labels=labels))
 -				cloudElementsUserFile.write("\nConvective fraction is: %.4f " %(((ndimage.minimum(cloudElement, labels=labels))/float((ndimage.maximum(cloudElement, labels=labels))))*100.0))
 -				cloudElementsUserFile.write("\nEccentricity is: %.4f " %(cloudElementEpsilon))
 -				#populate the dictionary
 -				if TRMMdirName:
 -					cloudElementDict = {'uniqueID': CEuniqueID, 'cloudElementTime': timelist[t],'cloudElementLatLon': cloudElementLatLons, 'cloudElementCenter':cloudElementCenter, 'cloudElementArea':cloudElementArea, 'cloudElementEccentricity':cloudElementEpsilon, 'cloudElementTmax':TIR_max, 'cloudElementTmin': TIR_min, 'cloudElementPrecipTotal':precipTotal,'cloudElementLatLonTRMM':CETRMMList, 'TRMMArea': TRMMArea,'CETRMMmax':maxCEprecipRate, 'CETRMMmin':minCEprecipRate}
 -				else:
 -					cloudElementDict = {'uniqueID': CEuniqueID, 'cloudElementTime': timelist[t],'cloudElementLatLon': cloudElementLatLons, 'cloudElementCenter':cloudElementCenter, 'cloudElementArea':cloudElementArea, 'cloudElementEccentricity':cloudElementEpsilon, 'cloudElementTmax':TIR_max, 'cloudElementTmin': TIR_min,}
 -				
 -				#current frame list of CEs
 -				currFrameCEs.append(cloudElementDict)
 -				
 -				#draw the graph node
 -				CLOUD_ELEMENT_GRAPH.add_node(CEuniqueID, cloudElementDict)
 -				
 -				if frameNum != 1:
 -					for cloudElementDict in prevFrameCEs:
 -						thisCElen = len(cloudElementLatLons)
 -						percentageOverlap, areaOverlap = cloudElementOverlap(cloudElementLatLons, cloudElementDict['cloudElementLatLon'])
 -						
 -						#change weights to integers because the built in shortest path chokes on floating pts according to Networkx doc
 -						#according to Goyens et al, two CEs are considered related if there is atleast 95% overlap between them for consecutive imgs a max of 2 hrs apart
 -						if percentageOverlap >= 0.95: 
 -							CLOUD_ELEMENT_GRAPH.add_edge(cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[0])
 -							
 -						elif percentageOverlap >= 0.90 and percentageOverlap < 0.95 :
 -							CLOUD_ELEMENT_GRAPH.add_edge(cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[1])
 -
 -						elif areaOverlap >= MIN_OVERLAP:
 -							CLOUD_ELEMENT_GRAPH.add_edge(cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[2])
 -
 -				else:
 -					#TODO: remove this else as we only wish for the CE details
 -					#ensure only the non-zero elements are considered
 -					#store intel in allCE file
 -					labels, _ = ndimage.label(cloudElement)
 -					cloudElementsFile.write("\n-----------------------------------------------")
 -					cloudElementsFile.write("\n\nTime is: %s" %(str(timelist[t])))
 -					# cloudElementLat = LAT[loc[0],0]
 -					# cloudElementLon = LON[0,loc[1]] 
 -					
 -					#populate cloudElementLatLons by unpacking the original values from loc
 -					#TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this
 -					# 			as cloudElement is masked
 -					for index,value in np.ndenumerate(cloudElement):
 -						if value != 0 : 
 -							lat_index,lon_index = index
 -							lat_lon_tuple = (cloudElementLat[lat_index], cloudElementLon[lon_index])
 -							cloudElementLatLons.append(lat_lon_tuple)
 -	
 -					cloudElementsFile.write("\nLocation of rejected CE (lat,lon) points are: %s" %cloudElementLatLons)
 -					#latCenter and lonCenter are given according to the particular array defining this CE
 -		   			#so you need to convert this value to the overall domain truth
 -					latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels)
 -					latCenter = cloudElementLat[round(latCenter)]
 -					lonCenter = cloudElementLon[round(lonCenter)]
 -					cloudElementsFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter))
 -					cloudElementsFile.write("\nNumber of boxes are: %d" %numOfBoxes)
 -					cloudElementsFile.write("\nArea is: %.4f km^2" %(cloudElementArea))
 -					cloudElementsFile.write("\nAverage brightness temperature is: %.4f K" %ndimage.mean(cloudElement, labels=labels))
 -					cloudElementsFile.write("\nMin brightness temperature is: %.4f K" %ndimage.minimum(cloudElement, labels=labels))
 -					cloudElementsFile.write("\nMax brightness temperature is: %.4f K" %ndimage.maximum(cloudElement, labels=labels))
 -					cloudElementsFile.write("\nBrightness temperature variance is: %.4f K" %ndimage.variance(cloudElement, labels=labels))
 -					cloudElementsFile.write("\nConvective fraction is: %.4f " %(((ndimage.minimum(cloudElement, labels=labels))/float((ndimage.maximum(cloudElement, labels=labels))))*100.0))
 -					cloudElementsFile.write("\nEccentricity is: %.4f " %(cloudElementEpsilon))
 -					cloudElementsFile.write("\n-----------------------------------------------")
 -					
 -			#reset list for the next CE
 -			nodeExist = False
 -			cloudElementCenter=[]
 -			cloudElement = []
 -			cloudElementLat=[]
 -			cloudElementLon =[]
 -			cloudElementLatLons =[]
 -			brightnesstemp1 =[]
 -			brightnesstemp =[]
 -			finalCETRMMvalues =[]
 -			CEprecipRate =[]
 -			CETRMMList =[]
 -			precipTotal = 0.0
 -			precip=[]
 -			TRMMCloudElementLatLons=[]
 -			
 -		#reset for the next time
 -		prevFrameCEs =[]
 -		prevFrameCEs = currFrameCEs
 -		currFrameCEs =[]
 -						
 -	cloudElementsFile.close
 -	cloudElementsUserFile.close
 -	#if using ARCGIS data store code, uncomment this file close line
 -	#cloudElementsTextFile.close
 -
 -	#clean up graph - remove parent and childless nodes
 -	outAndInDeg = CLOUD_ELEMENT_GRAPH.degree_iter()
 -	toRemove = [node[0] for node in outAndInDeg if node[1]<1]
 -	CLOUD_ELEMENT_GRAPH.remove_nodes_from(toRemove)
 -	
 -	print "number of nodes are: ", CLOUD_ELEMENT_GRAPH.number_of_nodes()
 -	print "number of edges are: ", CLOUD_ELEMENT_GRAPH.number_of_edges()
 -	print ("*"*80)
 -
 -	#hierachial graph output
 -	graphTitle = "Cloud Elements observed over somewhere from 0000Z to 0000Z" 
 -	#drawGraph(CLOUD_ELEMENT_GRAPH, graphTitle, edgeWeight)
 -
 -	return CLOUD_ELEMENT_GRAPH	
 +
 +
 +def findCloudElements(mergImgs, timelist, TRMMdirName=None):
 +    '''
 +    Purpose::
 +            Determines the contiguous boxes for a given time of the satellite images i.e. each frame
 +            using scipy ndimage package
 +
 +    Input::
 +            mergImgs: masked numpy array in (time,lat,lon),T_bb representing the satellite data. This is masked based on the
 +            maximum acceptable temperature, T_BB_MAX
 +            timelist: a list of python datetimes
 +            TRMMdirName (optional): string representing the path where to find the TRMM datafiles
 +
 +    Output::
 +            CLOUD_ELEMENT_GRAPH: a Networkx directed graph where each node contains the information in cloudElementDict
 +            The nodes are determined according to the area of contiguous squares. The nodes are linked through weighted edges.
 +
 +            cloudElementDict = {'uniqueID': unique tag for this CE,
 +                                                    'cloudElementTime': time of the CE,
 +                                                    'cloudElementLatLon': (lat,lon,value) of MERG data of CE,
 +                                                    'cloudElementCenter':list of floating-point [lat,lon] representing the CE's center
 +                                                    'cloudElementArea':floating-point representing the area of the CE,
 +                                                    'cloudElementEccentricity': floating-point representing the shape of the CE,
 +                                                    'cloudElementTmax':integer representing the maximum Tb in CE,
 +                                                    'cloudElementTmin': integer representing the minimum Tb in CE,
 +                                                    'cloudElementPrecipTotal':floating-point representing the sum of all rainfall in CE if TRMMdirName entered,
 +                                                    'cloudElementLatLonTRMM':(lat,lon,value) of TRMM data in CE if TRMMdirName entered,
 +                                                    'TRMMArea': floating-point representing the CE if TRMMdirName entered,
 +                                                    'CETRMMmax':floating-point representing the max rate in the CE if TRMMdirName entered,
 +                                                    'CETRMMmin':floating-point representing the min rate in the CE if TRMMdirName entered}
 +    Assumptions::
 +            Assumes we are dealing with MERG data which is 4kmx4km resolved, thus the smallest value
 +            required according to Vila et al. (2008) is 2400km^2
 +            therefore, 2400/16 = 150 contiguous squares
 +    '''
 +
 +    frame = ma.empty((1, mergImgs.shape[1], mergImgs.shape[2]))
 +    CEcounter = 0
 +    frameCEcounter = 0
 +    frameNum = 0
 +    cloudElementEpsilon = 0.0
 +    cloudElementDict = {}
 +    cloudElementCenter = []  # list with two elements [lat,lon] for the center of a CE
 +    prevFrameCEs = []  # list for CEs in previous frame
 +    currFrameCEs = []  # list for CEs in current frame
 +    cloudElementLat = []  # list for a particular CE's lat values
 +    cloudElementLon = []  # list for a particular CE's lon values
 +    cloudElementLatLons = []  # list for a particular CE's (lat,lon) values
 +
 +    prevLatValue = 0.0
 +    prevLonValue = 0.0
 +    TIR_min = 0.0
 +    TIR_max = 0.0
 +    temporalRes = 3  # TRMM data is 3 hourly
 +    precipTotal = 0.0
 +    CETRMMList = []
 +    precip = []
 +    TRMMCloudElementLatLons = []
 +
 +    minCELatLimit = 0.0
 +    minCELonLimit = 0.0
 +    maxCELatLimit = 0.0
 +    maxCELonLimit = 0.0
 +
 +    nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 +
 +    # openfile for storing ALL cloudElement information
 +    cloudElementsFile = open(
 +        (MAINDIRECTORY + '/textFiles/cloudElements.txt'), 'wb')
 +    # openfile for storing cloudElement information meeting user criteria i.e.
 +    # MCCs in this case
 +    cloudElementsUserFile = open(
 +        (MAINDIRECTORY + '/textFiles/cloudElementsUserFile.txt'), 'w')
 +
 +    # NB in the TRMM files the info is hours since the time thus 00Z file has
 +    # in 01, 02 and 03 times
 +    for t in range(mergImgs.shape[0]):
 +
 +        #-------------------------------------------------
 +        # #textfile name for saving the data for arcgis
 +        # thisFileName = MAINDIRECTORY+'/' + (str(timelist[t])).replace(" ", "_") + '.txt'
 +        # cloudElementsTextFile = open(thisFileName,'w')
 +        #-------------------------------------------------
 +
 +        # determine contiguous locations with temperature below the warmest
 +        # temp i.e. cloudElements in each frame
 +        frame, CEcounter = ndimage.measurements.label(
 +            mergImgs[t, :, :], structure=STRUCTURING_ELEMENT)
 +        frameCEcounter = 0
 +        frameNum += 1
 +
 +        # for each of the areas identified, check to determine if it a valid CE
 +        # via an area and T requirement
 +        for count in range(CEcounter):
 +            #[0] is time dimension. Determine the actual values from the data
 +            # loc is a masked array
 +            try:
 +                loc = ndimage.find_objects(frame == (count + 1))[0]
 +            except Exception as e:
 +                print(("Error is %s" % e))
 +                continue
 +
 +            cloudElement = mergImgs[t, :, :][loc]
 +            labels, lcounter = ndimage.label(cloudElement)
 +
 +            # determine the true lats and lons for this particular CE
 +            cloudElementLat = LAT[loc[0], 0]
 +            cloudElementLon = LON[0, loc[1]]
 +
 +            # determine number of boxes in this cloudelement
 +            numOfBoxes = np.count_nonzero(cloudElement)
 +            cloudElementArea = numOfBoxes * XRES * YRES
 +
 +            # If the area is greater than the area required, or if the area is smaller than the suggested area, check if it meets a convective fraction requirement
 +            # consider as CE
 +
 +            if cloudElementArea >= AREA_MIN or (
 +                cloudElementArea < AREA_MIN and (
 +                    (ndimage.minimum(
 +                        cloudElement,
 +                        labels=labels)) /
 +                    float(
 +                        (ndimage.maximum(
 +                    cloudElement,
 +                    labels=labels)))) < CONVECTIVE_FRACTION):
 +
 +                # get some time information and labeling info
 +                frameTime = str(timelist[t])
 +                frameCEcounter += 1
 +                CEuniqueID = 'F' + str(frameNum) + 'CE' + str(frameCEcounter)
 +
 +                #-------------------------------------------------
 +                # textfile name for accesing CE data using MATLAB code
 +                # thisFileName = MAINDIRECTORY+'/' + (str(timelist[t])).replace(" ", "_") + CEuniqueID +'.txt'
 +                # cloudElementsTextFile = open(thisFileName,'w')
 +                #-------------------------------------------------
 +
 +                # ------ NETCDF File stuff for brightness temp stuff ----------
 +                thisFileName = MAINDIRECTORY + '/MERGnetcdfCEs/cloudElements' + \
 +                    (str(timelist[t])).replace(" ", "_") + CEuniqueID + '.nc'
 +                currNetCDFCEData = Dataset(thisFileName, 'w', format='NETCDF4')
 +                currNetCDFCEData.description = 'Cloud Element ' + CEuniqueID + ' temperature data'
 +                currNetCDFCEData.calendar = 'standard'
 +                currNetCDFCEData.conventions = 'COARDS'
 +                # dimensions
 +                currNetCDFCEData.createDimension('time', None)
 +                currNetCDFCEData.createDimension('lat', len(LAT[:, 0]))
 +                currNetCDFCEData.createDimension('lon', len(LON[0, :]))
 +                # variables
 +                tempDims = ('time', 'lat', 'lon',)
 +                times = currNetCDFCEData.createVariable(
 +                    'time', 'f8', ('time',))
 +                times.units = 'hours since ' + str(timelist[t])[:-6]
 +                latitudes = currNetCDFCEData.createVariable(
 +                    'latitude', 'f8', ('lat',))
 +                longitudes = currNetCDFCEData.createVariable(
 +                    'longitude', 'f8', ('lon',))
 +                brightnesstemp = currNetCDFCEData.createVariable(
 +                    'brightnesstemp', 'i16', tempDims)
 +                brightnesstemp.units = 'Kelvin'
 +                # NETCDF data
 +                dates = [timelist[t] + timedelta(hours=0)]
 +                times[:] = date2num(dates, units=times.units)
 +                longitudes[:] = LON[0, :]
 +                longitudes.units = "degrees_east"
 +                longitudes.long_name = "Longitude"
 +
 +                latitudes[:] = LAT[:, 0]
 +                latitudes.units = "degrees_north"
 +                latitudes.long_name = "Latitude"
 +
 +                # generate array of zeros for brightness temperature
 +                brightnesstemp1 = ma.zeros(
 +                    (1, len(latitudes), len(longitudes))).astype('int16')
 +                #-----------End most of NETCDF file stuff ---------------------
 +
 +                # if other dataset (TRMM) assumed to be a precipitation dataset
 +                # was entered
 +                if TRMMdirName:
 +                    #------------------TRMM stuff -----------------------------
 +                    fileDate = ((str(timelist[t])).replace(
 +                        " ", "")[:-8]).replace("-", "")
 +                    fileHr1 = (str(timelist[t])).replace(" ", "")[-8:-6]
 +
 +                    if int(fileHr1) % temporalRes == 0:
 +                        fileHr = fileHr1
 +                    else:
 +                        fileHr = (int(fileHr1) / temporalRes) * temporalRes
 +                    if fileHr < 10:
 +                        fileHr = '0' + str(fileHr)
 +                    else:
 +                        str(fileHr)
 +
 +                    # open TRMM file for the resolution info and to create the
 +                    # appropriate sized grid
 +                    TRMMfileName = TRMMdirName + '/3B42.' + \
 +                        fileDate + "." + str(fileHr) + ".7A.nc"
 +
 +                    TRMMData = Dataset(TRMMfileName, 'r', format='NETCDF4')
 +                    precipRate = TRMMData.variables['pcp'][:, :, :]
 +                    latsrawTRMMData = TRMMData.variables['latitude'][:]
 +                    lonsrawTRMMData = TRMMData.variables['longitude'][:]
 +                    lonsrawTRMMData[lonsrawTRMMData >
 +                                    180] = lonsrawTRMMData[lonsrawTRMMData > 180] - 360.
 +                    LONTRMM, LATTRMM = np.meshgrid(
 +                        lonsrawTRMMData, latsrawTRMMData)
 +
 +                    nygrdTRMM = len(LATTRMM[:, 0]); nxgrdTRMM = len(
 +                        LONTRMM[0, :])
 +                    precipRateMasked = ma.masked_array(
 +                        precipRate, mask=(precipRate < 0.0))
 +                    #---------regrid the TRMM data to the MERG dataset --------
 +                    # regrid using the do_regrid stuff from the Apache OCW
 +                    regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
 +                    #regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 +                    regriddedTRMM = do_regrid(
 +                        precipRateMasked[0, :, :], LATTRMM, LONTRMM, LAT, LON, order=1, mdi=-999999999)
 +                    #----------------------------------------------------------
 +
 +                    # #get the lat/lon info from cloudElement
 +                    # get the lat/lon info from the file
 +                    latCEStart = LAT[0][0]
 +                    latCEEnd = LAT[-1][0]
 +                    lonCEStart = LON[0][0]
 +                    lonCEEnd = LON[0][-1]
 +
 +                    # get the lat/lon info for TRMM data (different resolution)
 +                    latStartT = find_nearest(latsrawTRMMData, latCEStart)
 +                    latEndT = find_nearest(latsrawTRMMData, latCEEnd)
 +                    lonStartT = find_nearest(lonsrawTRMMData, lonCEStart)
 +                    lonEndT = find_nearest(lonsrawTRMMData, lonCEEnd)
 +                    latStartIndex = np.where(latsrawTRMMData == latStartT)
 +                    latEndIndex = np.where(latsrawTRMMData == latEndT)
 +                    lonStartIndex = np.where(lonsrawTRMMData == lonStartT)
 +                    lonEndIndex = np.where(lonsrawTRMMData == lonEndT)
 +
 +                    # get the relevant TRMM info
 +                    CEprecipRate = precipRate[:,
 +                                              (latStartIndex[0][0] - 1):latEndIndex[0][0],
 +                                              (lonStartIndex[0][0] - 1):lonEndIndex[0][0]]
 +                    TRMMData.close()
 +
 +                    # ------ NETCDF File info for writing TRMM CE rainfall ----
 +                    thisFileName = MAINDIRECTORY + '/TRMMnetcdfCEs/TRMM' + \
 +                        (str(timelist[t])).replace(" ", "_") + CEuniqueID + '.nc'
 +                    currNetCDFTRMMData = Dataset(
 +                        thisFileName, 'w', format='NETCDF4')
 +                    currNetCDFTRMMData.description = 'Cloud Element ' + \
 +                        CEuniqueID + ' precipitation data'
 +                    currNetCDFTRMMData.calendar = 'standard'
 +                    currNetCDFTRMMData.conventions = 'COARDS'
 +                    # dimensions
 +                    currNetCDFTRMMData.createDimension('time', None)
 +                    currNetCDFTRMMData.createDimension('lat', len(LAT[:, 0]))
 +                    currNetCDFTRMMData.createDimension('lon', len(LON[0, :]))
 +
 +                    # variables
 +                    TRMMprecip = ('time', 'lat', 'lon',)
 +                    times = currNetCDFTRMMData.createVariable(
 +                        'time', 'f8', ('time',))
 +                    times.units = 'hours since ' + str(timelist[t])[:-6]
 +                    latitude = currNetCDFTRMMData.createVariable(
 +                        'latitude', 'f8', ('lat',))
 +                    longitude = currNetCDFTRMMData.createVariable(
 +                        'longitude', 'f8', ('lon',))
 +                    rainFallacc = currNetCDFTRMMData.createVariable(
 +                        'precipitation_Accumulation', 'f8', TRMMprecip)
 +                    rainFallacc.units = 'mm'
 +
 +                    longitude[:] = LON[0, :]
 +                    longitude.units = "degrees_east"
 +                    longitude.long_name = "Longitude"
 +
 +                    latitude[:] = LAT[:, 0]
 +                    latitude.units = "degrees_north"
 +                    latitude.long_name = "Latitude"
 +
 +                    finalCETRMMvalues = ma.zeros((brightnesstemp.shape))
 +                    #-----------End most of NETCDF file stuff -----------------
 +
 +                # populate cloudElementLatLons by unpacking the original values from loc to get the actual value for lat and lon
 +                # TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this
 +                # 			as cloudElement is masked
 +                for index, value in np.ndenumerate(cloudElement):
 +                    if value != 0:
 +                        lat_index, lon_index = index
 +                        lat_lon_tuple = (
 +                            cloudElementLat[lat_index],
 +                            cloudElementLon[lon_index],
 +                            value)
 +
 +                        # generate the comma separated file for GIS
 +                        cloudElementLatLons.append(lat_lon_tuple)
 +
 +                        # temp data for CE NETCDF file
 +                        brightnesstemp1[0, int(np.where(LAT[:, 0] == cloudElementLat[lat_index])[0]), int(
 +                            np.where(LON[0, :] == cloudElementLon[lon_index])[0])] = value
 +
 +                        if TRMMdirName:
 +                            finalCETRMMvalues[0, int(np.where(LAT[:, 0] == cloudElementLat[lat_index])[0]), int(np.where(LON[0, :] == cloudElementLon[lon_index])[
 +                                0])] = regriddedTRMM[int(np.where(LAT[:, 0] == cloudElementLat[lat_index])[0]), int(np.where(LON[0, :] == cloudElementLon[lon_index])[0])]
 +                            CETRMMList.append((cloudElementLat[lat_index],
 +                                               cloudElementLon[lon_index],
 +                                               finalCETRMMvalues[0,
 +                                                                 cloudElementLat[lat_index],
 +                                                                 cloudElementLon[lon_index]]))
 +                brightnesstemp[:] = brightnesstemp1[:]
 +                currNetCDFCEData.close()
 +
 +                if TRMMdirName:
 +
 +                    # calculate the total precip associated with the feature
 +                    for index, value in np.ndenumerate(finalCETRMMvalues):
 +                        precipTotal += value
 +                        precip.append(value)
 +
 +                    rainFallacc[:] = finalCETRMMvalues[:]
 +                    currNetCDFTRMMData.close()
 +                    TRMMnumOfBoxes = np.count_nonzero(finalCETRMMvalues)
 +                    TRMMArea = TRMMnumOfBoxes * XRES * YRES
 +                    try:
 +                        maxCEprecipRate = np.max(
 +                            finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 +                        minCEprecipRate = np.min(
 +                            finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 +                    except BaseException:
 +                        pass
 +
 +                # sort cloudElementLatLons by lats
 +                cloudElementLatLons.sort(key=lambda tup: tup[0])
 +
 +                # determine the shape (eccentricity) of the cloud element
 +                cloudElementEpsilon = eccentricity(cloudElement)
 +                cloudElementsUserFile.write(
 +                    "\n\nTime is: %s" % (str(timelist[t])))
 +                cloudElementsUserFile.write("\nCEuniqueID is: %s" % CEuniqueID)
 +                latCenter, lonCenter = ndimage.measurements.center_of_mass(
 +                    cloudElement, labels=labels)
 +
 +                # latCenter and lonCenter are given according to the particular array defining this CE
 +                # so you need to convert this value to the overall domain truth
 +                latCenter = cloudElementLat[round(latCenter)]
 +                lonCenter = cloudElementLon[round(lonCenter)]
 +                cloudElementsUserFile.write(
 +                    "\nCenter (lat,lon) is: %.2f\t%.2f" %
 +                    (latCenter, lonCenter))
 +                cloudElementCenter.append(latCenter)
 +                cloudElementCenter.append(lonCenter)
 +                cloudElementsUserFile.write(
 +                    "\nNumber of boxes are: %d" % numOfBoxes)
 +                cloudElementsUserFile.write(
 +                    "\nArea is: %.4f km^2" %
 +                    (cloudElementArea))
 +                cloudElementsUserFile.write(
 +                    "\nAverage brightness temperature is: %.4f K" %
 +                    ndimage.mean(
 +                        cloudElement, labels=labels))
 +                cloudElementsUserFile.write(
 +                    "\nMin brightness temperature is: %.4f K" %
 +                    ndimage.minimum(
 +                        cloudElement, labels=labels))
 +                cloudElementsUserFile.write(
 +                    "\nMax brightness temperature is: %.4f K" %
 +                    ndimage.maximum(
 +                        cloudElement, labels=labels))
 +                cloudElementsUserFile.write(
 +                    "\nBrightness temperature variance is: %.4f K" %
 +                    ndimage.variance(
 +                        cloudElement, labels=labels))
 +                cloudElementsUserFile.write(
 +                    "\nConvective fraction is: %.4f " %
 +                    (((ndimage.minimum(
 +                        cloudElement,
 +                        labels=labels)) /
 +                        float(
 +                        (ndimage.maximum(
 +                            cloudElement,
 +                            labels=labels)))) *
 +                        100.0))
 +                cloudElementsUserFile.write(
 +                    "\nEccentricity is: %.4f " %
 +                    (cloudElementEpsilon))
 +                # populate the dictionary
 +                if TRMMdirName:
 +                    cloudElementDict = {
 +                        'uniqueID': CEuniqueID,
 +                        'cloudElementTime': timelist[t],
 +                        'cloudElementLatLon': cloudElementLatLons,
 +                        'cloudElementCenter': cloudElementCenter,
 +                        'cloudElementArea': cloudElementArea,
 +                        'cloudElementEccentricity': cloudElementEpsilon,
 +                        'cloudElementTmax': TIR_max,
 +                        'cloudElementTmin': TIR_min,
 +                        'cloudElementPrecipTotal': precipTotal,
 +                        'cloudElementLatLonTRMM': CETRMMList,
 +                        'TRMMArea': TRMMArea,
 +                        'CETRMMmax': maxCEprecipRate,
 +                        'CETRMMmin': minCEprecipRate}
 +                else:
 +                    cloudElementDict = {
 +                        'uniqueID': CEuniqueID,
 +                        'cloudElementTime': timelist[t],
 +                        'cloudElementLatLon': cloudElementLatLons,
 +                        'cloudElementCenter': cloudElementCenter,
 +                        'cloudElementArea': cloudElementArea,
 +                        'cloudElementEccentricity': cloudElementEpsilon,
 +                        'cloudElementTmax': TIR_max,
 +                        'cloudElementTmin': TIR_min,
 +                    }
 +
 +                # current frame list of CEs
 +                currFrameCEs.append(cloudElementDict)
 +
 +                # draw the graph node
 +                CLOUD_ELEMENT_GRAPH.add_node(CEuniqueID, cloudElementDict)
 +
 +                if frameNum != 1:
 +                    for cloudElementDict in prevFrameCEs:
 +                        thisCElen = len(cloudElementLatLons)
 +                        percentageOverlap, areaOverlap = cloudElementOverlap(
 +                            cloudElementLatLons, cloudElementDict['cloudElementLatLon'])
 +
 +                        # change weights to integers because the built in shortest path chokes on floating pts according to Networkx doc
 +                        # according to Goyens et al, two CEs are considered
 +                        # related if there is at least 95% overlap between them
 +                        # for consecutive imgs a max of 2 hrs apart
 +                        if percentageOverlap >= 0.95:
 +                            CLOUD_ELEMENT_GRAPH.add_edge(
 +                                cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[0])
 +
 +                        elif percentageOverlap >= 0.90 and percentageOverlap < 0.95:
 +                            CLOUD_ELEMENT_GRAPH.add_edge(
 +                                cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[1])
 +
 +                        elif areaOverlap >= MIN_OVERLAP:
 +                            CLOUD_ELEMENT_GRAPH.add_edge(
 +                                cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[2])
 +
 +                else:
 +                    # TODO: remove this else as we only wish for the CE details
 +                    # ensure only the non-zero elements are considered
 +                    # store intel in allCE file
 +                    labels, _ = ndimage.label(cloudElement)
 +                    cloudElementsFile.write(
 +                        "\n-----------------------------------------------")
 +                    cloudElementsFile.write(
 +                        "\n\nTime is: %s" % (str(timelist[t])))
 +                    # cloudElementLat = LAT[loc[0],0]
 +                    # cloudElementLon = LON[0,loc[1]]
 +
 +                    # populate cloudElementLatLons by unpacking the original values from loc
 +                    # TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this
 +                    # 			as cloudElement is masked
 +                    for index, value in np.ndenumerate(cloudElement):
 +                        if value != 0:
 +                            lat_index, lon_index = index
 +                            lat_lon_tuple = (
 +                                cloudElementLat[lat_index],
 +                                cloudElementLon[lon_index])
 +                            cloudElementLatLons.append(lat_lon_tuple)
 +
 +                    cloudElementsFile.write(
 +                        "\nLocation of rejected CE (lat,lon) points are: %s" %
 +                        cloudElementLatLons)
 +                    # latCenter and lonCenter are given according to the particular array defining this CE
 +                    # so you need to convert this value to the overall domain
 +                    # truth
 +                    latCenter, lonCenter = ndimage.measurements.center_of_mass(
 +                        cloudElement, labels=labels)
 +                    latCenter = cloudElementLat[round(latCenter)]
 +                    lonCenter = cloudElementLon[round(lonCenter)]
 +                    cloudElementsFile.write(
 +                        "\nCenter (lat,lon) is: %.2f\t%.2f" %
 +                        (latCenter, lonCenter))
 +                    cloudElementsFile.write(
 +                        "\nNumber of boxes are: %d" % numOfBoxes)
 +                    cloudElementsFile.write(
 +                        "\nArea is: %.4f km^2" %
 +                        (cloudElementArea))
 +                    cloudElementsFile.write(
 +                        "\nAverage brightness temperature is: %.4f K" %
 +                        ndimage.mean(
 +                            cloudElement, labels=labels))
 +                    cloudElementsFile.write(
 +                        "\nMin brightness temperature is: %.4f K" %
 +                        ndimage.minimum(
 +                            cloudElement, labels=labels))
 +                    cloudElementsFile.write(
 +                        "\nMax brightness temperature is: %.4f K" %
 +                        ndimage.maximum(
 +                            cloudElement, labels=labels))
 +                    cloudElementsFile.write(
 +                        "\nBrightness temperature variance is: %.4f K" %
 +                        ndimage.variance(
 +                            cloudElement, labels=labels))
 +                    cloudElementsFile.write(
 +                        "\nConvective fraction is: %.4f " %
 +                        (((ndimage.minimum(
 +                            cloudElement,
 +                            labels=labels)) /
 +                            float(
 +                            (ndimage.maximum(
 +                                cloudElement,
 +                                labels=labels)))) *
 +                            100.0))
 +                    cloudElementsFile.write(
 +                        "\nEccentricity is: %.4f " %
 +                        (cloudElementEpsilon))
 +                    cloudElementsFile.write(
 +                        "\n-----------------------------------------------")
 +
 +            # reset list for the next CE
 +            nodeExist = False
 +            cloudElementCenter = []
 +            cloudElement = []
 +            cloudElementLat = []
 +            cloudElementLon = []
 +            cloudElementLatLons = []
 +            brightnesstemp1 = []
 +            brightnesstemp = []
 +            finalCETRMMvalues = []
 +            CEprecipRate = []
 +            CETRMMList = []
 +            precipTotal = 0.0
 +            precip = []
 +            TRMMCloudElementLatLons = []
 +
 +        # reset for the next time
 +        prevFrameCEs = []
 +        prevFrameCEs = currFrameCEs
 +        currFrameCEs = []
 +
 +    cloudElementsFile.close
 +    cloudElementsUserFile.close
 +    # if using ARCGIS data store code, uncomment this file close line
 +    # cloudElementsTextFile.close
 +
 +    # clean up graph - remove parent and childless nodes
 +    outAndInDeg = CLOUD_ELEMENT_GRAPH.degree_iter()
 +    toRemove = [node[0] for node in outAndInDeg if node[1] < 1]
 +    CLOUD_ELEMENT_GRAPH.remove_nodes_from(toRemove)
 +
 +    print(("number of nodes are: %s" % CLOUD_ELEMENT_GRAPH.number_of_nodes()))
 +    print(("number of edges are: %s" % CLOUD_ELEMENT_GRAPH.number_of_edges()))
 +    print(("*" * 80))
 +
 +    # hierachial graph output
 +    graphTitle = "Cloud Elements observed over somewhere from 0000Z to 0000Z"
 +    #drawGraph(CLOUD_ELEMENT_GRAPH, graphTitle, edgeWeight)
 +
 +    return CLOUD_ELEMENT_GRAPH
  #******************************************************************
 +
 +
  def findPrecipRate(TRMMdirName, timelist):
 -	''' 
 -	Purpose:: 
 -		Determines the precipitation rates for MCSs found if TRMMdirName was not entered in findCloudElements this can be used
 -
 -	Input:: 
 -		TRMMdirName: a string representing the directory for the original TRMM netCDF files
 -		timelist: a list of python datatimes
 -
 -	Output:: a list of dictionary of the TRMM data 
 -		NB: also creates netCDF with TRMM data for each CE (for post processing) index
 -			in MAINDIRECTORY/TRMMnetcdfCEs
 -   
 -	Assumptions:: Assumes that findCloudElements was run without the TRMMdirName value 
 - 
 -	'''
 -	allCEnodesTRMMdata =[]
 -	TRMMdataDict={}
 -	precipTotal = 0.0
 -
 -	os.chdir((MAINDIRECTORY+'/MERGnetcdfCEs/'))
 -	imgFilename = ''
 -	temporalRes = 3 #3 hours for TRMM
 -	
 -	#sort files
 -	files = filter(os.path.isfile, glob.glob("*.nc"))
 -	files.sort(key=lambda x: os.path.getmtime(x))
 -	
 -	for afile in files:
 -		fullFname = os.path.splitext(afile)[0]
 -		noFrameExtension = (fullFname.replace("_","")).split('F')[0]
 -		CEuniqueID = 'F' +(fullFname.replace("_","")).split('F')[1]
 -		fileDateTimeChar = (noFrameExtension.replace(":","")).split('s')[1]
 -		fileDateTime = fileDateTimeChar.replace("-","")
 -		fileDate = fileDateTime[:-6]
 -		fileHr1=fileDateTime[-6:-4]
 -
 -		cloudElementData = Dataset(afile,'r', format='NETCDF4')
 -		brightnesstemp1 = cloudElementData.variables['brightnesstemp'][:,:,:] 
 -		latsrawCloudElements = cloudElementData.variables['latitude'][:]
 -		lonsrawCloudElements = cloudElementData.variables['longitude'][:]
 -		
 -		brightnesstemp = np.squeeze(brightnesstemp1, axis=0)
 -		
 -		fileHr = fileHr1 if int(fileHr1) % temporalRes == 0 else (int(fileHr1) / temporalRes) * temporalRes
 -		fileHr = '0' + str(fileHr) if fileHr < 10 else str(fileHr)
 -
 -		TRMMfileName = TRMMdirName+"/3B42."+ str(fileDate) + "."+ fileHr +".7A.nc"
 -		TRMMData = Dataset(TRMMfileName,'r', format='NETCDF4')
 -		precipRate = TRMMData.variables['pcp'][:,:,:]
 -		latsrawTRMMData = TRMMData.variables['latitude'][:]
 -		lonsrawTRMMData = TRMMData.variables['longitude'][:]
 -		lonsrawTRMMData[lonsrawTRMMData > 180] = lonsrawTRMMData[lonsrawTRMMData>180] - 360.
 -		LONTRMM, LATTRMM = np.meshgrid(lonsrawTRMMData, latsrawTRMMData)
 -
 -		#nygrdTRMM = len(LATTRMM[:,0]); nxgrd = len(LONTRMM[0,:])
 -		nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 -
 -		precipRateMasked = ma.masked_array(precipRate, mask=(precipRate < 0.0))
 -		#---------regrid the TRMM data to the MERG dataset ----------------------------------
 -		#regrid using the do_regrid stuff from the Apache OCW 
 -		regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
 -		regriddedTRMM = do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 -		#regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 -		#----------------------------------------------------------------------------------
 -
 -		# #get the lat/lon info from
 -		latCEStart = LAT[0][0]
 -		latCEEnd = LAT[-1][0]
 -		lonCEStart = LON[0][0]
 -		lonCEEnd = LON[0][-1]
 -
 -		#get the lat/lon info for TRMM data (different resolution)
 -		latStartT = find_nearest(latsrawTRMMData, latCEStart)
 -		latEndT = find_nearest(latsrawTRMMData, latCEEnd)
 -		lonStartT = find_nearest(lonsrawTRMMData, lonCEStart)
 -		lonEndT = find_nearest(lonsrawTRMMData, lonCEEnd)
 -		latStartIndex = np.where(latsrawTRMMData == latStartT)
 -		latEndIndex = np.where(latsrawTRMMData == latEndT)
 -		lonStartIndex = np.where(lonsrawTRMMData == lonStartT)
 -		lonEndIndex = np.where(lonsrawTRMMData == lonEndT)
 -
 -		#get the relevant TRMM info 
 -		CEprecipRate = precipRate[:,(latStartIndex[0][0]-1):latEndIndex[0][0],(lonStartIndex[0][0]-1):lonEndIndex[0][0]]
 -		TRMMData.close()
 -			
 -		
 -		# ------ NETCDF File stuff ------------------------------------
 -		thisFileName = MAINDIRECTORY+'/TRMMnetcdfCEs/'+ fileDateTime + CEuniqueID +'.nc'
 -		currNetCDFTRMMData = Dataset(thisFileName, 'w', format='NETCDF4')
 -		currNetCDFTRMMData.description = 'Cloud Element '+CEuniqueID + ' rainfall data'
 -		currNetCDFTRMMData.calendar = 'standard'
 -		currNetCDFTRMMData.conventions = 'COARDS'
 -		# dimensions
 -		currNetCDFTRMMData.createDimension('time', None)
 -		currNetCDFTRMMData.createDimension('lat', len(LAT[:,0]))
 -		currNetCDFTRMMData.createDimension('lon', len(LON[0,:]))
 -		# variables
 -		TRMMprecip = ('time','lat', 'lon',)
 -		times = currNetCDFTRMMData.createVariable('time', 'f8', ('time',))
 -		times.units = 'hours since '+ fileDateTime[:-6] 
 -		latitude = currNetCDFTRMMData.createVariable('latitude', 'f8', ('lat',))
 -		longitude = currNetCDFTRMMData.createVariable('longitude', 'f8', ('lon',))
 -		rainFallacc = currNetCDFTRMMData.createVariable('precipitation_Accumulation', 'f8',TRMMprecip )
 -		rainFallacc.units = 'mm'
 -
 -		longitude[:] = LON[0,:]
 -		longitude.units = "degrees_east" 
 -		longitude.long_name = "Longitude" 
 -
 -		latitude[:] =  LAT[:,0]
 -		latitude.units = "degrees_north"
 -		latitude.long_name ="Latitude"
 -
 -		finalCETRMMvalues = ma.zeros((brightnesstemp1.shape))
 -		#-----------End most of NETCDF file stuff ------------------------------------	
 -		for index,value in np.ndenumerate(brightnesstemp):
 -			if value > 0:
 -				lat_index, lon_index = index
 -				currTimeValue = 0
 -				finalCETRMMvalues[0,lat_index,lon_index] = regriddedTRMM[int(np.where(LAT[:,0]==LAT[lat_index,0])[0]), int(np.where(LON[0,:]==LON[0,lon_index])[0])]
 -
 -		rainFallacc[:] = finalCETRMMvalues
 -		currNetCDFTRMMData.close()
 -
 -		for index, value in np.ndenumerate(finalCETRMMvalues):
 -			precipTotal += value 
 -
 -		TRMMnumOfBoxes = np.count_nonzero(finalCETRMMvalues)
 -		TRMMArea = TRMMnumOfBoxes*XRES*YRES	
 -
 -		try:
 -			minCEprecipRate = np.min(finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 -		except:
 -			minCEprecipRate = 0.0
 -
 -		try:
 -			maxCEprecipRate = np.max(finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 -		except:
 -			maxCEprecipRate = 0.0
 -
 -		#add info to CLOUDELEMENTSGRAPH
 -		#TODO try block
 -		for eachdict in CLOUD_ELEMENT_GRAPH.nodes(CEuniqueID):
 -			if eachdict[1]['uniqueID'] == CEuniqueID:
 -				if not 'cloudElementPrecipTotal' in eachdict[1].keys():
 -					eachdict[1]['cloudElementPrecipTotal'] = precipTotal
 -				if not 'cloudElementLatLonTRMM' in eachdict[1].keys():
 -					eachdict[1]['cloudElementLatLonTRMM'] = finalCETRMMvalues
 -				if not 'TRMMArea' in eachdict[1].keys():
 -					eachdict[1]['TRMMArea'] = TRMMArea
 -				if not 'CETRMMmin' in eachdict[1].keys():
 -					eachdict[1]['CETRMMmin'] = minCEprecipRate
 -				if not 'CETRMMmax' in eachdict[1].keys():
 -					eachdict[1]['CETRMMmax'] = maxCEprecipRate
 -
 -		#clean up
 -		precipTotal = 0.0
 -		latsrawTRMMData =[]
 -		lonsrawTRMMData = []
 -		latsrawCloudElements=[]
 -		lonsrawCloudElements=[]
 -		finalCETRMMvalues =[]
 -		CEprecipRate =[]
 -		brightnesstemp =[]
 -		TRMMdataDict ={}
 -
 -	return allCEnodesTRMMdata
 -#******************************************************************	
 +    '''
 +    Purpose::
 +            Determines the precipitation rates for MCSs found if TRMMdirName was not entered in findCloudElements this can be used
 +
 +    Input::
 +            TRMMdirName: a string representing the directory for the original TRMM netCDF files
 +            timelist: a list of python datatimes
 +
 +    Output:: a list of dictionary of the TRMM data
 +            NB: also creates netCDF with TRMM data for each CE (for post processing) index
 +                    in MAINDIRECTORY/TRMMnetcdfCEs
 +
 +    Assumptions:: Assumes that findCloudElements was run without the TRMMdirName value
 +
 +    '''
 +    allCEnodesTRMMdata = []
 +    TRMMdataDict = {}
 +    precipTotal = 0.0
 +
 +    os.chdir((MAINDIRECTORY + '/MERGnetcdfCEs/'))
 +    imgFilename = ''
 +    temporalRes = 3  # 3 hours for TRMM
 +
 +    # sort files
 +    files = list(filter(os.path.isfile, glob.glob("*.nc")))
 +    files.sort(key=lambda x: os.path.getmtime(x))
 +
 +    for afile in files:
 +        fullFname = os.path.splitext(afile)[0]
 +        noFrameExtension = (fullFname.replace("_", "")).split('F')[0]
 +        CEuniqueID = 'F' + (fullFname.replace("_", "")).split('F')[1]
 +        fileDateTimeChar = (noFrameExtension.replace(":", "")).split('s')[1]
 +        fileDateTime = fileDateTimeChar.replace("-", "")
 +        fileDate = fileDateTime[:-6]
 +        fileHr1 = fileDateTime[-6:-4]
 +
 +        cloudElementData = Dataset(afile, 'r', format='NETCDF4')
 +        brightnesstemp1 = cloudElementData.variables['brightnesstemp'][:, :, :]
 +        latsrawCloudElements = cloudElementData.variables['latitude'][:]
 +        lonsrawCloudElements = cloudElementData.variables['longitude'][:]
 +
 +        brightnesstemp = np.squeeze(brightnesstemp1, axis=0)
 +
-         if int(fileHr1) % temporalRes == 0:
-             fileHr = fileHr1
-         else:
-             fileHr = (int(fileHr1) / temporalRes) * temporalRes
- 
-         if fileHr < 10:
-             fileHr = '0' + str(fileHr)
-         else:
-             str(fileHr)
++        fileHr = fileHr1 if int(fileHr1) % temporalRes == 0 else (int(fileHr1) / temporalRes) * temporalRes
++        fileHr = '0' + str(fileHr) if fileHr < 10 else str(fileHr)
 +
-         TRMMfileName = TRMMdirName + "/3B42." + \
-             str(fileDate) + "." + str(fileHr) + ".7A.nc"
++        TRMMfileName = TRMMdirName + "/3B42." + str(fileDate) + "." + fileHr + ".7A.nc"
 +        TRMMData = Dataset(TRMMfileName, 'r', format='NETCDF4')
 +        precipRate = TRMMData.variables['pcp'][:, :, :]
 +        latsrawTRMMData = TRMMData.variables['latitude'][:]
 +        lonsrawTRMMData = TRMMData.variables['longitude'][:]
 +        lonsrawTRMMData[lonsrawTRMMData >
 +                        180] = lonsrawTRMMData[lonsrawTRMMData > 180] - 360.
 +        LONTRMM, LATTRMM = np.meshgrid(lonsrawTRMMData, latsrawTRMMData)
 +
 +        #nygrdTRMM = len(LATTRMM[:,0]); nxgrd = len(LONTRMM[0,:])
 +        nygrd = len(LAT[:, 0]); nxgrd = len(LON[0, :])
 +
 +        precipRateMasked = ma.masked_array(precipRate, mask=(precipRate < 0.0))
 +        #---------regrid the TRMM data to the MERG dataset --------------------
 +        # regrid using the do_regrid stuff from the Apache OCW
 +        regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
 +        regriddedTRMM = do_regrid(
 +            precipRateMasked[0, :, :], LATTRMM, LONTRMM, LAT, LON, order=1, mdi=-999999999)
 +        #regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM,  LONTRMM, LAT, LON, order=1, mdi= -999999999)
 +        #----------------------------------------------------------------------
 +
 +        # #get the lat/lon info from
 +        latCEStart = LAT[0][0]
 +        latCEEnd = LAT[-1][0]
 +        lonCEStart = LON[0][0]
 +        lonCEEnd = LON[0][-1]
 +
 +        # get the lat/lon info for TRMM data (different resolution)
 +        latStartT = find_nearest(latsrawTRMMData, latCEStart)
 +        latEndT = find_nearest(latsrawTRMMData, latCEEnd)
 +        lonStartT = find_nearest(lonsrawTRMMData, lonCEStart)
 +        lonEndT = find_nearest(lonsrawTRMMData, lonCEEnd)
 +        latStartIndex = np.where(latsrawTRMMData == latStartT)
 +        latEndIndex = np.where(latsrawTRMMData == latEndT)
 +        lonStartIndex = np.where(lonsrawTRMMData == lonStartT)
 +        lonEndIndex = np.where(lonsrawTRMMData == lonEndT)
 +
 +        # get the relevant TRMM info
 +        CEprecipRate = precipRate[:,
 +                                  (latStartIndex[0][0] - 1):latEndIndex[0][0],
 +                                  (lonStartIndex[0][0] - 1):lonEndIndex[0][0]]
 +        TRMMData.close()
 +
 +        # ------ NETCDF File stuff ------------------------------------
 +        thisFileName = MAINDIRECTORY + '/TRMMnetcdfCEs/' + \
 +            fileDateTime + CEuniqueID + '.nc'
 +        currNetCDFTRMMData = Dataset(thisFileName, 'w', format='NETCDF4')
 +        currNetCDFTRMMData.description = 'Cloud Element ' + CEuniqueID + ' rainfall data'
 +        currNetCDFTRMMData.calendar = 'standard'
 +        currNetCDFTRMMData.conventions = 'COARDS'
 +        # dimensions
 +        currNetCDFTRMMData.createDimension('time', None)
 +        currNetCDFTRMMData.createDimension('lat', len(LAT[:, 0]))
 +        currNetCDFTRMMData.createDimension('lon', len(LON[0, :]))
 +        # variables
 +        TRMMprecip = ('time', 'lat', 'lon',)
 +        times = currNetCDFTRMMData.createVariable('time', 'f8', ('time',))
 +        times.units = 'hours since ' + fileDateTime[:-6]
 +        latitude = currNetCDFTRMMData.createVariable(
 +            'latitude', 'f8', ('lat',))
 +        longitude = currNetCDFTRMMData.createVariable(
 +            'longitude', 'f8', ('lon',))
 +        rainFallacc = currNetCDFTRMMData.createVariable(
 +            'precipitation_Accumulation', 'f8', TRMMprecip)
 +        rainFallacc.units = 'mm'
 +
 +        longitude[:] = LON[0, :]
 +        longitude.units = "degrees_east"
 +        longitude.long_name = "Longitude"
 +
 +        latitude[:] = LAT[:, 0]
 +        latitude.units = "degrees_north"
 +        latitude.long_name = "Latitude"
 +
 +        finalCETRMMvalues = ma.zeros((brightnesstemp1.shape))
 +        #-----------End most of NETCDF file stuff -----------------------------
 +        for index, value in np.ndenumerate(brightnesstemp):
 +            lat_index, lon_index = index
 +            currTimeValue = 0
 +            if value > 0:
 +
 +                finalCETRMMvalues[0, lat_index, lon_index] = regriddedTRMM[int(np.where(
 +                    LAT[:, 0] == LAT[lat_index, 0])[0]), int(np.where(LON[0, :] == LON[0, lon_index])[0])]
 +
 +        rainFallacc[:] = finalCETRMMvalues
 +        currNetCDFTRMMData.close()
 +
 +        for index, value in np.ndenumerate(finalCETRMMvalues):
 +            precipTotal += value
 +
 +        TRMMnumOfBoxes = np.count_nonzero(finalCETRMMvalues)
 +        TRMMArea = TRMMnumOfBoxes * XRES * YRES
 +
 +        try:
 +            minCEprecipRate = np.min(
 +                finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 +        except BaseException:
 +            minCEprecipRate = 0.0
 +
 +        try:
 +            maxCEprecipRate = np.max(
 +                finalCETRMMvalues[np.nonzero(finalCETRMMvalues)])
 +        except BaseException:
 +            maxCEprecipRate = 0.0
 +
 +        # add info to CLOUDELEMENTSGRAPH
 +        # TODO try block
 +        for eachdict in CLOUD_ELEMENT_GRAPH.nodes(CEuniqueID):
 +            if eachdict[1]['uniqueID'] == CEuniqueID:
 +                if 'cloudElementPrecipTotal' not in list(eachdict[1].keys()):
 +                    eachdict[1]['cloudElementPrecipTotal'] = precipTotal
 +                if 'cloudElementLatLonTRMM' not in list(eachdict[1].keys()):
 +                    eachdict[1]['cloudElementLatLonTRMM'] = finalCETRMMvalues
 +                if 'TRMMArea' not in list(eachdict[1].keys()):
 +                    eachdict[1]['TRMMArea'] = TRMMArea
 +                if 'CETRMMmin' not in list(eachdict[1].keys()):
 +                    eachdict[1]['CETRMMmin'] = minCEprecipRate
 +                if 'CETRMMmax' not in list(eachdict[1].keys()):
 +                    eachdict[1]['CETRMMmax'] = maxCEprecipRate
 +
 +        # clean up
 +        precipTotal = 0.0
 +        latsrawTRMMData = []
 +        lonsrawTRMMData = []
 +        latsrawCloudElements = []
 +        lonsrawCloudElements = []
 +        finalCETRMMvalues = []
 +        CEprecipRate = []
 +        brightnesstemp = []
 +        TRMMdataDict = {}
 +
 +    return allCEnodesTRMMdata
- #******************************************************************
 +
 +
  def findCloudClusters(CEGraph):
 -	'''
 -	Purpose:: 
 -		Determines the cloud clusters properties from the subgraphs in 
 -		the graph i.e. prunes the graph according to the minimum depth
 -
 -	Input:: 
 -		CEGraph: a Networkx directed graph of the CEs with weighted edges
 -		according the area overlap between nodes (CEs) of consectuive frames
 -	
 -	Output:: 
 -		PRUNED_GRAPH: a Networkx directed graph of with CCs/ MCSs
 -
 -	'''
 -
 -	seenNode = []
 -	allMCSLists =[]
 -	pathDictList =[]
 -	pathList=[]
 -
 -	cloudClustersFile = open((MAINDIRECTORY+'/textFiles/cloudClusters.txt'),'wb')
 -	
 -	for eachNode in CEGraph:
 -		#check if the node has been seen before
 -		if eachNode not in dict(enumerate(zip(*seenNode))):
 -			#look for all trees associated with node as the root
 -			thisPathDistanceAndLength = nx.single_source_dijkstra(CEGraph, eachNode)
 -			#determine the actual shortestPath and minimum depth/length
 -			maxDepthAndMinPath = findMaxDepthAndMinPath(thisPathDistanceAndLength)
 -			if maxDepthAndMinPath:
 -				maxPathLength = maxDepthAndMinPath[0] 
 -				shortestPath = maxDepthAndMinPath[1]
 -				
 -				#add nodes and paths to PRUNED_GRAPH
 -				for i in xrange(len(shortestPath)):
 -					if PRUNED_GRAPH.has_node(shortestPath[i]) is False:
 -						PRUNED_GRAPH.add_node(shortestPath[i])
 -						
 -					#add edge if necessary
 -					if i < (len(shortestPath)-1) and PRUNED_GRAPH.has_edge(shortestPath[i], shortestPath[i+1]) is False:
 -						prunedGraphEdgeweight = CEGraph.get_edge_data(shortestPath[i], shortestPath[i+1])['weight']
 -						PRUNED_GRAPH.add_edge(shortestPath[i], shortestPath[i+1], weight=prunedGraphEdgeweight)
 -
 -				#note information in a file for consideration later i.e. checking to see if it works
 -				cloudClustersFile.write("\nSubtree pathlength is %d and path is %s" %(maxPathLength, shortestPath))
 -				#update seenNode info
 -				seenNode.append(shortestPath)	
 -
 -	print "pruned graph"
 -	print "number of nodes are: ", PRUNED_GRAPH.number_of_nodes()
 -	print "number of edges are: ", PRUNED_GRAPH.number_of_edges()
 -	print ("*"*80)		
 -					
 -	graphTitle = "Cloud Clusters observed over somewhere during sometime"
 -	#drawGraph(PRUNED_GRAPH, graphTitle, edgeWeight)
 -	cloudClustersFile.close
 -	
 -	return PRUNED_GRAPH  
 +    '''
 +    Purpose::
 +            Determines the cloud clusters properties from the subgraphs in
 +            the graph i.e. prunes the graph according to the minimum depth
 +
 +    Input::
 +            CEGraph: a Networkx directed graph of the CEs with weighted edges
 +            according the area overlap between nodes (CEs) of consectuive frames
 +
 +    Output::
 +            PRUNED_GRAPH: a Networkx directed graph of with CCs/ MCSs
 +    '''
 +
 +    seenNode = []
 +    allMCSLists = []
 +    pathDictList = []
 +    pathList = []
 +
 +    cloudClustersFile = open(
 +        (MAINDIRECTORY + '/textFiles/cloudClusters.txt'), 'wb')
 +
 +    for eachNode in CEGraph:
 +        # check if the node has been seen before
 +        if eachNode not in dict(enumerate(zip(*seenNode))):
 +            # look for all trees associated with node as the root
 +            thisPathDistanceAndLength = nx.single_source_dijkstra(
 +                CEGraph, eachNode)
 +            # determine the actual shortestPath and minimum depth/length
 +            maxDepthAndMinPath = findMaxDepthAndMinPath(
 +                thisPathDistanceAndLength)
 +            if maxDepthAndMinPath:
 +                maxPathLength = maxDepthAndMinPath[0]
 +                shortestPath = maxDepthAndMinPath[1]
 +
 +                # add nodes and paths to PRUNED_GRAPH
 +                for i in range(len(shortestPath)):
 +                    if PRUNED_GRAPH.has_node(shortestPath[i]) is False:
 +                        PRUNED_GRAPH.add_node(shortestPath[i])
 +
 +                    # add edge if necessary
 +                    if i < (len(shortestPath) - 1) and PRUNED_GRAPH.has_edge(
 +                            shortestPath[i], shortestPath[i + 1]) is False:
 +                        prunedGraphEdgeweight = CEGraph.get_edge_data(
 +                            shortestPath[i], shortestPath[i + 1])['weight']
 +                        PRUNED_GRAPH.add_edge(
 +                            shortestPath[i], shortestPath[i + 1], weight=prunedGraphEdgeweight)
 +
 +                # note information in a file for consideration later i.e.
 +                # checking to see if it works
 +                cloudClustersFile.write(
 +                    "\nSubtree pathlength is %d and path is %s" %
 +                    (maxPathLength, shortestPath))
 +                # update seenNode info
 +                seenNode.append(shortestPath)
 +
 +    print("pruned graph")
-     print(("number of nodes are: %s", PRUNED_GRAPH.number_of_nodes()))
-     print(("number of edges are: %s", PRUNED_GRAPH.number_of_edges()))
-     print(("*" * 80))
++    print("number of nodes are: %s", PRUNED_GRAPH.number_of_nodes())
++    print("number of edges are: %s", PRUNED_GRAPH.number_of_edges())
++    print("*" * 80)
 +
 +    graphTitle = "Cloud Clusters observed over somewhere during sometime"
 +    #drawGraph(PRUNED_GRAPH, graphTitle, edgeWeight)
 +    cloudClustersFile.close
 +
 +    return PRUNED_GRAPH
  #******************************************************************
 -def findMCC (prunedGraph):
 -	'''
 -	Purpose:: 
 -		Determines if subtree is a MCC according to Laurent et al 1998 criteria
 -
 -	Input:: 
 -		prunedGraph: a Networkx Graph representing the CCs 
 -
 -	Output:: 
 -		finalMCCList: a list of list of tuples representing a MCC
 -			 
 -	Assumptions: 
 -		frames are ordered and are equally distributed in time e.g. hrly satellite images
 - 
 -	'''
 -	MCCList = []
 -	MCSList = []
 -	definiteMCC = []
 -	definiteMCS = []
 -	eachList =[]
 -	eachMCCList =[]
 -	maturing = False
 -	decaying = False
 -	fNode = ''
 -	lNode = ''
 -	removeList =[]
 -	imgCount = 0
 -	imgTitle =''
 -	
 -	maxShieldNode = ''
 -	orderedPath =[]
 -	treeTraversalList =[]
 -	definiteMCCFlag = False
 -	unDirGraph = nx.Graph()
 -	aSubGraph = nx.DiGraph()
 -	definiteMCSFlag = False
 -
 -	
 -	#connected_components is not available for DiGraph, so generate graph as undirected 
 -	unDirGraph = PRUNED_GRAPH.to_undirected()
 -	subGraph = nx.connected_component_subgraphs(unDirGraph)
 -
 -	#for each path in the subgraphs determined
 -	for path in subGraph:
 -		#definite is a subTree provided the duration is longer than 3 hours
 -
 -		if len(path.nodes()) > MIN_MCS_DURATION:
 -			orderedPath = path.nodes()
 -			orderedPath.sort(key=lambda item:(len(item.split('C')[0]), item.split('C')[0]))
 -			#definiteMCS.append(orderedPath)
 -
 -			#build back DiGraph for checking purposes/paper purposes
 -			aSubGraph.add_nodes_from(path.nodes())	
 -			for eachNode in path.nodes():
 -				if prunedGraph.predecessors(eachNode):
 -					for node in prunedGraph.predecessors(eachNode):
 -						aSubGraph.add_edge(node,eachNode,weight=edgeWeight[0])
 -
 -				if prunedGraph.successors(eachNode):
 -					for node in prunedGraph.successors(eachNode):
 -						aSubGraph.add_edge(eachNode,node,weight=edgeWeight[0])
 -			imgTitle = 'CC'+str(imgCount+1)
 -			#drawGraph(aSubGraph, imgTitle, edgeWeight) #for eachNode in path:
 -			imgCount +=1
 -			#----------end build back ---------------------------------------------
 -
 -			mergeList, splitList = hasMergesOrSplits(path)	
 -			#add node behavior regarding neutral, merge, split or both
 -			for node in path:
 -				if node in mergeList and node in splitList:
 -					addNodeBehaviorIdentifier(node,'B')
 -				elif node in mergeList and not node in splitList:
 -					addNodeBehaviorIdentifier(node,'M')
 -				elif node in splitList and not node in mergeList:
 -					addNodeBehaviorIdentifier(node,'S')
 -				else:
 -					addNodeBehaviorIdentifier(node,'N')
 -			
 -
 -			#Do the first part of checking for the MCC feature
 -			#find the path
 -			treeTraversalList = traverseTree(aSubGraph, orderedPath[0],[], set(), [])
 -			#print "treeTraversalList is ", treeTraversalList
 -			#check the nodes to determine if a MCC on just the area criteria (consecutive nodes meeting the area and temp requirements)
 -			MCCList = checkedNodesMCC(prunedGraph, treeTraversalList)
 -			for aDict in MCCList:
 -				for eachNode in aDict["fullMCSMCC"]:
 -					addNodeMCSIdentifier(eachNode[0],eachNode[1])
 -				
 -			#do check for if MCCs overlap
 -			if MCCList:
 -				if len(MCCList) > 1:
 -					for count in range(len(MCCList)): #for eachDict in MCCList:
 -						#if there are more than two lists
 -						if count >= 1:
 -							#and the first node in this list
 -							eachList = list(x[0] for x in MCCList[count]["possMCCList"])
 -							eachList.sort(key=lambda nodeID:(len(nodeID.split('C')[0]), nodeID.split('C')[0]))
 -							if eachList:
 -								fNode = eachList[0]
 -								#get the lastNode in the previous possMCC list
 -								eachList = list(x[0] for x in MCCList[(count-1)]["possMCCList"])
 -								eachList.sort(key=lambda nodeID:(len(nodeID.split('C')[0]), nodeID.split('C')[0]))
 -								if eachList:
 -									lNode = eachList[-1]
 -									if lNode in CLOUD_ELEMENT_GRAPH.predecessors(fNode):
 -										for aNode in CLOUD_ELEMENT_GRAPH.predecessors(fNode):
 -											if aNode in eachList and aNode == lNod

<TRUNCATED>

[3/3] climate git commit: CLIMATE-922 Optimize traversing in mccSearch

Posted by le...@apache.org.
CLIMATE-922 Optimize traversing in mccSearch


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/cf4fb57f
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/cf4fb57f
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/cf4fb57f

Branch: refs/heads/master
Commit: cf4fb57fda5f129494cbd1bd424a7018fa297a83
Parents: 9a30e19 c800e48
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Sep 11 08:31:56 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Sep 11 08:31:56 2017 -0700

----------------------------------------------------------------------
 mccsearch/code/mccSearch.py | 318 +++++++++++++++++++++++++++++++++++----
 1 file changed, 292 insertions(+), 26 deletions(-)
----------------------------------------------------------------------