You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/07 23:45:10 UTC

svn commit: r1511511 - /incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py

Author: boustani
Date: Wed Aug  7 21:45:10 2013
New Revision: 1511511

URL: http://svn.apache.org/r1511511
Log:
Now supports retrieving the metadata of all parameters; also cleans up the code.

Modified:
    incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py

Modified: incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py?rev=1511511&r1=1511510&r2=1511511&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Wed Aug  7 21:45:10 2013
@@ -33,151 +33,47 @@ from dataset import Dataset
 URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
 
 
-'''
-def normalizeDatetimes(datetimes, time_step):
-    """
-    Input::
-        datetimes - list of datetime objects that need to be normalized
-        time_step - string of value ('daily' | 'monthly')
-    Output::
-        normalDatetimes - list of datetime objects that have been normalized
-    
-    Normalization Rules::
-        Daily data will be forced to an hour value of 00:00:00
-        Monthly data will be forced to the first of the month at midnight 
-    """
-    normalDatetimes = []
-    if time_step.lower() == 'monthly':
-        for inputDatetime in datetimes:
-            if inputDatetime.day != 1:
-                # Clean the inputDatetime
-                inputDatetimeString = inputDatetime.strftime('%Y%m%d')
-                normalInputDatetimeString = inputDatetimeString[:6] + '01'
-                inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
-
-            normalDatetimes.append(inputDatetime)
-
-    elif time_step.lower() == 'daily':
-        for inputDatetime in datetimes:
-            if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
-                datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
-                normalDatetimeString = datetimeString[:8] + '000000'
-                inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
-            
-            normalDatetimes.append(inputDatetime)
-
-
-    return normalDatetimes
-'''
-
-'''
-def _expand_date(start_time, end_time, time_step):
-    if time_step.lower() == 'monthly':
-        if start_time.day != 1:
-            # Clean the startTime
-            startTimeString = start_time.strftime('%Y%m%d')
-            normalInputDatetimeString = startTimeString[:6] + '01'
-            start_time = datetime.strptime(normalInputDatetimeString, '%Y%m%d')
-            ##TODO: Change the 3 lines above with this line:
-            ##start_time = datetime(start_time.year, start_time.month, 1)
-
-        
-        lastDayOfMonth = calendar.monthrange(end_time.year, end_time.month)[1]
-        if end_time.day != lastDayOfMonth:
-            # Clean the endTime
-            endTimeString = end_time.strftime('%Y%m%d')
-            endTimeString = endTimeString[:6] + str(lastDayOfMonth)
-            end_time = datetime.strptime(endTimeString, '%Y%m%d')
-            ##TODO: Change the 3 lines above with this line:
-            ##end_time = datetime(end_time.year, end_time.month, lastDayOfMonth)
-
-    elif time_step.lower() == 'daily':
-        if start_time.hour != 0 or start_time.minute != 0 or start_time.second != 0:
-            datetimeString = start_time.strftime('%Y%m%d%H%M%S')
-            normalDatetimeString = datetimeString[:8] + '000000'
-            start_time = datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
-            ##TODO: Change the 3 lines above with this line:
-            ##start_time = datetime(start_time.year, start_time.month, start_time.day, 00, 00, 00)
-        
-        endTimeString = end_time.strftime('%Y%m%d%H%M%S')
-        endTimeString = endTimeString[:8] + '235959'
-        end_time = datetime.strptime(endTimeString, '%Y%m%d%H%M%S')
-        ##TODO: Change the 3 lines above with this line:
-        ##end_time = datetime(end_time.year, end_time.month, end_time.day, 23, 59, 59)
-
-    return start_time, end_time
-'''
-
-'''
-def _reshape_arrays(lats, lons, levels, values, unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count):
-
-    # Reshape arrays
-    lats = lats.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
-    lons = lons.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
-    levels = np.array(levels).reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
-    values = values.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
-
-    # Flatten dimension if only single level
-    if unique_levels_count == 1:
-        values = values[:, :, :, 0]
-        lats = lats[0, :, :, 0]
-        lons = lons[0, :, :, 0]
-    
-    return lats, lons, levels, values
-'''
+def parameters_metadata():
+    '''Get the metadata of all parameters from RCMED.
 
-'''
-def _reorder_data(lats, lons, times, values):
+    :returns: Dictionary of information for each parameter stored in one list
+    :rtype: List of dictionaries
+    '''
 
-    # Re-order values in values array such that when reshaped everywhere is where it should be
-    #  (as DB doesn't necessarily return everything in order)
-    order = np.lexsort((lons, lats, times))
-    counter = 0
-    sorted_values = np.zeros_like(values)
-    sorted_lats = np.zeros_like(lats)
-    sorted_lons = np.zeros_like(lons)
-    for i in order:
-        sorted_values[counter] = values[i]
-        sorted_lats[counter] = lats[i]
-        sorted_lons[counter] = lons[i]
-        counter += 1
-    
-    return sorted_lats, sorted_lons, sorted_values
-'''
+    param_info_list = []
+    url = URL + "&param_info=yes"
+    string = urllib2.urlopen(url)
+    data_string = string.read()
+    json_format_data = json.loads(data_string)
+    fields_name = json_format_data['fields_name']
+    data = json_format_data['data']
+    for row in data:
+        dic = {}
+        for name in fields_name:
+            dic[name] = row[fields_name.index(name)]
+        param_info_list.append(dic)
 
-'''
-def _calculate_len(unique_lat, unique_lon, unique_level, unique_time):
+    return param_info_list
 
-    unique_lats_count = len(unique_lat)
-    unique_lons_count = len(unique_lon)
-    unique_levels_count = len(unique_level)
-    unique_times_count = len(unique_time)
-    
-    return unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count
-'''
 
+def _make_mask_array(values):  
+    '''Create a masked array to deal with missing values
 
-def parameters_metadata():
-    '''
+    :param values: Numpy array of values which may contain missing values
+    :type values: Numpy array
+    :returns: Masked array of values
+    :rtype: Masked array
     '''
-    pass
 
+    missing_values = -9999
+    values = ma.masked_array(values, mask=(values == missing_values))
 
-def _make_mask_array(values):  
-    '''
-    '''
-
-    # Created masked array to deal with missing values
-    #  -these make functions like values.mean(), values.max() etc ignore missing values
-    mdi = -9999  # TODO: extract this value from the DB retrieval metadata
-    values = ma.masked_array(values, mask=(values == mdi))
-    
     return values
 
 
 def _reshape_values(values, unique_values):
     '''Reshape values into 4D array.
-    
+
     :param values: Raw values data
     :type values: numpy array
     :param unique_values: Tuple of unique latitudes, longitudes, levels and times data.
@@ -191,38 +87,37 @@ def _reshape_values(values, unique_value
     lons_len = len(unique_values[1])
     levels_len = len(unique_values[2])
     times_len = len(unique_values[3])
-    
+
     values = values.reshape(levels_len, times_len, lats_len, lons_len)
-    
+
     return values
 
 
 def _calculate_time(unique_times, time_step):
     '''Convert each time to the datetime object.
-    
+
     :param unique_times: Unique time data
     :type unique_times: String
     :param time_step: Time step
     :type time_step: String
-    
+
     :returns: Unique datetime objects of time data
     :rtype: List
     '''
-    
+
     time_format = "%Y-%m-%d %H:%M:%S"
     unique_times = [datetime.strptime(time, time_format) for time in unique_times]
     #There is no need to sort time.
     #This function may required still in RCMES
     #unique_times.sort()
     #This function should be moved to the data_process.
-    #unique_times = normalizeDatetimes(unique_times, time_step)
-    
+
     return unique_times
 
 
 def _make_unique(lats, lons, levels, times):
     '''Find the unique values of input data.
-    
+
     :param lats: lats
     :type lats: Numpy array
     :param lons: lons
@@ -231,7 +126,7 @@ def _make_unique(lats, lons, levels, tim
     :type levels: Numpy array
     :param times: times
     :type times: Numpy array
-    
+
     :returns: Unique numpy arrays of latitudes, longitudes, levels and times
     :rtype: Tuple
     '''
@@ -240,16 +135,16 @@ def _make_unique(lats, lons, levels, tim
     unique_lons = np.unique(lons)
     unique_levels = np.unique(levels)
     unique_times = np.unique(times)
-    
+
     return (unique_lats, unique_lons, unique_levels, unique_times)
 
 
 def _get_data(url):
     '''Reterive data from database.
-    
+
     :param url: url to query from database
     :type url: String
-    
+
     :returns: Latitudes, longitudes, levels, times and values data
     :rtype: (list, list, list, list, list)
     '''
@@ -258,7 +153,7 @@ def _get_data(url):
     data_string = string.read()    
     index_of_data = re.search('data: \r\n', data_string)
     data = data_string[index_of_data.end():len(data_string)]
-    data = data.split('\r\n')    
+    data = data.split('\r\n') 
 
     lats = []
     lons = []
@@ -273,18 +168,18 @@ def _get_data(url):
         levels.append(np.float32(row[2]))
         times.append(row[3])
         values.append(np.float32(row[4]))
-        
+
     return lats, lons, levels, times, values
 
 
 def _beginning_of_date(time, time_step):
     '''Calculate the beginning of given time, based on time step.
-    
+
     :param time: Given time
     :type time: Datetime
     :param time_step: Time step (monthly or daily)
     :type time_step: String
-    
+
     :returns: Beginning of given time
     :rtype: Datetime
     '''
@@ -303,18 +198,18 @@ def _beginning_of_date(time, time_step):
             time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
             ##TODO: Change the 3 lines above with this line:
             ##time = datetime(time.year, time.month, time.day, 00, 00, 00)
-    
+
     return time
 
 
 def _end_of_date(time, time_step):
     '''Calculate the end of given time, based on time step.
-    
+
     :param time: Given time
     :type time: Datetime
     :param time_step: Time step (monthly or daily)
     :type time_step: String
-    
+
     :returns: End of given time
     :rtype: Datetime
     '''
@@ -336,10 +231,9 @@ def _end_of_date(time, time_step):
     return time
 
 
-
 def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
     '''Generate the url to query from database
-    
+
     :param dataset_id: Dataset id.
     :type dataset_id: Integer
     :param parameter_id: Parameter id
@@ -377,7 +271,6 @@ def _generate_query_url(dataset_id, para
     return url_request
 
 
-
 def _get_parameter_info(dataset_id, parameter_id):
     '''General information for given parameter id.
 
@@ -385,7 +278,7 @@ def _get_parameter_info(dataset_id, para
     :type dataset_id: Integer
     :param parameter_id: Parameter id
     :type parameter_id: Integer
-    
+
     :returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter
     :rtype: (string, string, string, string, string, string, string)
     '''
@@ -403,14 +296,13 @@ def _get_parameter_info(dataset_id, para
     start_date = data_string["start_date"]
     end_date = data_string["end_date"]
     unit = data_string["units"]
-    
-    return (database, time_step, realm, instrument, start_date, end_date, unit)
 
+    return (database, time_step, realm, instrument, start_date, end_date, unit)
 
 
 def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
     '''Get data from one database(parameter).
-    
+
     :param dataset_id: Dataset id.
     :type dataset_id: Integer
     :param parameter_id: Parameter id
@@ -427,7 +319,7 @@ def parameter_dataset(dataset_id, parame
     :type start_time: Datetime
     :param end_time: End time 
     :type end_time: Datetime
-    
+
     :returns: Dataset object
     :rtype: Object
     '''
@@ -437,7 +329,7 @@ def parameter_dataset(dataset_id, parame
     parameter_name = parameter_info[0]
     url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
     lats, lons, levels, times, values = _get_data(url)
-    
+
     lats = np.array(lats)
     lons = np.array(lons)
     times = np.array(times)