You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/07 23:45:10 UTC
svn commit: r1511511 -
/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
Author: boustani
Date: Wed Aug 7 21:45:10 2013
New Revision: 1511511
URL: http://svn.apache.org/r1511511
Log:
now supports to get all parameters metadata and cleaning code
Modified:
incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
Modified: incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py?rev=1511511&r1=1511510&r2=1511511&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Wed Aug 7 21:45:10 2013
@@ -33,151 +33,47 @@ from dataset import Dataset
URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
-'''
-def normalizeDatetimes(datetimes, time_step):
- """
- Input::
- datetimes - list of datetime objects that need to be normalized
- time_step - string of value ('daily' | 'monthly')
- Output::
- normalDatetimes - list of datetime objects that have been normalized
-
- Normalization Rules::
- Daily data will be forced to an hour value of 00:00:00
- Monthly data will be forced to the first of the month at midnight
- """
- normalDatetimes = []
- if time_step.lower() == 'monthly':
- for inputDatetime in datetimes:
- if inputDatetime.day != 1:
- # Clean the inputDatetime
- inputDatetimeString = inputDatetime.strftime('%Y%m%d')
- normalInputDatetimeString = inputDatetimeString[:6] + '01'
- inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
-
- normalDatetimes.append(inputDatetime)
-
- elif time_step.lower() == 'daily':
- for inputDatetime in datetimes:
- if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
- datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
- normalDatetimeString = datetimeString[:8] + '000000'
- inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
-
- normalDatetimes.append(inputDatetime)
-
-
- return normalDatetimes
-'''
-
-'''
-def _expand_date(start_time, end_time, time_step):
- if time_step.lower() == 'monthly':
- if start_time.day != 1:
- # Clean the startTime
- startTimeString = start_time.strftime('%Y%m%d')
- normalInputDatetimeString = startTimeString[:6] + '01'
- start_time = datetime.strptime(normalInputDatetimeString, '%Y%m%d')
- ##TODO: Change the 3 lines above with this line:
- ##start_time = datetime(start_time.year, start_time.month, 1)
-
-
- lastDayOfMonth = calendar.monthrange(end_time.year, end_time.month)[1]
- if end_time.day != lastDayOfMonth:
- # Clean the endTime
- endTimeString = end_time.strftime('%Y%m%d')
- endTimeString = endTimeString[:6] + str(lastDayOfMonth)
- end_time = datetime.strptime(endTimeString, '%Y%m%d')
- ##TODO: Change the 3 lines above with this line:
- ##end_time = datetime(end_time.year, end_time.month, lastDayOfMonth)
-
- elif time_step.lower() == 'daily':
- if start_time.hour != 0 or start_time.minute != 0 or start_time.second != 0:
- datetimeString = start_time.strftime('%Y%m%d%H%M%S')
- normalDatetimeString = datetimeString[:8] + '000000'
- start_time = datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
- ##TODO: Change the 3 lines above with this line:
- ##start_time = datetime(start_time.year, start_time.month, start_time.day, 00, 00, 00)
-
- endTimeString = end_time.strftime('%Y%m%d%H%M%S')
- endTimeString = endTimeString[:8] + '235959'
- end_time = datetime.strptime(endTimeString, '%Y%m%d%H%M%S')
- ##TODO: Change the 3 lines above with this line:
- ##end_time = datetime(end_time.year, end_time.month, end_time.day, 23, 59, 59)
-
- return start_time, end_time
-'''
-
-'''
-def _reshape_arrays(lats, lons, levels, values, unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count):
-
- # Reshape arrays
- lats = lats.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
- lons = lons.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
- levels = np.array(levels).reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
- values = values.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
-
- # Flatten dimension if only single level
- if unique_levels_count == 1:
- values = values[:, :, :, 0]
- lats = lats[0, :, :, 0]
- lons = lons[0, :, :, 0]
-
- return lats, lons, levels, values
-'''
+def parameters_metadata():
+ '''Get the metadata of all parameter from RCMED.
-'''
-def _reorder_data(lats, lons, times, values):
+ :returns: Dictionary of information for each parameter stored in one list
+ :rtype: List of dictionaries
+ '''
- # Re-order values in values array such that when reshaped everywhere is where it should be
- # (as DB doesn't necessarily return everything in order)
- order = np.lexsort((lons, lats, times))
- counter = 0
- sorted_values = np.zeros_like(values)
- sorted_lats = np.zeros_like(lats)
- sorted_lons = np.zeros_like(lons)
- for i in order:
- sorted_values[counter] = values[i]
- sorted_lats[counter] = lats[i]
- sorted_lons[counter] = lons[i]
- counter += 1
-
- return sorted_lats, sorted_lons, sorted_values
-'''
+ param_info_list = []
+ url = URL + "&param_info=yes"
+ string = urllib2.urlopen(url)
+ data_string = string.read()
+ json_format_data = json.loads(data_string)
+ fields_name = json_format_data['fields_name']
+ data = json_format_data['data']
+ for row in data:
+ dic = {}
+ for name in fields_name:
+ dic[name] = row[fields_name.index(name)]
+ param_info_list.append(dic)
-'''
-def _calculate_len(unique_lat, unique_lon, unique_level, unique_time):
+ return param_info_list
- unique_lats_count = len(unique_lat)
- unique_lons_count = len(unique_lon)
- unique_levels_count = len(unique_level)
- unique_times_count = len(unique_time)
-
- return unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count
-'''
+def _make_mask_array(values):
+ '''Created masked array to deal with missing values
-def parameters_metadata():
- '''
+ :param values: Numpy array of values which may contain missing values
+ :type values: Numpy array
+ :returns: Masked array of values
+ :rtype: Masked array
'''
- pass
+ missing_values = -9999
+ values = ma.masked_array(values, mask=(values == missing_values))
-def _make_mask_array(values):
- '''
- '''
-
- # Created masked array to deal with missing values
- # -these make functions like values.mean(), values.max() etc ignore missing values
- mdi = -9999 # TODO: extract this value from the DB retrieval metadata
- values = ma.masked_array(values, mask=(values == mdi))
-
return values
def _reshape_values(values, unique_values):
'''Reshape values into 4D array.
-
+
:param values: Raw values data
:type values: numpy array
:param unique_values: Tuple of unique latitudes, longitudes, levels and times data.
@@ -191,38 +87,37 @@ def _reshape_values(values, unique_value
lons_len = len(unique_values[1])
levels_len = len(unique_values[2])
times_len = len(unique_values[3])
-
+
values = values.reshape(levels_len, times_len, lats_len, lons_len)
-
+
return values
def _calculate_time(unique_times, time_step):
'''Convert each time to the datetime object.
-
+
:param unique_times: Unique time data
:type unique_times: String
:param time_step: Time step
:type time_step: String
-
+
:returns: Unique datetime objects of time data
:rtype: List
'''
-
+
time_format = "%Y-%m-%d %H:%M:%S"
unique_times = [datetime.strptime(time, time_format) for time in unique_times]
#There is no need to sort time.
#This function may required still in RCMES
#unique_times.sort()
#This function should be moved to the data_process.
- #unique_times = normalizeDatetimes(unique_times, time_step)
-
+
return unique_times
def _make_unique(lats, lons, levels, times):
'''Find the unique values of input data.
-
+
:param lats: lats
:type lats: Numpy array
:param lons: lons
@@ -231,7 +126,7 @@ def _make_unique(lats, lons, levels, tim
:type levels: Numpy array
:param times: times
:type times: Numpy array
-
+
:returns: Unique numpy arrays of latitudes, longitudes, levels and times
:rtype: Tuple
'''
@@ -240,16 +135,16 @@ def _make_unique(lats, lons, levels, tim
unique_lons = np.unique(lons)
unique_levels = np.unique(levels)
unique_times = np.unique(times)
-
+
return (unique_lats, unique_lons, unique_levels, unique_times)
def _get_data(url):
'''Reterive data from database.
-
+
:param url: url to query from database
:type url: String
-
+
:returns: Latitudes, longitudes, levels, times and values data
:rtype: (list, list, list, list, list)
'''
@@ -258,7 +153,7 @@ def _get_data(url):
data_string = string.read()
index_of_data = re.search('data: \r\n', data_string)
data = data_string[index_of_data.end():len(data_string)]
- data = data.split('\r\n')
+ data = data.split('\r\n')
lats = []
lons = []
@@ -273,18 +168,18 @@ def _get_data(url):
levels.append(np.float32(row[2]))
times.append(row[3])
values.append(np.float32(row[4]))
-
+
return lats, lons, levels, times, values
def _beginning_of_date(time, time_step):
'''Calculate the beginning of given time, based on time step.
-
+
:param time: Given time
:type time: Datetime
:param time_step: Time step (monthly or daily)
:type time_step: String
-
+
:returns: Beginning of given time
:rtype: Datetime
'''
@@ -303,18 +198,18 @@ def _beginning_of_date(time, time_step):
time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
##TODO: Change the 3 lines above with this line:
##time = datetime(time.year, time.month, time.day, 00, 00, 00)
-
+
return time
def _end_of_date(time, time_step):
'''Calculate the end of given time, based on time step.
-
+
:param time: Given time
:type time: Datetime
:param time_step: Time step (monthly or daily)
:type time_step: String
-
+
:returns: End of given time
:rtype: Datetime
'''
@@ -336,10 +231,9 @@ def _end_of_date(time, time_step):
return time
-
def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
'''Generate the url to query from database
-
+
:param dataset_id: Dataset id.
:type dataset_id: Integer
:param parameter_id: Parameter id
@@ -377,7 +271,6 @@ def _generate_query_url(dataset_id, para
return url_request
-
def _get_parameter_info(dataset_id, parameter_id):
'''General information for given parameter id.
@@ -385,7 +278,7 @@ def _get_parameter_info(dataset_id, para
:type dataset_id: Integer
:param parameter_id: Parameter id
:type parameter_id: Integer
-
+
:returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter
:rtype: (string, string, string, string, string, string, string)
'''
@@ -403,14 +296,13 @@ def _get_parameter_info(dataset_id, para
start_date = data_string["start_date"]
end_date = data_string["end_date"]
unit = data_string["units"]
-
- return (database, time_step, realm, instrument, start_date, end_date, unit)
+ return (database, time_step, realm, instrument, start_date, end_date, unit)
def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
'''Get data from one database(parameter).
-
+
:param dataset_id: Dataset id.
:type dataset_id: Integer
:param parameter_id: Parameter id
@@ -427,7 +319,7 @@ def parameter_dataset(dataset_id, parame
:type start_time: Datetime
:param end_time: End time
:type end_time: Datetime
-
+
:returns: Dataset object
:rtype: Object
'''
@@ -437,7 +329,7 @@ def parameter_dataset(dataset_id, parame
parameter_name = parameter_info[0]
url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
lats, lons, levels, times, values = _get_data(url)
-
+
lats = np.array(lats)
lons = np.array(lons)
times = np.array(times)