You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/02 01:41:26 UTC
svn commit: r1509471 -
/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
Author: boustani
Date: Thu Aug 1 23:41:26 2013
New Revision: 1509471
URL: http://svn.apache.org/r1509471
Log:
first version of rcmed.py under data_source folder
Added:
incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
Added: incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py?rev=1509471&view=auto
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py (added)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Thu Aug 1 23:41:26 2013
@@ -0,0 +1,451 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+ RCMED - A class for retrieving data from Regional Climate Model Evaluation Database (JPL).
+'''
+
+import urllib, urllib2
+import re
+import json
+import numpy as np
+import numpy.ma as ma
+from datetime import datetime
+import calendar
+from dataset import Dataset
+
+
+URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?'
+
+
+'''
+def normalizeDatetimes(datetimes, time_step):
+ """
+ Input::
+ datetimes - list of datetime objects that need to be normalized
+ time_step - string of value ('daily' | 'monthly')
+ Output::
+ normalDatetimes - list of datetime objects that have been normalized
+
+ Normalization Rules::
+ Daily data will be forced to an hour value of 00:00:00
+ Monthly data will be forced to the first of the month at midnight
+ """
+ normalDatetimes = []
+ if time_step.lower() == 'monthly':
+ for inputDatetime in datetimes:
+ if inputDatetime.day != 1:
+ # Clean the inputDatetime
+ inputDatetimeString = inputDatetime.strftime('%Y%m%d')
+ normalInputDatetimeString = inputDatetimeString[:6] + '01'
+ inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+
+ normalDatetimes.append(inputDatetime)
+
+ elif time_step.lower() == 'daily':
+ for inputDatetime in datetimes:
+ if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+ datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
+ normalDatetimeString = datetimeString[:8] + '000000'
+ inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+
+ normalDatetimes.append(inputDatetime)
+
+
+ return normalDatetimes
+'''
+
+'''
+def _expand_date(start_time, end_time, time_step):
+ if time_step.lower() == 'monthly':
+ if start_time.day != 1:
+ # Clean the startTime
+ startTimeString = start_time.strftime('%Y%m%d')
+ normalInputDatetimeString = startTimeString[:6] + '01'
+ start_time = datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+ ##TODO: Change the 3 lines above with this line:
+ ##start_time = datetime(start_time.year, start_time.month, 1)
+
+
+ lastDayOfMonth = calendar.monthrange(end_time.year, end_time.month)[1]
+ if end_time.day != lastDayOfMonth:
+ # Clean the endTime
+ endTimeString = end_time.strftime('%Y%m%d')
+ endTimeString = endTimeString[:6] + str(lastDayOfMonth)
+ end_time = datetime.strptime(endTimeString, '%Y%m%d')
+ ##TODO: Change the 3 lines above with this line:
+ ##end_time = datetime(end_time.year, end_time.month, lastDayOfMonth)
+
+ elif time_step.lower() == 'daily':
+ if start_time.hour != 0 or start_time.minute != 0 or start_time.second != 0:
+ datetimeString = start_time.strftime('%Y%m%d%H%M%S')
+ normalDatetimeString = datetimeString[:8] + '000000'
+ start_time = datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+ ##TODO: Change the 3 lines above with this line:
+ ##start_time = datetime(start_time.year, start_time.month, start_time.day, 00, 00, 00)
+
+ endTimeString = end_time.strftime('%Y%m%d%H%M%S')
+ endTimeString = endTimeString[:8] + '235959'
+ end_time = datetime.strptime(endTimeString, '%Y%m%d%H%M%S')
+ ##TODO: Change the 3 lines above with this line:
+ ##end_time = datetime(end_time.year, end_time.month, end_time.day, 23, 59, 59)
+
+ return start_time, end_time
+'''
+
+'''
+def _reshape_arrays(lats, lons, levels, values, unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count):
+
+ # Reshape arrays
+ lats = lats.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+ lons = lons.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+ levels = np.array(levels).reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+ values = values.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count)
+
+ # Flatten dimension if only single level
+ if unique_levels_count == 1:
+ values = values[:, :, :, 0]
+ lats = lats[0, :, :, 0]
+ lons = lons[0, :, :, 0]
+
+ return lats, lons, levels, values
+'''
+
+'''
+def _reorder_data(lats, lons, times, values):
+
+ # Re-order values in values array such that when reshaped everywhere is where it should be
+ # (as DB doesn't necessarily return everything in order)
+ order = np.lexsort((lons, lats, times))
+ counter = 0
+ sorted_values = np.zeros_like(values)
+ sorted_lats = np.zeros_like(lats)
+ sorted_lons = np.zeros_like(lons)
+ for i in order:
+ sorted_values[counter] = values[i]
+ sorted_lats[counter] = lats[i]
+ sorted_lons[counter] = lons[i]
+ counter += 1
+
+ return sorted_lats, sorted_lons, sorted_values
+'''
+
+'''
+def _calculate_len(unique_lat, unique_lon, unique_level, unique_time):
+
+ unique_lats_count = len(unique_lat)
+ unique_lons_count = len(unique_lon)
+ unique_levels_count = len(unique_level)
+ unique_times_count = len(unique_time)
+
+ return unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count
+'''
+
+
def parameters_metadata():
    '''Placeholder for retrieving metadata about available RCMED parameters.

    Not yet implemented in this revision; currently does nothing and
    returns None.
    '''
    pass
+
+
+def _make_mask_array(values):
+ '''
+ '''
+
+ # Created masked array to deal with missing values
+ # -these make functions like values.mean(), values.max() etc ignore missing values
+ mdi = -9999 # TODO: extract this value from the DB retrieval metadata
+ values = ma.masked_array(values, mask=(values == mdi))
+
+ return values
+
+
+def _reshape_values(values, unique_values):
+ '''Reshape values into 4D array.
+
+ :param values: Raw values data
+ :type values: numpy array
+ :param unique_values: Tuple of unique latitudes, longitudes, levels and times data.
+ :type unique_values: Tuple
+
+ :returns: Reshaped values data
+ :rtype: Numpy array
+ '''
+
+ lats_len = len(unique_values[0])
+ lons_len = len(unique_values[1])
+ levels_len = len(unique_values[2])
+ times_len = len(unique_values[3])
+
+ values = values.reshape(levels_len, times_len, lats_len, lons_len)
+
+ return values
+
+
+def _calculate_time(unique_times, time_step):
+ '''Convert each time to the datetime object.
+
+ :param unique_times: Unique time data
+ :type unique_times: String
+ :param time_step: Time step
+ :type time_step: String
+
+ :returns: Unique datetime objects of time data
+ :rtype: List
+ '''
+
+ time_format = "%Y-%m-%d %H:%M:%S"
+ unique_times = [datetime.strptime(time, time_format) for time in unique_times]
+ #There is no need to sort time.
+ #This function may required still in RCMES
+ #unique_times.sort()
+ #This function should be moved to the data_process.
+ #unique_times = normalizeDatetimes(unique_times, time_step)
+
+ return unique_times
+
+
+def _make_unique(lats, lons, levels, times):
+ '''Find the unique values of input data.
+
+ :param lats: lats
+ :type lats: Numpy array
+ :param lons: lons
+ :type lons: Numpy array
+ :param levels: levels
+ :type levels: Numpy array
+ :param times: times
+ :type times: Numpy array
+
+ :returns: Unique numpy arrays of latitudes, longitudes, levels and times
+ :rtype: Tuple
+ '''
+
+ unique_lats = np.unique(lats)
+ unique_lons = np.unique(lons)
+ unique_levels = np.unique(levels)
+ unique_times = np.unique(times)
+
+ return (unique_lats, unique_lons, unique_levels, unique_times)
+
+
def _get_data(url):
    '''Retrieve data from the RCMED database.

    :param url: url to query from database (see _generate_query_url)
    :type url: String

    :returns: Latitudes, longitudes, levels, times and values data
    :rtype: (list, list, list, list, list)
    '''

    # The response body is plain text: a header terminated by a
    # "data: \r\n" marker, followed by one CSV row per data point.
    # NOTE(review): if the marker is absent, index_of_data is None and
    # the .end() call below raises AttributeError — confirm that is the
    # intended failure mode for malformed responses.
    string = urllib2.urlopen(url)
    data_string = string.read()
    index_of_data = re.search('data: \r\n', data_string)
    data = data_string[index_of_data.end():len(data_string)]
    data = data.split('\r\n')

    lats = []
    lons = []
    levels = []
    values = []
    times = []

    # Each row has the form "lat,lon,level,time,value"; the time column
    # is kept as a raw string and parsed later by _calculate_time.
    for i in range(len(data) - 1): # Because the last row is empty, "len(data)-1" is used.
        row = data[i].split(',')
        lats.append(np.float32(row[0]))
        lons.append(np.float32(row[1]))
        levels.append(np.float32(row[2]))
        times.append(row[3])
        values.append(np.float32(row[4]))

    return lats, lons, levels, times, values
+
+
+def _beginning_of_date(time, time_step):
+ '''Calculate the beginning of given time, based on time step.
+
+ :param time: Given time
+ :type time: Datetime
+ :param time_step: Time step (monthly or daily)
+ :type time_step: String
+
+ :returns: Beginning of given time
+ :rtype: Datetime
+ '''
+
+ if time_step.lower() == 'monthly':
+ if time.day != 1:
+ start_time_string = time.strftime('%Y%m%d')
+ start_time_string = start_time_string[:6] + '01'
+ time = datetime.strptime(start_time_string, '%Y%m%d')
+ ##TODO: Change the 3 lines above with this line:
+ ##time = datetime(time.year, time.month, 1)
+ elif time_step.lower() == 'daily':
+ if time.hour != 0 or time.minute != 0 or time.second != 0:
+ start_time_string = time.strftime('%Y%m%d%H%M%S')
+ start_time_string = start_time_string[:8] + '000000'
+ time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S')
+ ##TODO: Change the 3 lines above with this line:
+ ##time = datetime(time.year, time.month, time.day, 00, 00, 00)
+
+ return time
+
+
+def _end_of_date(time, time_step):
+ '''Calculate the end of given time, based on time step.
+
+ :param time: Given time
+ :type time: Datetime
+ :param time_step: Time step (monthly or daily)
+ :type time_step: String
+
+ :returns: End of given time
+ :rtype: Datetime
+ '''
+
+ last_day_of_month = calendar.monthrange(time.year, time.month)[1]
+ if time.day != last_day_of_month:
+ end_time_string = time.strftime('%Y%m%d')
+ end_time_string = end_time_string[:6] + str(last_day_of_month)
+ time = datetime.strptime(end_time_string, '%Y%m%d')
+ ##TODO: Change the 3 lines above with this line:
+ ##time = datetime(time.year, time.month, lastDayOfMonth)
+ elif time_step.lower() == 'daily':
+ end_time_string = time.strftime('%Y%m%d%H%M%S')
+ end_time_string = end_time_string[:8] + '235959'
+ time = datetime.strptime(end_time_string, '%Y%m%d%H%M%S')
+ ##TODO: Change the 3 lines above with this line:
+ ##time = datetime(time.year, time.month, end_time.day, 23, 59, 59)
+
+ return time
+
+
+
def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step):
    '''Generate the url to query from database

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer
    :param min_lat: Minimum latitude
    :type min_lat: Float
    :param max_lat: Maximum latitude
    :type max_lat: Float
    :param min_lon: Minimum longitude
    :type min_lon: Float
    :param max_lon: Maximum longitude
    :type max_lon: Float
    :param start_time: Start time
    :type start_time: Datetime
    :param end_time: End time
    :type end_time: Datetime
    :param time_step: Time step
    :type time_step: String

    :returns: url to query from database
    :rtype: String
    '''

    # Expand the requested window to whole months/days, then format as
    # the compact timestamp form the query API expects.
    expanded_start = _beginning_of_date(start_time, time_step)
    expanded_end = _end_of_date(end_time, time_step)

    query = [('datasetId', dataset_id),
             ('parameterId', parameter_id),
             ('latMin', min_lat),
             ('latMax', max_lat),
             ('lonMin', min_lon),
             ('lonMax', max_lon),
             ('timeStart', expanded_start.strftime("%Y%m%dT%H%MZ")),
             ('timeEnd', expanded_end.strftime("%Y%m%dT%H%MZ"))]

    return URL + urllib.urlencode(query)
+
+
+
def _get_parameter_info(dataset_id, parameter_id):
    '''General information for given parameter id.

    Queries the RCMED service with "&info=yes" appended, which returns a
    JSON object describing the parameter rather than data rows.

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer

    :returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter
    :rtype: (string, string, string, string, string, string, string)
    '''

    query = [('datasetId',dataset_id), ('parameterId',parameter_id)]
    query_url = urllib.urlencode(query)
    url = URL + query_url + "&info=yes"
    string = urllib2.urlopen(url)
    data_string = string.read()
    # NOTE(review): any key missing from the JSON response raises
    # KeyError here — confirm the service always returns all seven.
    data_string = json.loads(data_string)
    database = data_string["database"]
    time_step = data_string["timestep"]
    realm = data_string["realm"]
    instrument = data_string["instrument"]
    start_date = data_string["start_date"]
    end_date = data_string["end_date"]
    unit = data_string["units"]

    return (database, time_step, realm, instrument, start_date, end_date, unit)
+
+
+
def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time):
    '''Get data from one database(parameter).

    Pipeline: look up parameter info, build the query url, fetch the raw
    rows, then dedupe coordinates, parse times, reshape and mask the
    values before wrapping everything in a Dataset.

    :param dataset_id: Dataset id.
    :type dataset_id: Integer
    :param parameter_id: Parameter id
    :type parameter_id: Integer
    :param min_lat: Minimum latitude
    :type min_lat: Float
    :param max_lat: Maximum latitude
    :type max_lat: Float
    :param min_lon: Minimum longitude
    :type min_lon: Float
    :param max_lon: Maximum longitude
    :type max_lon: Float
    :param start_time: Start time
    :type start_time: Datetime
    :param end_time: End time
    :type end_time: Datetime

    :returns: Dataset object
    :rtype: Object
    '''

    parameter_info = _get_parameter_info(dataset_id, parameter_id)
    time_step = parameter_info[1]
    # NOTE(review): parameter_info[0] is the *database* name per
    # _get_parameter_info's return tuple, yet it is passed to Dataset as
    # the parameter name — confirm this is intentional.
    parameter_name = parameter_info[0]
    url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
    lats, lons, levels, times, values = _get_data(url)

    # NOTE(review): levels is left as a plain list while the other three
    # are converted to ndarrays — np.unique below handles both, but
    # verify the asymmetry is deliberate.
    lats = np.array(lats)
    lons = np.array(lons)
    times = np.array(times)
    values = np.array(values)

    # Tuple order: (unique_lats, unique_lons, unique_levels, unique_times).
    unique_lats_lons_levels_times = _make_unique(lats, lons, levels, times)
    unique_times = _calculate_time(unique_lats_lons_levels_times[3], time_step)
    values = _reshape_values(values, unique_lats_lons_levels_times)
    values = _make_mask_array(values)

    return Dataset(unique_lats_lons_levels_times[0], unique_lats_lons_levels_times[1], unique_times, values, parameter_name)
\ No newline at end of file