You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by fg...@apache.org on 2019/01/23 17:56:51 UTC
[incubator-sdap-ningesterpy] 28/29: SDAP-155 add processor to extract timestamp from granule metadata (#10)
This is an automated email from the ASF dual-hosted git repository.
fgreg pushed a commit to branch v1.0.0-rc1
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ningesterpy.git
commit b26a22c75b288a524745787f1c8db02a377abc2e
Author: echyam <ec...@users.noreply.github.com>
AuthorDate: Tue Oct 23 10:31:28 2018 -0700
SDAP-155 add processor to extract timestamp from granule metadata (#10)
* SDAP-155 add processor to extract timestamp from granule metadata
* add ExtractTimestampProcessor to INSTALLED_PROCESSORS for ProcessorChain
* remove unused vars and check for tile_type
* take timestamp pattern as parameter, use param in error log
* give tile information when calling the timestamp processor on the wrong tile type
Co-Authored-By: echyam <ec...@gmail.com>
---
.idea/inspectionProfiles/Project_Default.xml | 21 ++++++++
.idea/misc.xml | 2 +-
.idea/ningesterpy.iml | 5 +-
sdap/processors/__init__.py | 5 +-
sdap/processors/extracttimestampprocessor.py | 66 ++++++++++++++++++++++++
tests/datafiles/not_empty_gpm.HDF5 | Bin 0 -> 5302102 bytes
tests/datafiles/not_empty_modis.nc | Bin 0 -> 10224432 bytes
tests/extracttimestamp_test.py | 72 +++++++++++++++++++++++++++
8 files changed, 165 insertions(+), 6 deletions(-)
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..bdf9b07
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,21 @@
+<component name="InspectionProjectProfileManager">
+ <profile version="1.0">
+ <option name="myName" value="Project Default" />
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+ <option name="ignoredPackages">
+ <value>
+ <list size="8">
+ <item index="0" class="java.lang.String" itemvalue="flask-accept" />
+ <item index="1" class="java.lang.String" itemvalue="protobuf" />
+ <item index="2" class="java.lang.String" itemvalue="werkzeug" />
+ <item index="3" class="java.lang.String" itemvalue="six" />
+ <item index="4" class="java.lang.String" itemvalue="PyYAML" />
+ <item index="5" class="java.lang.String" itemvalue="nexusproto" />
+ <item index="6" class="java.lang.String" itemvalue="pytz" />
+ <item index="7" class="java.lang.String" itemvalue="flask" />
+ </list>
+ </value>
+ </option>
+ </inspection_tool>
+ </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 0218159..3c94e77 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" project-jdk-type="Python SDK" />
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
diff --git a/.idea/ningesterpy.iml b/.idea/ningesterpy.iml
index 1f079f2..30a41bc 100644
--- a/.idea/ningesterpy.iml
+++ b/.idea/ningesterpy.iml
@@ -4,12 +4,9 @@
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
- <orderEntry type="jdk" jdkName="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" jdkType="Python SDK" />
+ <orderEntry type="jdk" jdkName="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
- <component name="PyDocumentationSettings">
- <option name="renderExternalDocumentation" value="true" />
- </component>
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py
index 6d4a679..2cb2f73 100644
--- a/sdap/processors/__init__.py
+++ b/sdap/processors/__init__.py
@@ -62,6 +62,7 @@ from sdap.processors.subtract180longitude import Subtract180Longitude
from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor
from sdap.processors.tilesummarizingprocessor import TileSummarizingProcessor
from sdap.processors.winddirspeedtouv import WindDirSpeedToUV
+from sdap.processors.extracttimestampprocessor import ExtractTimestampProcessor
INSTALLED_PROCESSORS = {
"CallNcpdq": CallNcpdq,
@@ -78,5 +79,7 @@ INSTALLED_PROCESSORS = {
"SwathReadingProcessor": SwathReadingProcessor,
"TimeSeriesReadingProcessor": TimeSeriesReadingProcessor,
"TileSummarizingProcessor": TileSummarizingProcessor,
- "WindDirSpeedToUV": WindDirSpeedToUV
+ "WindDirSpeedToUV": WindDirSpeedToUV,
+ "ExtractTimestampProcessor": ExtractTimestampProcessor
}
+
diff --git a/sdap/processors/extracttimestampprocessor.py b/sdap/processors/extracttimestampprocessor.py
new file mode 100644
index 0000000..98b4147
--- /dev/null
+++ b/sdap/processors/extracttimestampprocessor.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nexusproto
+from nexusproto.serialization import from_shaped_array
+
+import datetime
+import time
+import logging
+from netCDF4 import Dataset, num2date
+from pytz import timezone
+
+from sdap.processors import NexusTileProcessor
+
+EPOCH = timezone('UTC').localize(datetime.datetime(1970, 1, 1))
+
+
+class BadTimestampExtractionException(Exception):
+ pass
+
+def to_seconds_from_epoch(timestamp, pattern):
+ try:
+ seconds = int(time.mktime(time.strptime(timestamp, pattern)))
+ return seconds
+ except ValueError:
+ logging.error('{} timestamp is not of the format {}'.format(timestamp, pattern))
+
+class ExtractTimestampProcessor(NexusTileProcessor):
+
+ def __init__(self, timestamp_name, timestamp_pattern, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.timestamp_name = timestamp_name
+ self.timestamp_pattern = timestamp_pattern
+
+ def process_nexus_tile(self, nexus_tile):
+ output_tile = nexusproto.DataTile_pb2.NexusTile()
+ output_tile.CopyFrom(nexus_tile)
+
+ file_path = output_tile.summary.granule
+ file_path = file_path[len('file:'):] if file_path.startswith('file:') else file_path
+
+ tile_type = nexus_tile.tile.WhichOneof("tile_type")
+
+ with Dataset(file_path) as ds:
+ timestamp = getattr(ds,self.timestamp_name)
+ seconds = to_seconds_from_epoch(timestamp, self.timestamp_pattern)
+
+ if tile_type == "grid_tile":
+ nexus_tile.tile.grid_tile.time = seconds
+ else:
+ raise BadTimestampExtractionException("Unsupported tile type: {}".format(tile_type))
+
+ yield nexus_tile
diff --git a/tests/datafiles/not_empty_gpm.HDF5 b/tests/datafiles/not_empty_gpm.HDF5
new file mode 100644
index 0000000..01397a7
Binary files /dev/null and b/tests/datafiles/not_empty_gpm.HDF5 differ
diff --git a/tests/datafiles/not_empty_modis.nc b/tests/datafiles/not_empty_modis.nc
new file mode 100644
index 0000000..11bc775
Binary files /dev/null and b/tests/datafiles/not_empty_modis.nc differ
diff --git a/tests/extracttimestamp_test.py b/tests/extracttimestamp_test.py
new file mode 100644
index 0000000..e2800ab
--- /dev/null
+++ b/tests/extracttimestamp_test.py
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+from os import path
+import logging
+
+import sdap.processors
+from nexusproto import DataTile_pb2 as nexusproto
+from sdap.processors.extracttimestampprocessor import BadTimestampExtractionException
+
+
+class TestExtractTimestamp(unittest.TestCase):
+ def setUp(self):
+ self.module = sdap.processors.ExtractTimestampProcessor('time_coverage_start', '%Y-%m-%dT%H:%M:%S.000Z')
+
+ def test_extract_timestamp_from_metadata(self):
+ test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+ input_tile = nexusproto.NexusTile()
+ tile_summary = nexusproto.TileSummary()
+ tile_summary.granule = "file:%s" % test_file
+ tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+ input_tile.summary.CopyFrom(tile_summary)
+
+ input_tile.tile.grid_tile.CopyFrom(nexusproto.GridTile())
+ results = list(self.module.process_nexus_tile(input_tile))
+ nexus_tile_after = results[0]
+
+ self.assertEqual(1537428301, nexus_tile_after.tile.grid_tile.time)
+
+ def test_extract_timestamp_swath_exception(self):
+ test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+ input_tile = nexusproto.NexusTile()
+ tile_summary = nexusproto.TileSummary()
+ tile_summary.granule = "file:%s" % test_file
+ tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+ input_tile.summary.CopyFrom(tile_summary)
+
+ input_tile.tile.swath_tile.CopyFrom(nexusproto.SwathTile())
+
+ with self.assertRaises(BadTimestampExtractionException):
+ list(self.module.process_nexus_tile(input_tile))[0].tile.swath_tile.time
+
+ def test_extract_timestamp_timeseries_exception(self):
+ test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+ input_tile = nexusproto.NexusTile()
+ tile_summary = nexusproto.TileSummary()
+ tile_summary.granule = "file:%s" % test_file
+ tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+ input_tile.summary.CopyFrom(tile_summary)
+
+ input_tile.tile.time_series_tile.CopyFrom(nexusproto.TimeSeriesTile())
+
+ with self.assertRaises(BadTimestampExtractionException):
+ list(self.module.process_nexus_tile(input_tile))[0].tile.time_series_tile.time
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file