You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by fg...@apache.org on 2019/01/23 17:56:51 UTC

[incubator-sdap-ningesterpy] 28/29: SDAP-155 add processor to extract timestamp from granule metadata (#10)

This is an automated email from the ASF dual-hosted git repository.

fgreg pushed a commit to branch v1.0.0-rc1
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ningesterpy.git

commit b26a22c75b288a524745787f1c8db02a377abc2e
Author: echyam <ec...@users.noreply.github.com>
AuthorDate: Tue Oct 23 10:31:28 2018 -0700

    SDAP-155 add processor to extract timestamp from granule metadata (#10)
    
    * SDAP-155 add processor to extract timestamp from granule metadata
    
    * add ExtractTimestampProcessor to INSTALLED_PROCESSORS for ProcessorChain
    
    * remove unused vars and check for tile_type
    
    * take timestamp pattern as parameter, use param in error log
    
    * give tile information when the timestamp processor is called on the wrong tile type
    
    Co-Authored-By: echyam <ec...@gmail.com>
---
 .idea/inspectionProfiles/Project_Default.xml |  21 ++++++++
 .idea/misc.xml                               |   2 +-
 .idea/ningesterpy.iml                        |   5 +-
 sdap/processors/__init__.py                  |   5 +-
 sdap/processors/extracttimestampprocessor.py |  66 ++++++++++++++++++++++++
 tests/datafiles/not_empty_gpm.HDF5           | Bin 0 -> 5302102 bytes
 tests/datafiles/not_empty_modis.nc           | Bin 0 -> 10224432 bytes
 tests/extracttimestamp_test.py               |  72 +++++++++++++++++++++++++++
 8 files changed, 165 insertions(+), 6 deletions(-)

diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..bdf9b07
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,21 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="8">
+            <item index="0" class="java.lang.String" itemvalue="flask-accept" />
+            <item index="1" class="java.lang.String" itemvalue="protobuf" />
+            <item index="2" class="java.lang.String" itemvalue="werkzeug" />
+            <item index="3" class="java.lang.String" itemvalue="six" />
+            <item index="4" class="java.lang.String" itemvalue="PyYAML" />
+            <item index="5" class="java.lang.String" itemvalue="nexusproto" />
+            <item index="6" class="java.lang.String" itemvalue="pytz" />
+            <item index="7" class="java.lang.String" itemvalue="flask" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 0218159..3c94e77 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/.idea/ningesterpy.iml b/.idea/ningesterpy.iml
index 1f079f2..30a41bc 100644
--- a/.idea/ningesterpy.iml
+++ b/.idea/ningesterpy.iml
@@ -4,12 +4,9 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.5.4 (~/anaconda/envs/ningesterpy/bin/python)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.6.6 (~/anaconda3/envs/sdap-ningesterpy/bin/python)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
-  <component name="PyDocumentationSettings">
-    <option name="renderExternalDocumentation" value="true" />
-  </component>
   <component name="TemplatesService">
     <option name="TEMPLATE_FOLDERS">
       <list>
diff --git a/sdap/processors/__init__.py b/sdap/processors/__init__.py
index 6d4a679..2cb2f73 100644
--- a/sdap/processors/__init__.py
+++ b/sdap/processors/__init__.py
@@ -62,6 +62,7 @@ from sdap.processors.subtract180longitude import Subtract180Longitude
 from sdap.processors.tilereadingprocessor import GridReadingProcessor, SwathReadingProcessor, TimeSeriesReadingProcessor
 from sdap.processors.tilesummarizingprocessor import TileSummarizingProcessor
 from sdap.processors.winddirspeedtouv import WindDirSpeedToUV
+from sdap.processors.extracttimestampprocessor import ExtractTimestampProcessor
 
 INSTALLED_PROCESSORS = {
     "CallNcpdq": CallNcpdq,
@@ -78,5 +79,7 @@ INSTALLED_PROCESSORS = {
     "SwathReadingProcessor": SwathReadingProcessor,
     "TimeSeriesReadingProcessor": TimeSeriesReadingProcessor,
     "TileSummarizingProcessor": TileSummarizingProcessor,
-    "WindDirSpeedToUV": WindDirSpeedToUV
+    "WindDirSpeedToUV": WindDirSpeedToUV,
+    "ExtractTimestampProcessor": ExtractTimestampProcessor
 }
+
diff --git a/sdap/processors/extracttimestampprocessor.py b/sdap/processors/extracttimestampprocessor.py
new file mode 100644
index 0000000..98b4147
--- /dev/null
+++ b/sdap/processors/extracttimestampprocessor.py
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nexusproto
+from nexusproto.serialization import from_shaped_array
+
+import datetime
+import time
+import logging
+from netCDF4 import Dataset, num2date
+from pytz import timezone
+
+from sdap.processors import NexusTileProcessor
+
+EPOCH = timezone('UTC').localize(datetime.datetime(1970, 1, 1))
+
+
+class BadTimestampExtractionException(Exception):
+    pass
+
+def to_seconds_from_epoch(timestamp, pattern):
+    try:
+        seconds = int(time.mktime(time.strptime(timestamp, pattern)))
+        return seconds
+    except ValueError:
+        logging.error('{} timestamp is not of the format {}'.format(timestamp, pattern))
+
+class ExtractTimestampProcessor(NexusTileProcessor):
+
+    def __init__(self, timestamp_name, timestamp_pattern, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.timestamp_name = timestamp_name
+        self.timestamp_pattern = timestamp_pattern
+
+    def process_nexus_tile(self, nexus_tile):
+        output_tile = nexusproto.DataTile_pb2.NexusTile()
+        output_tile.CopyFrom(nexus_tile)
+
+        file_path = output_tile.summary.granule
+        file_path = file_path[len('file:'):] if file_path.startswith('file:') else file_path
+
+        tile_type = nexus_tile.tile.WhichOneof("tile_type")
+
+        with Dataset(file_path) as ds:
+            timestamp = getattr(ds,self.timestamp_name)
+            seconds = to_seconds_from_epoch(timestamp, self.timestamp_pattern)
+
+            if tile_type == "grid_tile":
+                nexus_tile.tile.grid_tile.time = seconds
+            else:
+                raise BadTimestampExtractionException("Unsupported tile type: {}".format(tile_type))
+
+        yield nexus_tile
diff --git a/tests/datafiles/not_empty_gpm.HDF5 b/tests/datafiles/not_empty_gpm.HDF5
new file mode 100644
index 0000000..01397a7
Binary files /dev/null and b/tests/datafiles/not_empty_gpm.HDF5 differ
diff --git a/tests/datafiles/not_empty_modis.nc b/tests/datafiles/not_empty_modis.nc
new file mode 100644
index 0000000..11bc775
Binary files /dev/null and b/tests/datafiles/not_empty_modis.nc differ
diff --git a/tests/extracttimestamp_test.py b/tests/extracttimestamp_test.py
new file mode 100644
index 0000000..e2800ab
--- /dev/null
+++ b/tests/extracttimestamp_test.py
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+from os import path
+import logging
+
+import sdap.processors
+from nexusproto import DataTile_pb2 as nexusproto
+from sdap.processors.extracttimestampprocessor import BadTimestampExtractionException
+
+
+class TestExtractTimestamp(unittest.TestCase):
+    def setUp(self):
+        self.module = sdap.processors.ExtractTimestampProcessor('time_coverage_start', '%Y-%m-%dT%H:%M:%S.000Z')
+
+    def test_extract_timestamp_from_metadata(self):
+        test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+        input_tile = nexusproto.NexusTile()
+        tile_summary = nexusproto.TileSummary()
+        tile_summary.granule = "file:%s" % test_file
+        tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+        input_tile.summary.CopyFrom(tile_summary)
+
+        input_tile.tile.grid_tile.CopyFrom(nexusproto.GridTile())
+        results = list(self.module.process_nexus_tile(input_tile))
+        nexus_tile_after = results[0]
+
+        self.assertEqual(1537428301, nexus_tile_after.tile.grid_tile.time)
+
+    def test_extract_timestamp_swath_exception(self):
+        test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+        input_tile = nexusproto.NexusTile()
+        tile_summary = nexusproto.TileSummary()
+        tile_summary.granule = "file:%s" % test_file
+        tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+        input_tile.summary.CopyFrom(tile_summary)
+
+        input_tile.tile.swath_tile.CopyFrom(nexusproto.SwathTile())
+
+        with self.assertRaises(BadTimestampExtractionException):
+            list(self.module.process_nexus_tile(input_tile))[0].tile.swath_tile.time
+
+    def test_extract_timestamp_timeseries_exception(self):
+        test_file = path.join(path.dirname(__file__), 'datafiles', 'not_empty_modis.nc')
+
+        input_tile = nexusproto.NexusTile()
+        tile_summary = nexusproto.TileSummary()
+        tile_summary.granule = "file:%s" % test_file
+        tile_summary.section_spec = "time:0:1,lat:0:10,lon:0:10"
+        input_tile.summary.CopyFrom(tile_summary)
+
+        input_tile.tile.time_series_tile.CopyFrom(nexusproto.TimeSeriesTile())
+
+        with self.assertRaises(BadTimestampExtractionException):
+            list(self.module.process_nexus_tile(input_tile))[0].tile.time_series_tile.time
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file