Posted to commits@sdap.apache.org by ea...@apache.org on 2020/08/04 02:18:44 UTC

[incubator-sdap-ingester] branch fix_syntax_error created (now 53e22c6)

This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a change to branch fix_syntax_error
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git.


      at 53e22c6  SDAP-270: Fix bad solr endpoints in solr history manager (#8)

This branch includes the following new commits:

     new 53e22c6  SDAP-270: Fix bad solr endpoints in solr history manager (#8)

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-sdap-ingester] 01/01: SDAP-270: Fix bad solr endpoints in solr history manager (#8)

Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch fix_syntax_error
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git

commit 53e22c62c6a7175dbdf5c20f615f090c919b540c
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Aug 3 19:06:18 2020 -0700

    SDAP-270: Fix bad solr endpoints in solr history manager (#8)
    
    Co-authored-by: Eamon Ford <ea...@jpl.nasa.gov>
---
 .../history_manager/SolrIngestionHistory.py        | 40 ++++++++++------------
 1 file changed, 18 insertions(+), 22 deletions(-)
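
The substance of the change: every Solr endpoint is now built from a single
_url_prefix that appends the /solr path segment to the configured base URL,
where the old code joined collection and admin paths directly onto the bare
base URL. A minimal sketch of the before/after construction (the base URL and
collection name below are hypothetical examples, not values from the
repository):

    # Before the patch: paths were joined straight onto the configured URL,
    # yielding e.g. http://localhost:8983/nexusgranules -- no /solr segment.
    solr_url = "http://localhost:8983"        # hypothetical base URL
    granule_collection = "nexusgranules"      # hypothetical collection name
    old_url = '/'.join([solr_url.strip('/'), granule_collection])

    # After the patch: a shared prefix inserts the /solr path segment once,
    # yielding http://localhost:8983/solr/nexusgranules for the same inputs.
    url_prefix = f"{solr_url.strip('/')}/solr"
    new_url = f"{url_prefix}/{granule_collection}"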

diff --git a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
index 1ae7156..79d6eef 100644
--- a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
+++ b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
@@ -33,10 +33,10 @@ class SolrIngestionHistory(IngestionHistory):
 
     def __init__(self, solr_url: str, dataset_id: str, signature_fun=None):
         try:
-            self._solr_url = solr_url
+            self._url_prefix = f"{solr_url.strip('/')}/solr"
             self._create_collection_if_needed()
-            self._solr_granules = pysolr.Solr('/'.join([solr_url.strip('/'), self._granule_collection_name]))
-            self._solr_datasets = pysolr.Solr('/'.join([solr_url.strip('/'), self._dataset_collection_name]))
+            self._solr_granules = pysolr.Solr(f"{self._url_prefix}/{self._granule_collection_name}")
+            self._solr_datasets = pysolr.Solr(f"{self._url_prefix}/{self._dataset_collection_name}")
             self._dataset_id = dataset_id
             self._signature_fun = md5sum_from_filepath if signature_fun is None else signature_fun
             self._latest_ingested_file_update = self._get_latest_file_update()
@@ -63,7 +63,7 @@ class SolrIngestionHistory(IngestionHistory):
             self._solr_datasets.add([{
                 'id': self._dataset_id,
                 'dataset_s': self._dataset_id,
-                'latest_update_l': self._latest_ingested_file_update}])
+                'latest_update_l': int(self._latest_ingested_file_update)}])
             self._solr_datasets.commit()
 
     def _get_latest_file_update(self):
@@ -87,8 +87,8 @@ class SolrIngestionHistory(IngestionHistory):
                 self._req_session = requests.session()
 
             payload = {'action': 'CLUSTERSTATUS'}
-            result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
-                                           params=payload)
+            collections_endpoint = f"{self._url_prefix}/admin/collections"
+            result = self._req_session.get(collections_endpoint, params=payload)
             response = result.json()
             node_number = len(response['cluster']['live_nodes'])
 
@@ -100,17 +100,15 @@ class SolrIngestionHistory(IngestionHistory):
                            'name': self._granule_collection_name,
                            'numShards': node_number
                            }
-                result = self._req_session.get('/'.join([self._solr_url.strip("/"), 'admin', 'collections']),
-                                               params=payload)
+                result = self._req_session.get(collections_endpoint, params=payload)
                 response = result.json()
                 logger.info(f"solr collection created {response}")
+
                 # Update schema
-                schema_url = '/'.join([self._solr_url.strip('/'), self._granule_collection_name, 'schema'])
-                # granule_s # dataset_s so that all the granule of a dataset are less likely to be on the same shard
-                # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
-                self._add_field(schema_url, "dataset_s", "StrField")
-                self._add_field(schema_url, "granule_s", "StrField")
-                self._add_field(schema_url, "granule_signature_s", "StrField")
+                schema_endpoint = f"{self._url_prefix}/{self._granule_collection_name}/schema"
+                self._add_field(schema_endpoint, "dataset_s", "string")
+                self._add_field(schema_endpoint, "granule_s", "string")
+                self._add_field(schema_endpoint, "granule_signature_s", "string")
 
             else:
                 logger.info(f"collection {self._granule_collection_name} already exists")
@@ -121,16 +119,14 @@ class SolrIngestionHistory(IngestionHistory):
                            'name': self._dataset_collection_name,
                            'numShards': node_number
                            }
-                result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
-                                               params=payload)
+                result = self._req_session.get(collections_endpoint, params=payload)
                 response = result.json()
                 logger.info(f"solr collection created {response}")
+
                 # Update schema
-                # http://localhost:8983/solr/nexusdatasets/schema?_=1588555874864&wt=json
-                schema_url = '/'.join([self._solr_url.strip('/'), self._dataset_collection_name, 'schema'])
-                # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
-                self._add_field(schema_url, "dataset_s", "StrField")
-                self._add_field(schema_url, "latest_update_l", "TrieLongField")
+                schema_endpoint = f"{self._url_prefix}/{self._dataset_collection_name}/schema"
+                self._add_field(schema_endpoint, "dataset_s", "string")
+                self._add_field(schema_endpoint, "latest_update_l", "TrieLongField")
 
             else:
                 logger.info(f"collection {self._dataset_collection_name} already exists")
@@ -154,7 +150,7 @@ class SolrIngestionHistory(IngestionHistory):
                 "stored": False
             }
         }
-        result = self._req_session.post(schema_url, data=add_field_payload.__str__())
+        return self._req_session.post(schema_url, data=str(add_field_payload).encode('utf-8'))
 
 
 class DatasetIngestionHistorySolrException(Exception):
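
A note on the final hunk: _add_field now returns the response and posts the
payload as the UTF-8-encoded string form of a Python dict. str() on a dict
produces single-quoted keys rather than JSON, so whether the Solr Schema API
accepts that body depends on the server's parser. For comparison, a sketch of
the same request using the JSON support built into requests (the endpoint
shape and field names come from the patch; the standalone helper itself is
hypothetical):

    import requests

    def add_field(session: requests.Session, schema_url: str,
                  field_name: str, field_type: str):
        # Solr Schema API "add-field" command. Passing json= serializes the
        # payload as real JSON and sets the Content-Type header for us.
        payload = {
            "add-field": {
                "name": field_name,
                "type": field_type,
                "stored": False,
            }
        }
        return session.post(schema_url, json=payload)

    # Example call (hypothetical URL), mirroring the fields added in the patch:
    # session = requests.session()
    # add_field(session, "http://localhost:8983/solr/nexusgranules/schema",
    #           "dataset_s", "string")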