You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/08/04 02:18:45 UTC
[incubator-sdap-ingester] 01/01: SDAP-270: Fix bad solr endpoints
in solr history manager (#8)
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch fix_syntax_error
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
commit 53e22c62c6a7175dbdf5c20f615f090c919b540c
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Aug 3 19:06:18 2020 -0700
SDAP-270: Fix bad solr endpoints in solr history manager (#8)
Co-authored-by: Eamon Ford <ea...@jpl.nasa.gov>
---
.../history_manager/SolrIngestionHistory.py | 40 ++++++++++------------
1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
index 1ae7156..79d6eef 100644
--- a/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
+++ b/collection_manager/collection_manager/services/history_manager/SolrIngestionHistory.py
@@ -33,10 +33,10 @@ class SolrIngestionHistory(IngestionHistory):
def __init__(self, solr_url: str, dataset_id: str, signature_fun=None):
try:
- self._solr_url = solr_url
+ self._url_prefix = f"{solr_url.strip('/')}/solr"
self._create_collection_if_needed()
- self._solr_granules = pysolr.Solr('/'.join([solr_url.strip('/'), self._granule_collection_name]))
- self._solr_datasets = pysolr.Solr('/'.join([solr_url.strip('/'), self._dataset_collection_name]))
+ self._solr_granules = pysolr.Solr(f"{self._url_prefix}/{self._granule_collection_name}")
+ self._solr_datasets = pysolr.Solr(f"{self._url_prefix}/{self._dataset_collection_name}")
self._dataset_id = dataset_id
self._signature_fun = md5sum_from_filepath if signature_fun is None else signature_fun
self._latest_ingested_file_update = self._get_latest_file_update()
@@ -63,7 +63,7 @@ class SolrIngestionHistory(IngestionHistory):
self._solr_datasets.add([{
'id': self._dataset_id,
'dataset_s': self._dataset_id,
- 'latest_update_l': self._latest_ingested_file_update}])
+ 'latest_update_l': int(self._latest_ingested_file_update)}])
self._solr_datasets.commit()
def _get_latest_file_update(self):
@@ -87,8 +87,8 @@ class SolrIngestionHistory(IngestionHistory):
self._req_session = requests.session()
payload = {'action': 'CLUSTERSTATUS'}
- result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
- params=payload)
+ collections_endpoint = f"{self._url_prefix}/admin/collections"
+ result = self._req_session.get(collections_endpoint, params=payload)
response = result.json()
node_number = len(response['cluster']['live_nodes'])
@@ -100,17 +100,15 @@ class SolrIngestionHistory(IngestionHistory):
'name': self._granule_collection_name,
'numShards': node_number
}
- result = self._req_session.get('/'.join([self._solr_url.strip("/"), 'admin', 'collections']),
- params=payload)
+ result = self._req_session.get(collections_endpoint, params=payload)
response = result.json()
logger.info(f"solr collection created {response}")
+
# Update schema
- schema_url = '/'.join([self._solr_url.strip('/'), self._granule_collection_name, 'schema'])
- # granule_s # dataset_s so that all the granule of a dataset are less likely to be on the same shard
- # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
- self._add_field(schema_url, "dataset_s", "StrField")
- self._add_field(schema_url, "granule_s", "StrField")
- self._add_field(schema_url, "granule_signature_s", "StrField")
+ schema_endpoint = f"{self._url_prefix}/{self._granule_collection_name}/schema"
+ self._add_field(schema_endpoint, "dataset_s", "string")
+ self._add_field(schema_endpoint, "granule_s", "string")
+ self._add_field(schema_endpoint, "granule_signature_s", "string")
else:
logger.info(f"collection {self._granule_collection_name} already exists")
@@ -121,16 +119,14 @@ class SolrIngestionHistory(IngestionHistory):
'name': self._dataset_collection_name,
'numShards': node_number
}
- result = self._req_session.get('/'.join([self._solr_url.strip('/'), 'admin', 'collections']),
- params=payload)
+ result = self._req_session.get(collections_endpoint, params=payload)
response = result.json()
logger.info(f"solr collection created {response}")
+
# Update schema
- # http://localhost:8983/solr/nexusdatasets/schema?_=1588555874864&wt=json
- schema_url = '/'.join([self._solr_url.strip('/'), self._dataset_collection_name, 'schema'])
- # self.add_unique_key_field(schema_url, "uniqueKey_s", "StrField")
- self._add_field(schema_url, "dataset_s", "StrField")
- self._add_field(schema_url, "latest_update_l", "TrieLongField")
+ schema_endpoint = f"{self._url_prefix}/{self._dataset_collection_name}/schema"
+ self._add_field(schema_endpoint, "dataset_s", "string")
+ self._add_field(schema_endpoint, "latest_update_l", "TrieLongField")
else:
logger.info(f"collection {self._dataset_collection_name} already exists")
@@ -154,7 +150,7 @@ class SolrIngestionHistory(IngestionHistory):
"stored": False
}
}
- result = self._req_session.post(schema_url, data=add_field_payload.__str__())
+ return self._req_session.post(schema_url, data=str(add_field_payload).encode('utf-8'))
class DatasetIngestionHistorySolrException(Exception):