You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by rk...@apache.org on 2023/03/13 17:49:35 UTC
[incubator-sdap-in-situ-data-services] branch SDAP-445-asf-compliance updated (7664731 -> c59d8db)
This is an automated email from the ASF dual-hosted git repository.
rkk pushed a change to branch SDAP-445-asf-compliance
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-in-situ-data-services.git
from 7664731 /version 0.3.0
add aba83f0 feat: add CLI script
add 4f72eb3 chore: update readme
add 8d78b1b chore: add changelog
add 0c1c4d1 fix: added `meta` column as default column
add 7221ffe fix: add default column in correct place
add c2a6d2a chore: update changelog + swagger
add c97b9b2 chore: merged from apache master
add 65c4651 chore: merge from apache master
add bc2e14a chore: switch to SDAP ticket
add 57ed8dc Merge branch 'master' of github.com:wphyojpl/incubator-sdap-in-situ-data-services
add 786d92a Merge branch 'master' of github.com:apache/incubator-sdap-in-situ-data-services
add 982d498 feat: add ci/cd to build lambda zip file
add 3d0fe1c breaking:Elasticsearch Logic (#1)
add 576b1c1 fix: get lambda function working
add 8d374b0 fix: throw runtime err when ES ingest fail
add f2095f3 chore: add lambda logger
add e304425 feat: use cdms_schema json to create spark struct obj
add 9824c6e chore: merge from master
add c45898c chore: use insitu schema json to get column names
add c001092 feat: add observation type counts
add f33bf06 fix: add missing param in calling stats retriever
add eeff2b9 fix: add observation agg in query
add 6398704 fix: get parquet stat to ES working for SQS multiple records input
add 69c4e39 fix: not throwing error when deleting items that do not exist in ES
add 9e30423 fix: allow config w/o checking mandatory variables
add 0fbb6a0 feat: download small parquet file to extract stat locally
add 07f6000 fix: use singleton to re-use session to reduce time
add e31e246 fix: return NULL if query not found in ES + enhance statistics endpoint to include depth, time and bbox
add c77c943 fix: use unquote_plus to replace `+` to ` ` for s3 url
add acc410f feat: not waiting for ingest to finish
add 9800a38 fix: ci/cd for lambda-docker
add fbd7f20 fix: get lambda with ECS working with pyspark
add f7d28d6 fix: add missing files
add 040776c fix: update makefile
add 3adf08a fix: use pandas to avoid int + float in source data
add 1d83a52 fix:s3 ingestion lambda works as a zip now
add 09f82be feat: make 30x30 tiles + different log level for spark vs cdms flask
add 4d8a2e8 fix: query-missing-depth = float, log statement when deleting ES, ingest w/o pandas + mandatory depth conversion to float
add 8b046e0 fix: UnsupportedOperationException:org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainLongDictionary workaround
add 858e23a fix: wind from & to is in long which screws with the schema
add feaf344 fix: make unique spark-app-name
add cefb0f0 feat: use alias instead of real index
add f3d7466 fix: update test constants to use alias
add 2daba45 feat: Configurable partitioning (#3)
add 69da174 Merge branch 'es.branch' of github.com:wphyojpl/incubator-sdap-in-situ-data-services into es.branch
add 5c70764 fix: config is in string form. not in int form
add e03a862 fix: config is in string form. not in int form
add c8779b1 feat: add elasticsearch index for ddb table
add 7eab73f chore: add missing test file
add 140d017 feat: add ES based metadata table
add 32de57e feat: using ES for metadata
add 8c6b786 fix: need to pass empty str, not None
add 1209603 fix: parallel validator bug
add c59d8db Merge pull request #12 from wphyojpl/es.branch
No new revisions were added by this update.
Summary of changes:
.gitignore | 4 +-
ci.cd/Makefile | 41 +
ci.cd/create_s3_zip.sh | 27 +
ci.cd/lambda_docker_upload.sh | 6 +
ci.cd/local_upload.sh | 8 +
docker/parquet.lambda.Dockerfile | 43 +
documentations/navair.demo.md | 106 ++
etc/elasticsearch/all_alias.json | 7 +
etc/elasticsearch/entry_file_records.json | 20 +
etc/elasticsearch/parquet_stats_v1.json | 64 +
etc/elasticsearch/setup_es.txt | 15 +
etc/lambda-spark/spark-class | 8 +
etc/lambda-spark/spark-defaults.conf | 4 +
k8s_spark/k8s_spark/org.alues.yaml | 731 ++++++++
k8s_spark/nohup.out | 4 +
.../parquet.spark.helm/charts/spark-5.9.4.tgz | Bin 0 -> 36223 bytes
k8s_spark/parquet.spark.helm/nohup.out | 1973 ++++++++++++++++++++
.../parquet.spark.helm/templates/deployment.yaml | 4 +
k8s_spark/parquet.spark.helm/values.yaml | 10 +-
nohup.out | 4 +
one_offs/local_flask.py | 16 +
one_offs/py_geo_hash_test.py | 12 +
one_offs/trigger.s3.ingest.py | 43 +
parquet_flask/__init__.py | 5 +-
parquet_flask/__main__.py | 9 +-
parquet_flask/aws/aws_cred.py | 23 +-
parquet_flask/aws/es_abstract.py | 55 +
parquet_flask/aws/es_factory.py | 16 +
parquet_flask/aws/es_middleware.py | 202 ++
parquet_flask/aws/es_middleware_aws.py | 30 +
.../cdms_lambda_func/cdms_lambda_constants.py | 8 +
.../cdms_lambda_func/index_to_es}/__init__.py | 0
.../cdms_lambda_func/index_to_es/execute_lambda.py | 18 +
.../index_to_es/parquet_file_es_indexer.py | 85 +
.../index_to_es/parquet_stat_extractor.py | 35 +
.../index_to_es/s3_stat_extractor.py | 202 ++
.../ingest_s3_to_cdms/ingest_s3_to_cdms.py | 66 +-
parquet_flask/cdms_lambda_func/lambda_func_env.py | 3 +
.../cdms_lambda_func/lambda_logger_generator.py | 30 +
.../cdms_lambda_func/s3_records}/__init__.py | 0
.../cdms_lambda_func/s3_records/s3_2_sqs.py | 165 ++
.../s3_records/s3_event_validator_abstract.py | 19 +
parquet_flask/io_logic/cdms_constants.py | 21 +-
parquet_flask/io_logic/cdms_schema.py | 89 +
parquet_flask/io_logic/ingest_new_file.py | 92 +-
.../{metadata_tbl_io.py => metadata_tbl_es.py} | 44 +-
parquet_flask/io_logic/metadata_tbl_interface.py | 4 +
parquet_flask/io_logic/metadata_tbl_io.py | 4 +
.../io_logic/parquet_paths_es_retriever.py | 114 ++
.../parquet_query_condition_management_v3.py | 19 +-
...py => parquet_query_condition_management_v4.py} | 79 +-
parquet_flask/io_logic/partitioned_parquet_path.py | 48 +
parquet_flask/io_logic/query_v4.py | 51 +-
parquet_flask/io_logic/raw_query.py | 2 +-
parquet_flask/io_logic/replace_file.py | 2 +-
.../io_logic/sub_collection_statistics.py | 290 +++
.../parquet_stat_extractor}/__init__.py | 0
.../parquet_stat_extractor/local_spark_session.py | 16 +
.../local_statistics_retriever.py | 34 +
.../parquet_stat_extractor/statistics_retriever.py | 206 ++
.../statistics_retriever_wrapper.py | 39 +
parquet_flask/utils/config.py | 16 +-
parquet_flask/utils/factory_abstract.py | 7 +
parquet_flask/utils/general_utils.py | 8 +
parquet_flask/utils/parallel_json_validator.py | 15 +-
parquet_flask/utils/spatial_utils.py | 30 +
parquet_flask/utils/time_utils.py | 12 +-
parquet_flask/v1/__init__.py | 10 +-
.../v1/extract_statistics_from_parquet_file.py | 47 +
parquet_flask/v1/ingest_aws_json.py | 17 +-
.../v1/insitu_query_swagger/insitu-spec-0.0.1.yml | 2 +-
.../v1/query_data_doms_custom_pagination.py | 18 +-
.../v1/sub_collection_statistics_endpoint.py | 80 +
rotate_keys.bash | 28 +
setup.py | 5 +-
setup.py => setup_lambda.py | 18 +-
tests/back_to_basis/Test1/._SUCCESS.crc | Bin 0 -> 8 bytes
.../back_to_basis/Test1/_SUCCESS | 0
{parquet_cli => tests/back_to_basis}/__init__.py | 0
tests/back_to_basis/local_spark.py | 54 +
tests/back_to_basis/s3_read.py | 28 +
tests/back_to_basis/s3_spark.py | 51 +
tests/bench_mark/bench_mark.py | 52 +-
tests/bench_mark/bench_parallel_process.py | 32 +-
tests/get_aws_creds.py | 16 +
.../parquet_flask/aws}/__init__.py | 0
.../aws/manual_test_es_middleware_aws.py | 31 +
.../parquet_flask/cdms_lambda_func}/__init__.py | 0
.../cdms_lambda_func/index_to_es}/__init__.py | 0
.../manual_test_parquet_file_es_indexer.py | 75 +
.../index_to_es/test_parquet_stat_extractor.py | 37 +
.../index_to_es/test_s3_stat_extractor.py | 44 +
.../cdms_lambda_func/s3_records}/__init__.py | 0
.../cdms_lambda_func/s3_records/test_s3_s2_sqs.py | 32 +
.../manual_test_parquet_paths_es_retriever.py | 33 +
tests/parquet_flask/io_logic/test_cdms_schema.py | 29 +
.../parquet_flask/io_logic/test_ingest_new_file.py | 20 +
.../parquet_flask/io_logic/test_metadata_tbl_es.py | 54 +
.../test_parquet_query_condition_management_v3.py | 147 +-
.../io_logic/test_partitioned_parquet_path.py | 53 +-
.../parquet_stat_extractor}/__init__.py | 0
.../parquet_stat_extractor/in_situ_schema.json | 0
...882-3536-435b-b736-96bf3be9ee29.c000.gz.parquet | Bin 0 -> 17393 bytes
.../test_local_statistics_retriever.py | 170 ++
.../test_statistics_retriever.py | 63 +
tests/parquet_flask/utils/test_general_utils.py | 7 +
tests/parquet_flask/utils/test_spatial_utils.py | 23 +
107 files changed, 6367 insertions(+), 272 deletions(-)
create mode 100644 ci.cd/Makefile
create mode 100755 ci.cd/create_s3_zip.sh
create mode 100644 ci.cd/lambda_docker_upload.sh
create mode 100755 ci.cd/local_upload.sh
create mode 100644 docker/parquet.lambda.Dockerfile
create mode 100644 documentations/navair.demo.md
create mode 100644 etc/elasticsearch/all_alias.json
create mode 100644 etc/elasticsearch/entry_file_records.json
create mode 100644 etc/elasticsearch/parquet_stats_v1.json
create mode 100644 etc/elasticsearch/setup_es.txt
create mode 100644 etc/lambda-spark/spark-class
create mode 100644 etc/lambda-spark/spark-defaults.conf
create mode 100644 k8s_spark/k8s_spark/org.alues.yaml
create mode 100644 k8s_spark/nohup.out
create mode 100644 k8s_spark/parquet.spark.helm/charts/spark-5.9.4.tgz
create mode 100644 k8s_spark/parquet.spark.helm/nohup.out
create mode 100644 nohup.out
create mode 100644 one_offs/local_flask.py
create mode 100644 one_offs/py_geo_hash_test.py
create mode 100644 one_offs/trigger.s3.ingest.py
create mode 100644 parquet_flask/aws/es_abstract.py
create mode 100644 parquet_flask/aws/es_factory.py
create mode 100644 parquet_flask/aws/es_middleware.py
create mode 100644 parquet_flask/aws/es_middleware_aws.py
create mode 100644 parquet_flask/cdms_lambda_func/cdms_lambda_constants.py
copy {parquet_cli => parquet_flask/cdms_lambda_func/index_to_es}/__init__.py (100%)
create mode 100644 parquet_flask/cdms_lambda_func/index_to_es/execute_lambda.py
create mode 100644 parquet_flask/cdms_lambda_func/index_to_es/parquet_file_es_indexer.py
create mode 100644 parquet_flask/cdms_lambda_func/index_to_es/parquet_stat_extractor.py
create mode 100644 parquet_flask/cdms_lambda_func/index_to_es/s3_stat_extractor.py
create mode 100644 parquet_flask/cdms_lambda_func/lambda_logger_generator.py
copy {parquet_cli => parquet_flask/cdms_lambda_func/s3_records}/__init__.py (100%)
create mode 100644 parquet_flask/cdms_lambda_func/s3_records/s3_2_sqs.py
create mode 100644 parquet_flask/cdms_lambda_func/s3_records/s3_event_validator_abstract.py
copy parquet_flask/io_logic/{metadata_tbl_io.py => metadata_tbl_es.py} (57%)
create mode 100644 parquet_flask/io_logic/parquet_paths_es_retriever.py
copy parquet_flask/io_logic/{parquet_query_condition_management_v3.py => parquet_query_condition_management_v4.py} (64%)
create mode 100644 parquet_flask/io_logic/sub_collection_statistics.py
copy {parquet_cli => parquet_flask/parquet_stat_extractor}/__init__.py (100%)
create mode 100644 parquet_flask/parquet_stat_extractor/local_spark_session.py
create mode 100644 parquet_flask/parquet_stat_extractor/local_statistics_retriever.py
create mode 100644 parquet_flask/parquet_stat_extractor/statistics_retriever.py
create mode 100644 parquet_flask/parquet_stat_extractor/statistics_retriever_wrapper.py
create mode 100644 parquet_flask/utils/factory_abstract.py
create mode 100644 parquet_flask/utils/spatial_utils.py
create mode 100644 parquet_flask/v1/extract_statistics_from_parquet_file.py
create mode 100644 parquet_flask/v1/sub_collection_statistics_endpoint.py
create mode 100755 rotate_keys.bash
copy setup.py => setup_lambda.py (65%)
create mode 100644 tests/back_to_basis/Test1/._SUCCESS.crc
copy parquet_cli/__init__.py => tests/back_to_basis/Test1/_SUCCESS (100%)
copy {parquet_cli => tests/back_to_basis}/__init__.py (100%)
create mode 100644 tests/back_to_basis/local_spark.py
create mode 100644 tests/back_to_basis/s3_read.py
create mode 100644 tests/back_to_basis/s3_spark.py
create mode 100644 tests/get_aws_creds.py
copy {parquet_cli => tests/parquet_flask/aws}/__init__.py (100%)
create mode 100644 tests/parquet_flask/aws/manual_test_es_middleware_aws.py
copy {parquet_cli => tests/parquet_flask/cdms_lambda_func}/__init__.py (100%)
copy {parquet_cli => tests/parquet_flask/cdms_lambda_func/index_to_es}/__init__.py (100%)
create mode 100644 tests/parquet_flask/cdms_lambda_func/index_to_es/manual_test_parquet_file_es_indexer.py
create mode 100644 tests/parquet_flask/cdms_lambda_func/index_to_es/test_parquet_stat_extractor.py
create mode 100644 tests/parquet_flask/cdms_lambda_func/index_to_es/test_s3_stat_extractor.py
copy {parquet_cli => tests/parquet_flask/cdms_lambda_func/s3_records}/__init__.py (100%)
create mode 100644 tests/parquet_flask/cdms_lambda_func/s3_records/test_s3_s2_sqs.py
create mode 100644 tests/parquet_flask/io_logic/manual_test_parquet_paths_es_retriever.py
create mode 100644 tests/parquet_flask/io_logic/test_cdms_schema.py
create mode 100644 tests/parquet_flask/io_logic/test_ingest_new_file.py
create mode 100644 tests/parquet_flask/io_logic/test_metadata_tbl_es.py
copy {parquet_cli => tests/parquet_flask/parquet_stat_extractor}/__init__.py (100%)
copy in_situ_schema.json => tests/parquet_flask/parquet_stat_extractor/in_situ_schema.json (100%)
create mode 100644 tests/parquet_flask/parquet_stat_extractor/part-00000-74ebb882-3536-435b-b736-96bf3be9ee29.c000.gz.parquet
create mode 100644 tests/parquet_flask/parquet_stat_extractor/test_local_statistics_retriever.py
create mode 100644 tests/parquet_flask/parquet_stat_extractor/test_statistics_retriever.py
create mode 100644 tests/parquet_flask/utils/test_spatial_utils.py