You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airavata.apache.org by ma...@apache.org on 2019/05/22 23:17:08 UTC
[airavata-django-portal] 01/04: AIRAVATA-3016 Factor out data product handling from datastore
This is an automated email from the ASF dual-hosted git repository.
machristie pushed a commit to branch airavata-3016
in repository https://gitbox.apache.org/repos/asf/airavata-django-portal.git
commit d69d3d9b4691c2f45dc4e16a566e76c1c9550c45
Author: Marcus Christie <ma...@apache.org>
AuthorDate: Wed May 22 13:37:49 2019 -0400
AIRAVATA-3016 Factor out data product handling from datastore
---
django_airavata/apps/api/data_products_helper.py | 92 ++++++++---
django_airavata/apps/api/datastore.py | 202 +++++------------------
django_airavata/apps/api/serializers.py | 6 +-
django_airavata/apps/api/views.py | 40 +----
4 files changed, 126 insertions(+), 214 deletions(-)
diff --git a/django_airavata/apps/api/data_products_helper.py b/django_airavata/apps/api/data_products_helper.py
index 65b8024..008f3bb 100644
--- a/django_airavata/apps/api/data_products_helper.py
+++ b/django_airavata/apps/api/data_products_helper.py
@@ -1,4 +1,6 @@
+import logging
import os
+from urllib.parse import urlparse
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
@@ -13,28 +15,39 @@ from airavata.model.data.replica.ttypes import (
from . import datastore, models
+logger = logging.getLogger(__name__)
+
def save(request, path, file):
- # return data_product
- # TODO
- pass
+ "Save file in path in the user's storage."
+ username = request.user.username
+ full_path = datastore.save(username, path, file)
+ data_product = _save_data_product(request, full_path)
+ return data_product
def open(request, data_product):
- # return file object
- # TODO
- pass
+ "Return file object for replica if it exists in user storage."
+ path = _get_replica_filepath(data_product)
+ return datastore.open(request.user.username, path)
def exists(request, data_product):
- # return boolean
- # TODO
- pass
+ "Return True if replica for data_product exists in user storage."
+ path = _get_replica_filepath(data_product)
+ return datastore.exists(request.user.username, path)
def delete(request, data_product):
- # TODO
- pass
+ "Delete replica for data product in this data store."
+ path = _get_replica_filepath(data_product)
+ try:
+ datastore.delete(request.user.username, path)
+ _delete_data_product(request, path)
+ except Exception as e:
+ logger.exception("Unable to delete file {} for data product uri {}"
+ .format(path, data_product.productUri))
+ raise
def listdir(request, path):
@@ -58,6 +71,18 @@ def listdir(request, path):
raise ObjectDoesNotExist("User storage path does not exist")
+def get_experiment_dir(request,
+ project_name=None,
+ experiment_name=None,
+ path=None):
+ return datastore.get_experiment_dir(
+ request.user.username, project_name, experiment_name, path)
+
+
+def create_user_dir(request, path):
+ return datastore.create_user_dir(request.user.username, path)
+
+
def _get_data_product_uri(request, full_path):
user_file = models.User_Files.objects.filter(
@@ -65,17 +90,34 @@ def _get_data_product_uri(request, full_path):
if user_file.exists():
product_uri = user_file[0].file_dpu
else:
- data_product = _create_data_product(request.user.username, full_path)
- product_uri = request.airavata_client.registerDataProduct(
- request.authz_token, data_product)
- user_file_instance = models.User_Files(
- username=request.user.username,
- file_path=full_path,
- file_dpu=product_uri)
- user_file_instance.save()
+ data_product = _save_data_product(request, full_path)
+ product_uri = data_product.productUri
return product_uri
+def _save_data_product(request, full_path):
+ "Create, register and record in DB a data product for full_path."
+ data_product = _create_data_product(request.user.username, full_path)
+ product_uri = request.airavata_client.registerDataProduct(
+ request.authz_token, data_product)
+ data_product.productUri = product_uri
+ user_file_instance = models.User_Files(
+ username=request.user.username,
+ file_path=full_path,
+ file_dpu=product_uri)
+ user_file_instance.save()
+ return data_product
+
+
+def _delete_data_product(request, full_path):
+ # TODO: call API to delete data product from replica catalog when it is
+ # available (not currently implemented)
+ user_file = models.User_Files.objects.filter(
+ username=request.user.username, file_path=full_path)
+ if user_file.exists():
+ user_file.delete()
+
+
def _create_data_product(username, full_path):
data_product = DataProductModel()
data_product.gatewayId = settings.GATEWAY_ID
@@ -97,3 +139,15 @@ def _create_data_product(username, full_path):
full_path)
data_product.replicaLocations = [data_replica_location]
return data_product
+
+
+def _get_replica_filepath(data_product):
+ replica_filepaths = [rep.filePath
+ for rep in data_product.replicaLocations
+ if rep.replicaLocationCategory ==
+ ReplicaLocationCategory.GATEWAY_DATA_STORE]
+ replica_filepath = (replica_filepaths[0]
+ if len(replica_filepaths) > 0 else None)
+ if replica_filepath:
+ return urlparse(replica_filepath).path
+ return None
diff --git a/django_airavata/apps/api/datastore.py b/django_airavata/apps/api/datastore.py
index 2b5f9b5..fa625f0 100644
--- a/django_airavata/apps/api/datastore.py
+++ b/django_airavata/apps/api/datastore.py
@@ -1,166 +1,93 @@
import logging
import os
-from urllib.parse import urlparse
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist, SuspiciousFileOperation
-from django.core.files.storage import FileSystemStorage
-
-from airavata.model.data.replica.ttypes import (
- DataProductModel,
- DataProductType,
- DataReplicaLocationModel,
- ReplicaLocationCategory,
- ReplicaPersistentType
-)
+from django.core.files.storage import FileSystemStorage, Storage
experiment_data_storage = FileSystemStorage(
location=settings.GATEWAY_DATA_STORE_DIR)
logger = logging.getLogger(__name__)
-# TODO: exists(username, path)
-def exists(data_product):
+def exists(username, path):
"""Check if replica for data product exists in this data store."""
- filepath = _get_replica_filepath(data_product)
try:
- return experiment_data_storage.exists(filepath) if filepath else False
+ return _user_data_storage(username).exists(path)
except SuspiciousFileOperation as e:
- logger.warning("Unable to find file at {} for data product uri {}"
- .format(filepath, data_product.productUri))
+ logger.warning("Invalid path for user {}: {}".format(username, str(e)))
return False
-# TODO: open(username, path)
-def open(data_product):
- """Open replica for data product if it exists in this data store."""
- if exists(data_product):
- filepath = _get_replica_filepath(data_product)
- return experiment_data_storage.open(filepath)
+def open(username, path):
+ """Open path for user if it exists in this data store."""
+ if exists(username, path):
+ return _user_data_storage(username).open(path)
else:
- raise ObjectDoesNotExist("Replica file does not exist")
+ raise ObjectDoesNotExist("File path does not exist: {}".format(path))
-def save_user(username, file):
- """Save file to username/project name/experiment_name in data store."""
- user_dir = os.path.join(
- experiment_data_storage.get_valid_name(username),
- 'MyFiles')
- # file.name may be full path, so get just the name of the file
- file_name = os.path.basename(file.name)
- file_path = os.path.join(
- user_dir,
- experiment_data_storage.get_valid_name(file_name))
- input_file_name = experiment_data_storage.save(file_path, file)
- input_file_fullpath = experiment_data_storage.path(input_file_name)
- # Create DataProductModel instance with DataReplicaLocationModel
- data_product = DataProductModel()
- data_product.gatewayId = settings.GATEWAY_ID
- data_product.ownerName = username
- data_product.productName = file_name
- data_product.dataProductType = DataProductType.FILE
- data_replica_location = DataReplicaLocationModel()
- data_replica_location.storageResourceId = \
- settings.GATEWAY_DATA_STORE_RESOURCE_ID
- data_replica_location.replicaName = \
- "{} gateway data store copy".format(file_name)
- data_replica_location.replicaLocationCategory = \
- ReplicaLocationCategory.GATEWAY_DATA_STORE
- data_replica_location.replicaPersistentType = \
- ReplicaPersistentType.TRANSIENT
- data_replica_location.filePath = \
- "file://{}:{}".format(settings.GATEWAY_DATA_STORE_HOSTNAME,
- input_file_fullpath)
- data_product.replicaLocations = [data_replica_location]
- return data_product
-
-
-# TODO: save(username, path, file)
-def save(username, project_name, experiment_name, file):
+def save(username, path, file):
"""Save file to username/project name/experiment_name in data store."""
- exp_dir = os.path.join(
- _user_dir_name(username),
- experiment_data_storage.get_valid_name(project_name),
- experiment_data_storage.get_valid_name(experiment_name))
# file.name may be full path, so get just the name of the file
file_name = os.path.basename(file.name)
+ user_data_storage = _user_data_storage(username)
file_path = os.path.join(
- exp_dir,
- experiment_data_storage.get_valid_name(file_name))
- input_file_name = experiment_data_storage.save(file_path, file)
- input_file_fullpath = experiment_data_storage.path(input_file_name)
- # Create DataProductModel instance with DataReplicaLocationModel
- data_product = _create_data_product(username, input_file_fullpath)
- return data_product
-
-
-def save_user_file(username, path, file):
- experiment_data_storage.save(os.path.join(
- _user_dir_name(username),
- experiment_data_storage.get_valid_name(path),
- experiment_data_storage.get_valid_name(file.name)
- ), file)
-
-
-def create_user_dir(username, path, dir_name):
- user_dir = os.path.join(
- _user_dir_name(username),
- path,
- experiment_data_storage.get_valid_name(dir_name))
- if not experiment_data_storage.exists(user_dir):
- os.mkdir(experiment_data_storage.path(user_dir))
- else:
- raise Exception(
- "Directory {} already exists at that path".format(dir_name))
+ path, user_data_storage.get_valid_name(file_name))
+ input_file_name = user_data_storage.save(file_path, file)
+ input_file_fullpath = user_data_storage.path(input_file_name)
+ return input_file_fullpath
-# TODO: copy(username, source_path, target_path)
-def copy(username, project_name, experiment_name, data_product):
- """Copy a data product into username/project_name/experiment_name dir."""
- f = open(data_product)
- return save(username, project_name, experiment_name, f)
+def create_user_dir(username, path):
+ user_data_storage = _user_data_storage(username)
+ user_dir = user_data_storage.get_valid_name(path)
+ if not user_data_storage.exists(user_dir):
+ os.mkdir(user_data_storage.path(user_dir))
+ else:
+ raise Exception(
+ "Directory {} already exists".format(path))
-# TODO: delete(username, path)
-def delete(data_product):
+def copy(username, source_path, target_path):
+ """Copy a user file into target_path dir."""
+ f = open(username, source_path)
+ return save(username, target_path, f)
- """Delete replica for data product in this data store."""
- if exists(data_product):
- filepath = _get_replica_filepath(data_product)
- try:
- experiment_data_storage.delete(filepath)
- except Exception as e:
- logger.error("Unable to delete file {} for data product uri {}"
- .format(filepath, data_product.productUri))
- raise
+def delete(username, path):
+ """Delete file in this data store."""
+ if exists(username, path):
+ user_data_storage = _user_data_storage(username)
+ user_data_storage.delete(path)
else:
- raise ObjectDoesNotExist("Replica file does not exist")
+ raise ObjectDoesNotExist("File path does not exist: {}".format(path))
+# TODO: update this to just return an available experiment directory name
def get_experiment_dir(
username,
project_name=None,
experiment_name=None,
path=None):
"""Return an experiment directory (full path) for the given experiment."""
+ user_experiment_data_storage = _user_data_storage(username)
if path is None:
experiment_dir_name = os.path.join(
- _user_dir_name(username),
- experiment_data_storage.get_valid_name(project_name),
- experiment_data_storage.get_valid_name(experiment_name))
- experiment_dir = experiment_data_storage.path(experiment_dir_name)
+ user_experiment_data_storage.get_valid_name(project_name),
+ user_experiment_data_storage.get_valid_name(experiment_name))
+ experiment_dir = user_experiment_data_storage.path(experiment_dir_name)
else:
# path can be relative to the user's storage space or absolute (as long
# as it is still inside the user's storage space)
user_experiment_data_storage = _user_data_storage(username)
experiment_dir = user_experiment_data_storage.path(path)
- if not experiment_data_storage.exists(experiment_dir):
+ if not user_experiment_data_storage.exists(experiment_dir):
os.makedirs(experiment_dir,
- mode=experiment_data_storage.directory_permissions_mode)
+ mode=user_experiment_data_storage.directory_permissions_mode)
# os.mkdir mode isn't always respected so need to chmod to be sure
os.chmod(experiment_dir,
- mode=experiment_data_storage.directory_permissions_mode)
+ mode=user_experiment_data_storage.directory_permissions_mode)
return experiment_dir
@@ -177,16 +104,6 @@ def user_file_exists(username, file_path):
return False
-def get_data_product(username, file_path):
- """Get a DataProduct instance for file in user's data storage space."""
- if user_file_exists(username, file_path):
- full_path = experiment_data_storage.path(
- os.path.join(_user_dir_name(username), file_path))
- return _create_data_product(username, full_path)
- else:
- raise ObjectDoesNotExist("User file does not exist")
-
-
def list_user_dir(username, file_path):
logger.debug("file_path={}".format(file_path))
user_data_storage = _user_data_storage(username)
@@ -198,43 +115,8 @@ def path(username, file_path):
return user_data_storage.path(file_path)
-def _get_replica_filepath(data_product):
- replica_filepaths = [rep.filePath
- for rep in data_product.replicaLocations
- if rep.replicaLocationCategory ==
- ReplicaLocationCategory.GATEWAY_DATA_STORE]
- replica_filepath = (replica_filepaths[0]
- if len(replica_filepaths) > 0 else None)
- if replica_filepath:
- return urlparse(replica_filepath).path
- return None
-
-
-def _create_data_product(username, full_path):
- data_product = DataProductModel()
- data_product.gatewayId = settings.GATEWAY_ID
- data_product.ownerName = username
- file_name = os.path.basename(full_path)
- data_product.productName = file_name
- data_product.dataProductType = DataProductType.FILE
- data_replica_location = DataReplicaLocationModel()
- data_replica_location.storageResourceId = \
- settings.GATEWAY_DATA_STORE_RESOURCE_ID
- data_replica_location.replicaName = \
- "{} gateway data store copy".format(file_name)
- data_replica_location.replicaLocationCategory = \
- ReplicaLocationCategory.GATEWAY_DATA_STORE
- data_replica_location.replicaPersistentType = \
- ReplicaPersistentType.TRANSIENT
- data_replica_location.filePath = \
- "file://{}:{}".format(settings.GATEWAY_DATA_STORE_HOSTNAME,
- full_path)
- data_product.replicaLocations = [data_replica_location]
- return data_product
-
-
def _user_dir_name(username):
- return experiment_data_storage.get_valid_name(username)
+ return Storage().get_valid_name(username)
def _user_data_storage(username):
diff --git a/django_airavata/apps/api/serializers.py b/django_airavata/apps/api/serializers.py
index 86e235e..05825b8 100644
--- a/django_airavata/apps/api/serializers.py
+++ b/django_airavata/apps/api/serializers.py
@@ -52,7 +52,7 @@ from airavata.model.status.ttypes import ExperimentStatus
from airavata.model.user.ttypes import UserProfile
from airavata.model.workspace.ttypes import Project
-from . import datastore, models, thrift_utils
+from . import data_products_helper, models, thrift_utils
log = logging.getLogger(__name__)
@@ -435,8 +435,8 @@ class DataProductSerializer(
def get_downloadURL(self, data_product):
"""Getter for downloadURL field."""
- if datastore.exists(data_product):
- request = self.context['request']
+ request = self.context['request']
+ if data_products_helper.exists(request, data_product):
return (request.build_absolute_uri(
reverse('django_airavata_api:download_file')) +
'?' +
diff --git a/django_airavata/apps/api/views.py b/django_airavata/apps/api/views.py
index 21c7b6c..bc2edfc 100644
--- a/django_airavata/apps/api/views.py
+++ b/django_airavata/apps/api/views.py
@@ -287,6 +287,7 @@ class ExperimentViewSet(APIBackedViewSet):
def _copy_cloned_experiment_input_uris(self, cloned_experiment):
# update the experimentInputs of type URI, copying files in data store
request = self.request
+ # TODO: create experiment data directory and copy inputs into it
target_project = request.airavata_client.getProject(
self.authz_token, cloned_experiment.projectId)
for experiment_input in cloned_experiment.experimentInputs:
@@ -859,17 +860,16 @@ class DataProductView(APIView):
@login_required
def upload_input_file(request):
try:
- username = request.user.username
project_id = request.POST['project-id']
project = request.airavata_client.getProject(
request.authz_token, project_id)
exp_name = request.POST['experiment-name']
input_file = request.FILES['file']
- data_product = datastore.save(username, project.name, exp_name,
- input_file)
- data_product_uri = request.airavata_client.registerDataProduct(
- request.authz_token, data_product)
- data_product.productUri = data_product_uri
+ # TODO: experiment_data_dir should be passed in
+ experiment_data_dir = data_products_helper.get_experiment_dir(
+ request, project_name=project.name, experiment_name=exp_name)
+ data_product = data_products_helper.save(request, experiment_data_dir,
+ input_file)
serializer = serializers.DataProductSerializer(
data_product, context={'request': request})
return JsonResponse({'uploaded': True,
@@ -894,7 +894,7 @@ def download_file(request):
.format(data_product_uri), exc_info=True)
raise Http404("data product does not exist") from e
try:
- data_file = datastore.open(data_product)
+ data_file = data_products_helper.open(request, data_product)
response = FileResponse(data_file,
content_type="application/octet-stream")
file_name = os.path.basename(data_file.name)
@@ -928,7 +928,7 @@ def delete_file(request):
if (data_product.gatewayId != settings.GATEWAY_ID or
data_product.ownerName != request.user.username):
raise PermissionDenied()
- datastore.delete(data_product)
+ data_products_helper.delete(request, data_product)
return HttpResponse(status=204)
except ObjectDoesNotExist as e:
raise Http404(str(e)) from e
@@ -1383,30 +1383,6 @@ class UserStoragePathView(APIView):
context={'request': request})
return Response(serializer.data)
- def post(self, request, path="/", format=None):
- # TODO: this needs to be fixed or rethought
- username = request.user.username
- user_storage_path = path
- if user_storage_path.startswith("/"):
- user_storage_path = "." + user_storage_path
- serializer = self.serializer_class(
- data=request.data, context={
- 'request': request})
- serializer.is_valid(raise_exception=True)
- if serializer.validated_data['type'] == 'file':
- upload_file = request.FILES['file']
- datastore.save_user_file(username, user_storage_path, upload_file)
- elif serializer.validated_data['type'] == 'dir':
- datastore.create_user_dir(
- username, user_storage_path, serializer.validated_data['name'])
-
- # TODO return representation of created item
- listing = datastore.list_user_dir(
- request.user.username, user_storage_path)
- serializer = self.serializer_class(
- listing, many=True, context={'request': request})
- return Response(serializer.data)
-
class WorkspacePreferencesView(APIView):
serializer_class = serializers.WorkspacePreferencesSerializer