You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@chemistry.apache.org by "Jeff Potts (JIRA)" <ji...@apache.org> on 2017/08/31 14:52:00 UTC

[jira] [Closed] (CMIS-996) BrowserBinding getObjectByPath: unicode is not supported by urllib.quote

     [ https://issues.apache.org/jira/browse/CMIS-996?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jeff Potts closed CMIS-996.
---------------------------

> BrowserBinding getObjectByPath: unicode is not supported by urllib.quote
> ------------------------------------------------------------------------
>
>                 Key: CMIS-996
>                 URL: https://issues.apache.org/jira/browse/CMIS-996
>             Project: Chemistry
>          Issue Type: Bug
>          Components: python-cmislib
>            Reporter: Laurent Mignon
>            Assignee: Jeff Potts
>             Fix For: cmislib 0.6.0
>
>
> The function getObjectByPath uses urllib.quote to quote the path into the URL, but urllib.quote doesn't support unicode values.
> see https://github.com/apache/chemistry-cmislib/pull/4
> Patch code from https://github.com/apache/chemistry-cmislib/pull/4.patch
> {code}
> From f0276c81ef55b6e16a2cbe096870049f0d578747 Mon Sep 17 00:00:00 2001
> From: Laurent Mignon <la...@acsone.eu>
> Date: Fri, 9 Sep 2016 19:52:30 +0200
> Subject: [PATCH] Fix UnicodeEncodeError when calling urllib.quote whith value
>  containing accented chars In the same time ensure that all the sources files
>  are in the same encoding as python itself (utf-8) by adding an encoding
>  declaration at first line of any files.
> ---
>  src/cmislib/atompub/binding.py |  1 +
>  src/cmislib/browser/binding.py | 35 +++++++++++++++++++--------
>  src/cmislib/cmis_services.py   |  1 +
>  src/cmislib/domain.py          |  1 +
>  src/cmislib/exceptions.py      |  1 +
>  src/cmislib/messages.py        |  1 +
>  src/cmislib/model.py           |  1 +
>  src/cmislib/net.py             |  1 +
>  src/cmislib/util.py            |  1 +
>  src/tests/cmislibtest.py       | 55 +++++++++++++++++++++---------------------
>  10 files changed, 61 insertions(+), 37 deletions(-)
> diff --git a/src/cmislib/atompub/binding.py b/src/cmislib/atompub/binding.py
> index f1a629f..55a066a 100644
> --- a/src/cmislib/atompub/binding.py
> +++ b/src/cmislib/atompub/binding.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/browser/binding.py b/src/cmislib/browser/binding.py
> index 7d7d758..45b6aa6 100644
> --- a/src/cmislib/browser/binding.py
> +++ b/src/cmislib/browser/binding.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> @@ -715,7 +716,7 @@ def getObjectByPath(self, path, **kwargs):
>           - includeAllowableActions
>          """
>  
> -        byPathUrl = self.getRootFolderUrl() + quote(path) + "?cmisselector=object"
> +        byPathUrl = self.getRootFolderUrl() + safe_quote(path) + "?cmisselector=object"
>          result = self._cmisClient.binding.get(byPathUrl.encode('utf-8'),
>                                                self._cmisClient.username,
>                                                self._cmisClient.password,
> @@ -926,7 +927,7 @@ def getTypeChildren(self,
>          typesUrl = self.getRepositoryUrl() + "?cmisselector=typeChildren"
>  
>          if typeId is not None:
> -            typesUrl += "&typeId=%s" % (quote(typeId))
> +            typesUrl += "&typeId=%s" % (safe_quote(typeId))
>  
>          result = self._cmisClient.binding.get(typesUrl,
>                                                self._cmisClient.username,
> @@ -986,7 +987,7 @@ def getTypeDescendants(self, typeId=None, depth=None, **kwargs):
>          typesUrl = self.getRepositoryUrl() + "?cmisselector=typeDescendants"
>  
>          if typeId is not None:
> -            typesUrl += "&typeId=%s" % (quote(typeId))
> +            typesUrl += "&typeId=%s" % (safe_quote(typeId))
>          if depth is not None:
>              typesUrl += "&depth=%s" % (depth)
>          print typesUrl
> @@ -1180,7 +1181,7 @@ def query(self, statement, **kwargs):
>          """
>  
>          # build the CMIS query XML that we're going to POST
> -        queryUrl = self.getRepositoryUrl() + "?cmisaction=query&q=" + quote(statement)
> +        queryUrl = self.getRepositoryUrl() + "?cmisaction=query&q=" + safe_quote(statement)
>  
>          # do the POST
>          result = self._cmisClient.binding.post(queryUrl.encode('utf-8'),
> @@ -3161,6 +3162,15 @@ def encode_multipart_formdata(fields, contentFile, contentType):
>      return content_type, body
>  
>  
> +def to_utf8(value):
> +
> +    """ Safe encodng of value to utf-8 taking care of unicode values
> +    """
> +    if isinstance(value, unicode):
> +        value = value.encode('utf8')
> +    return value
> +
> +
>  def safe_urlencode(in_dict):
>  
>      """
> @@ -3171,17 +3181,22 @@ def safe_urlencode(in_dict):
>      def encoded_dict(in_dict):
>          out_dict = {}
>          for k, v in in_dict.iteritems():
> -            if isinstance(v, unicode):
> -                v = v.encode('utf8')
> -            elif isinstance(v, str):
> -                # Must be encoded in UTF-8
> -                v.decode('utf8')
> -            out_dict[k] = v
> +            out_dict[k] = to_utf8(v)
>          return out_dict
>  
>      return urlencode(encoded_dict(in_dict))
>  
>  
> +def safe_quote(value):
> +
> +    """
> +    Safe encoding of value taking care of unicode value
> +    urllib.quote doesn't like unicode values
> +    """
> +
> +    return quote(to_utf8(value))
> +
> +
>  class ResultsSerializer(object):
>  
>      """
> diff --git a/src/cmislib/cmis_services.py b/src/cmislib/cmis_services.py
> index 8c59cdb..4659d02 100644
> --- a/src/cmislib/cmis_services.py
> +++ b/src/cmislib/cmis_services.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/domain.py b/src/cmislib/domain.py
> index a2f7a25..a362339 100644
> --- a/src/cmislib/domain.py
> +++ b/src/cmislib/domain.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/exceptions.py b/src/cmislib/exceptions.py
> index 207280d..58d601d 100644
> --- a/src/cmislib/exceptions.py
> +++ b/src/cmislib/exceptions.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/messages.py b/src/cmislib/messages.py
> index 2df8e45..0d49756 100644
> --- a/src/cmislib/messages.py
> +++ b/src/cmislib/messages.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/model.py b/src/cmislib/model.py
> index 0a04c09..03ede82 100644
> --- a/src/cmislib/model.py
> +++ b/src/cmislib/model.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/net.py b/src/cmislib/net.py
> index 3984918..e5124a9 100644
> --- a/src/cmislib/net.py
> +++ b/src/cmislib/net.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/cmislib/util.py b/src/cmislib/util.py
> index 7b6855f..0a2ff6a 100644
> --- a/src/cmislib/util.py
> +++ b/src/cmislib/util.py
> @@ -1,3 +1,4 @@
> +# -*- coding: utf-8 -*-
>  #
>  #      Licensed to the Apache Software Foundation (ASF) under one
>  #      or more contributor license agreements.  See the NOTICE file
> diff --git a/src/tests/cmislibtest.py b/src/tests/cmislibtest.py
> index a81be56..70f9415 100644
> --- a/src/tests/cmislibtest.py
> +++ b/src/tests/cmislibtest.py
> @@ -346,33 +346,34 @@ def testGetFolder(self):
>      def testGetObjectByPath(self):
>          """Create test objects (one folder, one document) then try to get
>          them by path"""
> -        # names of folders and test docs
> -        parentFolderName = 'testGetObjectByPath folder'
> -        subFolderName = 'subfolder'
> -        docName = 'testdoc'
> -
> -        # create the folder structure
> -        parentFolder = self._testFolder.createFolder(parentFolderName)
> -        subFolder = parentFolder.createFolder(subFolderName)
> -        # use the subfolder path to get the folder by path
> -        subFolderPath = subFolder.getProperties().get("cmis:path")
> -        searchFolder = self._repo.getObjectByPath(subFolderPath)
> -        self.assertEquals(subFolder.getObjectId(), searchFolder.getObjectId())
> -
> -        # create a test doc
> -        doc = subFolder.createDocument(docName)
> -        # ask the doc for its paths
> -        searchDocPaths = doc.getPaths()
> -        # for each path in the list, try to get the object by path
> -        # this is better than building a path with the doc's name b/c the name
> -        # isn't guaranteed to be used as the path segment (see CMIS-232)
> -        for path in searchDocPaths:
> -            searchDoc = self._repo.getObjectByPath(path)
> -            self.assertEquals(doc.getObjectId(), searchDoc.getObjectId())
> -
> -        # get the subfolder by path, then ask for its children
> -        subFolder = self._repo.getObjectByPath(subFolderPath)
> -        self.assertEquals(len(subFolder.getChildren().getResults()), 1)
> +        # names of folders and test docs (without and with unicode char)
> +        for suffix in ['', u'_éà€$']:
> +            parentFolderName = 'testGetObjectByPath folder' + suffix
> +            subFolderName = 'subfolder' + suffix
> +            docName = 'testdoc' + suffix
> +
> +            # create the folder structure
> +            parentFolder = self._testFolder.createFolder(parentFolderName)
> +            subFolder = parentFolder.createFolder(subFolderName)
> +            # use the subfolder path to get the folder by path
> +            subFolderPath = subFolder.getProperties().get("cmis:path")
> +            searchFolder = self._repo.getObjectByPath(subFolderPath)
> +            self.assertEquals(subFolder.getObjectId(), searchFolder.getObjectId())
> +
> +            # create a test doc
> +            doc = subFolder.createDocument(docName)
> +            # ask the doc for its paths
> +            searchDocPaths = doc.getPaths()
> +            # for each path in the list, try to get the object by path
> +            # this is better than building a path with the doc's name b/c the name
> +            # isn't guaranteed to be used as the path segment (see CMIS-232)
> +            for path in searchDocPaths:
> +                searchDoc = self._repo.getObjectByPath(path)
> +                self.assertEquals(doc.getObjectId(), searchDoc.getObjectId())
> +
> +            # get the subfolder by path, then ask for its children
> +            subFolder = self._repo.getObjectByPath(subFolderPath)
> +            self.assertEquals(len(subFolder.getChildren().getResults()), 1)
>  
>      # getting unfiled documents may work for the atom pub binding for some servers
>      # but it isn't part of the spec so removing this test for now
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)