You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/08/06 17:00:40 UTC

impala git commit: IMPALA-7387: Set correct MIME type for JSON webpages

Repository: impala
Updated Branches:
  refs/heads/master 312a8d60d -> 3334c167a


IMPALA-7387: Set correct MIME type for JSON webpages

Right now Impala sets "text/plain" as the MIME type for all non-HTML
pages. As per RFC-4627[1], JSON's prescribed MIME type is
"application/json".

Additionally, this commit also sets UTF-8 charset for text/[html/plain]
encodings. As per [2] application/json does not require it to be set.

Testing:
- Inspected HTTP headers manually in the browser.
- Added tests.

[1] http://www.ietf.org/rfc/rfc4627.txt
[2] https://www.iana.org/assignments/media-types/application/json

Change-Id: I7ad94343151730851a4ad01a1f4b9326a36a37ea
Reviewed-on: http://gerrit.cloudera.org:8080/11110
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3334c167
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3334c167
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3334c167

Branch: refs/heads/master
Commit: 3334c167a69a6506d595898dff73fb16410266e6
Parents: 312a8d6
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Tue Jul 31 23:29:34 2018 -0700
Committer: Bharath Vissapragada <bh...@cloudera.com>
Committed: Mon Aug 6 16:29:10 2018 +0000

----------------------------------------------------------------------
 be/src/util/webserver.cc          | 15 +++++++++++----
 be/src/util/webserver.h           | 15 ++++++++++++++-
 tests/webserver/test_web_pages.py | 13 +++++++++++++
 3 files changed, 38 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3334c167/be/src/util/webserver.cc
----------------------------------------------------------------------
diff --git a/be/src/util/webserver.cc b/be/src/util/webserver.cc
index a77c6da..c8307b5 100644
--- a/be/src/util/webserver.cc
+++ b/be/src/util/webserver.cc
@@ -148,14 +148,13 @@ enum ResponseCode {
 // Builds a valid HTTP header given the response code and a content type.
 string BuildHeaderString(ResponseCode response, ContentType content_type) {
   static const string RESPONSE_TEMPLATE = "HTTP/1.1 $0 $1\r\n"
-      "Content-Type: text/$2\r\n"
+      "Content-Type: $2\r\n"
       "Content-Length: %d\r\n"
       "X-Frame-Options: $3\r\n"
       "\r\n";
 
   return Substitute(RESPONSE_TEMPLATE, response, response == OK ? "OK" : "Not found",
-      content_type == HTML ? "html" : "plain",
-      FLAGS_webserver_x_frame_options.c_str());
+      Webserver::GetMimeType(content_type), FLAGS_webserver_x_frame_options.c_str());
 }
 
 Webserver::Webserver()
@@ -445,7 +444,7 @@ void Webserver::RenderUrlWithTemplate(const ArgumentMap& arguments,
     PrettyWriter<StringBuffer> writer(strbuf);
     document.Accept(writer);
     (*output) << strbuf.GetString();
-    *content_type = PLAIN;
+    *content_type = JSON;
   } else {
     if (arguments.find("raw") != arguments.end()) {
       document.AddMember(ENABLE_RAW_JSON_KEY, "true", document.GetAllocator());
@@ -490,4 +489,12 @@ void Webserver::RegisterUrlCallback(const string& path, const RawUrlCallback& ca
   url_handlers_.insert(make_pair(path, UrlHandler(callback)));
 }
 
+const string Webserver::GetMimeType(const ContentType& content_type) {
+  switch (content_type) {
+    case HTML: return "text/html; charset=UTF-8";
+    case PLAIN: return "text/plain; charset=UTF-8";
+    case JSON: return "application/json";
+    default: DCHECK(false) << "Invalid content_type: " << content_type;
+  }
+}
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/3334c167/be/src/util/webserver.h
----------------------------------------------------------------------
diff --git a/be/src/util/webserver.h b/be/src/util/webserver.h
index 1fc7bb2..c060d5e 100644
--- a/be/src/util/webserver.h
+++ b/be/src/util/webserver.h
@@ -33,8 +33,14 @@ namespace impala {
 
 /// Supported HTTP content types
 enum ContentType {
+  /// Corresponds to text/html content-type and used for rendering HTML webpages.
   HTML,
-  PLAIN
+  /// Following two are used for rendering text-only pages and correspond to text/plain
+  /// and application/json content-types respectively. JSON type additionally pretty
+  /// prints the underlying JSON content. They are primarily used for debugging and for
+  /// integration with third-party tools that can consume the text format easily.
+  PLAIN,
+  JSON
 };
 
 /// Wrapper class for the Squeasel web server library. Clients may register callback
@@ -93,6 +99,9 @@ class Webserver {
   /// Returns the URL to the webserver as a string.
   string Url();
 
+  /// Returns the appropriate MIME type for a given ContentType.
+  static const std::string GetMimeType(const ContentType& content_type);
+
  private:
   /// Contains all information relevant to rendering one Url. Each Url has one callback
   /// that produces the output to render. The callback either produces a Json document
@@ -147,6 +156,10 @@ class Webserver {
       struct sq_request_info* request_info);
 
   /// Renders URLs through the Mustache templating library.
+  /// - Default ContentType is HTML.
+  /// - Argument 'raw' renders the page with PLAIN ContentType.
+  /// - Argument 'json' renders the page with JSON ContentType. The underlying JSON is
+  ///   pretty-printed.
   void RenderUrlWithTemplate(const ArgumentMap& arguments, const UrlHandler& url_handler,
       std::stringstream* output, ContentType* content_type);
 

http://git-wip-us.apache.org/repos/asf/impala/blob/3334c167/tests/webserver/test_web_pages.py
----------------------------------------------------------------------
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 000e386..1ac1576 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -35,6 +35,7 @@ class TestWebPage(ImpalaTestSuite):
   QUERY_FINSTANCES_URL = "http://localhost:{0}/query_finstances"
   RPCZ_URL = "http://localhost:{0}/rpcz"
   THREAD_GROUP_URL = "http://localhost:{0}/thread-group"
+  METRICS_URL = "http://localhost:{0}/metrics"
   # log4j changes do not apply to the statestore since it doesn't
   # have an embedded JVM. So we make two sets of ports to test the
   # log level endpoints, one without the statestore port and the
@@ -77,6 +78,7 @@ class TestWebPage(ImpalaTestSuite):
   def get_debug_page(self, page_url):
     """Returns the content of the debug page 'page_url' as json."""
     response = self.get_and_check_status(page_url + "?json", ports_to_test=[25000])
+    assert "application/json" in response.headers['Content-Type']
     return json.loads(response)
 
   def get_and_check_status_jvm(self, url, string_to_search = ""):
@@ -84,6 +86,17 @@ class TestWebPage(ImpalaTestSuite):
     return self.get_and_check_status(url, string_to_search,
                                      ports_to_test=self.TEST_PORTS_WITHOUT_SS)
 
+  def test_content_type(self):
+    """Checks that an appropriate content-type is set for various types of pages."""
+    # Mapping from each page to its MIME type.
+    page_to_mime =\
+        {"?json": "application/json", "?raw": "text/plain; charset=UTF-8",
+        "": "text/html; charset=UTF-8"}
+    for port in self.TEST_PORTS_WITH_SS:
+      for page, content_type in page_to_mime.items():
+        url = self.METRICS_URL.format(port) + page
+        assert content_type == requests.get(url).headers['Content-Type']
+
   def test_log_level(self):
     """Test that the /log_level page outputs are as expected and work well on basic and
     malformed inputs. This however does not test that the log level changes are actually