You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2022/03/29 15:20:13 UTC
[trafficserver] branch 9.2.x updated: Support transforming range requests when origin returns full resource. (#8657)

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.2.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/9.2.x by this push:
     new 93315e1  Support transforming range requests when origin returns full resource. (#8657)
93315e1 is described below

commit 93315e181ba40663888b283c8907afb05c6ddedc
Author: Chris McFarlen <ch...@mcfarlen.us>
AuthorDate: Mon Mar 21 14:32:00 2022 -0500

    Support transforming range requests when origin returns full resource. (#8657)
    
    * Support transforming range requests when origin returns full resource.
    
    Resolves #8161:
    
    This PR adds support for handling range requests after a response is received and the response source is the origin server.
    
     - Allow caching responses when a range is requested (requires proxy.config.http.cache.range.write is set, but slightly changes behavior)
     - Handle range setup even if the response source is the origin server.
     - Call do_range_setup_if_necessary on forward server response.
     - Enable API flag to cache original response when a partial response is tranformed (api_info.cache_untransformed = true)
    
    * Add autests
    
    * autopep
    
    * update documentation for updated setting
    
    * Do not setup range transform if the server response is encoded
    
    * use f string instead of .format to format string
    
    * all perform range transform and cache write when replacing cache due to revalidation.
    
    * add autest to cover case of replacing cache with a requested range
    
    * verify cache replace using via string
    
    Co-authored-by: Chris McFarlen <cm...@apple.com>
    (cherry picked from commit 099ca4ce0849336656932f758cfa774e52f35508)
---
 doc/admin-guide/files/records.config.en.rst        |   9 +-
 proxy/http/HttpSM.cc                               |  85 +++++++++++-----
 proxy/http/HttpTransact.cc                         |   7 ++
 .../gold_tests/cache/cache-range-response.test.py  |  40 ++++++++
 .../cache/replay/cache-range-response.replay.yaml  | 113 +++++++++++++++++++++
 5 files changed, 225 insertions(+), 29 deletions(-)

diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index 1280a82..33dbfc0 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -2278,10 +2278,11 @@ Cache Control
    :overridable:
 
    When enabled (``1``), |TS| will attempt to write (lock) the URL
-   to cache. This is rarely useful (at the moment), since it'll only be able
-   to write to cache if the origin has ignored the ``Range:`` header. For a use
-   case where you know the origin will respond with a full (``200``) response,
-   you can turn this on to allow it to be cached.
+   to cache for a request specifying a range. This is useful when the origin server
+   might ignore a range request and respond with a full (``200``) response.
+   Additionally, this setting will attempt to transform a 200 response from the origin
+   server to a partial (``206``) response, honoring the requested range, while
+   caching the full response.
 
 .. ts:cv:: CONFIG proxy.config.http.cache.ignore_accept_mismatch INT 2
    :reloadable:
diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc
index 46ee36f..29dcdbd 100644
--- a/proxy/http/HttpSM.cc
+++ b/proxy/http/HttpSM.cc
@@ -4629,15 +4629,19 @@ HttpSM::parse_range_and_compare(MIMEField *field, int64_t content_length)
     ranges[nr]._end   = end;
     ++nr;
 
-    if (!cache_sm.cache_read_vc->is_pread_capable() && cache_config_read_while_writer == 2) {
-      // write in progress, check if request range not in cache yet
-      HTTPInfo::FragOffset *frag_offset_tbl = t_state.cache_info.object_read->get_frag_table();
-      int frag_offset_cnt                   = t_state.cache_info.object_read->get_frag_offset_count();
-
-      if (!frag_offset_tbl || !frag_offset_cnt || (frag_offset_tbl[frag_offset_cnt - 1] < static_cast<uint64_t>(end))) {
-        Debug("http_range", "request range in cache, end %" PRId64 ", frg_offset_cnt %d" PRId64, end, frag_offset_cnt);
-        t_state.range_in_cache = false;
+    if (cache_sm.cache_read_vc) {
+      if (!cache_sm.cache_read_vc->is_pread_capable() && cache_config_read_while_writer == 2) {
+        // write in progress, check if request range not in cache yet
+        HTTPInfo::FragOffset *frag_offset_tbl = t_state.cache_info.object_read->get_frag_table();
+        int frag_offset_cnt                   = t_state.cache_info.object_read->get_frag_offset_count();
+
+        if (!frag_offset_tbl || !frag_offset_cnt || (frag_offset_tbl[frag_offset_cnt - 1] < static_cast<uint64_t>(end))) {
+          Debug("http_range", "request range in cache, end %" PRId64 ", frg_offset_cnt %d" PRId64, end, frag_offset_cnt);
+          t_state.range_in_cache = false;
+        }
       }
+    } else {
+      t_state.range_in_cache = false;
     }
   }
 
@@ -4691,10 +4695,16 @@ HttpSM::calculate_output_cl(int64_t num_chars_for_ct, int64_t num_chars_for_cl)
 void
 HttpSM::do_range_parse(MIMEField *range_field)
 {
-  int num_chars_for_ct = 0;
-  t_state.cache_info.object_read->response_get()->value_get(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE, &num_chars_for_ct);
+  int num_chars_for_ct   = 0;
+  int64_t content_length = 0;
 
-  int64_t content_length   = t_state.cache_info.object_read->object_size_get();
+  if (t_state.cache_info.object_read != nullptr) {
+    t_state.cache_info.object_read->response_get()->value_get(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE, &num_chars_for_ct);
+    content_length = t_state.cache_info.object_read->object_size_get();
+  } else {
+    content_length = t_state.hdr_info.server_response.get_content_length();
+    t_state.hdr_info.server_response.value_get(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE, &num_chars_for_ct);
+  }
   int64_t num_chars_for_cl = num_chars_for_int(content_length);
 
   parse_range_and_compare(range_field, content_length);
@@ -4709,8 +4719,6 @@ HttpSM::do_range_setup_if_necessary()
 {
   MIMEField *field;
 
-  ink_assert(t_state.cache_info.object_read != nullptr);
-
   field = t_state.hdr_info.client_request.field_find(MIME_FIELD_RANGE, MIME_LEN_RANGE);
   ink_assert(field != nullptr);
 
@@ -4722,7 +4730,7 @@ HttpSM::do_range_setup_if_necessary()
     if (t_state.range_setup == HttpTransact::RANGE_REQUESTED) {
       bool do_transform = false;
 
-      if (!t_state.range_in_cache) {
+      if (!t_state.range_in_cache && t_state.cache_info.object_read) {
         Debug("http_range", "range can't be satisfied from cache, force origin request");
         t_state.cache_lookup_result = HttpTransact::CACHE_LOOKUP_MISS;
         return;
@@ -4740,7 +4748,12 @@ HttpSM::do_range_setup_if_necessary()
           t_state.range_setup      = HttpTransact::RANGE_NOT_SATISFIABLE;
         }
       } else {
-        if (cache_sm.cache_read_vc->is_pread_capable()) {
+        // if revalidating and cache is stale we want to transform
+        if (t_state.cache_info.action == HttpTransact::CACHE_DO_REPLACE &&
+            t_state.hdr_info.server_response.status_get() == HTTP_STATUS_OK) {
+          Debug("http_range", "Serving transform after stale cache re-serve");
+          do_transform = true;
+        } else if (cache_sm.cache_read_vc && cache_sm.cache_read_vc->is_pread_capable()) {
           // If only one range entry and pread is capable, no need transform range
           t_state.range_setup = HttpTransact::RANGE_NOT_TRANSFORM_REQUESTED;
         } else {
@@ -4752,15 +4765,34 @@ HttpSM::do_range_setup_if_necessary()
       if (do_transform) {
         if (api_hooks.get(TS_HTTP_RESPONSE_TRANSFORM_HOOK) == nullptr) {
           int field_content_type_len = -1;
-          const char *content_type   = t_state.cache_info.object_read->response_get()->value_get(
-            MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE, &field_content_type_len);
+          const char *content_type   = nullptr;
+          int64_t content_length     = 0;
+
+          if (t_state.cache_info.object_read && t_state.cache_info.action != HttpTransact::CACHE_DO_REPLACE) {
+            content_type = t_state.cache_info.object_read->response_get()->value_get(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE,
+                                                                                     &field_content_type_len);
+            content_length = t_state.cache_info.object_read->object_size_get();
+          } else {
+            // We don't want to transform a range request if the server response has a content encoding.
+            if (t_state.hdr_info.server_response.presence(MIME_PRESENCE_CONTENT_ENCODING)) {
+              Debug("http_trans", "Cannot setup range transform for server response with content encoding");
+              t_state.range_setup = HttpTransact::RANGE_NONE;
+              return;
+            }
+
+            // Since we are transforming the range from the server, we want to cache the original response
+            t_state.api_info.cache_untransformed = true;
+            content_type =
+              t_state.hdr_info.server_response.value_get(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE, &field_content_type_len);
+            content_length = t_state.hdr_info.server_response.get_content_length();
+          }
 
           Debug("http_trans", "Unable to accelerate range request, fallback to transform");
 
           // create a Range: transform processor for requests of type Range: bytes=1-2,4-5,10-100 (eg. multiple ranges)
-          INKVConnInternal *range_trans = transformProcessor.range_transform(
-            mutex.get(), t_state.ranges, t_state.num_range_fields, &t_state.hdr_info.transform_response, content_type,
-            field_content_type_len, t_state.cache_info.object_read->object_size_get());
+          INKVConnInternal *range_trans = transformProcessor.range_transform(mutex.get(), t_state.ranges, t_state.num_range_fields,
+                                                                             &t_state.hdr_info.transform_response, content_type,
+                                                                             field_content_type_len, content_length);
           api_hooks.append(TS_HTTP_RESPONSE_TRANSFORM_HOOK, range_trans);
         } else {
           // ToDo: Do we do something here? The theory is that multiple transforms do not behave well with
@@ -6112,7 +6144,8 @@ HttpSM::perform_transform_cache_write_action()
           HttpDebugNames::get_cache_action_name(t_state.cache_info.action));
 
   if (t_state.range_setup) {
-    return;
+    SMDebug("http", "[%" PRId64 "] perform_transform_cache_write_action %s (with range setup)", sm_id,
+            HttpDebugNames::get_cache_action_name(t_state.cache_info.action));
   }
 
   switch (t_state.cache_info.transform_action) {
@@ -6123,10 +6156,12 @@ HttpSM::perform_transform_cache_write_action()
   }
 
   case HttpTransact::CACHE_DO_WRITE: {
-    transform_cache_sm.close_read();
-    t_state.cache_info.transform_write_status = HttpTransact::CACHE_WRITE_IN_PROGRESS;
-    setup_cache_write_transfer(&transform_cache_sm, transform_info.entry->vc, &t_state.cache_info.transform_store,
-                               client_response_hdr_bytes, "cache write t");
+    if (t_state.api_info.cache_untransformed == false) {
+      transform_cache_sm.close_read();
+      t_state.cache_info.transform_write_status = HttpTransact::CACHE_WRITE_IN_PROGRESS;
+      setup_cache_write_transfer(&transform_cache_sm, transform_info.entry->vc, &t_state.cache_info.transform_store,
+                                 client_response_hdr_bytes, "cache write t");
+    }
     break;
   }
 
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index f4f51d8..19e1e6c 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -4570,6 +4570,10 @@ HttpTransact::handle_cache_operation_on_forward_server_response(State *s)
       } else {
         ink_assert(s->cache_info.object_read != nullptr);
         s->cache_info.action = CACHE_DO_REPLACE;
+
+        if (s->hdr_info.client_request.presence(MIME_PRESENCE_RANGE)) {
+          s->state_machine->do_range_setup_if_necessary();
+        }
       }
 
     } else if (s->cache_info.action == CACHE_DO_WRITE) {
@@ -4579,6 +4583,9 @@ HttpTransact::handle_cache_operation_on_forward_server_response(State *s)
         s->cache_info.action = CACHE_DO_NO_ACTION;
       } else {
         s->cache_info.action = CACHE_DO_WRITE;
+        if (s->hdr_info.client_request.presence(MIME_PRESENCE_RANGE)) {
+          s->state_machine->do_range_setup_if_necessary();
+        }
       }
 
     } else if (s->cache_info.action == CACHE_DO_DELETE) {
diff --git a/tests/gold_tests/cache/cache-range-response.test.py b/tests/gold_tests/cache/cache-range-response.test.py
new file mode 100644
index 0000000..4175bb5
--- /dev/null
+++ b/tests/gold_tests/cache/cache-range-response.test.py
@@ -0,0 +1,40 @@
+'''
+Verify that caching a range request when origin returns full response works.
+'''
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+Test.Summary = '''
+Verify correct caching behavior for range requests.
+'''
+
+ts = Test.MakeATSProcess("ts")
+replay_file = "replay/cache-range-response.replay.yaml"
+server = Test.MakeVerifierServerProcess("server0", replay_file)
+ts.Disk.records_config.update({
+    'proxy.config.diags.debug.enabled': 1,
+    'proxy.config.diags.debug.tags': 'http.*|cache.*',
+    'proxy.config.http.cache.range.write': 1,
+    'proxy.config.http.cache.when_to_revalidate': 4,
+    'proxy.config.http.insert_response_via_str': 3,
+})
+ts.Disk.remap_config.AddLine(
+    f'map / http://127.0.0.1:{server.Variables.http_port}'
+)
+tr = Test.AddTestRun("Verify range request is transformed from a 200 response")
+tr.Processes.Default.StartBefore(server)
+tr.Processes.Default.StartBefore(ts)
+tr.AddVerifierClientProcess("client0", replay_file, http_ports=[ts.Variables.port])
diff --git a/tests/gold_tests/cache/replay/cache-range-response.replay.yaml b/tests/gold_tests/cache/replay/cache-range-response.replay.yaml
new file mode 100644
index 0000000..768f0dd
--- /dev/null
+++ b/tests/gold_tests/cache/replay/cache-range-response.replay.yaml
@@ -0,0 +1,113 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+---
+meta:
+  version: "1.0"
+
+sessions:
+  - transactions:
+      # Populate the cache with a response to a GET request.
+      - client-request:
+          method: "GET"
+          version: "1.1"
+          url: /some/path
+          headers:
+            fields:
+              - [ Host, example.com ]
+              - [ uuid, 1 ]
+              - [ Range, bytes=0-10 ]
+        server-response:
+          status: 200
+          reason: OK
+          headers:
+            fields:
+              - [ Content-Length, 16 ]
+              - [ Cache-Control, max-age=300 ]
+              - [ X-Response, first_get_response ]
+        proxy-response:
+          status: 206
+          headers:
+            fields:
+              - [ X-Response, { value: first_get_response, as: equal} ]
+              - [ Content-Range, { value: "bytes 0-10/16", as: equal}]
+      # Subsequent range request served from cache
+      - client-request:
+          method: "GET"
+          version: "1.1"
+          url: /some/path
+          headers:
+            fields:
+              - [ Host, example.com ]
+              - [ uuid, 2 ]
+              - [ Range, bytes=0-5 ]
+        server-response:
+          status: 500
+          reason: OK
+          headers:
+            fields:
+              - [ X-Response, internal_server_error ]
+        proxy-response:
+          status: 206
+          headers:
+            fields:
+              - [ X-Response, { value: first_get_response, as: equal} ]
+              - [ Content-Range, { value: "bytes 0-5/16", as: equal}]
+      # Should get full response from cache without a range header
+      - client-request:
+          method: "GET"
+          version: "1.1"
+          url: /some/path
+          headers:
+            fields:
+              - [ Host, example.com ]
+              - [ uuid, 3 ]
+        server-response:
+          status: 500
+          reason: OK
+          headers:
+            fields:
+              - [ X-Response, internal_server_error ]
+        proxy-response:
+          status: 200
+          headers:
+            fields:
+              - [ X-Response, { value: first_get_response, as: equal} ]
+              - [ Content-Length, { value: "16", as: equal}]
+      # Revalidate and replace cache still returns 206
+      - client-request:
+          method: "GET"
+          version: "1.1"
+          url: /some/path
+          headers:
+            fields:
+              - [ Host, example.com ]
+              - [ uuid, 4 ]
+              - [ Range, bytes=0-10 ]
+              - [ If-Modified-Since, "Wed, 16 Mar 2022 22:52:09 GMT"]
+        server-response:
+          status: 200
+          reason: OK
+          headers:
+            fields:
+              - [ Content-Length, 100 ]
+              - [ Cache-Control, max-age=300 ]
+        proxy-response:
+          status: 206
+          headers:
+            fields:
+              - [ Content-Range, { value: "bytes 0-10/100", as: equal}]
+              - [ Via, { value: "uIcSsSfUpSeN:t cCSp sS", as: contains }]