You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/07/12 07:19:35 UTC

[airflow] branch main updated: Patch getfqdn with more resilient version (#24981)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new a3f2df0f45 Patch getfqdn with more resilient version (#24981)
a3f2df0f45 is described below

commit a3f2df0f45973ddb97990361fdc5caa256c175ca
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Tue Jul 12 09:19:23 2022 +0200

    Patch getfqdn with more resilient version (#24981)
    
    We keep on having repeated issue reports about non-matching
    hostname of workers. This seems to be trceable to getfqdn method
    of socket in Kubernetes that in some circumstances (race condition
    with netwrking setup when starting) can return different hostname
    at different times.
    
    There seems to be a related issue in Python that has not been
    resolved in more than 13 years (!)
    
    https://github.com/python/cpython/issues/49254
    
    The error seems to be related to the way how canonicalname is
    derived by getfqdn (it uses gethostbyaddr which sometimes
    provides different name than canonical name (it returns the
    first DNS name resolved that contains ".").
    
    We are fixing it in two ways:
    
    * instead of using gethostbyaddr we are using getadddrinfo with
      AI_CANONNAME flag which (according to the docs):
    
      https://man7.org/linux/man-pages/man3/getaddrinfo.3.html
    
        If hints.ai_flags includes the AI_CANONNAME flag, then the
        ai_canonname field of the first of the addrinfo structures in the
        returned list is set to point to the official name of the host.
    
    * we are caching the name returned by first time retrieval per
      interpreter. This way at least inside the same interpreter, the
      name of the host should not change.
---
 airflow/api/auth/backend/kerberos_auth.py    |  2 +-
 airflow/config_templates/config.yml          |  6 +++---
 airflow/config_templates/default_airflow.cfg |  6 +++---
 airflow/utils/net.py                         | 28 +++++++++++++++++++++++++---
 newsfragments/24981.improvement.rst          |  1 +
 tests/api/auth/backend/test_kerberos_auth.py |  4 ++--
 tests/core/test_configuration.py             | 12 +++++++-----
 tests/utils/test_net.py                      |  2 +-
 8 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/airflow/api/auth/backend/kerberos_auth.py b/airflow/api/auth/backend/kerberos_auth.py
index fb76e8a1aa..fff5cfb033 100644
--- a/airflow/api/auth/backend/kerberos_auth.py
+++ b/airflow/api/auth/backend/kerberos_auth.py
@@ -43,7 +43,6 @@
 import logging
 import os
 from functools import wraps
-from socket import getfqdn
 from typing import Any, Callable, Optional, Tuple, TypeVar, Union, cast
 
 import kerberos
@@ -51,6 +50,7 @@ from flask import Response, _request_ctx_stack as stack, g, make_response, reque
 from requests_kerberos import HTTPKerberosAuth
 
 from airflow.configuration import conf
+from airflow.utils.net import getfqdn
 
 log = logging.getLogger(__name__)
 
diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index 133ba65755..ee9f10aacf 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -33,15 +33,15 @@
         Hostname by providing a path to a callable, which will resolve the hostname.
         The format is "package.function".
 
-        For example, default value "socket.getfqdn" means that result from getfqdn() of "socket"
-        package will be used as hostname.
+        For example, default value "airflow.utils.net.getfqdn" means that result from patched
+        version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254.
 
         No argument should be required in the function specified.
         If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address``
       version_added: ~
       type: string
       example: ~
-      default: "socket.getfqdn"
+      default: "airflow.utils.net.getfqdn"
     - name: default_timezone
       description: |
         Default timezone in case supplied date times are naive
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index 12091b62e9..f432c13a84 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -36,12 +36,12 @@ dags_folder = {AIRFLOW_HOME}/dags
 # Hostname by providing a path to a callable, which will resolve the hostname.
 # The format is "package.function".
 #
-# For example, default value "socket.getfqdn" means that result from getfqdn() of "socket"
-# package will be used as hostname.
+# For example, default value "airflow.utils.net.getfqdn" means that result from patched
+# version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254.
 #
 # No argument should be required in the function specified.
 # If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address``
-hostname_callable = socket.getfqdn
+hostname_callable = airflow.utils.net.getfqdn
 
 # Default timezone in case supplied date times are naive
 # can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
diff --git a/airflow/utils/net.py b/airflow/utils/net.py
index f697bfc4ad..f28d30a882 100644
--- a/airflow/utils/net.py
+++ b/airflow/utils/net.py
@@ -17,18 +17,40 @@
 # under the License.
 #
 import socket
+from functools import lru_cache
 
 from airflow.configuration import conf
 
 
+# patched version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254
+@lru_cache(maxsize=None)
+def getfqdn(name=''):
+    """Get fully qualified domain name from name.
+    An empty argument is interpreted as meaning the local host.
+    """
+    name = name.strip()
+    if not name or name == '0.0.0.0':
+        name = socket.gethostname()
+    try:
+        addrs = socket.getaddrinfo(name, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME)
+    except OSError:
+        pass
+    else:
+        for addr in addrs:
+            if addr[3]:
+                name = addr[3]
+                break
+    return name
+
+
 def get_host_ip_address():
     """Fetch host ip address."""
-    return socket.gethostbyname(socket.getfqdn())
+    return socket.gethostbyname(getfqdn())
 
 
 def get_hostname():
     """
     Fetch the hostname using the callable from the config or using
-    `socket.getfqdn` as a fallback.
+    `airflow.utils.net.getfqdn` as a fallback.
     """
-    return conf.getimport('core', 'hostname_callable', fallback='socket.getfqdn')()
+    return conf.getimport('core', 'hostname_callable', fallback='airflow.utils.net.getfqdn')()
diff --git a/newsfragments/24981.improvement.rst b/newsfragments/24981.improvement.rst
new file mode 100644
index 0000000000..e6936d8562
--- /dev/null
+++ b/newsfragments/24981.improvement.rst
@@ -0,0 +1 @@
+Default value for [core] hostname_callable is ``airflow.utils.net.getfqdn`` which should provide more stable canonical host name. You can still use ``socket.getfqdn``or any other ``hostname_callable`` you had configured..
diff --git a/tests/api/auth/backend/test_kerberos_auth.py b/tests/api/auth/backend/test_kerberos_auth.py
index 0f60527dce..6fd16c55cd 100644
--- a/tests/api/auth/backend/test_kerberos_auth.py
+++ b/tests/api/auth/backend/test_kerberos_auth.py
@@ -18,7 +18,6 @@
 
 import json
 import os
-import socket
 from datetime import datetime
 from unittest import mock
 
@@ -26,6 +25,7 @@ import pytest
 
 from airflow.api.auth.backend.kerberos_auth import CLIENT_AUTH
 from airflow.models import DagBag
+from airflow.utils.net import getfqdn
 from airflow.www import app
 from tests.test_utils.config import conf_vars
 from tests.test_utils.db import clear_db_dags
@@ -70,7 +70,7 @@ class TestApiKerberos:
             )
             assert 401 == response.status_code
 
-            response.url = f'http://{socket.getfqdn()}'
+            response.url = f'http://{getfqdn()}'
 
             class Request:
                 headers = {}
diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py
index 991695a969..3532aac7e5 100644
--- a/tests/core/test_configuration.py
+++ b/tests/core/test_configuration.py
@@ -875,11 +875,11 @@ sql_alchemy_conn=sqlite://test
         test_conf.deprecated_values = {
             'core': {'hostname_callable': (re.compile(r':'), r'.', '2.1')},
         }
-        test_conf.read_dict({'core': {'hostname_callable': 'socket:getfqdn'}})
+        test_conf.read_dict({'core': {'hostname_callable': 'airflow.utils.net:getfqdn'}})
 
         with pytest.warns(FutureWarning):
             test_conf.validate()
-            assert test_conf.get('core', 'hostname_callable') == 'socket.getfqdn'
+            assert test_conf.get('core', 'hostname_callable') == 'airflow.utils.net.getfqdn'
 
     @pytest.mark.parametrize(
         "old, new",
@@ -930,7 +930,7 @@ sql_alchemy_conn=sqlite://test
         "conf_dict",
         [
             {},  # Even if the section is absent from config file, environ still needs replacing.
-            {'core': {'hostname_callable': 'socket:getfqdn'}},
+            {'core': {'hostname_callable': 'airflow.utils.net.getfqdn'}},
         ],
     )
     def test_deprecated_values_from_environ(self, conf_dict):
@@ -953,9 +953,11 @@ sql_alchemy_conn=sqlite://test
             return test_conf
 
         with pytest.warns(FutureWarning):
-            with unittest.mock.patch.dict('os.environ', AIRFLOW__CORE__HOSTNAME_CALLABLE='socket:getfqdn'):
+            with unittest.mock.patch.dict(
+                'os.environ', AIRFLOW__CORE__HOSTNAME_CALLABLE='airflow.utils.net:getfqdn'
+            ):
                 test_conf = make_config()
-                assert test_conf.get('core', 'hostname_callable') == 'socket.getfqdn'
+                assert test_conf.get('core', 'hostname_callable') == 'airflow.utils.net.getfqdn'
 
         with reset_warning_registry():
             with warnings.catch_warnings(record=True) as warning:
diff --git a/tests/utils/test_net.py b/tests/utils/test_net.py
index 3ac2e344af..b1ae32904e 100644
--- a/tests/utils/test_net.py
+++ b/tests/utils/test_net.py
@@ -31,7 +31,7 @@ def get_hostname():
 
 
 class TestGetHostname(unittest.TestCase):
-    @mock.patch('socket.getfqdn', return_value='first')
+    @mock.patch('airflow.utils.net.getfqdn', return_value='first')
     @conf_vars({('core', 'hostname_callable'): None})
     def test_get_hostname_unset(self, mock_getfqdn):
         assert 'first' == net.get_hostname()