You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/07/12 07:19:35 UTC
[airflow] branch main updated: Patch getfqdn with more resilient version (#24981)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new a3f2df0f45 Patch getfqdn with more resilient version (#24981)
a3f2df0f45 is described below
commit a3f2df0f45973ddb97990361fdc5caa256c175ca
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Tue Jul 12 09:19:23 2022 +0200
Patch getfqdn with more resilient version (#24981)
We keep on having repeated issue reports about non-matching
hostname of workers. This seems to be trceable to getfqdn method
of socket in Kubernetes that in some circumstances (race condition
with netwrking setup when starting) can return different hostname
at different times.
There seems to be a related issue in Python that has not been
resolved in more than 13 years (!)
https://github.com/python/cpython/issues/49254
The error seems to be related to the way how canonicalname is
derived by getfqdn (it uses gethostbyaddr which sometimes
provides different name than canonical name (it returns the
first DNS name resolved that contains ".").
We are fixing it in two ways:
* instead of using gethostbyaddr we are using getadddrinfo with
AI_CANONNAME flag which (according to the docs):
https://man7.org/linux/man-pages/man3/getaddrinfo.3.html
If hints.ai_flags includes the AI_CANONNAME flag, then the
ai_canonname field of the first of the addrinfo structures in the
returned list is set to point to the official name of the host.
* we are caching the name returned by first time retrieval per
interpreter. This way at least inside the same interpreter, the
name of the host should not change.
---
airflow/api/auth/backend/kerberos_auth.py | 2 +-
airflow/config_templates/config.yml | 6 +++---
airflow/config_templates/default_airflow.cfg | 6 +++---
airflow/utils/net.py | 28 +++++++++++++++++++++++++---
newsfragments/24981.improvement.rst | 1 +
tests/api/auth/backend/test_kerberos_auth.py | 4 ++--
tests/core/test_configuration.py | 12 +++++++-----
tests/utils/test_net.py | 2 +-
8 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/airflow/api/auth/backend/kerberos_auth.py b/airflow/api/auth/backend/kerberos_auth.py
index fb76e8a1aa..fff5cfb033 100644
--- a/airflow/api/auth/backend/kerberos_auth.py
+++ b/airflow/api/auth/backend/kerberos_auth.py
@@ -43,7 +43,6 @@
import logging
import os
from functools import wraps
-from socket import getfqdn
from typing import Any, Callable, Optional, Tuple, TypeVar, Union, cast
import kerberos
@@ -51,6 +50,7 @@ from flask import Response, _request_ctx_stack as stack, g, make_response, reque
from requests_kerberos import HTTPKerberosAuth
from airflow.configuration import conf
+from airflow.utils.net import getfqdn
log = logging.getLogger(__name__)
diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index 133ba65755..ee9f10aacf 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -33,15 +33,15 @@
Hostname by providing a path to a callable, which will resolve the hostname.
The format is "package.function".
- For example, default value "socket.getfqdn" means that result from getfqdn() of "socket"
- package will be used as hostname.
+ For example, default value "airflow.utils.net.getfqdn" means that result from patched
+ version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254.
No argument should be required in the function specified.
If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address``
version_added: ~
type: string
example: ~
- default: "socket.getfqdn"
+ default: "airflow.utils.net.getfqdn"
- name: default_timezone
description: |
Default timezone in case supplied date times are naive
diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg
index 12091b62e9..f432c13a84 100644
--- a/airflow/config_templates/default_airflow.cfg
+++ b/airflow/config_templates/default_airflow.cfg
@@ -36,12 +36,12 @@ dags_folder = {AIRFLOW_HOME}/dags
# Hostname by providing a path to a callable, which will resolve the hostname.
# The format is "package.function".
#
-# For example, default value "socket.getfqdn" means that result from getfqdn() of "socket"
-# package will be used as hostname.
+# For example, default value "airflow.utils.net.getfqdn" means that result from patched
+# version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254.
#
# No argument should be required in the function specified.
# If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address``
-hostname_callable = socket.getfqdn
+hostname_callable = airflow.utils.net.getfqdn
# Default timezone in case supplied date times are naive
# can be utc (default), system, or any IANA timezone string (e.g. Europe/Amsterdam)
diff --git a/airflow/utils/net.py b/airflow/utils/net.py
index f697bfc4ad..f28d30a882 100644
--- a/airflow/utils/net.py
+++ b/airflow/utils/net.py
@@ -17,18 +17,40 @@
# under the License.
#
import socket
+from functools import lru_cache
from airflow.configuration import conf
+# patched version of socket.getfqdn() - see https://github.com/python/cpython/issues/49254
+@lru_cache(maxsize=None)
+def getfqdn(name=''):
+ """Get fully qualified domain name from name.
+ An empty argument is interpreted as meaning the local host.
+ """
+ name = name.strip()
+ if not name or name == '0.0.0.0':
+ name = socket.gethostname()
+ try:
+ addrs = socket.getaddrinfo(name, None, 0, socket.SOCK_DGRAM, 0, socket.AI_CANONNAME)
+ except OSError:
+ pass
+ else:
+ for addr in addrs:
+ if addr[3]:
+ name = addr[3]
+ break
+ return name
+
+
def get_host_ip_address():
"""Fetch host ip address."""
- return socket.gethostbyname(socket.getfqdn())
+ return socket.gethostbyname(getfqdn())
def get_hostname():
"""
Fetch the hostname using the callable from the config or using
- `socket.getfqdn` as a fallback.
+ `airflow.utils.net.getfqdn` as a fallback.
"""
- return conf.getimport('core', 'hostname_callable', fallback='socket.getfqdn')()
+ return conf.getimport('core', 'hostname_callable', fallback='airflow.utils.net.getfqdn')()
diff --git a/newsfragments/24981.improvement.rst b/newsfragments/24981.improvement.rst
new file mode 100644
index 0000000000..e6936d8562
--- /dev/null
+++ b/newsfragments/24981.improvement.rst
@@ -0,0 +1 @@
+Default value for [core] hostname_callable is ``airflow.utils.net.getfqdn`` which should provide more stable canonical host name. You can still use ``socket.getfqdn``or any other ``hostname_callable`` you had configured..
diff --git a/tests/api/auth/backend/test_kerberos_auth.py b/tests/api/auth/backend/test_kerberos_auth.py
index 0f60527dce..6fd16c55cd 100644
--- a/tests/api/auth/backend/test_kerberos_auth.py
+++ b/tests/api/auth/backend/test_kerberos_auth.py
@@ -18,7 +18,6 @@
import json
import os
-import socket
from datetime import datetime
from unittest import mock
@@ -26,6 +25,7 @@ import pytest
from airflow.api.auth.backend.kerberos_auth import CLIENT_AUTH
from airflow.models import DagBag
+from airflow.utils.net import getfqdn
from airflow.www import app
from tests.test_utils.config import conf_vars
from tests.test_utils.db import clear_db_dags
@@ -70,7 +70,7 @@ class TestApiKerberos:
)
assert 401 == response.status_code
- response.url = f'http://{socket.getfqdn()}'
+ response.url = f'http://{getfqdn()}'
class Request:
headers = {}
diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py
index 991695a969..3532aac7e5 100644
--- a/tests/core/test_configuration.py
+++ b/tests/core/test_configuration.py
@@ -875,11 +875,11 @@ sql_alchemy_conn=sqlite://test
test_conf.deprecated_values = {
'core': {'hostname_callable': (re.compile(r':'), r'.', '2.1')},
}
- test_conf.read_dict({'core': {'hostname_callable': 'socket:getfqdn'}})
+ test_conf.read_dict({'core': {'hostname_callable': 'airflow.utils.net:getfqdn'}})
with pytest.warns(FutureWarning):
test_conf.validate()
- assert test_conf.get('core', 'hostname_callable') == 'socket.getfqdn'
+ assert test_conf.get('core', 'hostname_callable') == 'airflow.utils.net.getfqdn'
@pytest.mark.parametrize(
"old, new",
@@ -930,7 +930,7 @@ sql_alchemy_conn=sqlite://test
"conf_dict",
[
{}, # Even if the section is absent from config file, environ still needs replacing.
- {'core': {'hostname_callable': 'socket:getfqdn'}},
+ {'core': {'hostname_callable': 'airflow.utils.net.getfqdn'}},
],
)
def test_deprecated_values_from_environ(self, conf_dict):
@@ -953,9 +953,11 @@ sql_alchemy_conn=sqlite://test
return test_conf
with pytest.warns(FutureWarning):
- with unittest.mock.patch.dict('os.environ', AIRFLOW__CORE__HOSTNAME_CALLABLE='socket:getfqdn'):
+ with unittest.mock.patch.dict(
+ 'os.environ', AIRFLOW__CORE__HOSTNAME_CALLABLE='airflow.utils.net:getfqdn'
+ ):
test_conf = make_config()
- assert test_conf.get('core', 'hostname_callable') == 'socket.getfqdn'
+ assert test_conf.get('core', 'hostname_callable') == 'airflow.utils.net.getfqdn'
with reset_warning_registry():
with warnings.catch_warnings(record=True) as warning:
diff --git a/tests/utils/test_net.py b/tests/utils/test_net.py
index 3ac2e344af..b1ae32904e 100644
--- a/tests/utils/test_net.py
+++ b/tests/utils/test_net.py
@@ -31,7 +31,7 @@ def get_hostname():
class TestGetHostname(unittest.TestCase):
- @mock.patch('socket.getfqdn', return_value='first')
+ @mock.patch('airflow.utils.net.getfqdn', return_value='first')
@conf_vars({('core', 'hostname_callable'): None})
def test_get_hostname_unset(self, mock_getfqdn):
assert 'first' == net.get_hostname()