Commit 13508a8e authored by Yuan Gao
Browse files

Fix an issue where watchdog crashed during restart because stunnel was killed...

Fix an issue where watchdog crashed during restart because stunnel was killed and pid key was removed from state file
parent 914889d0
......@@ -75,6 +75,34 @@ commands:
name: Check changelog
command: |
rpm -q --changelog amazon-efs-utils
build-suse-rpm:
steps:
- checkout
- run:
name: Refresh source
command: |
zypper refresh
- run:
name: Install dependencies
command: |
zypper install -y --force-resolution rpm-build
zypper install -y make systemd
- run:
name: Build RPM
command: |
make rpm
- run:
name: Install package
command: |
zypper install -y --allow-unsigned-rpm build/amazon-efs-utils*rpm
- run:
name: Check installed successfully
command: |
mount.efs --version
- run:
name: Check changelog
command: |
rpm -q --changelog amazon-efs-utils
jobs:
test:
parameters:
......@@ -103,6 +131,15 @@ jobs:
image: << parameters.image >>
steps:
- build-rpm
build-suse-rpm-package:
parameters:
image:
type: string
executor:
name: linux
image: << parameters.image >>
steps:
- build-suse-rpm
workflows:
workflow:
jobs:
......@@ -168,4 +205,7 @@ workflows:
image: fedora:32
- build-rpm-package:
name: fedora33
image: fedora:33
\ No newline at end of file
image: fedora:33
- build-suse-rpm-package:
name: opensuse-leap15.1
image: opensuse/leap:15.1
\ No newline at end of file
......@@ -11,7 +11,7 @@ set -ex
BASE_DIR=$(pwd)
BUILD_ROOT=${BASE_DIR}/build/debbuild
VERSION=1.26.2
VERSION=1.26.3
DEB_SYSTEM_RELEASE_PATH=/etc/os-release
UBUNTU18_REGEX="Ubuntu 18"
DEBIAN11_REGEX="Debian GNU/Linux bullseye"
......
......@@ -8,4 +8,4 @@
[global]
version=1.26
release=2
release=3
Package: amazon-efs-utils
Architecture: all
Version: 1.26.2
Version: 1.26.3
Section: utils
Depends: python|python2, nfs-common, stunnel4 (>= 4.56), openssl (>= 1.0.2), util-linux
Priority: optional
......
......@@ -11,7 +11,7 @@
%else
%if 0%{?fedora} || 0%{?el8}
%if 0%{?fedora} || 0%{?el8} || 0%{?is_opensuse}
%global python_requires python3
%else
%global python_requires python2
......@@ -27,7 +27,7 @@
Name : amazon-efs-utils
Version : 1.26
Release : 2%{?dist}
Release : 3%{?dist}
Summary : This package provides utilities for simplifying the use of EFS file systems
Group : Amazon/Tools
......@@ -126,6 +126,9 @@ fi
%clean
%changelog
* Wed Jul 1 2020 Yuan Gao <ygaochn@amazon.com> - 1.26.3
- Fix an issue where watchdog crashed during restart because stunnel was killed and pid key was removed from state file
* Tue Jun 16 2020 Karthik Basavaraj <kbbasav@amazon.com> - 1.26.2
- Clean up stunnel PIDs in state files persisted by previous efs-csi-driver to ensure watchdog spawns a new stunnel after driver restarts.
- Fix an issue where fs cannot be mounted with tls using systemd.automount-units due to mountpoint check
......
#!/bin/bash

# Release-identification files consulted to decide whether the python shebangs must be rewritten.
RPM_SYSTEM_RELEASE_PATH=/etc/system-release
RPM_OS_RELEASE_PATH=/etc/os-release

# Patterns matched (as regular expressions) against the contents of the files above.
RHEL8_REGEX="Red Hat Enterprise Linux release 8"
FEDORA_REGEX="Fedora release"
CENTOS8_REGEX="CentOS Linux release 8"
SUSE15_REGEX="openSUSE Leap"

# Rewrite the shebang of both entry-point scripts so that they run under python3.
function correct_python_executable() {
    echo 'Correcting python executable'
    # Replace the first line in .py to "#!/usr/bin/env python3" no matter what it was before
    sed -i -e '1 s/^.*$/\#!\/usr\/bin\/env python3/' src/watchdog/__init__.py
    sed -i -e '1 s/^.*$/\#!\/usr\/bin\/env python3/' src/mount_efs/__init__.py
}

# RHEL8, Fedora30+ and CentOS8 treat shebangs of the form "#!/usr/bin/env python" as errors
if [ -f "$RPM_SYSTEM_RELEASE_PATH" ] && [[ "$(cat "$RPM_SYSTEM_RELEASE_PATH")" =~ $RHEL8_REGEX|$FEDORA_REGEX|$CENTOS8_REGEX ]]; then
    correct_python_executable
fi

# openSUSE uses python3 as its default python version
if [ -f "$RPM_OS_RELEASE_PATH" ] && [[ "$(grep PRETTY_NAME "$RPM_OS_RELEASE_PATH")" =~ $SUSE15_REGEX ]]; then
    correct_python_executable
fi
......@@ -68,7 +68,7 @@ except ImportError:
from urllib.error import URLError, HTTPError
VERSION = '1.26.2'
VERSION = '1.26.3'
SERVICE = 'elasticfilesystem'
CONFIG_FILE = '/etc/amazon/efs/efs-utils.conf'
......@@ -196,10 +196,12 @@ STUNNEL_EFS_CONFIG = {
WATCHDOG_SERVICE = 'amazon-efs-mount-watchdog'
SYSTEM_RELEASE_PATH = '/etc/system-release'
OS_RELEASE_PATH = '/etc/os-release'
RHEL8_RELEASE_NAME = 'Red Hat Enterprise Linux release 8'
CENTOS8_RELEASE_NAME = 'CentOS Linux release 8'
FEDORA_RELEASE_NAME = 'Fedora release'
SKIP_NO_LIBWRAP_RELEASES = [RHEL8_RELEASE_NAME, CENTOS8_RELEASE_NAME, FEDORA_RELEASE_NAME]
SUSE_RELEASE_NAME = 'openSUSE Leap'
SKIP_NO_LIBWRAP_RELEASES = [RHEL8_RELEASE_NAME, CENTOS8_RELEASE_NAME, FEDORA_RELEASE_NAME, SUSE_RELEASE_NAME]
def fatal_error(user_message, log_message=None, exit_code=1):
......@@ -610,14 +612,21 @@ def find_command_path(command, install_method):
def get_system_release_version():
    """Return a string identifying the operating-system release.

    The stripped contents of SYSTEM_RELEASE_PATH are returned when that file
    can be read. Otherwise the value of the PRETTY_NAME entry in
    OS_RELEASE_PATH is returned exactly as written in the file (surrounding
    quotes, if any, are kept). When neither source yields a value the string
    'unknown' is returned.
    """
    try:
        with open(SYSTEM_RELEASE_PATH) as f:
            return f.read().strip()
    except IOError:
        logging.debug('Unable to read %s', SYSTEM_RELEASE_PATH)

    try:
        with open(OS_RELEASE_PATH) as f:
            for line in f:
                # partition() splits on the first '=' only, so a value that itself
                # contains '=' is kept whole, and a line without '=' is skipped
                # instead of raising IndexError.
                key, sep, value = line.partition('=')
                if sep and key.strip() == 'PRETTY_NAME':
                    return value.strip()
    except IOError:
        logging.debug('Unable to read %s', OS_RELEASE_PATH)

    return 'unknown'
def write_stunnel_config_file(config, state_file_dir, fs_id, mountpoint, tls_port, dns_name, verify_level, ocsp_enabled,
......
......@@ -45,7 +45,7 @@ except ImportError:
from urllib.error import URLError
from urllib.request import urlopen
VERSION = '1.26.2'
VERSION = '1.26.3'
SERVICE = 'elasticfilesystem'
CONFIG_FILE = '/etc/amazon/efs/efs-utils.conf'
......@@ -354,6 +354,8 @@ def get_state_files(state_file_dir):
def is_pid_running(pid):
if not pid:
return False
try:
os.kill(pid, 0)
return True
......@@ -384,7 +386,10 @@ def clean_up_mount_state(state_file_dir, state_file, pid, is_running, mount_stat
if is_pid_running(pid):
logging.info('TLS tunnel: %d is still running, will retry termination', pid)
else:
logging.info('TLS tunnel: %d is no longer running, cleaning up state', pid)
if not pid:
logging.info('TLS tunnel has been killed, cleaning up state')
else:
logging.info('TLS tunnel: %d is no longer running, cleaning up state', pid)
state_file_path = os.path.join(state_file_dir, state_file)
with open(state_file_path) as f:
state = json.load(f)
......@@ -465,7 +470,7 @@ def check_efs_mounts(config, child_procs, unmount_grace_period_sec, state_file_d
if 'unmount_time' in state:
if state['unmount_time'] + unmount_grace_period_sec < current_time:
logging.info('Unmount grace period expired for %s', state_file)
clean_up_mount_state(state_file_dir, state_file, state['pid'], is_running, state.get('mountStateDir'))
clean_up_mount_state(state_file_dir, state_file, state.get('pid'), is_running, state.get('mountStateDir'))
elif mount not in nfs_mounts:
logging.info('No mount found for "%s"', state_file)
......@@ -546,12 +551,12 @@ def check_certificate(config, state, state_file_dir, state_file, base_path=STATE
rewrite_state_file(state, state_file_dir, state_file)
# send SIGHUP to force a reload of the configuration file to trigger the stunnel process to notice the new certificate
if is_pid_running(state['pid']):
process_group = os.getpgid(state['pid'])
logging.info('SIGHUP signal to stunnel. PID: %d, group ID: %s', state['pid'], process_group)
pid = state.get('pid')
if is_pid_running(pid):
process_group = os.getpgid(pid)
logging.info('SIGHUP signal to stunnel. PID: %d, group ID: %s', pid, process_group)
os.killpg(process_group, SIGHUP)
if not is_pid_running(state['pid']):
else:
logging.warning('TLS tunnel is not running for %s', state_file)
......
......@@ -99,6 +99,21 @@ def test_clean_up_pid_still_lives(mocker, tmpdir):
assert os.path.exists(abs_state_file)
def test_clean_up_pid_already_killed(mocker, tmpdir):
    """Clean-up with no recorded pid removes the state file and never calls the killpg mock."""
    missing_pid = None
    running = watchdog.is_pid_running(missing_pid)

    killpg_mock = setup_mock(mocker, running)
    state_dir, state_file, abs_state_file = create_state_file(tmpdir)

    # Precondition: the state file is on disk before clean-up runs.
    assert os.path.exists(abs_state_file)

    watchdog.clean_up_mount_state(state_dir, state_file, missing_pid, is_running=running)

    utils.assert_not_called(killpg_mock)
    assert not os.path.exists(abs_state_file)
def test_pid_not_running(mocker, tmpdir):
killpg_mock = setup_mock(mocker, False)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment