armbian-next: still fighting tee leaking: now I think I won, again

- avoid using the "last resort, use lazy umount" by killing tee_pid _and descendants_ beforehand. Works!
- introduce `get_descendants_of_pid_array()` using recursive trick to get all descendants
- do not `unset CURRENT_LOGFILE` -- during `logging_error_show_log()` (from SHOW_LOG=no days)
  - do `unset CURRENT_LOGFILE` immediately before processing logs -- although it magically survives. mysteries of traps in bash...
- `declare -g` instead of `export` for `start_logging_section()`
- don't `check_and_close_fd_13()` in `run_cleanup_handlers()`
- do run `check_and_close_fd_13()`, but only after archiving the old logs (in logging trap!)
- add a default error for unhandled traps -- proper handling of which should still be done
- fix reset of `global_tee_pid=0` in section-logging
This commit is contained in:
Ricardo Pardini
2023-01-18 20:22:39 +01:00
parent c1100fa461
commit bf3f346bee
6 changed files with 78 additions and 29 deletions

View File

@@ -30,17 +30,18 @@ function logging_init() {
else
declare wsl2_type
wsl2_detect_type
if [[ "${wsl2_type}" != "none" ]]; then
local windows_emoji="💲" # 💰 or 💲 for M$ -- get it?
if [[ "${wsl2_type}" != "none" ]]; then
local windows_emoji="💲" # 💰 or 💲 for M$ -- get it?
declare -g left_marker="${gray_color}[${windows_emoji}|${normal_color}"
fi
fi
}
function logging_error_show_log() {
[[ "${SHOW_LOG}" == "yes" ]] && return 0 # Do nothing if we're already showing the log on stderr.
# Do NOT unset CURRENT_LOGFILE here... it's used by traps.
local logfile_to_show="${CURRENT_LOGFILE}" # store current logfile in separate var
unset CURRENT_LOGFILE # stop logging, otherwise crazy
[[ "${SHOW_LOG}" == "yes" ]] && return 0 # Do nothing if we're already showing the log on stderr.
if [[ "${CI}" == "true" ]]; then # Close opened CI group, even if there is none; errors would be buried otherwise.
echo "::endgroup::"
fi
@@ -48,14 +49,14 @@ function logging_error_show_log() {
if [[ -f "${logfile_to_show}" ]]; then
local prefix_sed_contents="${normal_color}${left_marker}${padding}👉${padding}${right_marker} "
local prefix_sed_cmd="s/^/${prefix_sed_contents}/;"
display_alert " 👇👇👇 Showing logfile below 👇👇👇" "${logfile_to_show}" "err"
CURRENT_LOGFILE="" display_alert " 👇👇👇 Showing logfile below 👇👇👇" "${logfile_to_show}" "err"
# shellcheck disable=SC2002 # my cat is great. thank you, shellcheck.
cat "${logfile_to_show}" | grep -v -e "^$" | sed -e "${prefix_sed_cmd}" 1>&2 # write it to stderr!!
display_alert " 👆👆👆 Showing logfile above 👆👆👆" "${logfile_to_show}" "err"
CURRENT_LOGFILE="" display_alert " 👆👆👆 Showing logfile above 👆👆👆" "${logfile_to_show}" "err"
else
display_alert "✋ Error log not available at this stage of build" "check messages above" "debug"
CURRENT_LOGFILE="" display_alert "✋ Error log not available at this stage of build" "check messages above" "debug"
fi
return 0
}
@@ -108,25 +109,44 @@ function print_current_asset_log_base_file() {
}
# check_and_close_fd_13: closes file descriptor 13 if it is open, then makes sure the
# process recorded in global_tee_pid (and its descendants) is no longer running.
#
# Arguments: none.
# Globals read: global_tee_pid.
# Helpers called: display_alert, get_descendants_of_pid_array (both defined elsewhere).
#
# NOTE(review): this listing looks like a diff rendered without +/- markers, so lines from
# before and after the change appear interleaved (e.g. `sync` both before and inside the
# first `if`; global_tee_pid killed both before and after the descendants loop). Confirm
# against the real file which of those lines are live.
function check_and_close_fd_13() {
sync # let the disk catch up
# fd 13 shows up under /proc/self/fd only while this shell still has it open.
if [[ -e /proc/self/fd/13 ]]; then
sync # let the disk catch up
display_alert "Closing fd 13" "log still open" "cleanup" # no reason to be alarmed
exec 13>&- || true # close the file descriptor, lest sed keeps running forever.
sync # make sure the file is written to disk
sleep 1 # give it a second to die.
else
display_alert "Not closing fd 13" "log already closed" "cleanup"
fi
# "tee_pid" is a misnomer: it in reality is a shell pid with tee and sed children.
display_alert "Checking if global_tee_pid is set and running" "global_tee_pid: ${global_tee_pid}" "cleanup"
# Only act when global_tee_pid is non-empty, greater than 1, and `ps -p` still finds it.
if [[ -n "${global_tee_pid}" && ${global_tee_pid} -gt 1 ]] && ps -p "${global_tee_pid}" > /dev/null; then
display_alert "Killing global_tee_pid" "${global_tee_pid}" "cleanup"
# NOTE(review): as listed, the parent is killed here before its descendants are collected below;
# this pair of lines is presumably the pre-change version -- confirm.
kill "${global_tee_pid}" && wait "${global_tee_pid}"
display_alert "Killing global_tee_pid's children" "global_tee_pid: ${global_tee_pid}" "cleanup"
# Result array for get_descendants_of_pid_array; presumably the helper fills it in by name -- confirm.
declare -a descendants_of_pid_array_result=()
get_descendants_of_pid_array "${global_tee_pid}" || true
# loop over descendants_of_pid_array_result and kill'em'all
# NOTE(review): descendant_pid is not declared local here, so it is set in the caller's scope.
for descendant_pid in "${descendants_of_pid_array_result[@]}"; do
# check if PID is still alive before killing; it might have died already due to death of parent.
if ps -p "${descendant_pid}" > /dev/null; then
display_alert "Killing descendant pid" "${descendant_pid}" "cleanup"
# NOTE(review): the wait targets global_tee_pid, not descendant_pid -- presumably because the
# descendant is not a direct child of this shell and cannot be waited on; confirm this is intended.
# Any failure of the kill/wait pair is deliberately ignored via `|| true`.
{ kill "${descendant_pid}" && wait "${global_tee_pid}"; } || true
else
display_alert "Descendant PID already dead" "${descendant_pid}" "cleanup"
fi
done
# If the global_tee_pid is still alive, kill it.
if ps -p "${global_tee_pid}" > /dev/null; then
display_alert "Killing global_tee_pid" "${global_tee_pid}" "cleanup"
kill "${global_tee_pid}" && wait "${global_tee_pid}"
else
display_alert "global_tee_pid already dead after descendants killed" "${global_tee_pid}" "cleanup"
fi
sync # wait for the disk to catch up
else
display_alert "Not killing global_tee_pid" "${global_tee_pid} not running" "cleanup"
fi
}
function discard_logs_tmp_dir() {