armbian-next: replace bare sync invocations by introducing wait_for_disk_sync(); sync early at start of build

- wait 10s for sync call; otherwise warn user to be patient - wait_for_sync "reason for sync" everywhere - add @TODO for /run/user/0 etc and XDG_RUNTIME_DIR in chroot
2025-09-24 19:47:06 +07:00 · 2023-01-20 01:44:46 +01:00
parent 4d43c05f7b
commit 72c61f2f68
9 changed files with 52 additions and 22 deletions
--- a/lib/functions/compilation/ccache.sh
+++ b/lib/functions/compilation/ccache.sh
@@ -21,9 +21,9 @@ function do_with_ccache_statistics() {
 		display_alert "ccache dir size before" "${ccache_dir_size_before_human}" "ccache"

 		# Show the ccache configuration
+		wait_for_disk_sync "before ccache config"
 		display_alert "ccache configuration" "ccache" "ccache"
 		run_host_command_logged ccache --show-config "&&" sync
-		sync
 	fi

 	display_alert "Running ccache'd build..." "ccache" "ccache"
--- a/lib/functions/general/chroot-helpers.sh
+++ b/lib/functions/general/chroot-helpers.sh
@@ -7,7 +7,7 @@ mount_chroot() {
 	local target
 	target="$(realpath "$1")" # normalize, remove last slash if dir
 	display_alert "mount_chroot" "$target" "debug"
-	mount -t tmpfs tmpfs "${target}/tmp"
+	mount -t tmpfs tmpfs "${target}/tmp" # @TODO: maybe /run/user/0 etc and XDG_RUNTIME_DIR
 	mount -t proc chproc "${target}"/proc
 	mount -t sysfs chsys "${target}"/sys
 	mount -t devtmpfs chdev "${target}"/dev || mount --bind /dev "${target}"/dev
@@ -28,8 +28,8 @@ umount_chroot() {
 		umount --recursive "${target}"/dev || true
 		umount "${target}"/proc || true
 		umount "${target}"/sys || true
-		umount "${target}"/tmp || true
-		sync
+		umount "${target}"/tmp || true # @TODO: maybe /run/user/0 etc and XDG_RUNTIME_DIR
+		wait_for_disk_sync "after umount chroot"
 		run_host_command_logged grep -E "'${target}/(dev|proc|sys|tmp)'" /proc/mounts "||" true
 	done
 }
@@ -49,7 +49,7 @@ function umount_chroot_recursive() {
 		return 0
 	fi
 	display_alert "Unmounting recursively" "${description} - be patient" ""
-	sync # sync. coalesce I/O. wait for writes to flush to disk. it might take a second.
+	wait_for_disk_sync "before recursive umount ${description}" # sync. coalesce I/O. wait for writes to flush to disk. it might take a second.
 	# First, try to umount some well-known dirs, in a certain order. for speed.
 	local -a well_known_list=("dev/pts" "dev" "proc" "sys" "boot/efi" "boot/firmware" "boot" "tmp" ".")
 	for well_known in "${well_known_list[@]}"; do
@@ -64,7 +64,7 @@ function umount_chroot_recursive() {
 			display_alert "${#current_mount_list[@]} dirs still mounted after ${tries} tries:" "${current_mount_list[*]}" "wrn"
 		fi
 		cut -d " " -f 2 "/proc/mounts" | grep "^${target}" | xargs -n1 umount --recursive &> /dev/null || true # ignore errors
-		sync                                                                                                   # wait for fsync, then count again for next loop.
+		wait_for_disk_sync "during recursive umount ${description}"                                            # sync. coalesce I/O. wait for writes to flush to disk. it might take a second.
 		mapfile -t current_mount_list < <(cut -d " " -f 2 "/proc/mounts" | grep "^${target}")
 		tries=$((tries + 1))
 	done
--- a/lib/functions/host/host-utils.sh
+++ b/lib/functions/host/host-utils.sh
@@ -293,3 +293,31 @@ function get_descendants_of_pid_array() {
 	descendants_of_pid_array_result=(${descendants})
 	display_alert "Descendants of PID $1: ${descendants_of_pid_array_result[*]}" "array = get_descendants_of_pid_array" "debug"
 }
+
+# This essentially runs "sync". It's a bit more complicated than that, though.
+# If "sync" takes more than 5 seconds, we want to let the user know.
+# So run sync under "timeout" in a loop, until it completes successfully.
+# call: wait_for_disk_sync "after writing a huge file to disk"
+function wait_for_disk_sync() {
+	declare -i timeout_seconds=10
+	declare -i sync_worked=0
+	declare -i sync_timeout_count=0
+	declare -i total_wait=0
+
+	while [[ "${sync_worked}" -eq 0 ]]; do
+		declare -i sync_timeout=0
+		display_alert "wait_for_disk_sync: $*" "sync start, timeout_seconds:${timeout_seconds}" "debug"
+		# run a bash sending all to /dev/null, then send to /dev/null again, so we don't get "Killed" messages
+		bash -c "timeout --signal=9 ${timeout_seconds} sync &> /dev/null" &> /dev/null && sync_worked=1 || sync_timeout=1
+		display_alert "wait_for_disk_sync: $*" "sync done worked: ${sync_worked} sync_timeout: ${sync_timeout}" "debug"
+		if [[ "${sync_timeout}" -eq 1 ]]; then
+			total_wait=$((total_wait + timeout_seconds))
+			sync_timeout_count=$((sync_timeout_count + 1))
+			display_alert "Waiting sync to storage $*" "fsync taking more than ${total_wait}s - be patient" "warn"
+		else
+			if [[ "${sync_timeout_count}" -gt 0 ]]; then
+				display_alert "Sync to storage OK $*" "fsync took more than ${total_wait}s" "info"
+			fi
+		fi
+	done
+}
--- a/lib/functions/host/tmpfs-utils.sh
+++ b/lib/functions/host/tmpfs-utils.sh
@@ -71,13 +71,13 @@ function cleanup_tmpfs_for() {
 	if [[ "${umount_tmpfs}" == "umount_tmpfs" ]]; then
 		display_alert "cleanup_tmpfs_for: umount tmpfs" "${tmpfs_name}" "cleanup"
 		cd "${SRC}" || echo "cleanup_tmpfs_for: cd failed to ${SRC}" >&2 # avoid cwd in use error
-		sync                                                             # let disk coalesce
+		wait_for_disk_sync "before tmps cleanup ${tmpfs_name}"           # let disk coalesce
 		umount "${tmpfs_path}" &> /dev/null || {
 			display_alert "cleanup_tmpfs_for: umount failed" "${tmpfs_name} tmpfs umount failed, will try lazy" "cleanup"
 			# Do a lazy umount... last-resort...
-			sync
+			wait_for_disk_sync "before tmps cleanup ${tmpfs_name} lazy" # let disk coalesce
 			umount -l "${tmpfs_path}" &> /dev/null || display_alert "cleanup_tmpfs_for: lazy umount failed" "${tmpfs_name} tmpfs lazy umount also failed" "cleanup"
-			sync
+			wait_for_disk_sync "after tmps cleanup ${tmpfs_name} lazy" # let disk coalesce
 		}

 		# Check if the tmpfs is still mounted after all that trying, log error, show debug, and give up with error
--- a/lib/functions/image/partitioning.sh
+++ b/lib/functions/image/partitioning.sh
@@ -134,7 +134,7 @@ function prepare_partitions() {
 	# stage: create blank image
 	display_alert "Creating blank image for rootfs" "truncate: $sdsize MiB" "info"
 	run_host_command_logged truncate "--size=${sdsize}M" "${SDCARD}".raw # please provide EVIDENCE of problems with this; using dd is very slow
-	run_host_command_logged sync
+	wait_for_disk_sync "after truncate SDCARD.raw"

 	# stage: calculate boot partition size
 	local bootstart=$(($OFFSET * 2048))
@@ -240,7 +240,7 @@ function prepare_partitions() {
 		if [[ $ROOTFS_TYPE == btrfs && $BTRFS_COMPRESSION != none ]]; then
 			local fscreateopt="-o compress-force=${BTRFS_COMPRESSION}"
 		fi
-		sync # force writes to be really flushed
+		wait_for_disk_sync "after mkfs" # force writes to be really flushed
 		display_alert "Mounting rootfs" "$rootdevice"
 		run_host_command_logged mount ${fscreateopt} $rootdevice $MOUNT/
 		# create fstab (and crypttab) entry
--- a/lib/functions/image/rootfs-to-image.sh
+++ b/lib/functions/image/rootfs-to-image.sh
@@ -80,7 +80,7 @@ function create_image_from_sdcard_rootfs() {
 		run_host_command_logged sfdisk -l "${LOOP}" # @TODO: use asset..
 	fi

-	run_host_command_logged sync
+	wait_for_disk_sync "before umount MOUNT"

 	umount_chroot_recursive "${MOUNT}" "MOUNT"
 	[[ $CRYPTROOT_ENABLE == yes ]] && cryptsetup luksClose "$ROOT_MAPPER"
--- a/lib/functions/logging/logging.sh
+++ b/lib/functions/logging/logging.sh
@@ -112,11 +112,11 @@ function print_current_asset_log_base_file() {
 }

 function check_and_close_fd_13() {
-	sync # let the disk catch up
+	wait_for_disk_sync "before closing fd 13" # let the disk catch up
 	if [[ -e /proc/self/fd/13 ]]; then
 		display_alert "Closing fd 13" "log still open" "cleanup" # no reason to be alarmed
 		exec 13>&- || true                                       # close the file descriptor, lest sed keeps running forever.
-		sync                                                     # make sure the file is written to disk
+		wait_for_disk_sync "after fd 13 closure"                 # make sure the file is written to disk
 	else
 		display_alert "Not closing fd 13" "log already closed" "cleanup"
 	fi
@@ -146,7 +146,7 @@ function check_and_close_fd_13() {
 		else
 			display_alert "global_tee_pid already dead after descendants killed" "${global_tee_pid}" "cleanup"
 		fi
-		sync # wait for the disk to catch up
+		wait_for_disk_sync "after killing tee pid" # wait for the disk to catch up
 	else
 		display_alert "Not killing global_tee_pid" "${global_tee_pid} not running" "cleanup"
 	fi
--- a/lib/functions/main/default-build.sh
+++ b/lib/functions/main/default-build.sh
@@ -1,6 +1,8 @@
 # This does NOT run under the logging manager. We should invoke the do_with_logging wrapper for
 # strategic parts of this. Attention: rootfs does it's own logging, so just let that be.
 function main_default_build_single() {
+	wait_for_disk_sync "before starting build" # fsync, wait for disk to sync, and then continue. alert user if takes too long.
+
 	# Check that WORKDIR_BASE_TMP exists; if not, create it.
 	if [[ ! -d "${WORKDIR_BASE_TMP}" ]]; then
 		mkdir -p "${WORKDIR_BASE_TMP}"
@@ -21,7 +23,8 @@ function main_default_build_single() {
 	declare -g -x CCACHE_TEMPDIR="${WORKDIR}/ccache_tmp" # Export CCACHE_TEMPDIR, under Workdir, which is hopefully under tmpfs. Thanks @the-Going for this.
 	declare -g -x XDG_RUNTIME_DIR="${WORKDIR}/xdg_tmp"   # XDG_RUNTIME_DIR is used by the likes of systemd/freedesktop centric apps.

-	declare start=$(date +%s)
+	declare start
+	start=$(date +%s)

 	### Write config summary
 	LOG_SECTION="config_summary" do_with_logging write_config_summary_output_file
@@ -192,7 +195,6 @@ function main_default_build_single() {
 	# At this point, the WORKDIR should be clean. Add debug info.
 	debug_tmpfs_show_usage "AFTER ALL PKGS BUILT"

-
 	# build rootfs, if not only kernel.
 	if [[ $KERNEL_ONLY != yes ]]; then
 		display_alert "Building image" "${BOARD}" "target-started"
--- a/lib/functions/rootfs/create-cache.sh
+++ b/lib/functions/rootfs/create-cache.sh
@@ -297,7 +297,7 @@ function create_new_rootfs_cache() {

 	# stage: make rootfs cache archive
 	display_alert "Ending debootstrap process and preparing cache" "$RELEASE" "info"
-	sync
+	wait_for_disk_sync "before tar rootfs"

 	# the only reason to unmount here is compression progress display based on rootfs size calculation
 	# also, it doesn't make sense to copy stuff like "/dev" etc. those are filtered below anyway