#!/usr/bin/env bash
# aufs-find
# Author: lcpterid, 2026
# License: GPL v3

# ---- Identity and usage text -------------------------------------------------
# These are set before the library is sourced, in the same order as always.
prog_name="$(basename "$0")"
# Regex fragment for a single decimal digit; reused by the option validators
nu="[0-9]"
help_text="Syntax: $prog_name [OPTIONS] directory_to_check

Find files (including symlinks/pipes/sockets) within a directory, filtered by
how many aufs branches, or which aufs branches, contain them.

Options:
-a:          Find files in all branches. Overrides -b and -c.
-bn[,m,..]:  Branch filter: Find files in the given numbered branches n, m, etc.
-cn[+|-]:    Count filter: Find files which have n versions (within the selected branches).
             If followed by a +, find files with n or more versions. -c+ is short for -c2+.
             If followed by a -, find files with n or fewer versions.
             The default is 1+, i.e. any number.
-f:          Files only. Excludes symlinks, pipes and sockets from the results.
-p:          Do parallel finds in separate processes. Can give a modest performance
             gain. Does not affect short output.
-r:          Find files recursively in subfolders.
-s:          Short output. Just count the branches, don't state the
             branch numbers. (This is significantly faster.)
"

# ---- Shared library ----------------------------------------------------------
# The helper library is expected at ../lib/libaufs-dir-tools relative to the
# directory holding this script; without it nothing below can work.
script_dir="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &>/dev/null && pwd )"
lib_path="$(realpath -m "${script_dir}/../lib/libaufs-dir-tools")"
[ -x "$lib_path" ] || {
	echo "Couldn't find library file!" >&2
	exit 1
}
. "$lib_path"

# ---- Environment checks ------------------------------------------------------
# error_quit, mount_check and branch_files_check are not defined in this file
# (presumably they come from the library sourced above).
if [ "$#" -eq 0 ]; then
	error_quit 1 ""
fi
if ! mount_check; then
	error_quit 2 "AUFS mounts do not match requirements. This tool requires one AUFS branch system, mounted on root (/) and optionally sub-mounted elsewhere."
fi
if ! branch_files_check; then
	error_quit 2 "could not find AUFS branch files in /sys/fs/. Are you on an AUFS file system?"
fi

# Directory holding the brN files; the glob is expanded by the shell via echo
aufs_br_dir="$(echo /sys/fs/aufs/si*/)"

# Base directory for the scratch files used by parallel finds
TMP='/tmp'

# ---- Option defaults (may be overridden by the command-line switches) --------
# Version-count filter: "1" with "+" means "one or more", i.e. no restriction
version_count="1"
version_op="+"
# Branch filter: empty means "all branches"
branch_filter=""
user_filtered="no"
show_all="no"
# Search and output behaviour
recursive="no"
files_only="no"
parallel_finds="no"
output_type="long"
# Parse the command-line switches. The leading ':' in the option string makes
# getopts report a missing argument as ':' and an unknown switch as '?', so
# both cases are handled (and worded) here.
while getopts ":hab:c:frsp" thisopt; do
	case "$thisopt" in
		h) error_quit 0 "" ;;
		a) show_all="yes" ;;
		f) files_only="yes" ;;
		p) parallel_finds="yes" ;;
		r) recursive="yes" ;;
		s) output_type="short" ;;
		b)
			user_filtered="yes"
			branch_list_arg="$OPTARG"
			[[ "$branch_list_arg" =~ ^$nu+(,$nu+)*$ ]] ||
				error_quit 1 "-b option provided, but argument is not a comma-separated list of numbers"

			# Every requested branch number must have a matching brN file
			while read -r bnum; do
				if ! [ -f "${aufs_br_dir}br${bnum}" ]; then
					error_quit 1 "-b option provided with branch number ${bnum}, which was not found"
				fi
			done < <(tr ',' '\n' <<< "$branch_list_arg")

			# Turn "1,2,3" into the regex alternation "1|2|3"
			branch_filter="${branch_list_arg//,/|}"
			;;
		c)
			user_filtered="yes"
			version_count_arg="$OPTARG"
			# A bare "+" is shorthand for "2+"
			if [ "$version_count_arg" = "+" ]; then
				version_count_arg="2+"
			fi

			# Split "n", "n+" or "n-" into the number and the comparison;
			# a missing suffix means an exact match ("=")
			if [[ "$version_count_arg" =~ ^($nu+)([+-]?)$ ]]; then
				version_count="${BASH_REMATCH[1]}"
				version_op="${BASH_REMATCH[2]:-=}"
			else
				error_quit 1 "-c option provided, but argument is not in the form n, n+ or n-"
			fi
			;;
		:)
			error_quit 1 "-b or -c option provided with no branch list or version count"
			;;
		\?)
			error_quit 1 "invalid option -$OPTARG"
			;;
	esac
done

# Drop the parsed switches so that $1 is the directory to check
shift "$(( OPTIND - 1 ))"

# -a resets both filters to their "match everything" defaults, and warns if
# the user also asked for a -b or -c filter.
if [[ "$show_all" == "yes" ]]; then
	version_count="1"
	version_op="+"
	branch_filter=""
	if [[ "$user_filtered" == "yes" ]]; then
		warning_msg "-a option overrides filter options."
	fi
fi

# The first remaining argument is the directory to examine
[[ -n "$1" ]] || error_quit 1 "no directory_to_check provided"

# realpath -s makes the path absolute without resolving symlinks; the result
# is then passed through aufs_real_path (defined outside this file).
root_dir="$(realpath -s "$1" | aufs_real_path)"
[[ -d "$root_dir" ]] || error_quit 2 "$root_dir is not a directory"

if ! aufs_folder_check "$root_dir"; then
	error_quit 3 "Directory $root_dir is not in the AUFS filesystem"
fi

# Regex used to pick brN files: everything containing "br" when unfiltered,
# otherwise only names ending in one of the requested numbers.
if [[ -z "$branch_filter" ]]; then
	branch_filter_regex="br"
else
	branch_filter_regex="br(${branch_filter})$"
fi

# Argument for find's -type test
case "$files_only" in
	yes) find_types='f' ;;
	*)   find_types='f,l,p,s' ;;
esac

# Helper functions

# Lists the paths of the brN files below $aufs_br_dir, one per line, keeping
# only those whose path matches $branch_filter_regex.
# Returns grep's status (non-zero when nothing matched).
get_filtered_branch_files () {
	find "$aufs_br_dir" -name 'br[0-9]*' -print |
		grep -E -e "$branch_filter_regex"
}

# Runs find below the directory given as $1, restricted to the file types in
# $find_types. Unless $recursive is "yes", only the top level is searched.
find_with_option () {
	local search_root="$1"
	local -a depth_args=()

	if [ "$recursive" != "yes" ]; then
		depth_args=(-maxdepth 1)
	fi

	find "$search_root" "${depth_args[@]}" -type "$find_types"
}

# Checks only number of versions (-c), not named branches (-b)
#   $1 - how many branches contain the file
#   $2 - the file's path; an empty path never passes
# Compares $1 against $version_count using $version_op
# ("+" at least, "-" at most, "=" exactly).
# Returns 0 if the file should be printed, 1 otherwise.
branch_count_check () {
	local found="$1"
	local file_path="$2"

	[ -n "$file_path" ] || return 1

	case "$version_op" in
		"+") [ "$found" -ge "$version_count" ] || return 1 ;;
		"-") [ "$found" -le "$version_count" ] || return 1 ;;
		"=") [ "$found" -eq "$version_count" ] || return 1 ;;
		*)   return 1 ;;
	esac

	return 0
}

# Checks if the argument $1 (expected to be like "1,2,3")
# contains any branch from the branch filter (which is like "4|5|6").
# An empty branch filter accepts everything.
# Returns 0 on a match (or no filter), 1 otherwise.
branch_number_check () {
	local file_branches="$1"
	# A comma-separated number list in which one whole entry is a filtered branch
	local wanted="^(${nu}+,)*(${branch_filter})(,${nu}+)*\$"

	[ -n "$branch_filter" ] || return 0

	if [[ "$file_branches" =~ $wanted ]]; then
		return 0
	fi
	return 1
}

# Prints one three-column line (count, branch numbers, path) for a group,
# provided the group passes branch_count_check.
#   $1 - number of branches containing the path
#   $2 - comma-separated branch numbers, in any order
#   $3 - the path
# The branch numbers are printed in ascending numeric order.
# Returns 0 when the group is filtered out, otherwise printf's status.
long_print_if_appropriate () {
	local n_found="$1"
	local unsorted_nums="$2"
	local file_path="$3"
	local sorted_nums

	branch_count_check "$n_found" "$file_path" || return 0

	sorted_nums="$(tr ',' '\n' <<< "$unsorted_nums" | sort -n | paste -sd,)"
	printf "%d\t%s\t%s\n" "$n_found" "$sorted_nums" "$file_path"
}



# Two-column code
# (number of branches, filepath)
#
# For every selected branch, lists the files below <branch root>$root_dir,
# strips the branch root from each path, and counts how many branches each
# resulting path appears in. A "<count><TAB><path>" line is printed for every
# path accepted by branch_count_check.
#
# Globals read: root_dir (the helpers it calls read their own settings).
# Returns: the status of the last check/print performed in the output loop.
do_short_output () {
	local all_branches all_files_in_branches
	local broot line_count line_path

	# Contents of the selected brN files with the trailing "=" + two characters
	# (and an optional "+attribute" suffix) removed, leaving the branch roots.
	all_branches="$(get_filtered_branch_files | xargs cat | sed 's/=..\(+[a-z_]*\)\?$//')"

	# delete_sources is not defined in this file; it is applied as a filter to
	# the branch-relative paths.
	all_files_in_branches="$(while IFS= read -r broot; do
		[ -d "${broot}${root_dir}" ] &&
			find_with_option "${broot}${root_dir}" |
			cut -c "$(( ${#broot} + 1))-" |
			delete_sources
	done < <(printf '%s\n' "$all_branches"))"

	# uniq -c emits "<padding><count> <path>"; the sed turns that into
	# "<count><TAB><path>". Splitting on TAB only (instead of any whitespace)
	# keeps paths that end in spaces intact.
	while IFS=$'\t' read -r line_count line_path; do
		branch_count_check "$line_count" "$line_path" && printf "%s\t%s\n" "$line_count" "$line_path"
	done < <(printf '%s\n' "$all_files_in_branches" | sort | uniq -c | sed -e 's/^ *//' -e 's/  */\t/')
}


# Three-column code
# (number of branches, comma-separated list of branch numbers, filepath)
#
# For every selected branch, lists the files below <branch root>$root_dir and
# tags each branch-relative path with the branch number. The tagged paths are
# sorted so that identical paths are adjacent, then folded into one group per
# path, and each group is handed to long_print_if_appropriate.
#
# With $parallel_finds set to "yes", each branch is scanned by its own
# background job. Every job writes to a separate file inside a private
# directory created with mktemp below $TMP; the directory is removed again
# once the results have been read back.
#
# Globals read: root_dir, parallel_finds, TMP.
# Returns: the status of the final long_print_if_appropriate call.
do_long_output () {
	local all_branches all_branches_sorted
	local all_files_in_branches all_files_in_branches_sorted
	local br_num br_root this_num this_path
	local group_path group_nums group_count
	local work_dir

	# One "<branch number> <branch root>" line per selected branch. The number
	# is whatever follows the last "br" in the file's path; the root is the
	# first line of the file up to (not including) its last '='.
	all_branches="$(while IFS= read -r brf; do
		br_line=""
		IFS= read -r br_line < "$brf"
		printf "%s %s\n" "${brf##*br}" "${br_line%=*}"
	done < <(get_filtered_branch_files))"
	all_branches_sorted="$(printf '%s\n' "$all_branches" | sort -n)"

	# delete_sources is not defined in this file; it is applied as a filter to
	# the branch-relative paths in both modes.
	if [ "$parallel_finds" = "yes" ]; then
		# A private, unpredictable directory: no clashes with other runs or
		# other users, and nothing stale to pick up by accident.
		if ! work_dir="$(mktemp -d "${TMP:-/tmp}/aufs-find_tmp.XXXXXX")"; then
			error_quit 2 "could not create a temporary directory under ${TMP:-/tmp}"
			return 1
		fi

		# One background job per branch, each writing to the file named after
		# its branch number
		while read -r br_num br_root; do
			[ -d "${br_root}${root_dir}" ] &&
				find_with_option "${br_root}${root_dir}" |
					cut -c "$(( ${#br_root} + 1))-" |
					delete_sources > "${work_dir}/${br_num}" &
		done < <(printf '%s\n' "$all_branches_sorted")
		wait

		all_files_in_branches="$(for br_file in "${work_dir}"/*; do
			# Skips the literal pattern left behind when no job wrote a file
			[ -f "$br_file" ] || continue
			br_num="${br_file##*/}"
			while IFS= read -r beheaded_path; do
				printf "%d\t%s\n" "$br_num" "$beheaded_path"
			done < "$br_file"
		done)"

		rm -rf -- "$work_dir"
	else
		all_files_in_branches="$(while read -r br_num br_root; do
			[ -d "${br_root}${root_dir}" ] &&
				while IFS= read -r beheaded_path; do
					printf "%d\t%s\n" "$br_num" "$beheaded_path"
				done < <(find_with_option "${br_root}${root_dir}" | cut -c "$(( ${#br_root} + 1))-" | delete_sources)
		done < <(printf '%s\n' "$all_branches_sorted"))"
	fi
	all_files_in_branches_sorted="$(printf '%s\n' "$all_files_in_branches" | sort -k 2)"

	# Fold runs of lines with the same path into (count, branch list, path).
	# Splitting on TAB only keeps paths that end in spaces intact.
	group_path=""
	group_nums=""
	group_count=0
	while IFS=$'\t' read -r this_num this_path; do
		if [ "$this_path" != "$group_path" ]; then
			[ -n "$group_path" ] && long_print_if_appropriate "$group_count" "$group_nums" "$group_path"
			group_path="$this_path"
			group_nums="$this_num"
			group_count=1
		else
			group_nums+=",$this_num"
			group_count=$((group_count + 1))
		fi
	done < <(printf '%s\n' "$all_files_in_branches_sorted")
	# The loop only prints a group when the next one starts, so the last
	# group still has to be flushed here
	long_print_if_appropriate "$group_count" "$group_nums" "$group_path"
}

# Produce the report in the format selected with -s (long is the default)
case "$output_type" in
	short) do_short_output ;;
	*)     do_long_output ;;
esac

