#!/bin/bash
#
# depfinder - a script that finds slackware package dependencies.
#
# Written by George Vlahavas 
# (vlahavas~at~gmail~dot~com)
#
# Licensed under the GPL v3

VERSION=1.4.4

# Directory depfinder was started from; output files requested with -f
# are written relative to it.
CWD=$(pwd)

# Default settings. The command-line switches may override the first
# group further down.
CREATEDEPFILE=0
CHECKALLFILES=0
VERSIONINFO=0
PYTHONDEPS=0
JOBS=1

# Values filled in later while the package is being processed
STARTDIR=""
DEPVER=""

# Suffix appended to "lib" when building library directory names
LIBDIRSUFFIX=""

# Helper program that depfind() runs for every library
DEPFINDERSEARCH="/usr/libexec/depfinder-search"

# A DEBUG value inherited from the environment wins; otherwise 0.
# Any value other than 0 makes cleanup() keep the temp directory.
: "${DEBUG:=0}"

# Print the usage/help text on stdout.
# The here-doc delimiter is quoted so the text is emitted verbatim.
help_msg()
{
cat <<'EOF'
depfinder: finds the dependencies of Slackware packages

usage: depfinder [OPTIONS] <package.tgz|.txz|.tlz|.tbz|.tbr or packaging-dir>

OPTIONS:
    -f,          Output dependencies to a file instead of stdout.
                 By default it creates a .dep file. See also -s.
    -s,          Outputs dependencies with version information. If
                 combined with -f, it creates a slack-required file
    -a,          Checks all files in the directory tree instead of
                 following the FHS
    -p,          Attempts to detect python dependencies.
    -3,          Uses python 3 instead of 2 (works only with -p)
    -j [jobs],   The number of parallel depfinder jobs to run. Faster
                 if it matches the number of your CPUs/cores
                 (default: 1)
    -v,          Show version information
    -h,          This help message
EOF
}

# Print the version banner on stdout.
#
# Globals: VERSION (read)
#
# The second line is single-quoted so that the double quotes around
# "depfinder -h" are actually printed. Embedding them unescaped inside
# a double-quoted string ends that string and the quotes are lost.
ver_msg()
{
	printf '%s\n' \
		"depfinder version $VERSION (c) 2009 George Vlahavas" \
		'Run "depfinder -h" for help information'
}

# Routine to find out where each dependency comes from and output to
# a file. The C++ code "borrowed" from zpm makes all the difference
# compared to requiredbuilder here.
#
# Arguments: $1 - chunk suffix; reads LIBS$1 and writes DEPS$1 in the
#                 current directory, using TMP$1 as scratch space
# Globals:   DEPFINDERSEARCH (read) - the search helper; it is called
#            with the library and the PID of the main script
#
# For every entry in LIBS$1 the helper is run once. When it reports
# more than one line they are joined with "|" on a single line of
# DEPS$1; a single line (or no output) is appended unchanged.
depfind()
{
	local chunk=$1
	local lib resolved

	touch "DEPS$chunk"
	# Entries are deliberately word-split here, as in the chunk files
	# one entry per line is expected.
	for lib in $(cat "LIBS$chunk"); do
		if [ -h "/$lib" ]; then
			# Only switch to the link target when it can be resolved.
			# For a dangling link readlink -e prints nothing, which
			# used to leave the helper without a library argument.
			if resolved=$(readlink -e "/$lib") && [ -n "$resolved" ]; then
				lib=$resolved
			fi
		fi
		"$DEPFINDERSEARCH" "$lib" $$ | uniq > "TMP$chunk"
		if [ "$(wc -l < "TMP$chunk")" -gt 1 ]; then
			tr '\n' ' ' < "TMP$chunk" | \
			sed "s/ /|/g" | sed "s/|$//" >> "DEPS$chunk"
			echo "" >> "DEPS$chunk"
		else
			cat "TMP$chunk" >> "DEPS$chunk"
		fi
	done
}

# Function that finds python files in the current directory, where the
# current directory is always the packaging directory here. Since
# python scripts could be all over the place, we're looking everywhere
# in the package by default.
#
# Outputs: one path per line on stdout. First every regular file that
#          file(1) describes as a "python script", then every regular
#          file named *.py (so a file can be listed twice).
#
# The file name is handed to the inner shell as a positional parameter
# instead of being pasted into the script text, so names containing
# quotes, "$(...)" or backticks are never interpreted as shell code.
findpython()
{
	LANG=C find ./ \
	-type f -exec bash -c \
	'file -b "$1" | grep -i "python script" 1>/dev/null && echo "$1"' \
	_ {} \; \
	2> /dev/null
	find ./ \
	-type f -name "*.py"
}

# Function that parses python scripts for modules that it imports
#
# Arguments: $1 - path of the python file (may contain whitespace)
# Outputs:   one top-level module name per line on stdout
#
# The pipeline, stage by stage:
#   1. split statements separated by ";" onto their own lines
#   2. join lines that end with a backslash continuation
#   3. squeeze runs of blanks/tabs into a single space
#   4. keep only "import ..." and "from ... import ..." lines
#   5. cut everything from the first "#" onwards
#   6. rewrite an __import__('x') call as "import x"
#   7. reduce "from x import y" to x and "import x" to x
#   8. drop an "as alias" suffix and trim surrounding blanks
#   9. delete lines that still contain a space-delimited word
#  10. put comma separated modules on separate lines
#  11. keep only the part before the first "." (top-level package)
parsepython()
{
	sed -e "s/;/\n/g" -- "$1" | \
	sed -e :a -e '/\\$/N; s/\\\n//; ta' | \
	sed "s/[ \t]\+/ /g" | \
	grep "^ *import \|^ *from .* import " | \
	sed "s/#/@COMMENT@/" | \
	sed "s/\(.*\)@COMMENT@.*/\1/" | \
	sed "s/.*__import__('\(.*\)').*/import \1/" | \
	sed "s/from \(.*\) import .*/\1/" | \
	sed "s/^ *import \(.*\)/\1/" | \
	sed "s/\(.*\) as .*/\1/"  | \
	sed "s/^[ \t]*//;s/[ \t]*$//" | \
	sed "/ [^,]\+ /d" | \
	sed "s/,/\n/g" | \
	sed "s/^[ \t]*//" | \
	sed "s/\./@MODULESEPARATOR@/" | \
	sed "s/\(.*\)@MODULESEPARATOR@.*/\1/"
}

# Remove this run's scratch directory (/tmp/depfinder.<PID>).
# Nothing is removed unless DEBUG is 0, so the intermediate files can
# be inspected after a debugging run.
cleanup()
{
	[ $DEBUG -eq 0 ] || return 0
	rm -rf /tmp/depfinder.$$
}

# Abort the script: run cleanup() first, then terminate with exit
# status 1.
err()
{
	cleanup; exit 1
}

#
# This is where everything starts...
#


# Called without any argument at all: print the help text followed by
# an error message and stop.
case $# in
	0)
		help_msg
		echo "ERROR: No arguments found." >&2
		err
		;;
esac

# check which switches are used
#
# The interpreter used for -p defaults to "python" and is switched to
# "python3" only by -3. It is set once, before the loop, so that the
# position of -3 among the other switches does not matter (it used to
# be reset to "python" by every switch that followed -3).
PYTHON="python"
while getopts  ":hvfsap3j:" flag
do
	case "$flag" in
		h)
			help_msg
			err
			;;
		v)
			ver_msg
			err
			;;
		f)
			CREATEDEPFILE=1
			;;
		s)
			VERSIONINFO=1
			;;
		a)
			CHECKALLFILES=1
			;;
		p)
			PYTHONDEPS=1
			;;
		3)
			PYTHON="python3"
			;;
		j)
			# we check if it's a number
			if [ "$OPTARG" -eq "$OPTARG" ] 2> /dev/null; then
				# and if it's in range (positive number)
				if [ "$OPTARG" -ge 1 ] 2> /dev/null; then
					# and only then change the default value
					JOBS=$OPTARG
				# if it's out of range show help
				else
					help_msg
					echo "ERROR: Number of jobs must be at least one" >&2
					err
				fi
			# if it's not a number show help
			else
				help_msg
				err
			fi
			;;
		:)
			# silent getopts mode reports a missing option argument
			# as ":" with the option letter in OPTARG
			help_msg
			echo "ERROR: Option -$OPTARG requires an argument" >&2
			err
			;;
		*)
			# unknown switch
			help_msg
			err
			;;
	esac
done

# we shift positions in $@ so that we discard all previous switches
# and leave only files as additional arguments
shift $((OPTIND-1))

# Only one package at a time!
if [ $# -gt 1 ]; then
	help_msg
	echo "ERROR: Too many arguments" >&2
	err
fi

# Switches alone are not enough. Without this check the tests below
# run with an empty operand, "[ -d ]" is true, and STARTDIR ends up
# as "/" - the whole filesystem would be scanned.
if [ $# -eq 0 ] || [ -z "$1" ]; then
	help_msg
	echo "ERROR: No package file or directory specified" >&2
	err
fi

# test if the package file is there
if [ ! -d "$1" ] && [ ! -f "$1" ] ; then
	echo "ERROR: $1 not found" >&2
	err
fi

# test if the file is actually a directory...
if [ -d "$1" ]; then
	# STARTDIR always carries a trailing slash
	STARTDIR=$(readlink -f "$1")/
else
	# ... or has one of the supported package extensions
	case "$1" in
		*.tgz|*.tbz|*.tlz|*.txz|*.tbr)
			;;
		*)
			echo "ERROR: $1 is not a package file or a directory!" >&2
			err
			;;
	esac
fi

# Cleanup any leftover files before really doing anything
cleanup

# Create temp dir and save the list of installed packages in it.
# The directory name is derived from the PID; depfind() hands that
# same PID to the search helper.
mkdir -p /tmp/depfinder.$$
ls /var/log/packages > /tmp/depfinder.$$/pkglist.log

# Extract only if argument is not a directory. The compressed formats
# that are piped through a decompressor run in a subshell with
# pipefail, so a failing decompressor is noticed as well.
if [ -z "$STARTDIR" ]; then
	case "$1" in
		*.tgz|*.tbz)
			tar xf "$1" -C /tmp/depfinder.$$
			;;
		*.tlz)
			( set -o pipefail
			  lzma -d -c "$1" | tar xf - -C /tmp/depfinder.$$ )
			;;
		*.txz)
			( set -o pipefail
			  xz -d -c "$1" | tar xf - -C /tmp/depfinder.$$ )
			;;
		*.tbr)
			( set -o pipefail
			  bro --decompress --input "$1" | tar xf - -C /tmp/depfinder.$$ )
			;;
	esac
	# Report extraction problems instead of silently continuing with
	# a partial tree. This is only a warning, the run goes on.
	if [ $? -ne 0 ]; then
		echo "WARNING: problems while extracting $1" >&2
	fi
fi

# Everything below writes into the current directory, so do not go on
# if the temp dir cannot be entered.
cd /tmp/depfinder.$$ || err

# Package name: base name without the package extension (or, for a
# directory, without a trailing slash). The extension is only removed
# at the end of the name.
PKGNAME=$(basename "$1" | sed "s/\.t[gblx]z$\|\.tbr$\|\/$//")

# Find all binaries and libs in the package and list them, one path
# per line, in /tmp/depfinder.<PID>/FILES.
#
# file -b is used so that only the description (not the file name) is
# searched for "ELF", and the path reaches the inner shell as "$1"
# rather than being pasted into its script text. Paths with spaces,
# quotes or "$(...)" are therefore listed verbatim and never executed.
[ -z "$STARTDIR" ] || cd "$STARTDIR" || err
touch /tmp/depfinder.$$/FILES
if [ $CHECKALLFILES -eq 0 ]; then
	# usually only in standard locations
	find \
	{sbin,usr/sbin,usr/bin,bin,usr/lib${LIBDIRSUFFIX},lib${LIBDIRSUFFIX},usr/libexec} \
	-type f -exec bash -c \
	'file -b "$1" | grep -q ELF && echo "$1"' _ {} \; \
	>> /tmp/depfinder.$$/FILES 2> /dev/null
else
	# or anywhere in the package if -a is specified
	find ./ \
	-type f -exec bash -c \
	'file -b "$1" | grep -q ELF && echo "$1"' _ {} \; \
	>> /tmp/depfinder.$$/FILES 2> /dev/null
fi

# Look for python dependencies (only when -p was given).
#
# All intermediate lists live in /tmp/depfinder.<PID>:
#   PYTHONDIRS          - entries of sys.path of the chosen interpreter
#   PYTHONFILES         - python files found in the package
#   PYTHONMODULESRAW    - module names imported by those files
#   PYTHONMODULESSORTED - the same, cleaned up and de-duplicated
#   PYTHONMODULES       - modules not shipped in the package and not
#                         built into the interpreter
#   PYTHONNEEDEDMODULES - files on the system providing those modules
if [ $PYTHONDEPS -eq 1 ]; then
	# Determine in which directories python looks for modules
	$PYTHON -c "import sys; print(sys.path)" | \
	sed "s/'\|,\|\[\|\]//g" | \
	sed "s/ /\n/g" > /tmp/depfinder.$$/PYTHONDIRS

	# Find all python scripts in the package
	touch /tmp/depfinder.$$/PYTHONFILES
	findpython >> /tmp/depfinder.$$/PYTHONFILES

	# Determine which python modules are imported (used)
	touch /tmp/depfinder.$$/PYTHONMODULESRAW
	while IFS= read -r i; do
		parsepython "$i" >> /tmp/depfinder.$$/PYTHONMODULESRAW
	done < /tmp/depfinder.$$/PYTHONFILES

	# Sort and clean modules list. cat -A makes line ends visible
	# ("$", or "^M$" for CRLF files); they are stripped before
	# sorting so that the same module coming from a CRLF and from an
	# LF file collapses into a single entry.
	cat -A /tmp/depfinder.$$/PYTHONMODULESRAW | \
	sed 's/\^M\$\|\$//' | sort | \
	uniq > /tmp/depfinder.$$/PYTHONMODULESSORTED

	# Find which modules are included in the package and create a
	# list with the ones that are not. The package is listed once and
	# the list is searched for every module.
	find ./ -type f > /tmp/depfinder.$$/PYTHONPKGFILES
	touch /tmp/depfinder.$$/PYTHONMODULES
	for i in `cat /tmp/depfinder.$$/PYTHONMODULESSORTED`; do
		FOUND=`grep "/$i\.py$\|/$i/__init__\.py$\|/$i\.so$" /tmp/depfinder.$$/PYTHONPKGFILES`
		if [[ ! "$FOUND" ]]; then
			echo "$i" >> /tmp/depfinder.$$/PYTHONMODULES
		fi
	done

	# Remove python built-in modules from the list. Whole lines only:
	# an unanchored pattern would also drop every module whose name
	# merely contains the name of a built-in one.
	$PYTHON -c "import sys; print(sys.builtin_module_names)" | \
	sed "s/'\|,\|(\|)//g" > /tmp/depfinder.$$/PYTHONBUILTIN
	for i in `cat /tmp/depfinder.$$/PYTHONBUILTIN`; do
		sed -i "/^${i}$/d" /tmp/depfinder.$$/PYTHONMODULES
	done

	# Find the specific python files that provide the needed python
	# modules. A module that has been located is removed from
	# PYTHONMODULES, so whatever remains there could not be found.
	touch /tmp/depfinder.$$/PYTHONNEEDEDMODULES
	for dir in `cat /tmp/depfinder.$$/PYTHONDIRS`; do
		if [ -d "$dir" ]; then
			for i in `cat /tmp/depfinder.$$/PYTHONMODULES`; do
				if [ -f "$dir/$i.py" ]; then
					echo "$dir/$i.py" \
					>> /tmp/depfinder.$$/PYTHONNEEDEDMODULES
					sed -i "/^${i}$/d" /tmp/depfinder.$$/PYTHONMODULES
				elif [ -f "$dir/$i/__init__.py" ]; then
					echo "$dir/$i/__init__.py" \
					>> /tmp/depfinder.$$/PYTHONNEEDEDMODULES
					sed -i "/^${i}$/d" /tmp/depfinder.$$/PYTHONMODULES
				elif [ -f "$dir/$i.so" ]; then
					echo "$dir/$i.so" \
					>> /tmp/depfinder.$$/PYTHONNEEDEDMODULES
					sed -i "/^${i}$/d" /tmp/depfinder.$$/PYTHONMODULES
				fi
			done
		fi
	done
	# Display a warning for python modules that cannot be located in
	# the system. These are not errors because the modules could be
	# optional.
	for i in `cat /tmp/depfinder.$$/PYTHONMODULES`; do
		echo "WARNING: python module $i not found." >&2
	done
fi

# All list files from here on are created in the temp directory
cd /tmp/depfinder.$$ || err

# Dump the dependencies to a file.
# Resolved ldd lines are reduced to the library path without its
# leading slash; unresolved ones are kept as "<name> NOT FOUND".
# FILES is read line by line so that paths containing blanks stay in
# one piece, and ldd gets an empty stdin so it cannot eat the list.
# NOTE(review): ldd(1) warns that it may execute the inspected program
# on some setups - only run depfinder on packages you trust.
touch LIBSM
while IFS= read -r i; do
	LANG=C ldd "${STARTDIR}${i}" 2> /dev/null < /dev/null \
	| grep "=>" \
	| sed "s/\(.*\) => \/\(.*\) \(.*\)/\2/" \
	| sed "s/=> not found/NOT FOUND/" \
	| grep -v "=>" >> LIBSM
done < FILES

# linux-gate.so.1 is always a dead-end so we don't look for that at all
sort LIBSM | uniq | grep -v "linux-gate\.so" > LIBS

# Find which libraries are included in the package; every match is
# reduced to "<name>.so" (directory and version suffix removed).
# With an empty STARTDIR the current (temp) directory is searched.
find "${STARTDIR:-.}" | grep "\.so" | \
sed "s/\(.*\)\/\(.*\)\.so\(.*\)/\2\.so/" >> INCLUDED

# Exclude the included libs from the search list.
# NOTE(review): resolved entries in LIBS are paths while INCLUDED holds
# bare "<name>.so" names, so in practice this mostly drops unversioned
# "NOT FOUND" entries - confirm that this is the intended scope.
if [ -f INCLUDED ]; then
	while IFS= read -r i; do
		sed -i "/^$i$/d" LIBS
		sed -i "/\t$i NOT FOUND/d" LIBS
	done < INCLUDED
fi

# Report libraries that ldd could not resolve. The run is aborted
# afterwards, unless DEPFINDERNOEXIT is set to 1.
if grep -q "NOT FOUND" LIBS; then
	echo "ERROR: package requires libraries not found in the system:" >&2
	grep "NOT FOUND" LIBS | sed -e "s/NOT FOUND//" -e "s/\t/ /" >&2
	[ "$DEPFINDERNOEXIT" = "1" ] || err
fi

# Append the list of python modules to the list of libraries that are
# external dependencies
if [ $PYTHONDEPS -eq 1 ]; then
	cat PYTHONNEEDEDMODULES >> LIBS
fi

# Some files in /usr/lib have nasty paths like /usr/lib/../lib/
# as reported by ldd
sed -i "s|lib${LIBDIRSUFFIX}/\.\./||" LIBS

# Get the total number of libraries in the package
TOTALLINES=$(wc -l < LIBS)

# The number of jobs can not exceed the number of libraries present in
# the package and cannot be 0 either. It is also capped at 100: the
# chunk files carry a two-digit suffix (00-99) which is what the job
# loops generate with seq.
if [ $JOBS -gt $TOTALLINES ]; then
	JOBS=$TOTALLINES
fi
if [ $JOBS -gt 100 ]; then
	JOBS=100
fi
if [ $JOBS -lt 1 ]; then
	JOBS=1
fi

# Maximum file number should be $JOBS-1, split starts counting from 0
MAXFILENO=$((JOBS-1))

# Split the libraries list in parts for running multiple jobs.
# Rounding the chunk size up (instead of "divide and add one") gives
# every requested job some work whenever there are enough libraries
# and never produces more than $JOBS chunks. split rejects a chunk
# size of 0, hence the lower bound for an empty list.
LINES=$(( (TOTALLINES + JOBS - 1) / JOBS ))
if [ $LINES -lt 1 ]; then
	LINES=1
fi
split -d -a 2 -l $LINES LIBS LIBS

# Start one background depfind per chunk number 00..MAXFILENO.
# A chunk file that split did not produce is created empty first, so
# every job has an input file. Then block until all jobs are done.
for i in $(seq -f "%02.f" 0 $MAXFILENO); do
	touch "LIBS$i"
	depfind "$i" &
done
wait

# Collect the per-job results, in chunk order, into one DEPS file
for i in $(seq -f "%02.f" 0 $MAXFILENO); do
	cat "DEPS$i"
done >> DEPS

# Exclude the present package from the dependency list.
# Every DEPS line is a "|" separated list of alternatives; only
# alternatives that are exactly $PKGNAME are removed. Using the name
# as an unanchored sed pattern also cut it out of the middle of other
# package names (think of a packaging directory called "pkg" or
# "lib"). The values are compared as strings (the "" concatenation
# forces that in awk).
awk -F'|' -v pkg="$PKGNAME" '
{
	kept = ""
	n = 0
	for (f = 1; f <= NF; f++) {
		if (($f "") == (pkg "")) continue
		kept = (n++ ? kept "|" : "") $f
	}
	print kept
}' DEPS > DEPS.filtered && mv DEPS.filtered DEPS

# Reduce the full package names (name-version-arch-build) either to
# "name >= version-arch-build" (-s) or to the bare name.
if [ $VERSIONINFO -eq 1 ]; then
	sort DEPS | uniq | \
	sed ':a;s/\([^ \|]\+\)-\(\([^ \|-]\+\)-\([^ \|-]\+\)-\([^ \|-]\+\)\|?\)/\1 >= \2/g;ta' \
	> DEPSNAMES
	# NOTE(review): ADD is not assigned anywhere in this script;
	# presumably a comma separated list of extra package names taken
	# from the environment - confirm. For each of them the installed
	# version is looked up in /var/log/packages.
	for i in `echo $ADD | sed "s/,/ /g"`; do
		DEPVER=`ls /var/log/packages/$i-* | sed "s|/var/log/packages/$i-||" | grep "^[^\-]*\-[^\-]*\-[^\-]*$"`
		sed -i "s/^$i$/$i >= $DEPVER/" DEPSNAMES
	done
else
	sort DEPS | uniq | \
	sed ':a;s/\([^ \|]\+\)-\(\([^ \|-]\+\)-\([^ \|-]\+\)-\([^ \|-]\+\)\|?\)/\1/g;ta'  \
	> DEPSNAMES
fi

# Print DEPSNAMES as a single comma separated line on stdout
# (newlines and blanks become commas, a leading comma is dropped and
# the trailing one is turned back into the final newline).
format_deplist()
{
	tr "\n" " " < DEPSNAMES | \
	sed "s/ /,/g" | \
	sed "s/,$/\n/" | \
	sed "s/^,//"
}

# Output dependencies list.
# Paths below $CWD are quoted so that a start directory containing
# blanks works too.
if [ $VERSIONINFO -eq 1 ]; then
	# If requesting version info, output the deps to a
	# slack-required file or to stdout
	if [ $CREATEDEPFILE -eq 1 ]; then
		cp DEPSNAMES "$CWD/slack-required"
	else
		cat DEPSNAMES
	fi
else
	# If not requesting version info, output the deps to a
	# .dep file or to stdout
	if [ $CREATEDEPFILE -eq 1 ]; then
		BASENAME=$(basename "$PKGNAME")
		DEPFILENAME=$(echo "$BASENAME" | sed "s/\.t[xgbl]z$\|\.tbr//")
		format_deplist > "$CWD/$DEPFILENAME.dep"
	else
		format_deplist
	fi
fi

# Clean up before exit
cleanup

