/* dupedit 5.5
 *
 * This program becomes public domain at the start of year 2025 (UTC).
 * Until then:

 * Copyright 2009-2012 Andreas Nordal (andreas.nordal@gmail.com)
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <errno.h>

#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef _WIN32
# include <windows.h>
# include <winbase.h>
#endif
#ifdef __linux__
# include <sys/resource.h>
#endif

#include <which.h>
#include "lang.h"

/* Raise the soft limit on open file descriptors to the hard limit.
 * Only does anything when compiled for Linux. If either system call
 * fails, the error is reported with perror() and otherwise ignored.
 */
void set_max_open_files(){
#ifdef __linux__
	struct rlimit limits;
	int failed = getrlimit(RLIMIT_NOFILE, &limits);
	if(!failed){
		limits.rlim_cur = limits.rlim_max;
		failed = setrlimit(RLIMIT_NOFILE, &limits);
	}
	if(failed){
		perror("getrlimit/setrlimit");
	}
#endif
}

/* One node per file name under consideration.
 * Nodes are chained through "next" into singly linked lists that the
 * grouping functions split and re-link.
 */
struct fil{
	//Next node in the current list, or NULL at the end.
	struct fil *next;
	//Path of the file. dirdig() stores it in the same allocation as
	// the node; main() points it at an argv string instead.
	char *name;
	//Holds the file size when the node is created; once the file has
	// been opened by branchLinks() it holds the chunk of content most
	// recently read from it.
	uintmax_t reading;
	//The two members share storage: "first" is valid from creation
	// until branchLinks() processes the node, "sist" afterwards.
	union{
		//Identity of the file, used to detect names that refer to
		// the same file.
		struct{
			dev_t device;
			ino_t inode;
		} first;
		struct{
			//Open stream on the file, or NULL if fopen() failed.
			// Only assigned for nodes that stay in the "next" list,
			// not for nodes hanging off a "link" chain.
			FILE *fp;
			//Next name referring to the same file, or NULL.
			struct fil *link;
		} sist;
	} U;
};

/* Description of the command given with --exec, which list() runs
 * once per group of duplicates.
 */
struct execinfo{
	//Result of which(args[0]); NULL when no command is to be run.
	char *prog;
	//Environment passed on to the command (main()'s envp).
	char **envp;
	//Points into argv at the first word after --exec.
	char **args;
	//Number of words in args before the terminating "%".
	unsigned arglen;
};

/* Program-wide state shared between main(), dirdig() and list(). */
struct global_settings{
	//Set by the "all" option in main(); when set, dirdig() also
	// includes entries whose name starts with a dot.
	unsigned unhide_files:1;
	//Total number of file names listed in duplicate groups.
	//Set by list(), read by main()
	unsigned dupcount;
	//Sum over all groups of (files in group - 1) * file size.
	//Set by list(), read by main()
	unsigned long dupsize;
	//Set by main(), read by list()
	struct execinfo exec;
} glob={0, 0, 0, {NULL, NULL, NULL, 0}};

/* Print one group of duplicates, account for it in glob, optionally
 * run the --exec command on it, and free the group's nodes.
 *
 * start: non-NULL head of the group. Nodes chained through "next"
 *        are distinct files with equal content; nodes chained through
 *        U.sist.link are further names of the same file.
 *
 * Every open stream in the group is closed. Each name is printed as
 * "group.file[.link]<TAB>name" with hexadecimal indices. When an
 * --exec command is configured it is invoked with its own arguments
 * followed by every name in the group. All nodes are freed before
 * returning, so the caller must not touch the list afterwards.
 */
void list(struct fil *start){
	static unsigned eachgroup=0;	//Running group number across calls

	unsigned long long filesize;
	if(start->U.sist.fp){
		//destroy() seeks to the end before calling us, and
		// groupByContent() calls us once a read returned nothing,
		// so the stream position stands in for the file size.
		long pos = ftell(start->U.sist.fp);
		if(pos < 0){
			//Don't let -1 turn into an enormous unsigned size.
			perror(start->name);
			pos = 0;
		}
		filesize = (unsigned long long) pos;
	}else{
		//Never opened: "reading" still holds the size from stat.
		filesize = start->reading;
	}
	printf("\n-- #%u -- %llu B --\n", eachgroup, filesize);

	unsigned eachfile=0, argtot=0;
	struct fil *group = start;
	do{
		if(group->U.sist.fp) fclose(group->U.sist.fp);
		struct fil *links = group;
		_Bool haslinks = group->U.sist.link != NULL;
		group = group->next;
		unsigned eachlink=0;
		do{
			printf("%x.%x", eachgroup, eachfile);
			if(haslinks) printf(".%x", eachlink);
			putchar('\t');
			puts(links->name);

			struct fil *prev = links;
			links = links->U.sist.link;
			//With a command to run, the names are still needed
			// below, so the nodes are freed after the command.
			if(glob.exec.prog == NULL) free(prev);
			eachlink++;
		}while(links);
		argtot += eachlink;
		eachfile++;
	}while(group);
	glob.dupcount += argtot;
	glob.dupsize += (eachfile-1) * filesize;
	eachgroup++;

	if(glob.exec.prog){
		//Heap instead of a variable-length array: a group can hold
		// arbitrarily many names. Slot 0 is always used, even if
		// arglen were 0, hence the lower bound of 1.
		size_t fixed = glob.exec.arglen ? glob.exec.arglen : 1;
		char **args = malloc((fixed + argtot + 1) * sizeof *args);
		if(!args){
			perror(glob.exec.prog);
		}else{
			unsigned i;
			args[0] = glob.exec.prog;
			for(i=1; i < glob.exec.arglen; i++){
				args[i] = glob.exec.args[i];
			}
			for(group = start; group; group = group->next){
				struct fil *links;
				for(links = group; links; links = links->U.sist.link){
					args[i++] = links->name;
				}
			}
			args[i] = NULL;

			//Make sure the group listing appears before anything
			// the command prints, also when stdout is not a tty.
			fflush(stdout);

			int ret = spawnwait(args, glob.exec.envp);
			if(ret < 0){
				fprintf(stderr, "%s: %s\n", glob.exec.prog, strerror(-ret));
			}else if(ret){
				fprintf(stderr, EXEC_RETURNED, glob.exec.args[0], ret);
			}
			free(args);
		}

		//Now the names are no longer needed.
		do{
			struct fil *links = start;
			start = start->next;
			do{
				struct fil *prev = links;
				links = links->U.sist.link;
				free(prev);
			}while(links);
		}while(start);
	}
}

/* Dispose of a node that turned out to have no content-duplicate.
 * If other names refer to the same file (a link chain exists), the
 * node is still reported through list(), with the stream moved to
 * the end of the file first. Otherwise the stream, if any, is closed
 * and the node is freed.
 */
void destroy(struct fil *this){
	FILE *stream = this->U.sist.fp;

	if(!this->U.sist.link){
		if(stream) fclose(stream);
		free(this);
		return;
	}

	if(stream) fseek(stream, 0, SEEK_END);
	list(this);
}

/* Split a list of files into groups with identical content and hand
 * every group of two or more to list(); lone files go to destroy().
 *
 * old_start: head of a list linked through "next", or NULL (no-op).
 *            Each node's "reading" is compared and then refilled from
 *            U.sist.fp, so the nodes are expected to have an open
 *            stream and their current chunk already in "reading", as
 *            branchLinks() leaves them.
 *
 * All nodes passed in are consumed (freed via list()/destroy()).
 *
 * NOTE(review): fread() returning 0 is treated as end of file; a read
 * error would look the same here - confirm whether that matters.
 */
void groupByContent(struct fil *old_start){
	if(!old_start) return;
	if(!(old_start->next)){
		//A single file cannot have a duplicate.
		destroy(old_start);
		return;
	}
	struct fil *new_start, *old_prev, *cur;
	for(;;){//Read through the files' contents, one chunk per round
		cur = old_start;
		new_start = NULL;
		old_prev = old_start;
		//Compare every other file's current chunk with old_start's.
		while(cur = cur->next){
			if(cur->reading == old_start->reading){
				//cur remains in old list; fetch its next chunk.
				//A short read overwrites only part of "reading".
				old_prev = cur;
				fread((void*) &(cur->reading),
				1, sizeof(intmax_t), cur->U.sist.fp);
			}else{
				//move cur to new list (its chunk is not advanced)
				old_prev->next = cur->next;
				cur->next = new_start;
				new_start = cur;
				//Step back so the loop continues with the node
				// that followed cur.
				cur = old_prev;
			}
		}
		//Old list now contains identical files
		// (up to (current file position)-1 ).
		//The rest is in new list, which might be empty.
		if(new_start){
			if(new_start->next && old_start->next){
				//Both lists can still contain duplicates:
				// outsource new list, keep going with old list.
				groupByContent(new_start);
				//This could be done in a separate
				// thread, but reading fewer files
				// at once is probably better.
			}else if(new_start->next){
				//Old list is down to one file: drop it and
				// restart the comparison on new list, whose
				// files are all at the same position.
				destroy(old_start);
				old_start = new_start;
				continue;
			}else if(old_start->next){
				//New list is a single file: drop it.
				destroy(new_start);
			}else{
				//Both lists contain only one file:
				// discard them and do nothing
				// more with them.
				destroy(new_start);
				destroy(old_start);
				break;
			}
		}
		//Advance old_start itself; stop at files' end
		if(! fread((void*) &(old_start->reading),
		1, sizeof(intmax_t), old_start->U.sist.fp)){
			//Nothing more to read: what remains in old list
			// is a group of duplicates.
			list(old_start);
			break;
		}
	}
}

/* Prepare a list of same-sized files for groupByContent().
 *
 * start: address of the list head; *start must be non-NULL, since
 *        the loop dereferences it before testing. On return *start
 *        is the head of the remaining list, which may be NULL if no
 *        file could be opened.
 *
 * For each node in turn:
 *  - every later node with the same device and inode is unlinked
 *    from the "next" list and appended to the node's U.sist.link
 *    chain, so only one name per file is read;
 *  - the file is opened and its first chunk read into "reading";
 *  - if it cannot be opened, the error is reported and the node is
 *    removed from the list and passed to destroy().
 */
void branchLinks(struct fil **start){
	/* Relink nodes representing sym- and hardlinks to
	 * the "link" pointer of the first of those nodes.
	 * Purpose: Let groupByContent() skip all but one.
	 * Sym- and hardlinks are recognised by all having
	 * the same device-id and inode as given by stat()
	 */
	struct fil *comparand = *start;
	//Where to store the successor when a node is dropped: the list
	// head at first, then the "next" field of the last opened node.
	struct fil **prevfopen = start;
	do{
		//Save the identity before the union is reused below.
		dev_t dev = comparand->U.first.device;
		ino_t ino = comparand->U.first.inode;
		//U is a union: This is where sist takes over
		// from first, in nodes iterated by comparand.
		struct fil *comparator = comparand;
		struct fil *prev       = comparand;
		struct fil *links      = comparand;	//Tail of the link chain
		while(comparator=comparator->next){
			if(comparator->U.first.inode  == ino
			&& comparator->U.first.device == dev){
				//Same file: move comparator from the "next"
				// list to the end of the link chain.
				prev->next = comparator->next;
				links->U.sist.link = comparator;
				links = comparator;
			}else{
				prev = comparator;
			}
		}
		links->U.sist.link = NULL;

		comparand->U.sist.fp = fopen(comparand->name, "rb");
		if(comparand->U.sist.fp){
			//"reading" held the file size until now. The original
			// author left the question why it is cleared ("Why?").
			// Effect: a first read shorter than the field leaves
			// the remaining bytes zero.
			comparand->reading = 0; //Why?
			fread((void*) &(comparand->reading),
			1, sizeof(intmax_t), comparand->U.sist.fp);
			prevfopen = &(comparand->next);
			comparand = comparand->next;
		}else{
			perror(comparand->name);
			//Unlink the unreadable node, then dispose of it as a
			// list of its own (destroy() still reports it if it
			// has a link chain).
			*prevfopen = comparand->next;
			comparand->next = NULL;
			destroy(comparand);
			comparand = *prevfopen;
		}
	}while(comparand);
}

/* Partition a list of files by size and pass every set of two or
 * more equally sized files on to branchLinks() + groupByContent().
 *
 * old_start: non-NULL head of a list linked through "next", with
 *            "reading" holding each file's size.
 *
 * Each round keeps the files whose size equals that of the head and
 * moves all others to a second list, which becomes the input of the
 * next round. Files whose size is unique are freed right away.
 */
void groupBySize(struct fil *old_start){
	for(;;){
		//Split: same size as the head stays, the rest moves out.
		struct fil *others = NULL;
		struct fil *last_kept = old_start;
		struct fil *node = old_start->next;
		while(node){
			struct fil *following = node->next;
			if(node->reading == old_start->reading){
				last_kept = node;
			}else{
				last_kept->next = following;
				node->next = others;
				others = node;
			}
			node = following;
		}

		if(!others){
			//Every remaining file has the same size.
			branchLinks(&old_start);
			groupByContent(old_start);
			return;
		}

		_Bool kept_many = old_start->next != NULL;
		_Bool others_many = others->next != NULL;

		if(others_many && kept_many){
			branchLinks(&old_start);
			groupByContent(old_start);
			old_start = others;
			continue;
		}
		if(others_many){
			//The common case: the head's size was unique.
			free(old_start);
			old_start = others;
			continue;
		}
		if(kept_many){
			free(others);
			branchLinks(&old_start);
			groupByContent(old_start);
			return;
		}
		//One file left on each side, of different sizes.
		free(others);
		free(old_start);
		return;
	}
}

/* Collect the non-directory entries of a directory tree into a list.
 *
 * dirname: directory to read. Entries are named "dirname/entry", or
 *          just "entry" when dirname is ".".
 * list:    head of the list built so far; new nodes are pushed in
 *          front of it.
 * depth:   decremented on entry; subdirectories are entered only
 *          while the result is nonzero. Passing 1 therefore reads
 *          dirname alone, and 0 or a negative value is unlimited
 *          for practical purposes.
 *
 * Returns the new head of the list. If dirname cannot be opened, the
 * error is reported and list is returned unchanged.
 *
 * Entries starting with a dot are skipped unless glob.unhide_files
 * is set; "." and ".." are always skipped. Each node and its name
 * share one malloc() block, so free(node) releases both. "reading"
 * is set to the file size and U.first to the file's identity.
 */
struct fil *dirdig(char *dirname, struct fil *list, int depth){
	DIR *dp = opendir(dirname);
	if(!dp){
		//The caller assigns our result to its list head, so the
		// list must be handed back intact.
		perror(dirname);
		return list;
	}
	depth--;//0 becomes more than anyone would need
	_Bool is_cwd = dirname[0]=='.' && dirname[1]=='\0';
	//Room needed besides the entry name: the directory name plus,
	// unless it is ".", a slash and the terminating null byte.
	// For "." the single byte counted covers the terminator.
	size_t len_dirname_p2 = strlen(dirname);
	if(!is_cwd) len_dirname_p2 += 2;

	struct dirent *dirinfo;
	while((dirinfo = readdir(dp))){
		char *basename = dirinfo->d_name;
		if(basename[0] == '.'){
			if(glob.unhide_files){
				if(basename[1] == '\0') continue;
				if(basename[1] == '.'
				&& basename[2] == '\0') continue;
			}else{
				continue;
			}
		}

		struct fil *nyfil;
		//2 mallocs in 1: the node followed by its name
		nyfil = malloc(sizeof(struct fil) + len_dirname_p2+strlen(basename));
		if(!nyfil){
			perror(basename);
			break;
		}
		nyfil->name = ((char*) nyfil) + sizeof(struct fil);
		if(is_cwd){
			strcpy(nyfil->name, basename);
		}else{
			sprintf(nyfil->name, "%s/%s", dirname, basename);
		}
#ifdef _WIN32
		HANDLE handletur = CreateFile(
			nyfil->name,
			0,
			FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
			NULL,
			OPEN_EXISTING,
			FILE_FLAG_BACKUP_SEMANTICS,
			NULL
		);
		BY_HANDLE_FILE_INFORMATION info;
		BOOL ok = 0;
		DWORD err = 0;
		//Same handling as in main(): never use or close an invalid
		// handle, and fetch the error code before any further call
		// can replace it.
		if(handletur == INVALID_HANDLE_VALUE){
			err = GetLastError();
		}else{
			ok = GetFileInformationByHandle(handletur, &info);
			if(!ok) err = GetLastError();
			CloseHandle(handletur);
		}
		if(!ok){
			char *msg = NULL;
			FormatMessage(
				FORMAT_MESSAGE_ALLOCATE_BUFFER |
				FORMAT_MESSAGE_FROM_SYSTEM |
				FORMAT_MESSAGE_IGNORE_INSERTS,
				NULL,
				err,
				0,
				(LPTSTR) &msg,
				0,
				NULL
			);
			fprintf(stderr, "%s: %s\n", nyfil->name, msg ? msg : "");
			if(msg) LocalFree(msg);
			free(nyfil);
			continue;
		}
#else
		struct stat info;
		if(stat(nyfil->name, &info) == -1){
			perror(nyfil->name);
			free(nyfil);
			continue;
		}
#endif

#ifdef _WIN32
		if(info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
#else
		if(S_ISDIR(info.st_mode))
#endif
		{
			//The node only served to build the path.
			if(depth){
				list = dirdig(nyfil->name, list, depth);
			}
			free(nyfil);
		}else{
#ifdef _WIN32
			nyfil->reading = (uintmax_t)info.nFileSizeHigh << 32 | info.nFileSizeLow;
			nyfil->U.first.device = info.dwVolumeSerialNumber;
			nyfil->U.first.inode = (uintmax_t)info.nFileIndexHigh << 32 | info.nFileIndexLow;
#else
			nyfil->reading = (uintmax_t)info.st_size;
			nyfil->U.first.device = info.st_dev;
			nyfil->U.first.inode = info.st_ino;
#endif
			nyfil->next = list;
			list = nyfil;
		}
	}
	closedir(dp);
	return list;
}

/* Examine the words following --exec and locate the command.
 *
 * args: NULL-terminated array starting at the command name. The
 *       command's arguments must be closed by a lone "%".
 *
 * Returns the number of words before the "%". When that number is 0
 * nothing is set up. Otherwise glob.exec.prog receives the result of
 * which(args[0]). A missing "%" or an unresolvable command ends the
 * program with EXIT_FAILURE after printing a message.
 */
int exec_init(char **args){
	unsigned count = 0;
	for(;;){
		char *word = args[count];
		if(!word) break;
		if(word[0] == '%' && word[1] == '\0') break;
		count++;
	}

	if(count != 0){
		if(!args[count]){
			fputs(MISTERM_EXEC, stderr);
			exit(EXIT_FAILURE);
		}
		glob.exec.prog = which(args[0]);
		if(glob.exec.prog == NULL){
			perror(args[0]);
			exit(EXIT_FAILURE);
		}
	}
	return count;
}

/* Entry point: parse options, collect the files named on the command
 * line (descending into directories), group them and print a summary.
 *
 * Options are the leading arguments starting with '-'; the name is
 * what remains after stripping '-' and an optional "=value":
 *   help, version  print HELP / the version string
 *   all            also consider dot-files inside directories
 *   exec CMD.. %   run CMD on every group (see exec_init())
 *   depth=N        limit directory recursion to N levels
 * An argument consisting only of dashes ends option parsing.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the depth option lacks
 * a usable number.
 */
int main(int argc, char *argv[], char *envp[]){
	setlocale(LC_ALL, "");
	glob.exec.envp=envp;
	glob.exec.prog=NULL;
	unsigned depth=~0u;	//Unlimited unless the depth option is given
	unsigned i = 1;
	while(i < (unsigned) argc && argv[i][0] == '-'){
		char *opt = strtok(argv[i], "-=");
		i++;
		if(!opt)
			break;
		else if(!strcmp(opt, "help"))
			puts(HELP);
		else if(!strcmp(opt, "version"))
			puts("dupedit 5.5");
		else if(!strcmp(opt, "all"))
			glob.unhide_files = 1;
		else if(!strcmp(opt, "exec")){
			glob.exec.args = argv + i;
			glob.exec.arglen = exec_init(argv + i);
			//Skip the command words and the closing "%".
			i += 1 + glob.exec.arglen;
		}
		else if(!strcmp(opt, "depth")){
			//strtok() yields NULL when nothing follows the option
			// name; that must not reach sscanf().
			char *value = strtok(NULL, "");
			if(!value || sscanf(value, "%u", &depth) != 1){
				fprintf(stderr, "--depth: %s\n", strerror(EINVAL));
				return EXIT_FAILURE;
			}
		}
	}
	set_max_open_files();
	struct fil *cur=NULL;
	while(i < (unsigned) argc){
#ifdef _WIN32
		HANDLE handletur = CreateFile(
			argv[i],
			0,
			FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
			NULL,
			OPEN_EXISTING,
			FILE_FLAG_BACKUP_SEMANTICS,
			NULL
		);
		BY_HANDLE_FILE_INFORMATION info;
		BOOL ok = 0;
		if(handletur != INVALID_HANDLE_VALUE){
			ok = GetFileInformationByHandle(handletur, &info);
			CloseHandle(handletur);
		}
		if(!ok){
			char *msg;
			FormatMessage(
				FORMAT_MESSAGE_ALLOCATE_BUFFER |
				FORMAT_MESSAGE_FROM_SYSTEM |
				FORMAT_MESSAGE_IGNORE_INSERTS,
				NULL,
				GetLastError(),
				0,
				(LPTSTR) &msg,
				0,
				NULL
			);
			fprintf(stderr, "%s: %s\n", argv[i], msg);
			LocalFree(msg);
		}else if(info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
#else
		struct stat info;
		if(stat(argv[i], &info)){
			perror(argv[i]);
		}else if(S_ISDIR(info.st_mode))
#endif
		{
			if(depth){
				//Drop one trailing slash so that dirdig() does not
				// produce "dir//file". A lone "/" is left alone:
				// stripping it would leave an empty path.
				size_t len = strlen(argv[i]);
				if(len > 1 && argv[i][len-1] == '/'){
					argv[i][len-1] = '\0';
				}
				cur = dirdig(argv[i], cur, depth);
			}
		}else{
			struct fil *nyfil;
			nyfil = malloc(sizeof(struct fil));
			if(!nyfil){
				perror(argv[i]);
				break;
			}
			//Unlike in dirdig(), the name is not part of the
			// allocation; it lives in argv.
			nyfil->name = argv[i];
#ifdef _WIN32
			nyfil->reading = (uintmax_t)info.nFileSizeHigh << 32 | info.nFileSizeLow;
			nyfil->U.first.device = info.dwVolumeSerialNumber;
			nyfil->U.first.inode = (uintmax_t)info.nFileIndexHigh << 32 | info.nFileIndexLow;
#else
			nyfil->reading = (uintmax_t)info.st_size;
			nyfil->U.first.device = info.st_dev;
			nyfil->U.first.inode = info.st_ino;
#endif
			nyfil->next = cur;
			cur = nyfil;
		}
		i++;
	}
	if(cur){
		if(cur->next){
			//groupBySize() consumes the whole list.
			groupBySize(cur);
			free(glob.exec.prog);
		}else{
			//A single file has nothing to be compared with.
			free(cur);
		}
		printf(OPPSUM, glob.dupcount, glob.dupsize);
	}
	return EXIT_SUCCESS;
}
