#define UNICODE

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <io.h>
#include <direct.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>

#include <string>
#include <list>
#include <map>
#include <algorithm>

#include <windows.h>

void logg_console (std::wstring s)
{
	DWORD wr;
	HANDLE hdl=GetStdHandle (STD_OUTPUT_HANDLE);

	assert (WriteConsole (hdl, s.c_str(), wcslen (s.c_str()), &wr, NULL)==TRUE);
};

void logg (std::wstring l_fname, std::wstring s)
{
	DWORD wr;
	HANDLE f_h=CreateFile (l_fname.c_str(), GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
	int l=wcslen (s.c_str());
	char out_UTF8[10240];

	logg_console (s);

	if (f_h==INVALID_HANDLE_VALUE)
	{
		DWORD err=GetLastError();
		wprintf (L"unable to create file [%s], GetLastError: %08X\n", l_fname.c_str(), err);

		return;
	};
	
	SetFilePointer (f_h, 0, 0, FILE_END);
	assert (WideCharToMultiByte (CP_UTF8, 0, s.c_str(), -1, (char*)&out_UTF8[0], 10240, NULL, NULL)!=0);
	assert (WriteFile (f_h, out_UTF8, strlen (out_UTF8), &wr, NULL)==TRUE);
	assert (wr==strlen (out_UTF8));
	CloseHandle (f_h);
};

void phase1_loop (UINT64 fsize_limit, std::wstring start_dir, std::list<UINT64> & sizes, std::map<UINT64, std::list<std::wstring> > & sizes_list_of_files)
{
	WIN32_FIND_DATA ff;
	HANDLE hfile;

	if (SetCurrentDirectory (start_dir.c_str())==FALSE)
	{
		logg_console (L"cannot change directory to [" + start_dir + L"]\n");
		return;
	};

	logg_console (L"phase 1: ");

	if (start_dir.size()>60)
	{
		std::wstring t=L"XXX..." + start_dir.substr(start_dir.size()-57, start_dir.size());
		t[0]=start_dir[0];
		t[1]=start_dir[1];
		t[2]=start_dir[2];
		logg_console (t);
	}
	else
		logg_console (start_dir);

	if (start_dir.size()<60)
	{
		int i;

		for (i=0; i<60-start_dir.size()+3; i++)
			logg_console (L" ");
	};

	logg_console (L"\r");

	if ((hfile=FindFirstFile (L"*", &ff))==INVALID_HANDLE_VALUE)
		return;
	do
	{
		if (ff.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
		{
			if (ff.cFileName[0]!='.')
				phase1_loop (fsize_limit, start_dir + ff.cFileName + L"\\", sizes, sizes_list_of_files);
		}
		else
		{
			UINT64 sz=((UINT64)ff.nFileSizeHigh << 32) | (UINT64)ff.nFileSizeLow;
			if (sz>=fsize_limit)
			{
				sizes.push_front (sz);
				sizes_list_of_files[sz].push_front (start_dir + ff.cFileName);
			};
		};
	} while (FindNextFile (hfile, &ff)!=0);

	FindClose (hfile);
};

std::wstring number_with_commas (UINT64 in)
{
	UINT64 val=in;
	std::wstring rt;
	bool first_3=true;
	wchar_t tmp[128];

	// 7FFFFFFFFFFFFFFF =  9,223,372,036,854,775,807
	// FFFFFFFFFFFFFFFF = 18,...

	UINT64 divisor=1000000000000000000;

	for (;;)
	{
		if ((val/divisor)!=0)
		{
			if (first_3)
				_snwprintf (tmp, 128, L"%u", val/divisor);
			else
				_snwprintf (tmp, 128, L"%03u", val/divisor);

			rt+=tmp;

			if (divisor!=1) 
				rt+=L",";
			val-=(val/divisor)*divisor;
			first_3=false;
		};

		if (divisor==1)
			break;
		divisor/=1000;
	};

	return rt;
};

bool compare_files (std::wstring f1, std::wstring f2, UINT64 flen)
{
	// approx. 1 MiB
#define BUF_SIZE 0x100000

	HANDLE h1=CreateFile (f1.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
	HANDLE h2=CreateFile (f2.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
	BYTE *buf1=(BYTE *)malloc (BUF_SIZE);
	BYTE *buf2=(BYTE *)malloc (BUF_SIZE);
	DWORD red1, red2;
	UINT64 processed=0;
	bool rt=true;

	if (h1==INVALID_HANDLE_VALUE)
	{
		logg_console (L"cannot open file " + f1 + L"\n");
		return false;
	};

	if (h2==INVALID_HANDLE_VALUE)
	{
		logg_console (L"cannot open file " + f2 + L"\n");
		if (h1!=NULL)
			CloseHandle (h1);
		return false;
	};

	//wprintf ("compare files %s and %s\n", f1.c_str(), f2.c_str());

	do
	{
		assert (ReadFile (h1, buf1, BUF_SIZE, &red1, NULL)==TRUE);
		assert (ReadFile (h2, buf2, BUF_SIZE, &red2, NULL)==TRUE);

		assert (red1==red2);

		processed+=red1;

		wprintf (L"comparing... %3.0f%%\r", ((float)processed/(float)flen)*100);

		if (memcmp (buf1, buf2, red1)!=0)
		{
			rt=false;
			break;
		};
	}
	while (red1==BUF_SIZE);

	free (buf1);
	free (buf2);
	CloseHandle (h1);
	CloseHandle (h2);

	return rt;

#undef BUF_SIZE
};

bool find_identical_files_in_list (std::wstring l_fname, std::list<std::wstring> & in, UINT64 flen, std::wstring size_with_commas)
{
	// show which files are identical
	std::list<std::wstring>::iterator i;
	std::wstring first_file;
	bool was_deletion=true;
	bool first_file_printed=false;
	bool identical_files_found=false;

	if (in.size()<2)
		return false;

	i=in.begin();
	first_file=*i;
	in.erase (i);

	while (was_deletion)
	{
		was_deletion=false;
		for (i=in.begin(); i!=in.end(); i++)
		{
			if (compare_files (first_file, *i, flen))
			{
				identical_files_found=true;

				if (first_file_printed==false)
				{
					first_file_printed=true;
					logg (l_fname, L"\r\n");
					logg (l_fname, L"-- size=");
					logg (l_fname, size_with_commas.c_str());
					logg (l_fname, L"\r\n");
					logg (l_fname, first_file); logg (l_fname, L"\r\n");
				};

				logg (l_fname, *i); logg (l_fname, L"\r\n");
				in.erase (i);
				was_deletion=true;
				break;
			};
		};
	};
	return identical_files_found;
};

int __cdecl main(int argc, char** argv)
{
	bool		to_scan[256];
	UINT64		fsize_limit;
	std::list<UINT64>	sizes;
	std::map<UINT64, std::list<std::wstring> >	sizes_list_of_files;
	bool found_something=false;
	wchar_t *cur_dir;
	std::wstring l_fname;

	fsize_limit=1000000;

	_unlink ("dupes_locator.log");

	wchar_t * p_n=L"dupes_locator";

	wprintf (L"%s / <dennis@conus.info> / 25-jan-2009\n", p_n);
	wprintf (L"\n");
	wprintf (L"usage:\n");
	wprintf (L"  %s <list of drives to scan> <minimal file size in bytes>\n", p_n);
	wprintf (L"\n");
	wprintf (L"if drives list is not mentioned, only local fixed disks will be scanned\n");
	wprintf (L"default minimal file size (if not mentioned) is one megabyte\n");
	wprintf (L"\n");
	wprintf (L"examples:\n");
	wprintf (L"  %s\n", p_n);
	wprintf (L"  %s C\n", p_n);
	wprintf (L"  %s CDE\n", p_n);
	wprintf (L"  %s F 100000\n", p_n);
	wprintf (L"\n");

	if (argv[1]!=NULL)
		if (argv[2]!=NULL)
			fsize_limit=atoi (argv[2]);

	if (fsize_limit<0)
		fsize_limit=0;

	wprintf (L"assuming minimal file size: %I64d bytes\n", fsize_limit);

	memset (to_scan, false, 256);

	if (argv[1]!=NULL)
	{
		char *s=argv[1];

		while (*s)
			if (isalpha (*s++))
				to_scan[toupper (*(s-1))]=true;
	}
	else
	{
		for (char q='A'; q<='Z'; q++)
		{
			wchar_t pt[5];
			swprintf (pt, L"%c:\\", q);
			if (GetDriveType (pt)==DRIVE_FIXED)
				to_scan[q]=true;
		};
	};

	wprintf (L"we will scan these drives: ");

	for (int i=0;i<256;i++)
		if (to_scan[i])
			wprintf (L"%c: ", i);

	wprintf (L"\n");

	{
		int l=GetCurrentDirectory (0, NULL); // results is in BYTES
		cur_dir=(wchar_t*)malloc (l*sizeof (wchar_t));
		GetCurrentDirectory (l, cur_dir);
		l_fname=std::wstring (cur_dir) + std::wstring (L"\\dupes_locator.log");
	};

	for (int i=0;i<256;i++)
		if (to_scan[i]==true)
		{
			std::wstring path=L"X:\\";
			path[0]=i;
			phase1_loop(fsize_limit, path, sizes, sizes_list_of_files);
			//              01234567890123456789012345678901234567890123456789012345678901234567890123456789
			logg_console (L"                                                                              \n");
		};      

	if (sizes.size()==0)
	{
		wprintf (L"no files at all\n");
		return 0;
	};

	sizes.sort();
	sizes.unique();

	std::reverse (sizes.begin(), sizes.end());

	logg_console (L"\n");

	for (std::list<UINT64>::iterator it=sizes.begin(); it!=sizes.end(); it++)
	{
		UINT64 sz=*it;
		while (sizes_list_of_files[sz].size()>1)
			while (find_identical_files_in_list (l_fname, sizes_list_of_files[sz], sz, number_with_commas (sz))==true)
				found_something=true;
	};

	assert (SetCurrentDirectory (cur_dir)==TRUE);

	if (found_something==false)
	{
		logg_console (L"nothing found\n");
		_unlink ("dupes_locator.log");
	};

	logg_console (L"See dupes_locator.log file for log (UTF8)\n");

	return 0;
};
