/***************************************************************************/
/* 		This code is part of Nscache - viewer of Netscape(tm)	   */
/*		browsers disk cache					   */
/*		Copyright (c) 1999,2000 Ondrejicka Stefan		   */
/*		(ondrej@idata.sk)					   */
/*		created 2008 by Harald Foerster				   */
/*		(harald_foerster@users.sourceforge.net)			   */
/*		Distributed under GPL 2 or later			   */
/***************************************************************************/

#include <ctype.h>
#include <string.h>

#include "utf8-tools.h"

/*
#include <glib.h>
#include <unicode.h>
#include "charconv.h"
#include "nls.h"
*/

/*
 * Default PRE_INCREMENT to 1 when compiling for PowerPC (__powerpc__),
 * unless the build has already defined the macro itself.
 */
#ifndef PRE_INCREMENT
#ifdef __powerpc__
#define PRE_INCREMENT		1
#endif
#endif

/*
	From RFC 2044:
	--------------

	UCS-4 range (hex.)	UTF-8 octet sequence (binary)
	0000 0000-0000 007F	0xxxxxxx
	0000 0080-0000 07FF	110xxxxx 10xxxxxx
	0000 0800-0000 FFFF	1110xxxx 10xxxxxx 10xxxxxx
	0001 0000-001F FFFF	11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	0020 0000-03FF FFFF	111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	0400 0000-7FFF FFFF	1111110x 10xxxxxx ... 10xxxxxx
*/

/*
 * Back-end selection for the UTF8_TOOLS_* helper macros used below:
 *   - GLIB_MAJOR_VERSION == 1 with HAVE_UNICODE: the unicode_*() functions,
 *   - GLIB_MAJOR_VERSION == 1 without HAVE_UNICODE: no macros are defined
 *     here (the ASCII-only functions further down are compiled instead),
 *   - otherwise: the GLib g_unichar_*() / g_utf8_*() functions.
 *
 * UTF8_TOOLS_GET_CHAR(p, c) stores the decoded character in 'c' (which
 * must be an lvalue) and evaluates to non-zero on success.
 */
#if GLIB_MAJOR_VERSION == 1

#ifdef HAVE_UNICODE

/* for unicode_init() see 'charconv.c' */

#define UNICODE_CHAR_T			unicode_char_t

/*
 * NOTE(review): this macro passes two arguments, but utf8tl_from_unichar()
 * as defined in this file takes three (c, p, slen).  The macro is not used
 * anywhere in this file - confirm before relying on it.
 */
#define UTF8_TOOLS_FROM_UNICHAR(c, p)	utf8tl_from_unichar((c), (p))
#define UTF8_TOOLS_GET_CHAR(p, c)	(unicode_get_utf8((p), &(c)) != NULL)
#define UTF8_TOOLS_ISALNUM(c)		unicode_isalnum(c)
#define UTF8_TOOLS_ISPRINT(c)		unicode_isprint(c)
#define UTF8_TOOLS_TOLOWER(c)		unicode_tolower(c)

#endif /* HAVE_UNICODE */

#else /* GLIB_MAJOR_VERSION > 1 */

#define UNICODE_CHAR_T			gunichar

#define UTF8_TOOLS_FROM_UNICHAR(c, p)	g_unichar_to_utf8((c), (p))
/*
 * The result values (gunichar) -1 and (gunichar) -2 of
 * g_utf8_get_char_validated() are treated as a decoding failure.
 * 'c' is evaluated more than once.
 */
#define UTF8_TOOLS_GET_CHAR(p, c)	((c) = g_utf8_get_char_validated((p), -1), \
					((c) != (gunichar) -1 && (c) != (gunichar) -2))
#define UTF8_TOOLS_ISALNUM(c)		g_unichar_isalnum(c)
#define UTF8_TOOLS_ISPRINT(c)		g_unichar_isprint(c)
#define UTF8_TOOLS_TOLOWER(c)		g_unichar_tolower(c)

#endif /* #if GLIB_MAJOR_VERSION == 1 .. #else */

/* tools.c */
extern char *tl_mem_find_str(const char *mem, int mlen, const char *str, int slen);

/*
 * Searches 'pattern' (plen bytes) inside a lower-cased copy of the first
 * 'slen' bytes of 'str'.  A negative 'slen' means "use strlen(str)".
 * 'pattern' is handed to tl_mem_find_str() unchanged, it is not
 * lower-cased here.
 *
 * Returns a pointer into 'str' at the byte offset where the match was
 * found in the lower-cased copy, or NULL when there is no match.  The
 * temporary copy is released before returning.
 */
gchar *utf8tl_strstr_igncase(const gchar *str, gint slen,
					const gchar *pattern, gint plen)
{
	gchar		*lowered, *hit;
	const gchar	*result = NULL;

	if (slen < 0)
	{
		slen = strlen(str);
	}

	lowered	= utf8tl_strndup_to_lower(str, slen);
	hit	= tl_mem_find_str(lowered, slen, pattern, plen);

	if (hit != NULL)
	{
		/* translate the offset in the copy back to the original */
		result = str + (hit - lowered);
	}

	g_free(lowered);

	return (gchar *) result;

} /* gchar *utf8tl_strstr_igncase(const gchar*, gint, const gchar*, gint) */

#if GLIB_MAJOR_VERSION == 1 && ! defined(HAVE_UNICODE)

/*
 * Steps backwards from 'p' (exclusive) down to 'beg' (inclusive) and
 * returns the first byte met that is not a UTF-8 continuation octet
 * (10xxxxxx).  Returns NULL when every byte in [beg, p) is a continuation
 * octet or when p <= beg.
 */
gchar *utf8tl_find_prev_char(const gchar *beg, const gchar *p)
{
	const gchar *cur = p;

	while (cur > beg)
	{
		guint octet;

		cur--;
		octet = *cur;

		if ((octet & 0xc0) == 0x80)
		{
			/* still inside a multi-byte sequence */
			continue;
		}

		return (gchar *) cur;
	}

	return NULL;

} /* gchar *utf8tl_find_prev_char(const gchar*, const gchar*) */

/*
 * Returns a pointer to the first byte after 'p' and before 'end' that is
 * not a UTF-8 continuation octet (10xxxxxx), i.e. the start of the next
 * character.  Returns NULL when 'end' is reached first or when end <= p.
 *
 * 'end' is an exclusive bound: the byte at 'end' itself is never read, so
 * the buffer does not need to be NUL-terminated.
 */
gchar *utf8tl_find_next_char(const gchar *p, const gchar *end)
{
	if (end > p)
	{
		for (++p; end > p; ++p)
		{
			guint c = *p;

			if ((c & 0xc0) != 0x80)
			{
				return (gchar *) p;
			}
		}
	}

	return NULL;

} /* gchar *utf8tl_find_next_char(const gchar*, const gchar*) */

/*
 * ASCII-only variant.  Checks the characters surrounding the span
 * str[0 .. slen):
 *   - if 'str' lies behind 'beg', the lead byte of the preceding character
 *     must be ASCII and not alphanumeric,
 *   - the byte at str[slen] must be ASCII and not alphanumeric.
 * Returns TRUE when both conditions hold, FALSE otherwise (also when the
 * preceding character cannot be located).
 */
gint utf8tl_is_alnum_str(const gchar *beg, const gchar *str, gint slen)
{
	guint ch;

	if (str > beg)
	{
		const gchar *prev = utf8tl_find_prev_char(beg, str);

		if (prev == NULL)
		{
			return FALSE;
		}

		ch = *prev;

		/* isalnum() is only consulted for 7-bit values */
		if (ch >= 0x80 || isalnum(ch))
		{
			return FALSE;
		}
	}

	ch = str[slen];

	if (ch < 0x80 && !isalnum(ch))
	{
		return TRUE;
	}

	return FALSE;

} /* gint utf8tl_is_alnum_str(const gchar*, const gchar*, gint) */

/*
 * ASCII-only variant.  Walks backwards from 'p' character by character.
 * As soon as the preceding character is a non-printable ASCII byte, the
 * position right behind it (the last position visited) is returned.
 * Returns 'beg' when no such character is found.
 */
gchar *utf8tl_get_linestart(const gchar *beg, const gchar *p)
{
	for (;;)
	{
		const gchar	*prev = utf8tl_find_prev_char(beg, p);
		guint		ch;

		if (prev == NULL)
		{
			break;
		}

		ch = *prev;

		if (ch < 0x80 && !isprint(ch))
		{
			return (gchar *) p;
		}

		p = prev;
	}

	return (gchar *) beg;

} /* gchar *utf8tl_get_linestart(const gchar*, const gchar*) */

/*
 * ASCII-only variant.  Walks forward from 'p' character by character and
 * returns the position of the first non-printable ASCII byte.  Returns
 * 'end' when end <= p or when utf8tl_find_next_char() reports that there
 * is no further character.
 */
gchar *utf8tl_get_linestop(const gchar *p, const gchar *end)
{
	if (end <= p)
	{
		return (gchar *) end;
	}

	for (;;)
	{
		guint ch = *p;

		if (ch < 0x80 && !isprint(ch))
		{
			return (gchar *) p;
		}

		p = utf8tl_find_next_char(p, end);

		if (p == NULL)
		{
			break;
		}
	}

	return (gchar *) end;

} /* gchar *utf8tl_get_linestop(const gchar*, const gchar*) */

/*
 * ASCII-only variant.  Returns a g_malloc()ed, NUL-terminated copy of the
 * first 'slen' bytes of 'str' in which every 7-bit byte has been passed
 * through tolower(); bytes >= 0x80 are copied unchanged.  A negative
 * 'slen' means "use strlen(str)".
 */
gchar *utf8tl_strndup_to_lower(const gchar *str, gint slen)
{
	char	*copy;
	gint	i;

	if (slen < 0)
	{
		slen = strlen(str);
	}

	copy = g_malloc(slen + 1);

	for (i = 0; i < slen; i++)
	{
		unsigned int ch = str[i];

		if (ch < 0x80)
		{
			ch = tolower(ch);
		}

		copy[i] = ch;
	}

	copy[slen] = '\0';

	return copy;

} /* gchar *utf8tl_strndup_to_lower(const gchar*, gint) */

#else /* GLIB_MAJOR_VERSION > 1 || defined(HAVE_UNICODE) */

/*
 * Encodes the character 'c' as a UTF-8 octet sequence (1 to 6 octets, see
 * the table at the top of this file).
 *
 *   c     character to encode
 *   p     destination buffer, or NULL to only compute the length
 *   slen  number of bytes available at 'p'
 *
 * Returns the number of octets of the sequence, or -1 when it does not
 * fit into 'slen' bytes (this includes a zero or negative 'slen'); in
 * that case nothing is written.  No terminating NUL is appended.
 */
static gint utf8tl_from_unichar(UNICODE_CHAR_T c, gchar *p, gint slen)
{
	/* lead-octet marker bits, indexed by (sequence length - 1) */
	static const gint first[] =
		{0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

	/* signed on purpose: compared against the signed 'slen' below */
	gint len;

	if (c < 0x80)
	{
		len = 1;
	}

	else if (c < 0x800)
	{
		len = 2;
	}

	else if (c < 0x10000)
	{
		len = 3;
	}

	else if (c < 0x200000)
	{
		len = 4;
	}

	else if (c < 0x4000000)
	{
		len = 5;
	}

	else
	{
		len = 6;
	}

	if (len > slen)
	{
		return -1;
	}

	if (p)
	{
		gint i;

		/* fill the continuation octets from the last one backwards */
		for (i = len - 1; i > 0; i--)
		{
			p[i]   = (c & 0x3f) | 0x80;
			c    >>= 6;
		}

		p[0] = (first[len - 1] | c);
	}

	return len;

} /* static gint utf8tl_from_unichar(UNICODE_CHAR_T, gchar*, gint) */

/*
 * Unicode-aware variant.  Checks the characters surrounding the span
 * str[0 .. slen):
 *   - if 'str' lies behind 'beg', the preceding character must decode
 *     successfully and must not be alphanumeric,
 *   - the character starting at str[slen] must decode successfully and
 *     must not be alphanumeric.
 * Returns TRUE when both conditions hold, FALSE otherwise (also when the
 * preceding character cannot be located).
 */
gint utf8tl_is_alnum_str(const gchar *beg, const gchar *str, gint slen)
{
	UNICODE_CHAR_T ch;

	if (str > beg)
	{
		gchar *prev = UTF8_TOOLS_PREV_CHAR(beg, str);

		if (prev == NULL)
		{
			return FALSE;
		}

		if (!UTF8_TOOLS_GET_CHAR(prev, ch))
		{
			return FALSE;
		}

		if (UTF8_TOOLS_ISALNUM(ch))
		{
			return FALSE;
		}
	}

	if (!UTF8_TOOLS_GET_CHAR(&str[slen], ch))
	{
		return FALSE;
	}

	if (UTF8_TOOLS_ISALNUM(ch))
	{
		return FALSE;
	}

	return TRUE;

} /* gint utf8tl_is_alnum_str(const gchar*, const gchar*, gint) */

/*
 * Unicode-aware variant.  Walks backwards from 'p' character by character.
 * As soon as the preceding character fails to decode or is not printable,
 * the position right behind it (the last position visited) is returned.
 * Returns 'beg' when no such character is found.
 */
gchar *utf8tl_get_linestart(const gchar *beg, const gchar *p)
{
	for (;;)
	{
		const gchar	*prev;
		UNICODE_CHAR_T	ch;

#ifdef HAVE_UNICODE
		/* this build checks the lower bound itself before stepping back */
		if (p <= beg)
		{
			break;
		}
#endif
		prev = (gchar *) UTF8_TOOLS_PREV_CHAR(beg, p);

		if (prev == NULL)
		{
			break;
		}

		if (!UTF8_TOOLS_GET_CHAR(prev, ch) || !UTF8_TOOLS_ISPRINT(ch))
		{
			return (gchar *) p;
		}

		p = prev;
	}

	return (gchar *) beg;

} /* gchar *utf8tl_get_linestart(const gchar*, const gchar*) */

/*
 * Unicode-aware variant.  Walks forward from 'p' character by character
 * and returns the position of the first character that fails to decode
 * (UTF8_TOOLS_GET_CHAR) or is not printable (UTF8_TOOLS_ISPRINT).
 * Returns 'end' when end <= p or when no such character is found.
 */
gchar *utf8tl_get_linestop(const gchar *p, const gchar *end)
{
	/*
	 * The loop header and footer are selected by the preprocessor and
	 * share one body:
	 *   HAVE_UNICODE:  while (p != NULL && end > p) { ... }
	 *   otherwise:     if (end > p) do { ... } while (p != NULL);
	 */
#ifdef HAVE_UNICODE
	while (p != NULL && end > p)
#else
	if (end > p) do
#endif
	{
		UNICODE_CHAR_T c;

		/* an undecodable or non-printable character ends the line */
		if (UTF8_TOOLS_GET_CHAR(p, c) == FALSE ||
				UTF8_TOOLS_ISPRINT(c) == FALSE)
		{
			return (gchar *) p;
		}

		/* may yield NULL, which terminates either loop form */
		p = UTF8_TOOLS_NEXT_CHAR(p, end);
	}
#ifndef HAVE_UNICODE
	while (p != NULL);
#endif

	return (gchar *) end;

} /* gchar *utf8tl_get_linestop(const gchar*, const gchar*) */

/*
 * Unicode-aware variant.  Returns a g_malloc()ed buffer of slen + 1 bytes
 * holding the lower-cased form of the first 'slen' bytes of the UTF-8
 * string 'str'.  A negative 'slen' means "use strlen(str)".
 *
 * Conversion stops at the first character that fails to decode, when the
 * input span is used up, or when the next lower-cased character would no
 * longer fit into the buffer.  Lower-casing can change the encoded length
 * of a character (e.g. U+212A KELVIN SIGN, three octets, maps to 'k'), so
 * the result may be shorter than 'slen'.  Therefore:
 *   - reading is bounded by str + slen, so input beyond the requested span
 *     (or beyond the terminating NUL) is never consumed to fill the
 *     remaining space,
 *   - the unused tail of the buffer is zero-filled, so all slen + 1 bytes
 *     are initialised for callers that scan the copy with length 'slen'.
 *
 * The caller owns the returned buffer and releases it with g_free().
 */
gchar *utf8tl_strndup_to_lower(const gchar *str, gint slen)
{
	gchar		*p, *ptr, *stop;
	const gchar	*src_end;

	if (slen < 0)
	{
		slen = strlen(str);
	}

	p = ptr = g_malloc(slen + 1);
	stop = &p[slen];
	src_end = (str != NULL) ? &str[slen] : NULL;

	while (stop > p && str != NULL && src_end > str)
	{
		gint		i;
		UNICODE_CHAR_T	c;

#if GLIB_MAJOR_VERSION == 1
		/* decodes one character and advances 'str' behind it */
		if ((str = unicode_get_utf8(str, &c)) == NULL)
		{
			break;
		}
#else
		if (UTF8_TOOLS_GET_CHAR(str, c) == FALSE)
		{
			break;
		}

		str = UTF8_TOOLS_NEXT_CHAR(str, NULL);
#endif
		c = UTF8_TOOLS_TOLOWER(c);
		i = utf8tl_from_unichar(c, p, stop - p);

		if (i < 0)
		{
			/* lower-cased character does not fit anymore */
			break;
		}

		p += i;
	}

	/* terminate and clear whatever was not written (at least one byte) */
	memset(p, '\0', (stop - p) + 1);

	return ptr;

} /* gchar *utf8tl_strndup_to_lower(const gchar*, gint) */

#if defined(HAVE_UNICODE) || defined(PANGO_LOCALE)

/*
 * Replaces, in place, every non-ASCII character of the NUL-terminated
 * string 'str' by a single '?'.  A multi-byte sequence collapses into one
 * '?', so the string can only get shorter.  Returns 'str'.
 *
 * If UTF8_TOOLS_NEXT_CHAR() yields NULL for a non-ASCII character, a '?'
 * is stored for it and the string is terminated right behind it.
 *
 * One portable loop is used on all platforms; no pointer before the
 * start of 'str' is ever formed.
 */
gchar *utf8tl_replace_non_ascii_chars_in_str(gchar *str)
{
	const gchar	*from = str;
	gchar		*to = str;

	while (*from != '\0')
	{
		guint chr = (guchar) *from;

		if (chr > 0x7f)
		{
			/* skip the whole sequence before overwriting anything */
			from = UTF8_TOOLS_NEXT_CHAR(from, NULL);
			*(to++) = '?';

			if (from == NULL)
			{
				break;
			}
		}

		else
		{
			/* 'to' never runs ahead of 'from' */
			*(to++) = (gchar) chr;
			from++;
		}
	}

	*to = '\0';

	return str;

} /* gchar *utf8tl_replace_non_ascii_chars_in_str(gchar*) */

#endif /* defined(HAVE_UNICODE) || defined(PANGO_LOCALE) */

#endif /* #if GLIB_MAJOR_VERSION == 1 && ! defined(HAVE_UNICODE) .. #else */

/* EOF */
