/*
 *
 *   Copyright (C) 2005 by Raymond Huang
 *   plushuang at users.sourceforge.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *  ---
 *
 *  In addition, as a special exception, the copyright holders give
 *  permission to link the code of portions of this program with the
 *  OpenSSL library under certain conditions as described in each
 *  individual source file, and distribute linked combinations
 *  including the two.
 *  You must obey the GNU Lesser General Public License in all respects
 *  for all of the code used other than OpenSSL.  If you modify
 *  file(s) with this exception, you may extend this exception to your
 *  version of the file(s), but you are not obligated to do so.  If you
 *  do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source
 *  files in the program, then also delete it here.
 *
 */

#include <stdio.h>
#include <string.h>
#include <urlglib/html_import.h>
#include <urlglib/urlglib_util.h>

/*
 * Create an HtmlImportElement binding `filter` to one element/attribute
 * name pair (for example "a" / "href").
 *
 * The structure and private copies of both strings live in ONE allocation:
 * the element name is stored directly behind the structure and the
 * attribute name directly behind the element name. A single g_free() on
 * the returned pointer therefore releases everything.
 *
 * A reference on `filter` is taken with url_filter_ref(); it is dropped
 * again by html_import_element_free().
 */
HtmlImportElement* html_import_element_new (UrlFilter* filter, gchar* element_name, gchar* attr_name)
{
	gsize  element_size, attr_size;
	HtmlImportElement* hie;

	// sizes include the terminating NUL; kept in gsize so the strlen()
	// result is never narrowed to a signed int
	element_size = strlen (element_name) + 1;
	attr_size    = strlen (attr_name) + 1;
	hie = g_malloc (sizeof (HtmlImportElement) + element_size + attr_size);

	url_filter_ref (filter);
	hie->filter = filter;

	// both strings are packed right after the structure itself
	hie->element_name = (char*)(hie + 1);
	hie->attr_name    = hie->element_name + element_size;
	memcpy (hie->element_name, element_name, element_size);
	memcpy (hie->attr_name, attr_name, attr_size);

	return hie;
}

/*
 * Destroy an HtmlImportElement: drop the filter reference it holds via
 * url_filter_unref(), then release the element with g_free().
 */
void html_import_element_free (HtmlImportElement* hie)
{
	UrlFilter* held_filter = hie->filter;

	url_filter_unref (held_filter);
	g_free (hie);
}

/*
static void html_import_start_element (GMarkupParseContext* context,
                                       const gchar*         element_name,
                                       const gchar**        attr_names,
                                       const gchar**        attr_values,
                                       HtmlImport*          hi,
                                       GError**             error)
*/
/*
 * Handle one opening tag.
 *
 * hi           - import state (base_href and the registered element list)
 * element_name - tag name, compared case-insensitively (ASCII)
 * attr_names   - NULL-terminated array of attribute names
 * attr_values  - attribute values, parallel to attr_names
 *
 * A <base> tag only updates hi->base_href: every "href" attribute found
 * replaces the stored string with a copy of its value.
 *
 * For any other tag, the first entry in hi->element_list whose element
 * name matches is used. The value of the first attribute matching that
 * entry's attribute name is passed to url_filter_add_item() together with
 * the current base_href. Later list entries are not consulted.
 */
void html_import_start_element (HtmlImport*   hi,
                                const gchar*  element_name,
                                const gchar** attr_names,
                                const gchar** attr_values)
{
	GSList* link;
	gint    i;

	// <base href="some.base.address">
	if (g_ascii_strcasecmp (element_name, "base") == 0) {
		for (i = 0; attr_names[i] != NULL; i++) {
			if (g_ascii_strcasecmp (attr_names[i], "href") != 0)
				continue;
			g_free (hi->base_href);
			hi->base_href = g_strdup (attr_values[i]);
		}
		return;
	}

	// look the tag up in the registered element list
	for (link = hi->element_list; link != NULL; link = link->next) {
		HtmlImportElement* entry = link->data;

		if (g_ascii_strcasecmp (element_name, entry->element_name) != 0)
			continue;

		// matching element: hand over the first matching attribute, if any
		for (i = 0; attr_names[i] != NULL; i++) {
			if (g_ascii_strcasecmp (attr_names[i], entry->attr_name) == 0) {
				url_filter_add_item (entry->filter, (gchar*)attr_values[i], hi->base_href);
				break;
			}
		}
		return;
	}
}

/*
 * Allocate a new HtmlImport with no base href, an empty element list,
 * two pointer arrays pre-sized for 16 entries (attribute names and
 * values) and a GString pre-sized for 80 bytes.
 * Release it with html_import_free().
 */
HtmlImport* html_import_new()
{
	HtmlImport* import = g_malloc (sizeof *import);

	import->base_href    = NULL;
	import->element_list = NULL;

	import->name_array   = g_ptr_array_sized_new (16);
	import->value_array  = g_ptr_array_sized_new (16);
	import->gstring      = g_string_sized_new (80);

	return import;
}

/*
 * Destroy an HtmlImport created by html_import_new(): every registered
 * element is freed with html_import_element_free(), then the list itself,
 * both pointer arrays, the GString, the base href string and finally the
 * structure are released.
 */
void html_import_free (HtmlImport* hi)
{
	GSList* link = hi->element_list;

	// free the payload of each list node before the list itself
	while (link != NULL) {
		html_import_element_free (link->data);
		link = link->next;
	}
	g_slist_free (hi->element_list);

	g_ptr_array_free (hi->name_array, TRUE);
	g_ptr_array_free (hi->value_array, TRUE);
	g_string_free (hi->gstring, TRUE);

	g_free (hi->base_href);
	g_free (hi);
}

/*
 * Register an element/attribute pair: a new HtmlImportElement is built
 * from `filter`, `element_name` and `attr_name` and appended to the end
 * of html_import->element_list.
 */
void html_import_add_element (HtmlImport* html_import, UrlFilter* filter,
                              gchar* element_name, gchar* attr_name)
{
	html_import->element_list = g_slist_append (
			html_import->element_list,
			html_import_element_new (filter, element_name, attr_name));
}

/*
 * Split the text of one tag (without the surrounding '<' and '>') into an
 * element name and name/value attribute pairs, then pass the result to
 * html_import_start_element().
 *
 * hi      - import state; its name_array/value_array are reset and refilled
 * tag_str - tag text, e.g.  a class="x" href="http://example.com/"
 *           It is modified IN PLACE: NUL bytes are written over separators,
 *           and the pointers stored in the arrays point into tag_str, so
 *           they are only valid as long as tag_str is.
 *
 * Only the space character is recognised as a separator. A tag that
 * contains no space (i.e. has no attributes) is ignored. An attribute
 * name that is followed by a space instead of '=' is skipped.
 */
void html_import_parse_tag (HtmlImport* hi, gchar* tag_str)
{
	gchar*   current      = tag_str;
	gchar*   element_name = NULL;
	gchar*   attr_name    = NULL;
	gchar*   attr_value   = NULL;
	gint     inside_level;
	gchar    inside_chr;

	g_ptr_array_set_size (hi->name_array, 0);
	g_ptr_array_set_size (hi->value_array, 0);

	// element: everything up to the first space
	while (*current) {
		if (*current == ' ') {
			*current++ = 0;
			element_name = tag_str;
			break;
		}
		current++;
	}

	// no space found (tag without attributes) or empty name: nothing to do
	if (element_name==NULL || *element_name==0)
		return;

	while (*current) {
		// skip space
		while (*current == ' ')
			current++;

		// only trailing spaces were left: do not record an empty attribute
		if (*current == 0)
			break;

		// attribute name
		attr_name  = current;
		while (*current) {
			if (*current == '=') {
				*current++ = 0;
				break;
			}
			else if (*current == ' ') {
				// name without '=': not a name/value pair, skip it
				attr_name = NULL;
				break;
			}
			current++;
		}

		if (attr_name==NULL)
			continue;

		// attribute value
		// inside_level counts currently open quotes, inside_chr is the
		// quote character that would close the innermost one.
		attr_value = current;
		inside_level = 0;
		inside_chr   = 0;
		while (*current) {
			if (*current == '"' || *current == '\'') {
				if (inside_chr != *current) {
					// opening quote; the outermost one starts the value
					if (inside_level==0)
						attr_value = current+1;
					inside_chr = *current;
					inside_level++;
				}
				else if (inside_chr == *current) {
					// closing quote; expect the other kind to close next
					inside_chr = (inside_chr=='"') ? '\'' : '"';
					inside_level--;
				}

				if (current[1] == 0 || current[1] == ' ' || inside_level==0) {
					// Terminate the value AND step past the terminator.
					// Without the step the outer loop would see the NUL
					// just written and stop after the first attribute.
					*current++ = 0;
					break;
				}
			}
			else if (*current == ' ' && inside_level == 0) {
				// unquoted value ends at the next space; step past it so
				// the following attributes are still parsed
				*current++ = 0;
				break;
			}
			current++;
		}
		// add attribute
		g_ptr_array_add (hi->name_array, attr_name);
		g_ptr_array_add (hi->value_array, attr_value);
	}
	// both arrays are handed over as NULL-terminated vectors
	g_ptr_array_add (hi->name_array, NULL);
	g_ptr_array_add (hi->value_array, NULL);

	// call
	html_import_start_element (hi, element_name,
	                           (const gchar**)hi->name_array->pdata,
	                           (const gchar**)hi->value_array->pdata);
}

/*
 * Scan a file for tags and feed each one to html_import_parse_tag().
 *
 * The file is read in 4096-byte chunks. Text between '<' and '>' is
 * collected (carriage returns and line feeds are dropped) and, on '>',
 * the collected buffer is passed to html_import_parse_tag(). A tag whose
 * collected text reaches 4096 bytes is abandoned.
 *
 * Returns FALSE if the file could not be opened with ug_fopen(), TRUE
 * otherwise. A read error is not distinguished from end of file.
 */
gboolean html_import_read_file (HtmlImport* hi, const gchar* filename)
{
	FILE*    fp;
	guint8*  chunk;
	guint8*  chunk_end;
	guint8*  pos;
	guint    n_read;
	gboolean in_tag = FALSE;
	GString* tag;

	fp = ug_fopen (filename, "r");
	if (fp == NULL)
		return FALSE;

	chunk = g_malloc (4096);
	tag   = g_string_sized_new (4096);

	while ((n_read = fread (chunk, 1, 4096, fp)) != 0) {
		chunk_end = chunk + n_read;

		for (pos = chunk; pos != chunk_end; pos++) {
			guint8 ch = *pos;

			if (ch == '<') {
				// (re)start collecting a tag
				in_tag = TRUE;
				g_string_truncate (tag, 0);
			}
			else if (ch == '>') {
				// end of tag: parse what was collected, then reset
				if (in_tag)
					html_import_parse_tag (hi, tag->str);
				in_tag = FALSE;
				g_string_truncate (tag, 0);
			}
			else if (ch != '\r' && ch != '\n' && in_tag) {
				// line breaks are skipped, everything else is collected
				g_string_append_c (tag, ch);
			}

			// check <tag> length: give up on over-long tags
			if (tag->len >= 4096) {
				in_tag = FALSE;
				g_string_truncate (tag, 0);
			}
		}
	}

	g_string_free (tag, TRUE);
	g_free (chunk);

	fclose (fp);

	return TRUE;
}

/*
gboolean html_import_read_file (HtmlImport* hi,
                                gchar* filename)
{
	GMarkupParseContext* parser_context;
	GMarkupParser        parser;

	FILE*    file;
	guint    size;
	guint8*  buffer;
	GError*  error = NULL;
	gboolean parse_ok = TRUE;

	parser.start_element = html_import_start_element;
	parser.end_element   = NULL;
	parser.text          = NULL;
	parser.passthrough   = NULL;
	parser.error         = NULL;
	parser_context = g_markup_parse_context_new (&parser, 0,
	                                             hi, NULL);

	file = ug_fopen (filename, "r");
	if (file==NULL)
		return FALSE;

	buffer = g_malloc (4096);

	while ( (size=fread (buffer, 1, 4096, file)) && parse_ok) {
		parse_ok = g_markup_parse_context_parse (parser_context,
		                                         buffer, size, &error);
	}

	if (error)
		g_error_free (error);

	g_markup_parse_context_end_parse (parser_context, NULL);
	g_markup_parse_context_free (parser_context);

	g_free (buffer);
	fclose (file);

	return parse_ok;
}
*/
