/*
 *  Copyright (C) 2002 Marco Pesenti Gritti
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* Galeon includes */
#include "misc_string.h"
#include "prefs.h"
#include "eel-gconf-extensions.h"
#include "mozilla.h"

/* system includes */
#include <string.h>
#include <glib.h>
#include <libgnome/libgnome.h>
#include <libgnome/gnome-i18n.h>
#include <libgnomevfs/gnome-vfs-mime.h>
#include <gnome-xml/parser.h>
#include <gnome-xml/tree.h>

/* local prototypes */
static gboolean misc_string_is_punctuation (gchar c);

/**
 * misc_string_free_array: frees array, a string array of the given size.
 */ 
void 
misc_string_free_array (char *array[], int size)
{
	int i;

	if (array)
	{
		for (i = 0; i < size; i++)
			if (array[i])
				g_free (array[i]);
		g_free (array);
	}
}

/**
 * misc_string_new_num_accel: creates a label of the form "_N. text" where
 * N is the accelerator for the zero-based index num:
 *   0..8   -> digits 1..9
 *   9      -> digit 0
 *   10..35 -> letters a..z (only when lettersok is TRUE)
 * Returns a newly allocated string (caller must free), or NULL if num is
 * outside the range of acceleratable numbers/letters (including negative
 * values).
 */
gchar *
misc_string_new_num_accel (gint num, gchar *text, gboolean lettersok)
{
	/* negative indices have no accelerator; without this check they
	 * would fall into the digit branch and produce e.g. "_-4. text" */
	if (num < 0)
		return NULL;

	if (num < 9)
		return g_strdup_printf ("_%i. %s", num + 1, text);

	if (num == 9)
		return g_strdup_printf ("_%i. %s", 0, text);

	if (num < 36 && lettersok)
		return g_strdup_printf ("_%c. %s", 'a' + num - 10, text);

	return NULL;
}

/**
 * misc_string_strip_uline_accel: strip the _ out of a string like
 * gtk_label_parse_uline would do.  A doubled "__" becomes a single "_" in
 * the output; the first single "_" (the accelerator marker) is removed and
 * the remainder of the string is copied unchanged.
 * Returns a newly allocated string which the caller must free, or NULL if
 * text is NULL.
 */
gchar *
misc_string_strip_uline_accel (const gchar *text)
{
	GString *out;
	const gchar *u, *cur = text;
	gchar *ret;

	if (text == NULL)
		return NULL;

	out = g_string_new (NULL);
	while ((u = strchr (cur, '_')))
	{
		if (*(u+1) == '_')
		{
			/* __ in the string is equal to _ in the output
			 * so include the _ in the output, skip over the
			 * second _ and continue scanning.
			 * The precision for %.*s must be an int, so the
			 * pointer difference is cast explicitly. */
			g_string_sprintfa (out, "%.*s",
					   (int) (u - cur + 1), cur);
			cur = u + 2;
		} else {
			/* copy everything before the _ and skip over it */
			g_string_sprintfa (out, "%.*s",
					   (int) (u - cur), cur);
			cur = u + 1;
			/* only one accel per string, so we are done now */
			break;
		}
	}
	if (*cur)
	{
		/* attach rest of string */
		g_string_append (out, cur);
	}

	ret = out->str;
	g_string_free (out, FALSE); /* don't free char data, caller must */
	return ret;
}

/**
 * misc_string_escape_uline_accel: escape _'s in string such that the
 * gtk_label_parse_uline will display as normal, i.e. every "_" in text is
 * doubled to "__".
 * Returns a newly allocated string which the caller must free, or NULL if
 * text is NULL.
 */
/* FIXME: utf8 */
gchar *
misc_string_escape_uline_accel (const gchar *text)
{
	GString *out;
	const gchar *u, *cur = text;
	gchar *ret;

	if (text == NULL)
		return NULL;

	out = g_string_new (NULL);
	while ((u = strchr (cur, '_')))
	{
		/* All we need to do is double every _, so include the _ in
		 * the output, add another _, and continue scanning.
		 * The precision for %.*s must be an int, so the pointer
		 * difference is cast explicitly. */
		g_string_sprintfa (out, "%.*s_", (int) (u - cur + 1), cur);
		cur = u + 1;
	}
	if (*cur)
	{
		/* attach rest of string */
		g_string_append (out, cur);
	}

	ret = out->str;
	g_string_free (out, FALSE); /* don't free char data, caller must */
	return ret;
}

/**
 * misc_string_is_punctuation: returns TRUE when c is one of the
 * characters treated as a word boundary when shortening names (space,
 * '.', '!', '|', ',', ':' or ';'), FALSE for anything else.
 */
static gboolean
misc_string_is_punctuation (gchar c)
{
	switch (c)
	{
	case ' ':
	case '.':
	case '!':
	case '|':
	case ',':
	case ':':
	case ';':
		return TRUE;
	default:
		return FALSE;
	}
}

/**
 * misc_string_shorten_name: try to shorten a page title (ideally to
 * target_length or less). The heurstics here seems to give pretty good
 * results even down to quite small lengths, generally remaining
 * comprehensible down to around six to eight characters.
 */
/* FIXME: utf8? */
gchar *
misc_string_shorten_name (const gchar *input_name, gint target_length)
{
	gint i, j, length;
	gchar *name;
	char c;

	if (input_name == NULL)
		return (g_strdup (_("Untitled")));
	
	/* copy and clean name */
	name = g_strdup (input_name);
	g_strstrip (name);
	length = strlen (name);

	/* look for some common prefixes -- should these be translated? */
	if (eel_gconf_get_boolean (CONF_TABS_TABBED_PREFIX))
	{
		/* prefixes that only come by themselves */
		if (strncasecmp (name, "index of ", 9) == 0)
		{
			length -= 9;
			memmove(name, name + 9, length + 1);
		}
		else if (strncasecmp (name, "re: ", 4) == 0)
		{
			length -= 4;
			memmove(name, name + 4, length + 1);
		}
		else if (strncasecmp (name, "fwd: ", 5) == 0)
		{
			length -= 5;
			memmove(name, name + 5, length + 1);
		}
		else if (strncasecmp (name, "www.", 4) == 0)
		{
			length -= 4;
			memmove(name, name + 4, length + 1);
		}
		else 
		{
			/* prefixes that can be followed by other
			 * prefixes */
			if (strncasecmp (name, "welcome to ", 11) == 0)
			{
				length -= 11;
				memmove(name, name + 11, length + 1);
			}

			/* prefixes that follow the ones in the
			 * previous group */
			if (strncasecmp (name, "a ", 2) == 0)
			{
				length -= 2;
				memmove(name, name + 2, length + 1);
			}
			else if (strncasecmp (name, "my ", 3) == 0)
			{
				length -= 3;
				memmove(name, name + 3, length + 1);
			}
			else if (strncasecmp (name, "the ", 4) == 0)
			{
				length -= 4;
				memmove(name, name + 4, length + 1);
			}
		}

		/* remove any leading whitespace */
		g_strchug (name);
	}

	/* check length */
	length = strlen (name);
	if (length <= target_length)
	{
		return name;
	}

	/* find in name the first occurence of one of
	 * several common separators, and set it to '\0' */
	if (eel_gconf_get_boolean (CONF_TABS_TABBED_SEPARATOR))
	{
		gchar *first;
		gchar *str;

		/* set first to initially point to the terminating '\0'
		 * character */
		first = name + sizeof (gchar) * strlen (name);

		/* search for various separators... we can now search
		 * for ": ", becuase because we have stripped "re:" and
		 * "fwd: " in an earlier test */
		str = strstr (name, " - ");
		if (str != NULL && str < first) first = str;
		str = strstr (name, " -- ");
		if (str != NULL && str < first) first = str;
		str = strstr (name, " | ");
		if (str != NULL && str < first) first = str;
		str = strstr (name, " || ");
		if (str != NULL && str < first) first = str;
		str = strstr (name, ": ");
		if (str != NULL && str < first) first = str;
		str = strstr (name, " :: ");
		if (str != NULL && str < first) first = str;

		*first = '\0';

		/* check length */
		g_strchomp (name);
		length = strlen (name);
		if (length <= target_length)
		{
			return name;
		}
	}

	/* getting tricky, try losing a few vowels */
	/* yes, this is very anglocentric -- I don't know of any strategies
	 * for other languages (particularly non-European) -- MattA */
	if (eel_gconf_get_boolean (CONF_TABS_TABBED_VOWEL))
	{
		for (i = length - 1, j = length - 1; i >= 0; i--)
		{
			c = name[i];
			/* keep the letter if the string is short enough
			 * already, if the letter is at the beginning of a
			 * word, or if it's not a lowercase vowel */
			if (length <= target_length || i == 0 ||
 			    (i > 0 && name[i - 1] == ' ') ||
 			    /* TRANSLATOR: Put in this string those characters
 			     that can be skipped without making a word totally
 			     irrecognizable. In most languages, those are the
 			     vowels (with and without accents) */
 			    !strchr (_("aeiouyAEIOUY"), c))
			{
				name[j] = c;
				j--;
			}
			else
			{
				length--;
			}
		}
		/* shift */
		memmove(name, name + j + 1, length + 1);

		/* short enough yet? */
		if (length <= target_length)
		{
			return name;
		}
	}

	/* argh -- try chopping out whole words */
	for (i = target_length; i > 0; i--)
	{
		if (misc_string_is_punctuation (name[i]))
		{
			while (misc_string_is_punctuation (name[i]))
			{
				i--;
			}
			/* this will do */
			name[i + 1] = '\0';
			g_strchomp (name);
			return name;
		}
	}

	/* just chop off and add ellipsis */
	for (i = 0; i < 3; i++)
	{
		/* don't overflow string length */
		if (name[target_length + i] == '\0')
			break;
		
		name[target_length + i] = '.';
	}
	
	/* terminate string */
	name[target_length + i] = '\0';
	
	/* return it */
	return name;
}

/**
 * misc_string_shorten: returns a newly allocated shortened version of str.
 * The new string is of the form "http://blahblah...blahblah.html": the
 * beginning and the end of str joined by "...", target_length characters
 * in total.
 *
 * Special cases:
 *  - a NULL str yields NULL;
 *  - if str already fits in target_length, or is three characters or
 *    shorter, an unmodified copy is returned;
 *  - if target_length is too small to hold the "..." (less than 3), str
 *    is simply truncated to target_length characters (0 if negative).
 *
 * The caller must free the result.
 */
gchar *
misc_string_shorten (const gchar *str, gint target_length)
{
	gchar *new_str;
	gint actual_length, first_length, second_length;

	if (!str) return NULL;

	actual_length = strlen (str);

	/* if the string is already short enough, or if it's too short for
	 * us to shorten it, return a new copy */
	if (actual_length <= target_length ||
	    actual_length <= 3)
		return g_strdup (str);

	/* without room for the ellipsis the length computations below
	 * would go negative and overrun the buffer, so just truncate */
	if (target_length < 3)
		return g_strndup (str, target_length > 0 ? target_length : 0);

	/* allocate new string */
	new_str = g_new (gchar, target_length + 1);

	/* calc lengths to take from beginning and ending of str */
	second_length = (target_length - 3) / 2;
	first_length = target_length - 3 - second_length;

	/* create string: head, ellipsis, tail */
	memcpy (new_str, str, first_length);
	memcpy (new_str + first_length, "...", 3);
	memcpy (new_str + first_length + 3,
		str + actual_length - second_length, second_length);
	new_str[target_length] = '\0';

	return new_str;
}

/**
 * misc_string_strcasestr: test if a string b is a substring of string a,
 * independent of case.
 * Returns a pointer into a at the start of the first match, or NULL if
 * there is no match or if either argument is NULL.  The result points
 * into the caller's string and must not be freed.
 */
const gchar *
misc_string_strcasestr (const gchar *a, const gchar *b)
{
	gchar *down_a;
	gchar *down_b;
	gchar *ptr;
	const gchar *result = NULL;

	if (a == NULL || b == NULL)
		return NULL;

	/* copy and lower case the strings */
	down_a = g_strdup (a);
	down_b = g_strdup (b);
	g_strdown (down_a);
	g_strdown (down_b);

	/* compare */
	ptr = strstr (down_a, down_b);

	/* translate the match back into the original string while the
	 * lowered copy is still alive; this relies on the lowered copy
	 * having the same byte offsets as the original */
	if (ptr != NULL)
		result = a + (ptr - down_a);

	/* free allocated strings */
	g_free (down_a);
	g_free (down_b);

	/* return result of comparison */
	return result;
}

/**
 * misc_string_strdup_replace: builds a copy of str in which every
 * occurrence of a has been replaced by b.  Occurrences are found from left
 * to right and do not overlap.
 * Returns a newly allocated string (caller must free), or NULL when any
 * argument is NULL or when a is the empty string.
 */
gchar *
misc_string_strdup_replace (const gchar *str, const gchar *a, const gchar *b)
{
	const gchar *scan, *hit;
	gchar *result, *out;
	gint a_len, b_len, result_len, gap, tail_len;

	/* reject unusable arguments */
	if (str == NULL || a == NULL || b == NULL || *a == '\0')
		return NULL;

	a_len = strlen (a);
	b_len = strlen (b);

	/* first pass: work out how long the result will be */
	result_len = strlen (str);
	for (scan = strstr (str, a); scan != NULL;
	     scan = strstr (scan + a_len, a))
	{
		result_len = result_len - a_len + b_len;
	}

	/* zero-filled destination buffer */
	result = g_new0 (gchar, result_len + 1);
	g_return_val_if_fail (result, NULL);

	/* second pass: copy the text between matches, then b for each
	 * match */
	out = result;
	scan = str;
	while ((hit = strstr (scan, a)) != NULL)
	{
		gap = hit - scan;
		memcpy (out, scan, gap);
		out += gap;

		memcpy (out, b, b_len);
		out += b_len;

		/* resume just past the matched text */
		scan = hit + a_len;
	}

	/* whatever follows the last match, plus the terminator */
	tail_len = strlen (scan);
	memcpy (out, scan, tail_len);
	out[tail_len] = '\0';

	return result;
}

/**
 * misc_string_escape_xml_prop: escape chars in string such that xmlGetProp
 * doesn't screw it up.  caller is responsible for freeing the returned
 * string
 */
gchar *
misc_string_escape_xml_prop (const gchar *text)
{
	GString *out = g_string_new (NULL);
	guchar c;
	while ((c = *text))
	{
		if (c<0x20 || c>0x7F || c=='&' || c=='<' || c=='>' || c=='%' ||
				c=='"') //Should anything else be escaped?
			g_string_sprintfa (out, "%%%02X", c);
		else
			g_string_append_c (out, c);
		++text;
	}
	text = out->str;
	g_string_free (out, FALSE); /* don't free char data, caller must */
	return (gchar *)text;
}

/**
 * misc_string_escape_path:
 */
gchar *
misc_string_escape_path (const gchar *text)
{
	GString *out = g_string_new (NULL);
	guchar c;
	while ((c = *text))
	{
		/* Should anything else be escaped? */
		if (c=='/' ||  c==' ' || c=='?' || c=='#' || c=='%')
			g_string_sprintfa (out, "%%%02X", c);
		else
			g_string_append_c (out, c);
		++text;
	}
	text = out->str;
	g_string_free (out, FALSE); /* don't free char data, caller must */
	return (gchar *)text;
}

/**
 * misc_string_unescape_hexed_string:
 */
gchar *
misc_string_unescape_hexed_string (const gchar *text)
{
	GString *out = g_string_new (NULL);
	const gchar *u, *cur = text;
	gchar decodebuf[3];

	decodebuf[2] = '\0';

	while ((u = strchr (cur, '%')))
	{
		decodebuf[0]=u[1];
		decodebuf[1]=u[2];
		g_string_sprintfa (out, "%.*s%c", u-cur, cur, 
					(char)strtol(decodebuf, NULL, 16));
		cur = u + 3;
	}

	if (cur && *cur)
	{
		/* attach rest of string */
		g_string_append (out, cur);
	}

	u = out->str;
	g_string_free (out, FALSE); /* don't free char data, caller must */

	return (gchar *)u;
}

/**
 * misc_string_store_time_in_string: formats t as a brief local date
 * (translatable format, "%Y-%m-%d" by default) into str.
 * If t is not positive, or the time cannot be converted or formatted, str
 * is set to the empty string.
 * NOTE: str must be at least 256 chars long
 */
void
misc_string_store_time_in_string (GTime t, gchar *str)
{
	struct tm stm;
	time_t when;
	int length;

	if (t <= 0)
	{
		str[0] = '\0';
		return;
	}

	/* GTime need not have the same size as time_t, so copy the value
	 * into a real time_t instead of casting the pointer */
	when = (time_t) t;

	/* convert to local time */
	if (localtime_r (&when, &stm) == NULL)
	{
		str[0] = '\0';
		return;
	}

	/* format into string */
	/* this is used whenever a brief date is needed, like
	 * in the history (for last visited, first time visited) */
	length = strftime (str, 255, _("%Y-%m-%d"), &stm);

	/* strftime returns 0 when the result does not fit, in which case
	 * this leaves an empty string */
	str[length] = '\0';
}

/**
 * misc_string_store_full_time_in_string: formats t as a local date and
 * time (translatable format, "%Y-%m-%d %H:%M:%S" by default) into str.
 * If t is not positive, or the time cannot be converted or formatted, str
 * is set to the empty string.
 * NOTE: up to 255 bytes are written, so str must be at least that large.
 */
void
misc_string_store_full_time_in_string (GTime t, gchar *str)
{
	struct tm stm;
	time_t when;
	int length;

	if (t <= 0)
	{
		str[0] = '\0';
		return;
	}

	/* GTime need not have the same size as time_t, so copy the value
	 * into a real time_t instead of casting the pointer */
	when = (time_t) t;

	/* convert to local time */
	if (localtime_r (&when, &stm) == NULL)
	{
		str[0] = '\0';
		return;
	}

	/* format into string */
	/* this is used when querying overwriting, as in "are you
	 * sure you want to overwrite file foo, last modified ..." */
	length = strftime (str, 255, _("%Y-%m-%d %H:%M:%S"), &stm);

	/* strftime returns 0 when the result does not fit, in which case
	 * this leaves an empty string */
	str[length] = '\0';
}

/**
 * misc_string_time_to_string: returns the brief date produced by
 * misc_string_store_time_in_string for t, as a newly allocated string
 * which the caller must free.
 */
gchar *
misc_string_time_to_string (GTime t)
{
	/* scratch space of the size the formatter asks for */
	gchar formatted[256];

	misc_string_store_time_in_string (t, formatted);

	/* the scratch buffer dies with this frame, so return a heap copy */
	return g_strdup (formatted);
}

/**
 * misc_string_simplify_uri: Simplify a local filesystem uri, eg
 * /home/jorn/blah/../txt to /home/jorn/txt. Caller should free string.
 */
gchar *
misc_string_simplify_uri (const gchar *uri)
{
	gchar **chars, *ret;
	GString *gstring;
	gint i;
	GList *l, *list = NULL;
	gboolean leading_slash = FALSE;

	if (uri[0] == '/') leading_slash = TRUE;
	
	chars = g_strsplit (uri, "/", 0);

	for (i = 0; chars[i] != NULL; i++)
	{
		if (strcmp (chars[i], ".") == 0)
		{
			g_free (chars[i]);
			chars[i] = g_strdup ("");
		}
		else if (strcmp (chars[i], "..") == 0)
		{
		 	GList *l;
			gint c;

			for (c = i - 1; c > 0 && chars[c] != NULL; c--)
			{
				if (strcmp (chars[c], "..") != 0 &&
				    strlen (chars[c]) > 0)
				{
					break;
				}
			}

			g_assert (chars[c] != NULL);

			l = g_list_find (list, chars[c]);
			if (l)
			{
				list = g_list_remove (list, l->data);
				g_free (chars[c]);
				chars[c] = g_strdup ("");
			}
		}
		else
		{
			list = g_list_append (list, chars[i]);
		}
	}

	if (leading_slash)
		gstring = g_string_new ("/");
	else
		gstring = g_string_new ("");

	for (l = list; l; l = g_list_next (l))
	{
		if (strcmp ((gchar *) l->data, "") == 0) continue;

		if (strcmp (gstring->str,  "") != 0 &&
		    strcmp (gstring->str, "/") != 0)
		{
			g_string_append_c (gstring, '/');
		}
		g_string_append (gstring, (gchar *) l->data);
	}

	ret = gstring->str;
	
	g_string_free (gstring, FALSE);
	g_list_free (list);
	g_strfreev (chars);
	  
	return ret;
}

/**
 * misc_string_xml_decode_entity: translate &amp; et al and return a new
 * expanded string 
 */
gchar *
misc_string_xml_decode_entity (const unsigned char *encoded)
{
	gchar *buffer;
	gint i, j, length;
	
	if (encoded == NULL)
	{
		return NULL;
	}

	buffer = g_strdup (encoded);

	/* Optimize for case where no escape codes are found, otherwise
	 * use the first found code as the starting index */
	if ((i = (char *) strstr (buffer, "&") - buffer) < 0)
	{
		return buffer;
	}
	
	length = strlen (buffer);
	for (j = i; i < length; i++)
	{
		if (encoded[i] == '&')
		{
			if (strncmp (encoded + i + 1, "amp;", 4) == 0)
			{
				buffer[j++] = '&';
				i += 4;
			}
			else if (strncmp (encoded + i + 1, "lt;", 3) == 0)
			{
				buffer[j++] = '<';
				i += 3;
			}
			else if (strncmp (encoded + i + 1, "gt;", 3) == 0)
			{
				buffer[j++] = '>';
				i += 3;
			}
			else if (strncmp (encoded + i + 1, "quot;", 5) == 0)
			{
				buffer[j++] = '"';
				i += 5;
			}
			else if (strncmp (encoded + i + 1, "apos;", 5) == 0)
			{
				buffer[j++] = '\'';
				i += 5;
			}
			else if (encoded[i + 1] == '#')
			{
				buffer[j++] = atoi (encoded + i + 2);
				i += 5;
			}
		}
		else
		{
			buffer[j++] = encoded[i];
		}
	}

	buffer[j] = '\0';
	return buffer;
}

/**
 * misc_string_parse_uri: if uri is a "file:" uri naming an existing file
 * whose mime type is "application/x-nautilus-link", parses that file as
 * XML and returns the value of the "link" property of its root
 * "nautilus_object" element.
 * Returns NULL if uri is NULL, is not a file uri, the file does not
 * exist, it is not a nautilus link, it cannot be parsed, or the link is
 * missing or empty.  The returned string is the value handed back by
 * xmlGetProp and is passed to the caller as-is.
 */
gchar *
misc_string_parse_uri (const gchar *uri)
{
	const gchar *filename;
	const gchar *mime_type;
	xmlDocPtr doc;
	gchar *url = NULL;

	/* check it's a file uri */
	if (uri == NULL || strncmp (uri, "file:", 5) != 0)
	{
		return NULL;
	}

	/* check it exists */
	filename = uri + 5;
	if (!(g_file_exists (filename)))
	{
		return NULL;
	}

	/* get the mime type and check its a nautilus link */
	mime_type = gnome_vfs_get_file_mime_type (filename, NULL, FALSE);
	/* old style */
	if (mime_type == NULL ||
	    strcmp (mime_type, "application/x-nautilus-link") != 0)
	{
		return NULL;
	}

	/* attempt to parse it */
	doc = xmlParseFile (filename);

	/* check the document */
	if (doc == NULL)
	{
		return NULL;
	}

	/* check the structure */
	if (doc->root != NULL && doc->root->name != NULL &&
	    g_strcasecmp (doc->root->name, "nautilus_object") == 0)
	{
		/* get the url */
		url = xmlGetProp (doc->root, "link");
		if (url != NULL && strlen (url) == 0)
		{
			/* an empty link is as good as none.
			 * NOTE(review): the empty string obtained from
			 * xmlGetProp is not released here; presumably it
			 * should be freed with the libxml deallocator --
			 * confirm and fix */
			url = NULL;
		}
	}

	/* the document is no longer needed on any path */
	xmlFreeDoc (doc);

	/* return the URL, or NULL if none was found */
	return url;
}

/**
 * misc_string_strip_newline: returns a copy of the utf8 string c in which
 * every newline character has been replaced by a space.  The text is
 * passed through mozilla_utf8_to_locale, edited, and passed back through
 * mozilla_locale_to_utf8.
 * Returns NULL if c is NULL; otherwise the result of
 * mozilla_locale_to_utf8 (presumably a newly allocated string the caller
 * must free -- confirm against that helper).
 */
gchar *
misc_string_strip_newline (const gchar *c)
{
	gchar *locale_text, *cursor, *utf8_text;

	if (c == NULL)
		return NULL;

	locale_text = mozilla_utf8_to_locale (c);

	/* get rid of newline chars */
	for (cursor = locale_text; *cursor != '\0'; cursor++)
	{
		if (*cursor == '\n')
			*cursor = ' ';
	}

	utf8_text = mozilla_locale_to_utf8 (locale_text);

	/* the intermediate copy is no longer needed */
	g_free (locale_text);

	return utf8_text;
}

/**
 * misc_string_remove_outside_whitespace: removes leading and trailing
 * whitespace.  returns a newly-allocated string.
 */
gchar *
misc_string_remove_outside_whitespace (const gchar *str)
{
	gint start, end;

	for (start = 0; str[start] != '\0' && (str[start] == ' ' ||
		str[start] == '\n' || str[start] == '\t' ||
		str[start] == '\r'); start++);
	for (end = strlen (str) - 1; end > start && (str[end] == ' ' ||
		str[end] == '\n' || str[end] == '\t' ||
		str[end] == '\r'); end--);
	return g_strndup (&(str[start]), end + 1 - start);
}

/**
 * misc_string_expand_home_dir: expands a leading ~ in the string to the
 * user's home directory.  Only a bare "~" or a "~/..." prefix is
 * expanded; the ~some_user syntax is not handled and such strings are
 * returned unchanged, as is any string when the home directory is not
 * known.
 * Returns a newly-allocated string which the caller must free, or NULL if
 * str is NULL.
 */
gchar *
misc_string_expand_home_dir (const gchar *str)
{
	const gchar *home;

	if (!str) return NULL;

	/* "~bob/x" names another user's home; gluing our own home
	 * directory in front of "bob/x" would produce a bogus path */
	if (str[0] != '~' || (str[1] != '\0' && str[1] != '/'))
		return g_strdup (str);

	home = g_get_home_dir ();
	if (home == NULL)
		return g_strdup (str);

	return g_strconcat (home, str + 1, NULL);
}
