#include "hstore.h"
#include <ctype.h>

PG_MODULE_MAGIC;

typedef struct
{
	char	   *begin;
	char	   *ptr;
	char	   *cur;
	char	   *word;
	int			wordlen;

	Pairs	   *pairs;
	int			pcur;
	int			plen;
}	HSParser;

#define RESIZEPRSBUF \
do { \
		if ( state->cur - state->word + 1 >= state->wordlen ) \
		{ \
				int4 clen = state->cur - state->word; \
				state->wordlen *= 2; \
				state->word = (char*)repalloc( (void*)state->word, state->wordlen ); \
				state->cur = state->word + clen; \
		} \
} while (0)


#define GV_WAITVAL 0
#define GV_INVAL 1
#define GV_INESCVAL 2
#define GV_WAITESCIN 3
#define GV_WAITESCESCIN 4

static bool
get_val(HSParser * state, bool ignoreeq, bool *escaped)
{
	int			st = GV_WAITVAL;

	state->wordlen = 32;
	state->cur = state->word = palloc(state->wordlen);
	*escaped = false;

	while (1)
	{
		if (st == GV_WAITVAL)
		{
			if (*(state->ptr) == '"')
			{
				*escaped = true;
				st = GV_INESCVAL;
			}
			else if (*(state->ptr) == '\0')
			{
				return false;
			}
			else if (*(state->ptr) == '=' && !ignoreeq)
			{
				elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
			else if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCIN;
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				*(state->cur) = *(state->ptr);
				state->cur++;
				st = GV_INVAL;
			}
		}
		else if (st == GV_INVAL)
		{
			if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCIN;
			}
			else if (*(state->ptr) == '=' && !ignoreeq)
			{
				state->ptr--;
				return true;
			}
			else if (*(state->ptr) == ',' && ignoreeq)
			{
				state->ptr--;
				return true;
			}
			else if (isspace((unsigned char) *(state->ptr)))
			{
				return true;
			}
			else if (*(state->ptr) == '\0')
			{
				state->ptr--;
				return true;
			}
			else
			{
				RESIZEPRSBUF;
				*(state->cur) = *(state->ptr);
				state->cur++;
			}
		}
		else if (st == GV_INESCVAL)
		{
			if (*(state->ptr) == '\\')
			{
				st = GV_WAITESCESCIN;
			}
			else if (*(state->ptr) == '"')
			{
				return true;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpected end of string");
			}
			else
			{
				RESIZEPRSBUF;
				*(state->cur) = *(state->ptr);
				state->cur++;
			}
		}
		else if (st == GV_WAITESCIN)
		{
			if (*(state->ptr) == '\0')
				elog(ERROR, "Unexpected end of string");
			RESIZEPRSBUF;
			*(state->cur) = *(state->ptr);
			state->cur++;
			st = GV_INVAL;
		}
		else if (st == GV_WAITESCESCIN)
		{
			if (*(state->ptr) == '\0')
				elog(ERROR, "Unexpected end of string");
			RESIZEPRSBUF;
			*(state->cur) = *(state->ptr);
			state->cur++;
			st = GV_INESCVAL;
		}
		else
			elog(ERROR, "Unknown state %d at postion line %d in file '%s'", st, __LINE__, __FILE__);

		state->ptr++;
	}

	return false;
}

#define WKEY	0
#define WVAL	1
#define WEQ 2
#define WGT 3
#define WDEL	4


static void
parse_hstore(HSParser * state)
{
	int			st = WKEY;
	bool		escaped = false;

	state->plen = 16;
	state->pairs = (Pairs *) palloc(sizeof(Pairs) * state->plen);
	state->pcur = 0;
	state->ptr = state->begin;
	state->word = NULL;

	while (1)
	{
		if (st == WKEY)
		{
			if (!get_val(state, false, &escaped))
				return;
			if (state->pcur >= state->plen)
			{
				state->plen *= 2;
				state->pairs = (Pairs *) repalloc(state->pairs, sizeof(Pairs) * state->plen);
			}
			state->pairs[state->pcur].key = state->word;
			state->pairs[state->pcur].keylen = state->cur - state->word;
			state->pairs[state->pcur].val = NULL;
			state->word = NULL;
			st = WEQ;
		}
		else if (st == WEQ)
		{
			if (*(state->ptr) == '=')
			{
				st = WGT;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpectd end of string");
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else if (st == WGT)
		{
			if (*(state->ptr) == '>')
			{
				st = WVAL;
			}
			else if (*(state->ptr) == '\0')
			{
				elog(ERROR, "Unexpectd end of string");
			}
			else
			{
				elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else if (st == WVAL)
		{
			if (!get_val(state, true, &escaped))
				elog(ERROR, "Unexpected end of string");
			state->pairs[state->pcur].val = state->word;
			state->pairs[state->pcur].vallen = state->cur - state->word;
			state->pairs[state->pcur].isnull = false;
			state->pairs[state->pcur].needfree = true;
			if (state->cur - state->word == 4 && !escaped)
			{
				state->word[4] = '\0';
				if (0 == pg_strcasecmp(state->word, "null"))
					state->pairs[state->pcur].isnull = true;
			}
			state->word = NULL;
			state->pcur++;
			st = WDEL;
		}
		else if (st == WDEL)
		{
			if (*(state->ptr) == ',')
			{
				st = WKEY;
			}
			else if (*(state->ptr) == '\0')
			{
				return;
			}
			else if (!isspace((unsigned char) *(state->ptr)))
			{
				elog(ERROR, "Syntax error near '%c' at postion %d", *(state->ptr), (int4) (state->ptr - state->begin));
			}
		}
		else
			elog(ERROR, "Unknown state %d at line %d in file '%s'", st, __LINE__, __FILE__);

		state->ptr++;
	}
}

int
comparePairs(const void *a, const void *b)
{
	if (((Pairs *) a)->keylen == ((Pairs *) b)->keylen)
	{
		int			res = strncmp(
								  ((Pairs *) a)->key,
								  ((Pairs *) b)->key,
								  ((Pairs *) a)->keylen
		);

		if (res)
			return res;

		/* guarantee that neddfree willl be later */
		if (((Pairs *) b)->needfree == ((Pairs *) a)->needfree)
			return 0;
		else if (((Pairs *) a)->needfree)
			return 1;
		else
			return -1;
	}
	return (((Pairs *) a)->keylen > ((Pairs *) b)->keylen) ? 1 : -1;
}

int
uniquePairs(Pairs * a, int4 l, int4 *buflen)
{
	Pairs	   *ptr,
			   *res;

	*buflen = 0;
	if (l < 2)
	{
		if (l == 1)
			*buflen = a->keylen + ((a->isnull) ? 0 : a->vallen);
		return l;
	}

	qsort((void *) a, l, sizeof(Pairs), comparePairs);
	ptr = a + 1;
	res = a;
	while (ptr - a < l)
	{
		if (ptr->keylen == res->keylen && strncmp(ptr->key, res->key, res->keylen) == 0)
		{
			if (ptr->needfree)
			{
				pfree(ptr->key);
				pfree(ptr->val);
			}
		}
		else
		{
			*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
			res++;
			memcpy(res, ptr, sizeof(Pairs));
		}

		ptr++;
	}

	*buflen += res->keylen + ((res->isnull) ? 0 : res->vallen);
	return res + 1 - a;
}

static void
freeHSParse(HSParser * state)
{
	int			i;

	if (state->word)
		pfree(state->word);
	for (i = 0; i < state->pcur; i++)
		if (state->pairs[i].needfree)
		{
			if (state->pairs[i].key)
				pfree(state->pairs[i].key);
			if (state->pairs[i].val)
				pfree(state->pairs[i].val);
		}
	pfree(state->pairs);
}

PG_FUNCTION_INFO_V1(hstore_in);
Datum		hstore_in(PG_FUNCTION_ARGS);
Datum
hstore_in(PG_FUNCTION_ARGS)
{
	HSParser	state;
	int4		len,
				buflen,
				i;
	HStore	   *out;
	HEntry	   *entries;
	char	   *ptr;

	state.begin = PG_GETARG_CSTRING(0);

	parse_hstore(&state);

	if (state.pcur == 0)
	{
		freeHSParse(&state);
		len = CALCDATASIZE(0, 0);
		out = palloc(len);
		out->len = len;
		out->size = 0;
		PG_RETURN_POINTER(out);
	}

	state.pcur = uniquePairs(state.pairs, state.pcur, &buflen);

	len = CALCDATASIZE(state.pcur, buflen);
	out = palloc(len);
	out->len = len;
	out->size = state.pcur;

	entries = ARRPTR(out);
	ptr = STRPTR(out);

	for (i = 0; i < out->size; i++)
	{
		entries[i].keylen = state.pairs[i].keylen;
		entries[i].pos = ptr - STRPTR(out);
		memcpy(ptr, state.pairs[i].key, state.pairs[i].keylen);
		ptr += entries[i].keylen;

		entries[i].valisnull = state.pairs[i].isnull;
		if (entries[i].valisnull)
			entries[i].vallen = 4;		/* null */
		else
		{
			entries[i].vallen = state.pairs[i].vallen;
			memcpy(ptr, state.pairs[i].val, state.pairs[i].vallen);
			ptr += entries[i].vallen;
		}
	}

	freeHSParse(&state);
	PG_RETURN_POINTER(out);
}

static char *
cpw(char *dst, char *src, int len)
{
	char	   *ptr = src;

	while (ptr - src < len)
	{
		if (*ptr == '"' || *ptr == '\\')
			*dst++ = '\\';
		*dst++ = *ptr++;
	}
	return dst;
}

PG_FUNCTION_INFO_V1(hstore_out);
Datum		hstore_out(PG_FUNCTION_ARGS);
Datum
hstore_out(PG_FUNCTION_ARGS)
{
	HStore	   *in = PG_GETARG_HS(0);
	int			buflen,
				i;
	char	   *out,
			   *ptr;
	char	   *base = STRPTR(in);
	HEntry	   *entries = ARRPTR(in);

	if (in->size == 0)
	{
		out = palloc(1);
		*out = '\0';
		PG_FREE_IF_COPY(in, 0);
		PG_RETURN_CSTRING(out);
	}

	buflen = (4 /* " */ + 2 /* => */ + 2 /* , */ ) * in->size +
		2 /* esc */ * (in->len - CALCDATASIZE(in->size, 0));

	out = ptr = palloc(buflen);
	for (i = 0; i < in->size; i++)
	{
		*ptr++ = '"';
		ptr = cpw(ptr, base + entries[i].pos, entries[i].keylen);
		*ptr++ = '"';
		*ptr++ = '=';
		*ptr++ = '>';
		if (entries[i].valisnull)
		{
			*ptr++ = 'N';
			*ptr++ = 'U';
			*ptr++ = 'L';
			*ptr++ = 'L';
		}
		else
		{
			*ptr++ = '"';
			ptr = cpw(ptr, base + entries[i].pos + entries[i].keylen, entries[i].vallen);
			*ptr++ = '"';
		}

		if (i + 1 != in->size)
		{
			*ptr++ = ',';
			*ptr++ = ' ';
		}
	}
	*ptr = '\0';

	PG_FREE_IF_COPY(in, 0);
	PG_RETURN_CSTRING(out);
}
