#include "trgm.h"
#include <ctype.h>
#include "utils/array.h"
#include "catalog/pg_type.h"

PG_MODULE_MAGIC;

/*
 * Current similarity threshold.  similarity_op() reports a match when the
 * computed similarity is >= this value; set_limit() replaces it and
 * show_limit() returns it.  The initial value is 0.3.
 */
float4		trgm_limit = 0.3;

PG_FUNCTION_INFO_V1(set_limit);
Datum		set_limit(PG_FUNCTION_ARGS);

/*
 * set_limit(float4) returns float4
 *
 * Replace the similarity threshold stored in trgm_limit and return the new
 * value.  The argument must lie in the closed interval [0, 1]; anything
 * else raises an ERROR and leaves the current threshold untouched.
 */
Datum
set_limit(PG_FUNCTION_ARGS)
{
	float4		nlimit = PG_GETARG_FLOAT4(0);

	/*
	 * The test is written as "not inside the range" rather than "below or
	 * above it" so that NaN, for which every ordered comparison is false,
	 * is rejected as well instead of silently becoming the threshold.
	 */
	if (!(nlimit >= 0 && nlimit <= 1.0))
		elog(ERROR, "wrong limit, should be between 0 and 1");

	trgm_limit = nlimit;
	PG_RETURN_FLOAT4(trgm_limit);
}

PG_FUNCTION_INFO_V1(show_limit);
Datum		show_limit(PG_FUNCTION_ARGS);
Datum
show_limit(PG_FUNCTION_ARGS)
{
	PG_RETURN_FLOAT4(trgm_limit);
}

/*
 * Scanner states used by generate_trgm() while it walks the input string.
 */
/* between words: looking for the first character of the next word */
#define WORDWAIT		0
/* inside a word: characters are being collected into the word buffer */
#define INWORD			1

/*
 * qsort() comparator for trigrams: hands both elements to CMPTRGM and
 * returns its result unchanged.
 */
static int
comp_trgm(const void *a, const void *b)
{
	int			order = CMPTRGM(a, b);

	return order;
}

/*
 * Remove adjacent duplicate trigrams from a[0 .. len-1] in place.
 *
 * Only neighbouring equal elements are collapsed, so the array has to be
 * sorted beforehand for the result to be duplicate-free.  The surviving
 * elements are packed at the front of the array.
 *
 * Returns the number of elements kept (0 for an empty array).
 */
static int
unique_array(trgm * a, int len)
{
	trgm	   *last;			/* last element kept so far */
	trgm	   *cur;			/* element currently being examined */

	/*
	 * The compaction below always keeps a[0], so an empty array must be
	 * handled here; otherwise it would be reported as having one element.
	 */
	if (len <= 0)
		return 0;

	last = a;
	for (cur = a + 1; cur - a < len; cur++)
	{
		/* a non-zero comparison means cur starts a new run of values */
		if (CMPTRGM(cur, last))
		{
			last++;
			CPTRGM(last, cur);
		}
	}

	return last + 1 - a;
}


/*
 * generate_trgm
 *		Build the sorted, duplicate-free trigram array for a string.
 *
 * str	 - input bytes; exactly slen bytes are read, so no terminating NUL
 *		   is needed.  The input is classified one byte at a time with the
 *		   <ctype.h> functions.
 * slen  - number of bytes in str.
 *
 * The input is split into words.  With KEEPONLYALNUM defined a word is a run
 * of alphanumeric bytes, otherwise a run of non-whitespace bytes.  Each word
 * is copied into a scratch buffer behind LPADDING blanks and followed by
 * blanks, optionally lower-cased (IGNORECASE), and every 3-byte window of
 * that padded word is appended to the result.  Finally the collected
 * trigrams are sorted and adjacent duplicates are removed.
 *
 * Returns a palloc'd TRGM with flag ARRKEY.  If no trigram was produced, len
 * is left at TRGMHRDSIZE (header only).  The callers in this file pfree the
 * result when they are done with it.
 */
TRGM *
generate_trgm(char *str, int slen)
{
	TRGM	   *trg;
	char	   *buf,
			   *sptr,
			   *bufptr;
	trgm	   *tptr;
	int			state = WORDWAIT;
	int			wl,
				len;

	/* header plus room for (slen / 2 + 1) * 3 trigrams */
	trg = (TRGM *) palloc(TRGMHRDSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
	trg->flag = ARRKEY;
	trg->len = TRGMHRDSIZE;

	/* nothing to do for an empty string or one too short for a trigram */
	if (slen + LPADDING + RPADDING < 3 || slen == 0)
		return trg;

	/* next free slot in the result array */
	tptr = GETARR(trg);

	/* scratch buffer holding one padded word at a time */
	buf = palloc(sizeof(char) * (slen + 4));
	sptr = str;

	/* write the leading blanks once; they are reused for every word */
	if (LPADDING > 0)
	{
		*buf = ' ';
		if (LPADDING > 1)
			*(buf + 1) = ' ';
	}

	/* word characters are stored right after the left padding */
	bufptr = buf + LPADDING;
	while (sptr - str < slen)
	{
		if (state == WORDWAIT)
		{
			/* skipping separators: does this byte start a word? */
			if (
#ifdef	KEEPONLYALNUM
				isalnum((unsigned char) *sptr)
#else
				!isspace((unsigned char) *sptr)
#endif
				)
			{
				*bufptr = *sptr;	/* start a new word in the buffer */
				bufptr++;
				state = INWORD;
				/* a word that starts on the final byte must be flushed now */
				if (sptr - str == slen - 1 /* last char */ )
					goto gettrg;
			}
		}
		else
		{
			/* inside a word: does this byte end it? */
			if (
#ifdef	KEEPONLYALNUM
				!isalnum((unsigned char) *sptr)
#else
				isspace((unsigned char) *sptr)
#endif
				)
			{
				/*
				 * Reached either because a separator ended the word, or by
				 * goto when the last input byte was itself a word character.
				 */
		gettrg:
				/* word in buffer, so count trigrams */
				*bufptr = ' ';
				*(bufptr + 1) = ' ';
				/* number of 3-byte windows to emit for this padded word */
				wl = bufptr - (buf + LPADDING) - 2 + LPADDING + RPADDING;
				if (wl <= 0)
				{
					/* word too short to yield a trigram: drop it and go on */
					bufptr = buf + LPADDING;
					state = WORDWAIT;
					sptr++;
					continue;
				}

#ifdef IGNORECASE
				do
				{				/* lower-case the word, leaving the padding alone */
					int			wwl = bufptr - buf;

					bufptr = buf + LPADDING;
					while (bufptr - buf < wwl)
					{
						*bufptr = tolower((unsigned char) *bufptr);
						bufptr++;
					}
				} while (0);
#endif
				/* slide a 3-byte window over the buffer from its very start */
				bufptr = buf;
				/* set trigrams */
				while (bufptr - buf < wl)
				{
					CPTRGM(tptr, bufptr);
					bufptr++;
					tptr++;
				}
				/* reset for the next word; the left padding stays in place */
				bufptr = buf + LPADDING;
				state = WORDWAIT;
			}
			else
			{
				*bufptr = *sptr;	/* append the byte to the current word */
				bufptr++;
				/* input ends inside this word: flush it now */
				if (sptr - str == slen - 1)
					goto gettrg;
			}
		}
		sptr++;
	}

	pfree(buf);

	/* no trigrams collected: return the header-only result */
	if ((len = tptr - GETARR(trg)) == 0)
		return trg;

	if (len > 0)
	{
		/* sort first so that unique_array() sees duplicates side by side */
		qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
		len = unique_array(GETARR(trg), len);
	}

	/* record the final size for the number of trigrams kept */
	trg->len = CALCGTSIZE(ARRKEY, len);

	return trg;
}


PG_FUNCTION_INFO_V1(show_trgm);
Datum		show_trgm(PG_FUNCTION_ARGS);

/*
 * show_trgm(text) returns text[]
 *
 * Produce the trigrams of the argument as an array of 3-byte text values,
 * in the order generate_trgm() delivers them.
 */
Datum
show_trgm(PG_FUNCTION_ARGS)
{
	text	   *input = PG_GETARG_TEXT_P(0);
	TRGM	   *trg = generate_trgm(VARDATA(input), VARSIZE(input) - VARHDRSZ);
	int			nelems = ARRNELEM(trg);
	trgm	   *src = GETARR(trg);
	Datum	   *elems = (Datum *) palloc(sizeof(Datum) * (1 + nelems));
	ArrayType  *result;
	int			i;

	/* wrap every trigram in its own text datum */
	for (i = 0; i < nelems; i++)
	{
		text	   *item = (text *) palloc(VARHDRSZ + 3);

		VARATT_SIZEP(item) = VARHDRSZ + 3;
		CPTRGM(VARDATA(item), src);
		elems[i] = PointerGetDatum(item);
		src++;
	}

	result = construct_array(elems, nelems, TEXTOID, -1, false, 'i');

	/* the per-element datums and the working storage are released here */
	for (i = 0; i < nelems; i++)
		pfree(DatumGetPointer(elems[i]));

	pfree(elems);
	pfree(trg);
	PG_FREE_IF_COPY(input, 0);

	PG_RETURN_POINTER(result);
}

/*
 * cnt_sml
 *		Compute the similarity of two trigram arrays.
 *
 * Both arrays are walked in parallel in merge fashion, which counts the
 * common trigrams correctly only if each array is sorted and free of
 * duplicates (the form generate_trgm() produces).
 *
 * The result is the number of shared trigrams divided by
 *	 - the size of the union of both sets, if DIVUNION is defined, or
 *	 - the size of the larger set otherwise.
 * If either array is empty the result is 0.
 */
float4
cnt_sml(TRGM * trg1, TRGM * trg2)
{
	trgm	   *ptr1,
			   *ptr2;
	int			count = 0;
	int			len1,
				len2;

	ptr1 = GETARR(trg1);
	ptr2 = GETARR(trg2);

	len1 = ARRNELEM(trg1);
	len2 = ARRNELEM(trg2);

	/*
	 * Explicit test: with two empty arrays both divisors below are zero and
	 * the division would yield 0/0 (NaN).  With only one array empty the
	 * quotient is 0 anyway, so returning early does not change that case.
	 */
	if (len1 <= 0 || len2 <= 0)
		return (float4) 0.0;

	while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
	{
		int			res = CMPTRGM(ptr1, ptr2);

		if (res < 0)
			ptr1++;				/* trigram only in the first array */
		else if (res > 0)
			ptr2++;				/* trigram only in the second array */
		else
		{
			/* trigram present in both arrays */
			ptr1++;
			ptr2++;
			count++;
		}
	}

#ifdef DIVUNION
	return ((((float4) count) / ((float4) (len1 + len2 - count))));
#else
	return (((float) count) / ((float) ((len1 > len2) ? len1 : len2)));
#endif

}

PG_FUNCTION_INFO_V1(similarity);
Datum		similarity(PG_FUNCTION_ARGS);

/*
 * similarity(text, text) returns float4
 *
 * Extract the trigrams of both arguments and return the score that
 * cnt_sml() computes for them.
 */
Datum
similarity(PG_FUNCTION_ARGS)
{
	text	   *left = PG_GETARG_TEXT_P(0);
	text	   *right = PG_GETARG_TEXT_P(1);
	TRGM	   *ltrg = generate_trgm(VARDATA(left), VARSIZE(left) - VARHDRSZ);
	TRGM	   *rtrg = generate_trgm(VARDATA(right), VARSIZE(right) - VARHDRSZ);
	float4		score = cnt_sml(ltrg, rtrg);

	/* release the trigram arrays and any detoasted argument copies */
	pfree(ltrg);
	pfree(rtrg);
	PG_FREE_IF_COPY(left, 0);
	PG_FREE_IF_COPY(right, 1);

	PG_RETURN_FLOAT4(score);
}

PG_FUNCTION_INFO_V1(similarity_op);
Datum		similarity_op(PG_FUNCTION_ARGS);
Datum
similarity_op(PG_FUNCTION_ARGS)
{
	float4		res = DatumGetFloat4(DirectFunctionCall2(
														 similarity,
														 PG_GETARG_DATUM(0),
														 PG_GETARG_DATUM(1)
														 ));

	PG_RETURN_BOOL(res >= trgm_limit);
}
