#include "detk/utf8.h"

#include <assert.h>

/** !Return number of bytes in a UTF-8 encoded character
This function inspects the sequence starting at s and returns how many bytes
it occupies: 0 when s points at the terminating NUL, 1 for a single-byte
character, and 2 to 6 when the lead byte and every continuation byte it
announces are well-formed. Anything else (stray continuation byte, truncated
sequence, 0xfe/0xff) yields 1 so that a caller can step over the offending
byte. No check for overlong encodings is made. */
unsigned int mx_utf8_len(const char *s)
{
	const unsigned char lead = (unsigned char) s[0];
	unsigned int expected;
	unsigned int i;

	/* End of string */
	if (lead == 0)
		return 0;

	/* Classify the lead byte by its run of high bits */
	if (lead < 0x80)
		return 1;
	else if ((lead & 0xe0) == 0xc0)
		expected = 2;
	else if ((lead & 0xf0) == 0xe0)
		expected = 3;
	else if ((lead & 0xf8) == 0xf0)
		expected = 4;
	else if ((lead & 0xfc) == 0xf8)
		expected = 5;
	else if ((lead & 0xfe) == 0xfc)
		expected = 6;
	else
		return 1;	/* Not a valid lead byte, assume a single step */

	/* Every trailing byte must look like 10xxxxxx. The scan stops at the
	   first mismatch, so a NUL terminator is never read past. */
	for (i = 1; i < expected; i++) {
		if ((((unsigned char) s[i]) & 0xc0) != 0x80)
			return 1;	/* Broken sequence, assume a single step */
	}

	return expected;
}

/** !Read a UTF-8 character
This function interprets len bytes starting at s as one encoded UTF-8
character and returns the decoded value.

s must point to at least len readable bytes (at least one byte when len is
outside 2..6). len is the sequence length in bytes, typically the value
returned by mx_utf8_len() for the same position.

For len 2 to 6 the payload bits of the lead byte and of each continuation
byte are combined; the marker bits are masked off but not validated. For any
other len the first byte itself is returned as a value in the range 0..255,
independent of whether plain char is signed on the target. */
long mx_utf8_char(const char *s, unsigned int len)
{
	/* Read bytes as unsigned so values >= 0x80 never sign-extend */
	const unsigned char *bytes = (const unsigned char *) s;
	unsigned long value;
	unsigned int i;

	/* Single byte, end of string, or an unsupported length */
	if (len < 2 || len > 6)
		return (long) bytes[0];

	/* The lead byte carries 7 - len payload bits:
	   0x1f, 0x0f, 0x07, 0x03, 0x01 for len 2..6 */
	value = bytes[0] & (0x7fu >> len);

	/* Each continuation byte contributes its low six bits. Accumulating
	   in unsigned long keeps the shifts defined even where int is 16 bits. */
	for (i = 1; i < len; i++)
		value = (value << 6) | (bytes[i] & 0x3fu);

	/* At most 31 bits are produced, so the result always fits in long */
	return (long) value;
}
