
#include <windows.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mbctype.h>
#include <io.h>
#include <fcntl.h>
#include <locale.h>
#define XML_STATIC
#include <expat.h>

#pragma warning(disable:4996)

// Expat handler callbacks that print parse events.

// Start-tag callback: writes "start element: <name>" and a newline to stdout.
// userData and atts are accepted to match the handler signature but are not used.
void XMLCALL element_start(void *userData, const XML_Char *name, const XML_Char *atts[])
{
	(void)userData;
	(void)atts;

	fprintf(stdout, "start element: %s\n", name);
}

// End-tag callback: writes "end element: <name>" and a newline to stdout.
// userData is accepted to match the handler signature but is not used.
void XMLCALL element_end(void *userData, const XML_Char *name)
{
	(void)userData;

	fprintf(stdout, "end element: %s\n", name);
}

// Character-data callback: converts the UTF-8 run s[0..len) to UTF-16 and
// prints it with _putws (which appends a newline).
//
// s is not NUL-terminated; len is its length in bytes. userData is unused.
// If the temporary buffer cannot be allocated the run is skipped and a
// message is written to stderr.
void XMLCALL character_data(void *userData, const XML_Char *s, int len)
{
	wchar_t *tmp;
	int n;

	(void)userData;

	if (len < 0) {
		return;
	}

	// A UTF-8 sequence never decodes to more UTF-16 units than it has bytes,
	// so len units plus one for the terminator is always enough.
	tmp = (wchar_t *)malloc(((size_t)len + 1) * sizeof *tmp);
	if (tmp == NULL) {
		fprintf(stderr, "character_data: out of memory\n");
		return;
	}

	// Returns 0 on failure, which leaves tmp as an empty string below.
	n = MultiByteToWideChar(CP_UTF8, 0, s, len, tmp, len);
	tmp[n] = 0;
	_putws(tmp);
	free(tmp);
}

// Unknown-encoding support for expat: EUC-JP and Shift-JIS decoding.

// Converts a two-byte JIS code (first byte in the high 8 bits, second byte in
// the low 8 bits) to the corresponding Shift-JIS double-byte code, packed the
// same way. For example 0x2121 -> 0x8140 and 0x3021 -> 0x889f.
unsigned short jistojms(unsigned short c)
{
	// Rebase both bytes so that 0x21 becomes 0.
	unsigned short code = (unsigned short)(c - 0x2121);
	unsigned int lead;
	unsigned int trail;

	// Two JIS rows share one Shift-JIS lead byte: bit 8 selects which half of
	// the trail-byte range this row lands in.
	code = (unsigned short)(code + ((code & 0x100) ? 0x9e : 0x40));

	// Shift-JIS trail bytes skip 0x7f.
	if ((code & 0xff) >= 0x7f) {
		code = (unsigned short)(code + 1);
	}

	lead = (unsigned int)(code >> 9) + 0x81;
	trail = (unsigned int)(code & 0xff);
	code = (unsigned short)((lead << 8) | trail);

	// Lead bytes continue at 0xe0 after 0x9f.
	return (code >= 0xa000) ? (unsigned short)(code + 0x4000) : code;
}

// Expat XML_Encoding.convert callback for EUC-JP.
//
// s points at a multi-byte sequence; data is unused. Returns the UTF-16 code
// unit for the character, or U+3013 (GETA MARK) when the sequence is not
// decodable here.
//
//   0x8E, 0xA1..0xDF         half-width katakana -> U+FF61..U+FF9F
//   0xA1..0xFE, 0xA1..0xFE   JIS X 0208: re-encoded as Shift-JIS with
//                            jistojms() and decoded via code page 932
//
// Every other sequence (including 0x8F-prefixed ones, which are not
// supported) yields the fallback.
int XMLCALL eucjp_convert(void *data, const char *s)
{
	const UINT shift_jis_cp = 932;
	const wchar_t fallback = 0x3013;
	const unsigned char lead = (unsigned char)s[0];
	unsigned char trail;
	wchar_t c = fallback;

	(void)data;

	if (!(lead & 0x80)) {
		return fallback;
	}
	trail = (unsigned char)s[1];

	if (lead == 0x8e) {
		// JIS X 0201 katakana: byte 0xA1 corresponds to U+FF61.
		if (trail >= 0xa1 && trail <= 0xdf) {
			c = (wchar_t)(0xff61 + (trail - 0xa1));
		}
	} else if (lead >= 0xa1 && lead <= 0xfe && trail >= 0xa1 && trail <= 0xfe) {
		char tmp[2];
		unsigned short t = jistojms((unsigned short)(((lead & 0x7f) << 8) | (trail & 0x7f)));

		tmp[0] = (char)(t >> 8);
		tmp[1] = (char)(t & 0xff);
		// Use code page 932 explicitly: CP_ACP is whatever ANSI code page the
		// system is configured with, which need not be Japanese.
		if (MultiByteToWideChar(shift_jis_cp, MB_ERR_INVALID_CHARS, tmp, 2, &c, 1) != 1) {
			c = fallback;
		}
	}
	return c;
}

// Expat XML_Encoding.convert callback for Shift-JIS.
//
// s points at a byte sequence; data is unused. Two bytes are decoded when
// s[0] is a code page 932 lead byte, otherwise one. Returns the UTF-16 code
// unit, or U+3013 (GETA MARK) when the bytes do not decode to exactly one
// unit.
int XMLCALL sjis_convert(void *data, const char *s)
{
	const UINT shift_jis_cp = 932;
	const wchar_t fallback = 0x3013;
	wchar_t c = fallback;
	int n;

	(void)data;

	// Ask about code page 932 explicitly so the result does not depend on the
	// process's current multibyte code page or the system ANSI code page.
	n = IsDBCSLeadByteEx(shift_jis_cp, (BYTE)(unsigned char)s[0]) ? 2 : 1;

	if (MultiByteToWideChar(shift_jis_cp, MB_ERR_INVALID_CHARS, s, n, &c, 1) != 1) {
		c = fallback;
	}
	return c;
}

// Expat unknown-encoding handler.
//
// The encoding name is normalised by dropping every non-alphanumeric
// character and upper-casing the rest (so "euc-jp", "EUC_JP" and "EucJP" all
// become "EUCJP"); at most 9 characters are kept.
//
// Fills info->map and info->convert for:
//   EUCJP            bytes 0x80..0xFF start 2-byte sequences, except 0x8F
//                    which starts a 3-byte sequence
//   SJIS / SHIFTJIS  code page 932 lead bytes start 2-byte sequences;
//                    0xA1..0xDF are single-byte half-width katakana
//
// NOTE(review): any other name is accepted with an identity byte map (each
// byte value used as its own code point) and no convert function; confirm
// that this fallback is intended rather than reporting an error.
//
// Always returns XML_STATUS_OK. encodingHandlerData is unused.
int XMLCALL encoding_handler(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info)
{
	const UINT shift_jis_cp = 932;
	int i;
	char enc[10];

	(void)encodingHandlerData;

	i = 0;
	while (*name) {
		// <ctype.h> functions require a value representable as unsigned char.
		unsigned char ch = (unsigned char)*name;
		if (isalnum(ch)) {
			if (i < 9) {
				enc[i] = (char)toupper(ch);
				i++;
			}
		}
		name++;
	}
	enc[i] = 0;

	for (i = 0; i < 256; i++) {
		info->map[i] = i;
	}

	if (strcmp(enc, "EUCJP") == 0) {
		for (i = 128; i < 256; i++) {
			info->map[i] = -2;
		}
		// 0x8F introduces a three-byte sequence; declaring it as such keeps
		// the parser aligned on the following character.
		info->map[0x8f] = -3;
		info->convert = eucjp_convert;
	} else if (strcmp(enc, "SJIS") == 0 || strcmp(enc, "SHIFTJIS") == 0) {
		for (i = 128; i < 256; i++) {
			if (IsDBCSLeadByteEx(shift_jis_cp, (BYTE)i)) {
				info->map[i] = -2;
			} else if (i >= 0xa1 && i <= 0xdf) {
				// Half-width katakana: byte 0xA1 corresponds to U+FF61.
				info->map[i] = 0xff61 + (i - 0xa1);
			}
		}
		info->convert = sjis_convert;
	}

	return XML_STATUS_OK;
}

int main(int argc, char *argv[])
{
	char const *source = "D:\\sjis.xml";
	XML_Parser parser;
	int fd;

	setlocale(LC_ALL, "Japanese");

	parser = XML_ParserCreate(NULL);
	if (!parser) {
		fprintf(stderr, "failed to create expat parser\n");
		exit(-1);
	}
	XML_SetUnknownEncodingHandler(parser, encoding_handler, 0);
	XML_SetElementHandler(parser, element_start, element_end);
	XML_SetCharacterDataHandler(parser, character_data);

	fd = open(source, O_RDONLY | O_BINARY);
	if (fd != -1) {
		while (1) {
			char tmp[4096];
			int n = read(fd, tmp, sizeof(tmp));
			if (n < 1) {
				XML_Parse(parser, tmp, 0, 1);
				break;
			}
			XML_Parse(parser, tmp, n, 0);
		}
		close(fd);
	}

	XML_ParserFree(parser);

	return 0;
}

