/*
 * File: extract.c
 *
 * Book lookup utility
 *
 * XML data extraction
 *
 * Bob Eager   April 2014
 *
 */

#include "blu.h"
#include <expat.h>

/* Constants */

/* Number of STATE slots in the stack used by push_state/pop_state */
#define		STACKSIZE	10	/* Size of internal state stack */
/* Most characters kept for one title or author; the buffer adds 1 for NUL */
#define		MAXATTRTEXT	200	/* Length of an attribute's text part */

/* Type definitions */

typedef enum {				/* Parse state values */
	ST_NEUTRAL,			/* Initial state; text is ignored */
	ST_TITLE,			/* Inside a Title element */
	ST_AUTHORS,			/* Inside an Authors element */
	ST_PERSON			/* Inside a Person within Authors */
} STATE;

/*
 * Accumulator for the character data of one element (a title or an
 * author).  Despite the name, this holds element text collected by
 * char_handler, not XML attributes; the attribute array passed to
 * start_handler is not used in this file.
 */
typedef struct _ATTR {			/* Attribute storage */
CHAR	text[MAXATTRTEXT+1];		/* Text component, with room for a NUL */
UINT	len;				/* Number of characters held in text */
BOOL	quiet;				/* TRUE once truncation has been logged */
} ATTR, *PATTR;

/* Forward references */

/* Callbacks registered with the XML parser by extract_book */
static	VOID	char_handler(PVOID, const XML_Char *, INT);
static	VOID	end_handler(PVOID, const XML_Char *);
static	VOID	start_handler(PVOID, const XML_Char *, const XML_Char **);
/* Parse state stack helpers */
static	VOID	pop_state(VOID);
static	VOID	push_state(STATE);

/* Local storage */

/*
 * These are shared by extract_book and the parser callbacks.
 * extract_book resets 'state' and 'stkptr' before each parse; the
 * text accumulators are reset by start_handler when their element opens.
 */
static	STATE	state;			/* Current parse state */
static	STATE	stack[STACKSIZE];	/* Saved states, most recent last */
static	INT	stkptr;			/* Index of next free slot in stack */
static	ATTR	a_title;		/* Text collected for the current Title */
static	ATTR	a_author;		/* Text collected for the current Person */


/*
 * Parse the raw XML for one book and fill in the book structure.
 *
 *	Inputs:
 *		buf	buffer containing XML
 *		len	length of data in buffer
 *		book	pointer to book structure for output
 *
 *	Outputs:
 *		book structure filled in by the parser callbacks;
 *		the 'valid' field is TRUE only if the parser could be
 *		created and the whole buffer parsed without error.
 *
 */

VOID extract_book(PCHAR buf, UINT len, PBOOK book)
{	XML_Parser parser;
	INT status;

	/* Assume failure until the parse has completed */

	book->valid = FALSE;

	parser = XML_ParserCreate(NULL);
	if(parser == NULL) return;

	/* Reset the state machine shared with the callbacks */

	state = ST_NEUTRAL;
	stkptr = 0;

	/* Register the callbacks; each one is handed 'book' as user data */

	XML_SetUserData(parser, book);
	XML_SetElementHandler(parser, start_handler, end_handler);
	XML_SetCharacterDataHandler(parser, char_handler);

	/* Parse the whole buffer in one call; TRUE marks the final chunk */

	status = XML_Parse(parser, buf, len, TRUE);

	/* The parser is no longer needed, whatever the outcome */

	XML_ParserFree(parser);

	/* A zero status means the parse failed; leave 'valid' FALSE */

	if(status != 0) book->valid = TRUE;
}


/*
 * Start tag element handler.
 * The XML parser calls this each time it meets a start tag.  Tag names
 * are compared without regard to case.
 *
 *	Title	begins a fresh title and enters ST_TITLE
 *	Authors	resets the book's author count and enters ST_AUTHORS
 *	Person	begins a fresh author and enters ST_PERSON, but only
 *		when the current state is ST_AUTHORS
 *
 * Any other tag is ignored.
 *
 *	Inputs:
 *		vbook	VOID pointer to book structure
 *		name	XML tag name
 *		attr	pointer to array of attributes (not used)
 *
 *	Outputs:
 *		none
 *
 */

static VOID start_handler(PVOID vbook, const XML_Char *name,
		const XML_Char ** attr)
{	PBOOK book = (PBOOK) vbook;

	if(book->verbose == TRUE)
		fprintf(stderr, "\nHandling <%s>\n", name);

	if(strcasecmp(name, "Title") == 0) {
		/* Empty the title buffer and re-arm its truncation message */
		a_title.quiet = FALSE;
		a_title.len = 0;
		push_state(ST_TITLE);
	} else if(strcasecmp(name, "Authors") == 0) {
		book->nauthors = 0;
		push_state(ST_AUTHORS);
	} else if(strcasecmp(name, "Person") == 0 && state == ST_AUTHORS) {
		/* Empty the author buffer and re-arm its truncation message */
		a_author.quiet = FALSE;
		a_author.len = 0;
		push_state(ST_PERSON);
	}
}


/*
 * End tag element handler.
 * This is a callback from the XML parser, and is invoked when an end tag
 * is processed.  Tag names are compared without regard to case.
 *
 *	Title	terminates the collected title, copies it to the book
 *		and restores the previous state
 *	Person	restores the previous state, then terminates the
 *		collected author and adds it to the book unless the book
 *		already has MAXAUTHORS authors (which is logged)
 *	Authors	restores the previous state
 *
 * Any other tag is ignored.
 *
 *	Inputs:
 *		vbook	VOID pointer to book structure
 *		name	XML tag name
 *
 *	Outputs:
 *		none
 *
 */

static VOID end_handler(PVOID vbook, const XML_Char *name)
{	PBOOK book = (PBOOK) vbook;

	if(book->verbose == TRUE)
		fprintf(stderr, "\nHandling </%s>\n", name);

	if(strcasecmp(name, "Title") == 0) {
		a_title.text[a_title.len] = '\0';
		/*
		 * NOTE(review): assumes book->title has room for
		 * MAXATTRTEXT+1 characters - confirm against blu.h.
		 */
		strcpy(book->title, a_title.text);
		pop_state();
		return;
	}
	if(strcasecmp(name, "Person") == 0) {
		/*
		 * start_handler only stacks a state for a Person that
		 * appears directly within Authors.  For any other Person
		 * there is nothing to unstack and no author text has been
		 * collected, so the end tag must be ignored as well;
		 * popping here would unbalance the state stack.
		 */
		if(state != ST_PERSON) return;

		pop_state();
		a_author.text[a_author.len] = '\0';
		if(book->nauthors >= MAXAUTHORS) {
			dolog("Too many authors for book");
			return;
		}
		/*
		 * NOTE(review): assumes each book->authors entry has room
		 * for MAXATTRTEXT+1 characters - confirm against blu.h.
		 */
		strcpy(book->authors[book->nauthors], a_author.text);
		book->nauthors++;
		return;
	}
	if(strcasecmp(name, "Authors") == 0) {
		pop_state();
		return;
	}
}


/*
 * Character data handler.
 *
 */

static VOID char_handler(PVOID vbook, const XML_Char *s, INT len)
{	PBOOK book = (PBOOK) vbook;
	INT i;

	if(book->verbose == TRUE && len > 1) {
		for(i = 0; i < len; i++)
			fputc(s[i], stderr);
	}

	switch(state) {
		case ST_NEUTRAL:
		case ST_AUTHORS:
			break;

		case ST_TITLE:
			if(a_title.len + len > MAXATTRTEXT) {
				len = MAXATTRTEXT - a_title.len;
				if(a_title.quiet == FALSE) {
					dolog("title text truncated");
					a_title.quiet = TRUE;
				}
			}
			strncpy(&a_title.text[a_title.len], s, len);
			a_title.len = a_title.len + len;
			break;

		case ST_PERSON:
			if(a_author.len + len > MAXATTRTEXT) {
				len = MAXATTRTEXT - a_author.len;
				if(a_author.quiet == FALSE) {
					dolog("author text truncated");
					a_author.quiet = TRUE;
				}
			}
			strncpy(&a_author.text[a_author.len], s, len);
			a_author.len = a_author.len + len;
			break;

		default:		/* Ignore */
			break;
	}
}


/*
 * Stack the old state and set a new one.
 * The program is terminated if the stack is already full.
 *
 *	Inputs:
 *		newstate	state to make current
 *
 *	Outputs:
 *		none
 *
 */

static VOID push_state(STATE newstate)
{	/* stkptr indexes the next free slot, so the stack is full at STACKSIZE */
	if(stkptr >= STACKSIZE) {
		fprintf(stderr, "Fatal error: stack overflow\n");
		exit(EXIT_FAILURE);
	}
	stack[stkptr++] = state;
	state = newstate;
}


/*
 * Make the most recently stacked state current again.
 * The program is terminated if no state has been stacked.
 *
 */

static VOID pop_state(VOID)
{	if(stkptr != 0) {
		stkptr--;
		state = stack[stkptr];
		return;
	}

	/* Nothing stacked: pushes and pops are out of step */

	fprintf(stderr, "Fatal error: stack underflow!\n");
	exit(EXIT_FAILURE);
}

/*
 * End of file: extract.c
 *
 */
