#include <xml/pathnode.h>
#include <xml/listpcre.h>
#include <xml/greptree.h>
#include <xml/gstrfuncs.h>
#include <string.h>
#include <pcre.h>

/* cater for pre-C99 compilers */
#define ___ {
#define ____ }

#ifdef NO_UTF8
#define g_utf8_get_char(S)  ((gunichar)*(guchar*)(S))
#undef  g_utf8_next_char
#define g_utf8_next_char(S) ((S)+1)
#endif

#define X(A) /* g_printerr A */ /* debugging */

#define pcre_exec_KEY(_code_,_extra_,_subject_,_ovector_,_ovecsize_) \
        pcre_exec (_code_,_extra_,_subject_,strlen(_subject_),\
                   0,PCRE_NOTEMPTY,_ovector_,_ovecsize_)

/*
   HINT: this file implements a walk-tree that looks for all instances
   that match an xpath string - that's different over xmlgadds.c (and
   the regex variant in xmlgp....)
  
   The xpath syntax herein only knows the abbreviated forms! And only a
   few of them: "/name" = "/child::name" and "." = "self-node()" and
   "//" = "/descendant-or-self::node()/" and "|*" = any/all-children
   plus it can have simple appendix notations as [1] = [position() = 1]
   and "@name" for [attribute::name] existance. However attribute matching
   might slow processing considerably..
  
   Oh, we don't interpret axis-notations. Instead each name-part may
   match with a pcre-spec - we do however we let the match only succeed
   if the element-name/attribute-name did match completly by default.
   To change it back to pcre's substring default you can specify a '*'
   at the start of the the xpathspec string - i.e. "name|addr" would
   match "name" and "addr" nodes but not "surnames" - but it would match
   if you specify "*name|addr". 
   
   for the time being there is no regex match on @attribute notations,
   it is only for the element-names, but that should be still okay,...
 */
inline static const gchar*
advance_to_name_end (const gchar* str)
{
    do 
{
	register guchar c = *str;
	if (c <= ' ' || c == '/' || c == '['  || c == ']' || c == '@')
	    return str;
	str++;
    }
while (1); }
inline static const gchar*
advance_to_attribute_end (const gchar* str)
{
    do 
{
	register guchar c = *str;
	if (c <= ' ' || c == '/' || c == '@')
	    return str;
	str++;
    }
while (1); }
/*
   yes - if the query query string contains attribute matches then the
   matching will be slowed considerably. That's okay as long as it runs
   fast with the normal child matches. Most attrib-matches are for leaf
   nodes anyway - so this call is run for a few nodes only anyway.
 */
static const gchar*
attr (xml_GNode* node, const gchar* str) /* a.k.a. attribute_match(node, str)*/
{
    /* assert (*str == '@') */ 
    if (! node->name || node->name[0] == '<' || node->name[0] == ':')
	return 0;
    str++;
    ___ const gchar* end = advance_to_attribute_end (str);
    /* invalid name = can not match */
    if (*end && *end != '/') return 0; 
    ___ gchar* key = g_strndup (str, end-str);
    if (! key) return 0;
    if (! xml_node_grep_attribute (node, key)) /* <- pcre here ! */
	end = 0;
    g_free (key); 
    return end;
    ____;____;
}
/* ______________________________________________________________ */
xml_GList*
xml_path_pcre_to_list (xml_GNode* node, const gchar* xpath, xml_GList* list)
{
    if (! node) return list;
    if (! xpath) xpath = ".";

    if (xpath[0] == '.') xpath++;
    if (! xpath[0]) return xml_g_list_prepend (list, node); /* recurse-stop! */

    ___ int depth = 0;
    while (xpath[0] == '/') 
{ xpath++; depth++; }
___ const gchar* end = advance_to_name_end (xpath); if (end == xpath) return (*end ? list : xml_g_list_prepend (list, node)); ___ pcre* regex = 0; const char* errmsg; int erridx; int ovector[33]; if (strchr("^(?+|\\", *xpath))
{
	gchar* name = g_strndup (xpath, end-xpath);
        if (*name == '+') 
	    regex = pcre_compile (name+1, 0, &errmsg, &erridx, 0);
        else if (*name == '?' || *name == '^') 
	    regex = pcre_compile (name+1, PCRE_ANCHORED, &errmsg, &erridx, 0);
	else
	    regex = pcre_compile (name, PCRE_ANCHORED, &errmsg, &erridx, 0);

	g_free (name);
	if (! regex) return list;
    }
___ int len = end-xpath; if (end[0] == '@' && len == 1 && xpath[0] == '*')
{
	const gchar* nxt = end; if ((nxt = attr(node, nxt)))
	
{ return xml_path_pcre_to_list (node, nxt, list); }
}
___ int serial = 0; if (end[0] == '[' && g_ascii_isdigit (end[1]))
{
	serial = strtol (&end[1], (gchar**) &end, 0);
	if (! end[0] == ']') goto returns; else end++;
    }
if (0)
{
	if (serial)
	    g_printerr ("<%.*s[%i]>", len, xpath, serial);
	else
	    g_printerr ("<%.*s>", len, xpath);
    }
node = node->children; for (; node ; node = node->next)
{   /* optimized for absolute paths */
	if (! node->name) continue;
	if (memcmp (node->name, xpath, len) || node->name[len])
	    goto match;
    found: 
	if (--serial > 0) continue;
	___ const gchar* nxt = end; 
	if (*nxt == '@') if (!(nxt = attr(node, nxt))) goto deeper;
	list = xml_path_pcre_to_list (node, nxt, list); ____;
	if (! serial) goto returns;
	continue;
    match:
	if (*xpath == '*') 
	
{
	    if (len == 1) goto found;
	    if (xml_g_strstr (node->name, xpath+1, len-1)) goto found;
	}
else if (regex)
{
	    if (0< pcre_exec_KEY (regex, 0, node->name, ovector, 33 ) &&
		(*xpath == '+' || *xpath == '^' || ! node->name[ovector[1]]))
		goto found;
	}
deeper: if (depth > 1) list = xml_path_pcre_to_list (node, xpath-depth, list); }
returns: if (regex) pcre_free (regex); return list; ____;____;____;____;____; }
xml_GList*
xml_path_pcre_list (xml_GNode* tree, const gchar* xpath)
{
    return xml_g_list_reverse (xml_path_pcre_to_list (tree, xpath, 0));
}
/* ______________________________________________________________ */
void
xml_path_pcre_foreach (xml_GNode* node, const gchar* xpath,
			xml_GNodeForeachFunc func, gpointer data)
{
    if (! node) return;
    if (! xpath) xpath = ".";

    if (xpath[0] == '.') xpath++;
    if (! xpath[0]) 
    
{   /* recurse-stop ! */
	func (node, data);
	return;
    }
___ int depth = 0; while (xpath[0] == '/')
{ xpath++; depth++; }
___ const gchar* end = advance_to_name_end (xpath); if (end == xpath)
{
	if (! *end) func (node, data);
	return;
    }
___ pcre* regex = 0; const char* errmsg; int erridx; int ovector[33]; if (strchr("^(?+|\\", *xpath))
{
	gchar* name = g_strndup (xpath, end-xpath);
        if (*name == '+') 
	    regex = pcre_compile (name+1, 0, &errmsg, &erridx, 0);
        else if (*name == '?' || *name == '^') 
	    regex = pcre_compile (name+1, PCRE_ANCHORED, &errmsg, &erridx, 0);
	else
	    regex = pcre_compile (name, PCRE_ANCHORED, &errmsg, &erridx, 0);

	g_free (name);
	if (! regex) return;
    }
___ int len = end-xpath; if (end[0] == '@' && len == 1 && xpath[0] == '*')
{
	const gchar* nxt = end; if ((nxt = attr(node, nxt)))
	
{ xml_path_pcre_foreach (node, nxt, func, data); return; }
}
___ int serial = 0; if (end[0] == '[' && g_ascii_isdigit (end[1]))
{
	serial = strtol (&end[1], (gchar**) &end, 0);
	if (! end[0] == ']') goto returns; else end++;
    }
node = node->children; for (; node ; node = node->next)
{   /* optimized for absolute paths */
	if (! node->name) continue;
	if (memcmp (node->name, xpath, len) || node->name[len])
	    goto match;
    found:
	if (--serial > 0) continue;
	___ const gchar* nxt = end;
	if (*nxt == '@') if (!(nxt = attr(node, nxt))) goto deeper;
	xml_path_pcre_foreach (node, nxt, func, data); ____;
	if (! serial) goto returns;
	continue;
    match:
	if (*xpath == '*') 
	
{
	    if (len == 1) goto found;
	    if (xml_g_strstr (node->name, xpath+1, len-1)) goto found;
	}
else if (regex)
{
	    if (0< pcre_exec_KEY (regex, 0, node->name, ovector, 33 ) &&
		(*xpath == '+' || *xpath == '^' || ! node->name[ovector[1]]))
		goto found;
	}
deeper: if (depth > 1) xml_path_pcre_foreach (node, xpath-depth, func, data); }
returns: if (regex) pcre_free (regex); return; ____;____;____;____;____; }
/* 
   Local variables:
   c-file-style: "stroustrup"
   End:
 */