#include <xml/gconfig.h>
#include <xml/gerror.h>

#include <xml/addspcre.h>
#include <xml/addsnode.h>
#include <xml/listnode.h>
#include <xml/listpcre.h>
#include <stdlib.h>
#include <string.h>

#include <pcre.h>

/* Brace aliases: `___` expands to `{` and `____` expands to `}`, which
   allows a nested block (and thus a fresh declaration scope) to be opened
   in the middle of a function body.
   NOTE(review): identifiers that begin with two underscores are reserved
   for the implementation in C - consider renaming or retiring these. */
#define ___ {
#define ____ }

/* State handed (as the gpointer argument) from xml_path_pcre_add to its
   node callbacks. The member order matters: the struct is filled with a
   positional initializer. */
typedef struct {
    pcre *regex;         /* compiled pattern given to pcre_exec */
    const gchar *markup; /* markup argument given to xml_tree_add2 per match */
    int count;           /* matches for which xml_tree_add2 returned non-zero */
    int ovector[33];     /* pcre_exec output vector; [0]/[1] span the whole match */
} Data;
/* Node callback for xml_path_pcre_add: calls xml_tree_add2 for every
   regex match found in the text range of the node.

   This variant is faster, but it hands the text buffer itself to
   pcre_exec (subject length node->end, start offset node->off) instead
   of a subject cut down to the node. It could therefore match lookbehind
   assertions against text in front of the node - or not match
   start-of-string at the node start if requested.

   node: node to scan; nodes with off == end or without text are skipped.
   data: the Data block of the running xml_path_pcre_add call; its count
         is incremented whenever xml_tree_add2 returns non-zero.

   The scan ends as soon as pcre_exec returns 0 or less.
 */
void xml_path_pcre_add_funcc (xml_GNode* node, gpointer data)
{
    Data* use = data;
    int start;

    if (node->off == node->end) return;
    if (! node->text) return;

    start = node->off;
    while (start <= node->end
           && 0 < pcre_exec (use->regex, 0,
                             node->text->str, node->end,
                             start, 0, use->ovector, 33))
    {
        /* g_printerr ("[%i..%i]", use->ovector[0], use->ovector[1]); */
        if (xml_tree_add2 (node, use->markup,
                           use->ovector[0],
                           use->ovector[1]))
        {
            use->count++;
        }

        /* Continue behind this match. An empty match ends where it
           starts, so restarting there would find it again forever:
           step one position further in that case. */
        start = use->ovector[1];
        if (use->ovector[1] <= use->ovector[0])
            start++;
    }
}
/* Node callback for xml_path_pcre_add: calls xml_tree_add2 for every
   regex match found in the text range of the node.

   This is more correct but slower: the subject given to pcre_exec starts
   at node->off and is node->end - node->off long, so the pattern sees
   nothing but the text of the node. The match offsets are relative to
   that subject and are shifted by node->off before they are handed to
   xml_tree_add2.

   node: node to scan; nodes with off == end or without text are skipped.
   data: the Data block of the running xml_path_pcre_add call; its count
         is incremented whenever xml_tree_add2 returns non-zero.

   The scan ends as soon as pcre_exec returns 0 or less.
 */
void xml_path_pcre_add_func (xml_GNode* node, gpointer data)
{
    Data* use = data;
    int start;

    if (node->off == node->end) return;
    if (! node->text) return;

    start = 0;
    while (start <= node->end - node->off
           && 0 < pcre_exec (use->regex, 0,
                             node->text->str + node->off,
                             node->end - node->off,
                             start, 0, use->ovector, 33))
    {
        if (xml_tree_add2 (node, use->markup,
                           node->off + use->ovector[0],
                           node->off + use->ovector[1]))
        {
            use->count++;
        }

        /* Continue behind this match. An empty match ends where it
           starts, so restarting there would find it again forever:
           step one position further in that case. */
        start = use->ovector[1];
        if (use->ovector[1] <= use->ovector[0])
            start++;
    }
}
/* Non-zero if c is flagged as alphanumeric or as whitespace in
   g_ascii_table. (A stray backslash left inside the replacement list
   made every use of this macro a compile error.) */
#define g_ascii_is_alnum_space(c) \
    ((g_ascii_table[(guchar) (c)] & (G_ASCII_ALNUM|G_ASCII_SPACE)))
/**
   For each node matching the simplified xpath, run the regex on the
   enclosed text array and mark every matching span with a new node
   (xml_tree_add2 is called with the given markup for each match).

   As a special extension to pcre: compile options can not only be
   pushed as (?sx) but at the start of the string also as {sx}.
   (That's okay since a quantifier spec is not quite useful there.)
   Recognized letters: i CASELESS, m MULTILINE, s DOTALL, x EXTENDED,
   X EXTRA, U UNGREEDY, A ANCHORED, Z DOLLAR_ENDONLY, q NOTEMPTY;
   any other character in front of the closing '}' is ignored.

   Behind the optional {flags} one marker character is consumed:
   '*' or '+' are simply skipped, '?' or '^' are skipped and add
   PCRE_ANCHORED.

   tree:   root handed to xml_path_pcre_foreach, must not be null.
   pathXE: path expression handed to xml_path_pcre_foreach.
   regex:  pattern with the optional prefixes described above; it must
           not be empty, neither before nor after the prefixes have been
           removed.
   markup: markup for the new nodes, "_" if null.
   error:  receives the pcre message if the pattern does not compile.

   Returns the number of matches for which xml_tree_add2 reported
   success, -1 if the pattern does not compile, or 0 if a precondition
   fails.

   NOTE(review): 'q' ORs PCRE_NOTEMPTY into the pcre_compile() options,
   but the PCRE API documents it as a pcre_exec() option and the node
   callbacks call pcre_exec() with options 0 - confirm that 'q' works
   as intended.
 */
int
xml_path_pcre_add (xml_GNode* tree, const gchar* pathXE, 
                    const gchar* regex, const gchar* markup, GError** error)
{
    int flag = 0;
    int closed = 0;
    const char* errmsg;
    int erridx;
    pcre* compiled;
    Data data = { 0 };

    g_return_val_if_fail (tree, 0);
    g_return_val_if_fail (regex && *regex, 0);
    if (! markup) markup = "_";

    if (*regex == '{')
    {
        /* consume the option letters up to and including the '}' */
        while (! closed && *++regex)
        {
            switch (*regex)
            {
            case 'i': flag |= PCRE_CASELESS; break;
            case 'm': flag |= PCRE_MULTILINE; break;
            case 's': flag |= PCRE_DOTALL; break;
            case 'x': flag |= PCRE_EXTENDED; break;
            case 'X': flag |= PCRE_EXTRA; break;
            case 'U': flag |= PCRE_UNGREEDY; break;
            case 'A': flag |= PCRE_ANCHORED; break;
            case 'Z': flag |= PCRE_DOLLAR_ENDONLY; break;
            case 'q': flag |= PCRE_NOTEMPTY; break;
            case '}': closed = 1; break;
            default: break;
            }
        }
        if (closed) regex++;
        /* g_printerr ("[%x]", flag) */
    }

    if (*regex == '*' || *regex == '+')
    {
        regex++;
    }
    else if (*regex == '?' || *regex == '^')
    {
        regex++;
        flag |= PCRE_ANCHORED;
    }

    /* A regex made of prefixes only (or of an unterminated "{...") leaves
       an empty pattern, which matches the empty string at every position;
       refuse it just like an empty regex argument. */
    g_return_val_if_fail (*regex, 0);

    compiled = pcre_compile (regex, flag, &errmsg, &erridx, 0);
    if (! compiled)
    {
        xml_g_set_error (error, 4, "errornous regex: %s", errmsg);
        return -1;
    }

    data.regex = compiled;
    data.markup = markup;

    /* The fast callback is chosen when NOTEMPTY was requested or when the
       pattern starts with an alphanumeric or whitespace character
       (presumably because such a pattern cannot begin with an anchor or
       a lookbehind); the slower, exact one otherwise. */
    if ((PCRE_NOTEMPTY & flag) || g_ascii_is_alnum_space ((*regex)))
        xml_path_pcre_foreach (tree, pathXE, xml_path_pcre_add_funcc, &data);
    else
        xml_path_pcre_foreach (tree, pathXE, xml_path_pcre_add_func, &data);

    pcre_free (compiled);
    return data.count;
}
/* ............................................................ */
/* State handed (as the gpointer argument) from xml_path_pcre_add9 to its
   node callback. The member order matters: the struct is filled with a
   positional initializer. */
typedef struct {
    pcre *regex;          /* compiled pattern given to pcre_exec */
    const gchar **markup; /* markup list given to xml_tree_add9 per match */
    int count;            /* matches for which xml_tree_add9 returned non-zero */
    int ovector[33];      /* pcre_exec output vector, passed on to xml_tree_add9 */
} Data9;
/* Node callback for xml_path_pcre_add9: calls xml_tree_add9 with the
   offset vector and the markup list for every regex match found in the
   text range of the node.

   This variant is faster, but it hands the text buffer itself to
   pcre_exec (subject length node->end, start offset node->off) instead
   of a subject cut down to the node. It could therefore match lookbehind
   assertions against text in front of the node - or not match
   start-of-string at the node start if requested.

   node: node to scan; nodes with off == end or without text are skipped.
   data: the Data9 block of the running xml_path_pcre_add9 call; its
         count is incremented whenever xml_tree_add9 returns non-zero.

   The scan ends as soon as pcre_exec returns 0 or less.
 */
void xml_path_pcre_add9_func (xml_GNode* node, gpointer data)
{
    Data9* use = data;
    int start;

    if (node->off == node->end) return;
    if (! node->text) return;

    start = node->off;
    while (start <= node->end
           && 0 < pcre_exec (use->regex, 0,
                             node->text->str, node->end,
                             start, 0, use->ovector, 33))
    {
        /* g_printerr ("[%i..%i]", use->ovector[0], use->ovector[1]); */
        if (xml_tree_add9 (node, use->ovector, use->markup))
        {
            use->count++;
        }

        /* Continue behind this match. An empty match ends where it
           starts, so restarting there would find it again forever:
           step one position further in that case. */
        start = use->ovector[1];
        if (use->ovector[1] <= use->ovector[0])
            start++;
    }
}
/* Non-zero if c is flagged as alphanumeric or as whitespace in
   g_ascii_table. (A stray backslash left inside the replacement list
   made every use of this macro a compile error.) The #undef lets this
   definition take effect whatever an earlier definition looked like. */
#undef g_ascii_is_alnum_space
#define g_ascii_is_alnum_space(c) \
    ((g_ascii_table[(guchar) (c)] & (G_ASCII_ALNUM|G_ASCII_SPACE)))
/**
   For each node matching the simplified xpath, run the regex on the
   enclosed text array and mark the matching spans with new nodes
   (xml_tree_add9 is called with the match offsets and the markup list
   for each match).

   As a special extension to pcre: compile options can not only be
   pushed as (?sx) but at the start of the string also as {sx}.
   (That's okay since a quantifier spec is not quite useful there.)
   Recognized letters: i CASELESS, m MULTILINE, s DOTALL, x EXTENDED,
   X EXTRA, U UNGREEDY, A ANCHORED, Z DOLLAR_ENDONLY, q NOTEMPTY;
   any other character in front of the closing '}' is ignored.

   Behind the optional {flags} one marker character is consumed:
   '*' or '+' are simply skipped, '?' or '^' are skipped and add
   PCRE_ANCHORED.

   The markup list given is interpreted as in perl: the string at
   index $0 is for the whole match, subsequent $1..$9 shall be the
   markup for parts of the match. The markup list must be
   null-terminated. If you want to leave out a markup then fill the
   place with the empty string "". A null markup list is replaced by
   the list { "_", 0 }.

   tree:   root handed to xml_path_pcre_foreach, must not be null.
   pathXE: path expression handed to xml_path_pcre_foreach.
   regex:  pattern with the optional prefixes described above; it must
           not be empty, neither before nor after the prefixes have been
           removed.
   error:  receives the pcre message if the pattern does not compile.

   Returns the number of matches for which xml_tree_add9 reported
   success, -1 if the pattern does not compile, or 0 if a precondition
   fails.

   NOTE(review): 'q' ORs PCRE_NOTEMPTY into the pcre_compile() options,
   but the PCRE API documents it as a pcre_exec() option and the node
   callback calls pcre_exec() with options 0 - confirm that 'q' works
   as intended.
 */
int
xml_path_pcre_add9 (xml_GNode* tree, const gchar* pathXE, 
                     const gchar* regex, const gchar* markup[], GError** error)
{
    static const gchar* default_markup[] = { "_", 0 };
    int flag = 0;
    int closed = 0;
    const char* errmsg;
    int erridx;
    pcre* compiled;
    Data9 data = { 0 };

    g_return_val_if_fail (tree, 0);
    g_return_val_if_fail (regex && *regex, 0);
    if (! markup) markup = default_markup;

    if (*regex == '{')
    {
        /* consume the option letters up to and including the '}' */
        while (! closed && *++regex)
        {
            switch (*regex)
            {
            case 'i': flag |= PCRE_CASELESS; break;
            case 'm': flag |= PCRE_MULTILINE; break;
            case 's': flag |= PCRE_DOTALL; break;
            case 'x': flag |= PCRE_EXTENDED; break;
            case 'X': flag |= PCRE_EXTRA; break;
            case 'U': flag |= PCRE_UNGREEDY; break;
            case 'A': flag |= PCRE_ANCHORED; break;
            case 'Z': flag |= PCRE_DOLLAR_ENDONLY; break;
            case 'q': flag |= PCRE_NOTEMPTY; break;
            case '}': closed = 1; break;
            default: break;
            }
        }
        if (closed) regex++;
        /* g_printerr ("[%x]", flag) */
    }

    if (*regex == '*' || *regex == '+')
    {
        regex++;
    }
    else if (*regex == '?' || *regex == '^')
    {
        regex++;
        flag |= PCRE_ANCHORED;
    }

    /* A regex made of prefixes only (or of an unterminated "{...") leaves
       an empty pattern, which matches the empty string at every position;
       refuse it just like an empty regex argument. */
    g_return_val_if_fail (*regex, 0);

    compiled = pcre_compile (regex, flag, &errmsg, &erridx, 0);
    if (! compiled)
    {
        xml_g_set_error (error, 4, "errornous regex: %s", errmsg);
        return -1;
    }

    data.regex = compiled;
    data.markup = markup;

    xml_path_pcre_foreach (tree, pathXE, xml_path_pcre_add9_func, &data);

    pcre_free (compiled);
    return data.count;
}
/* ............................................................ */
/* State handed (as the gpointer argument) from xml_path_pcre_add_with to
   its node callbacks. The member order matters: the struct is filled
   with a positional initializer. */
typedef struct {
    pcre *regex;           /* compiled pattern given to pcre_exec */
    xml_GNodeAddFunc func; /* called as func (node, data, start, end) per match */
    gpointer data;         /* opaque argument passed through to func */
    int count;             /* matches for which func returned non-zero */
    int ovector[33];       /* pcre_exec output vector; [0]/[1] span the whole match */
} ForData;
/* Node callback for xml_path_pcre_add_with: calls the user function for
   every regex match found in the text range of the node.

   This variant is faster, but it hands the text buffer itself to
   pcre_exec (subject length node->end, start offset node->off) instead
   of a subject cut down to the node. It could therefore match lookbehind
   assertions against text in front of the node - or not match
   start-of-string at the node start if requested.

   node: node to scan; nodes with off == end or without text are skipped.
   data: the ForData block of the running xml_path_pcre_add_with call;
         its func is called as func (node, data, start, end) and its
         count is incremented whenever func returns non-zero.

   The scan ends as soon as pcre_exec returns 0 or less.
 */
void xml_path_pcre_add_with_funcc (xml_GNode* node, gpointer data)
{
    ForData* use = data;
    int start;

    if (node->off == node->end) return;
    if (! node->text) return;

    start = node->off;
    while (start <= node->end
           && 0 < pcre_exec (use->regex, 0,
                             node->text->str, node->end,
                             start, 0, use->ovector, 33))
    {
        if (use->func (node, use->data,
                       use->ovector[0],
                       use->ovector[1]))
        {
            use->count++;
        }

        /* Continue behind this match. An empty match ends where it
           starts, so restarting there would find it again forever:
           step one position further in that case. */
        start = use->ovector[1];
        if (use->ovector[1] <= use->ovector[0])
            start++;
    }
}
/* Node callback for xml_path_pcre_add_with: calls the user function for
   every regex match found in the text range of the node.

   This is more correct but slower: the subject given to pcre_exec starts
   at node->off and is node->end - node->off long, so the pattern sees
   nothing but the text of the node. The match offsets are relative to
   that subject and are shifted by node->off before they are handed to
   the user function.

   node: node to scan; nodes with off == end or without text are skipped.
   data: the ForData block of the running xml_path_pcre_add_with call;
         its func is called as func (node, data, start, end) and its
         count is incremented whenever func returns non-zero.

   The scan ends as soon as pcre_exec returns 0 or less.
 */
void xml_path_pcre_add_with_func (xml_GNode* node, gpointer data)
{
    ForData* use = data;
    int start;

    if (node->off == node->end) return;
    if (! node->text) return;

    start = 0;
    while (start <= node->end - node->off
           && 0 < pcre_exec (use->regex, 0,
                             node->text->str + node->off,
                             node->end - node->off,
                             start, 0, use->ovector, 33))
    {
        if (use->func (node, use->data,
                       node->off + use->ovector[0],
                       node->off + use->ovector[1]))
        {
            use->count++;
        }

        /* Continue behind this match. An empty match ends where it
           starts, so restarting there would find it again forever:
           step one position further in that case. */
        start = use->ovector[1];
        if (use->ovector[1] <= use->ovector[0])
            start++;
    }
}
/**
   For each node matching the simplified xpath, run the regex on the
   enclosed text array and call func (node, data, start, end) for every
   matching span.

   As a special extension to pcre: compile options can not only be
   pushed as (?sx) but at the start of the string also as {sx}.
   Recognized letters: i CASELESS, m MULTILINE, s DOTALL, x EXTENDED,
   X EXTRA, U UNGREEDY, A ANCHORED, Z DOLLAR_ENDONLY, q NOTEMPTY;
   any other character in front of the closing '}' is ignored.

   tree:   root handed to xml_path_pcre_foreach, must not be null.
   pathXE: path expression handed to xml_path_pcre_foreach.
   regex:  pattern with the optional {flags} prefix; it must not be
           empty, neither before nor after the prefix has been removed.
   func:   called per match; xml_tree_add is used if null.
   data:   opaque argument passed through to func.
   error:  receives the pcre message if the pattern does not compile.

   Returns the number of matches for which func returned non-zero, -1 if
   the pattern does not compile, or 0 if a precondition fails.

   NOTE(review): unlike xml_path_pcre_add and xml_path_pcre_add9, no
   leading '*', '+', '?' or '^' marker is consumed here - confirm that
   this difference is intended.
   NOTE(review): 'q' ORs PCRE_NOTEMPTY into the pcre_compile() options,
   but the PCRE API documents it as a pcre_exec() option and the node
   callbacks call pcre_exec() with options 0 - confirm that 'q' works
   as intended.
 */
int
xml_path_pcre_add_with (xml_GNode* tree, const gchar* pathXE, 
                         const gchar* regex,  
                         xml_GNodeAddFunc func, gpointer data, GError** error)
{
    int flag = 0;
    int closed = 0;
    const char* errmsg;
    int erridx;
    pcre* compiled;
    ForData dat = { 0 };

    g_return_val_if_fail (tree, 0);
    g_return_val_if_fail (regex && *regex, 0);
    if (! func) func = (xml_GNodeAddFunc) xml_tree_add;

    if (*regex == '{')
    {
        /* consume the option letters up to and including the '}' */
        while (! closed && *++regex)
        {
            switch (*regex)
            {
            case 'i': flag |= PCRE_CASELESS; break;
            case 'm': flag |= PCRE_MULTILINE; break;
            case 's': flag |= PCRE_DOTALL; break;
            case 'x': flag |= PCRE_EXTENDED; break;
            case 'X': flag |= PCRE_EXTRA; break;
            case 'U': flag |= PCRE_UNGREEDY; break;
            case 'A': flag |= PCRE_ANCHORED; break;
            case 'Z': flag |= PCRE_DOLLAR_ENDONLY; break;
            case 'q': flag |= PCRE_NOTEMPTY; break;
            case '}': closed = 1; break;
            default: break;
            }
        }
        if (closed) regex++;
        /* g_printerr ("[%x]", flag) */
    }

    /* A regex made of the prefix only (or of an unterminated "{...")
       leaves an empty pattern, which matches the empty string at every
       position; refuse it just like an empty regex argument. */
    g_return_val_if_fail (*regex, 0);

    compiled = pcre_compile (regex, flag, &errmsg, &erridx, 0);
    if (! compiled)
    {
        xml_g_set_error (error, 4, "errornous regex: %s", errmsg);
        return -1;
    }

    dat.regex = compiled;
    dat.func = func;
    dat.data = data;

    /* The fast callback is chosen when NOTEMPTY was requested or when the
       pattern starts with an alphanumeric or whitespace character
       (presumably because such a pattern cannot begin with an anchor or
       a lookbehind); the slower, exact one otherwise. */
    if ((PCRE_NOTEMPTY & flag) || g_ascii_is_alnum_space ((*regex)))
        xml_path_pcre_foreach (tree, pathXE, xml_path_pcre_add_with_funcc, &dat);
    else
        xml_path_pcre_foreach (tree, pathXE, xml_path_pcre_add_with_func, &dat);

    pcre_free (compiled);
    return dat.count;
}
/* 
   Local variables:
   c-file-style: "stroustrup"
   End:
 */