#include <xml/pcss-src.h>
#include <xml/pdoc-cpp.h>
#include <xml/nextpcre.h>
#include <xml/savetree.h>
#include <xml/addsnode.h>
#include <xml/nextnode.h>
#include <xml/listnode.h>
#include <xml/attrnode.h>
#include <string.h>
#include <stdlib.h>

/* Unlike in C, the "#" is not special in CSS and "//" comments do not
   exist either. We still parse "..." and '...' strings away, however.
   Deeper tokenization is not done in preparse - e.g. all parens and
   brackets are supposed to occur in pairs and @rules can guide processing.
   http://www.w3.org/Style/CSS/       http://www.w3.org/TR/REC-CSS2/
   
 */
/* Brace shorthands used throughout this file: "___" opens and "____"
   closes a nested block, so that declarations may follow statements. */
#define ___ {
#define ____ }

/* Names given to the nodes and attributes created by the preparser.
   (CERR and LINE are not referenced by the code visible in this file.) */
#define CDOC "ccomment"
#define CCHR "cliteral"
#define CSTR "sliteral"
#define CERR "xxsyntax"
#define CBRK "br"
#define LINE "line" /* attribute */

/* Bits of the preparser state. */
#define CCO 1 /* C comment */
#define CHR 4 /* char literal */
#define STR 8 /* string literal */
#define COM (CCO) /* any comment */
#define LIT (CHR|STR) /* any literal */
/**
 * Split the raw text of TREE into comment, literal and line-break nodes.
 *
 * Walks the characters in [tree->off, tree->end) and appends to TREE
 *  - a CSTR node for every "..." string,
 *  - a CCHR node for every '...' string,
 *  - a CDOC node for every slash-star ... star-slash comment,
 *  - a CBRK node for the start of the text and for every '\n' or '\f';
 *    each CBRK carries a "line" attribute of the form "<number> <filename>",
 *    where <filename> is the "filename" attribute of TREE.
 * A line break found while a comment or literal is open is appended to
 * that open node instead of TREE. A final CBRK is appended unless the
 * previous character recorded was itself a line break.
 *
 * Returns TREE. Nothing is done when TREE is NULL, has no text, or
 * already has children.
 *
 * The value strings built with g_strdup_printf are handed over to
 * xml_node_attribute_add_value, which presumably takes ownership of
 * them (TODO confirm). The private copy of the filename is released
 * before returning; the original code leaked it on every call.
 *
 * NOTE(review): the three characters slash-star-slash open and close a
 * comment at once, because the star is accepted as the closing star.
 */
xml_GNode*
xml_pcss_preparse (xml_GNode* tree)
{
    int off;                /* index of the char currently looked at */
    gchar C = 0;            /* pushback or previous char */
    gchar X = 0;            /* the parsing state: 0, CCO, CHR or STR */
    xml_GNode* node = 0;    /* the comment/literal node currently open */
    xml_GNode* newr;        /* the line marker being created */
    gchar c = 0;            /* the char being processed */
    int line = 0;           /* number of the last line marker emitted */
    gchar* filename;        /* private copy, freed at "returns" */

    if (! tree || !tree->text || tree->children) return tree;

    off = tree->off;
    filename = g_strdup (xml_node_attribute_lookup (tree, "filename"));

    /* the first line marker is an empty range at the start of the text */
    newr = xml_g_node_append_data (tree, CBRK);
    newr->text = tree->text;
    newr->off = newr->end = off;
    xml_node_attribute_add_value (newr, "line",
        g_strdup_printf ("%i %s", ++line, filename));
    goto start;

 close_node: /* the current char terminates the open node */
    X = 0; node->end++; node = 0;
    goto next;
 next_new_node_2: /* two-char opener: the node began one char earlier */
    node->text = tree->text; node->end = off; node->off = off-1;
    goto next;
 next_new_node:
    node->text = tree->text; node->end = off; node->off = off;
    goto next;

    /* the loop proper: "next" remembers the char, "next2" does not */
 next:
    C = c;
 next2:
    off++;
    if (off >= tree->end) goto returns; /* break loop */
    if (node) node->end++; /* the open node grows with each char */
 start:
    switch ((c = tree->text->str[off]))
    {
    case '\"':
        if ((X & (CHR|COM)) || C == '\\') goto next;
        if (X & STR) goto close_node;
        X = STR; node = xml_g_node_append_data (tree, CSTR);
        goto next_new_node;
    case '\'':
        if ((X & (STR|COM)) || C == '\\') goto next;
        if (X & CHR) goto close_node;
        X = CHR; node = xml_g_node_append_data (tree, CCHR);
        goto next_new_node;
    case '\\':
        if (C != '\\') goto next;
        /* an escaped backslash must not escape the following char */
        C = ' ';
        goto next2;
    case '\n':
    case '\f':
        newr = xml_g_node_append_data ((node ? node : tree), CBRK);
        newr->text = tree->text; newr->off = off; newr->end = off+1;
        if (C == '\\') newr->off--; /* include the escaping backslash */
        xml_node_attribute_add_value (newr, "line",
            g_strdup_printf ("%i %s", ++line, filename));
        goto next;
    case '/':
        if (C == '*' && (X & CCO)) goto close_node;
        goto next;
    case '*':
        if (C != '/') goto next;
        if (X & (COM|CHR|STR)) goto next;
        X = CCO; node = xml_g_node_append_data (tree, CDOC);
        goto next_new_node_2;
    default:
        goto next;
    }

 returns:
    if (C != '\n' && C != '\f')
    {
        newr = xml_g_node_append_data (tree, CBRK);
        newr->text = tree->text;  newr->off = newr->end = off;
        xml_node_attribute_add_value (newr, "line",
            g_strdup_printf ("%i %s", ++line, filename));
    }
    g_free (filename);
    return tree;
}
/* States of the declaration scanner in xml_pcss_parse_props. */
enum _stat
{
    _none         = 0,  /* waiting for a property name */
    _name         = 1,  /* inside a property name */
    _name_after   = 2,  /* name complete, waiting for the ':' */
    _value_before = 3,  /* ':' seen, waiting for the value */
    _value        = 4,  /* inside a value */
    _error        = 5   /* syntax error, resynchronizing on ';' */
};
xml_GNode* 
xml_pcss_parse_props (xml_GNode* tree, xml_GNode* head)
{
    g_assert (xml_node_hasname_as_(tree, "*cbloc"));
    ___ gchar* media = (head) ? xml_node_attribute_lookup (head, "media") :0;
    gchar* off = tree->text->str + tree->off;
    gchar* end = tree->text->str + tree->end;
    gchar* A = 0; gchar* E = 0; xml_GNode* key = 0;
    enum _stat X = _none;
    if (off < end && *off == '{') off++; /* we are in a 'bloc' */
    xml_path_pcre_text_to_attr (tree, 
      "//*bloc", "-nobloc", ".");

    for (; off < end ; off++)
    
{
	switch (X)
	
{
	case _none: 
	    if (g_ascii_isspace(*off)) continue;
	    if (!g_ascii_isalnum(*off) && !strchr ("-+.", *off))
		goto _syntax_error;
	    A = off; X = _name; /* fallthrough */
	case _name:
	    E = off;
	    if (g_ascii_isalnum(*off) || strchr("-+.", *off)) continue;
	    X = _name_after; /* fallthrough */
	case _name_after:
	    if (g_ascii_isspace(*off)) continue;
	    if (*off != ':') goto _syntax_error;
	    key = xml_tree_adds (tree, "prop_key",
				A - tree->text->str, E - tree->text->str);
	    X = _value_before; continue;
	case _value_before:
	    if (g_ascii_isspace(*off)) continue;
	    if (*off == '}') goto ends;
	    if (*off == ';') 
	    
{
		xml_GNode* val = 
		    xml_tree_add1 (tree, "prop_value", off - tree->text->str);
		if (! val) 
{ X = _none; continue; }
if (key)
{
		    xml_node_attribute_add_value (val, "prop",
			g_strndup (tree->text->str + key->off, 
				   key->end - key->off));
		}
if (media)
{
		    xml_node_attribute_add (val, "media", media);
		}
X = _none ; continue; }
A = off; E = off+1; X = _value; continue; case _value: if (g_ascii_isspace(*off)) continue; if (*off == '}') goto ends; if (*off == ';')
{
		xml_GNode* val = 
		    xml_tree_adds (tree, "prop_value", 
				   A - tree->text->str, E - tree->text->str);
		if (! val) 
{ X = _none; continue; }
if (key)
{
		    xml_node_attribute_add_value (val, "prop",
			g_strndup (tree->text->str + key->off, 
				   key->end - key->off));
		}
if (media)
{
		    xml_node_attribute_add (val, "media", media);
		}
X = _none; continue; }
E = off+1; continue; _syntax_error: X = _error; case _error: if (*off == ';') X = _none; /* g_printerr ("{%c}", *off); */ continue; }
}
ends: if ( X == _value_before || X == _value)
{
	xml_GNode* val; 
	if (X == _value_before)
	    val = xml_tree_add1 (tree, "prop_value", off - tree->text->str);
	else
	    val = xml_tree_adds (tree, "prop_value", 
				 A - tree->text->str, E - tree->text->str);
	    
	if (key && val)
	
{
	    xml_node_attribute_add_value (val, "prop",
		g_strndup (tree->text->str + key->off, key->end - key->off));
	}
if (media) xml_node_attribute_add (val, "media", media); }
____; xml_path_pcre_text_restore (tree, "//*@-nobloc", "-nobloc"); return tree; }
/* Look through the direct children of NODE for one whose name matches
   "*bloc" and whose text range starts at offset OFF. Returns that child,
   or NULL when NODE is NULL or no child qualifies. */
static xml_GNode* find_block_for (xml_GNode* node, gsize off)
{
    xml_GNode* child = node ? node->children : 0;

    while (child)
    {
        if (xml_node_hasname_as_(child, "*bloc") && child->off == off)
            return child;
        child = child->next;
    }
    return 0;
}
xml_GNode*
xml_pcss_parse_specs (xml_GNode* tree, xml_GNode* head)
{
    int X = 0; 
    gchar* off = tree->text->str + tree->off;
    gchar* end = tree->text->str + tree->end;
    gchar* headA = 0; gchar* headE = 0;

    xml_GNode* block = 0;
    if (head && off < end && *off == '{') off++;  /* for recursive calls */
    else if (off +4 < end && !memcmp (off, "<!--", 4)) off += 4; /* embedded */

    for (; off < end; off++)
    
{
	switch (X)
	
{
	case 0:
	    if (g_ascii_isspace(*off)) continue;
	    if (*off == ';') continue;
	    if (*off == '{') goto skiptoends; 
	    headA = off; headE = off+1; X = 1; continue;
	case 1:
	    if (g_ascii_isspace(*off)) continue;
	    if (*off == ';') goto found_line;
	    if (*off == '{') goto found_block;
	    headE = off+1; continue;
	default:
	    continue;
	}
found_line: if (*headA == '@')
{
	    xml_tree_add (tree, "line_atrule",
			  headA - tree->text->str, headE - tree->text->str);
	}
X = 0; continue; /* lines being not atrules are ignored so far */ found_block: block = find_block_for (tree, off - tree->text->str); if (*headA != '@')
{
	    xml_GNode* node = xml_tree_add (tree, "prop_head",
		headA - tree->text->str, headE - tree->text->str);
	    if (head)
	    
{
		gchar* media = xml_node_attribute_lookup (head, "media");
		if (media)
		    xml_node_attribute_add (node, "media", media);
	    }
if (block) xml_pcss_parse_props (block, node); }
else if (!memcmp (headA, "@media", 6) && !g_ascii_isalnum (headA[6]))
{
	    xml_GNode* node = xml_tree_add (tree, "item_media", 
		headA - tree->text->str, headE - tree->text->str);
	    headA += 6; headE--;
	    while (headA < headE && g_ascii_isspace(*headA)) headA++;
	    while (headA < headE && g_ascii_isspace(*headE)) headE--;
	    if (headA < headE) 
	    
{ 
		headE++;
		xml_node_attribute_add_value (node, "media",
		    g_strndup (headA, headE-headA)); 
	    }
if (block) xml_pcss_parse_specs (block, node); }
else if (!memcmp (headA, "@page", 5) && !g_ascii_isalnum (headA[5]))
{
	    xml_GNode* node = xml_tree_add (tree, "item_page",
		headA - tree->text->str, headE - tree->text->str);
	    if (head)
	    
{
		gchar* media = xml_node_attribute_lookup (head, "media");
		if (media)
		    xml_node_attribute_add (node, "media", media);
	    }
if (block) xml_pcss_parse_props (block, node); }
else
{
	    /* g_printerr ("UNMATCHED'%.*s'", headE-headA, headA); */
	}
/* fallthrough */ skiptoend: if (block)
{ 
	    off = tree->text->str + block->end; 
	}
else
{
	    headA = memchr (off, '}', end-off);
	    off = headA ? headA : end;
	}
block = 0; X = 0; continue; skiptoends: block = find_block_for (tree, off - tree->text->str); goto skiptoend; }
return tree; }
/**
 * Parse a CSS text node into its rule structure.
 *
 * Runs xml_pcss_preparse first if TREE has no children yet, lets
 * xml_pdoc_c_blocks process the tree, and then scans the rules with
 * xml_pcss_parse_specs. Around that scan the text of nodes matching
 * "//*comment" and "//*literal" is handed to xml_path_pcre_text_to_attr
 * (with " " and "." respectively) and restored afterwards.
 *
 * Returns the resulting tree, or TREE itself when it is NULL.
 */
xml_GNode*
xml_pcss_parse (xml_GNode* tree)
{
    xml_GNode* result = tree;

    if (result)
    {
        if (! result->children)
        {
            xml_pcss_preparse (result);
        }

        result = xml_pdoc_c_blocks (result);

        xml_path_pcre_text_to_attr (result, "//*comment", "-noscan", " ");
        xml_path_pcre_text_to_attr (result, "//*literal", "-noscan", ".");

        result = xml_pcss_parse_specs (result, 0);

        xml_path_pcre_text_restore (result, "//*@-noscan", "-noscan");
    }
    return result;
}
/**
 * Run the selector patterns over the text range of NODE.
 *
 * Each pattern is passed to xml_pcre_match_add9 together with a
 * 0-terminated list of names; presumably the list has one entry per
 * capture group, entry 0 standing for the whole match (TODO confirm
 * against xml_pcre_match_add9). The patterns are tried in a fixed
 * order: first the forms that name a class, then the plain element
 * forms.
 */
void
xml_pcss_node_scan_selectors (xml_GNode* node)
{
    /* name lists for the class forms */
    static const gchar* cls[] =
        { "", "=class", " when-id", " pseudo", 0 };
    static const gchar* mark_cls[] =
        { "", "=mark", "=class", " when-id", " pseudo", 0 };
    static const gchar* inside_mark_cls[] =
        { "", " when-inside", "=mark", "=class", " when-id", " pseudo", 0 };
    static const gchar* childof_mark_cls[] =
        { "", " when-childof", "=mark", "=class", " when-id", " pseudo", 0 };
    static const gchar* after_mark_cls[] =
        { "", " when-after", "=mark", "=class", " when-id", " pseudo", 0 };
    /* name lists for the element forms */
    static const gchar* elem[] =
        { "", "=mark=class", " when-id", " pseudo", 0 };
    static const gchar* inside_elem[] =
        { "", " when-inside", "=mark=class", " when-id", " pseudo", 0 };
    static const gchar* childof_elem[] =
        { "", " when-childof", "=mark=class", " when-id", " pseudo", 0 };
    static const gchar* after_elem[] =
        { "", " when-after", "=mark=class", " when-id", " pseudo", 0 };

    /* every pattern starts with "?" and ends with the same optional
       "#id" and ":pseudo" groups */
#define PCSS_SCAN_TAIL "\\s*(#\\w+)?\\s*(:[\\w-]+)?\\s*"
#define PCSS_SCAN(regex, names) \
    xml_pcre_match_add9 (node->text->str, node->off, node->end, \
                         "?" regex PCSS_SCAN_TAIL, node, names)

    /* ' .name {}' or ' *.name {}' or ' *[class="name"] {}' */
    PCSS_SCAN ("\\s*[*]?\\s*\\.([\\w-]+)", cls);
    PCSS_SCAN ("\\s*[*]?\\s*\\[\\s*class=~?([\\w-]+)\\s*\\]", cls);
    PCSS_SCAN ("\\s*[*]?\\s*\\[\\s*class=~?\"([^\"]+)\"\\s*\\]", cls);

    /* 'F .name {}' i.e. element for class */
    PCSS_SCAN ("\\s*[*]?\\s*([\\w-]+)\\s*\\.([\\w+-]+)", mark_cls);

    /* 'E F .name {}' i.e. element for class when descendent of another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*\\b([\\w-]+)\\s*\\.([\\w+-]+)",
               inside_mark_cls);

    /* 'E > F .name {}' i.e. element for class when child of another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*[>]\\s*([\\w-]+)\\s*\\.([\\w+-]+)",
               childof_mark_cls);

    /* 'E + F .name {}' i.e. element for class when following another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*[+]\\s*([\\w-]+)\\s*\\.([\\w+-]+)",
               after_mark_cls);

    /* and start over with the self-referring markups (non-class) */

    /* 'F {}' i.e. element style */
    PCSS_SCAN ("\\s*[*]?\\s*([\\w-]+)", elem);

    /* 'E F {}' i.e. element style when descendent of another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*\\b([\\w-]+)", inside_elem);

    /* 'E > F {}' i.e. element style when child of another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*[>]\\s*([\\w-]+)", childof_elem);

    /* 'E + F {}' i.e. element style when following another */
    PCSS_SCAN ("\\s*([\\w-]+)\\s*[+]\\s*([\\w-]+)", after_elem);

#undef PCSS_SCAN
#undef PCSS_SCAN_TAIL
}
/**
 * Apply xml_pcss_node_scan_selectors to every node that
 * xml_path_node_foreach finds for the path "//prop_head" in TREE.
 * A NULL tree is ignored.
 */
void
xml_pcss_tree_scan_selectors (xml_GNode* tree)
{
    if (tree)
    {
        xml_path_node_foreach (tree, "//prop_head",
            (xml_GNodeForeachFunc) xml_pcss_node_scan_selectors, 0);
    }
}
/* __________________________ - old stuff - ____________________________ */ #if 0
/* Parser states of the disabled xml_pcss_parse_direct.
   NOTE(review): that function also uses ATRULE_HEAD_KEYWORD and
   ATRULE_ERROR, which are not declared here. */
enum
{
    ATRULE_NONE        = 1,
    ATRULE_HEAD        = 2,
    ATRULE_BODY        = 3,
    SELECT_HEAD        = 4,
    SELECT_ERROR       = 5,
    PROPS_NONE         = 6,
    PROPS_NAME         = 7,
    PROPS_NAME_AFTER   = 8,
    PROPS_VALUE_BEFORE = 9,
    PROPS_VALUE        = 10,
    PROPS_ERROR        = 11
};
/* Context bits of the old parser; one directive per line, since a
   "#define" only ends at the end of its line. None of them is
   referenced by the code visible in this file. */
# define OUTSIDE 1
# define ATRULE 2
# define SELECTOR 4
# define PROPERTIES 8
# define RULE (SELECTOR|PROPERTIES)
/*
 * Older single-pass CSS parser. It sits inside the "#if 0" region and
 * is therefore never compiled; it is kept for reference only.
 *
 * It walks the text of TREE char by char with a state machine (X) that
 * records at-rule heads, selectors, property names and property values
 * as nodes of TREE.
 *
 * NOTE(review): it would not build if re-enabled as is:
 *  - "X" and "off" are used without a declaration,
 *  - ATRULE_HEAD_KEYWORD and ATRULE_ERROR are missing from the enum,
 *  - strchr() is called with its arguments swapped,
 *  - xml_tree_add / xml_tree_adds are called as (tree, from, ends, name)
 *    whereas the live code calls them as (tree, name, from, ends),
 *  - xml_path_pcre_text_to_attr is called with three arguments whereas
 *    the live code passes four,
 *  - there are more "___" openers than "____" closers,
 *  - no value is returned at the end of the function.
 */
xml_GNode*
xml_pcss_parse_direct (xml_GNode* tree)
{
    if (! tree) return tree;
    if (! tree->children)
	xml_pcss_preparse (tree);

    xml_path_pcre_text_to_attr (tree, 
      "//+(comment|literal)", "-noscan");

    ___ X = ATRULE_NONE; gsize from = off; gsize ends = off;
    xml_GNode* atrule = 0; xml_GNode* props = 0;  xml_GNode* leader = 0;
    int depth; gsize keyword = 0;

    for (; off < tree->end ; off++) 
    
{
	gchar c = tree->text->str[off];
	switch (X)
	
{
	case ATRULE_BODY:
	case ATRULE_NONE:
	    if (g_ascii_isspace(c)) continue;
	    X = (c == '@') ? ATRULE_HEAD_KEYWORD : SELECT_HEAD;
	    from = off; ends = off+1; continue;
	case ATRULE_HEAD_KEYWORD:
	    if (g_ascii_isalnum (c)) continue;
	    keyword = off;
	    X = ATRULE_HEAD; /* fallthrough */
	case ATRULE_HEAD:
	    if (g_ascii_isspace(c)) continue;
	    if (c == '}') 
{ ends = from = off+1; continue; }
/* ignore */ if (c == ';')
{ 
		xml_tree_adds (tree, from, ends, "atrule_line");
		X = ATRULE_NONE; continue;
	    }
if (c == '{')
{ 
		if (atrule) 
{ X = SELECT_ERROR; continue; }
atrule = xml_tree_add (tree, from, ends, "atrule_head"); if (!memcmp (tree->text->str + from, "@media", keyword - from))
{
		    /* the space-trimmed content (!!) is the media-value */
		    from = keyword;
		    while (from < ends && 
			   g_ascii_isspace(tree->text->str[from])) from++;
		    while (from < ends && 
			   g_ascii_isspace(tree->text->str[ends-1])) ends--;
		    if (from != ends)
			xml_node_attribute_add_value (atrule, "media",
			    g_strndup (tree->text->str + from, ends - from));
		}
else
{
		    /* that @ follows a keyword, take as the media-value */
		    if (from + 1 < keyword)
			xml_node_attribute_add_value (atrule, "media",
			    g_strndup (tree->text->str + from, keyword- from));
		}
X = ATRULE_BODY; continue; }
ends = off+1; continue; case SELECT_HEAD: if (g_ascii_isspace(c)) continue; if (c == '}')
{ 
		if (atrule) /* closing the atrule-group */
		    xml_tree_add (tree, atrule->off, ends, "item_atrule");
		X = ATRULE_NONE; continue;
	    }
if (c == ';')
{ /* ignore */
		X = ATRULE_NONE; continue;
	    }
if (c == '{')
{ 
		leader = xml_tree_add (tree, from, ends, "prop_selector");
		if (atrule)
		
{
		    const gchar* media = 
			xml_node_attribute_lookup (atrule, "media");
		    if (media)
			xml_node_attribute_add_value (leader, "media",
			    g_strndup (tree->text->str +from, keyword -from));
		}
X = PROPS_NONE; continue; }
ends = off+1; continue; case PROPS_NONE: if (g_ascii_isspace(c)) continue; if (g_ascii_isalnum(c) || strchr (c, "-+."))
{ 
		X = PROPS_NAME; from = off; ends = off+1; continue;
	    }
X = PROPS_ERROR; continue; case PROPS_NAME: if (g_ascii_isalnum(c) || strchr (c, "-+."))
{ ends = off+1; continue; }
if (!g_ascii_isspace(c) && c != ':')
{ X = PROPS_ERROR; continue; }
ends = off; X = PROPS_NAME_AFTER; /* fallthrough */ case PROPS_NAME_AFTER: if (g_ascii_isspace (c)) continue; if (c != ':')
{ X = PROPS_ERROR; continue; }
props = xml_tree_add (tree, from, ends, "props_name"); X = PROPS_VALUE_BEFORE; continue; case PROPS_VALUE_BEFORE: if (g_ascii_isspace(c)) continue; from = off; ends = off; depth = 0; X = PROPS_VALUE; /* fallthrough */ case PROPS_VALUE: if (g_ascii_isspace(c)) continue; if (c == '{')
{ depth++; continue; }
if (c == '}')
{ if (depth) 
{ depth--; continue; }
}
else if (c != ';')
{ ends = off+1; continue; }
g_assert (c == '}' || c == ';'); ___ xml_GNode* value = xml_tree_add (tree, from, ends, "props_value"); if (props)
{
		xml_node_attribute_add_value (value, "prop",
		    g_strndup (props->text->str + props->off, 
			       props->end - props->off));
		props = 0;
	    }
/* fallthrough */ case PROPS_ERROR: if (c == ';')
{ X = PROPS_NONE; continue; }
if (c == '}')
{
		if (leader) 
{
		    xml_tree_add (tree, leader->off, off+1, "prop_list");
		    ___ const gchar* media =
			xml_g_attribute_lookup (leader, "media");
		    if (media)
			xml_node_attribute_add_value (leader, "media",
			    g_strndup (tree->text->str +from, keyword -from));
		    ____;
		    leader = 0;
		}
X = ATRULE_NONE; continue; }
case ATRULE_ERROR: if (c == ';' || c == '}')
{ X = ATRULE_NONE; continue; }
continue; }
}
xml_path_pcre_text_to_attr (tree, "//*@-noscan", "-noscan"); }
#endif
/* 
   Local variables:
   c-file-style: "stroustrup"
   End:
 */