/**
 * xml_g_parse_context_parse:
 * @context: the parse context; must not be NULL, must not already be in
 *           STATE_ERROR and must not currently be inside a parse call
 * @text: chunk of (UTF-8) text to feed to the parser; must not be NULL
 * @text_len: length of @text in bytes, or a negative value to have the
 *            length computed with strlen()
 * @error: return location for a #GError
 *
 * Feeds one chunk of a document into the parser state machine.  The chunk
 * may end in the middle of a multi-byte character or in the middle of a
 * token: the undecoded bytes are kept in context->leftover_char_portion and
 * unfinished tokens are accumulated with add_to_partial(), so parsing
 * resumes with the next call.
 *
 * The parser callbacks in context->parser (start_element, end_element,
 * text, passthrough) are invoked as the corresponding constructs are
 * completed.
 *
 * Behaviour is influenced by context->flags:
 *  - G_MARKUP_CONTINUE:   after an error the RECOVER() macro skips past the
 *                         position where the current loop iteration started
 *                         and resumes parsing instead of stopping.
 *  - G_MARKUP_SIMPLIFIED: accepts attributes without a value, unquoted
 *                         attribute values and the empty close tag "</>".
 *  - G_MARKUP_SILENCED:   suppresses the error about junk following a
 *                         close element name.
 *
 * Return value: %TRUE if text_len is 0; %FALSE if a precondition fails;
 * otherwise %TRUE unless the context is in STATE_ERROR when the call
 * finishes.
 */
gboolean
xml_g_parse_context_parse (xml_GParseContext *context,
                           const gchar       *text,
                           gssize             text_len,
                           GError           **error)
{
  const gchar *first_invalid;
  /* Position at which the current iteration of the main loop started;
   * RECOVER() advances past it when resuming after an error.
   */
  const gchar *recover = text;

  /* If the parser is in STATE_ERROR and G_MARKUP_CONTINUE is set, switch to
   * STATE and advance until context->iter is beyond `recover`.  If
   * advance_char() reports failure first, fall back to STATE_ERROR.
   */
# define RECOVER(STATE) \
  if (context->state == STATE_ERROR && context->flags & G_MARKUP_CONTINUE) \
    { context->state = STATE; \
      while (recover >= context->iter) \
        if (!advance_char(context)) { context->state = STATE_ERROR; break; }\
    }

  g_return_val_if_fail (context != NULL, FALSE);
  g_return_val_if_fail (text != NULL, FALSE);
  g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
  g_return_val_if_fail (!context->parsing, FALSE);

  if (text_len < 0)
    text_len = strlen (text);

  if (text_len == 0)
    return TRUE;

  context->parsing = TRUE;

  if (context->leftover_char_portion)
    {
      const gchar *first_char;

      /* Locate the first byte of @text that starts a new character;
       * everything before it completes the character left over from the
       * previous chunk.
       */
      if ((*text & 0xc0) != 0x80)
        first_char = text;
      else
        first_char = g_utf8_find_next_char (text, text + text_len);

      if (first_char)
        {
          /* The leftover character is now complete: parse it on its own
           * through a recursive call, then carry on with the rest.
           */
          GString *portion = context->leftover_char_portion;

          g_string_append_len (context->leftover_char_portion,
                               text, first_char - text);

          /* Temporarily reset the context so the recursive call passes
           * the precondition checks and does not see the leftover again.
           */
          context->parsing = FALSE;
          context->leftover_char_portion = NULL;

          if (!xml_g_parse_context_parse (context,
                                          portion->str, portion->len,
                                          error))
            {
              g_assert (context->state == STATE_ERROR);
            }

          g_string_free (portion, TRUE);
          context->parsing = TRUE;

          /* Skip the fraction of the character that was in this text. */
          text_len -= (first_char - text);
          text = first_char;
        }
      else
        {
          /* Still no character boundary: the whole chunk belongs to the
           * leftover.  Complain once it grows beyond 7 bytes.
           */
          g_string_append_len (context->leftover_char_portion,
                               text, text_len);

          if (context->leftover_char_portion->len > 7)
            {
              set_error (context,
                         error,
                         G_MARKUP_ERROR_BAD_UTF8,
                         _("Invalid UTF-8 encoded text"));
            }

          goto finished;
        }
    }

  context->current_text = text;
  context->current_text_len = text_len;
  context->iter = context->current_text;
  context->start = context->iter;

  /* Nothing left after consuming the leftover character. */
  if (context->current_text_len == 0)
    goto finished;

  /* A chunk must not begin with a UTF-8 continuation byte at this point. */
  if ((*context->current_text & 0xc0) == 0x80)
    {
      set_error (context,
                 error,
                 G_MARKUP_ERROR_BAD_UTF8,
                 _("Invalid UTF-8 encoded text"));
      goto finished;
    }

  find_current_text_end (context);

  if (!g_utf8_validate (context->current_text,
                        context->current_text_len,
                        &first_invalid))
    {
      gint newlines = 0;
      const gchar *p;

      /* Account for the newlines of the chunk before reporting the error. */
      p = context->current_text;
      while (p != context->current_text_end)
        {
          if (*p == '\n')
            ++newlines;
          ++p;
        }

      context->line_number += newlines;

      set_error (context,
                 error,
                 G_MARKUP_ERROR_BAD_UTF8,
                 _("Invalid UTF-8 encoded text"));
      goto finished;
    }

  while (context->iter != context->current_text_end)
    {
      recover = context->iter;

      switch (context->state)
        {
        case STATE_START:
          /* Possible next state: AFTER_OPEN_ANGLE */
          g_assert (context->tag_stack == NULL);

          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              if (*context->iter == '<')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_OPEN_ANGLE;
                  context->start = context->iter;
                  context->document_empty = FALSE;
                }
              else
                {
                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("Document must begin with an element (e.g. <book>)"));
                }
            }
          break;

        case STATE_AFTER_OPEN_ANGLE:
          /* Possible next states: INSIDE_PASSTHROUGH, AFTER_CLOSE_TAG_SLASH,
           * INSIDE_OPEN_TAG_NAME
           */
          if (strchr ("?!#%$@<'`*.", *context->iter))
            {
              /* The '<' was already consumed; put it back into the
               * accumulated passthrough text.
               */
              const gchar *openangle = "<";

              add_to_partial (context, openangle, openangle + 1);
              context->start = context->iter;
              context->balance = 1;
              context->state = STATE_INSIDE_PASSTHROUGH;
            }
          else if (*context->iter == '/')
            {
              advance_char (context);
              context->state = STATE_AFTER_CLOSE_TAG_SLASH;
            }
          else if (is_name_start_char (g_utf8_get_char (context->iter)))
            {
              context->state = STATE_INSIDE_OPEN_TAG_NAME;
              context->start = context->iter;
            }
          else
            {
              gchar buf[7];

              set_error (context,
                         error,
                         G_MARKUP_ERROR_PARSE,
                         _("'%s' is not a valid character following "
                           "a '<' character; it may not begin an "
                           "element name"),
                         utf8_str (context->iter, buf));
            }
          break;

        case STATE_AFTER_CLOSE_ANGLE:
          /* Possible next states: START (no element open), INSIDE_TEXT */
          if (context->tag_stack == NULL)
            {
              context->start = NULL;
              context->state = STATE_START;
            }
          else
            {
              context->start = context->iter;
              context->state = STATE_INSIDE_TEXT;
            }
          break;

        case STATE_AFTER_ELISION_SLASH:
          /* Possible next state: AFTER_CLOSE_ANGLE */
          {
            /* The element was closed with "/>": report its end first. */
            GError *tmp_error = NULL;

            g_assert (context->tag_stack != NULL);

            if (context->parser->end_element)
              (* context->parser->end_element) ((void*) context,
                                                context->tag_stack->data,
                                                context->user_data,
                                                &tmp_error);

            if (tmp_error)
              {
                mark_error (context, tmp_error);
                xml_g_propagate_error (error, tmp_error);
              }
            else
              {
                if (*context->iter == '>')
                  {
                    advance_char (context);
                    context->state = STATE_AFTER_CLOSE_ANGLE;
                  }
                else
                  {
                    gchar buf[7];

                    set_error (context,
                               error,
                               G_MARKUP_ERROR_PARSE,
                               _("Odd character '%s', expected a '>' character "
                                 "to end the start tag of element '%s'"),
                               utf8_str (context->iter, buf),
                               current_element (context));
                  }
              }

            /* Pop the element; done last because the error message above
             * still refers to it.
             */
            g_free (context->tag_stack->data);
            context->tag_stack = g_slist_delete_link (context->tag_stack,
                                                      context->tag_stack);
          }
          break;

        case STATE_INSIDE_OPEN_TAG_NAME:
          /* Possible next state: BETWEEN_ATTRIBUTES */
          advance_to_name_end (context);

          if (context->iter == context->current_text_end)
            {
              /* The name continues in the next chunk; save what we have. */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The name is complete: push it onto the tag stack. */
              add_to_partial (context, context->start, context->iter);
              context->tag_stack =
                g_slist_prepend (context->tag_stack,
                                 g_string_free (context->partial_chunk,
                                                FALSE));

              context->partial_chunk = NULL;

              context->state = STATE_BETWEEN_ATTRIBUTES;
              context->start = NULL;
            }
          break;

        case STATE_INSIDE_ATTRIBUTE_NAME:
          /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN,
           * BETWEEN_ATTRIBUTES (simplified mode or error recovery)
           */
          advance_to_name_end (context);

          if (context->iter == context->current_text_end)
            {
              /* The name continues in the next chunk; save what we have. */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The name is complete: register the attribute. */
              add_to_partial (context, context->start, context->iter);
              add_attribute (context, g_string_free (context->partial_chunk, FALSE));

              context->partial_chunk = NULL;
              context->start = NULL;

              if (*context->iter == '=')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
                }
              else if (context->flags & G_MARKUP_SIMPLIFIED)
                {
                  /* Simplified mode: an attribute without '=' gets an
                   * empty value.
                   */
                  context->attr_values[context->cur_attr] = g_strdup("");
                  context->state = STATE_BETWEEN_ATTRIBUTES;
                }
              else
                {
                  gchar buf[7];

                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("Odd character '%s', expected a '=' after "
                               "attribute name '%s' of element '%s'"),
                             utf8_str (context->iter, buf),
                             current_attribute (context),
                             current_element (context));
                }
            }
          RECOVER(STATE_BETWEEN_ATTRIBUTES);
          break;

        case STATE_BETWEEN_ATTRIBUTES:
          /* Possible next states: AFTER_CLOSE_ANGLE, AFTER_ELISION_SLASH,
           * INSIDE_ATTRIBUTE_NAME
           */
          skip_spaces (context);

          if (context->iter != context->current_text_end)
            {
              if (*context->iter == '/')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_ELISION_SLASH;
                }
              else if (*context->iter == '>')
                {
                  advance_char (context);
                  context->state = STATE_AFTER_CLOSE_ANGLE;
                }
              else if (is_name_start_char (g_utf8_get_char (context->iter)))
                {
                  context->state = STATE_INSIDE_ATTRIBUTE_NAME;
                  /* start of attribute name */
                  context->start = context->iter;
                }
              else
                {
                  gchar buf[7];

                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("Odd character '%s', expected a '>' or '/' "
                               "character to end the start tag of "
                               "element '%s', or optionally an attribute; "
                               "perhaps you used an invalid character in "
                               "an attribute name"),
                             utf8_str (context->iter, buf),
                             current_element (context));
                }

              /* If the start tag just ended, report the element together
               * with the attributes collected so far.
               */
              if (context->state == STATE_AFTER_ELISION_SLASH ||
                  context->state == STATE_AFTER_CLOSE_ANGLE)
                {
                  const gchar *start_name;
                  /* A single NULL entry serves as the empty,
                   * NULL-terminated attribute list.
                   */
                  const gchar *empty = NULL;
                  const gchar **attr_names = &empty;
                  const gchar **attr_values = &empty;
                  GError *tmp_error;

                  start_name = current_element (context);

                  if (context->cur_attr >= 0)
                    {
                      attr_names = (const gchar**)context->attr_names;
                      attr_values = (const gchar**)context->attr_values;
                    }

                  tmp_error = NULL;
                  if (context->parser->start_element)
                    (* context->parser->start_element) ((void*) context,
                                                        start_name,
                                                        (const gchar **)attr_names,
                                                        (const gchar **)attr_values,
                                                        context->user_data,
                                                        &tmp_error);

                  /* Release the attribute strings, leaving cur_attr at -1. */
                  for (; context->cur_attr >= 0; context->cur_attr--)
                    {
                      int pos = context->cur_attr;

                      g_free (context->attr_names[pos]);
                      g_free (context->attr_values[pos]);
                      context->attr_names[pos] = context->attr_values[pos] = NULL;
                    }
                  g_assert (context->cur_attr == -1);
                  g_assert (context->attr_names == NULL ||
                            context->attr_names[0] == NULL);
                  g_assert (context->attr_values == NULL ||
                            context->attr_values[0] == NULL);

                  if (tmp_error != NULL)
                    {
                      mark_error (context, tmp_error);
                      xml_g_propagate_error (error, tmp_error);
                    }
                }
            }
          RECOVER(STATE_BETWEEN_ATTRIBUTES);
          break;

        case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
          /* Possible next states: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ/DIRECT] */
          if (*context->iter == '"')
            {
              advance_char (context);
              context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
              context->start = context->iter;
            }
          else if (*context->iter == '\'')
            {
              advance_char (context);
              context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
              context->start = context->iter;
            }
          else if (context->flags & G_MARKUP_SIMPLIFIED && (
                     g_ascii_isalnum (*context->iter) ||
                     strchr ("_#%$@", *context->iter)))
            {
              /* Simplified mode: unquoted value.  The character just
               * consumed is the first character of the value, hence the
               * start is one byte behind the iterator.
               */
              advance_char (context);
              context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DIRECT;
              context->start = context->iter - 1;
            }
          else
            {
              gchar buf[7];

              set_error (context,
                         error,
                         G_MARKUP_ERROR_PARSE,
                         _("Odd character '%s', expected an open quote mark "
                           "after the equals sign when giving value for "
                           "attribute '%s' of element '%s'"),
                         utf8_str (context->iter, buf),
                         current_attribute (context),
                         current_element (context));
            }
          RECOVER(STATE_BETWEEN_ATTRIBUTES);
          break;

        case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
        case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
        case STATE_INSIDE_ATTRIBUTE_VALUE_DIRECT:
          /* Possible next state: BETWEEN_ATTRIBUTES */
          if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
            {
              /* Scan up to the closing single quote. */
              do
                {
                  if (*context->iter == '\'')
                    break;
                }
              while (advance_char (context));
            }
          else if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
            {
              /* Scan up to the closing double quote. */
              do
                {
                  if (*context->iter == '"')
                    break;
                }
              while (advance_char (context));
            }
          else if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DIRECT)
            {
              /* Unquoted value: it ends at the first non-graphic
               * character, at '<' or '>', or at a '/' unless a ':' or '.'
               * has been seen in the value before.
               */
              int dq = 0;

              do
                {
                  if (! g_ascii_isgraph (*context->iter))
                    break;
                  if (*context->iter == '<' || *context->iter == '>')
                    break;
                  if (*context->iter == '/' && !dq)
                    break;
                  if (*context->iter == ':' || *context->iter == '.')
                    dq = 1;
                }
              while (advance_char (context));
            }

          if (context->iter == context->current_text_end)
            {
              /* The value continues in the next chunk; save what we have. */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The value is complete: unescape it into the attribute. */
              add_to_partial (context, context->start, context->iter);

              g_assert (context->cur_attr >= 0);

              if (unescape_text (context,
                                 context->partial_chunk->str,
                                 context->partial_chunk->str +
                                 context->partial_chunk->len,
                                 &context->attr_values[context->cur_attr],
                                 error))
                {
                  /* Quoted values: step over the closing quote.  A direct
                   * value has no terminator of its own to consume.
                   */
                  if (context->state != STATE_INSIDE_ATTRIBUTE_VALUE_DIRECT)
                    advance_char (context);
                  context->state = STATE_BETWEEN_ATTRIBUTES;
                  context->start = NULL;
                }

              truncate_partial (context);
            }
          break;

        case STATE_INSIDE_TEXT:
          /* Possible next state: AFTER_OPEN_ANGLE */
          do
            {
              if (*context->iter == '<')
                break;
            }
          while (advance_char (context));

          /* The text seen so far is always accumulated; it is only
           * delivered once the terminating '<' has been found.
           */
          add_to_partial (context, context->start, context->iter);

          if (context->iter != context->current_text_end)
            {
              gchar *unescaped = NULL;

              if (unescape_text (context,
                                 context->partial_chunk->str,
                                 context->partial_chunk->str +
                                 context->partial_chunk->len,
                                 &unescaped,
                                 error))
                {
                  GError *tmp_error = NULL;

                  if (context->parser->text)
                    (*context->parser->text) ((void*) context,
                                              unescaped,
                                              strlen (unescaped),
                                              context->user_data,
                                              &tmp_error);

                  g_free (unescaped);

                  if (tmp_error == NULL)
                    {
                      /* Step over the '<'. */
                      advance_char (context);
                      context->state = STATE_AFTER_OPEN_ANGLE;
                      /* could begin a passthrough */
                      context->start = context->iter;
                    }
                  else
                    {
                      mark_error (context, tmp_error);
                      xml_g_propagate_error (error, tmp_error);
                    }
                }

              truncate_partial (context);
            }
          break;

        case STATE_AFTER_CLOSE_TAG_SLASH:
          /* Possible next state: INSIDE_CLOSE_TAG_NAME.  In simplified
           * mode a '>' directly after "</" is accepted as well, which
           * yields an empty close name.
           */
          if (is_name_start_char (g_utf8_get_char (context->iter)) ||
              (*context->iter == '>' && context->flags & G_MARKUP_SIMPLIFIED))
            {
              context->state = STATE_INSIDE_CLOSE_TAG_NAME;
              /* start of tag name */
              context->start = context->iter;
            }
          else
            {
              gchar buf[7];

              set_error (context,
                         error,
                         G_MARKUP_ERROR_PARSE,
                         _("'%s' is not a valid character following "
                           "the characters '</'; '%s' may not begin an "
                           "element name"),
                         utf8_str (context->iter, buf),
                         utf8_str (context->iter, buf));
            }
          break;

        case STATE_INSIDE_CLOSE_TAG_NAME:
          /* Possible next state: AFTER_CLOSE_ANGLE */
          advance_to_name_end (context);

          if (context->iter == context->current_text_end)
            {
              /* The name continues in the next chunk; save what we have. */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The name is complete: check it against the top of the tag
               * stack, pop the stack and invoke the end_element callback.
               */
              gchar *close_name;

              add_to_partial (context, context->start, context->iter);

              close_name = g_string_free (context->partial_chunk, FALSE);
              context->partial_chunk = NULL;

              /* Skip whitespace between the name and '>'.
               *
               * NOTE(review): if advance_char() stops because the end of
               * the chunk was reached, the dereferences of context->iter
               * below look at the byte at current_text_end -- confirm that
               * the caller's buffer is always readable (e.g. terminated)
               * at that position.
               */
              while (*context->iter != '>')
                {
                  if (!g_ascii_isspace (*context->iter))
                    break;
                  if (!advance_char (context))
                    break;
                }

              if (*context->iter != '>' && !(context->flags&G_MARKUP_SILENCED))
                {
                  gchar buf[7];

                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("'%s' is not a valid character after "
                               "the close element name '%s'; the allowed "
                               "character is '>'"),
                             utf8_str (context->iter, buf),
                             close_name);
                }

              /* Skip any further printable ASCII junk (but not '<') in
               * search of the closing '>'.
               */
              while (*context->iter != '>')
                {
                  if ((guchar)*context->iter > '~')
                    break;
                  if ((guchar)*context->iter < ' ')
                    break;
                  if ((guchar)*context->iter == '<')
                    break;
                  if (!advance_char (context))
                    break;
                }

              if (*context->iter != '>')
                {
                  gchar buf[7];

                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("'%s' is not a valid character inside "
                               "the close element name '%s'; the allowed "
                               "character is '>'"),
                             utf8_str (context->iter, buf),
                             close_name);
                }
              else if (context->tag_stack == NULL)
                {
                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("Element '%s' was closed, no element "
                               "is currently open"),
                             close_name);
                }
              else if (strcmp (close_name, current_element (context)) != 0 &&
                       (*close_name || !(context->flags&G_MARKUP_SIMPLIFIED)))
                {
                  /* Mismatch -- except for the empty name of "</>" in
                   * simplified mode, which closes whatever is open.
                   */
                  set_error (context,
                             error,
                             G_MARKUP_ERROR_PARSE,
                             _("Element '%s' was closed, but the currently "
                               "open element is '%s'"),
                             close_name,
                             current_element (context));
                }
              else
                {
                  GError *tmp_error;

                  /* Step over the '>'. */
                  advance_char (context);
                  context->state = STATE_AFTER_CLOSE_ANGLE;
                  context->start = NULL;

                  if (!*close_name && context->flags & G_MARKUP_SIMPLIFIED)
                    {
                      /* "</>": report the name of the open element. */
                      g_free (close_name);
                      close_name = g_strdup(current_element(context));
                    }

                  /* call the end_element callback */
                  tmp_error = NULL;
                  if (context->parser->end_element)
                    (* context->parser->end_element) ((void*) context,
                                                      close_name,
                                                      context->user_data,
                                                      &tmp_error);

                  /* Pop the tag stack. */
                  g_free (context->tag_stack->data);
                  context->tag_stack = g_slist_delete_link (context->tag_stack,
                                                            context->tag_stack);

                  if (tmp_error)
                    {
                      mark_error (context, tmp_error);
                      xml_g_propagate_error (error, tmp_error);
                    }
                }

              g_free (close_name);
            }
          break;

        case STATE_INSIDE_PASSTHROUGH:
          /* Possible next state: AFTER_CLOSE_ANGLE */
          do
            {
              /* context->balance tracks the nesting of '<' and '>'. */
              if (*context->iter == '<')
                context->balance++;
              if (*context->iter == '>')
                {
                  context->balance--;

                  /* Accumulate everything up to (not including) this '>'
                   * and test whether the passthrough is terminated here.
                   */
                  add_to_partial (context, context->start, context->iter);
                  context->start = context->iter;

                  if (str_has_pairs (context->partial_chunk->str, "<?","?") ||
                      str_has_pairs (context->partial_chunk->str, "<'","'") ||
                      str_has_pairs (context->partial_chunk->str, "<`","`") ||
                      str_has_pairs (context->partial_chunk->str, "<<",">") ||
                      str_has_pairs (context->partial_chunk->str, "<@","@") ||
                      str_has_pairs (context->partial_chunk->str, "<%","%") ||
                      str_has_pairs (context->partial_chunk->str, "<#","#") ||
                      str_has_pairs (context->partial_chunk->str, "<$","") ||
                      str_has_pairs (context->partial_chunk->str, "<*","") ||
                      str_has_pairs (context->partial_chunk->str, "<.","") ||
                      (str_has_prefix (context->partial_chunk->str, "<!--")
                       && str_has_suffix (context->partial_chunk->str, "--")) ||
                      (str_has_prefix (context->partial_chunk->str, "<![CDATA[")
                       && str_has_suffix (context->partial_chunk->str, "]]")) ||
                      (str_has_prefix (context->partial_chunk->str, "<!")
                       && g_ascii_isalnum (context->partial_chunk->str[2])
                       && context->balance == 0))
                    break;
                }
            }
          while (advance_char (context));

          if (context->iter == context->current_text_end)
            {
              /* The passthrough continues in the next chunk; save what we
               * have.
               */
              add_to_partial (context, context->start, context->iter);
            }
          else
            {
              /* The passthrough is complete: include the closing '>' and
               * deliver it.
               */
              GError *tmp_error = NULL;

              advance_char (context);
              add_to_partial (context, context->start, context->iter);

              if (str_has_prefix (context->partial_chunk->str, "<![CDATA["))
                {
                  /* CDATA is delivered as text, without the 9 byte
                   * "<![CDATA[" prefix and the 3 byte "]]>" suffix.
                   */
                  if (context->parser->text)
                    (*context->parser->text) ((void*) context,
                                              context->partial_chunk->str + 9,
                                              context->partial_chunk->len - 12,
                                              context->user_data,
                                              &tmp_error);
                }
              else
                {
                  if (context->parser->passthrough)
                    (*context->parser->passthrough) ((void*) context,
                                                     context->partial_chunk->str,
                                                     context->partial_chunk->len,
                                                     context->user_data,
                                                     &tmp_error);
                }

              truncate_partial (context);

              if (tmp_error == NULL)
                {
                  context->state = STATE_AFTER_CLOSE_ANGLE;
                  /* could begin text */
                  context->start = context->iter;
                }
              else
                {
                  mark_error (context, tmp_error);
                  xml_g_propagate_error (error, tmp_error);
                }
            }
          break;

        case STATE_ERROR:
          /* Try to resume as plain text; give up if that is not possible
           * (G_MARKUP_CONTINUE not set, or no more input to skip).
           */
          RECOVER(STATE_INSIDE_TEXT);
          if (context->state == STATE_ERROR)
            goto finished;
          break;

        default:
          g_assert_not_reached ();
          break;
        }

      /* Catch-all recovery for states that do not recover on their own. */
      RECOVER(STATE_INSIDE_TEXT);
    }

 finished:
  context->parsing = FALSE;

  return context->state != STATE_ERROR;
}