From 11520403a58aa3afdc253be168b034d92a4067cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Fri, 23 Dec 2022 17:25:06 +0200 Subject: [PATCH] typefindfunctions: Make XML typefinder more strict If an XMLDec is found, also check for its end. Similarly, check for the end of the XML tag we're looking for and make sure that the following characters are valid. Fixes https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/1536 Part-of: --- .../gst/typefind/gsttypefindfunctions.c | 183 ++++++++++-------- 1 file changed, 100 insertions(+), 83 deletions(-) diff --git a/subprojects/gst-plugins-base/gst/typefind/gsttypefindfunctions.c b/subprojects/gst-plugins-base/gst/typefind/gsttypefindfunctions.c index 5a4df3187d..2e1344fc08 100644 --- a/subprojects/gst-plugins-base/gst/typefind/gsttypefindfunctions.c +++ b/subprojects/gst-plugins-base/gst/typefind/gsttypefindfunctions.c @@ -534,73 +534,115 @@ hls_type_find (GstTypeFind * tf, gpointer unused) /*** application/xml **********************************************************/ -#define XML_BUFFER_SIZE 16 -#define XML_INC_BUFFER { \ - pos++; \ - if (pos == XML_BUFFER_SIZE) { \ - pos = 0; \ - offset += XML_BUFFER_SIZE; \ - data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE); \ - if (data == NULL) return FALSE; \ - } else { \ - data++; \ - } \ -} - -#define XML_INC_BUFFER_DATA { \ - pos++; \ - if (pos >= length) { \ - return FALSE; \ - } else { \ - data++; \ - } \ -} - static gboolean xml_check_first_element_from_data (const guint8 * data, guint length, const gchar * element, guint elen, gboolean strict) { gboolean got_xmldec; - guint pos = 0; + const guint8 *ptr; g_return_val_if_fail (data != NULL, FALSE); - if (length <= 5) + /* search for an opening tag */ + ptr = memchr (data, '<', length); + if (!ptr) + return FALSE; + + length -= (ptr - data); + data = ptr; + + if (length < 5) return FALSE; /* look for the XMLDec * see XML spec 2.8, Prolog and Document Type Declaration *
http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */ got_xmldec = (memcmp (data, " */ + data += 5; + length -= 5; + ptr = memchr (data, '?', length); + if (!ptr) + return FALSE; + + length -= (ptr - data); + data = ptr; + + got_xmldec = (memcmp (data, "?>", 2) == 0); + if (!got_xmldec) + return FALSE; + + data += 2; + length -= 2; + } if (strict && !got_xmldec) return FALSE; - /* skip XMLDec in any case if we've got one */ if (got_xmldec) { - pos += 5; - data += 5; + /* search for the next opening tag */ + ptr = memchr (data, '<', length); + if (!ptr) + return FALSE; + + length -= (ptr - data); + data = ptr; } - /* look for the first element, it has to be the requested element. Bail - * out if it is not within the first 4kB. */ - while (pos < MIN (4096, length)) { - while (*data != '<' && pos < MIN (4096, length)) { - XML_INC_BUFFER_DATA; - } + /* skip XML comments */ + while (length >= 4 && memcmp (data, ""); + if (!ptr) return FALSE; - return (element && strncmp ((const char *) data, element, elen) == 0); + ptr += 3; + + length -= (ptr - data); + data = ptr; + + /* search for the next opening tag */ + ptr = memchr (data, '<', length); + if (!ptr) + return FALSE; + + length -= (ptr - data); + data = ptr; + } + + if (elen == 0) + return TRUE; + + /* look for the first element, it has to be the requested element. Bail + * out otherwise. 
*/ + if (length < elen + 1) + return FALSE; + + data += 1; + length -= 1; + if (memcmp (data, element, elen) != 0) + return FALSE; + + data += elen; + length -= elen; + + /* check if there's a closing `>` following */ + ptr = memchr (data, '>', length); + if (!ptr) + return FALSE; + + /* between ``, there should only be spaces, alphanum or `:` + * until the first `=` for an attribute value */ + while (data < ptr) { + if (*data == '>' || *data == '=') + return TRUE; + + if (!g_ascii_isprint (*data) && *data != '\n' && *data != '\r') + return FALSE; + + data++; } return FALSE; @@ -610,50 +652,25 @@ static gboolean xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen, gboolean strict) { - gboolean got_xmldec; const guint8 *data; - guint offset = 0; - guint pos = 0; + guint length; - data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE); + length = gst_type_find_get_length (tf); + + /* try a default that should be enough */ + if (length == 0) + length = 512; + else if (length < 64) + return FALSE; + else /* the first few bytes should be enough */ + length = MIN (4096, length); + + data = gst_type_find_peek (tf, 0, length); if (!data) return FALSE; - /* look for the XMLDec - * see XML spec 2.8, Prolog and Document Type Declaration - * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */ - got_xmldec = (memcmp (data, "