From ae76f9ab23ef3b9fcfb651ec3461c339b611f814 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Fri, 20 Jul 2018 21:33:24 +0900 Subject: [PATCH] ttmlparse: Collect buffers until detecting complete xml document A given buffer may be fragmented, so we may need to collect buffers until the end tag is detected. Conversely, a single buffer may contain multiple TTML documents. Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/494 --- ext/ttml/gstttmlparse.c | 44 +++++++++++++++++++++++++++-------------- ext/ttml/ttmlparse.c | 36 +++++++++++++++++++++++++-------- ext/ttml/ttmlparse.h | 4 ++-- 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/ext/ttml/gstttmlparse.c b/ext/ttml/gstttmlparse.c index e6f21bee1e..d827a48122 100644 --- a/ext/ttml/gstttmlparse.c +++ b/ext/ttml/gstttmlparse.c @@ -427,11 +427,11 @@ feed_textbuf (GstTtmlParse * self, GstBuffer * buf) input = convert_encoding (self, (const gchar *) data, avail, &consumed); if (input && consumed > 0) { - if (self->textbuf) { - g_string_free (self->textbuf, TRUE); - self->textbuf = NULL; - } - self->textbuf = g_string_new (input); + if (!self->textbuf) + self->textbuf = g_string_new (input); + else + self->textbuf = g_string_append (self->textbuf, input); + gst_adapter_unmap (self->adapter); gst_adapter_flush (self->adapter, consumed); } else { @@ -446,9 +446,11 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf) { GstFlowReturn ret = GST_FLOW_OK; GstCaps *caps = NULL; - GList *subtitle_list, *subtitle; + GList *subtitle_list = NULL; + GList *iter; GstClockTime begin = GST_BUFFER_PTS (buf); GstClockTime duration = GST_BUFFER_DURATION (buf); + guint consumed; if (self->first_buffer) { GstMapInfo map; @@ -474,19 +476,31 @@ handle_buffer (GstTtmlParse * self, GstBuffer * buf) self->need_segment = FALSE; } - subtitle_list = ttml_parse (self->textbuf->str, begin, duration); + do { + consumed = ttml_parse (self->textbuf->str, begin, duration, &subtitle_list); - for (subtitle = subtitle_list; 
subtitle; subtitle = subtitle->next) { - GstBuffer *op_buffer = subtitle->data; - self->segment.position = GST_BUFFER_PTS (op_buffer); + if (!consumed) { + GST_DEBUG_OBJECT (self, "need more data"); + return ret; + } - ret = gst_pad_push (self->srcpad, op_buffer); + self->textbuf = g_string_erase (self->textbuf, 0, consumed); - if (ret != GST_FLOW_OK) - GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret)); - } + for (iter = subtitle_list; iter; iter = g_list_next (iter)) { + GstBuffer *op_buffer = GST_BUFFER (iter->data); + self->segment.position = GST_BUFFER_PTS (op_buffer); + + ret = gst_pad_push (self->srcpad, op_buffer); + + if (ret != GST_FLOW_OK) { + GST_DEBUG_OBJECT (self, "flow: %s", gst_flow_get_name (ret)); + break; + } + } + + g_list_free (subtitle_list); + } while (TRUE); - g_list_free (subtitle_list); return ret; } diff --git a/ext/ttml/ttmlparse.c b/ext/ttml/ttmlparse.c index 102ac26dec..25895463af 100644 --- a/ext/ttml/ttmlparse.c +++ b/ext/ttml/ttmlparse.c @@ -1926,9 +1926,11 @@ ttml_find_child (xmlNodePtr parent, const gchar * name) return child; } +#define TTML_END_TAG "</tt>" -GList * -ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration) +guint +ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration, + GList ** parsed) { xmlDocPtr doc; xmlNodePtr root_node, head_node, body_node; @@ -1938,30 +1940,45 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration) gchar *value; guint cellres_x, cellres_y; TtmlWhitespaceMode doc_whitespace_mode = TTML_WHITESPACE_MODE_DEFAULT; + guint consumed = 0; + gchar *end_tt; + g_return_val_if_fail (parsed != NULL, 0); + + *parsed = NULL; if (!g_utf8_validate (input, -1, NULL)) { GST_CAT_ERROR (ttmlparse_debug, "Input isn't valid UTF-8."); - return NULL; + return 0; } GST_CAT_LOG (ttmlparse_debug, "Input:\n%s", input); + end_tt = g_strrstr (input, TTML_END_TAG); + + if (!end_tt) { + GST_CAT_DEBUG (ttmlparse_debug, "Need more data"); + return 0; + } + 
+ consumed = end_tt - input + strlen (TTML_END_TAG); + styles_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) ttml_delete_element); regions_table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) ttml_delete_element); /* Parse input. */ - doc = xmlReadMemory (input, strlen (input), "any_doc_name", NULL, 0); + doc = xmlReadMemory (input, consumed, "any_doc_name", NULL, 0); if (!doc) { GST_CAT_ERROR (ttmlparse_debug, "Failed to parse document."); - return NULL; + return 0; } + root_node = xmlDocGetRootElement (doc); if (xmlStrcmp (root_node->name, (const xmlChar *) "tt") != 0) { GST_CAT_ERROR (ttmlparse_debug, "Root element of document is not tt:tt."); xmlFreeDoc (doc); - return NULL; + return 0; } if ((value = ttml_get_xml_property (root_node, "cellResolution"))) { @@ -1988,8 +2005,9 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration) if (!(head_node = ttml_find_child (root_node, "head"))) { GST_CAT_ERROR (ttmlparse_debug, "No <head> element found."); xmlFreeDoc (doc); - return NULL; + return 0; } + ttml_parse_head (head_node, styles_table, regions_table); if ((body_node = ttml_find_child (root_node, "body"))) { @@ -2030,5 +2048,7 @@ ttml_parse (const gchar * input, GstClockTime begin, GstClockTime duration) g_hash_table_destroy (styles_table); g_hash_table_destroy (regions_table); - return output_buffers; + *parsed = output_buffers; + + return consumed; } diff --git a/ext/ttml/ttmlparse.h b/ext/ttml/ttmlparse.h index b5f21bf66b..0e26b9bcca 100644 --- a/ext/ttml/ttmlparse.h +++ b/ext/ttml/ttmlparse.h @@ -27,8 +27,8 @@ G_BEGIN_DECLS -GList *ttml_parse (const gchar * file, GstClockTime begin, - GstClockTime duration); +guint ttml_parse (const gchar * file, GstClockTime begin, + GstClockTime duration, GList **parsed); G_END_DECLS #endif /* _TTML_PARSE_H_ */