From e86d1dd432d9c5b9068f3dc3bf71f01f27c61cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Wed, 15 Oct 2008 11:25:09 +0000 Subject: [PATCH] gst/subparse/gstsubparse.c: Improve typefinding a bit. If we don't have a Unicode charset try GST_SUBTITLE_ENCODING a... Original commit message from CVS: * gst/subparse/gstsubparse.c: (gst_subparse_type_find): Improve typefinding a bit. If we don't have a Unicode charset try GST_SUBTITLE_ENCODING and otherwise try ISO-8859-15. --- ChangeLog | 6 ++++++ gst/subparse/gstsubparse.c | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index c2817d97a4..24c23d02bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2008-10-15 Sebastian Dröge + + * gst/subparse/gstsubparse.c: (gst_subparse_type_find): + Improve typefinding a bit. If we don't have a Unicode charset + try GST_SUBTITLE_ENCODING and otherwise try ISO-8859-15. + 2008-10-14 Edward Hervey * ext/theora/theoradec.c: (theora_dec_decode_buffer): diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index 8200f1be72..bba1e08fa8 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -1408,6 +1408,7 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) GstCaps *caps; gchar *str; gchar *encoding = NULL; + const gchar *end; if (!(data = gst_type_find_peek (tf, 0, 129))) return; @@ -1427,12 +1428,39 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) err->message); g_error_free (err); g_free (encoding); + } else { + g_free (str); + str = converted_str; + g_free (encoding); + } + } + + /* Check if at least the first 120 chars are valid UTF8, + * otherwise convert as always */ + if (!g_utf8_validate (str, 128, &end) && (end - str) < 120) { + gchar *converted_str; + GError *err = NULL; + gsize tmp; + const gchar *enc; + + enc = g_getenv ("GST_SUBTITLE_ENCODING"); + if (enc == NULL || *enc == '\0') { + /* if local encoding is UTF-8 and no encoding specified + * via the environment variable, assume ISO-8859-15 */ + if (g_get_charset (&enc)) { + enc = "ISO-8859-15"; + } + } + converted_str = gst_convert_to_utf8 (str, 128, enc, &tmp, &err); + if (converted_str == NULL) { + GST_DEBUG ("Charset conversion failed: %s", err->message); + g_error_free (err); g_free (str); return; + } else { + g_free (str); + str = converted_str; } - g_free (str); - - str = converted_str; } format = gst_sub_parse_data_format_autodetect (str);