From cc156bc97c5877b22cd3a5f4ebf89787239d4558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= Date: Thu, 26 Oct 2006 10:49:00 +0000 Subject: [PATCH] gst-libs/gst/riff/riff-read.c: If strings in INFO chunk are not UTF-8, do something similar to what we do for ID3v1 t... Original commit message from CVS: * gst-libs/gst/riff/riff-read.c: (freeform_string_to_utf8), (gst_riff_parse_info): If strings in INFO chunk are not UTF-8, do something similar to what we do for ID3v1 tags: check a number of environment variables (GST_AVI_TAG_ENCODING, GST_RIFF_TAG_ENCODING, GST_TAG_ENCODING) for character sets to try, otherwise try the current locale and/or fall back on ISO-8859-1. Fixes #360552. --- ChangeLog | 10 ++++ gst-libs/gst/riff/riff-read.c | 98 ++++++++++++++++++++++++++++++----- 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3f05de7a1a..4917f61f1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2006-10-26 Tim-Philipp Müller + + * gst-libs/gst/riff/riff-read.c: (freeform_string_to_utf8), + (gst_riff_parse_info): + If strings in INFO chunk are not UTF-8, do something similar to + what we do for ID3v1 tags: check a number of environment variables + (GST_AVI_TAG_ENCODING, GST_RIFF_TAG_ENCODING, GST_TAG_ENCODING) for + character sets to try, otherwise try the current locale and/or fall + back on ISO-8859-1. Fixes #360552. 
+
 2006-10-23 Tim-Philipp Müller
 
 * gst/videotestsrc/gstvideotestsrc.c:
diff --git a/gst-libs/gst/riff/riff-read.c b/gst-libs/gst/riff/riff-read.c
index 143415b385..67312deb0f 100644
--- a/gst-libs/gst/riff/riff-read.c
+++ b/gst-libs/gst/riff/riff-read.c
@@ -577,6 +577,109 @@ too_small:
   }
 }
 
+/* Convert a free-form tag string of unknown encoding to UTF-8.
+ *
+ * @data: the raw string bytes (must not be NULL)
+ * @size: number of bytes in @data, or a negative value if @data is
+ *     NUL-terminated and its length should be determined with strlen()
+ * @env_vars: NULL-terminated array of environment variable names, or NULL;
+ *     the first one that is set to a non-empty value is taken as a
+ *     G_SEARCHPATH_SEPARATOR_S-separated list of character sets to try
+ *
+ * If @data is not already valid UTF-8, the character sets from the
+ * environment are tried in order, then the current locale's character set
+ * (unless that is UTF-8), and finally ISO-8859-1. A conversion is only
+ * accepted if it consumed all input bytes.
+ *
+ * Returns: a newly-allocated UTF-8 string with trailing whitespace removed
+ *     (free with g_free()), or NULL if no conversion succeeded or the
+ *     result is empty.
+ */
+static gchar *
+freeform_string_to_utf8 (const gchar * data, gint size, const gchar ** env_vars)
+{
+  const gchar *env = NULL;
+  gsize len, bytes_read;
+  gchar *utf8 = NULL;
+
+  g_return_val_if_fail (data != NULL, NULL);
+
+  if (size < 0)
+    size = strlen (data);
+
+  /* Chop off trailing string terminators: g_utf8_validate() rejects any NUL
+   * byte within the given length, so a NUL-terminated chunk that is valid
+   * UTF-8 would otherwise be run through the charset conversions below */
+  while (size > 0 && data[size - 1] == '\0')
+    --size;
+
+  /* size is non-negative here; keep an unsigned copy for the comparisons
+   * against bytes_read */
+  len = (gsize) size;
+
+  /* Should we try the charsets specified
+   * via environment variables FIRST ? */
+  if (g_utf8_validate (data, size, NULL)) {
+    utf8 = g_strndup (data, len);
+    goto beach;
+  }
+
+  while ((env == NULL || *env == '\0') && env_vars && *env_vars != NULL) {
+    env = g_getenv (*env_vars);
+    ++env_vars;
+  }
+
+  /* Try charsets specified via the environment */
+  if (env != NULL && *env != '\0') {
+    gchar **c, **csets;
+
+    csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
+
+    for (c = csets; c && *c; ++c) {
+      if ((utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
+        if (bytes_read == len) {
+          g_strfreev (csets);
+          goto beach;
+        }
+        g_free (utf8);
+        utf8 = NULL;
+      }
+    }
+
+    g_strfreev (csets);
+  }
+
+  /* Try current locale (if not UTF-8) */
+  if (!g_get_charset (&env)) {
+    if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) {
+      if (bytes_read == len) {
+        goto beach;
+      }
+      g_free (utf8);
+      utf8 = NULL;
+    }
+  }
+
+  /* Try ISO-8859-1 */
+  utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
+  if (utf8 != NULL && bytes_read == len) {
+    goto beach;
+  }
+
+  g_free (utf8);
+  return NULL;
+
+beach:
+
+  /* every jump to this label carries a non-NULL string */
+  g_strchomp (utf8);
+  if (utf8[0] != '\0')
+    return utf8;
+
+  g_free (utf8);
+  return NULL;
+}
+
 /**
  * gst_riff_parse_info:
  * @element: caller element (used for debugging/error).
@@ -595,9 +667,7 @@ gst_riff_parse_info (GstElement * element, guint size, tsize; guint32 tag; const gchar *type; - gchar *name; GstTagList *taglist; - gboolean have_tags = FALSE; g_return_if_fail (_taglist != NULL); g_return_if_fail (buf != NULL); @@ -700,17 +770,19 @@ gst_riff_parse_info (GstElement * element, break; } - if (type) { - if (data[0] != '\0') { - /* read, NULL-terminate */ - name = g_new (gchar, tsize + 1); - name[tsize] = '\0'; - memcpy (name, data, tsize); + if (type != NULL && data[0] != '\0') { + static const gchar *env_vars[] = { "GST_AVI_TAG_ENCODING", + "GST_RIFF_TAG_ENCODING", "GST_TAG_ENCODING", NULL + }; + gchar *val; - /* add to list */ - have_tags = TRUE; - gst_tag_list_add (taglist, GST_TAG_MERGE_APPEND, type, name, NULL); - g_free (name); + val = freeform_string_to_utf8 ((gchar *) data, tsize, env_vars); + + if (val) { + gst_tag_list_add (taglist, GST_TAG_MERGE_APPEND, type, val, NULL); + g_free (val); + } else { + GST_WARNING_OBJECT (element, "could not extract %s tag", type); } } @@ -718,7 +790,7 @@ gst_riff_parse_info (GstElement * element, size -= tsize; } - if (have_tags) { + if (!gst_tag_list_is_empty (taglist)) { *_taglist = taglist; } else { *_taglist = NULL;