Logo Search packages:      
Sourcecode: pan version File versions  Download package

gmime-param.c

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 *  Authors: Jeffrey Stedfast <fejj@helixcode.com>
 *
 *  Copyright 2000 Helix Code, Inc. (www.helixcode.com)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <string.h>

#include "gmime-param.h"
#include "gmime-table-private.h"
#include "gmime-charset.h"
#include "gmime-utils.h"
#include "gmime-iconv.h"
#include "gmime-iconv-utils.h"

#include <glib/gmem.h>
#include <glib/gmessages.h>
#include <glib/gstrfuncs.h>
#include <glib/galloca.h>

/* d(x): debug wrapper - expands to its argument, so wrapped statements are compiled in */
#define d(x) x
/* w(x): warning wrapper - expands to nothing, so wrapped statements are compiled out */
#define w(x)


/* Maps a 4-bit value (0-15) to its upper-case hexadecimal digit;
 * used when emitting %XX escapes. */
static unsigned char tohex[16] = {
      '0', '1', '2', '3', '4', '5', '6', '7',
      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};


/**
 * g_mime_param_new: Create a new MIME Param object
 * @name: parameter name
 * @value: parameter value
 *
 * Allocates a single, unlinked GMimeParam node that holds its own
 * copies of @name and @value.
 *
 * Returns a new parameter structure.
 **/
GMimeParam *
g_mime_param_new (const char *name, const char *value)
{
      GMimeParam *node = g_new (GMimeParam, 1);

      node->name = g_strdup (name);
      node->value = g_strdup (value);
      node->next = NULL;

      return node;
}


/* Numeric value of a hexadecimal digit character (caller must have
 * verified the character with isxdigit()). */
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/*
 * hex_decode:
 * @in: input bytes, possibly containing %XX escapes
 * @len: number of bytes of @in to process
 * @out: output buffer with room for at least @len + 1 bytes
 *
 * Copies @in to @out, replacing every "%XX" sequence (XX being two
 * hexadecimal digits) with the byte it denotes.  A '%' that is not
 * followed by two hex digits inside the first @len bytes is copied
 * through unchanged.  The output is always NUL-terminated.
 *
 * Returns the number of bytes written to @out, not counting the
 * terminating NUL.
 */
static size_t
hex_decode (const unsigned char *in, size_t len, unsigned char *out)
{
      const unsigned char *inptr = in;
      const unsigned char *inend = in + len;
      unsigned char *outptr = out;

      while (inptr < inend) {
            /* Only treat '%' as an escape when both hex digits lie
             * inside the buffer - never peek at or past inend. */
            if (*inptr == '%' && inend - inptr >= 3 &&
                isxdigit (inptr[1]) && isxdigit (inptr[2])) {
                  *outptr++ = (unsigned char) (HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]));
                  inptr += 3;
            } else {
                  *outptr++ = *inptr++;
            }
      }

      *outptr = '\0';

      return (size_t) (outptr - out);
}

/* Decodes an rfc2184 extended parameter value of the form
 *
 *     charset'language'percent-encoded-text
 *
 * for example:  us-ascii'en'This%20is%20even%20more%20
 *
 * The text is hex-decoded and, unless the charset is already UTF-8,
 * converted to UTF-8 (falling back to the locale charset when the
 * named charset cannot be opened).
 *
 * Returns a newly allocated string, or NULL when either quote is
 * missing, when there is no text after the second quote, or when the
 * conversion fails.
 */
static char *
rfc2184_decode (const char *in, size_t len)
{
      const char *inend = in + len;
      const char *charset;
      const char *quote, *text;
      char *charenc, *raw, *converted;
      iconv_t cd;
      size_t n;

      /* the charset name runs up to the first single quote */
      quote = memchr (in, '\'', len);
      if (quote == NULL)
            return NULL;

      n = quote - in;
      charenc = g_alloca (n + 1);
      memcpy (charenc, in, n);
      charenc[n] = '\0';
      charset = charenc;

      /* the language tag runs up to the second single quote */
      quote = memchr (quote + 1, '\'', (unsigned int) (inend - quote - 1));
      if (quote == NULL)
            return NULL;

      text = quote + 1;
      if (text >= inend)
            return NULL;

      n = inend - text;

      if (g_strcasecmp (charset, "UTF-8") == 0) {
            /* already UTF-8: hex-decode straight into the result */
            raw = g_malloc (n + 1);
            hex_decode (text, n, raw);
            return raw;
      }

      /* hex-decode into scratch space, then convert to UTF-8 */
      raw = g_alloca (n + 1);
      n = hex_decode (text, n, raw);

      cd = g_mime_iconv_open ("UTF-8", charset);
      if (cd == (iconv_t) -1) {
            d(g_warning ("Cannot convert from %s to UTF-8, param display may "
                       "be corrupt: %s", charset, g_strerror (errno)));
            charset = g_mime_charset_locale_name ();
            cd = g_mime_iconv_open ("UTF-8", charset);
            if (cd == (iconv_t) -1)
                  return NULL;
      }

      converted = g_mime_iconv_strndup (cd, raw, n);
      g_mime_iconv_close (cd);

      if (converted == NULL) {
            d(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
                       "corrupt: %s", (int)n, raw, g_strerror (errno)));
      }

      return converted;
}

/* Advances *in past any run of linear whitespace and (possibly
 * nested) parenthesised comments.  Inside a comment a backslash
 * escapes the following character. */
static void
decode_lwsp (const char **in)
{
      const char *p = *in;
      int nesting;

      for (;;) {
            while (*p && is_lwsp (*p))
                  p++;

            if (*p != '(')
                  break;

            /* skip over the comment, honouring nesting and escapes */
            for (nesting = 1, p++; *p && nesting; p++) {
                  if (*p == '\\' && p[1])
                        p++;
                  else if (*p == '(')
                        nesting++;
                  else if (*p == ')')
                        nesting--;
            }
      }

      *in = p;
}

/* Skips leading whitespace/comments, then parses a run of decimal
 * digits.  *in is advanced past everything consumed.
 *
 * Returns the parsed value, 0 when no digits are present, or INT_MAX
 * when the digit string is too large to fit in an int. */
static int
decode_int (const char **in)
{
      const char *inptr = *in;
      int n = 0;

      decode_lwsp (&inptr);

      /* cast: passing a negative char to isdigit() is undefined */
      while (isdigit ((unsigned char) *inptr)) {
            int digit = *inptr - '0';

            /* saturate instead of overflowing on absurdly long input */
            if (n > (INT_MAX - digit) / 10)
                  n = INT_MAX;
            else
                  n = n * 10 + digit;

            inptr++;
      }

      *in = inptr;

      return n;
}

/* Skips leading whitespace/comments and, if a double quote follows,
 * extracts the quoted string.  *in is advanced past the closing quote
 * (or to the end of input if the string is unterminated).
 *
 * The returned text is a newly allocated raw copy: backslash escapes
 * are kept as-is.  For an unterminated string the copy includes the
 * opening quote.  Returns NULL when no quoted string is present. */
static char *
decode_quoted_string (const char **in)
{
      const char *start, *inptr = *in;
      char *out = NULL;

      decode_lwsp (&inptr);
      if (*inptr == '"') {
            start = inptr++;

            while (*inptr && *inptr != '"') {
                  /* a backslash escapes the next character, but must
                   * never carry us past the terminating NUL */
                  if (*inptr == '\\' && inptr[1] != '\0')
                        inptr++;
                  inptr++;
            }

            if (*inptr == '"') {
                  start++;
                  out = g_strndup (start, (unsigned int) (inptr - start));
                  inptr++;
            } else {
                  /* string wasn't properly quoted */
                  out = g_strndup (start, (unsigned int) (inptr - start));
            }
      }

      *in = inptr;

      return out;
}

/* Skips leading whitespace/comments and extracts the longest run of
 * token characters.  On success *in is advanced past the token and a
 * newly allocated copy is returned; otherwise *in is left untouched
 * and NULL is returned. */
static char *
decode_token (const char **in)
{
      const char *cursor = *in;
      const char *begin;

      decode_lwsp (&cursor);

      for (begin = cursor; is_ttoken (*cursor); cursor++)
            ;

      if (cursor == begin)
            return NULL;

      *in = cursor;

      return g_strndup (begin, (unsigned int) (cursor - begin));
}

/* Decodes a parameter value, which is either a quoted string or a
 * bare token.  Looks ahead past whitespace/comments to pick the right
 * decoder; returns NULL when neither form is present. */
static char *
decode_value (const char **in)
{
      const char *peek = *in;

      decode_lwsp (&peek);

      if (*peek == '"')
            return decode_quoted_string (in);

      if (is_ttoken (*peek))
            return decode_token (in);

      return NULL;
}

/* Same as decode_token(), except that the token also ends at a '*',
 * because '*' has a special meaning in rfc2184 parameter names. */
static char *
decode_param_token (const char **in)
{
      const char *cursor = *in;
      const char *begin;

      decode_lwsp (&cursor);

      for (begin = cursor; is_ttoken (*cursor) && *cursor != '*'; cursor++)
            ;

      if (cursor == begin)
            return NULL;

      *in = cursor;

      return g_strndup (begin, (unsigned int) (cursor - begin));
}

/* Parses a parameter name together with any rfc2184 decoration:
 *
 *     name          plain parameter
 *     name*         single encoded value
 *     name*N        part N of a split value
 *     name*N*       part N of a split value, encoded
 *
 * *part receives N, or -1 when no part number is present.
 * *value_is_encoded is set to TRUE for the starred-value forms.
 * *paramp (if non-NULL) receives the newly allocated name, or NULL.
 * *in is advanced only when a name was found.
 *
 * Returns TRUE if a '*' followed the name. */
static gboolean
decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *value_is_encoded)
{
      const char *cursor = *in;
      gboolean starred = FALSE;
      char *name;

      *value_is_encoded = FALSE;
      *part = -1;

      name = decode_param_token (&cursor);

      decode_lwsp (&cursor);

      if (*cursor == '*') {
            starred = TRUE;
            cursor++;

            decode_lwsp (&cursor);
            if (*cursor != '=') {
                  /* form := param*#=value or param*#*=value */
                  *part = decode_int (&cursor);

                  decode_lwsp (&cursor);
                  if (*cursor == '*') {
                        /* form := param*#*=value */
                        if (value_is_encoded)
                              *value_is_encoded = TRUE;
                        cursor++;
                        decode_lwsp (&cursor);
                  }
            } else if (value_is_encoded) {
                  /* form := param*=value */
                  *value_is_encoded = TRUE;
            }
      }

      if (paramp != NULL)
            *paramp = name;

      if (name != NULL)
            *in = cursor;

      return starred;
}

/* Decodes one "name=value" parameter starting at *in.
 *
 * On success returns 0, stores newly allocated strings in *paramp and
 * *valuep and advances *in past the parameter.  On failure returns 1,
 * frees anything it allocated and leaves *in, *paramp and *valuep
 * untouched.
 *
 * *is_rfc2184_param is set to TRUE when the parameter is one numbered
 * part of a split rfc2184 value, in which case the value is returned
 * undecoded so the caller can join the parts first. */
static int
decode_param (const char **in, char **paramp, char **valuep, gboolean *is_rfc2184_param)
{
      gboolean is_rfc2184_encoded = FALSE;
      gboolean is_rfc2184 = FALSE;
      const char *inptr = *in;
      char *param, *value = NULL;
      int rfc2184_part = -1;

      *is_rfc2184_param = FALSE;

      is_rfc2184 = decode_rfc2184_param (&inptr, &param, &rfc2184_part,
                                 &is_rfc2184_encoded);

      if (*inptr == '=') {
            inptr++;
            value = decode_value (&inptr);

            if (is_rfc2184) {
                  /* We have ourselves an rfc2184 parameter */
                  if (rfc2184_part == -1) {
                        /* rfc2184 allows the value to be broken into
                         * multiple parts - this isn't one of them so
                         * it is safe to decode it.  The value may be
                         * missing entirely (e.g. "name*=;"), so only
                         * decode when there is something to decode.
                         */
                        if (value) {
                              char *val;

                              val = rfc2184_decode (value, strlen (value));
                              if (val) {
                                    g_free (value);
                                    value = val;
                              }
                        }
                  } else {
                        /* Since we are expecting to find the rest of
                         * this parameter value later, let our caller know.
                         */
                        *is_rfc2184_param = TRUE;
                  }
            } else if (value && !strncmp (value, "=?", 2)) {
                  /* We have a broken param value that is rfc2047 encoded.
                   * Since both Outlook and Netscape/Mozilla do this, we
                   * should handle this case.
                   */
                  char *val;

                  val = g_mime_utils_8bit_header_decode (value);
                  if (val) {
                        g_free (value);
                        value = val;
                  }
            }
      }

      if (value && !g_utf8_validate (value, -1, NULL)) {
            /* A (broken) mailer has sent us an unencoded 8bit value.
             * Attempt to save it by assuming it's in the user's
             * locale and converting to UTF-8 */
            char *buf;

            buf = g_mime_iconv_locale_to_utf8 (value);
            if (buf) {
                  g_free (value);
                  value = buf;
            } else {
                  /* param can be NULL here; never hand NULL to %s */
                  d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
                             param ? param : "(null)", value, g_strerror (errno)));
            }
      }

      if (param && value) {
            *paramp = param;
            *valuep = value;
            *in = inptr;
            return 0;
      } else {
            g_free (param);
            g_free (value);
            return 1;
      }
}

/* Decodes a ';'-separated list of parameters starting at *in into a
 * newly allocated GMimeParam list.
 *
 * Consecutive numbered rfc2184 parts carrying the same name are joined
 * into a single parameter; the joined value is rfc2184-decoded once
 * the following parameter (or the end of the list) shows that all the
 * parts have been collected.
 *
 * On return *in points at the first character that was not consumed
 * (the terminating NUL when the whole string was parsed).
 *
 * Returns the head of the list, or NULL if no parameter was decoded. */
static GMimeParam *
decode_param_list (const char **in)
{
      const char *inptr = *in;
      GMimeParam *head = NULL, *tail = NULL;
      gboolean last_was_rfc2184 = FALSE;
      gboolean is_rfc2184 = FALSE;

      decode_lwsp (&inptr);

      for (;;) {
            GMimeParam *param = NULL;
            char *name, *value;

            if (decode_param (&inptr, &name, &value, &is_rfc2184) == 0) {
                  if (is_rfc2184 && tail && !g_strcasecmp (name, tail->name)) {
                        /* rfc2184 allows a parameter to be broken into multiple parts
                         * and it looks like we've found one. Append this value to the
                         * last value.
                         */
                        GString *gvalue;

                        gvalue = g_string_new (tail->value);
                        g_string_append (gvalue, value);
                        g_free (tail->value);
                        g_free (value);
                        g_free (name);

                        tail->value = gvalue->str;
                        g_string_free (gvalue, FALSE);
                  } else {
                        if (last_was_rfc2184) {
                              /* We've finished gathering the values for the last param
                               * so it is now safe to decode it.
                               */
                              char *val;

                              val = rfc2184_decode (tail->value, strlen (tail->value));
                              if (val) {
                                    g_free (tail->value);
                                    tail->value = val;
                              }
                        }

                        param = g_new (GMimeParam, 1);
                        param->next = NULL;
                        param->name = name;
                        param->value = value;

                        if (head == NULL)
                              head = param;
                        if (tail)
                              tail->next = param;
                        tail = param;
                  }

                  last_was_rfc2184 = is_rfc2184;

                  decode_lwsp (&inptr);
            }

            /* Another parameter can only follow a ';'.  This also covers
             * an undecodable/empty parameter: if we are sitting on a ';'
             * we skip it and try again, otherwise we stop.  The cursor is
             * only advanced over a ';' so it never moves past the
             * terminating NUL. */
            if (*inptr != ';')
                  break;

            inptr++;
      }

      if (last_was_rfc2184) {
            /* We've finished gathering the values for the last param
             * so it is now safe to decode it.
             */
            char *val;

            val = rfc2184_decode (tail->value, strlen (tail->value));
            if (val) {
                  g_free (tail->value);
                  tail->value = val;
            }
      }

      *in = inptr;

      return head;
}


/**
 * g_mime_param_new_from_string: Create a new MIME Param object
 * @string: input string
 *
 * Parses @string as a ';'-separated list of parameters and builds the
 * corresponding parameter list.
 *
 * Returns a GMimeParam structure based on @string, or NULL if @string
 * is NULL or no parameter could be decoded.
 **/
GMimeParam *
g_mime_param_new_from_string (const char *string)
{
      const char *cursor;

      g_return_val_if_fail (string != NULL, NULL);

      cursor = string;

      return decode_param_list (&cursor);
}


/**
 * g_mime_param_destroy: Destroy the MIME Param
 * @param: Mime param list to destroy
 *
 * Frees every node of the list starting at @param, together with each
 * node's name and value strings.  Passing NULL is a no-op.
 **/
void
g_mime_param_destroy (GMimeParam *param)
{
      GMimeParam *node, *following;

      for (node = param; node != NULL; node = following) {
            /* grab the link before the node is released */
            following = node->next;

            g_free (node->name);
            g_free (node->value);
            g_free (node);
      }
}


/**
 * g_mime_param_append:
 * @params: param list
 * @name: new param name
 * @value: new param value
 *
 * Creates a new parameter from @name and @value and links it onto the
 * end of @params.  @params may be NULL, in which case the new
 * parameter becomes the list.
 *
 * Returns a param list with the new param of name @name and value
 * @value appended to the list of params @params.
 **/
GMimeParam *
g_mime_param_append (GMimeParam *params, const char *name, const char *value)
{
      GMimeParam **link;

      g_return_val_if_fail (name != NULL, params);
      g_return_val_if_fail (value != NULL, params);

      /* walk to the NULL link that terminates the list */
      for (link = &params; *link != NULL; link = &(*link)->next)
            ;

      *link = g_mime_param_new (name, value);

      return params;
}


/**
 * g_mime_param_append_param:
 * @params: param list
 * @param: param to append
 *
 * Appends @param to the param list @params.
 *
 * Returns a param list with the new param @param appended to the list
 * of params @params.
 **/
GMimeParam *
g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
{
      GMimeParam *p;
      
      g_return_val_if_fail (param != NULL, params);
      
      if (params) {
            p = params;
            while (p->next)
                  p = p->next;
            p->next = param;
      } else
            params = param;
      
      return params;
}

/* Prepares a parameter value for output.
 *
 * If @in is pure 7bit and shorter than GMIME_FOLD_LEN it is returned
 * as a plain copy and *encoded is set to FALSE.  Otherwise the value
 * is converted from UTF-8 to a suitable charset where possible and
 * returned in rfc2184 form,
 *
 *     charset''percent-encoded-text
 *
 * with *encoded set to TRUE.  If no conversion is possible the
 * original bytes are emitted labelled as UTF-8, so the value is never
 * silently dropped.
 *
 * Returns a newly allocated string. */
static char *
encode_param (const unsigned char *in, gboolean *encoded)
{
      register const unsigned char *inptr;
      unsigned char *outbuf = NULL;
      iconv_t cd = (iconv_t) -1;
      const char *charset = NULL;
      char *outstr;
      GString *out;

      *encoded = FALSE;

      /* look for the first 8bit byte within the first line's worth */
      for (inptr = in; *inptr && inptr - in < GMIME_FOLD_LEN; inptr++)
            if (*inptr > 127)
                  break;

      if (*inptr == '\0')
            return g_strdup ((const char *) in);

      if (*inptr > 127)
            charset = g_mime_charset_best (in, strlen ((const char *) in));

      if (!charset)
            charset = "iso-8859-1";

      if (g_strcasecmp (charset, "UTF-8") != 0)
            cd = g_mime_iconv_open (charset, "UTF-8");

      if (cd != (iconv_t) -1) {
            outbuf = g_mime_iconv_strdup (cd, in);
            g_mime_iconv_close (cd);
      }

      if (outbuf) {
            inptr = outbuf;
      } else {
            /* either no conversion was needed/available or it failed:
             * fall back to emitting the input as-is, labelled UTF-8 */
            charset = "UTF-8";
            inptr = in;
      }

      /* FIXME: set the 'language' as well, assuming we can get that info...? */
      out = g_string_new ("");
      g_string_sprintfa (out, "%s''", charset);

      while (*inptr) {
            unsigned char c = *inptr++;

            /* Besides 8bit, whitespace and unsafe characters, also escape
             * the characters that are significant in an rfc2184 value:
             * '%' (escape introducer), '\'' (charset/language delimiter),
             * '*' (extended-parameter marker) and ';' (parameter
             * separator). */
            if (c > 127 || is_lwsp (c) || !(gmime_special_table[c] & IS_ESAFE) ||
                c == '%' || c == '\'' || c == '*' || c == ';') {
                  g_string_sprintfa (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
            } else {
                  g_string_append_c (out, c);
            }
      }

      g_free (outbuf);

      outstr = out->str;
      g_string_free (out, FALSE);
      *encoded = TRUE;

      return outstr;
}

/* Appends the first @len bytes of @in to @out as a quoted string:
 * wrapped in double quotes, with every '"' and '\\' preceded by a
 * backslash. */
static void
g_string_append_len_quoted (GString *out, const char *in, size_t len)
{
      size_t i;

      g_string_append_c (out, '"');

      for (i = 0; i < len; i++) {
            char c = in[i];

            if (c == '\\' || c == '"')
                  g_string_append_c (out, '\\');

            g_string_append_c (out, c);
      }

      g_string_append_c (out, '"');
}

/* Appends every parameter of the list @param that has a value to
 * @out, each one preceded by a "; " separator, or by ";\n\t" when
 * @fold is set and the current line would become too long.
 *
 * Values are passed through encode_param(); unencoded values that
 * contain tspecials or whitespace are written as quoted strings.  A
 * parameter whose name and value do not fit on one line is split into
 * numbered rfc2184 parts (name*0=, name*1=, ...).
 *
 * `used' tracks how many characters have been written on the current
 * output line. */
static void
param_list_format (GString *out, GMimeParam *param, gboolean fold)
{
      int used = out->len;

      while (param) {
            gboolean encoded = FALSE;
            gboolean quote = FALSE;
            unsigned nlen, vlen;
            int here = out->len;
            char *value;

            if (!param->value) {
                  param = param->next;
                  continue;
            }

            value = encode_param (param->value, &encoded);
            if (!value) {
                  w(g_warning ("appending parameter %s=%s violates rfc2184",
                             param->name, param->value));
                  value = g_strdup (param->value);
            }

            if (!encoded) {
                  char *ch;

                  /* a plain value needs quoting if it contains any
                   * tspecial or whitespace character */
                  for (ch = value; *ch; ch++) {
                        if (is_tspecial (*ch) || is_lwsp (*ch))
                              break;
                  }

                  quote = (*ch != '\0');
            }

            nlen = strlen (param->name);
            vlen = strlen (value);

            if (used + nlen + vlen > GMIME_FOLD_LEN - 8) {
                  if (fold)
                        g_string_append (out, ";\n\t");
                  else
                        g_string_append (out, "; ");

                  here = out->len;
                  used = 0;
            } else
                  out = g_string_append (out, "; ");

            if (nlen + vlen > GMIME_FOLD_LEN - 10) {
                  /* we need to do special rfc2184 parameter wrapping */
                  int maxlen = (int) GMIME_FOLD_LEN - (int) nlen - 10;
                  char *inptr, *inend;
                  int i = 0;

                  /* A long parameter name would leave no room (or a
                   * negative amount) for the value, and the loop below
                   * would then never advance.  Always allow at least 3
                   * characters per part: that fits one whole %XX escape
                   * and guarantees forward progress. */
                  if (maxlen < 3)
                        maxlen = 3;

                  inptr = value;
                  inend = value + vlen;

                  while (inptr < inend) {
                        char *ptr = inptr + MIN (inend - inptr, maxlen);

                        if (encoded && ptr < inend) {
                              /* be careful not to break an encoded char (ie %20) */
                              char *q = ptr;
                              int j = 2;

                              for ( ; j > 0 && q > inptr && *q != '%'; j--, q--);
                              if (*q == '%')
                                    ptr = q;
                        }

                        if (i != 0) {
                              if (fold)
                                    g_string_append (out, ";\n\t");
                              else
                                    g_string_append (out, "; ");

                              here = out->len;
                              used = 0;
                        }

                        g_string_sprintfa (out, "%s*%d%s=", param->name, i++, encoded ? "*" : "");

                        if (encoded || !quote)
                              g_string_append_len (out, inptr, ptr - inptr);
                        else
                              g_string_append_len_quoted (out, inptr, ptr - inptr);

                        used += (out->len - here);

                        inptr = ptr;
                  }
            } else {
                  g_string_sprintfa (out, "%s%s=", param->name, encoded ? "*" : "");

                  if (encoded || !quote)
                        g_string_append_len (out, value, vlen);
                  else
                        g_string_append_len_quoted (out, value, vlen);

                  used += (out->len - here);
            }

            g_free (value);

            param = param->next;
      }
}


/**
 * g_mime_param_write_to_string:
 * @param: MIME Param list
 * @fold: specifies whether or not to fold headers
 * @string: output string
 *
 * Assumes the output string contains only the Content-* header and
 * its immediate value.
 *
 * Appends the formatted params to the string @string.  Does nothing
 * (beyond the g_return_if_fail check) when @string is NULL.
 **/
void
g_mime_param_write_to_string (GMimeParam *param, gboolean fold, GString *string)
{
      g_return_if_fail (string != NULL);
      
      /* all formatting, quoting and folding is done by param_list_format() */
      param_list_format (string, param, fold);
}

Generated by  Doxygen 1.6.0   Back to index