Logo Search packages:      
Sourcecode: pan version File versions  Download package

gmime-utils.c

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 *  Authors: Michael Zucchi <notzed@helixcode.com>
 *           Jeffrey Stedfast <fejj@helixcode.com>
 *
 *  Copyright 2000 Helix Code, Inc. (www.helixcode.com)
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
 *
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>

#include "gmime-utils.h"
#include "gmime-table-private.h"
#include "gmime-part.h"
#include "gmime-charset.h"
#include "gmime-iconv.h"
#include "gmime-iconv-utils.h"

/* d(x) expands to nothing, so any statement wrapped in it is compiled out
 * (presumably debug output -- it wraps printf calls in this file) */
#define d(x)
/* w(x) expands to its argument, so wrapped statements are kept
 * (presumably warnings -- confirm against its users) */
#define w(x) x

/* fallback used when HAVE_ISBLANK is not defined: true for space or tab.
 * NOTE: the argument is not parenthesized and is evaluated twice. */
#ifndef HAVE_ISBLANK
#define isblank(c) (c == ' ' || c == '\t')
#endif

/* 6-bit value -> uuencode character; zero is written as '`' rather than ' ' */
#define GMIME_UUENCODE_CHAR(c) ((c) ? (c) + ' ' : '`')
/* uuencode character -> 6-bit value */
#define     GMIME_UUDECODE_CHAR(c) (((c) - ' ') & 077)

/* half of GMIME_FOLD_LEN */
#define GMIME_FOLD_PREENCODED  (GMIME_FOLD_LEN / 2)

/* date parser macros */

/* Character sets: each string lists every character that may appear in a
 * token of the given class (used by the disabled table generator,
 * gmime_datetok_table_init, to build gmime_datetok_table). */
#define NUMERIC_CHARS          "1234567890"
#define WEEKDAY_CHARS          "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
#define MONTH_CHARS            "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
#define TIMEZONE_ALPHA_CHARS   "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
#define TIMEZONE_NUMERIC_CHARS "-+1234567890"
#define TIME_CHARS             "1234567890:"

/* Per-character flag bits stored in gmime_datetok_table and OR-ed together
 * into a token's mask.  A NON_* bit means "this character is NOT in that
 * class", so a token belongs to a class only if the bit is still clear
 * after all of its characters have been merged.  The HAS_* bits record
 * that a ':' or a sign character was seen. */
#define DATE_TOKEN_NON_NUMERIC          (1 << 0)
#define DATE_TOKEN_NON_WEEKDAY          (1 << 1)
#define DATE_TOKEN_NON_MONTH            (1 << 2)
#define DATE_TOKEN_NON_TIME             (1 << 3)
#define DATE_TOKEN_HAS_COLON            (1 << 4)
#define DATE_TOKEN_NON_TIMEZONE_ALPHA   (1 << 5)
#define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
#define DATE_TOKEN_HAS_SIGN             (1 << 7)


/* the 64 base64 digits in value order: 'A'-'Z', 'a'-'z', '0'-'9', '+', '/' */
static char *base64_alphabet =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* upper-case hexadecimal digit for each value 0..15 */
static unsigned char tohex[16] = {
      '0', '1', '2', '3', '4', '5', '6', '7',
      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};

/* Reverse base64 lookup, indexed by byte value: entries for the 64
 * alphabet characters hold their 6-bit value, '=' (index 61) holds 0,
 * and every other byte holds 255. */
static unsigned char gmime_base64_rank[256] = {
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
      255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
       15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
      255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
       41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
      255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};

/* uudecode lookup, indexed by byte value: entry c holds (c - ' ') & 077,
 * i.e. the same value GMIME_UUDECODE_CHAR(c) computes, so the pattern
 * repeats every 64 bytes. */
static unsigned char gmime_uu_rank[256] = {
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
};

/* Per-byte DATE_TOKEN_* flags used by the date tokenizer.  The values are
 * in the format printed by the disabled generator
 * gmime_datetok_table_init(); e.g. 111 means "in none of the character
 * classes" (all six NON_* bits set) and 38 (the digits) means
 * NON_WEEKDAY | NON_MONTH | NON_TIMEZONE_ALPHA. */
static unsigned char gmime_datetok_table[256] = {
      128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
      111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
       79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
      111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
      107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
      111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
};

/* Table of named timezones and their offsets from GMT.  Offsets are signed
 * decimal numbers in +/-HHMM form (e.g. -500 is five hours behind GMT),
 * matching how the date parsers split them with "/ 100" and "% 100".
 * The single-letter entries are the military zone names. */
static struct {
      char *name;
      int offset;
} tz_offsets [] = {
      { "UT", 0 },
      { "GMT", 0 },
      { "EST", -500 },  /* these are all US timezones.  bloody yanks */
      { "EDT", -400 },
      { "CST", -600 },
      { "CDT", -500 },
      { "MST", -700 },
      { "MDT", -600 },
      { "PST", -800 },
      { "PDT", -700 },
      { "Z", 0 },
      { "A", -100 },
      { "M", -1200 },
      { "N", 100 },
      { "Y", 1200 },
};

/* three-letter month abbreviations, indexed like struct tm's tm_mon (0 = Jan) */
static char *tm_months[] = {
      "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

/* three-letter weekday abbreviations, indexed like struct tm's tm_wday (0 = Sun) */
static char *tm_days[] = {
      "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};


/**
 * g_mime_utils_header_format_date:
 * @time: the date to format, as a time_t
 * @offset: timezone offset in signed HHMM form (e.g. -500)
 *
 * Formats @time as an rfc822 date header value.  The timestamp is
 * shifted by @offset before being broken down, so the printed fields
 * are the wall-clock time in that zone, followed by the offset itself.
 *
 * Returns a newly allocated string holding the formatted date.
 **/
char *
g_mime_utils_header_format_date (time_t time, int offset)
{
      int hours = offset / 100;
      int minutes = offset % 100;
      struct tm tm;
      
      /* move from GMT to the local time of the requested zone */
      time += (hours * (60 * 60)) + minutes * 60;
      
#ifdef HAVE_GMTIME_R
      gmtime_r (&time, &tm);
#else
      /* gmtime() hands back static storage, so take a private copy */
      tm = *gmtime (&time);
#endif
      
      return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d %+05d",
                        tm_days[tm.tm_wday],
                        tm.tm_mday,
                        tm_months[tm.tm_mon],
                        tm.tm_year + 1900,
                        tm.tm_hour,
                        tm.tm_min,
                        tm.tm_sec,
                        offset);
}

/* This is where it gets ugly... */

/* One token of a date string, as produced by datetok(). */
struct _date_token {
      struct _date_token *next;      /* next token in the list, or NULL */
      const unsigned char *start;    /* first byte of the token; points into the
                                      * tokenized string, nothing is copied */
      unsigned int len;              /* token length in bytes */
      unsigned int mask;             /* OR of gmime_datetok_table[] entries for
                                      * every byte of the token (DATE_TOKEN_*) */
};

/*
 * datetok:
 * Splits @date into a linked list of tokens.  Leading whitespace is
 * skipped; a token then runs from its first byte up to (not including)
 * the next '-', '/', ',', tab, CR, LF or space.  The first byte of a token
 * is never treated as a delimiter, which lets a token begin with a sign.
 * Each token records the OR of the gmime_datetok_table[] flags of its
 * bytes.  Tokens point into @date; the caller frees the list nodes.
 *
 * Returns the head of the list, or NULL if @date holds no tokens.
 */
static struct _date_token *
datetok (const char *date)
{
      struct _date_token *head = NULL, **link = &head, *tok;
      const unsigned char *cur, *stop;
      unsigned int flags;
      
      cur = (const unsigned char *) date;
      for (;;) {
            /* skip whitespace in front of the token */
            while (*cur && isspace ((int) *cur))
                  cur++;
            
            if (*cur == '\0')
                  break;
            
            /* the first byte always belongs to the token */
            flags = gmime_datetok_table[*cur];
            
            /* extend the token up to the next delimiter */
            for (stop = cur + 1; *stop && !strchr ("-/,\t\r\n ", *stop); stop++)
                  flags |= gmime_datetok_table[*stop];
            
            tok = g_malloc (sizeof (struct _date_token));
            tok->next = NULL;
            tok->start = cur;
            tok->len = stop - cur;
            tok->mask = flags;
            
            /* append to the end of the list */
            *link = tok;
            link = &tok->next;
            
            if (*stop == '\0')
                  break;
            
            /* step over the delimiter */
            cur = stop + 1;
      }
      
      return head;
}

/*
 * decode_int:
 * @in: start of the text to decode (need not be NUL-terminated)
 * @inlen: number of bytes to decode
 *
 * Decodes an optionally signed ('+' or '-') run of decimal digits.
 *
 * Returns the decoded value, or -1 if a non-digit is encountered or the
 * value does not fit in an int.  Note that -1 is therefore ambiguous with
 * the literal input "-1"; callers treat -1 as failure.  An empty input (or
 * a lone sign) decodes to 0.
 */
static int
decode_int (const unsigned char *in, unsigned int inlen)
{
      register const unsigned char *inptr;
      const unsigned char *inend;
      int sign = 1, val = 0, digit;
      
      inptr = in;
      inend = in + inlen;
      
      /* only look at the sign byte if there is at least one byte */
      if (inptr < inend) {
            if (*inptr == '-') {
                  sign = -1;
                  inptr++;
            } else if (*inptr == '+') {
                  inptr++;
            }
      }
      
      for ( ; inptr < inend; inptr++) {
            if (!isdigit ((int) *inptr))
                  return -1;
            
            digit = *inptr - '0';
            
            /* refuse values that would overflow (signed overflow is undefined) */
            if (val > (INT_MAX - digit) / 10)
                  return -1;
            
            val = (val * 10) + digit;
      }
      
      return val * sign;
}

/* Disabled (compiled out by #if 0).
 * get_days_in_month: returns the number of days in @month of @year, where
 * @month is 1-based (2 is the month with the leap-year case), or 0 for an
 * out-of-range month. */
#if 0
static int
get_days_in_month (int month, int year)
{
        switch (month) {
      case 1:
      case 3:
      case 5:
      case 7:
      case 8:
      case 10:
      case 12:
              return 31;
      case 4:
      case 6:
      case 9:
      case 11:
              return 30;
      case 2:
              if (g_date_is_leap_year (year))
                    return 29;
            else
                    return 28;
      default:
              return 0;
      }
}
#endif

/*
 * get_wday:
 * Matches the first three bytes of the token at @in (length @inlen)
 * case-insensitively against the weekday abbreviations in tm_days.
 *
 * Returns the weekday index (0 = Sun), or -1 if @in is NULL, the token is
 * shorter than three bytes, or no abbreviation matches.
 */
static int
get_wday (const unsigned char *in, unsigned int inlen)
{
      int day = 0;
      
      g_return_val_if_fail (in != NULL, -1);
      
      if (inlen >= 3) {
            while (day < 7) {
                  if (g_strncasecmp (in, tm_days[day], 3) == 0)
                        return day;
                  day++;
            }
      }
      
      /* too short, or not a weekday name */
      return -1;
}

/*
 * get_mday:
 * Decodes the token at @in (length @inlen) as a day of the month.
 *
 * Returns the decoded value if it lies in 0..31, otherwise -1 (also -1
 * when @in is NULL).
 */
static int
get_mday (const unsigned char *in, unsigned int inlen)
{
      int day;
      
      g_return_val_if_fail (in != NULL, -1);
      
      day = decode_int (in, inlen);
      
      return (day >= 0 && day <= 31) ? day : -1;
}

/*
 * get_month:
 * Matches the first three bytes of the token at @in (length @inlen)
 * case-insensitively against the month abbreviations in tm_months.
 *
 * Returns the month index (0 = Jan), or -1 if @in is NULL, the token is
 * shorter than three bytes, or no abbreviation matches.
 */
static int
get_month (const unsigned char *in, unsigned int inlen)
{
      int month = 0;
      
      g_return_val_if_fail (in != NULL, -1);
      
      if (inlen >= 3) {
            while (month < 12) {
                  if (g_strncasecmp (in, tm_months[month], 3) == 0)
                        return month;
                  month++;
            }
      }
      
      /* too short, or not a month name */
      return -1;
}

/*
 * get_year:
 * Decodes the token at @in (length @inlen) as a year.  Values below 100
 * are treated as two-digit years: 0..69 map to 2000..2069 and 70..99 map
 * to 1970..1999.
 *
 * Returns the full year, or -1 if @in is NULL, the token is not a
 * non-negative number, or the resulting year is earlier than 1969.
 */
static int
get_year (const unsigned char *in, unsigned int inlen)
{
      int year;
      
      g_return_val_if_fail (in != NULL, -1);
      
      /* reject decode failures and any signed negative input: a token such
       * as "-5" must not be turned into a plausible year by the two-digit
       * adjustment below */
      year = decode_int (in, inlen);
      if (year < 0)
            return -1;
      
      if (year < 100)
            year += (year < 70) ? 2000 : 1900;
      
      if (year < 1969)
            return -1;
      
      return year;
}

/*
 * get_time:
 * Parses the token at @in (length @inlen) as up to three ':'-separated
 * decimal fields, stored into @hour, @min and @sec in that order.  All
 * three outputs are zeroed first, so missing fields read as 0.  No range
 * checking is performed on the values.
 *
 * Returns TRUE on success, FALSE if the token contains a byte that is
 * neither a digit nor ':' or has more than two colons.
 */
static gboolean
get_time (const unsigned char *in, unsigned int inlen, int *hour, int *min, int *sec)
{
      const unsigned char *p = in;
      const unsigned char *end = in + inlen;
      int *fields[3];
      int idx = 0;
      
      fields[0] = hour;
      fields[1] = min;
      fields[2] = sec;
      
      *hour = *min = *sec = 0;
      
      for ( ; p < end; p++) {
            if (*p == ':') {
                  /* move on to the next field; a third colon is an error */
                  if (++idx > 2)
                        return FALSE;
            } else if (isdigit ((int) *p)) {
                  *fields[idx] = (*fields[idx] * 10) + (*p - '0');
            } else {
                  return FALSE;
            }
      }
      
      return TRUE;
}

/*
 * get_tzone:
 * @token: in/out cursor into the token list
 *
 * Tries to read a timezone from the token at *@token and, failing that,
 * from the token after it (at most two tokens are examined; *@token is
 * advanced past each token that did not match).
 *
 * A token starting with '+' or '-' is decoded as a numeric offset.  Any
 * other token is compared against the names in tz_offsets after removing
 * a leading '(' and a trailing ')'.  The comparison is case-sensitive and
 * must match the whole name: a prefix match would, for example, read an
 * unknown zone such as "MET" as the military zone "M".
 *
 * Returns the offset in signed HHMM form, or -1 if no timezone was
 * recognised (which is indistinguishable from a literal "-0001").
 */
static int
get_tzone (struct _date_token **token)
{
      const size_t n_zones = sizeof (tz_offsets) / sizeof (tz_offsets[0]);
      int i;
      
      for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
            const unsigned char *inptr = (*token)->start;
            unsigned int inlen = (*token)->len;
            size_t t;
            
            if (*inptr == '+' || *inptr == '-')
                  return decode_int (inptr, inlen);
            
            /* strip the parentheses of a "(EST)" style comment */
            if (*inptr == '(') {
                  inptr++;
                  inlen--;
            }
            
            if (inlen > 0 && inptr[inlen - 1] == ')')
                  inlen--;
            
            for (t = 0; t < n_zones; t++) {
                  size_t len = strlen (tz_offsets[t].name);
                  
                  if (len != inlen)
                        continue;
                  
                  if (!strncmp ((const char *) inptr, tz_offsets[t].name, len))
                        return tz_offsets[t].offset;
            }
      }
      
      return -1;
}

/*
 * parse_rfc822_date:
 * @tokens: token list produced by datetok()
 * @tzone: if non-NULL, receives the timezone offset (signed HHMM form)
 *
 * Strict parser: expects the tokens in the order
 *   [weekday] day-of-month month-name year time [timezone]
 * A missing or unrecognised timezone is treated as GMT.
 *
 * Returns the date as a time_t, or 0 if @tokens is NULL or any required
 * field is missing or malformed (in which case @tzone is not written).
 */
static time_t
parse_rfc822_date (struct _date_token *tokens, int *tzone)
{
      struct _date_token *cur;
      int hh, mm, ss, tzoff, value;
      struct tm tm;
      time_t when;
      
      g_return_val_if_fail (tokens != NULL, (time_t) 0);
      
      cur = tokens;
      memset ((void *) &tm, 0, sizeof (struct tm));
      
      /* the weekday is optional */
      value = get_wday (cur->start, cur->len);
      if (value != -1) {
            tm.tm_wday = value;
            cur = cur->next;
      }
      
      /* day of the month */
      if (cur == NULL)
            return (time_t) 0;
      value = get_mday (cur->start, cur->len);
      if (value == -1)
            return (time_t) 0;
      tm.tm_mday = value;
      cur = cur->next;
      
      /* month name */
      if (cur == NULL)
            return (time_t) 0;
      value = get_month (cur->start, cur->len);
      if (value == -1)
            return (time_t) 0;
      tm.tm_mon = value;
      cur = cur->next;
      
      /* year */
      if (cur == NULL)
            return (time_t) 0;
      value = get_year (cur->start, cur->len);
      if (value == -1)
            return (time_t) 0;
      tm.tm_year = value - 1900;
      cur = cur->next;
      
      /* hh:mm[:ss] */
      if (cur == NULL || !get_time (cur->start, cur->len, &hh, &mm, &ss))
            return (time_t) 0;
      tm.tm_hour = hh;
      tm.tm_min = mm;
      tm.tm_sec = ss;
      cur = cur->next;
      
      /* timezone; fall back to GMT when absent or unrecognised */
      tzoff = 0;
      if (cur != NULL) {
            value = get_tzone (&cur);
            if (value != -1)
                  tzoff = value;
      }
      
      /* mktime() interprets tm as local time; undo the local zone so that
       * the fields are effectively read as GMT */
      when = mktime (&tm);
#if defined(HAVE_TIMEZONE)
      when -= timezone;
#elif defined(HAVE_TM_GMTOFF)
      when += tm.tm_gmtoff;
#else
#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
#endif
      
      /* the fields were in the sender's zone: remove its offset to get GMT */
      when -= ((tzoff / 100) * 60 * 60) + (tzoff % 100) * 60;
      
      if (tzone)
            *tzone = tzoff;
      
      return when;
}


/* Token classification helpers.  Each takes a pointer to a
 * struct _date_token and tests its mask: a token is "of class X" when no
 * byte in it set the corresponding DATE_TOKEN_NON_X bit.  Time tokens must
 * additionally contain a ':' and numeric timezones a sign character. */
#define date_token_mask(t)  (((struct _date_token *) t)->mask)
#define is_numeric(t)       ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
#define is_weekday(t)       ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
#define is_month(t)         ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
#define is_time(t)          (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
#define is_tzone_alpha(t)   ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
#define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
#define is_tzone(t)         (is_tzone_alpha (t) || is_tzone_numeric (t))

/*
 * parse_broken_date:
 * @tokens: token list produced by datetok() (may be NULL)
 * @tzone: if non-NULL, receives the timezone offset (signed HHMM form)
 *
 * Lenient fallback parser for dates that are not in rfc822 order.  Every
 * token is classified by its mask and by what has already been found, and
 * is assigned to the first field it plausibly fits: weekday name, month
 * name, time, timezone, 4-digit year, numeric month, day of month or
 * 2-digit year.  Tokens that fit nothing are ignored.  A zero value in
 * tm_hour/tm_min/tm_sec, tm_mday or tm_year is used as the "not yet set"
 * marker for those fields.
 *
 * Returns the date as a time_t built from whatever fields were found.
 */
static time_t
parse_broken_date (struct _date_token *tokens, int *tzone)
{
      gboolean got_wday, got_month, got_tzone;
      int hour, min, sec, offset, n;
      struct _date_token *token;
      struct tm tm;
      time_t t;
      
      memset ((void *) &tm, 0, sizeof (struct tm));
      got_wday = got_month = got_tzone = FALSE;
      offset = 0;
      
      token = tokens;
      while (token) {
            if (is_weekday (token) && !got_wday) {
                  if ((n = get_wday (token->start, token->len)) != -1) {
                        d(printf ("weekday; ");)
                        got_wday = TRUE;
                        tm.tm_wday = n;
                        goto next_token;
                  }
            }
            
            if (is_month (token) && !got_month) {
                  if ((n = get_month (token->start, token->len)) != -1) {
                        d(printf ("month; ");)
                        got_month = TRUE;
                        tm.tm_mon = n;
                        goto next_token;
                  }
            }
            
            if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
                  if (get_time (token->start, token->len, &hour, &min, &sec)) {
                        d(printf ("time; ");)
                        tm.tm_hour = hour;
                        tm.tm_min = min;
                        tm.tm_sec = sec;
                        goto next_token;
                  }
            }
            
            if (is_tzone (token) && !got_tzone) {
                  /* use a copy: get_tzone() advances the cursor it is given */
                  struct _date_token *tz = token;
                  
                  if ((n = get_tzone (&tz)) != -1) {
                        d(printf ("tzone; ");)
                        got_tzone = TRUE;
                        offset = n;
                        goto next_token;
                  }
            }
            
            if (is_numeric (token)) {
                  if (token->len == 4 && !tm.tm_year) {
                        if ((n = get_year (token->start, token->len)) != -1) {
                              d(printf ("year; ");)
                              tm.tm_year = n - 1900;
                              goto next_token;
                        }
                  } else {
                        n = decode_int (token->start, token->len);
                        
                        /* a number followed by another number is taken as a
                         * numeric month, but only if it can actually be one;
                         * otherwise it falls through to day/year below */
                        if (!got_month && !got_wday && token->next && is_numeric (token->next)
                            && n >= 1 && n <= 12) {
                              d(printf ("mon; ");)
                              got_month = TRUE;
                              tm.tm_mon = n - 1;
                              goto next_token;
                        } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
                              d(printf ("mday; ");)
                              tm.tm_mday = n;
                              goto next_token;
                        } else if (!tm.tm_year) {
                              /* only accept the year if it decoded cleanly */
                              if ((n = get_year (token->start, token->len)) != -1) {
                                    d(printf ("2-digit year; ");)
                                    tm.tm_year = n - 1900;
                                    goto next_token;
                              }
                        }
                  }
            }
            
            d(printf ("???; ");)
            
      next_token:
            
            token = token->next;
      }
      
      d(printf ("\n");)
      
      /* mktime() interprets tm as local time; undo the local zone so that
       * the fields are effectively read as GMT */
      t = mktime (&tm);
#if defined(HAVE_TIMEZONE)
      t -= timezone;
#elif defined(HAVE_TM_GMTOFF)
      t += tm.tm_gmtoff;
#else
#error Neither HAVE_TIMEZONE nor HAVE_TM_GMTOFF defined. Rerun autoheader, autoconf, etc.
#endif
      
      /* t is now GMT of the time we want, but not offset by the timezone ... */
      
      /* this should convert the time to the GMT equiv time */
      t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
      
      if (tzone)
            *tzone = offset;
      
      return t;
}

/* Disabled (compiled out by #if 0).
 * gmime_datetok_table_init: rebuilds gmime_datetok_table from the *_CHARS
 * character sets and prints it to stdout as a C array declaration.  For
 * every byte value it sets the DATE_TOKEN_NON_* bit of each class whose
 * character set does not contain the byte, plus HAS_COLON for ':' and
 * HAS_SIGN for '+' or '-'.
 * Note: strchr() also matches the terminating NUL of its string, which is
 * why the entry for byte 0 carries no NON_* bits but does get HAS_SIGN. */
#if 0
static void
gmime_datetok_table_init ()
{
      int i;
      
      memset (gmime_datetok_table, 0, sizeof (gmime_datetok_table));
      
      for (i = 0; i < 256; i++) {
            if (!strchr (NUMERIC_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_NUMERIC;
            
            if (!strchr (WEEKDAY_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_WEEKDAY;
            
            if (!strchr (MONTH_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_MONTH;
            
            if (!strchr (TIME_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_TIME;
            
            if (!strchr (TIMEZONE_ALPHA_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_ALPHA;
            
            if (!strchr (TIMEZONE_NUMERIC_CHARS, i))
                  gmime_datetok_table[i] |= DATE_TOKEN_NON_TIMEZONE_NUMERIC;
            
            if (((char) i) == ':')
                  gmime_datetok_table[i] |= DATE_TOKEN_HAS_COLON;
            
            if (strchr ("+-", i))
                  gmime_datetok_table[i] |= DATE_TOKEN_HAS_SIGN;
      }
      
      /* emit the table, 16 entries per line */
      printf ("static unsigned char gmime_datetok_table[256] = {");
      for (i = 0; i < 256; i++) {
            if (i % 16 == 0)
                  printf ("\n\t");
            printf ("%3d,", gmime_datetok_table[i]);
      }
      printf ("\n};\n");
}
#endif


/**
 * g_mime_utils_header_decode_date:
 * @in: input date string
 * @saveoffset: if non-NULL, receives the timezone offset of the date in
 * signed HHMM form (e.g. -500)
 *
 * Decodes the rfc822 date string and saves the GMT offset into
 * @saveoffset if non-NULL.  The strict rfc822 parser is tried first; if
 * it rejects the input, a lenient parser for malformed dates is used.
 *
 * Returns the time_t representation of the date string specified by
 * @in, or 0 if @in is NULL or contains no tokens at all (in which case
 * @saveoffset, if non-NULL, is set to 0).
 **/
time_t
g_mime_utils_header_decode_date (const char *in, int *saveoffset)
{
      struct _date_token *token, *tokens;
      time_t date;
      
      /* nothing to parse: don't hand an empty token list to the parsers,
       * which would otherwise build a date out of an all-zero struct tm */
      if (in == NULL || !(tokens = datetok (in))) {
            if (saveoffset)
                  *saveoffset = 0;
            
            return (time_t) 0;
      }
      
      date = parse_rfc822_date (tokens, saveoffset);
      if (!date)
            date = parse_broken_date (tokens, saveoffset);
      
      /* cleanup */
      while (tokens) {
            token = tokens;
            tokens = tokens->next;
            g_free (token);
      }
      
      return date;
}


/**
 * g_mime_utils_header_fold:
 * @in: input header string
 *
 * Folds a header so that lines stay within GMIME_FOLD_LEN columns.
 * Input no longer than GMIME_FOLD_LEN is returned as a plain copy.
 * Otherwise the header is rebuilt word by word (words are separated by
 * spaces and tabs): a word that would overflow the line is moved to a
 * new line introduced by "\n\t", words too long for any line are cut
 * into pieces, and every tab in the input becomes a fold point.
 *
 * Returns an allocated string containing the folded header.
 **/
char *
g_mime_utils_header_fold (const char *in)
{
      gboolean trailing_space = FALSE;
      const char *p = in;
      size_t word, col;
      GString *out;
      char *folded;
      
      if (strlen (in) <= GMIME_FOLD_LEN)
            return g_strdup (in);
      
      out = g_string_new ("");
      col = 0;
      
      while (*p != '\0') {
            /* length of the word starting at p (0 if p is on whitespace) */
            word = strcspn (p, " \t");
            
            if (col + word > GMIME_FOLD_LEN) {
                  /* the fold replaces the space we just emitted */
                  if (trailing_space)
                        g_string_truncate (out, out->len - 1);
                  
                  g_string_append (out, "\n\t");
                  col = 1;
                  
                  /* a word longer than a whole line gets chopped up */
                  while (col + word > GMIME_FOLD_LEN) {
                        size_t chunk = GMIME_FOLD_LEN - col;
                        
                        g_string_append_len (out, p, chunk);
                        p += chunk;
                        word -= chunk;
                        g_string_append (out, "\n\t");
                        col = 1;
                  }
                  
                  /* whatever is left of the word is emitted next time round */
                  trailing_space = FALSE;
                  continue;
            }
            
            if (word > 0) {
                  g_string_append_len (out, p, word);
                  col += word;
                  p += word;
                  trailing_space = FALSE;
                  continue;
            }
            
            /* p is on a whitespace character */
            if (*p == '\t') {
                  /* a tab is most likely where a previous mailer folded
                     the header, so fold there again */
                  g_string_append (out, "\n\t");
                  col = 1;
                  p++;
                  trailing_space = FALSE;
            } else {
                  g_string_append_c (out, *p);
                  p++;
                  col++;
                  trailing_space = TRUE;
            }
      }
      
      folded = out->str;
      g_string_free (out, FALSE);
      
      return folded;
}


/**
 * g_mime_utils_header_printf:
 * @format: printf-style format string
 * @Varargs: arguments for @format
 *
 * Formats a header from @format and its arguments, then folds the
 * result with g_mime_utils_header_fold().
 *
 * Returns an allocated string containing the folded header specified
 * by @format and the following arguments.
 **/
char *
g_mime_utils_header_printf (const char *format, ...)
{
      char *unfolded, *folded;
      va_list args;
      
      va_start (args, format);
      unfolded = g_strdup_vprintf (format, args);
      va_end (args);
      
      /* the unfolded text is only an intermediate; free it once folded */
      folded = g_mime_utils_header_fold (unfolded);
      g_free (unfolded);
      
      return folded;
}

/*
 * need_quotes:
 * Scans @string for a tspecial character or a '.' that lies outside any
 * double-quoted section.  A backslash hides the character that follows
 * it from the scan.
 *
 * Returns TRUE if such a character is found, FALSE otherwise.
 */
static gboolean
need_quotes (const char *string)
{
      gboolean in_quotes = FALSE;
      const char *p = string;
      
      while (*p != '\0') {
            switch (*p) {
            case '\\':
                  /* step onto the escaped character; it is skipped below */
                  p++;
                  break;
            case '"':
                  in_quotes = !in_quotes;
                  break;
            default:
                  if (!in_quotes && (is_tspecial (*p) || *p == '.'))
                        return TRUE;
                  break;
            }
            
            /* guard against a backslash as the very last character */
            if (*p != '\0')
                  p++;
      }
      
      return FALSE;
}

/**
 * g_mime_utils_quote_string:
 * @string: input string
 *
 * Escapes and, if necessary, quotes @string following rfc2045.  The
 * string is wrapped in double quotes when it contains a 'tspecial' (or a
 * '.') outside of an already quoted section.  Backslashes are always
 * escaped with a backslash; double quotes are escaped only when the
 * string is being wrapped.
 *
 * Returns an allocated string containing the escaped and quoted (if
 * needed to be) input string.
 **/
char *
g_mime_utils_quote_string (const char *string)
{
      GString *buf = g_string_new ("");
      gboolean wrap = need_quotes (string);
      const char *p;
      char *result;
      
      if (wrap)
            g_string_append_c (buf, '"');
      
      for (p = string; *p; p++) {
            if (*p == '\\' || (wrap && *p == '"'))
                  g_string_append_c (buf, '\\');
            
            g_string_append_c (buf, *p);
      }
      
      if (wrap)
            g_string_append_c (buf, '"');
      
      /* keep the character data, release only the GString wrapper */
      result = buf->str;
      g_string_free (buf, FALSE);
      
      return result;
}


/**
 * g_mime_utils_unquote_string: Unquote a string.
 * @string: string to modify in place (may be NULL, in which case nothing
 * happens)
 * 
 * Unquotes and unescapes a string in place.  If @string both starts and
 * ends with a double quote, that wrapping pair is removed.  Then every
 * backslash is dropped and the character following it is kept verbatim
 * (so an escaped backslash yields one backslash); a backslash with
 * nothing after it is simply dropped.  The result is never longer than
 * the input.
 **/
void
g_mime_utils_unquote_string (char *string)
{
      char *inptr, *inend, *outptr;
      size_t len;
      
      if (!string)
            return;
      
      len = strlen (string);
      inptr = string;
      inend = string + len;
      
      /* get rid of the wrapping quotes; a lone '"' counts as both the
       * opening and the closing quote and leaves an empty string */
      if (len > 0 && *inptr == '"' && *(inend - 1) == '"') {
            inend--;
            if (inptr < inend)
                  inptr++;
      }
      
      /* un-escape the string, compacting it towards the front; the
       * terminator is written explicitly so that the result is correct
       * whether or not wrapping quotes were removed */
      outptr = string;
      while (inptr < inend) {
            if (*inptr == '\\') {
                  inptr++;
                  if (inptr == inend)
                        break;
            }
            
            *outptr++ = *inptr++;
      }
      
      *outptr = '\0';
}


/**
 * g_mime_utils_text_is_8bit:
 * @text: text to check for 8bit chars
 * @len: text length
 *
 * Checks whether any of the first @len bytes of @text has its high bit
 * set (i.e. a value above 127).
 *
 * Returns %TRUE if the text contains 8bit characters or %FALSE
 * otherwise (also %FALSE when @text is NULL).
 **/
gboolean
g_mime_utils_text_is_8bit (const unsigned char *text, size_t len)
{
      size_t i;
      
      g_return_val_if_fail (text != NULL, FALSE);
      
      for (i = 0; i < len; i++) {
            if (text[i] & 0x80)
                  return TRUE;
      }
      
      return FALSE;
}


/**
 * g_mime_utils_best_encoding:
 * @text: text to encode
 * @len: text length
 *
 * Chooses a content encoding for the first @len bytes of @text by
 * counting the bytes with a value above 127: if they make up at most
 * 17% of the data quoted-printable is chosen, otherwise base64.
 *
 * Returns a #GMimePartEncodingType that is determined to be the best
 * encoding type for the specified block of text. ("best" in this
 * particular case means best compression)
 **/
GMimePartEncodingType
g_mime_utils_best_encoding (const unsigned char *text, size_t len)
{
      size_t high = 0;
      size_t i;
      
      for (i = 0; i < len; i++) {
            if (text[i] > (unsigned char) 127)
                  high++;
      }
      
      if ((float) high <= len * 0.17)
            return GMIME_PART_ENCODING_QUOTEDPRINTABLE;
      
      return GMIME_PART_ENCODING_BASE64;
}

/*
 * quoted_decode:
 * Decodes rfc2047's flavour of quoted-printable ('Q' encoding).  Reads
 * @len bytes from @in and writes the decoded bytes to @out, which never
 * receives more than @len bytes.  "=XY" becomes the byte with hex value
 * XY (the two characters are not checked for being hex digits), '_'
 * becomes a space, and everything else is copied through.
 *
 * Returns the number of bytes written, or -1 if an '=' is followed by
 * fewer than two characters.
 */
static gssize
quoted_decode (const unsigned char *in, size_t len, unsigned char *out)
{
      const unsigned char *src = in;
      const unsigned char *end = in + len;
      unsigned char *dst = out;
      unsigned char ch, hi, lo;
      
      while (src < end) {
            ch = *src++;
            
            switch (ch) {
            case '=':
                  /* data was truncated */
                  if (end - src < 2)
                        return -1;
                  
                  hi = toupper (*src++);
                  lo = toupper (*src++);
                  hi = (hi >= 'A' ? hi - 'A' + 10 : hi - '0') & 0x0f;
                  lo = (lo >= 'A' ? lo - 'A' + 10 : lo - '0') & 0x0f;
                  *dst++ = (hi << 4) | lo;
                  break;
            case '_':
                  /* _'s are an rfc2047 shortcut for encoding spaces */
                  *dst++ = ' ';
                  break;
            default:
                  *dst++ = ch;
                  break;
            }
      }
      
      return (dst - out);
}

/* true if the @len bytes at @atom are at least 7 bytes long, start with
 * "=?" and end with "?=".  NOTE: the arguments are not parenthesized and
 * are evaluated more than once, so pass simple expressions only. */
#define is_rfc2047_encoded_word(atom, len) (len >= 7 && !strncmp (atom, "=?", 2) && !strncmp (atom + len - 2, "?=", 2))

static unsigned char *
rfc2047_decode_word (const unsigned char *in, size_t inlen)
{
      const register unsigned char *inptr;
      const unsigned char *inend;
      const char *charset;
      unsigned char *buf;
      char *charenc, *p;
      size_t len;
      iconv_t cd;
      
      inptr = in + 2;
      inend = in + inlen - 2;
      
      inptr = memchr (inptr, '?', inend - inptr);
      if (inptr && inptr[2] == '?') {
            unsigned char *decoded;
            gssize declen;
            int state = 0;
            int save = 0;
            
            inptr++;
            
            switch (*inptr) {
            case 'B':
            case 'b':
                  inptr += 2;
                  decoded = g_alloca (inend - inptr);
                  declen = g_mime_utils_base64_decode_step (inptr, inend - inptr, decoded, &state, &save);
                  break;
            case 'Q':
            case 'q':
                  inptr += 2;
                  decoded = g_alloca (inend - inptr);
                  declen = quoted_decode (inptr, inend - inptr, decoded);
                  
                  if (declen == -1) {
                        d(fprintf (stderr, "encountered broken 'Q' encoding\n"));
                        return NULL;
                  }
                  break;
            default:
                  d(fprintf (stderr, "unknown encoding\n"));
                  return NULL;
            }
            
            len = (inptr - 3) - (in + 2);
            charenc = g_alloca (len + 1);
            memcpy (charenc, in + 2, len);
            charenc[len] = '\0';
            charset = charenc;
            
            /* rfc2231 updates rfc2047 encoded words...
             * The ABNF given in RFC 2047 for encoded-words is:
             *   encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
             * This specification changes this ABNF to:
             *   encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
             */
            
            /* trim off the 'language' part if it's there... */
            p = strchr (charset, '*');
            if (p)
                  *p = '\0';
            
            /* slight optimization */
            if (!g_strcasecmp (charset, "UTF-8"))
                  return g_strndup (decoded, declen);
            
            cd = g_mime_iconv_open ("UTF-8", charset);
            if (cd == (iconv_t) -1) {
                  w(g_warning ("Cannot convert from %s to UTF-8, header display may "
                             "be corrupt: %s", charset, g_strerror (errno)));
                  charset = g_mime_charset_locale_name ();
                  cd = g_mime_iconv_open ("UTF-8", charset);
                  if (cd == (iconv_t) -1)
                        return NULL;
            }
            
            buf = g_mime_iconv_strndup (cd, decoded, declen);
            g_mime_iconv_close (cd);
            
            if (!buf) {
                  w(g_warning ("Failed to convert \"%.*s\" to UTF-8, display may be "
                             "corrupt: %s", declen, decoded, g_strerror (errno)));
            }
            
            return buf;
      }
      
      return NULL;
}


/* Appends the pending whitespace (@lwsp) and token (@atom) to @out. If
 * @atom looks like an rfc2047 encoded-word it is decoded first, and the
 * whitespace is dropped when the previous token was an encoded-word
 * too.  If decoding fails the raw whitespace and token are copied
 * through.  Returns TRUE only when @atom was an encoded-word that
 * decoded successfully. */
static gboolean
header_decode_flush_token (GString *out, const GString *lwsp, const GString *atom, gboolean last_was_encoded)
{
      unsigned char *dword = NULL;
      const unsigned char *word;
      gboolean was_encoded;
      
      was_encoded = is_rfc2047_encoded_word (atom->str, atom->len);
      if (was_encoded)
            word = dword = rfc2047_decode_word (atom->str, atom->len);
      else
            word = atom->str;
      
      if (word == NULL) {
            /* could not decode: emit the input untouched */
            g_string_append (out, lwsp->str);
            g_string_append (out, atom->str);
            return FALSE;
      }
      
      /* rfc2047 states that whitespace between two adjacent
       * encoded-words must be ignored */
      if (!last_was_encoded || !was_encoded)
            g_string_append (out, lwsp->str);
      
      g_string_append (out, word);
      g_free (dword);
      
      return was_encoded;
}

/**
 * g_mime_utils_8bit_header_decode:
 * @in: header to decode
 *
 * Decodes an rfc2047 encoded header.
 *
 * Returns the mime encoded header as 8bit text.
 **/
char *
g_mime_utils_8bit_header_decode (const unsigned char *in)
{
      gboolean prev_encoded = FALSE;
      gboolean prev_space = FALSE;
      GString *out, *lwsp, *atom;
      const unsigned char *inptr;
      unsigned char *decoded;
      
      out = g_string_sized_new (256);
      lwsp = g_string_sized_new (256);
      atom = g_string_sized_new (256);
      
      for (inptr = in; inptr && *inptr; inptr++) {
            unsigned char c = *inptr;
            
            if (is_atom (c)) {
                  /* still collecting the current token */
                  g_string_append_c (atom, c);
                  prev_space = FALSE;
                  continue;
            }
            
            if (prev_space) {
                  /* still collecting the run that separates tokens */
                  g_string_append_c (lwsp, c);
                  prev_space = TRUE;
                  continue;
            }
            
            /* a non-atom octet that is not part of a whitespace run:
             * the token collected so far is complete */
            prev_encoded = header_decode_flush_token (out, lwsp, atom, prev_encoded);
            
            g_string_truncate (lwsp, 0);
            g_string_truncate (atom, 0);
            
            if (is_lwsp (c)) {
                  g_string_append_c (lwsp, c);
                  prev_space = TRUE;
            } else {
                  /* This is mostly here for interoperability with broken
                     mailers that might do something stupid like:
                     =?iso-8859-1?Q?blah?=:\t=?iso-8859-1?Q?I_am_broken?= */
                  g_string_append_c (out, c);
                  prev_encoded = FALSE;
                  prev_space = FALSE;
            }
      }
      
      /* whatever is still buffered at the end of the header */
      if (atom->len || lwsp->len)
            header_decode_flush_token (out, lwsp, atom, prev_encoded);
      
      g_string_free (lwsp, TRUE);
      g_string_free (atom, TRUE);
      
      /* hand the character data over to the caller */
      decoded = out->str;
      g_string_free (out, FALSE);
      
      return (char *) decoded;
}

/* Encodes @len octets of @in with the rfc2047 flavour of
 * quoted-printable ("Q" encoding).  Spaces become '_', octets whose
 * gmime_special_table entry matches @safemask are copied verbatim and
 * everything else is written as "=XX".  Returns the number of octets
 * written to @out (no terminator is added). */
static size_t
quoted_encode (const unsigned char *in, size_t len, unsigned char *out, gushort safemask)
{
      unsigned char *dest = out;
      size_t i;
      
      for (i = 0; i < len; i++) {
            unsigned char ch = in[i];
            
            if (ch == ' ') {
                  *dest++ = '_';
                  continue;
            }
            
            if (!(gmime_special_table[ch] & safemask)) {
                  /* unsafe in this context: hex-escape it */
                  *dest++ = '=';
                  *dest++ = tohex[(ch >> 4) & 0xf];
                  *dest++ = tohex[ch & 0xf];
                  continue;
            }
            
            *dest++ = ch;
      }
      
      return (dest - out);
}

/*
 * rfc2047_encode_word:
 * @string: output buffer the encoded-word is appended to
 * @word: text to encode (UTF-8)
 * @len: length of @word in bytes
 * @charset: charset to convert @word to before encoding; NULL is
 *           treated as "UTF-8"
 * @safemask: gmime_special_table mask of octets that may stay literal
 *            when the 'q' encoding is chosen
 *
 * Appends "=?charset?b|q?encoded-text?=" to @string.  The text is first
 * converted from UTF-8 to @charset; if that conversion is not possible
 * the word is emitted as UTF-8 instead.  The choice between 'b' and 'q'
 * is made by g_mime_utils_best_encoding().
 */
static void
rfc2047_encode_word (GString *string, const unsigned char *word, size_t len,
                 const char *charset, gushort safemask)
{
      unsigned char *encoded = NULL;
      unsigned char *uword = NULL;
      unsigned char *src, *dest;
      iconv_t cd = (iconv_t) -1;
      size_t enclen, pos;
      guint32 save = 0;
      char encoding = 'q';
      int state = 0;
      
      /* callers may pass an unchecked charset lookup result straight through */
      if (charset == NULL)
            charset = "UTF-8";
      
      if (g_strcasecmp (charset, "UTF-8") != 0)
            cd = g_mime_iconv_open (charset, "UTF-8");
      
      if (cd != (iconv_t) -1) {
            uword = (unsigned char *) g_mime_iconv_strndup (cd, (const char *) word, len);
            g_mime_iconv_close (cd);
      }
      
      if (uword) {
            len = strlen ((const char *) uword);
            word = uword;
      } else {
            /* no conversion happened, so the payload is still UTF-8 */
            charset = "UTF-8";
      }
      
      switch (g_mime_utils_best_encoding (word, len)) {
      case GMIME_PART_ENCODING_BASE64:
            enclen = BASE64_ENCODE_LEN (len);
            /* +1: room for the terminating NUL written below */
            encoded = g_alloca (enclen + 1);
            
            encoding = 'b';
            
            pos = g_mime_utils_base64_encode_close (word, len, encoded, &state, &save);
            
            /* drop the '\n' chars in a single pass, as headers need to
             * be wrapped differently */
            dest = encoded;
            for (src = encoded; src < encoded + pos; src++) {
                  if (*src != '\n')
                        *dest++ = *src;
            }
            *dest = '\0';
            
            break;
      case GMIME_PART_ENCODING_QUOTEDPRINTABLE:
            enclen = QP_ENCODE_LEN (len);
            /* +1: room for the terminating NUL written below */
            encoded = g_alloca (enclen + 1);
            
            encoding = 'q';
            
            pos = quoted_encode (word, len, encoded, safemask);
            encoded[pos] = '\0';
            
            break;
      default:
            g_assert_not_reached ();
            /* with assertions compiled out there is nothing valid to append */
            g_free (uword);
            return;
      }
      
      g_free (uword);
      
      g_string_sprintfa (string, "=?%s?%c?%s?=", charset, encoding, encoded);
}


/**
 * g_mime_utils_8bit_header_encode_phrase:
 * @in: header to encode
 *
 * Encodes a header phrase according to the rules in rfc2047. The
 * whole of @in is emitted as a single encoded-word, using the charset
 * picked by g_mime_charset_best() or "iso-8859-1" when it picks none.
 *
 * Returns the header phrase as 1 encoded atom (or NULL if @in is
 * NULL). Useful for encoding internet addresses.
 **/
char *
g_mime_utils_8bit_header_encode_phrase (const unsigned char *in)
{
      const char *charset;
      GString *encoded;
      char *result;
      size_t inlen;
      
      if (!in)
            return NULL;
      
      inlen = strlen (in);
      
      if (!(charset = g_mime_charset_best (in, inlen)))
            charset = "iso-8859-1";
      
      encoded = g_string_new ("");
      rfc2047_encode_word (encoded, in, inlen, charset, IS_ESAFE);
      
      /* keep the character data, release only the GString wrapper */
      result = encoded->str;
      g_string_free (encoded, FALSE);
      
      return result;
}


/* Classification of a whitespace-delimited word of a header phrase.
 * The numeric order matters: the merge pass combines two types with
 * MAX(), so "needs encoding" must compare greater than "plain". */
enum _phrase_word_t {
      WORD_ATOM,      /* written to the output verbatim */
      WORD_2047       /* contains chars > 127; written as an rfc2047 encoded-word */
};

/* One node of the singly-linked word list built while encoding a phrase. */
struct _phrase_word {
      struct _phrase_word *next;          /* next word, or NULL at the end of the list */
      const unsigned char *start, *end;   /* span inside the input string; @end is where scanning stood after the word's last char */
      enum _phrase_word_t type;           /* how the word has to be written out */
      int encoding;                       /* charset hint: rfc2047_encode_phrase uses "iso-8859-1" when this is 1,
                                           * otherwise it asks g_mime_charset_best() */
};

/* Tells whether two neighbouring words may be merged into one: only a
 * pair of rfc2047 words qualifies, a plain atom never does. */
static gboolean
word_types_compatable (enum _phrase_word_t type1, enum _phrase_word_t type2)
{
      if (type1 != WORD_2047)
            return FALSE;
      
      return type2 == WORD_2047;
}

static struct _phrase_word *
rfc2047_encode_phrase_get_words (const unsigned char *in)
{
      const unsigned char *inptr, *start, *last;
      struct _phrase_word *words, *tail, *word;
      enum _phrase_word_t type = WORD_ATOM;
      int count = 0, encoding = 0;
      
      words = NULL;
      tail = (struct _phrase_word *) &words;
      
      last = start = inptr = in;
      while (inptr && *inptr) {
            const char *newinptr;
            gunichar c;
            
            newinptr = g_utf8_next_char (inptr);
            c = g_utf8_get_char (inptr);
            if (newinptr == NULL || !g_unichar_validate (c)) {
                  d(g_warning ("Invalid UTF-8 sequence encountered"));
                  inptr++;
                  continue;
            }
            
            inptr = newinptr;
            
            if (g_unichar_isspace (c)) {
                  if (count > 0) {
                        word = g_new (struct _phrase_word, 1);
                        word->next = NULL;
                        word->start = start;
                        word->end = last;
                        word->type = type;
                        word->encoding = encoding;
                        
                        tail->next = word;
                        tail = word;
                        count = 0;
                  }
                  
                  start = inptr;
                  type = WORD_ATOM;
                  encoding = 0;
            } else {
                  count++;
                  if (c > 127 && c < 256) {
                        type = WORD_2047;
                        encoding = MAX (encoding, 2);
                  } else if (c >= 256) {
                        type = WORD_2047;
                        encoding = 2;
                  }
            }
            
            last = inptr;
      }
      
      if (count > 0) {
            word = g_new (struct _phrase_word, 1);
            word->next = NULL;
            word->start = start;
            word->end = last;
            word->type = type;
            word->encoding = encoding;
            
            tail->next = word;
            tail = word;
      }
      
      return words;
}

/*
 * rfc2047_encode_phrase_merge_words:
 * @wordsp: address of the head of the word list
 *
 * Walks the list once and merges each word with the compatible words
 * that follow it (see word_types_compatable()), as long as the merged
 * span stays shorter than GMIME_FOLD_PREENCODED bytes.  A merged word
 * takes the higher type and the higher encoding level of the two, so
 * that the charset chosen later can represent every char it now
 * covers.  When a merge would get too long, the current word is
 * extended up to the start of the next one so that the separating
 * whitespace is kept with it.
 *
 * Returns TRUE if at least one pair of words was merged.
 */
static gboolean
rfc2047_encode_phrase_merge_words (struct _phrase_word **wordsp)
{
      struct _phrase_word *word, *next;
      gboolean merged = FALSE;
      
      /* the head node is never removed, so *wordsp stays as it is */
      for (word = *wordsp; word != NULL; word = word->next) {
            while ((next = word->next) != NULL) {
                  if (!word_types_compatable (word->type, next->type))
                        break;
                  
                  if (next->end - word->start >= GMIME_FOLD_PREENCODED) {
                        /* if it is going to be too long, make sure we include the
                           separating whitespace */
                        word->end = next->start;
                        break;
                  }
                  
                  /* the resulting word type is the MAX of the 2 types */
                  word->type = MAX (word->type, next->type);
                  
                  /* ...and it needs the wider of the 2 encoding levels */
                  word->encoding = MAX (word->encoding, next->encoding);
                  
                  word->end = next->end;
                  word->next = next->next;
                  
                  g_free (next);
                  
                  merged = TRUE;
            }
      }
      
      return merged;
}

static char *
rfc2047_encode_phrase (const unsigned char *in)
{
      struct _phrase_word *words, *word, *prev = NULL;
      GString *out;
      char *outstr;
      
      if (in == NULL)
            return NULL;
      
      words = rfc2047_encode_phrase_get_words (in);
      if (!words)
            return NULL;
      
      while (rfc2047_encode_phrase_merge_words (&words))
            ;
      
      out = g_string_new ("");
      
      /* output words now with spaces between them */
      word = words;
      while (word) {
            const char *start;
            size_t len;
            
            /* append correct number of spaces between words */
            if (prev && !(prev->type == WORD_2047 && word->type == WORD_2047)) {
                  /* one or both of the words are not encoded so we write the spaces out untouched */
                  len = word->start - prev->end;
                  g_string_append_len (out, prev->end, len);
            }
            
            switch (word->type) {
            case WORD_ATOM:
                  g_string_append_len (out, word->start, word->end - word->start);
                  break;
            case WORD_2047:
                  if (prev && prev->type == WORD_2047) {
                        /* include the whitespace chars between these 2 words in the
                                   resulting rfc2047 encoded word. */
                        len = word->end - prev->end;
                        start = prev->end;
                        
                        /* encoded words need to be separated by linear whitespace */
                        g_string_append_c (out, ' ');
                  } else {
                        len = word->end - word->start;
                        start = word->start;
                  }
                  
                  if (word->encoding == 1)
                        rfc2047_encode_word (out, start, len, "iso-8859-1", IS_PSAFE);
                  else
                        rfc2047_encode_word (out, start, len,
                                         g_mime_charset_best (start, len), IS_PSAFE);
                  break;
            }
            
            g_free (prev);
            prev = word;
            word = word->next;
      }
      
      g_free (prev);
      
      outstr = out->str;
      g_string_free (out, FALSE);
      
      return outstr;
}


/**
 * g_mime_utils_8bit_header_encode:
 * @in: header to encode
 *
 * Encodes a header according to the rules in rfc2047. This is a thin
 * public wrapper around rfc2047_encode_phrase(): words that need no
 * encoding are passed through as they are, the others are written as
 * encoded-words.
 *
 * Returns the header as several encoded atoms, or NULL if @in is NULL
 * or contains no words. Useful for encoding headers like "Subject".
 **/
char *
g_mime_utils_8bit_header_encode (const unsigned char *in)
{
      return rfc2047_encode_phrase (in);
}


/**
 * g_mime_utils_base64_encode_close:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: number of 4-character groups already written on the current
 *         output line
 * @save: leftover input: its first byte counts the pending input bytes
 *        (0, 1 or 2), the next two bytes hold them
 *
 * Base64 encodes the input stream to the output stream. Call this
 * when finished encoding data with #g_mime_utils_base64_encode_step
 * to flush off the last little bit. The pending bytes are written as
 * a final '='-padded group, a newline is always appended and @state
 * and @save are reset to 0.
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
      unsigned char *outptr = out;
      int c1, c2;
      
      if (inlen > 0)
            outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
      
      /* must be read after the step call, which updates @save */
      c1 = ((unsigned char *)save)[1];
      c2 = ((unsigned char *)save)[2];
      
      switch (((unsigned char *)save)[0]) {
      case 2:
            /* 2 bytes pending: 3 data characters and 1 pad */
            outptr[0] = base64_alphabet [c1 >> 2];
            outptr[1] = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
            outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
            outptr[3] = '=';
            outptr += 4;
            break;
      case 1:
            /* 1 byte pending: 2 data characters and 2 pads.  The second
             * save slot is not cleared by the step function and may
             * still hold a byte from an earlier call, so it must not
             * contribute any bits here. */
            outptr[0] = base64_alphabet [c1 >> 2];
            outptr[1] = base64_alphabet [(c1 & 0x3) << 4];
            outptr[2] = '=';
            outptr[3] = '=';
            outptr += 4;
            break;
      default:
            /* nothing pending */
            break;
      }
      
      *outptr++ = '\n';
      
      *save = 0;
      *state = 0;
      
      return (outptr - out);
}


/**
 * g_mime_utils_base64_encode_step:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: number of 4-character groups already written on the current
 *         output line
 * @save: leftover input: its first byte counts the pending input bytes
 *        (0, 1 or 2), the next two bytes hold them
 *
 * Base64 encodes a chunk of data. Performs an 'encode step', only
 * encodes blocks of 3 characters to the output at a time, saves
 * left-over state in state and save (initialise to 0 on first
 * invocation). A newline is written after every 19th group, i.e.
 * after 76 output characters.
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
      const register unsigned char *inptr;
      register unsigned char *outptr;
      
      /* inlen is unsigned, so this only catches an empty chunk */
      if (inlen <= 0)
            return 0;
      
      inptr = in;
      outptr = out;
      
      /* only enter the encoder if pending + new bytes make up at least one group of 3 */
      if (inlen + ((unsigned char *)save)[0] > 2) {
            /* stop 2 bytes early: each pass of the loop reads up to 3 bytes */
            const unsigned char *inend = in + inlen - 2;
            register int c1 = 0, c2 = 0, c3 = 0;
            register int already;
            
            already = *state;
            
            /* reload the bytes left over from the previous call and enter
             * the loop body at the point where the next byte is needed */
            switch (((char *)save)[0]) {
            case 1:     c1 = ((unsigned char *)save)[1]; goto skip1;
            case 2:     c1 = ((unsigned char *)save)[1];
                  c2 = ((unsigned char *)save)[2]; goto skip2;
            }
            
            /* note: the skip1/skip2 labels above jump into the middle of this loop */
            while (inptr < inend) {
                  c1 = *inptr++;
            skip1:
                  c2 = *inptr++;
            skip2:
                  c3 = *inptr++;
                  /* 3 input bytes -> 4 output characters of 6 bits each */
                  *outptr++ = base64_alphabet [c1 >> 2];
                  *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
                  *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
                  *outptr++ = base64_alphabet [c3 & 0x3f];
                  /* break the line after 19 groups (76 characters) */
                  if ((++already) >= 19) {
                        *outptr++ = '\n';
                        already = 0;
                  }
            }
            
            /* all pending bytes were consumed; only the count is reset,
             * the two byte slots keep their old contents */
            ((unsigned char *)save)[0] = 0;
            /* number of input bytes not consumed by the loop (0, 1 or 2) */
            inlen = 2 - (inptr - inend);
            *state = already;
      }
      
      d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
      
      if (inlen > 0) {
            register char *saveout;
            
            /* points to the slot for the next char to save */
            saveout = & (((char *)save)[1]) + ((char *)save)[0];
            
            /* inlen can only be 0 1 or 2 */
            switch (inlen) {
            /* case 2 deliberately falls through to case 1 to copy both bytes */
            case 2:     *saveout++ = *inptr++;
            case 1:     *saveout++ = *inptr++;
            }
            ((char *)save)[0] += inlen;
      }
      
      d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
              (int)((char *)save)[0],
              (int)((char *)save)[1],
              (int)((char *)save)[2]));
      
      return (outptr - out);
}


/**
 * g_mime_utils_base64_decode_step:
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream
 * @state: number of base64 characters currently accumulated in @save
 * @save: accumulated bits that have not yet been written out
 *
 * Decodes a chunk of base64 encoded data. Characters whose
 * gmime_base64_rank entry is 0xff are skipped; every 4 accepted
 * characters produce 3 output bytes.
 *
 * Returns the number of bytes decoded (which have been dumped in @out).
 **/
size_t
g_mime_utils_base64_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
      const unsigned char *src = in;
      const unsigned char *end = in + inlen;
      unsigned char *dest = out;
      guint32 bits = *save;
      int nchars = *state;
      int pad;
      
      /* convert 4 base64 bytes to 3 normal bytes */
      for (; src < end; src++) {
            unsigned char rank = gmime_base64_rank[*src];
            
            if (rank == 0xff)
                  continue;
            
            bits = (bits << 6) | rank;
            if (++nchars == 4) {
                  *dest++ = bits >> 16;
                  *dest++ = bits >> 8;
                  *dest++ = bits;
                  nchars = 0;
            }
      }
      
      *save = bits;
      *state = nchars;
      
      /* look at the last 2 accepted characters of the chunk: each one
       * that is a '=' stands for one output byte too many */
      for (pad = 2; src > in && pad > 0; ) {
            src--;
            if (gmime_base64_rank[*src] != 0xff) {
                  if (*src == '=' && dest > out)
                        dest--;
                  pad--;
            }
      }
      
      /* if pad != 0 then there is a truncation error! */
      return (dest - out);
}


/**
 * g_mime_utils_uuencode_close:
 * @in: input stream
 * @inlen: input stream length
 * @out: output stream
 * @uubuf: temporary buffer of 60 bytes
 * @state: low 8 bits: number of input bytes held in @save; next 8
 *         bits: number of input bytes already encoded into @uubuf
 * @save: leftover input bytes that have not yet been encoded
 *
 * Uuencodes a chunk of data. Call this when finished encoding data
 * with #g_mime_utils_uuencode_step to flush off the last little bit.
 * The final partial line (if any) is written, followed by the
 * zero-length line that ends the data; @state and @save are reset to 0.
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_uuencode_close (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
      unsigned char *outptr = out;
      unsigned char *bufptr;
      int uulen, pending;
      int uufill = 0;
      guint32 saved;
      
      if (inlen > 0)
            outptr += g_mime_utils_uuencode_step (in, inlen, out, uubuf, state, save);
      
      saved = *save;
      pending = *state & 0xff;
      uulen = (*state >> 8) & 0xff;
      
      /* next free slot of the line buffer */
      bufptr = uubuf + ((uulen / 3) * 4);
      
      if (pending > 0) {
            /* zero-pad the last group up to 3 bytes, remembering how
             * many of them are filler */
            for (; pending < 3; pending++) {
                  saved <<= 8;
                  uufill++;
            }
            
            if (pending == 3) {
                  /* convert 3 normal bytes into 4 uuencoded bytes */
                  unsigned char b0 = (saved >> 16) & 0xff;
                  unsigned char b1 = (saved >> 8) & 0xff;
                  unsigned char b2 = saved & 0xff;
                  
                  bufptr[0] = GMIME_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
                  bufptr[1] = GMIME_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
                  bufptr[2] = GMIME_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
                  bufptr[3] = GMIME_UUENCODE_CHAR (b2 & 0x3f);
                  
                  uulen += 3;
            }
      }
      
      if (uulen > 0) {
            int cplen = (uulen / 3) * 4;
            
            /* the length octet counts real data only, not the filler */
            *outptr++ = GMIME_UUENCODE_CHAR ((uulen - uufill) & 0xff);
            memcpy (outptr, uubuf, cplen);
            outptr += cplen;
            *outptr++ = '\n';
      }
      
      /* a line of length 0 ends the encoded data */
      *outptr++ = GMIME_UUENCODE_CHAR (0);
      *outptr++ = '\n';
      
      *save = 0;
      *state = 0;
      
      return (outptr - out);
}


/**
 * g_mime_utils_uuencode_step:
 * @in: input stream
 * @inlen: input stream length
 * @out: output stream
 * @uubuf: temporary buffer of 60 bytes
 * @state: low 8 bits: number of input bytes held in @save; next 8
 *         bits: number of input bytes already encoded into @uubuf
 * @save: leftover input bytes that have not yet been encoded
 *
 * Uuencodes a chunk of data. Performs an 'encode step', only encodes
 * blocks of 45 characters to the output at a time, saves left-over
 * state in @uubuf, @state and @save (initialize to 0 on first
 * invocation).
 *
 * Returns the number of bytes encoded.
 **/
size_t
g_mime_utils_uuencode_step (const unsigned char *in, size_t inlen, unsigned char *out, unsigned char *uubuf, int *state, guint32 *save)
{
      const unsigned char *src = in;
      const unsigned char *end = in + inlen;
      unsigned char *outptr = out;
      unsigned char *bufptr;
      guint32 saved = *save;
      int pending = *state & 0xff;
      int uulen = (*state >> 8) & 0xff;
      
      /* resume right after the groups that are already in the line buffer */
      bufptr = uubuf + ((uulen / 3) * 4);
      
      while (src < end) {
            /* fill the line buffer, one group of 3 input bytes at a time */
            while (uulen < 45 && src < end) {
                  for (; pending < 3 && src < end; pending++)
                        saved = (saved << 8) | *src++;
                  
                  if (pending == 3) {
                        /* convert 3 normal bytes into 4 uuencoded bytes */
                        unsigned char b0 = (saved >> 16) & 0xff;
                        unsigned char b1 = (saved >> 8) & 0xff;
                        unsigned char b2 = saved & 0xff;
                        
                        *bufptr++ = GMIME_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
                        *bufptr++ = GMIME_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
                        *bufptr++ = GMIME_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
                        *bufptr++ = GMIME_UUENCODE_CHAR (b2 & 0x3f);
                        
                        pending = 0;
                        saved = 0;
                        uulen += 3;
                  }
            }
            
            if (uulen >= 45) {
                  /* a full line: length octet, encoded data, newline */
                  int linelen = (uulen / 3) * 4;
                  
                  *outptr++ = GMIME_UUENCODE_CHAR (uulen & 0xff);
                  memcpy (outptr, uubuf, linelen);
                  outptr += linelen;
                  *outptr++ = '\n';
                  
                  uulen = 0;
                  bufptr = uubuf;
            }
      }
      
      *save = saved;
      *state = ((uulen & 0xff) << 8) | (pending & 0xff);
      
      return (outptr - out);
}


/**
 * g_mime_utils_uudecode_step:
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream
 * @state: low 8 bits: number of encoded characters held in @save; next
 *         8 bits: number of data bytes still expected on the current
 *         line; the bits selected by GMIME_UUDECODE_STATE_MASK are kept
 *         as they are, and GMIME_UUDECODE_STATE_END is set once a
 *         zero-length line has been seen
 * @save: leftover encoded characters that have not yet been decoded
 *
 * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
 * uuencoded data. Assumes the "begin <mode> <file name>" line has
 * been stripped off. Once GMIME_UUDECODE_STATE_END is set in @state,
 * further calls return 0 without reading any input.
 *
 * Returns the number of bytes decoded.
 **/
size_t
g_mime_utils_uudecode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, guint32 *save)
{
      const register unsigned char *inptr;
      register unsigned char *outptr;
      const unsigned char *inend;
      unsigned char ch;
      register guint32 saved;
      gboolean last_was_eoln;
      int uulen, i;
      
      /* the end-of-data line was already seen by an earlier call */
      if (*state & GMIME_UUDECODE_STATE_END)
            return 0;
      
      saved = *save;
      i = *state & 0xff;
      uulen = (*state >> 8) & 0xff;
      /* no bytes outstanding on the current line: the next octet starts a new line */
      if (uulen == 0)
            last_was_eoln = TRUE;
      else
            last_was_eoln = FALSE;
      
      inend = in + inlen;
      outptr = out;
      
      inptr = in;
      while (inptr < inend) {
            if (*inptr == '\n') {
                  last_was_eoln = TRUE;
                  
                  inptr++;
                  continue;
            } else if (!uulen || last_was_eoln) {
                  /* first octet on a line is the uulen octet */
                  uulen = gmime_uu_rank[*inptr];
                  last_was_eoln = FALSE;
                  if (uulen == 0) {
                        /* a zero-length line marks the end of the data */
                        *state |= GMIME_UUDECODE_STATE_END;
                        break;
                  }
                  
                  inptr++;
                  continue;
            }
            
            ch = *inptr++;
            
            if (uulen > 0) {
                  /* save the byte */
                  saved = (saved << 8) | ch;
                  i++;
                  if (i == 4) {
                        /* convert 4 uuencoded bytes to 3 normal bytes */
                        unsigned char b0, b1, b2, b3;
                        
                        b0 = saved >> 24;
                        b1 = saved >> 16 & 0xff;
                        b2 = saved >> 8 & 0xff;
                        b3 = saved & 0xff;
                        
                        if (uulen >= 3) {
                              *outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
                              *outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
                                *outptr++ = gmime_uu_rank[b2] << 6 | gmime_uu_rank[b3];
                        } else {
                              /* last group of the line: only 1 or 2 of its bytes are data */
                              if (uulen >= 1) {
                                    *outptr++ = gmime_uu_rank[b0] << 2 | gmime_uu_rank[b1] >> 4;
                              }
                              if (uulen >= 2) {
                                    *outptr++ = gmime_uu_rank[b1] << 4 | gmime_uu_rank[b2] >> 2;
                              }
                        }
                        
                        i = 0;
                        saved = 0;
                        /* NOTE(review): this goes negative when the group held only
                         * 1 or 2 data bytes; a further non-newline octet on the same
                         * line then takes the `break' below, and the value is masked
                         * with 0xff when stored in @state -- confirm this is intended */
                        uulen -= 3;
                  }
            } else {
                  /* more characters on the line than its length octet announced */
                  break;
            }
      }
      
      *save = saved;
      *state = (*state & GMIME_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
      
      return (outptr - out);
}


/**
 * g_mime_utils_quoted_encode_close:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: the octet whose output was held back by the encoder, or -1
 * when nothing is held back; set to -1 on return
 * @save: number of characters on the current output line; set to 0
 * on return
 *
 * Finishes a quoted-printable encoding run. Any remaining input is
 * first run through #g_mime_utils_quoted_encode_step; the octet still
 * held back in @state (if any) is then written out, and a final '\n'
 * is appended.
 *
 * Returns the number of bytes written to @out.
 **/
size_t
g_mime_utils_quoted_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
{
      unsigned char *cursor = out;
      int pending;
      
      /* encode whatever input is left before flushing */
      if (inlen != 0)
            cursor += g_mime_utils_quoted_encode_step (in, inlen, cursor, state, save);
      
      pending = *state;
      if (pending != -1) {
            if (!is_qpsafe (pending) || isblank (pending)) {
                  /* unsafe octets are always escaped, and a space/tab
                     must be escaped too when it is the last
                     character on the line */
                  cursor[0] = '=';
                  cursor[1] = tohex[(pending >> 4) & 0xf];
                  cursor[2] = tohex[pending & 0xf];
                  cursor += 3;
            } else {
                  *cursor++ = pending;
            }
      }
      
      *cursor++ = '\n';
      
      /* leave the carried state ready for a fresh encoding run */
      *state = -1;
      *save = 0;
      
      return (cursor - out);
}


/* Writes the quoted-printable escape "=XX" for @octet at @dest and
 * returns the position just past the three characters written. */
static unsigned char *
qp_write_escape (unsigned char *dest, int octet)
{
      *dest++ = '=';
      *dest++ = tohex[(octet >> 4) & 0xf];
      *dest++ = tohex[octet & 0xf];
      
      return dest;
}

/**
 * g_mime_utils_quoted_encode_step:
 * @in: input stream
 * @inlen: length of the input
 * @out: output string
 * @state: the octet (a space, tab or '\r') whose output has been held
 * back until the next octet is seen, or -1 when nothing is held back
 * @save: number of characters written so far on the current output
 * line
 *
 * Quoted-printable encodes a block of text. Performs an 'encode
 * step' and stores the carried-over values back into @state and @save
 * so the next call can continue where this one stopped
 * (#g_mime_utils_quoted_encode_close leaves them at -1 and 0).
 *
 * Returns the number of bytes written to @out.
 **/
size_t
g_mime_utils_quoted_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
{
      const unsigned char *cur = in;
      const unsigned char *stop = in + inlen;
      unsigned char *dest = out;
      int column = *save;     /* characters already on the current line */
      int pending = *state;   /* held-back octet, or -1 */
      
      for (; cur < stop; cur++) {
            unsigned char c = *cur;
            
            switch (c) {
            case '\r':
                  /* hold the CR back: a following LF turns the pair
                     into a plain line break */
                  if (pending != -1) {
                        dest = qp_write_escape (dest, pending);
                        column += 3;
                  }
                  pending = c;
                  break;
            case '\n':
                  /* whatever was held back (other than the CR of a
                     CR LF pair) is the last thing on the line, so it
                     goes out escaped */
                  if (pending != -1 && pending != '\r')
                        dest = qp_write_escape (dest, pending);
                  *dest++ = '\n';
                  column = 0;
                  pending = -1;
                  break;
            default:
                  /* the held-back octet is no longer last on the line */
                  if (pending != -1) {
                        if (is_qpsafe (pending)) {
                              *dest++ = pending;
                              column++;
                        } else {
                              dest = qp_write_escape (dest, pending);
                              column += 3;
                        }
                  }
                  
                  if (!is_qpsafe (c)) {
                        /* an escape needs three columns; soft-break
                           first if they do not fit */
                        if (column > 72) {
                              *dest++ = '=';
                              *dest++ = '\n';
                              column = 0;
                        }
                        column += 3;
                        dest = qp_write_escape (dest, c);
                        pending = -1;
                        break;
                  }
                  
                  if (column > 74) {
                        /* soft line break */
                        *dest++ = '=';
                        *dest++ = '\n';
                        column = 0;
                  }
                  
                  if (isblank (c)) {
                        /* delay output of space char */
                        pending = c;
                  } else {
                        *dest++ = c;
                        column++;
                        pending = -1;
                  }
                  break;
            }
      }
      
      *save = column;
      *state = pending;
      
      return (dest - out);
}


/**
 * g_mime_utils_quoted_decode_step: decode a chunk of QP encoded data
 * @in: input stream
 * @inlen: max length of data to decode
 * @out: output stream; needs room for @inlen + 2 bytes, because an
 * invalid escape completed by this chunk is copied through as three
 * bytes
 * @savestate: decoder state carried between calls: 0 while copying
 * literal text (the value to start with), 1 after an '=' has been
 * read, 2 once the octet following the '=' is held in @saved
 * @saved: the octet that followed the most recent '=' (only
 * meaningful while @savestate is 2)
 *
 * Decodes a block of quoted-printable encoded data. Performs a
 * 'decode step' on a chunk of QP encoded data. An escape sequence may
 * be split across calls; the partial sequence is carried in
 * @savestate and @saved. A @savestate value the decoder does not know
 * is treated as 0.
 *
 * Returns the number of bytes decoded.
 **/
size_t
g_mime_utils_quoted_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *savestate, int *saved)
{
      /* FIXME: this does not strip trailing spaces from lines (as
       * it should, rfc 2045, section 6.7) Should it also
       * canonicalise the end of line to CR LF??
       *
       * Note: Trailing rubbish (at the end of input), like = or =x
       * or =\r will be lost.
       */
      const unsigned char *inptr = in;
      const unsigned char *inend = in + inlen;
      unsigned char *outptr = out;
      unsigned char c;
      int state = *savestate;
      int save = *saved;
      
      while (inptr < inend) {
            switch (state) {
            case 0:
                  /* literal text: copy until the next '=' */
                  while (inptr < inend) {
                        c = *inptr++;
                        /* FIXME: use a specials table to avoid 3 comparisons for the common case */
                        if (c == '=') { 
                              state = 1;
                              break;
                        }
#ifdef CANONICALISE_EOL
                        /*else if (c=='\r') {
                              state = 3;
                        } else if (c=='\n') {
                              *outptr++ = '\r';
                              *outptr++ = c;
                              } */
#endif
                        else {
                              *outptr++ = c;
                        }
                  }
                  break;
            case 1:
                  /* first octet after '=' */
                  c = *inptr++;
                  if (c == '\n') {
                        /* soft break ... unix end of line */
                        state = 0;
                  } else {
                        save = c;
                        state = 2;
                  }
                  break;
            case 2:
                  /* second octet after '=' */
                  c = *inptr++;
                  if (isxdigit (c) && isxdigit (save)) {
                        /* fold to upper case so that lower-case hex
                           digits decode as well */
                        c = toupper (c);
                        save = toupper (save);
                        *outptr++ = (((save >= 'A' ? save - 'A' + 10 : save - '0') & 0x0f) << 4)
                              | ((c >= 'A' ? c - 'A' + 10 : c - '0') & 0x0f);
                  } else if (c == '\n' && save == '\r') {
                        /* soft break ... canonical end of line */
                  } else {
                        /* not an escape: just output the data */
                        *outptr++ = '=';
                        *outptr++ = save;
                        *outptr++ = c;
                  }
                  state = 0;
                  break;
#ifdef CANONICALISE_EOL
            case 3:
                  /* convert \n -> to \r\n, leaves \r\n alone */
                  c = *inptr++;
                  if (c == '\n') {
                        *outptr++ = '\r';
                        *outptr++ = c;
                  } else {
                        *outptr++ = '\r';
                        *outptr++ = '\n';
                        *outptr++ = c;
                  }
                  state = 0;
                  break;
#endif
            default:
                  /* Unknown carried state: no case above would
                   * consume input, so the enclosing loop would never
                   * end. Fall back to literal copying. */
                  state = 0;
                  break;
            }
      }
      
      *savestate = state;
      *saved = save;
      
      return (outptr - out);
}

Generated by  Doxygen 1.6.0   Back to index