Home > .Net, c#, Code samples > RFC2047 decoder in C#

RFC2047 decoder in C#

Just spent the evening writing a class to decode RFC2047 strings in C#. It was requested by a user on ASPSidan, a Swedish site dedicated to ASP, ASP.Net and other Microsoft technologies but before I was done someone had already found another implementation on the net. Anyway, the code shouldn’t go to waste so I’m posting it here if anyone needs it.

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Globalization;

namespace CrazyBeavers.Net.Mail
{
    public static class RFC2047Decoder
    {
        public static string Parse(string input)
        {
            StringBuilder sb = new StringBuilder();
            StringBuilder currentWord = new StringBuilder();
            bool readingWord = false;

            Int32 i = 0;
            while (i < input.Length)
            {
                char currentChar = input[i];
                char peekAhead;
                switch (currentChar)
                {
                    case '=':
                        peekAhead = (i == input.Length - 1) ? ' ' : input[i + 1];

                        if (peekAhead == '?')
                            readingWord = true;
                        break;

                    case '?':
                        peekAhead = (i == input.Length - 1) ? ' ' : input[i + 1];

                        if (peekAhead == '=')
                        {
                            readingWord = false;

                            currentWord.Append(currentChar);
                            currentWord.Append(peekAhead);

                            sb.Append(ParseEncodedWord(currentWord.ToString()));
                            currentWord = new StringBuilder();

                            i += 2;
                            continue;
                        }
                        break;
                }

                if (readingWord)
                {
                    currentWord.Append(currentChar);
                    i++;
                }
                else
                {
                    sb.Append(currentChar);
                    i++;
                }
            }

            return sb.ToString();
        }

        private static string ParseEncodedWord(string input)
        {
            StringBuilder sb = new StringBuilder();

            if (!input.StartsWith("=?"))
                return input;

            if (!input.EndsWith("?="))
                return input;

            // Get the name of the encoding but skip the leading =?
            string encodingName = input.Substring(2, input.IndexOf("?", 2) - 2);
            Encoding enc = Encoding.GetEncoding(encodingName);

            // Get the type of the encoding
            char type = input[encodingName.Length + 3];

            // Start after the name of the encoding and the other required parts
            Int32 i = encodingName.Length + 5;

            switch (char.ToLowerInvariant(type))
            {
                case 'q':
                    while (i < input.Length)
                    {
                        char currentChar = input[i];
                        char[] peekAhead = new char[2];
                        switch (currentChar)
                        {
                            case '=':
                                peekAhead = (i >= input.Length - 2) ? null : new char[] { input[i + 1], input[i + 2] };

                                if (peekAhead == null)
                                {
                                    sb.Append(currentChar);
                                    i++;
                                    break;
                                }

                                string decodedChar = enc.GetString(new byte[] { Convert.ToByte(new string(peekAhead, 0, 2), 16) });
                                sb.Append(decodedChar);
                                i += 3;
                                break;
                            case '?':
                                if (input[i + 1] == '=')
                                    i += 2;
                                break;
                            default:
                                sb.Append(currentChar);
                                i++;
                                break;
                        }
                    }
                    break;
                case 'b':
                    string baseString = input.Substring(i, input.Length - i - 2);
                    byte[] baseDecoded = Convert.FromBase64String(baseString);
                    sb.Append(enc.GetString(baseDecoded));
                    break;
            }
            return sb.ToString();
        }
    }
}
Categories: .Net, c#, Code samples Tags: , , , ,
  1. Alex
    March 17th, 2011 at 15:48 | #1

    I like yout code very much.

    But in case of ” if (peekAhead == null) break; ” we’ll have forever loop.
    Can You fix it?

  2. March 29th, 2011 at 16:49 | #2

    Hi Alex,

    I’ve added a quick fix that just appends the char to the stringbuilder and continues on. If the string is correctly formated this shouldn’t ever happen but you never now what kind of input you get. Thanks for noticing it!

    / Karl

  1. No trackbacks yet.