How to decode a Quoted-Printable string

后端 未结 5 358
执笔经年
执笔经年 2021-01-14 22:18

I'm working with French strings in .NET. When decoding a mail body, I receive "Chasn=C3=A9 sur illet". I would like to get "Chasné sur illet", and I can't find any solution…

相关标签:
5条回答
  • 2021-01-14 22:25

    This is UTF8 encoding.

    Using this post:

    http://www.dpit.co.uk/decoding-quoted-printable-email-in-c/

    Here is the code (don't forget to accept the answer if it helped):

    using System;
    using System.Text;
    using System.Text.RegularExpressions;
    
    namespace ConsoleApplication1
    {
        /// <summary>
        /// Console demo that decodes a quoted-printable string as UTF-8 and prints it.
        /// </summary>
        class Program
        {
            // Soft line break: "=" immediately followed by a line ending. It carries no
            // content and is removed entirely when decoding.
            private static readonly Regex SoftLineBreak = new Regex(@"=\r?\n");

            // One or more consecutive "=XX" escapes where XX are real hex digits.
            // Consecutive escapes are matched as one run so that a multi-byte UTF-8
            // sequence is decoded from all of its bytes together.
            private static readonly Regex EncodedRun = new Regex(@"(?:=[0-9A-Fa-f]{2})+");

            static void Main(string[] args)
            {
                Console.WriteLine(DecodeQuotedPrintable("Chasn=C3=A9 sur illet"));
                Console.ReadKey();
            }

            /// <summary>
            /// Decodes quoted-printable text, interpreting the decoded bytes as UTF-8.
            /// </summary>
            /// <param name="input">Quoted-printable encoded text.</param>
            /// <returns>
            /// The decoded text. A null or empty <paramref name="input"/> is returned unchanged.
            /// An "=" that is not followed by two hex digits is left in the output as-is.
            /// </returns>
            static string DecodeQuotedPrintable(string input)
            {
                if (string.IsNullOrEmpty(input))
                {
                    return input;
                }

                // Remove soft line breaks first: this rejoins a multi-byte sequence that
                // the encoder happened to split across two lines.
                string unfolded = SoftLineBreak.Replace(input, string.Empty);

                // Decode each run exactly where it was matched. A match evaluator avoids
                // touching other occurrences of the same text and avoids re-scanning
                // already-decoded output.
                return EncodedRun.Replace(unfolded, run =>
                {
                    string escaped = run.Value;
                    byte[] bytes = new byte[escaped.Length / 3];
                    for (int i = 0; i < bytes.Length; i++)
                    {
                        // Each escape is 3 chars ("=XX"); skip the "=" and parse the pair.
                        bytes[i] = Convert.ToByte(escaped.Substring(i * 3 + 1, 2), 16);
                    }

                    return Encoding.UTF8.GetString(bytes);
                });
            }
        }
    }
    
    0 讨论(0)
  • 2021-01-14 22:28

    We had an issue with this method: it is VERY slow. The following version improved performance A LOT:

    /// <summary>
    /// Decodes a message body according to its Content-Transfer-Encoding.
    /// Only "quoted-printable" (matched case-insensitively) is decoded; any other
    /// value returns the text unchanged.
    /// </summary>
    /// <param name="messageText">The encoded message text.</param>
    /// <param name="enc">Encoding used to turn the decoded bytes into characters.</param>
    /// <param name="transferEncoding">The transfer encoding name, e.g. "quoted-printable".</param>
    /// <returns>
    /// The decoded text. A null or empty <paramref name="messageText"/> or
    /// <paramref name="transferEncoding"/> returns <paramref name="messageText"/> unchanged.
    /// An "=" that is not followed by two hex digits is left in the output as-is.
    /// </returns>
    public static string FromMailTransferEncoding(this string messageText, Encoding enc, string transferEncoding)
    {
        if (string.IsNullOrEmpty(messageText) || string.IsNullOrEmpty(transferEncoding))
        {
            return messageText;
        }

        // Ordinal comparison: ToLower() depends on the current culture and can fail
        // to match (for example the Turkish dotless i).
        if (!"quoted-printable".Equals(transferEncoding, StringComparison.OrdinalIgnoreCase))
        {
            return messageText;
        }

        // Remove soft line breaks first so a multi-byte character split across two
        // lines is decoded from all of its bytes together.
        string unfolded = Regex.Replace(messageText, @"=\r?\n", string.Empty);

        // Single pass with a match evaluator: the decoded text is inserted literally
        // (no "$1"-style substitution) and is never re-scanned for escapes.
        return Regex.Replace(unfolded, @"(?:=[0-9A-Fa-f]{2})+", run =>
        {
            string escaped = run.Value;
            byte[] bytes = new byte[escaped.Length / 3];
            for (int i = 0; i < bytes.Length; i++)
            {
                // Each escape is 3 chars ("=XX"); skip the "=" and parse the pair.
                bytes[i] = Convert.ToByte(escaped.Substring(i * 3 + 1, 2), 16);
            }

            return enc.GetString(bytes);
        });
    }
    
    0 讨论(0)
  • 2021-01-14 22:32

    Or the easiest of all, just use the QuotedPrintableDecoder from my MimeKit library:

    // Decodes quoted-printable text with MimeKit's QuotedPrintableDecoder and then
    // converts the decoded bytes to a string using the encoding named by charset.
    static string DecodeQuotedPrintable (string input, string charset)
    {
        QuotedPrintableDecoder qpDecoder = new QuotedPrintableDecoder ();
        byte[] encodedBytes = Encoding.ASCII.GetBytes (input);

        // Size the output buffer from the decoder's own estimate for this input length.
        int capacity = qpDecoder.EstimateOutputLength (encodedBytes.Length);
        byte[] decodedBytes = new byte[capacity];

        // Decode reports how many bytes of the output buffer it filled.
        int decodedLength = qpDecoder.Decode (encodedBytes, 0, encodedBytes.Length, decodedBytes);

        return Encoding.GetEncoding (charset).GetString (decodedBytes, 0, decodedLength);
    }
    

    Note that the other answers above assume the decoded content will be ASCII or UTF-8, but that isn't necessarily the case. You'll need to get the charset parameter from the Content-Type header of the MIME part that you are decoding.

    Of course... if you don't know how to get that info, you could simply use my awesome MailKit library to get the MIME part from IMAP and have it do all of this work for you.

    0 讨论(0)
  • 2021-01-14 22:45
        /// <summary>
        /// Decodes quoted-printable "=XX" escapes in <paramref name="source"/>.
        /// Each decoded byte is mapped directly to the character with the same code
        /// (no charset decoding is applied, so multi-byte UTF-8 sequences are not combined).
        /// </summary>
        /// <param name="source">Text containing "=XX" hex escapes.</param>
        /// <returns>
        /// The decoded text. A null or empty <paramref name="source"/> is returned unchanged.
        /// Soft line breaks ("=" followed by CRLF or LF) are removed. An "=" that is
        /// not followed by two hex digits is kept literally.
        /// </returns>
        static string ConverFromHex(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return source;
            }

            // StringBuilder avoids re-copying the whole result on every append.
            var target = new System.Text.StringBuilder(source.Length);

            int pos = 0;
            while (pos < source.Length)
            {
                int eq = source.IndexOf('=', pos);
                if (eq < 0)
                {
                    // no more escapes: add rest of source
                    target.Append(source, pos, source.Length - pos);
                    break;
                }

                // copy the literal text before the "="
                target.Append(source, pos, eq - pos);

                bool hasTwoMore = eq + 2 < source.Length;
                if (hasTwoMore && System.Uri.IsHexDigit(source[eq + 1]) && System.Uri.IsHexDigit(source[eq + 2]))
                {
                    // "=XX": combine the two hex digits into one byte value
                    int value = (System.Uri.FromHex(source[eq + 1]) << 4) | System.Uri.FromHex(source[eq + 2]);
                    target.Append((char)value);
                    pos = eq + 3;
                }
                else if (hasTwoMore && source[eq + 1] == '\r' && source[eq + 2] == '\n')
                {
                    // soft line break "=\r\n": contributes nothing to the output
                    pos = eq + 3;
                }
                else if (eq + 1 < source.Length && source[eq + 1] == '\n')
                {
                    // soft line break with a bare LF
                    pos = eq + 2;
                }
                else
                {
                    // malformed or trailing "=": keep it rather than throwing or dropping it
                    target.Append('=');
                    pos = eq + 1;
                }
            }

            return target.ToString();
        }
    
    0 讨论(0)
  • 2021-01-14 22:49

    Based on https://stackoverflow.com/a/36803911/6403521, here is my solution:

        // Checks that quotedprintable decodes three UTF-8 quoted-printable samples
        // to the expected accented text.
        [TestMethod]
        public void TestMethod1()
        {
            // Each row: expected decoded text, then the quoted-printable input.
            string[,] samples =
            {
                { "La Bouichère", "La Bouich=C3=A8re" },
                { "Chasné sur illet", "Chasn=C3=A9 sur illet" },
                { "é è", "=C3=A9 =C3=A8" }
            };

            for (int row = 0; row < samples.GetLength(0); row++)
            {
                Assert.AreEqual(samples[row, 0], quotedprintable(samples[row, 1], "utf-8"));
            }
        }
        /// <summary>
        /// Decodes a quoted-printable string, converting each run of consecutive
        /// "=XX" escapes to text with the named encoding.
        /// </summary>
        /// <param name="pStrIn">Quoted-printable encoded text.</param>
        /// <param name="encoding">Encoding name passed to Encoding.GetEncoding, e.g. "utf-8".</param>
        /// <returns>
        /// The decoded text. A null or empty <paramref name="pStrIn"/> is returned unchanged.
        /// An "=" that is not followed by two hex digits is kept literally.
        /// </returns>
        private string quotedprintable(string pStrIn, string encoding)
        {
            if (string.IsNullOrEmpty(pStrIn))
            {
                return pStrIn;
            }

            // Soft line breaks carry no content; removing them first rejoins any
            // multi-byte sequence that was split across two lines.
            string text = pStrIn.Replace("=\r\n", "").Replace("=\n", "");

            var result = new System.Text.StringBuilder(text.Length);
            var pending = new System.Collections.Generic.List<byte>();

            // Resolved on first use, so input without escapes never looks up the encoding.
            System.Text.Encoding charset = null;

            int cursor = 0;
            while (cursor < text.Length)
            {
                int marker = text.IndexOf('=', cursor);
                if (marker < 0)
                {
                    // No more escapes: copy the remainder and stop.
                    result.Append(text, cursor, text.Length - cursor);
                    break;
                }

                // Literal text before the "=".
                result.Append(text, cursor, marker - cursor);

                // Collect every consecutive well-formed "=XX" so a multi-byte
                // character is decoded from all of its bytes at once.
                pending.Clear();
                int next = marker;
                while (next + 2 < text.Length
                    && text[next] == '='
                    && System.Uri.IsHexDigit(text[next + 1])
                    && System.Uri.IsHexDigit(text[next + 2]))
                {
                    int value = (System.Uri.FromHex(text[next + 1]) << 4) | System.Uri.FromHex(text[next + 2]);
                    pending.Add((byte)value);
                    next += 3;
                }

                if (pending.Count == 0)
                {
                    // Malformed or trailing "=": keep it rather than throwing.
                    result.Append('=');
                    cursor = marker + 1;
                    continue;
                }

                if (charset == null)
                {
                    charset = System.Text.Encoding.GetEncoding(encoding);
                }

                result.Append(charset.GetString(pending.ToArray()));
                cursor = next;
            }

            return result.ToString();
        }
    
    0 讨论(0)
提交回复
热议问题