Cannot Read a Unicode URL in C#

天涯浪子 提交于 2021-02-08 04:39:18

问题


The following code won't work:

using System;
using System.IO;
using System.Net;
using System.Web;

namespace Proyecto_Prueba_04
{
    class Program
    {
        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns the response body as text.
        /// </summary>
        /// <param name="url">URL to request; it is passed to <see cref="WebRequest.Create(string)"/> unchanged.</param>
        /// <returns>The complete response body, as read by a <see cref="StreamReader"/> created with its default settings.</returns>
        public static string GetWebText(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.UserAgent = "A .NET Web Crawler";

            // Dispose the response, its stream and the reader deterministically so the
            // underlying connection is released even if reading the body throws.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        } // End of the GetWebText method.

        /// <summary>
        /// Console entry point: prints the test URL, prints its encoded form, downloads
        /// the encoded URL, prints the downloaded HTML, and then waits for Enter.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // The host is an internationalized (non-ASCII) domain name.
            string urlPrueba = Uri.UnescapeDataString("http://президент.рф/");
            Console.WriteLine("urlPrueba" + " = " + urlPrueba);

            // NOTE(review): presumably this is where the failure reported in the question
            // comes from - UrlPathEncode percent-encodes the non-ASCII host characters,
            // which is not how host names are encoded (they need IDN/Punycode). Confirm.
            var encoded = HttpUtility.UrlPathEncode(urlPrueba);
            Console.WriteLine("encoded" + " = " + encoded);

            string codigoHTML = GetWebText(encoded);
            Console.WriteLine("codigoHTML" + " = " + codigoHTML);

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        } // End of the Main method.
    } // End of the Program class.
} // End of the Proyecto_Prueba_04 namespace.

I can't understand how I have to handle a Unicode URL.

Any ideas?

Thanks.


回答1:


You can use the IdnMapping class (in the System.Globalization namespace) to convert the Unicode host name to its ASCII form.

  // IdnMapping lives in the System.Globalization namespace.
  const string unicodeHost = "президент.рф";

  // Convert the internationalized host name to its ASCII-only form and show it.
  string asciiHost = new IdnMapping().GetAscii(unicodeHost);
  Console.WriteLine(asciiHost);

  // Request the page through the ASCII host name and print what comes back.
  string pageText = GetWebText("http://" + asciiHost);
  Console.WriteLine(pageText);



回答2:


Use a System.Uri instead of passing the URL as a string.

This works; I tried it:

using System;
using System.IO;
using System.Net;
using System.Web;

namespace Proyecto_Prueba_04
{
internal class Program
{
    /// <summary>
    /// Downloads the resource identified by <paramref name="uri"/> and returns the response body as text.
    /// </summary>
    /// <param name="uri">Address to request, passed to <see cref="WebRequest.Create(Uri)"/>.</param>
    /// <returns>The complete response body, or <c>null</c> when the response exposes no stream.</returns>
    public static string GetWebText(Uri uri)
    {
        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(uri);
        webRequest.UserAgent = "A .NET Web Crawler";

        using (WebResponse webResponse = webRequest.GetResponse())
        using (Stream body = webResponse.GetResponseStream())
        {
            // Without a response stream there is nothing to read.
            if (body == null)
            {
                return null;
            }

            using (StreamReader bodyReader = new StreamReader(body))
            {
                return bodyReader.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Console entry point: prints the absolute form of the test URI, downloads it,
    /// prints the downloaded HTML, and then waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // The URL is wrapped in a Uri and passed on as a Uri, never as a raw string.
        Uri target = new Uri("http://президент.рф/");

        Console.WriteLine(string.Concat("urlPrueba", " = ", target.AbsoluteUri));

        string pageSource = GetWebText(target);
        Console.WriteLine(string.Concat("codigoHTML", " = ", pageSource));

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}

} 


来源:https://stackoverflow.com/questions/12381578/can-not-read-unicode-url-in-c-sharp

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!