[IE编程]C#如何获取剪贴板中htmlText

获取一个网页的html源码网上已有成熟代码可用。但小弟需要获取剪贴板中的html源码, (string)Clipboard.GetData(DataFormats.Html);得到的是包含乱码(中文)的字符串,尝试进行编码转换,但都 失败了。请问如何获得不包含乱码的剪贴板html代码?


        public static string GetHtmlTextFromClipboard()
        {
            if (Clipboard.ContainsData(DataFormats.Html))
            {
                string SourceURL = "";
                string temp = (string)Clipboard.GetData(DataFormats.Html);
                Regex reg = new Regex(@"SourceURL:(?<url>[^\s]+)\s*");
                Match m = reg.Match(temp);
                if (m.Success)
                {
                    SourceURL = m.Groups["url"].Value;
                    if (SourceURL != null)
                    {
                        string date = (string)Clipboard.GetData(DataFormats.Html);
                        byte[] buf = new WebClient().DownloadData(SourceURL);
                        string html = Encoding.UTF8.GetString(buf);
                        Encoding encoding = GetEncoding(html);
                        if (encoding != null)
                        {
                            byte[] b = encoding.GetBytes(date);
                            return encoding.GetString(b);//换成其它编码也是乱码
                        }
                    }
                }
            }
            return null;
        }//end fun

        public static Encoding GetEncoding(string html)
        {
            string pattern = @"(?i)\bcharset=(?<charset>[-a-zA-Z_0-9]+)";
            string charset = Regex.Match(html, pattern).Groups["charset"].Value;
            try { return Encoding.GetEncoding(charset); }
            catch (ArgumentException) { return null; }
        }

admin -
  • admin - 1年前

    试下这段代码~~~

            public static string GetHtmlTextFromClipboard() 
            {
                if (Clipboard.ContainsText(TextDataFormat.Html))
                {
                    System.IO.MemoryStream vMemoryStream = Clipboard.GetData("Html Format") as System.IO.MemoryStream;
                    vMemoryStream.Position = 0; byte[] vBytes = new byte[vMemoryStream.Length];
                    vMemoryStream.Read(vBytes, 0, (int)vMemoryStream.Length);
                    return Encoding.UTF8.GetString(vBytes);
                }
                return null;
            }//end fun