获取一个网页的html源码网上已有成熟代码可用。但小弟需要获取剪贴板中的html源码, (string)Clipboard.GetData(DataFormats.Html);得到的是包含乱码(中文)的字符串,尝试进行编码转换,但都 失败了。请问如何获得不包含乱码的剪贴板html代码?
public static string GetHtmlTextFromClipboard()
{
if (Clipboard.ContainsData(DataFormats.Html))
{
string SourceURL = "";
string temp = (string)Clipboard.GetData(DataFormats.Html);
Regex reg = new Regex(@"SourceURL:(?<url>[^\s]+)\s*");
Match m = reg.Match(temp);
if (m.Success)
{
SourceURL = m.Groups["url"].Value;
if (SourceURL != null)
{
string date = (string)Clipboard.GetData(DataFormats.Html);
byte[] buf = new WebClient().DownloadData(SourceURL);
string html = Encoding.UTF8.GetString(buf);
Encoding encoding = GetEncoding(html);
if (encoding != null)
{
byte[] b = encoding.GetBytes(date);
return encoding.GetString(b);//换成其它编码也是乱码
}
}
}
}
return null;
}//end fun
public static Encoding GetEncoding(string html)
{
string pattern = @"(?i)\bcharset=(?<charset>[-a-zA-Z_0-9]+)";
string charset = Regex.Match(html, pattern).Groups["charset"].Value;
try { return Encoding.GetEncoding(charset); }
catch (ArgumentException) { return null; }
}
试下这段代码~~~
public static string GetHtmlTextFromClipboard()
{
if (Clipboard.ContainsText(TextDataFormat.Html))
{
System.IO.MemoryStream vMemoryStream = Clipboard.GetData("Html Format") as System.IO.MemoryStream;
vMemoryStream.Position = 0; byte[] vBytes = new byte[vMemoryStream.Length];
vMemoryStream.Read(vBytes, 0, (int)vMemoryStream.Length);
return Encoding.UTF8.GetString(vBytes);
}
return null;
}//end fun