日期:2014-05-18 浏览次数:21313 次
/// <summary>
/// Downloads the resource at <paramref name="Url"/> and returns its body decoded as UTF-8 text.
/// </summary>
/// <param name="Url">Address of the page to fetch; it must resolve to an HTTP(S) request.</param>
/// <returns>
/// The full response body as a string, or the literal string "错误" if the request
/// cannot be created, the server cannot be reached, or reading the body fails.
/// </returns>
public static string _GetHtml(string Url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // Dispose the response, its stream and the reader deterministically so the
        // underlying connection is released even when reading throws part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            // ReadToEnd buffers internally, avoiding the quadratic cost of
            // appending to a string one small chunk at a time.
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept for existing callers: every failure
        // (bad URL, network error, non-HTTP scheme) maps to this sentinel.
        return "错误";
    }
}
/// <summary>
/// Fetches a fixed CSDN topic page, prints every run of consecutive characters in
/// the range U+4E00..U+9FA5 on its own line, then waits for a line of console input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string pageUrl = "http://topic.csdn.net/u/20120225/22/b5912ce0-ed81-4932-8bb3-a456708d69d4.html";
    string pageSource = _GetHtml(pageUrl);

    // One match per maximal run of characters inside the U+4E00..U+9FA5 range.
    MatchCollection chineseRuns = Regex.Matches(pageSource, @"[\u4e00-\u9fa5]+", RegexOptions.None);
    for (int i = 0; i < chineseRuns.Count; i++)
    {
        Console.WriteLine(chineseRuns[i].Value);
    }

    // Block until the user presses Enter.
    Console.ReadLine();
}
------解决方案--------------------