当前位置: 首页>html>过滤到所有HTML标签asp.netc#

过滤到所有HTML标签asp.netc#

时间:2015-06-11 17:39 来源:网络整理 作者:KKWL 点击:
public static string StripHTML(string source) { try { string result; result = source.Replace("\r", " "); result = result.Replace("\n", " "); result = result.Replace("'", " "); result = result.Replace("\t", string.Empty); result = System.Tex

public static string StripHTML(string source)
{

try
{
string result;
result = source.Replace("\r", " ");
result = result.Replace("\n", " ");
result = result.Replace("'", " ");
result = result.Replace("\t", string.Empty);
result = System.Text.RegularExpressions.Regex.Replace(result, @"( )+", " ");
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*head([^>])*>","<head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*head( )*>)","</head>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(<head>).*(</head>)",string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*script([^>])*>","<script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*script( )*>)","</script>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"(<script>).*(</script>)",string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*style([^>])*>","<style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"(<( )*(/)( )*style( )*>)","</style>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(<style>).*(</style>)",string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*td([^>])*>","\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*br( )*>","\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*li( )*>","\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*div([^>])*>","\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*tr([^>])*>","\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<( )*p([^>])*>","\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<[^>]*>",string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&nbsp;"," ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&bull;"," * ", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&lsaquo;","<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&rsaquo;",">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&trade;","(tm)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&frasl;","/", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"<","<", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @">",">", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&copy;","(c)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&reg;","(r)", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, @"&(.{2,6});",string.Empty, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = result.Replace("\n", "\r");
result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\r)","\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\t)","\t\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(\t)( )+(\r)","\t\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)( )+(\t)","\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+(\r)","\r\r", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
result = System.Text.RegularExpressions.Regex.Replace(result, "(\r)(\t)+","\r\t", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
string breaks = "\r\r\r";
string tabs = "\t\t\t\t\t";
for (int index=0; index<result.Length; index++)
{
result = result.Replace(breaks, "\r\r");
result = result.Replace(tabs, "\t\t\t\t");
breaks = breaks + "\r";
tabs = tabs + "\t";
}
return result;
}
catch
{
//MessageBox.Show("Error");
return source;
}

}

顶一下
(0)
0%
踩一下
(0)
0%
------分隔线----------------------------