ASP.NET过滤HTML标签只保留换行与空格的方法
本文实例讲述了asp.net过滤html标签只保留换行与空格的方法。分享给大家供大家参考。具体分析如下:
自己从网上找了一个过滤html标签的方法,我也不知道谁的才是原创的,反正很多都一样。我把那方法复制下来,代码如下:
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <param name="htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with script blocks, tags, comment markers and unrecognised numeric character
/// references removed, a small set of entities decoded, and the result HTML-encoded and
/// trimmed. Returns an empty string when <paramref name="htmlstring"/> is null or empty.
/// </returns>
public static string nohtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' match line breaks, so scripts that
    // span several lines are removed together with their content.
    htmlstring = Regex.Replace(htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "");

    // Drop each line break together with the whitespace that follows it
    // (this also removes every "\r\n" pair).
    htmlstring = Regex.Replace(htmlstring, @"([\r\n])[\s]+", "");

    // Remove leftover comment terminators and unterminated comment openers.
    htmlstring = Regex.Replace(htmlstring, @"-->", "");
    htmlstring = Regex.Replace(htmlstring, @"<!--.*", "");

    // Decode the handful of entities this helper knows about.
    // NOTE: the replacements run one after another, so "&amp;lt;" is decoded twice.
    htmlstring = Regex.Replace(htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Remove any other numeric character reference. The "&#" prefix is required so that
    // ordinary text such as "12;" is left untouched.
    htmlstring = Regex.Replace(htmlstring, @"&#(\d+);", "");

    // string.Replace returns a new string; the result must be assigned to take effect.
    htmlstring = htmlstring.Replace("<", "");
    htmlstring = htmlstring.Replace(">", "");
    htmlstring = htmlstring.Replace("\r\n", "");

    // WebUtility.HtmlEncode does not depend on an active HTTP request, so this helper
    // also works where HttpContext.Current would be null.
    return System.Net.WebUtility.HtmlEncode(htmlstring).Trim();
}
以上代码是从网上直接复制过来的,这个确实能过滤掉所有的html标签,但是这个不是我想要的,这个过滤得太干净了,我如果用textarea输入框的话,我是要保留空格跟换行的。
然后我就自己改了一下这个方法,textarea的换行是\n,所以我得把这些换行符重新匹配替换成<br>,这样的话从数据库中读取到页面时,就能正确的换行了,再把空格替换成html的空格符,大功告成。
/// <summary>
/// Strips HTML markup from a string while preserving its layout: line breaks become
/// <c>&lt;br&gt;</c> and every other whitespace character becomes <c>&amp;nbsp;</c>.
/// </summary>
/// <param name="htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with script blocks, tags, comment markers and unrecognised numeric character
/// references removed, a small set of entities decoded, the result HTML-encoded, and
/// whitespace rewritten as described above. Returns an empty string when
/// <paramref name="htmlstring"/> is null or empty.
/// </returns>
public static string newnohtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' match line breaks, so scripts that
    // span several lines are removed together with their content.
    htmlstring = Regex.Replace(htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "");

    // Remove leftover comment terminators and unterminated comment openers.
    htmlstring = Regex.Replace(htmlstring, @"-->", "");
    htmlstring = Regex.Replace(htmlstring, @"<!--.*", "");

    // Decode the handful of entities this helper knows about.
    // NOTE: the replacements run one after another, so "&amp;lt;" is decoded twice.
    htmlstring = Regex.Replace(htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Remove any other numeric character reference. The "&#" prefix is required so that
    // ordinary text such as "12;" is left untouched.
    htmlstring = Regex.Replace(htmlstring, @"&#(\d+);", "");

    // string.Replace returns a new string; the result must be assigned to take effect.
    htmlstring = htmlstring.Replace("<", "");
    htmlstring = htmlstring.Replace(">", "");

    // WebUtility.HtmlEncode does not depend on an active HTTP request, so this helper
    // also works where HttpContext.Current would be null.
    htmlstring = System.Net.WebUtility.HtmlEncode(htmlstring);

    // Turn line breaks into <br>. "\r\n" is listed first so a Windows line ending
    // yields a single <br> rather than two.
    htmlstring = Regex.Replace(htmlstring, @"\r\n|\r|\n", "<br>");

    // Whatever whitespace is left (spaces, tabs, ...) becomes a non-breaking space entity
    // so that runs of spaces are preserved when the text is rendered as HTML.
    htmlstring = Regex.Replace(htmlstring, @"\s", "&nbsp;");

    return htmlstring;
}
这个方法可以用于过滤用户输入并发布的内容。
希望本文所述对大家的asp.net程序设计有所帮助。
上一篇: JSP&Servlet中字符编码的转换
下一篇: C语言预处理