C# 中过滤 HTML 的正则表达式
程序员文章站
2023-11-17 18:59:52
实现代码
///
/// 去除html标记
///
///
实现代码
/// <summary>
/// Strips HTML markup from a fragment and returns the remaining text,
/// HTML-encoded so it can be written back into a page safely.
/// </summary>
/// <param name="htmlstring">Source text that may contain HTML markup.</param>
/// <returns>
/// The text with script blocks, tags, comments and numeric entities removed,
/// a handful of named entities decoded, any remaining angle brackets and CRLF
/// pairs dropped, then HTML-encoded and trimmed. Returns an empty string for
/// null or empty input.
/// </returns>
public static string nohtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    const RegexOptions options = RegexOptions.IgnoreCase;

    // Remove script blocks. Singleline lets "." cross line breaks so that
    // multi-line scripts are removed together with their body.
    htmlstring = Regex.Replace(htmlstring, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

    // Remove tags, then line breaks followed by whitespace, then comment remnants.
    htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "", options);
    htmlstring = Regex.Replace(htmlstring, @"([\r\n])[\s]+", "", options);
    htmlstring = Regex.Replace(htmlstring, @"-->", "", options);
    htmlstring = Regex.Replace(htmlstring, @"<!--.*", "", options);

    // Decode the supported named entities (and their numeric equivalents).
    htmlstring = Regex.Replace(htmlstring, @"&(quot|#34);", "\"", options);
    htmlstring = Regex.Replace(htmlstring, @"&(amp|#38);", "&", options);
    htmlstring = Regex.Replace(htmlstring, @"&(lt|#60);", "<", options);
    htmlstring = Regex.Replace(htmlstring, @"&(gt|#62);", ">", options);
    htmlstring = Regex.Replace(htmlstring, @"&(nbsp|#160);", " ", options);
    htmlstring = Regex.Replace(htmlstring, @"&(iexcl|#161);", "\xa1", options);
    htmlstring = Regex.Replace(htmlstring, @"&(cent|#162);", "\xa2", options);
    htmlstring = Regex.Replace(htmlstring, @"&(pound|#163);", "\xa3", options);
    htmlstring = Regex.Replace(htmlstring, @"&(copy|#169);", "\xa9", options);

    // Drop any other numeric entity. The "&#" prefix is required: without it
    // the pattern would delete ordinary text such as "12;".
    htmlstring = Regex.Replace(htmlstring, @"&#(\d+);", "", options);

    // Strings are immutable, so the result of Replace must be assigned.
    htmlstring = htmlstring.Replace("<", "");
    htmlstring = htmlstring.Replace(">", "");
    htmlstring = htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the method Server.HtmlEncode delegates to; calling
    // it directly avoids a NullReferenceException when there is no current HttpContext.
    return HttpUtility.HtmlEncode(htmlstring).Trim();
}
C# 过滤 HTML 标签及空格
/// <summary>
/// Removes every HTML tag and every space character from the given text.
/// </summary>
/// <param name="htmlstr">Text that may contain HTML tags; may be null.</param>
/// <returns>The filtered text, or an empty string when the input is null or empty.</returns>
public static string filterhtml(string htmlstr)
{
    if (string.IsNullOrEmpty(htmlstr))
    {
        return "";
    }

    // Two alternatives: anything between "<" and ">", or a single space.
    // NOTE(review): the second alternative may originally have been the "&nbsp;"
    // entity before the article was extracted - confirm against the original source.
    return System.Text.RegularExpressions.Regex.Replace(htmlstr, "<[^>]*>| ", "");
}
写一个静态方法移除 HTML 标签
#region Remove HTML tags
/// <summary>
/// Removes every HTML tag (anything between "&lt;" and "&gt;") from the given text.
/// </summary>
/// <param name="htmlstr">Text that may contain HTML tags; may be null.</param>
/// <returns>The text without tags, or an empty string when the input is null or empty.</returns>
public static string parsetags(string htmlstr)
{
    // Regex.Replace throws on null input; return empty instead, like filterhtml does.
    if (string.IsNullOrEmpty(htmlstr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(htmlstr, "<[^>]*>", "");
}
#endregion
取出文本中的图片地址
#region Extract image URL
/// <summary>
/// Returns the value of the <c>src</c> attribute of the first <c>img</c> tag
/// found in the given HTML.
/// </summary>
/// <param name="htmlstr">HTML text to search; may be null.</param>
/// <returns>
/// The URL exactly as written in the markup (case preserved, surrounding quotes
/// removed), or an empty string when there is no <c>img</c> tag with a
/// <c>src</c> attribute or the input is null or empty.
/// </returns>
public static string getimgurl(string htmlstr)
{
    if (string.IsNullOrEmpty(htmlstr))
    {
        return string.Empty;
    }

    // The value may be double-quoted, single-quoted or unquoted; all three
    // alternatives capture into the same named group. Matching is case-insensitive
    // on the original text so the URL itself is not lower-cased. The attribute must
    // be preceded by whitespace so that e.g. "data-src" is not mistaken for "src".
    Match m = Regex.Match(
        htmlstr,
        @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase);

    return m.Success ? m.Groups["url"].Value : string.Empty;
}
#endregion
提取 HTML 代码中文字的 C# 函数
using System;
using System.Text.RegularExpressions;

/// <summary>
/// Console demo for <see cref="striphtml"/>: strips the markup from a small
/// sample document and prints the remaining text.
/// </summary>
public class striphtmltest
{
    public static void Main()
    {
        string s = striphtml(
            "<html><head><title>中国石龙信息平台</title></head><body>faddfs龙信息平台</body></html>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Extracts the text from HTML source.
    /// </summary>
    /// <param name="strhtml">Source text that may contain HTML markup.</param>
    /// <returns>
    /// The text with script blocks, tags, comments and numeric entities removed,
    /// a handful of named entities decoded, and any remaining angle brackets and
    /// CRLF pairs dropped. Returns an empty string for null or empty input.
    /// </returns>
    public static string striphtml(string strhtml)
    {
        if (string.IsNullOrEmpty(strhtml))
        {
            return string.Empty;
        }

        // Each rule is { pattern, replacement }; rules are applied in order.
        string[][] rules =
        {
            // Script blocks; (?s) lets "." cross line breaks for this pattern only.
            new[] { @"(?s)<script[^>]*?>.*?</script>", "" },
            // Tags, including attributes whose quoted values contain ">".
            // NOTE(review): the nested quantifiers here can backtrack heavily on a
            // long unterminated "<..." run - consider a match timeout for untrusted input.
            new[] { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
            // A line break followed by further whitespace.
            new[] { @"([\r\n])[\s]+", "" },
            // Named entities and their numeric equivalents.
            new[] { @"&(quot|#34);", "\"" },
            new[] { @"&(amp|#38);", "&" },
            new[] { @"&(lt|#60);", "<" },
            new[] { @"&(gt|#62);", ">" },
            new[] { @"&(nbsp|#160);", " " },
            new[] { @"&(iexcl|#161);", "\xa1" },
            new[] { @"&(cent|#162);", "\xa2" },
            new[] { @"&(pound|#163);", "\xa3" },
            new[] { @"&(copy|#169);", "\xa9" },
            // Any other numeric entity. The "&#" prefix is required: without it
            // the pattern would delete ordinary text such as "12;".
            new[] { @"&#(\d+);", "" },
            // Comments: turn the closing marker into a line break first so the
            // next rule can delete from "<!--" up to that line break.
            new[] { @"-->", "\r\n" },
            new[] { @"<!--.*\n", "" }
        };

        string stroutput = strhtml;
        foreach (string[] rule in rules)
        {
            stroutput = Regex.Replace(stroutput, rule[0], rule[1], RegexOptions.IgnoreCase);
        }

        // Strings are immutable, so the result of Replace must be assigned.
        stroutput = stroutput.Replace("<", "");
        stroutput = stroutput.Replace(">", "");
        stroutput = stroutput.Replace("\r\n", "");
        return stroutput;
    }
}
tempcontent 表示包含 HTML 的字符串：
// "+" requires at least one character between "<" and ">", so a bare "<>" is left in place.
tempcontent = System.Text.RegularExpressions.Regex.Replace(tempcontent, "<[^>]+>", "");
// "*" allows any number of characters (including none), so "<>" is removed as well.
tempcontent = System.Text.RegularExpressions.Regex.Replace(tempcontent, "<[^>]*>", "");
上一篇: Android 单线程模型详解及实例
下一篇: repeater分页 内容显示