private string StripHT(string strHtml)
  {
   Regex regex=ne">

利用正则表达式去掉html代码

80酷酷网    80kuku.com

  正则

using System.Text.RegularExpressions;//需要引用

  // 利用正则表达式去掉"<"和">"之间的内容
  private string StripHT(string strHtml)
  {
   Regex regex=new Regex("<.+?>",RegexOptions.IgnoreCase);
   string strOutput=regex.Replace(strHtml,"");
   return strOutput;
  }

// Method 2. The original author noted that this method drove the CPU to 100%
// for unknown reasons. NOTE(review): presumably that was backtracking in the
// old nested-quantifier tag pattern; it has been replaced with a linear one.

/// <summary>
/// Strips script blocks and HTML tags from the input, decodes a handful of
/// common HTML entities, and finally removes any remaining angle brackets and
/// carriage returns.
/// </summary>
/// <param name="strHtml">Markup to clean; may be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when the input is null or empty.
/// Because the last step removes every '&lt;' and '&gt;', brackets produced by
/// decoding "&amp;lt;" / "&amp;gt;" are removed as well.
/// </returns>
public static string DropHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Patterns are verbatim strings so that regex escapes (\s, \d, ...) reach
    // the regex engine instead of being rejected as invalid C# escapes.
    // aryReg[i] is replaced by aryRep[i], in order.
    string[] aryReg =
    {
        @"(?s)<script[^>]*?>.*?</script>", // whole script blocks; (?s) lets '.' cross line breaks
        @"<[^<>]*>",                       // any tag; cannot backtrack past a bracket
        @"([\r])[\s]+",                    // a carriage return plus the whitespace after it
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",                       // any other numeric entity is dropped
        @"-->",                            // leftover comment terminator
        @"<!--.*"                          // leftover comment opener up to the end of the line
    };

    string[] aryRep =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1", // chr(161)
        "\u00A2", // chr(162)
        "\u00A3", // chr(163)
        "\u00A9", // chr(169)
        "",
        "\r",
        ""
    };

    string strOutput = strHtml;
    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: the result of Replace must be kept, otherwise
    // these clean-up steps have no effect.
    return strOutput
        .Replace("<", "")
        .Replace(">", "")
        .Replace("\r", "");
}

分享到
  • 微信分享
  • 新浪微博
  • QQ好友
  • QQ空间
点击: