最后，我们对所有的“恶意单词”进行替换：
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Best-effort HTML filter: removes every tag that is not on an allow-list and
/// defuses script-bearing attributes on the tags that are kept.
/// </summary>
/// <remarks>
/// This is a regular-expression filter, not an HTML parser. It does not decode
/// character references and only recognises the literal <c>javascript:</c>
/// scheme, so entity-encoded payloads and other URL schemes are NOT handled.
/// NOTE(review): for untrusted input prefer a vetted, parser-based sanitizer.
/// </remarks>
public static class Sanitize
{
    // Case-insensitive matching must not depend on the thread culture: with
    // Turkish casing rules 'I' folds to a dotless 'ı', which would let
    // "JAVASCRIPT:" or "ONCLICK" slip past the patterns below.
    private const RegexOptions matchOptions =
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    // Handler names the original deny-list replaced wherever they appeared in
    // a tag; kept as-is so nothing that used to be sanitized is now let through.
    private const string legacyHandlerNames =
        "onerror|onmousemove|onmouseout|onmouseover|" +
        "onkeypress|onkeydown|onkeyup";

    // List of accepted/harmless tags (in lower case).
    private static readonly string[] allowedTags =
        { "p", "h1", "b", "i", "a", "ul", "li", "pre", "hr", "blockquote", "img" };

    // One tag: '<' up to the first '>' (note the non-greedy "?"), or up to the
    // end of the input when the tag is never closed. Singleline lets '.' match
    // newlines, so a tag broken across lines is still seen as a tag.
    private static readonly Regex tagPattern =
        new Regex(@"<.*?(?:>|\z)", matchOptions | RegexOptions.Singleline);

    // Extracts the tag name, such as "a" or "h1", from an opening or closing tag.
    private static readonly Regex tagNamePattern =
        new Regex(@"</?(?<tagName>[^\s/]*)[>\s/]", matchOptions);

    // Event-handler attribute names. The trailing (?==) is a positive
    // lookahead: the name must be followed by '=', but the '=' itself is not
    // consumed. Besides the legacy names, any "on..." name that starts where an
    // attribute can start (after whitespace, a quote or '/') is matched, so
    // onclick/onload/etc. are covered without listing every handler.
    private static readonly Regex eventHandlerPattern =
        new Regex(
            @"(?:(?:" + legacyHandlerNames + @")|(?<=[\s""'/])on\w+)\s*(?==)",
            matchOptions);

    // The "javascript:" URL scheme. It appears inside attribute VALUES and is
    // therefore never followed by '=', so it needs its own pattern.
    private static readonly Regex scriptSchemePattern =
        new Regex(@"javascript\s*:", matchOptions);

    /// <summary>
    /// Sanitizes the HTML in <paramref name="inputHtml"/>: every tag is passed
    /// through the tag filter, text between tags is left untouched.
    /// </summary>
    /// <param name="inputHtml">The HTML fragment to sanitize.</param>
    /// <returns>The input with disallowed tags removed and dangerous attributes renamed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputHtml"/> is null.</exception>
    public static string FixTags(string inputHtml)
    {
        if (inputHtml == null)
        {
            throw new ArgumentNullException("inputHtml");
        }

        // MatchEvaluator is a delegate; Regex.Replace calls fixTag once per matched tag.
        return tagPattern.Replace(inputHtml, new MatchEvaluator(Sanitize.fixTag));
    }

    /// <summary>
    /// Decides what replaces a single matched tag: nothing when the tag is not
    /// allowed, otherwise the tag with bad attributes renamed to SANITIZED.
    /// </summary>
    private static string fixTag(Match tagMatch)
    {
        string tag = tagMatch.Value;

        // An unterminated "<..." at the end of the input could be completed by
        // whatever markup follows it on the page; escape the '<' so it is text.
        if (!tag.EndsWith(">", StringComparison.Ordinal))
        {
            return "&lt;" + tag.Substring(1);
        }

        // Invariant lower-casing keeps "IMG" -> "img" regardless of the thread culture.
        string tagName = tagNamePattern.Match(tag).Groups["tagName"].Value.ToLowerInvariant();

        // If the tag isn't in the list of allowed tags, it is removed entirely.
        if (Array.IndexOf(allowedTags, tagName) < 0)
        {
            return "";
        }

        // Rename event-handler attributes, then neutralise javascript: URLs.
        string fixedTag = eventHandlerPattern.Replace(tag, "SANITIZED");
        return scriptSchemePattern.Replace(fixedTag, "SANITIZED");
    }
}
注意代码中两处正则表达式的高级用法：非贪婪（懒惰）模式和正向预查，详细可参考“贪婪模式”和“正向预查”的相关介绍。
这里我们就可以看到正则表达式所起到的强大作用——操作字符串的无上利器啊！
2. 除了注入攻击，另一种必须使用的技术是nofollow。因为Google的链接价值算法，我们都希望能有高价值的链接指向我们的网站，以提高我们网站的等级。一种简单的方式就是到其他网站（如新浪）申请一个博客，然后在博客里添加一条链接，指向自己的网站即可。但如果我们自己是新浪，我们当然不愿意有其他人这样做（毕竟我们不知道其他人链接指向的网站究竟是好是坏，如果是一个垃圾网站，会牵连到我们自己的）。但是呢，我们也不愿意完全禁止掉链接的使用（比如简单地对链接进行编码，让链接失去作用），因为毕竟很多链接或许只是内部链接，而且一个能直接点击的链接能带来更好的用户体验。
为了解决这个问题，Google给出了一个方法，在链接中加上关键字nofollow，如下所示：
<a rel="nofollow" href="http://too.much.spam">cool link</a>
这样，链接能直接点击，但不会带来链接价值——即Google不会认为你认可或推荐了该链接指向的网站。看看博客园有没有这样做，……，呵呵，好像没有，很大度哟。不过据说Google也会逐步降低链接价值的作用，谣言了，随他去吧……