T-SQL Algorithm to Encode Unsafe HTML Characters as HTML Character Entity References

前端 未结 3 2075
情深已故
情深已故 2021-01-03 12:40

I need to create an after insert trigger in my SQL Server 2008 R2 database.

The trigger needs to take some fields from INSERTED, process them and pu

相关标签:
3条回答
  • 2021-01-03 12:57

    How about creating a SQL UDF in .NET that calls Server.HtmlEncode(myUnsafeHtml) and loading that into the SQL server? To get started:

    http://blog.sqlauthority.com/2008/10/19/sql-server-introduction-to-clr-simple-example-of-clr-stored-procedure/

    http://www.codeproject.com/Articles/37377/SQL-Server-CLR-Functions

    http://msdn.microsoft.com/en-us/library/ms254498(v=vs.80).aspx

    0 讨论(0)
  • 2021-01-03 13:00

    Encoding five special characters with recursive CTE:

    DECLARE 
      @unsafe NVARCHAR(MAX),
      @safe   NVARCHAR(MAX) 
    
    --
    -- Create the unsafe html string
    -- 
    SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129) 
    --
    -- Use a recursive CTE to iterate through each character in the string
    -- 
    ;WITH cte AS 
    (
      -- 
      -- The first row will contain the original
      -- string, an empty string to be used to 
      -- build the "safe" string, and a position
      -- column to mark the character position
      -- of the loop
      -- 
      SELECT 
        @unsafe AS unsafe_html,
        CONVERT(NVARCHAR(MAX), '') AS safe_html,
        1 AS pos
      WHERE @unsafe IS NOT NULL AND LEN(@unsafe) > 0 
      UNION ALL
      -- 
      -- Create a loop: 
      -- The anchor row starts at position one.
      -- Increment the position by one for each pass.
      -- Stop when the position value is equal to the string lenth.
      -- Evaluate the character in each string
      -- If the ASCII value > 128, use the &# format.
      -- Otherwise, check for 5 special characters: " & ' < >
      -- Use the encoding reference or just the original character
      --
      SELECT 
        @unsafe AS unsafe_html,
        CONVERT(NVARCHAR(MAX), safe_html + 
        CASE WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 128 
             THEN '&#' + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))  
             ELSE CASE SUBSTRING(unsafe_html, pos, 1)
                  WHEN '"'  THEN '&quot' 
                  WHEN '&'  THEN '&amp'
                  WHEN '''' THEN '&apos'
                  WHEN '<'  THEN '&lt'
                  WHEN '>'  THEN '&gt'
                  ELSE SUBSTRING(unsafe_html, pos, 1)
                  END 
             END ) AS safe_html,
        pos + 1 AS pos
      FROM cte
      WHERE pos <= LEN(@unsafe)
    ) 
    --
    -- Each pass through the string creates a row in the CTE
    -- The last row will have the position value of the string length + 1
    -- Use that row as the safe html string
    -- SQL Server allows a max recursion of 32767
    -- 
    SELECT @safe = (
      SELECT safe_html 
      FROM cte
      WHERE pos = LEN(@unsafe) + 1
    ) 
    OPTION (MAXRECURSION 32767) 
    
    SELECT @safe
    
    -- html&aposs encoding &quotmethod&quot is &lt= or &gt= &amp 1234 &#129
    

    Initial version:

    DECLARE @s NVARCHAR(100)
    
    SET @s = '<html>unsafe & safe<html>'
    SELECT @s 
    SELECT (SELECT @s FOR XML PATH(''))
    
    ---------------------------------------
    <html>unsafe & safe<html>
    
    -----------------------------------------
    &lt;html&gt;unsafe &amp; safe&lt;html&gt;
    

    Full encoding with all official references:

    DECLARE 
        @unsafe  NVARCHAR(MAX),
        @safe NVARCHAR(MAX) 
    
    -- Build string with first 10,000 unicode chars
    SELECT @unsafe = COALESCE(@unsafe, '') + NCHAR(number) + ' ' 
    FROM (
        SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
        FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2 
    ) t
    
    -- Build table variable with character entity references defined in HTML 4.0
    -- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
    DECLARE @t TABLE (
        name NVARCHAR(25) NOT NULL, 
        unicode_val INT NOT NULL PRIMARY KEY 
    ) 
    
    INSERT @t 
    VALUES
    ('&quot', 34),
    ('&amp', 38),
    ('&apos', 39),
    ('&lt', 60),
    ('&gt', 62),
    ('&nbsp', 160),
    ('&iexcl', 161),
    ('&cent', 162),
    ('&pound', 163),
    ('&curren', 164),
    ('&yen', 165),
    ('&brvbar', 166),
    ('&sect', 167),
    ('&uml', 168),
    ('&copy', 169),
    ('&ordf', 170),
    ('&laquo', 171),
    ('&not', 172),
    ('&shy', 173),
    ('&reg', 174),
    ('&macr', 175),
    ('&deg', 176),
    ('&plusmn', 177),
    ('&sup2', 178),
    ('&sup3', 179),
    ('&acute', 180),
    ('&micro', 181),
    ('&para', 182),
    ('&middot', 183),
    ('&cedil', 184),
    ('&sup1', 185),
    ('&ordm', 186),
    ('&raquo', 187),
    ('&frac14', 188),
    ('&frac12', 189),
    ('&frac34', 190),
    ('&iquest', 191),
    ('&Agrave', 192),
    ('&Aacute', 193),
    ('&Acirc', 194),
    ('&Atilde', 195),
    ('&Auml', 196),
    ('&Aring', 197),
    ('&AElig', 198),
    ('&Ccedil', 199),
    ('&Egrave', 200),
    ('&Eacute', 201),
    ('&Ecirc', 202),
    ('&Euml', 203),
    ('&Igrave', 204),
    ('&Iacute', 205),
    ('&Icirc', 206),
    ('&Iuml', 207),
    ('&ETH', 208),
    ('&Ntilde', 209),
    ('&Ograve', 210),
    ('&Oacute', 211),
    ('&Ocirc', 212),
    ('&Otilde', 213),
    ('&Ouml', 214),
    ('&times', 215),
    ('&Oslash', 216),
    ('&Ugrave', 217),
    ('&Uacute', 218),
    ('&Ucirc', 219),
    ('&Uuml', 220),
    ('&Yacute', 221),
    ('&THORN', 222),
    ('&szlig', 223),
    ('&agrave', 224),
    ('&aacute', 225),
    ('&acirc', 226),
    ('&atilde', 227),
    ('&auml', 228),
    ('&aring', 229),
    ('&aelig', 230),
    ('&ccedil', 231),
    ('&egrave', 232),
    ('&eacute', 233),
    ('&ecirc', 234),
    ('&euml', 235),
    ('&igrave', 236),
    ('&iacute', 237),
    ('&icirc', 238),
    ('&iuml', 239),
    ('&eth', 240),
    ('&ntilde', 241),
    ('&ograve', 242),
    ('&oacute', 243),
    ('&ocirc', 244),
    ('&otilde', 245),
    ('&ouml', 246),
    ('&divide', 247),
    ('&oslash', 248),
    ('&ugrave', 249),
    ('&uacute', 250),
    ('&ucirc', 251),
    ('&uuml', 252),
    ('&yacute', 253),
    ('&thorn', 254),
    ('&yuml', 255),
    ('&OElig', 338),
    ('&oelig', 339),
    ('&Scaron', 352),
    ('&scaron', 353),
    ('&Yuml', 376),
    ('&fnof', 402),
    ('&circ', 710),
    ('&tilde', 732),
    ('&Alpha', 913),
    ('&Beta', 914),
    ('&Gamma', 915),
    ('&Delta', 916),
    ('&Epsilon', 917),
    ('&Zeta', 918),
    ('&Eta', 919),
    ('&Theta', 920),
    ('&Iota', 921),
    ('&Kappa', 922),
    ('&Lambda', 923),
    ('&Mu', 924),
    ('&Nu', 925),
    ('&Xi', 926),
    ('&Omicron', 927),
    ('&Pi', 928),
    ('&Rho', 929),
    ('&Sigma', 931),
    ('&Tau', 932),
    ('&Upsilon', 933),
    ('&Phi', 934),
    ('&Chi', 935),
    ('&Psi', 936),
    ('&Omega', 937),
    ('&alpha', 945),
    ('&beta', 946),
    ('&gamma', 947),
    ('&delta', 948),
    ('&epsilon', 949),
    ('&zeta', 950),
    ('&eta', 951),
    ('&theta', 952),
    ('&iota', 953),
    ('&kappa', 954),
    ('&lambda', 955),
    ('&mu', 956),
    ('&nu', 957),
    ('&xi', 958),
    ('&omicron', 959),
    ('&pi', 960),
    ('&rho', 961),
    ('&sigmaf', 962),
    ('&sigma', 963),
    ('&tau', 964),
    ('&upsilon', 965),
    ('&phi', 966),
    ('&chi', 967),
    ('&psi', 968),
    ('&omega', 969),
    ('&thetasym', 977),
    ('&upsih', 978),
    ('&piv', 982),
    ('&ensp', 8194),
    ('&emsp', 8195),
    ('&thinsp', 8201),
    ('&zwnj', 8204),
    ('&zwj', 8205),
    ('&lrm', 8206),
    ('&rlm', 8207),
    ('&ndash', 8211),
    ('&mdash', 8212),
    ('&lsquo', 8216),
    ('&rsquo', 8217),
    ('&sbquo', 8218),
    ('&ldquo', 8220),
    ('&rdquo', 8221),
    ('&bdquo', 8222),
    ('&dagger', 8224),
    ('&Dagger', 8225),
    ('&bull', 8226),
    ('&hellip', 8230),
    ('&permil', 8240),
    ('&prime', 8242),
    ('&Prime', 8243),
    ('&lsaquo', 8249),
    ('&rsaquo', 8250),
    ('&oline', 8254),
    ('&frasl', 8260),
    ('&euro', 8364),
    ('&image', 8465),
    ('&weierp', 8472),
    ('&real', 8476),
    ('&trade', 8482),
    ('&alefsym', 8501),
    ('&larr', 8592),
    ('&uarr', 8593),
    ('&rarr', 8594),
    ('&darr', 8595),
    ('&harr', 8596),
    ('&crarr', 8629),
    ('&lArr', 8656),
    ('&uArr', 8657),
    ('&rArr', 8658),
    ('&dArr', 8659),
    ('&hArr', 8660),
    ('&forall', 8704),
    ('&part', 8706),
    ('&exist', 8707),
    ('&empty', 8709),
    ('&nabla', 8711),
    ('&isin', 8712),
    ('&notin', 8713),
    ('&ni', 8715),
    ('&prod', 8719),
    ('&sum', 8721),
    ('&minus', 8722),
    ('&lowast', 8727),
    ('&radic', 8730),
    ('&prop', 8733),
    ('&infin', 8734),
    ('&ang', 8736),
    ('&and', 8743),
    ('&or', 8744),
    ('&cap', 8745),
    ('&cup', 8746),
    ('&int', 8747),
    ('&there4', 8756),
    ('&sim', 8764),
    ('&cong', 8773),
    ('&asymp', 8776),
    ('&ne', 8800),
    ('&equiv', 8801),
    ('&le', 8804),
    ('&ge', 8805),
    ('&sub', 8834),
    ('&sup', 8835),
    ('&nsub', 8836),
    ('&sube', 8838),
    ('&supe', 8839),
    ('&oplus', 8853),
    ('&otimes', 8855),
    ('&perp', 8869),
    ('&sdot', 8901),
    ('&lceil', 8968),
    ('&rceil', 8969),
    ('&lfloor', 8970),
    ('&rfloor', 8971),
    ('&lang', 9001),
    ('&rang', 9002),
    ('&loz', 9674),
    ('&spades', 9824),
    ('&clubs', 9827),
    ('&hearts', 9829),
    ('&diams', 9830)
    
    -- Build numbers table to parse the string
    DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY) 
    INSERT @numbers
    SELECT TOP (LEN(@unsafe)) ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
    FROM sys.all_objects s1 CROSS JOIN sys.all_objects s2
    
    -- Use numbers table to parse each character.
    -- If a match is found in character entity reference table,
    -- then use the safe substitute. Otherwise, if the unicode
    -- value is greater than 128, use &#<unicode char value>.
    -- Finally, use the original character if nothing else
    -- is a match
    SELECT @safe = COALESCE(@safe,'') 
           + COALESCE(name, 
             CASE WHEN UNICODE(SUBSTRING(@unsafe, number, 1)) > 128 THEN '&#' 
              + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, number, 1))) 
              ELSE SUBSTRING(@unsafe, number, 1) END)
    FROM @numbers 
    LEFT OUTER JOIN @t  
      ON UNICODE(SUBSTRING(@unsafe, number, 1)) = unicode_val
    
    SELECT @safe AS [safe]
    
    Results:  
    ! &quot # $ % &amp &apos ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; 
    &lt = &gt ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 
    [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { 
    | } ~   &#129 &#130 &#131 &#132 &#133 &#134 &#135 &#136 &#137 &#138 
    &#139 &#140 &#141 &#142 &#143 &#144 &#145 &#146 &#147 &#148 &#149 
    &#150 &#151 &#152 &#153 &#154 &#155 &#156 &#157 &#158 &#159 &nbsp 
    &iexcl &cent &pound &curren &yen &brvbar &sect &uml &copy &ordf 
    &laquo &not &shy &reg &macr &deg &plusmn &sup2 &sup3 &acute &micro 
    &para &middot &cedil &sup1 &ordm &raquo &frac14 &frac12 &frac34 
    &iquest &Agrave &Aacute &Acirc &Atilde &Auml &Aring &AElig &Ccedil 
    &Egrave &Eacute &Ecirc &Euml &Igrave &Iacute &Icirc &Iuml &ETH &Ntilde 
    &Ograve &Oacute &Ocirc &Otilde &Ouml &times &Oslash &Ugrave &Uacute 
    &Ucirc &Uuml &Yacute &THORN &szlig &agrave &aacute &acirc &atilde &auml 
    &aring &aelig &ccedil &egrave &eacute &ecirc &euml &igrave &iacute &icirc 
    &iuml &eth &ntilde &ograve &oacute &ocirc &otilde &ouml &divide &oslash 
    &ugrave &uacute &ucirc &uuml &yacute &thorn &yuml &#256 &#257 &#258 &#259 
    &#260 &#261 &#262 &#263 &#264 &#265 &#266...
    
    0 讨论(0)
  • 2021-01-03 13:08
    Declare @Expression nvarchar(max) = 'Y<1'
    
    Select @Expression = (Select @Expression For XML Path(''))
    
    Select @Expression
    
    0 讨论(0)
提交回复
热议问题