2013-06-11 10 views
5

Sto lavorando a una funzione CLR con valori di tabella per SQL Server 2008 R2. A un certo punto devo eseguire l'HtmlDecode di una stringa, ma questo è problematico perché si basa su System.Web, che non è un assembly supportato da SQL Server. Decodifica HTML senza System.Web

Qualcuno può pensare a un modo migliore per eseguire la decodifica HTML?

Per informazione: il CLR di SQL Server 2008 supporta solo fino a .NET 3.5, quindi System.Net.WebUtility non funzionerà.

+1

Perché non scriverla semplicemente da sé? –

+0

Grazie per quello ... – Wjdavis5

+0

Potrebbe essere utile: http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=20749, http://stupidcodetricks.blogspot.ca/2008/06/decode-html-strings-in-sql-server.html – RobSiklos

risposta

1

Inoltre è possibile usare Reflector per prendere il codice direttamente da WebUtility (per favore non incolpatemi per lo stile del codice: è codice decompilato):

/// <summary>
/// Self-contained HTML entity decoder that depends only on the base class
/// library (no System.Web / System.Net.WebUtility), written with C# 3 syntax
/// so it can be compiled for .NET 3.5 hosts such as SQL Server 2008 CLR.
/// </summary>
/// <remarks>
/// All static fields are <c>readonly</c> or <c>const</c>: SQL Server refuses to
/// load SAFE / EXTERNAL_ACCESS assemblies that contain writable static fields.
/// </remarks>
public class WebUtility {
    /// <summary>
    /// Maps named character references (the text between '&amp;' and ';')
    /// to the single UTF-16 character they stand for.
    /// </summary>
    private static class HtmlEntities {
        // Each entry has the shape "<char>-<name>": index 0 is the decoded
        // character, index 1 is a separator, the remainder is the entity name.
        // Invisible characters (soft hyphen, zero-width joiners, direction
        // marks) and the lang/rang brackets are written as \u escapes so that
        // editors, copy/paste and Unicode normalisation cannot corrupt them.
        private static readonly string[] _entitiesList = new string[] {
            "\"-quot",
            "&-amp",
            "'-apos",
            "<-lt",
            ">-gt",
            "\u00a0-nbsp",
            "¡-iexcl",
            "¢-cent",
            "£-pound",
            "¤-curren",
            "¥-yen",
            "¦-brvbar",
            "§-sect",
            "¨-uml",
            "©-copy",
            "ª-ordf",
            "«-laquo",
            "¬-not",
            "\u00ad-shy",
            "®-reg",
            "¯-macr",
            "°-deg",
            "±-plusmn",
            "²-sup2",
            "³-sup3",
            "´-acute",
            "µ-micro",
            "¶-para",
            "·-middot",
            "¸-cedil",
            "¹-sup1",
            "º-ordm",
            "»-raquo",
            "¼-frac14",
            "½-frac12",
            "¾-frac34",
            "¿-iquest",
            "À-Agrave",
            "Á-Aacute",
            "Â-Acirc",
            "Ã-Atilde",
            "Ä-Auml",
            "Å-Aring",
            "Æ-AElig",
            "Ç-Ccedil",
            "È-Egrave",
            "É-Eacute",
            "Ê-Ecirc",
            "Ë-Euml",
            "Ì-Igrave",
            "Í-Iacute",
            "Î-Icirc",
            "Ï-Iuml",
            "Ð-ETH",
            "Ñ-Ntilde",
            "Ò-Ograve",
            "Ó-Oacute",
            "Ô-Ocirc",
            "Õ-Otilde",
            "Ö-Ouml",
            "×-times",
            "Ø-Oslash",
            "Ù-Ugrave",
            "Ú-Uacute",
            "Û-Ucirc",
            "Ü-Uuml",
            "Ý-Yacute",
            "Þ-THORN",
            "ß-szlig",
            "à-agrave",
            "á-aacute",
            "â-acirc",
            "ã-atilde",
            "ä-auml",
            "å-aring",
            "æ-aelig",
            "ç-ccedil",
            "è-egrave",
            "é-eacute",
            "ê-ecirc",
            "ë-euml",
            "ì-igrave",
            "í-iacute",
            "î-icirc",
            "ï-iuml",
            "ð-eth",
            "ñ-ntilde",
            "ò-ograve",
            "ó-oacute",
            "ô-ocirc",
            "õ-otilde",
            "ö-ouml",
            "÷-divide",
            "ø-oslash",
            "ù-ugrave",
            "ú-uacute",
            "û-ucirc",
            "ü-uuml",
            "ý-yacute",
            "þ-thorn",
            "ÿ-yuml",
            "Œ-OElig",
            "œ-oelig",
            "Š-Scaron",
            "š-scaron",
            "Ÿ-Yuml",
            "ƒ-fnof",
            "ˆ-circ",
            "˜-tilde",
            "Α-Alpha",
            "Β-Beta",
            "Γ-Gamma",
            "Δ-Delta",
            "Ε-Epsilon",
            "Ζ-Zeta",
            "Η-Eta",
            "Θ-Theta",
            "Ι-Iota",
            "Κ-Kappa",
            "Λ-Lambda",
            "Μ-Mu",
            "Ν-Nu",
            "Ξ-Xi",
            "Ο-Omicron",
            "Π-Pi",
            "Ρ-Rho",
            "Σ-Sigma",
            "Τ-Tau",
            "Υ-Upsilon",
            "Φ-Phi",
            "Χ-Chi",
            "Ψ-Psi",
            "Ω-Omega",
            "α-alpha",
            "β-beta",
            "γ-gamma",
            "δ-delta",
            "ε-epsilon",
            "ζ-zeta",
            "η-eta",
            "θ-theta",
            "ι-iota",
            "κ-kappa",
            "λ-lambda",
            "μ-mu",
            "ν-nu",
            "ξ-xi",
            "ο-omicron",
            "π-pi",
            "ρ-rho",
            "ς-sigmaf",
            "σ-sigma",
            "τ-tau",
            "υ-upsilon",
            "φ-phi",
            "χ-chi",
            "ψ-psi",
            "ω-omega",
            "ϑ-thetasym",
            "ϒ-upsih",
            "ϖ-piv",
            "\u2002-ensp",
            "\u2003-emsp",
            "\u2009-thinsp",
            "\u200c-zwnj",
            "\u200d-zwj",
            "\u200e-lrm",
            "\u200f-rlm",
            "–-ndash",
            "—-mdash",
            "‘-lsquo",
            "’-rsquo",
            "‚-sbquo",
            "“-ldquo",
            "”-rdquo",
            "„-bdquo",
            "†-dagger",
            "‡-Dagger",
            "•-bull",
            "…-hellip",
            "‰-permil",
            "′-prime",
            "″-Prime",
            "‹-lsaquo",
            "›-rsaquo",
            "‾-oline",
            "⁄-frasl",
            "€-euro",
            "ℑ-image",
            "℘-weierp",
            "ℜ-real",
            "™-trade",
            "ℵ-alefsym",
            "←-larr",
            "↑-uarr",
            "→-rarr",
            "↓-darr",
            "↔-harr",
            "↵-crarr",
            "⇐-lArr",
            "⇑-uArr",
            "⇒-rArr",
            "⇓-dArr",
            "⇔-hArr",
            "∀-forall",
            "∂-part",
            "∃-exist",
            "∅-empty",
            "∇-nabla",
            "∈-isin",
            "∉-notin",
            "∋-ni",
            "∏-prod",
            "∑-sum",
            "−-minus",
            "∗-lowast",
            "√-radic",
            "∝-prop",
            "∞-infin",
            "∠-ang",
            "∧-and",
            "∨-or",
            "∩-cap",
            "∪-cup",
            "∫-int",
            "∴-there4",
            "∼-sim",
            "≅-cong",
            "≈-asymp",
            "≠-ne",
            "≡-equiv",
            "≤-le",
            "≥-ge",
            "⊂-sub",
            "⊃-sup",
            "⊄-nsub",
            "⊆-sube",
            "⊇-supe",
            "⊕-oplus",
            "⊗-otimes",
            "⊥-perp",
            "⋅-sdot",
            "⌈-lceil",
            "⌉-rceil",
            "⌊-lfloor",
            "⌋-rfloor",
            "\u2329-lang",
            "\u232a-rang",
            "◊-loz",
            "♠-spades",
            "♣-clubs",
            "♥-hearts",
            "♦-diams"
        };

        // Must be declared after _entitiesList: static initialisers run in
        // textual order and the table is built from that list.
        private static readonly Dictionary<string, char> _lookupTable = GenerateLookupTable();

        /// <summary>
        /// Builds the case-sensitive (ordinal) name-to-character dictionary
        /// from <see cref="_entitiesList"/>.
        /// </summary>
        private static Dictionary<string, char> GenerateLookupTable() {
            Dictionary<string, char> table =
                new Dictionary<string, char>(_entitiesList.Length, StringComparer.Ordinal);
            foreach (string entry in _entitiesList) {
                // entry[0] = decoded char, entry[1] = '-', entry[2..] = name.
                table.Add(entry.Substring(2), entry[0]);
            }
            return table;
        }

        /// <summary>
        /// Returns the character for a named entity, or '\0' when the name
        /// is not in the table.
        /// </summary>
        /// <param name="entity">Entity name without '&amp;' and ';'. Must not be null.</param>
        public static char Lookup(string entity) {
            char result;
            // On a miss TryGetValue leaves result at default(char), i.e. '\0'.
            _lookupTable.TryGetValue(entity, out result);
            return result;
        }
    }

    /// <summary>Policy applied to numeric character references.</summary>
    private enum UnicodeDecodingConformance {
        /// <summary>No explicit choice; handled exactly like <see cref="Strict"/>.</summary>
        Auto,
        /// <summary>Accept U+0000..U+10FFFF except the surrogate range U+D800..U+DFFF.</summary>
        Strict,
        /// <summary>Accept U+0001..U+FFFF only (surrogate code units included).</summary>
        Compat,
        /// <summary>Accept anything up to U+10FFFF.</summary>
        Loose
    }

    private const uint HighSurrogateStart = 0xD800;
    private const uint LowSurrogateEnd = 0xDFFF;
    private const uint UnicodePlane00End = 0xFFFF;
    private const uint UnicodePlane01Start = 0x10000;
    private const uint UnicodePlane16End = 0x10FFFF;

    // A candidate entity ends at the first ';'. Hitting another '&' first
    // means the current '&' is just literal text.
    private static readonly char[] _htmlEntityEndingChars = new char[] { ';', '&' };

    // Strict rather than Auto: with Auto the validation step used to reject
    // every numeric reference, so "&#65;" was never decoded.
    private static readonly UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Strict;

    /// <summary>
    /// Decodes named (<c>&amp;amp;</c>) and numeric (<c>&amp;#65;</c>,
    /// <c>&amp;#x41;</c>) character references in <paramref name="value"/>.
    /// </summary>
    /// <param name="value">Text to decode; may be null or empty.</param>
    /// <returns>
    /// The decoded text. The same instance is returned when the input is
    /// null, empty, or does not need decoding.
    /// </returns>
    public static string HtmlDecode(string value) {
        if (string.IsNullOrEmpty(value) || !StringRequiresHtmlDecoding(value)) {
            return value;
        }
        using (StringWriter writer = new StringWriter(CultureInfo.InvariantCulture)) {
            HtmlDecode(value, writer);
            return writer.ToString();
        }
    }

    /// <summary>
    /// Cheap pre-check: reports whether <paramref name="s"/> contains anything
    /// the decoder would look at ('&amp;', or a surrogate code unit outside
    /// Compat mode).
    /// </summary>
    private static bool StringRequiresHtmlDecoding(string s) {
        if (_htmlDecodeConformance == UnicodeDecodingConformance.Compat) {
            return s.IndexOf('&') >= 0;
        }
        foreach (char c in s) {
            if (c == '&' || char.IsSurrogate(c)) {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Splits a supplementary-plane code point (above U+FFFF) into its UTF-16
    /// surrogate pair.
    /// </summary>
    private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
        int offset = (int)(smpChar - UnicodePlane01Start);
        leadingSurrogate = (char)(offset / 0x400 + 0xD800);
        trailingSurrogate = (char)(offset % 0x400 + 0xDC00);
    }

    /// <summary>
    /// Parses the body of a numeric reference ("#65" or "#x41") and checks the
    /// resulting code point against the active conformance policy.
    /// </summary>
    /// <param name="entity">Entity text starting with '#', at least two characters long.</param>
    /// <param name="codePoint">Parsed code point; meaningful only when true is returned.</param>
    private static bool TryParseNumericEntity(string entity, out uint codePoint) {
        bool parsed;
        if (entity[1] == 'x' || entity[1] == 'X') {
            parsed = uint.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier,
                                   NumberFormatInfo.InvariantInfo, out codePoint);
        } else {
            parsed = uint.TryParse(entity.Substring(1), NumberStyles.Integer,
                                   NumberFormatInfo.InvariantInfo, out codePoint);
        }
        return parsed && IsAcceptedCodePoint(codePoint);
    }

    /// <summary>Applies <see cref="_htmlDecodeConformance"/> to a parsed code point.</summary>
    private static bool IsAcceptedCodePoint(uint codePoint) {
        switch (_htmlDecodeConformance) {
            case UnicodeDecodingConformance.Compat:
                return 0u < codePoint && codePoint <= UnicodePlane00End;
            case UnicodeDecodingConformance.Loose:
                return codePoint <= UnicodePlane16End;
            default:
                // Strict, and Auto which is treated as Strict.
                return codePoint < HighSurrogateStart
                    || (LowSurrogateEnd < codePoint && codePoint <= UnicodePlane16End);
        }
    }

    /// <summary>
    /// Decodes character references in <paramref name="value"/> and writes the
    /// result to <paramref name="output"/>. Unknown or malformed references
    /// are written through unchanged.
    /// </summary>
    /// <param name="value">Text to decode; when null nothing is written.</param>
    /// <param name="output">Destination writer.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="output"/> is null while <paramref name="value"/> is not.
    /// </exception>
    public static void HtmlDecode(string value, TextWriter output) {
        if (value == null) {
            return;
        }
        if (output == null) {
            throw new ArgumentNullException("output");
        }
        if (!StringRequiresHtmlDecoding(value)) {
            output.Write(value);
            return;
        }

        int length = value.Length;
        for (int i = 0; i < length; i++) {
            char c = value[i];
            if (c == '&') {
                int end = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
                if (end > 0 && value[end] == ';') {
                    string entity = value.Substring(i + 1, end - i - 1);
                    if (entity.Length > 1 && entity[0] == '#') {
                        uint codePoint;
                        if (TryParseNumericEntity(entity, out codePoint)) {
                            if (codePoint <= UnicodePlane00End) {
                                output.Write((char)codePoint);
                            } else {
                                char lead;
                                char trail;
                                ConvertSmpToUtf16(codePoint, out lead, out trail);
                                output.Write(lead);
                                output.Write(trail);
                            }
                            i = end; // the loop increment steps past ';'
                            continue;
                        }
                        // Invalid numeric reference: fall through so the '&'
                        // is emitted literally and scanning resumes right after it.
                    } else {
                        i = end; // named reference: always consume through ';'
                        char decoded = HtmlEntities.Lookup(entity);
                        if (decoded == '\0') {
                            // Unknown name: reproduce the reference verbatim.
                            output.Write('&');
                            output.Write(entity);
                            output.Write(';');
                            continue;
                        }
                        c = decoded;
                    }
                }
            }
            output.Write(c);
        }
    }
}
+0

Questa è una soluzione parziale, ma un buon inizio. Gli assembly CLR utilizzati in MSSQL non possono contenere proprietà statiche che non sono di sola lettura, pertanto l'analisi del dizionario delle entità e alcune altre proprietà statiche devono essere convertite in equivalenti di sola lettura. – richardtallent

+0

Ho semplicemente usato JustDecompile per estrarre tutto il necessario da System.Web. Funziona alla grande – Wjdavis5