The Labs \ Source Viewer \ SSCLI \ System.Text \ NormalizationForm

  1. // ==++==
  2. //
  3. //
  4. // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
  5. //
  6. // The use and distribution terms for this software are contained in the file
  7. // named license.txt, which can be found in the root of this distribution.
  8. // By using this software in any fashion, you are agreeing to be bound by the
  9. // terms of this license.
  10. //
  11. // You must not remove this notice, or any other, from this software.
  12. //
  13. //
  14. // ==--==
  15. namespace System.Text
  16. {
  17.     using System;
  18.     using System.Globalization;
  19.     using System.Text;
  20.     using System.Runtime.CompilerServices;
  21.    
  22.     // This is the enumeration for Normalization Forms
  23.     [System.Runtime.InteropServices.ComVisible(true)]
  24.     public enum NormalizationForm
  25.     {
  26.         FormC = 1,
  27.         FormD = 2,
  28.         FormKC = 5,
  29.         FormKD = 6
  30.     }
  31.    
  32.     internal enum ExtendedNormalizationForms
  33.     {
  34.         FormC = 1,
  35.         FormD = 2,
  36.         FormKC = 5,
  37.         FormKD = 6,
  38.         FormIdna = 13,
  39.         FormCDisallowUnassigned = 257,
  40.         FormDDisallowUnassigned = 258,
  41.         FormKCDisallowUnassigned = 261,
  42.         FormKDDisallowUnassigned = 262,
  43.         FormIdnaDisallowUnassigned = 269
  44.     }
  45.    
  46.     // This internal class wraps up our normalization behavior
  47.    
  48.     internal class Normalization
  49.     {
  50.         private static Normalization NFC;
  51.         private static Normalization NFD;
  52.         private static Normalization NFKC;
  53.         private static Normalization NFKD;
  54.         private static Normalization IDNA;
  55.         private static Normalization NFCDisallowUnassigned;
  56.         private static Normalization NFDDisallowUnassigned;
  57.         private static Normalization NFKCDisallowUnassigned;
  58.         private static Normalization NFKDDisallowUnassigned;
  59.         private static Normalization IDNADisallowUnassigned;
  60.        
  61.         private NormalizationForm normalizationForm;
  62.        
  63.         // These are error codes we get back from the Normalization DLL
  64.         private const int ERROR_SUCCESS = 0;
  65.         private const int ERROR_NOT_ENOUGH_MEMORY = 8;
  66.         private const int ERROR_INVALID_PARAMETER = 87;
  67.         private const int ERROR_INSUFFICIENT_BUFFER = 122;
  68.         private const int ERROR_NO_UNICODE_TRANSLATION = 1113;
  69.        
  70.         unsafe internal Normalization(NormalizationForm form, string strDataFile)
  71.         {
  72.             // Remember which form we are
  73.             this.normalizationForm = form;
  74.             // Load the DLL
  75.             if (!nativeLoadNormalizationDLL()) {
  76.                 // Unable to load the normalization DLL!
  77.                 throw new ArgumentException(Environment.GetResourceString("Argument_InvalidNormalizationForm"));
  78.             }
  79.            
  80.             // Tell the DLL where to find our data
  81.             byte* pTables = GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(Normalization).Assembly, strDataFile);
  82.             if (pTables == null) {
  83.                 // Unable to load the specified normalizationForm,
  84.                 // tables not loaded from file
  85.                 throw new ArgumentException(Environment.GetResourceString("Argument_InvalidNormalizationForm"));
  86.             }
  87.            
  88.             // All we have to do is let the .dll know how to load it, then
  89.             // we can ignore the returned pointer.
  90.             byte* objNorm = nativeNormalizationInitNormalization(form, pTables);
  91.             if (objNorm == null) {
  92.                 // Unable to load the specified normalizationForm
  93.                 // native library class not initialized correctly
  94.                 throw new OutOfMemoryException(Environment.GetResourceString("Arg_OutOfMemoryException"));
  95.             }
  96.         }
  97.        
  98.         static internal Normalization GetNormalization(NormalizationForm form)
  99.         {
  100.             switch ((ExtendedNormalizationForms)form) {
  101.                 case ExtendedNormalizationForms.FormC:
  102.                     return GetFormC();
  103.                 case ExtendedNormalizationForms.FormD:
  104.                     return GetFormD();
  105.                 case ExtendedNormalizationForms.FormKC:
  106.                     return GetFormKC();
  107.                 case ExtendedNormalizationForms.FormKD:
  108.                     return GetFormKD();
  109.                 case ExtendedNormalizationForms.FormIdna:
  110.                     return GetFormIDNA();
  111.                 case ExtendedNormalizationForms.FormCDisallowUnassigned:
  112.                     return GetFormCDisallowUnassigned();
  113.                 case ExtendedNormalizationForms.FormDDisallowUnassigned:
  114.                     return GetFormDDisallowUnassigned();
  115.                 case ExtendedNormalizationForms.FormKCDisallowUnassigned:
  116.                     return GetFormKCDisallowUnassigned();
  117.                 case ExtendedNormalizationForms.FormKDDisallowUnassigned:
  118.                     return GetFormKDDisallowUnassigned();
  119.                 case ExtendedNormalizationForms.FormIdnaDisallowUnassigned:
  120.                     return GetFormIDNADisallowUnassigned();
  121.             }
  122.            
  123.             // They were supposed to have a form that we know about!
  124.             throw new ArgumentException(Environment.GetResourceString("Argument_InvalidNormalizationForm"));
  125.         }
  126.        
  127.         static internal Normalization GetFormC()
  128.         {
  129.             if (NFC != null)
  130.                 return NFC;
  131.            
  132.             NFC = new Normalization(NormalizationForm.FormC, "normnfc.nlp");
  133.             return NFC;
  134.         }
  135.        
  136.         static internal Normalization GetFormD()
  137.         {
  138.             if (NFD != null)
  139.                 return NFD;
  140.            
  141.             NFD = new Normalization(NormalizationForm.FormD, "normnfd.nlp");
  142.             return NFD;
  143.         }
  144.        
  145.         static internal Normalization GetFormKC()
  146.         {
  147.             if (NFKC != null)
  148.                 return NFKC;
  149.            
  150.             NFKC = new Normalization(NormalizationForm.FormKC, "normnfkc.nlp");
  151.             return NFKC;
  152.         }
  153.        
  154.         static internal Normalization GetFormKD()
  155.         {
  156.             if (NFKD != null)
  157.                 return NFKD;
  158.            
  159.             NFKD = new Normalization(NormalizationForm.FormKD, "normnfkd.nlp");
  160.             return NFKD;
  161.         }
  162.        
  163.         static internal Normalization GetFormIDNA()
  164.         {
  165.             if (IDNA != null)
  166.                 return IDNA;
  167.            
  168.             IDNA = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormIdna, "normidna.nlp");
  169.             return IDNA;
  170.         }
  171.        
  172.         static internal Normalization GetFormCDisallowUnassigned()
  173.         {
  174.             if (NFCDisallowUnassigned != null)
  175.                 return NFCDisallowUnassigned;
  176.            
  177.             NFCDisallowUnassigned = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormCDisallowUnassigned, "normnfc.nlp");
  178.             return NFCDisallowUnassigned;
  179.         }
  180.        
  181.         static internal Normalization GetFormDDisallowUnassigned()
  182.         {
  183.             if (NFDDisallowUnassigned != null)
  184.                 return NFDDisallowUnassigned;
  185.            
  186.             NFDDisallowUnassigned = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormDDisallowUnassigned, "normnfd.nlp");
  187.             return NFDDisallowUnassigned;
  188.         }
  189.        
  190.         static internal Normalization GetFormKCDisallowUnassigned()
  191.         {
  192.             if (NFKCDisallowUnassigned != null)
  193.                 return NFKCDisallowUnassigned;
  194.            
  195.             NFKCDisallowUnassigned = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormKCDisallowUnassigned, "normnfkc.nlp");
  196.             return NFKCDisallowUnassigned;
  197.         }
  198.        
  199.         static internal Normalization GetFormKDDisallowUnassigned()
  200.         {
  201.             if (NFKDDisallowUnassigned != null)
  202.                 return NFKDDisallowUnassigned;
  203.            
  204.             NFKDDisallowUnassigned = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormKDDisallowUnassigned, "normnfkd.nlp");
  205.             return NFKDDisallowUnassigned;
  206.         }
  207.        
  208.         static internal Normalization GetFormIDNADisallowUnassigned()
  209.         {
  210.             if (IDNADisallowUnassigned != null)
  211.                 return IDNADisallowUnassigned;
  212.            
  213.             IDNADisallowUnassigned = new Normalization((NormalizationForm)ExtendedNormalizationForms.FormIdnaDisallowUnassigned, "normidna.nlp");
  214.             return IDNADisallowUnassigned;
  215.         }
  216.        
  217.         static internal bool IsNormalized(string strInput, NormalizationForm normForm)
  218.         {
  219.             return GetNormalization(normForm).IsNormalized(strInput);
  220.         }
  221.        
  222.         private bool IsNormalized(string strInput)
  223.         {
  224.             if (strInput == null)
  225.                 throw new ArgumentNullException(Environment.GetResourceString("ArgumentNull_String"), "strInput");
  226.            
  227.             int iError = ERROR_SUCCESS;
  228.             int iTest = nativeNormalizationIsNormalizedString(normalizationForm, ref iError, strInput, strInput.Length);
  229.            
  230.             switch (iError) {
  231.                 case ERROR_SUCCESS:
  232.                     // Success doesn't need to do anything
  233.                     break;
  234.                 case ERROR_NO_UNICODE_TRANSLATION:
  235.                    
  236.                     // Do appropriate stuff for the individual errors:
  237.                     // Only possible value here is ERROR_NO_UNICODE_TRANSLATION
  238.                     throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"), "strInput");
  239.                     break;
  240.                 case ERROR_NOT_ENOUGH_MEMORY:
  241.                     throw new OutOfMemoryException(Environment.GetResourceString("Arg_OutOfMemoryException"));
  242.                     break;
  243.                 default:
  244.                     throw new InvalidOperationException(Environment.GetResourceString("UnknownError_Num", iError));
  245.                     break;
  246.             }
  247.            
  248.             // Bit 1 is true, 0 is false from our return value.
  249.             return ((iTest & 1) == 1);
  250.         }
  251.        
  252.         static internal string Normalize(string strInput, NormalizationForm normForm)
  253.         {
  254.             return GetNormalization(normForm).Normalize(strInput);
  255.         }
  256.        
  257.         internal string Normalize(string strInput)
  258.         {
  259.             if (strInput == null)
  260.                 throw new ArgumentNullException("strInput", Environment.GetResourceString("ArgumentNull_String"));
  261.            
  262.             // Guess our buffer size first
  263.             int iLength = GuessLength(strInput);
  264.            
  265.             // Don't break for empty strings (only possible for D & KD and not really possible at that)
  266.             if (iLength == 0)
  267.                 return String.Empty;
  268.            
  269.             // Someplace to stick our buffer
  270.             char[] cBuffer = null;
  271.            
  272.             int iError = ERROR_INSUFFICIENT_BUFFER;
  273.             while (iError == ERROR_INSUFFICIENT_BUFFER) {
  274.                 // (re)allocation buffer and normalize string
  275.                 cBuffer = new char[iLength];
  276.                 iLength = nativeNormalizationNormalizeString(normalizationForm, ref iError, strInput, strInput.Length, cBuffer, cBuffer.Length);
  277.                
  278.                 // Could have an error (actually it'd be quite hard to have an error here)
  279.                 if (iError != ERROR_SUCCESS) {
  280.                     switch (iError) {
  281.                         case ERROR_INSUFFICIENT_BUFFER:
  282.                             // Do appropriate stuff for the individual errors:
  283.                             BCLDebug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length");
  284.                             continue;
  285.                         case ERROR_NO_UNICODE_TRANSLATION:
  286.                             // Illegal code point or order found. Ie: FFFE or D800 D800, etc.
  287.                             throw new ArgumentException(Environment.GetResourceString("Argument_InvalidCharSequence", iLength), "strInput");
  288.                             break;
  289.                         case ERROR_NOT_ENOUGH_MEMORY:
  290.                             throw new OutOfMemoryException(Environment.GetResourceString("Arg_OutOfMemoryException"));
  291.                             break;
  292.                         case ERROR_INVALID_PARAMETER:
  293.                         default:
  294.                             // Shouldn't have invalid parameters here unless we have a bug, drop through...
  295.                             // We shouldn't get here...
  296.                             throw new InvalidOperationException(Environment.GetResourceString("UnknownError_Num", iError));
  297.                             break;
  298.                     }
  299.                 }
  300.             }
  301.            
  302.             // Copy our buffer into our new string, which will be the appropriate size
  303.             string strReturn = new string(cBuffer, 0, iLength);
  304.            
  305.             // Return our output string
  306.             return strReturn;
  307.         }
  308.        
  309.         internal int GuessLength(string strInput)
  310.         {
  311.             if (strInput == null)
  312.                 throw new ArgumentNullException("strInput", Environment.GetResourceString("ArgumentNull_String"));
  313.            
  314.             // Get our guess
  315.             int iError = 0;
  316.             int iGuess = nativeNormalizationNormalizeString(normalizationForm, ref iError, strInput, strInput.Length, null, 0);
  317.            
  318.             // Could have an error (actually it'd be quite hard to have an error here)
  319.             BCLDebug.Assert(iError == ERROR_SUCCESS, "GuessLength() shouldn't return errors.");
  320.             if (iError != ERROR_SUCCESS) {
  321.                 // We shouldn't really be able to get here..., guessing length is
  322.                 // a trivial math function...
  323.                 // Can't really be Out of Memory, but just in case:
  324.                 if (iError == ERROR_NOT_ENOUGH_MEMORY)
  325.                     throw new OutOfMemoryException(Environment.GetResourceString("Arg_OutOfMemoryException"));
  326.                
  327.                 // Who knows what happened? Not us!
  328.                 throw new InvalidOperationException(Environment.GetResourceString("UnknownError_Num", iError));
  329.             }
  330.            
  331.             // Well, we guessed it
  332.             return iGuess;
  333.         }
  334.        
  335.         [MethodImplAttribute(MethodImplOptions.InternalCall)]
  336.         unsafe private static extern bool nativeLoadNormalizationDLL();
  337.        
  338.         [MethodImplAttribute(MethodImplOptions.InternalCall)]
  339.         unsafe private static extern int nativeNormalizationNormalizeString(NormalizationForm NormForm, ref int iError, string lpSrcString, int cwSrcLength, char[] lpDstString, int cwDstLength);
  340.        
  341.         [MethodImplAttribute(MethodImplOptions.InternalCall)]
  342.         unsafe private static extern int nativeNormalizationIsNormalizedString(NormalizationForm NormForm, ref int iError, string lpString, int cwLength);
  343.        
  344.         [MethodImplAttribute(MethodImplOptions.InternalCall)]
  345.         unsafe private static extern byte* nativeNormalizationInitNormalization(NormalizationForm NormForm, byte* pTableData);
  346.        
  347.     }
  348. }

Developer Fusion