The Labs \ Source Viewer \ SSCLI \ System.Text \ CodePageIndex

  1. // ==++==
  2. //
  3. //
  4. // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
  5. //
  6. // The use and distribution terms for this software are contained in the file
  7. // named license.txt, which can be found in the root of this distribution.
  8. // By using this software in any fashion, you are agreeing to be bound by the
  9. // terms of this license.
  10. //
  11. // You must not remove this notice, or any other, from this software.
  12. //
  13. //
  14. // ==--==
  15. namespace System.Text
  16. {
  17.     using System;
  18.     using System.Globalization;
  19.     using System.Runtime.InteropServices;
  20.     using System.Security;
  21.     using System.Collections;
  22.     using System.Runtime.CompilerServices;
  23.     using System.Runtime.Serialization;
  24.     using System.Security.Permissions;
  25.     using Microsoft.Win32.SafeHandles;
  26.    
  27.     // Our input file data structures look like:
  28.     //
  29.     // Header Structure Looks Like:
  30.     // struct NLSPlusHeader
  31.     // {
  32.     // WORD[16] filename; // 32 bytes
  33.     // WORD[4] version; // 8 bytes = 40 // I.e: 3, 2, 0, 0
  34.     // WORD count; // 2 bytes = 42 // Number of code page index entries that follow
  35.     // }
  36.     //
  37.     // Each code page section looks like:
  38.     // struct NLSCodePageIndex
  39.     // {
  40.     // WORD[16] codePageName; // 32 bytes
  41.     // WORD codePage; // +2 bytes = 34
  42.     // WORD byteCount; // +2 bytes = 36
  43.     // DWORD offset; // +4 bytes = 40 // Bytes from beginning of FILE.
  44.     // }
  45.     //
  46.     // Each code page then has its own header
  47.     // struct NLSCodePage
  48.     // {
  49.     // WORD[16] codePageName; // 32 bytes
  50.     // WORD[4] version; // 8 bytes = 40 // I.e: 3.2.0.0
  51.     // WORD codePage; // 2 bytes = 42
  52.     // WORD byteCount; // 2 bytes = 44 // 1 or 2 byte code page (SBCS or DBCS)
  53.     // WORD unicodeReplace; // 2 bytes = 46 // default replacement unicode character
  54.     // WORD byteReplace; // 2 bytes = 48 // default replacement byte(s)
  55.     // BYTE[] data; // data section
  56.     // }
  57.    
  58.     [Serializable()]
  59.     internal abstract class BaseCodePageEncoding : EncodingNLS, ISerializable
  60.     {
  61.         // Static & Const stuff
  62.         internal const string CODE_PAGE_DATA_FILE_NAME = "codepages.nlp";
  63.         [NonSerialized()]
  64.         protected int dataTableCodePage;
  65.        
  66.         // Variables to help us allocate/mark our memory section correctly
  67.         [NonSerialized()]
  68.         protected bool bFlagDataTable = true;
  69.         [NonSerialized()]
  70.         protected int iExtraBytes = 0;
  71.        
  72.         // Our private unicode to bytes best fit array and visa versa.
  73.         [NonSerialized()]
  74.         protected char[] arrayUnicodeBestFit = null;
  75.         [NonSerialized()]
  76.         protected char[] arrayBytesBestFit = null;
  77.        
  78.         // This is used to help ISCII, EUCJP and ISO2022 figure out they're MlangEncodings
  79.         [NonSerialized()]
  80.         protected bool m_bUseMlangTypeForSerialization = false;
  81.        
  82.         //
  83.         // This is the header for the native data table that we load from CODE_PAGE_DATA_FILE_NAME.
  84.         //
  85.         // Explicit layout is used here since a syntax like char[16] can not be used in sequential layout.
  86.         [StructLayout(LayoutKind.Explicit)]
  87.         unsafe internal struct CodePageDataFileHeader
  88.         {
  89.             [FieldOffset(0)]
  90.             internal char TableName;
  91.             // WORD[16]
  92.             [FieldOffset(32)]
  93.             internal ushort Version;
  94.             // WORD[4]
  95.             [FieldOffset(40)]
  96.             internal short CodePageCount;
  97.             // WORD
  98.             [FieldOffset(42)]
  99.             internal short unused1;
  100.             // Add a unused WORD so that CodePages is aligned with DWORD boundary.
  101.             // Otherwise, 64-bit version will fail.
  102.             [FieldOffset(44)]
  103.             internal CodePageIndex CodePages;
  104.             // Start of code page index
  105.         }
  106.        
  107.         [StructLayout(LayoutKind.Explicit, Pack = 2)]
  108.         unsafe internal struct CodePageIndex
  109.         {
  110.             [FieldOffset(0)]
  111.             internal char CodePageName;
  112.             // WORD[16]
  113.             [FieldOffset(32)]
  114.             internal short CodePage;
  115.             // WORD
  116.             [FieldOffset(34)]
  117.             internal short ByteCount;
  118.             // WORD
  119.             [FieldOffset(36)]
  120.             internal int Offset;
  121.             // DWORD
  122.         }
  123.        
  124.         [StructLayout(LayoutKind.Explicit)]
  125.         unsafe internal struct CodePageHeader
  126.         {
  127.             [FieldOffset(0)]
  128.             internal char CodePageName;
  129.             // WORD[16]
  130.             [FieldOffset(32)]
  131.             internal ushort VersionMajor;
  132.             // WORD
  133.             [FieldOffset(34)]
  134.             internal ushort VersionMinor;
  135.             // WORD
  136.             [FieldOffset(36)]
  137.             internal ushort VersionRevision;
  138.             // WORD
  139.             [FieldOffset(38)]
  140.             internal ushort VersionBuild;
  141.             // WORD
  142.             [FieldOffset(40)]
  143.             internal short CodePage;
  144.             // WORD
  145.             [FieldOffset(42)]
  146.             internal short ByteCount;
  147.             // WORD // 1 or 2 byte code page (SBCS or DBCS)
  148.             [FieldOffset(44)]
  149.             internal char UnicodeReplace;
  150.             // WORD // default replacement unicode character
  151.             [FieldOffset(46)]
  152.             internal ushort ByteReplace;
  153.             // WORD // default replacement bytes
  154.             [FieldOffset(48)]
  155.             internal short FirstDataWord;
  156.             // WORD[]
  157.         }
  158.        
  159.         // Initialize our global stuff
  160.         unsafe static CodePageDataFileHeader* m_pCodePageFileHeader = (CodePageDataFileHeader*)GlobalizationAssembly.GetGlobalizationResourceBytePtr(typeof(CharUnicodeInfo).Assembly, CODE_PAGE_DATA_FILE_NAME);
  161.        
  162.         // Real variables
  163.         [NonSerialized()]
  164.         unsafe protected CodePageHeader* pCodePage = null;
  165.        
  166.         // Safe handle wrapper around section map view
  167.         [NonSerialized()]
  168.         protected SafeViewOfFileHandle safeMemorySectionHandle = null;
  169.        
  170.         // Safe handle wrapper around mapped file handle
  171.         [NonSerialized()]
  172.         protected SafeFileMappingHandle safeFileMappingHandle = null;
  173.        
  174.         internal BaseCodePageEncoding(int codepage) : this(codepage, codepage)
  175.         {
  176.         }
  177.        
  178.         internal BaseCodePageEncoding(int codepage, int dataCodePage) : base(codepage == 0 ? Microsoft.Win32.Win32Native.GetACP() : codepage)
  179.         {
  180.             // Remember number of code page that we'll be using the table for.
  181.             dataTableCodePage = dataCodePage;
  182.             LoadCodePageTables();
  183.         }
  184.        
  185.         // Constructor called by serialization.
  186.         internal BaseCodePageEncoding(SerializationInfo info, StreamingContext context) : base(0)
  187.         {
  188.             // We cannot ever call this, we've proxied ourselved to CodePageEncoding
  189.             throw new ArgumentNullException("this");
  190.         }
  191.        
  192.         // ISerializable implementation
  193.         [SecurityPermissionAttribute(SecurityAction.LinkDemand, Flags = SecurityPermissionFlag.SerializationFormatter)]
  194.         void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
  195.         {
  196.             // Make sure to get teh base stuff too This throws if info is null
  197.             SerializeEncoding(info, context);
  198.             BCLDebug.Assert(info != null, "[BaseCodePageEncoding.GetObjectData] Expected null info to throw");
  199.            
  200.             info.AddValue(m_bUseMlangTypeForSerialization ? "m_maxByteSize" : "maxCharSize", this.IsSingleByte ? 1 : 2);
  201.            
  202.             // Use this class or MLangBaseCodePageEncoding as our deserializer.
  203.             info.SetType(m_bUseMlangTypeForSerialization ? typeof(MLangCodePageEncoding) : typeof(CodePageEncoding));
  204.         }
  205.        
  206.         // We need to load tables for our code page
  207.         unsafe private void LoadCodePageTables()
  208.         {
  209.             CodePageHeader* pCodePage = FindCodePage(dataTableCodePage);
  210.            
  211.             // Make sure we have one
  212.             if (pCodePage == null) {
  213.                 // Didn't have one
  214.                 throw new NotSupportedException(Environment.GetResourceString("NotSupported_NoCodepageData", CodePage));
  215.             }
  216.            
  217.             // Remember our code page
  218.             this.pCodePage = pCodePage;
  219.            
  220.             // We had it, so load it
  221.             LoadManagedCodePage();
  222.         }
  223.        
  224.         // Look up the code page pointer
  225.         unsafe private static CodePageHeader* FindCodePage(int codePage)
  226.         {
  227.             // We'll have to loop through all of the m_pCodePageIndex[] items to find our code page, this isn't
  228.             // binary or anything so its not monsterously fast.
  229.             for (int i = 0; i < m_pCodePageFileHeader->CodePageCount; i++) {
  230.                 CodePageIndex* pCodePageIndex = (&(m_pCodePageFileHeader->CodePages)) + i;
  231.                
  232.                 if (pCodePageIndex->CodePage == codePage) {
  233.                     // Found it!
  234.                     CodePageHeader* pCodePage = (CodePageHeader*)((byte*)m_pCodePageFileHeader + pCodePageIndex->Offset);
  235.                     return pCodePage;
  236.                 }
  237.             }
  238.            
  239.             // Couldn't find it
  240.             return null;
  241.         }
  242.        
  243.         // Get our code page byte count
  244.         unsafe static internal int GetCodePageByteSize(int codePage)
  245.         {
  246.             // Get our code page info
  247.             CodePageHeader* pCodePage = FindCodePage(codePage);
  248.            
  249.             // If null return 0
  250.             if (pCodePage == null)
  251.                 return 0;
  252.            
  253.             BCLDebug.Assert(pCodePage->ByteCount == 1 || pCodePage->ByteCount == 2, "[BaseCodePageEncoding] Code page (" + codePage + ") has invalid byte size (" + pCodePage->ByteCount + ") in table");
  254.             // Return what it says for byte count
  255.             return pCodePage->ByteCount;
  256.         }
  257.        
  258.         // We have a managed code page entry, so load our tables
  259.         unsafe protected abstract void LoadManagedCodePage();
  260.        
  261.         // Allocate memory to load our code page
  262.         unsafe protected byte* GetSharedMemory(int iSize)
  263.         {
  264.             // Build our name
  265.             string strName = GetMemorySectionName();
  266.            
  267.             IntPtr mappedFileHandle;
  268.            
  269.             // This gets shared memory for our map. If its can't, it gives us clean memory.
  270.             byte* pMemorySection = EncodingTable.nativeCreateOpenFileMapping(strName, iSize, out mappedFileHandle);
  271.             BCLDebug.Assert(pMemorySection != null, "[BaseCodePageEncoding.GetSharedMemory] Expected non-null memory section to be opened");
  272.            
  273.             // If that failed, we have to die.
  274.             if (pMemorySection == null)
  275.                 throw new OutOfMemoryException(Environment.GetResourceString("Arg_OutOfMemoryException"));
  276.            
  277.             // if we have null file handle. this means memory was allocated after
  278.             // failing to open the mapped file.
  279.            
  280.             if (mappedFileHandle != IntPtr.Zero) {
  281.                 safeMemorySectionHandle = new SafeViewOfFileHandle((IntPtr)pMemorySection, true);
  282.                 safeFileMappingHandle = new SafeFileMappingHandle(mappedFileHandle, true);
  283.             }
  284.            
  285.             return pMemorySection;
  286.         }
  287.        
  288.         unsafe protected virtual string GetMemorySectionName()
  289.         {
  290.             int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;
  291.            
  292.             string strName = String.Format(CultureInfo.InvariantCulture, "NLS_CodePage_{0}_{1}_{2}_{3}_{4}", iUseCodePage, this.pCodePage->VersionMajor, this.pCodePage->VersionMinor, this.pCodePage->VersionRevision, this.pCodePage->VersionBuild);
  293.            
  294.             return strName;
  295.         }
  296.        
  297.         unsafe protected abstract void ReadBestFitTable();
  298.        
  299.         internal override char[] GetBestFitUnicodeToBytesData()
  300.         {
  301.             // Read in our best fit table if necessary
  302.             if (arrayUnicodeBestFit == null)
  303.                 ReadBestFitTable();
  304.            
  305.             BCLDebug.Assert(arrayUnicodeBestFit != null, "[BaseCodePageEncoding.GetBestFitUnicodeToBytesData]Expected non-null arrayUnicodeBestFit");
  306.            
  307.             // Normally we don't have any best fit data.
  308.             return arrayUnicodeBestFit;
  309.         }
  310.        
  311.         internal override char[] GetBestFitBytesToUnicodeData()
  312.         {
  313.             // Read in our best fit table if necessary
  314.             if (arrayUnicodeBestFit == null)
  315.                 ReadBestFitTable();
  316.            
  317.             BCLDebug.Assert(arrayBytesBestFit != null, "[BaseCodePageEncoding.GetBestFitBytesToUnicodeData]Expected non-null arrayBytesBestFit");
  318.            
  319.             // Normally we don't have any best fit data.
  320.             return arrayBytesBestFit;
  321.         }
  322.        
  323.         // During the AppDomain shutdown the Encoding class may already finalized and the memory section
  324.         // is invalid. so we detect that by validating the memory section handle then re-initialize the memory
  325.         // section by calling LoadManagedCodePage() method and eventually the mapped file handle and
  326.         // the memory section pointer will get finalized one more time.
  327.         unsafe internal void CheckMemorySection()
  328.         {
  329.             if (safeMemorySectionHandle != null && safeMemorySectionHandle.DangerousGetHandle() == IntPtr.Zero) {
  330.                 LoadManagedCodePage();
  331.             }
  332.         }
  333.     }
  334. }

Developer Fusion