The Labs \ Source Viewer \ SSCLI \ System.Globalization \ HebrewNumberParsingContext

  1. // ==++==
  2. //
  3. //
  4. // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
  5. //
  6. // The use and distribution terms for this software are contained in the file
  7. // named license.txt, which can be found in the root of this distribution.
  8. // By using this software in any fashion, you are agreeing to be bound by the
  9. // terms of this license.
  10. //
  11. // You must not remove this notice, or any other, from this software.
  12. //
  13. //
  14. // ==--==
  15. namespace System.Globalization
  16. {
  17.     using System;
  18.     using System.Text;
  19.    
  20.     ////////////////////////////////////////////////////////////////////////////
  21.     //
  22.     // Used in HebrewNumber.ParseByChar to maintain the context information (
  23.     // the state in the state machine and current Hebrew number values, etc.)
  24.     // when parsing Hebrew number character by character.
  25.     //
  26.     ////////////////////////////////////////////////////////////////////////////
  27.    
  28.     internal struct HebrewNumberParsingContext
  29.     {
  30.         // The current state of the state machine for parsing Hebrew numbers.
  31.         internal HebrewNumber.HS state;
  32.         // The current value of the Hebrew number.
  33.         // The final value is determined when state is FoundEndOfHebrewNumber.
  34.         internal int result;
  35.        
  36.         public HebrewNumberParsingContext(int result)
  37.         {
  38.             // Set the start state of the state machine for parsing Hebrew numbers.
  39.             state = HebrewNumber.HS.Start;
  40.             this.result = 0;
  41.         }
  42.     }
  43.    
  44.     ////////////////////////////////////////////////////////////////////////////
  45.     //
  46.     // Please see ParseByChar() for comments about different states defined here.
  47.     //
  48.     ////////////////////////////////////////////////////////////////////////////
  49.    
  50.     internal enum HebrewNumberParsingState
  51.     {
  52.         InvalidHebrewNumber,
  53.         NotHebrewDigit,
  54.         FoundEndOfHebrewNumber,
  55.         ContinueParsing
  56.     }
  57.    
  58.     ////////////////////////////////////////////////////////////////////////////
  59.     //
  60.     // class HebrewNumber
  61.     //
  62.     // Provides static methods for formatting integer values into
  63.     // Hebrew text and parsing Hebrew number text.
  64.     //
  65.     // Limitations:
  66.     // Parse can only handle values 1 ~ 999.
  67.     // ToString() can only handle values 1 ~ 999. If the value is greater than 5000,
  68.     // 5000 will be subtracted from the value.
  69.     //
  70.     ////////////////////////////////////////////////////////////////////////////
  71.    
  72.     internal class HebrewNumber
  73.     {
  74.        
  75.         // This class contains only static methods. Add a private ctor so that
  76.         // compiler won't generate a default one for us.
  77.         private HebrewNumber()
  78.         {
  79.         }
  80.        
  81.         ////////////////////////////////////////////////////////////////////////////
  82.         //
  83.         // ToString
  84.         //
  85.         // Converts the given number to Hebrew letters according to the numeric
  86.         // value of each Hebrew letter. Basically, this converts the lunar year
  87.         // and the lunar month to letters.
  88.         //
  89.         // The character of a year is described by three letters of the Hebrew
  90.         // alphabet, the first and third giving, respectively, the days of the
  91.         // weeks on which the New Year occurs and Passover begins, while the
  92.         // second is the initial of the Hebrew word for defective, normal, or
  93.         // complete.
  94.         //
  95.         // Defective Year : Both Heshvan and Kislev are defective (353 or 383 days)
  96.         // Normal Year : Heshvan is defective, Kislev is full (354 or 384 days)
  97.         // Complete Year : Both Heshvan and Kislev are full (355 or 385 days)
  98.         //
  99.         ////////////////////////////////////////////////////////////////////////////
  100.        
  101.         static internal string ToString(int Number)
  102.         {
  103.             char cTens = '\0';
  104.             char cUnits;
  105.             // tens and units chars
  106.             int Hundreds;
  107.             int Tens;
  108.             // hundreds and tens values
  109.             StringBuilder szHebrew = new StringBuilder();
  110.            
  111.            
  112.             //
  113.             // Adjust the number if greater than 5000.
  114.             //
  115.             if (Number > 5000) {
  116.                 Number -= 5000;
  117.             }
  118.            
  119.             BCLDebug.Assert(Number > 0 && Number <= 999, "Number is out of range.");
  120.             ;
  121.            
  122.             //
  123.             // Get the Hundreds.
  124.             //
  125.             Hundreds = Number / 100;
  126.            
  127.             if (Hundreds > 0) {
  128.                 Number -= Hundreds * 100;
  129.                 // \x05e7 = 100
  130.                 // \x05e8 = 200
  131.                 // \x05e9 = 300
  132.                 // \x05ea = 400
  133.                 // If the number is greater than 400, use the multiples of 400.
  134.                 for (int i = 0; i < (Hundreds / 4); i++) {
  135.                     szHebrew.Append('ת');
  136.                 }
  137.                
  138.                 int remains = Hundreds % 4;
  139.                 if (remains > 0) {
  140.                     szHebrew.Append((char)((int)'צ' + remains));
  141.                 }
  142.             }
  143.            
  144.             //
  145.             // Get the Tens.
  146.             //
  147.             Tens = Number / 10;
  148.             Number %= 10;
  149.            
  150.             switch (Tens) {
  151.                 case (0):
  152.                     cTens = '\0';
  153.                     break;
  154.                 case (1):
  155.                     cTens = 'י';
  156.                     // Hebrew Letter Yod
  157.                     break;
  158.                 case (2):
  159.                     cTens = 'כ';
  160.                     // Hebrew Letter Kaf
  161.                     break;
  162.                 case (3):
  163.                     cTens = 'ל';
  164.                     // Hebrew Letter Lamed
  165.                     break;
  166.                 case (4):
  167.                     cTens = 'מ';
  168.                     // Hebrew Letter Mem
  169.                     break;
  170.                 case (5):
  171.                     cTens = 'נ';
  172.                     // Hebrew Letter Nun
  173.                     break;
  174.                 case (6):
  175.                     cTens = 'ס';
  176.                     // Hebrew Letter Samekh
  177.                     break;
  178.                 case (7):
  179.                     cTens = 'ע';
  180.                     // Hebrew Letter Ayin
  181.                     break;
  182.                 case (8):
  183.                     cTens = 'פ';
  184.                     // Hebrew Letter Pe
  185.                     break;
  186.                 case (9):
  187.                     cTens = 'צ';
  188.                     // Hebrew Letter Tsadi
  189.                     break;
  190.             }
  191.            
  192.             //
  193.             // Get the Units.
  194.             //
  195.             cUnits = (char)(Number > 0 ? ((int)'א' + Number - 1) : 0);
  196.            
  197.             // Hebrew Letter He (5)
  198.             if ((cUnits == 'ה') && (cTens == 'י')) {
  199.                 // Hebrew Letter Yod (10)
  200.                 cUnits = 'ו';
  201.                 // Hebrew Letter Vav (6)
  202.                 cTens = 'ט';
  203.                 // Hebrew Letter Tet (9)
  204.             }
  205.            
  206.             // Hebrew Letter Vav (6)
  207.             if ((cUnits == 'ו') && (cTens == 'י')) {
  208.                 // Hebrew Letter Yod (10)
  209.                 cUnits = 'ז';
  210.                 // Hebrew Letter Zayin (7)
  211.                 cTens = 'ט';
  212.                 // Hebrew Letter Tet (9)
  213.             }
  214.            
  215.             //
  216.             // Copy the appropriate info to the given buffer.
  217.             //
  218.            
  219.             if (cTens != '\0') {
  220.                 szHebrew.Append(cTens);
  221.             }
  222.            
  223.             if (cUnits != '\0') {
  224.                 szHebrew.Append(cUnits);
  225.             }
  226.            
  227.             if (szHebrew.Length > 1) {
  228.                 szHebrew.Insert(szHebrew.Length - 1, '"');
  229.             }
  230.             else {
  231.                 szHebrew.Append('\'');
  232.             }
  233.            
  234.             //
  235.             // Return success.
  236.             //
  237.             return (szHebrew.ToString());
  238.         }
  239.        
  240.         ////////////////////////////////////////////////////////////////////////////
  241.         //
  242.         // Token used to tokenize a Hebrew word into tokens so that we can use in the
  243.         // state machine.
  244.         //
  245.         ////////////////////////////////////////////////////////////////////////////
  246.        
  247.         enum HebrewToken
  248.         {
  249.             Invalid = -1,
  250.             Digit400 = 0,
  251.             Digit200_300 = 1,
  252.             Digit100 = 2,
  253.             Digit10 = 3,
  254.             // 10 ~ 90
  255.             Digit1 = 4,
  256.             // 1, 2, 3, 4, 5, 8,
  257.             Digit6_7 = 5,
  258.             Digit7 = 6,
  259.             Digit9 = 7,
  260.             SingleQuote = 8,
  261.             DoubleQuote = 9
  262.         }
  263.        
  264.         ////////////////////////////////////////////////////////////////////////////
  265.         //
  266.         // This class is used to map a token into its Hebrew digit value.
  267.         //
  268.         ////////////////////////////////////////////////////////////////////////////
  269.        
  270.         class HebrewValue
  271.         {
  272.             internal HebrewToken token;
  273.             internal int value;
  274.             internal HebrewValue(HebrewToken token, int value)
  275.             {
  276.                 this.token = token;
  277.                 this.value = value;
  278.             }
  279.         }
  280.        
  281.         //
  282.         // Map a Hebrew character from U+05D0 ~ U+05EA to its digit value.
  283.         // The value is -1 if the Hebrew character does not have an associated value.
  284.         //
  285.         static HebrewValue[] HebrewValues = {new HebrewValue(HebrewToken.Digit1, 1), new HebrewValue(HebrewToken.Digit1, 2), new HebrewValue(HebrewToken.Digit1, 3), new HebrewValue(HebrewToken.Digit1, 4), new HebrewValue(HebrewToken.Digit1, 5), new HebrewValue(HebrewToken.Digit6_7, 6), new HebrewValue(HebrewToken.Digit6_7, 7), new HebrewValue(HebrewToken.Digit1, 8), new HebrewValue(HebrewToken.Digit9, 9), new HebrewValue(HebrewToken.Digit10, 10),
  286.         new HebrewValue(HebrewToken.Invalid, -1), new HebrewValue(HebrewToken.Digit10, 20), new HebrewValue(HebrewToken.Digit10, 30), new HebrewValue(HebrewToken.Invalid, -1), new HebrewValue(HebrewToken.Digit10, 40), new HebrewValue(HebrewToken.Invalid, -1), new HebrewValue(HebrewToken.Digit10, 50), new HebrewValue(HebrewToken.Digit10, 60), new HebrewValue(HebrewToken.Digit10, 70), new HebrewValue(HebrewToken.Invalid, -1),
  287.             // '\x05d0
  288.             // '\x05d1
  289.             // '\x05d2
  290.             // '\x05d3
  291.             // '\x05d4
  292.             // '\x05d5
  293.             // '\x05d6
  294.             // '\x05d7
  295.             // '\x05d8
  296.             // '\x05d9; // Hebrew Letter Yod
  297.             // '\x05da;
  298.             // '\x05db; // Hebrew Letter Kaf
  299.             // '\x05dc; // Hebrew Letter Lamed
  300.             // '\x05dd;
  301.             // '\x05de; // Hebrew Letter Mem
  302.             // '\x05df;
  303.             // '\x05e0; // Hebrew Letter Nun
  304.             // '\x05e1; // Hebrew Letter Samekh
  305.             // '\x05e2; // Hebrew Letter Ayin
  306.             // '\x05e3;
  307.             // '\x05e4; // Hebrew Letter Pe
  308.             // '\x05e5;
  309.             // '\x05e6; // Hebrew Letter Tsadi
  310.             // '\x05e7;
  311.             // '\x05e8;
  312.             // '\x05e9;
  313.             // '\x05ea;
  314.         new HebrewValue(HebrewToken.Digit10, 80), new HebrewValue(HebrewToken.Invalid, -1), new HebrewValue(HebrewToken.Digit10, 90), new HebrewValue(HebrewToken.Digit100, 100), new HebrewValue(HebrewToken.Digit200_300, 200), new HebrewValue(HebrewToken.Digit200_300, 300), new HebrewValue(HebrewToken.Digit400, 400)};
  315.        
  316.         const int minHebrewNumberCh = 1488;
  317.         static char maxHebrewNumberCh = (char)(minHebrewNumberCh + HebrewValues.Length - 1);
  318.        
  319.         ////////////////////////////////////////////////////////////////////////////
  320.         //
  321.         // Hebrew number parsing State
  322.         // The current state and the next token will lead to the next state in the state machine.
  323.         // DQ = Double Quote
  324.         //
  325.         ////////////////////////////////////////////////////////////////////////////
  326.        
  327.         internal enum HS
  328.         {
  329.             _err = -1,
  330.             // an error state
  331.             Start = 0,
  332.             S400 = 1,
  333.             // a Hebrew digit 400
  334.             S400_400 = 2,
  335.             // Two Hebrew digit 400
  336.             S400_X00 = 3,
  337.             // Two Hebrew digit 400 and followed by 100
  338.             S400_X0 = 4,
  339.             // Hebrew digit 400 and followed by 10 ~ 90
  340.             X00_DQ = 5,
  341.             // A hundred number and followed by a double quote.
  342.             S400_X00_X0 = 6,
  343.             X0_DQ = 7,
  344.             // A two-digit number and followed by a double quote.
  345.             X = 8,
  346.             // A single digit Hebrew number.
  347.             X0 = 9,
  348.             // A two-digit Hebrew number
  349.             X00 = 10,
  350.             // A three-digit Hebrew number
  351.             S400_DQ = 11,
  352.             // A Hebrew digit 400 and followed by a double quote.
  353.             S400_400_DQ = 12,
  354.             S400_400_100 = 13,
  355.             S9 = 14,
  356.             // Hebrew digit 9
  357.             X00_S9 = 15,
  358.             // A hundered number and followed by a digit 9
  359.             S9_DQ = 16,
  360.             // Hebrew digit 9 and followed by a double quote
  361.             END = 100
  362.             // A terminial state is reached.
  363.         }
  364.        
  365.         //
  366.         // The state machine for Hebrew number parsing.
  367.         //
  368.         static HS[][] m_numberPasingState = {new HS[] {HS.S400, HS.X00, HS.X00, HS.X0, HS.X, HS.X, HS.X, HS.S9, HS._err, HS._err
  369.         }, new HS[] {HS.S400_400, HS.S400_X00, HS.S400_X00, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS.END, HS.S400_DQ
  370.         }, new HS[] {HS._err, HS._err, HS.S400_400_100, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.S400_400_DQ
  371.         }, new HS[] {HS._err, HS._err, HS._err, HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ
  372.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.X0_DQ
  373.         }, new HS[] {HS._err, HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err
  374.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.X0_DQ
  375.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err
  376.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS._err
  377.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.X0_DQ
  378.         },
  379.         new HS[] {HS._err, HS._err, HS._err, HS.S400_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS.END, HS.X00_DQ
  380.         }, new HS[] {HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err
  381.         }, new HS[] {HS._err, HS._err, HS.END, HS.END, HS.END, HS.END, HS.END, HS.END, HS._err, HS._err
  382.         }, new HS[] {HS._err, HS._err, HS._err, HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ
  383.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.S9_DQ
  384.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS._err, HS.S9_DQ
  385.         }, new HS[] {HS._err, HS._err, HS._err, HS._err, HS._err, HS.END, HS.END, HS._err, HS._err, HS._err
  386.             // 400 300/200 100 90~10 8~1 6, 7, 9, ' "
  387.             /* 0 */            /* 1: S400 */            /* 2: S400_400 */            /* 3: S400_X00 */            /* 4: S400_X0 */            /* 5: X00_DQ */            /* 6: S400_X00_X0 */            /* 7: X0_DQ */            /* 8: X */            /* 9: X0 */            /* 10: X00 */            /* 11: S400_DQ */            /* 12: S400_400_DQ*/            /* 13: S400_400_100*/            /* 14: S9 */            /* 15: X00_S9 */            /* 16: S9_DQ */        }};
  388.        
  389.         ////////////////////////////////////////////////////////////////////////
  390.         //
  391.         // Actions:
  392.         // Parse a Hebrew number string.
  393.         // Returns:
  394.         // -1 if this is not a valid Hebrew string.
  395.         // Otherwise, a positive number is returned.
  396.         // Limitations:
  397.         // The state machine only parses Hebrew number from 1 ~ 999.
  398.         //
  399.         ////////////////////////////////////////////////////////////////////////
  400. /*
  401.         internal static int Parse(String str) {
  402.             BCLDebug.Assert(str != null, "str != null");
  403.             HebrewNumberParsingContext context = new HebrewNumberParsingContext();
  404.            
  405.             HebrewNumberParsingState result = 0;
  406.             int i;
  407.             for (i = 0; i < str.Length; i++) {
  408.                 result = ParseByChar(str[i], context);
  409.                 if (result == HebrewNumberParsingState.NotHebrewDigit || result == HebrewNumberParsingState.InvalidHebrewNumber) {
  410.                     return (-1);
  411.                 }
  412.                 if (result == HebrewNumberParsingState.FoundEndOfHebrewNumber) {
  413.                     // Reach the terminal state.
  414.                     break;
  415.                 }
  416.             }
  417.             if (result == HebrewNumberParsingState.FoundEndOfHebrewNumber && i == str.Length - 1) {
  418.                 return (context.result);
  419.             }
  420.             // There are still characters left in the string or a terminal state is not reached.
  421.             return (-1);
  422.         }
  423.         */       
  424.        
  425.         ////////////////////////////////////////////////////////////////////////
  426.         //
  427.         // Actions:
  428.         // Parse the Hebrew number by passing one character at a time.
  429.         // The state between characters is maintained in HebrewNumberParsingContext.
  430.         // Returns:
  431.         // A HebrewNumberParsingState value:
  432.         // NotHebrewDigit: The specified ch is not a valid Hebrew digit.
  433.         // InvalidHebrewNumber: After parsing the specified ch, it will lead into
  434.         // an invalid Hebrew number text.
  435.         // FoundEndOfHebrewNumber: A terminal state is reached. This means that
  436.         // we find a valid Hebrew number text after the specified ch is parsed.
  437.         // ContinueParsing: The specified ch is a valid Hebrew digit, and
  438.         // it will lead into a valid state in the state machine, we should
  439.         // continue to parse incoming characters.
  440.         //
  441.         ////////////////////////////////////////////////////////////////////////
  442.        
  443.         static internal HebrewNumberParsingState ParseByChar(char ch, ref HebrewNumberParsingContext context)
  444.         {
  445.             HebrewToken token;
  446.             if (ch == '\'') {
  447.                 token = HebrewToken.SingleQuote;
  448.             }
  449.             else if (ch == '"') {
  450.                 token = HebrewToken.DoubleQuote;
  451.             }
  452.             else {
  453.                 int index = (int)ch - minHebrewNumberCh;
  454.                 if (index >= 0 && index < HebrewValues.Length) {
  455.                     token = HebrewValues[index].token;
  456.                     if (token == HebrewToken.Invalid) {
  457.                         return (HebrewNumberParsingState.NotHebrewDigit);
  458.                     }
  459.                     context.result += HebrewValues[index].value;
  460.                 }
  461.                 else {
  462.                     // Not in valid Hebrew digit range.
  463.                     return (HebrewNumberParsingState.NotHebrewDigit);
  464.                 }
  465.             }
  466.             context.state = m_numberPasingState[(int)context.state][(int)token];
  467.             if (context.state == HS._err) {
  468.                 // Invalid Hebrew state. This indicates an incorrect Hebrew number.
  469.                 return (HebrewNumberParsingState.InvalidHebrewNumber);
  470.             }
  471.             if (context.state == HS.END) {
  472.                 // Reach a terminal state.
  473.                 return (HebrewNumberParsingState.FoundEndOfHebrewNumber);
  474.             }
  475.             // We should continue to parse.
  476.             return (HebrewNumberParsingState.ContinueParsing);
  477.         }
  478.        
  479.         ////////////////////////////////////////////////////////////////////////
  480.         //
  481.         // Actions:
  482.         // Check if the ch is a valid Hebrew number digit.
  483.         // This function will return true if the specified char is a legal Hebrew
  484.         // digit character, single quote, or double quote.
  485.         // Returns:
  486.         // true if the specified character is a valid Hebrew number character.
  487.         //
  488.         ////////////////////////////////////////////////////////////////////////
  489.        
  490.         static internal bool IsDigit(char ch)
  491.         {
  492.             if (ch >= minHebrewNumberCh && ch <= maxHebrewNumberCh) {
  493.                 return (HebrewValues[ch - minHebrewNumberCh].value >= 0);
  494.             }
  495.             return (ch == '\'' || ch == '"');
  496.         }
  497.        
  498.     }
  499. }

Developer Fusion