The Labs \ Source Viewer \ SSCLI \ System.Text.RegularExpressions \ Match

  1. //------------------------------------------------------------------------------
  2. // <copyright file="RegexMatch.cs" company="Microsoft">
  3. //
  4. // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
  5. //
  6. // The use and distribution terms for this software are contained in the file
  7. // named license.txt, which can be found in the root of this distribution.
  8. // By using this software in any fashion, you are agreeing to be bound by the
  9. // terms of this license.
  10. //
  11. // You must not remove this notice, or any other, from this software.
  12. //
  13. // </copyright>
  14. //------------------------------------------------------------------------------
  15. // Match is the result class for a regex search.
  16. // It returns the location, length, and substring for
  17. // the entire match as well as every captured group.
  18. // Match is also used during the search to keep track of each capture for each group. This is
  19. // done using the "_matches" array. _matches[x] represents an array of the captures for group x.
  20. // This array consists of start and length pairs, and may have empty entries at the end. _matchcount[x]
  21. // stores how many captures a group has. Note that _matchcount[x]*2 is the length of all the valid
  22. // values in _matches[x]. _matchcount[x]*2-2 is the Start of the last capture, and _matchcount[x]*2-1 is the
  23. // Length of the last capture.
  24. //
  25. // For example, if group 2 has one capture starting at position 4 with length 6,
  26. // _matchcount[2] == 1
  27. // _matches[2][0] == 4
  28. // _matches[2][1] == 6
  29. //
  30. // Values in the _matches array can also be negative. This happens when using the balanced match
  31. // construct, "(?<start-end>...)". When the "end" group matches, a capture is added for both the "start"
  32. // and "end" groups. The capture added for "start" receives the negative values, and these values point to
  33. // the next capture to be balanced. They do NOT point to the capture that "end" just balanced out. The negative
  34. // values are indices into the _matches array transformed by the formula -3-x. This formula also untransforms.
  35. //
  36. namespace System.Text.RegularExpressions
  37. {
  38.    
  39.     using System.Collections;
  40.     using System.Diagnostics;
  41.     using System.Security.Permissions;
  42.     using System.Globalization;
  43.    
  44.     /// <devdoc>
  45.     /// <para>
  46.     /// Represents
  47.     /// the results from a single regular expression match.
  48.     /// </para>
  49.     /// </devdoc>
  50.     [Serializable()]
  51.     public class Match : Group
  52.     {
  53.         static internal Match _empty = new Match(null, 1, String.Empty, 0, 0, 0);
  54.         internal GroupCollection _groupcoll;
  55.        
  56.         // input to the match
  57.         internal Regex _regex;
  58.         internal int _textbeg;
  59.         internal int _textpos;
  60.         internal int _textend;
  61.         internal int _textstart;
  62.        
  63.         // output from the match
  64.         internal int[][] _matches;
  65.         internal int[] _matchcount;
  66.         internal bool _balancing;
  67.         // whether we've done any balancing with this match. If we
  68.         // have done balancing, we'll need to do extra work in Tidy().
  69.         /// <devdoc>
  70.         /// <para>
  71.         /// Returns an empty Match object.
  72.         /// </para>
  73.         /// </devdoc>
  74.         public static Match Empty {
  75.             get { return _empty; }
  76.         }
  77.        
  78. /*
  79.         * Nonpublic constructor
  80.         */       
  81.        
  82.         internal Match(Regex regex, int capcount, string text, int begpos, int len, int startpos) : base(text, new int[2], 0)
  83.         {
  84.            
  85.             _regex = regex;
  86.             _matchcount = new int[capcount];
  87.            
  88.             _matches = new int[capcount][];
  89.             _matches[0] = _caps;
  90.             _textbeg = begpos;
  91.             _textend = begpos + len;
  92.             _textstart = startpos;
  93.             _balancing = false;
  94.            
  95.             // No need for an exception here. This is only called internally, so we'll use an Assert instead
  96.             System.Diagnostics.Debug.Assert(!(_textbeg < 0 || _textstart < _textbeg || _textend < _textstart || _text.Length < _textend), "The parameters are out of range.");
  97.            
  98.         }
  99.        
  100. /*
  101.         * Nonpublic set-text method
  102.         */       
  103.         internal virtual void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
  104.         {
  105.             _regex = regex;
  106.             _text = text;
  107.             _textbeg = textbeg;
  108.             _textend = textend;
  109.             _textstart = textstart;
  110.            
  111.             for (int i = 0; i < _matchcount.Length; i++) {
  112.                 _matchcount[i] = 0;
  113.             }
  114.            
  115.             _balancing = false;
  116.         }
  117.        
  118.         /// <devdoc>
  119.         /// <para>[To be supplied.]</para>
  120.         /// </devdoc>
  121.         public virtual GroupCollection Groups {
  122.             get {
  123.                 if (_groupcoll == null)
  124.                     _groupcoll = new GroupCollection(this, null);
  125.                
  126.                 return _groupcoll;
  127.             }
  128.         }
  129.        
  130. /*
  131.         * Returns the next match
  132.         */       
  133.         /// <devdoc>
  134.         /// <para>Returns a new Match with the results for the next match, starting
  135.         /// at the position at which the last match ended (at the character beyond the last
  136.         /// matched character).</para>
  137.         /// </devdoc>
  138.         public Match NextMatch()
  139.         {
  140.             if (_regex == null)
  141.                 return this;
  142.            
  143.             return _regex.Run(false, _length, _text, _textbeg, _textend - _textbeg, _textpos);
  144.         }
  145.        
  146.        
  147. /*
  148.         * Return the result string (using the replacement pattern)
  149.         */       
  150.         /// <devdoc>
  151.         /// <para>
  152.         /// Returns the expansion of the passed replacement pattern. For
  153.         /// example, if the replacement pattern is ?$1$2?, Result returns the concatenation
  154.         /// of Group(1).ToString() and Group(2).ToString().
  155.         /// </para>
  156.         /// </devdoc>
  157.         public virtual string Result(string replacement)
  158.         {
  159.             RegexReplacement repl;
  160.            
  161.             if (replacement == null)
  162.                 throw new ArgumentNullException("replacement");
  163.            
  164.             if (_regex == null)
  165.                 throw new NotSupportedException(SR.GetString(SR.NoResultOnFailed));
  166.            
  167.             repl = (RegexReplacement)_regex.replref.Get();
  168.            
  169.             if (repl == null || !repl.Pattern.Equals(replacement)) {
  170.                 repl = RegexParser.ParseReplacement(replacement, _regex.caps, _regex.capsize, _regex.capnames, _regex.roptions);
  171.                 _regex.replref.Cache(repl);
  172.             }
  173.            
  174.             return repl.Replacement(this);
  175.         }
  176.        
  177. /*
  178.         * Used by the replacement code
  179.         */       
  180.         internal virtual string GroupToStringImpl(int groupnum)
  181.         {
  182.             int c = _matchcount[groupnum];
  183.             if (c == 0)
  184.                 return String.Empty;
  185.            
  186.             int[] matches = _matches[groupnum];
  187.            
  188.             return _text.Substring(matches[(c - 1) * 2], matches[(c * 2) - 1]);
  189.         }
  190.        
  191. /*
  192.         * Used by the replacement code
  193.         */       
  194.         internal string LastGroupToStringImpl()
  195.         {
  196.             return GroupToStringImpl(_matchcount.Length - 1);
  197.         }
  198.        
  199.        
  200. /*
  201.         * Convert to a thread-safe object by precomputing cache contents
  202.         */       
  203.         /// <devdoc>
  204.         /// <para>
  205.         /// Returns a Match instance equivalent to the one supplied that is safe to share
  206.         /// between multiple threads.
  207.         /// </para>
  208.         /// </devdoc>
  209.         [HostProtection(Synchronization = true)]
  210.         public static Match Synchronized(Match inner)
  211.         {
  212.             if (inner == null)
  213.                 throw new ArgumentNullException("inner");
  214.            
  215.             int numgroups = inner._matchcount.Length;
  216.            
  217.             // Populate all groups by looking at each one
  218.             for (int i = 0; i < numgroups; i++) {
  219.                 Group group = inner.Groups[i];
  220.                
  221.                 // Depends on the fact that Group.Synchronized just
  222.                 // operates on and returns the same instance
  223.                 System.Text.RegularExpressions.Group.Synchronized(group);
  224.             }
  225.            
  226.             return inner;
  227.         }
  228.        
  229. /*
  230.         * Nonpublic builder: add a capture to the group specified by "cap"
  231.         */       
  232.         internal virtual void AddMatch(int cap, int start, int len)
  233.         {
  234.             int capcount;
  235.            
  236.             if (_matches[cap] == null)
  237.                 _matches[cap] = new int[2];
  238.            
  239.             capcount = _matchcount[cap];
  240.            
  241.             if (capcount * 2 + 2 > _matches[cap].Length) {
  242.                 int[] oldmatches = _matches[cap];
  243.                 int[] newmatches = new int[capcount * 8];
  244.                 for (int j = 0; j < capcount * 2; j++)
  245.                     newmatches[j] = oldmatches[j];
  246.                 _matches[cap] = newmatches;
  247.             }
  248.            
  249.             _matches[cap][capcount * 2] = start;
  250.             _matches[cap][capcount * 2 + 1] = len;
  251.             _matchcount[cap] = capcount + 1;
  252.         }
  253.        
  254. /*
  255.         * Nonpublic builder: Add a capture to balance the specified group.  This is used by the
  256.                               balanced match construct. (?<foo-foo2>...)
  257.           If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(cap).
  258.           However, since we have backtracking, we need to keep track of everything.
  259.         */       
  260.         internal virtual void BalanceMatch(int cap)
  261.         {
  262.             int capcount;
  263.             int target;
  264.            
  265.             _balancing = true;
  266.            
  267.             // we'll look at the last capture first
  268.             capcount = _matchcount[cap];
  269.             target = capcount * 2 - 2;
  270.            
  271.             // first see if it is negative, and therefore is a reference to the next available
  272.             // capture group for balancing. If it is, we'll reset target to point to that capture.
  273.             if (_matches[cap][target] < 0)
  274.                 target = -3 - _matches[cap][target];
  275.            
  276.             // move back to the previous capture
  277.             target -= 2;
  278.            
  279.             // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
  280.             if (target >= 0 && _matches[cap][target] < 0)
  281.                 AddMatch(cap, _matches[cap][target], _matches[cap][target + 1]);
  282.             else
  283.                 /* == -3 - (target + 1) */                AddMatch(cap, -3 - target, -4 - target);
  284.            
  285.         }
  286.        
  287. /*
  288.         * Nonpublic builder: removes a group match by capnum
  289.         */       
  290.         internal virtual void RemoveMatch(int cap)
  291.         {
  292.             _matchcount[cap]--;
  293.         }
  294.        
  295. /*
  296.         * Nonpublic: tells if a group was matched by capnum
  297.         */       
  298.         internal virtual bool IsMatched(int cap)
  299.         {
  300.             return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1);
  301.         }
  302.        
  303. /*
  304.         * Nonpublic: returns the index of the last specified matched group by capnum
  305.         */       
  306.         internal virtual int MatchIndex(int cap)
  307.         {
  308.             int i = _matches[cap][_matchcount[cap] * 2 - 2];
  309.             if (i >= 0)
  310.                 return i;
  311.            
  312.             return _matches[cap][-3 - i];
  313.         }
  314.        
  315. /*
  316.         * Nonpublic: returns the length of the last specified matched group by capnum
  317.         */       
  318.         internal virtual int MatchLength(int cap)
  319.         {
  320.             int i = _matches[cap][_matchcount[cap] * 2 - 1];
  321.             if (i >= 0)
  322.                 return i;
  323.            
  324.             return _matches[cap][-3 - i];
  325.         }
  326.        
  327. /*
  328.         * Nonpublic: tidy the match so that it can be used as an immutable result
  329.         */       
  330.         internal virtual void Tidy(int textpos)
  331.         {
  332.             int[] interval;
  333.            
  334.             interval = _matches[0];
  335.             _index = interval[0];
  336.             _length = interval[1];
  337.             _textpos = textpos;
  338.             _capcount = _matchcount[0];
  339.            
  340.             if (_balancing) {
  341.                 // The idea here is that we want to compact all of our unbalanced captures. To do that we
  342.                 // use j basically as a count of how many unbalanced captures we have at any given time
  343.                 // (really j is an index, but j/2 is the count). First we skip past all of the real captures
  344.                 // until we find a balance captures. Then we check each subsequent entry. If it's a balance
  345.                 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
  346.                 // it down to the last free position.
  347.                 for (int cap = 0; cap < _matchcount.Length; cap++) {
  348.                     int limit;
  349.                     int[] matcharray;
  350.                    
  351.                     limit = _matchcount[cap] * 2;
  352.                     matcharray = _matches[cap];
  353.                    
  354.                     int i = 0;
  355.                     int j;
  356.                    
  357.                     for (i = 0; i < limit; i++) {
  358.                         if (matcharray[i] < 0)
  359.                             break;
  360.                     }
  361.                    
  362.                     for (j = i; i < limit; i++) {
  363.                         if (matcharray[i] < 0) {
  364.                             // skip negative values
  365.                             j--;
  366.                         }
  367.                         else {
  368.                             // but if we find something positive (an actual capture), copy it back to the last
  369.                             // unbalanced position.
  370.                             if (i != j)
  371.                                 matcharray[j] = matcharray[i];
  372.                             j++;
  373.                         }
  374.                     }
  375.                    
  376.                     _matchcount[cap] = j / 2;
  377.                 }
  378.                
  379.                 _balancing = false;
  380.             }
  381.         }
  382.        
  383.         #if DBG
  384.         /// <internalonly/>
  385.         /// <devdoc>
  386.         /// </devdoc>
  387.         public bool Debug {
  388.             get {
  389.                 if (_regex == null)
  390.                     return false;
  391.                
  392.                 return _regex.Debug;
  393.             }
  394.         }
  395.        
  396.         /// <internalonly/>
  397.         /// <devdoc>
  398.         /// </devdoc>
  399.         internal virtual void Dump()
  400.         {
  401.             int i;
  402.             int j;
  403.            
  404.             for (i = 0; i < _matchcount.Length; i++) {
  405.                 System.Diagnostics.Debug.WriteLine("Capnum " + i.ToString(CultureInfo.InvariantCulture) + ":");
  406.                
  407.                 for (j = 0; j < _matchcount[i]; j++) {
  408.                     string text = "";
  409.                    
  410.                     if (_matches[i][j * 2] >= 0)
  411.                         text = _text.Substring(_matches[i][j * 2], _matches[i][j * 2 + 1]);
  412.                    
  413.                     System.Diagnostics.Debug.WriteLine(" (" + _matches[i][j * 2].ToString(CultureInfo.InvariantCulture) + "," + _matches[i][j * 2 + 1].ToString(CultureInfo.InvariantCulture) + ") " + text);
  414.                 }
  415.             }
  416.         }
  417.         #endif
  418.     }
  419.    
  420.    
  421. /*
  422.     * MatchSparse is for handling the case where slots are
  423.     * sparsely arranged (e.g., if somebody says use slot 100000)
  424.     */   
  425.     internal class MatchSparse : Match
  426.     {
  427.         // the lookup hashtable
  428.         internal new Hashtable _caps;
  429.        
  430. /*
  431.         * Nonpublic constructor
  432.         */       
  433.        
  434.         internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) : base(regex, capcount, text, begpos, len, startpos)
  435.         {
  436.            
  437.             _caps = caps;
  438.         }
  439.        
  440.         public override GroupCollection Groups {
  441.             get {
  442.                 if (_groupcoll == null)
  443.                     _groupcoll = new GroupCollection(this, _caps);
  444.                
  445.                 return _groupcoll;
  446.             }
  447.         }
  448.        
  449.         #if DBG
  450.         internal override void Dump()
  451.         {
  452.             if (_caps != null) {
  453.                 IEnumerator e = _caps.Keys.GetEnumerator();
  454.                
  455.                 while (e.MoveNext()) {
  456.                     System.Diagnostics.Debug.WriteLine("Slot " + e.Current.ToString() + " -> " + _caps[e.Current].ToString());
  457.                 }
  458.             }
  459.            
  460.             base.Dump();
  461.         }
  462.         #endif
  463.        
  464.     }
  465.    
  466.    
  467. }

Developer Fusion