The Labs \ Source Viewer \ SSCLI \ System.IO.Compression \ FastEncoderWindow

  1. //------------------------------------------------------------------------------
  2. // <copyright file="FastEncoderWindow.cs" company="Microsoft">
  3. //
  4. // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
  5. //
  6. // The use and distribution terms for this software are contained in the file
  7. // named license.txt, which can be found in the root of this distribution.
  8. // By using this software in any fashion, you are agreeing to be bound by the
  9. // terms of this license.
  10. //
  11. // You must not remove this notice, or any other, from this software.
  12. //
  13. // </copyright>
  14. //------------------------------------------------------------------------------
  15. namespace System.IO.Compression
  16. {
  17.     using System;
  18.     using System.Diagnostics;
  19.    
  20.     internal class FastEncoderWindow
  21.     {
  22.         private byte[] window;
  23.         // complete bytes window
  24.         private int bufPos;
  25.         // the start index of uncompressed bytes
  26.         private int bufEnd;
  27.         // the end index of uncompressed bytes
  28.         // Be very careful about increasing the window size; the code tables will have to
  29.         // be updated, since they assume that extra_distance_bits is never larger than a
  30.         // certain size.
  31.         const int FastEncoderHashShift = 4;
  32.         const int FastEncoderHashtableSize = 2048;
  33.         const int FastEncoderHashMask = FastEncoderHashtableSize - 1;
  34.         const int FastEncoderWindowSize = 8192;
  35.         const int FastEncoderWindowMask = FastEncoderWindowSize - 1;
  36.         const int FastEncoderMatch3DistThreshold = 16384;
  37.         internal const int MaxMatch = 258;
  38.         internal const int MinMatch = 3;
  39.        
  40.         // Following constants affect the search,
  41.         // they should be modifiable if we support different compression levels in future.
  42.         const int SearchDepth = 32;
  43.         const int GoodLength = 4;
  44.         const int NiceLength = 32;
  45.         const int LazyMatchThreshold = 6;
  46.        
  47.         // Hashtable structure
  48.         private ushort[] prev;
  49.         // next most recent occurance of chars with same hash value
  50.         private ushort[] lookup;
  51.         // hash table to find most recent occurance of chars with same hash value
  52.         public FastEncoderWindow()
  53.         {
  54.             window = new byte[2 * FastEncoderWindowSize + MaxMatch + 4];
  55.             prev = new ushort[FastEncoderWindowSize + MaxMatch];
  56.             lookup = new ushort[FastEncoderHashtableSize];
  57.             bufPos = FastEncoderWindowSize;
  58.             bufEnd = bufPos;
  59.         }
  60.        
  61.         public int BytesAvailable {
  62.             // uncompressed bytes
  63.             get {
  64.                 Debug.Assert(bufEnd - bufPos >= 0, "Ending pointer can't be in front of starting pointer!");
  65.                 return bufEnd - bufPos;
  66.             }
  67.         }
  68.        
  69.         public int FreeWindowSpace {
  70.             // Free space in the window
  71.             get { return 2 * FastEncoderWindowSize - bufEnd; }
  72.         }
  73.        
  74.         // copy bytes from input buffer into window
  75.         public void CopyBytes(byte[] inputBuffer, int startIndex, int count)
  76.         {
  77.             Array.Copy(inputBuffer, startIndex, window, bufEnd, count);
  78.             bufEnd += count;
  79.         }
  80.        
  81.         // slide the history window to the left by FastEncoderWindowSize bytes
  82.         public void MoveWindows()
  83.         {
  84.             int i;
  85.             Debug.Assert(bufPos == 2 * FastEncoderWindowSize, "only call this at the end of the window");
  86.            
  87.             // verify that the hash table is correct
  88.             VerifyHashes();
  89.             // Debug only code
  90.             Array.Copy(window, bufPos - FastEncoderWindowSize, window, 0, FastEncoderWindowSize);
  91.            
  92.             // move all the hash pointers back
  93.             for (i = 0; i < FastEncoderHashtableSize; i++) {
  94.                 int val = ((int)lookup[i]) - FastEncoderWindowSize;
  95.                
  96.                 if (val <= 0) {
  97.                     // too far away now? then set to zero
  98.                     lookup[i] = (ushort)0;
  99.                 }
  100.                 else {
  101.                     lookup[i] = (ushort)val;
  102.                 }
  103.             }
  104.            
  105.             // prev[]'s are absolute pointers, not relative pointers, so we have to move them back too
  106.             // making prev[]'s into relative pointers poses problems of its own
  107.             for (i = 0; i < FastEncoderWindowSize; i++) {
  108.                 long val = ((long)prev[i]) - FastEncoderWindowSize;
  109.                
  110.                 if (val <= 0) {
  111.                     prev[i] = (ushort)0;
  112.                 }
  113.                 else {
  114.                     prev[i] = (ushort)val;
  115.                 }
  116.             }
  117.            
  118.             #if DEBUG
  119.             // For debugging, wipe the window clean, so that if there is a bug in our hashing,
  120.             // the hash pointers will now point to locations which are not valid for the hash value
  121.             // (and will be caught by our ASSERTs).
  122.             Array.Clear(window, FastEncoderWindowSize, window.Length - FastEncoderWindowSize);
  123.             #endif
  124.            
  125.             VerifyHashes();
  126.             // debug: verify hash table is correct
  127.             bufPos = FastEncoderWindowSize;
  128.             bufEnd = bufPos;
  129.            
  130.         }
  131.        
  132.         private uint HashValue(uint hash, byte b)
  133.         {
  134.             return (hash << FastEncoderHashShift) ^ b;
  135.         }
  136.        
  137.         // insert string into hash table and return most recent location of same hash value
  138.         private uint InsertString(ref uint hash)
  139.         {
  140.             // Note we only use the lowest 11 bits of the hash vallue (hash table size is 11).
  141.             // This enables fast calculation of hash value for the input string.
  142.             // If we want to get the next hash code starting at next position,
  143.             // we can just increment bufPos and call this function.
  144.            
  145.             hash = HashValue(hash, window[bufPos + 2]);
  146.            
  147.             // Need to assert the hash value
  148.             uint search = lookup[hash & FastEncoderHashMask];
  149.             lookup[hash & FastEncoderHashMask] = (ushort)bufPos;
  150.             prev[bufPos & FastEncoderWindowMask] = (ushort)search;
  151.             return search;
  152.         }
  153.        
  154.         //
  155.         // insert strings into hashtable
  156.         // Arguments:
  157.         // hash : intial hash value
  158.         // matchLen : 1 + number of strings we need to insert.
  159.         //
  160.         private void InsertStrings(ref uint hash, int matchLen)
  161.         {
  162.             Debug.Assert(matchLen > 0, "Invalid match Len!");
  163.             if (bufEnd - bufPos <= matchLen) {
  164.                 bufPos += (matchLen - 1);
  165.             }
  166.             else {
  167.                 while (--matchLen > 0) {
  168.                     InsertString(ref hash);
  169.                     bufPos++;
  170.                 }
  171.             }
  172.         }
  173.        
  174.         //
  175.         // Find out what we should generate next. It can be a symbol, a distance/length pair
  176.         // or a symbol followed by distance/length pair
  177.         //
  178.         internal bool GetNextSymbolOrMatch(Match match)
  179.         {
  180.             Debug.Assert(bufPos >= FastEncoderWindowSize && bufPos < (2 * FastEncoderWindowSize), "Invalid Buffer Position!");
  181.            
  182.             // initialise the value of the hash, no problem if locations bufPos, bufPos+1
  183.             // are invalid (not enough data), since we will never insert using that hash value
  184.             uint hash = HashValue(0, window[bufPos]);
  185.             hash = HashValue(hash, window[bufPos + 1]);
  186.            
  187.             int matchLen;
  188.             int matchPos = 0;
  189.            
  190.             VerifyHashes();
  191.             // Debug only code
  192.             if (bufEnd - bufPos <= 3) {
  193.                 // The hash value becomes corrupt when we get within 3 characters of the end of the
  194.                 // input window, since the hash value is based on 3 characters. We just stop
  195.                 // inserting into the hash table at this point, and allow no matches.
  196.                 matchLen = 0;
  197.             }
  198.             else {
  199.                 // insert string into hash table and return most recent location of same hash value
  200.                 int search = (int)InsertString(ref hash);
  201.                
  202.                 // did we find a recent location of this hash value?
  203.                 if (search != 0) {
  204.                     // yes, now find a match at what we'll call position X
  205.                     matchLen = FindMatch(search, out matchPos, SearchDepth, NiceLength);
  206.                    
  207.                     // truncate match if we're too close to the end of the input window
  208.                     if (bufPos + matchLen > bufEnd)
  209.                         matchLen = bufEnd - bufPos;
  210.                 }
  211.                 else {
  212.                     // no most recent location found
  213.                     matchLen = 0;
  214.                 }
  215.             }
  216.            
  217.             if (matchLen < MinMatch) {
  218.                 // didn't find a match, so output unmatched char
  219.                 match.State = MatchState.HasSymbol;
  220.                 match.Symbol = window[bufPos];
  221.                 bufPos++;
  222.             }
  223.             else {
  224.                 // bufPos now points to X+1
  225.                 bufPos++;
  226.                
  227.                 // is this match so good (long) that we should take it automatically without
  228.                 // checking X+1 ?
  229.                 if (matchLen <= LazyMatchThreshold) {
  230.                     int nextMatchLen;
  231.                     int nextMatchPos = 0;
  232.                    
  233.                     // search at position X+1
  234.                     int search = (int)InsertString(ref hash);
  235.                    
  236.                     // no, so check for a better match at X+1
  237.                     if (search != 0) {
  238.                         nextMatchLen = FindMatch(search, out nextMatchPos, matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2), NiceLength);
  239.                        
  240.                         // truncate match if we're too close to the end of the window
  241.                         // note: nextMatchLen could now be < MinMatch
  242.                         if (bufPos + nextMatchLen > bufEnd) {
  243.                             nextMatchLen = bufEnd - bufPos;
  244.                         }
  245.                     }
  246.                     else {
  247.                         nextMatchLen = 0;
  248.                     }
  249.                    
  250.                     // right now X and X+1 are both inserted into the search tree
  251.                     if (nextMatchLen > matchLen) {
  252.                         // since nextMatchLen > matchLen, it can't be < MinMatch here
  253.                        
  254.                         // match at X+1 is better, so output unmatched char at X
  255.                         match.State = MatchState.HasSymbolAndMatch;
  256.                         match.Symbol = window[bufPos - 1];
  257.                         match.Position = nextMatchPos;
  258.                         match.Length = nextMatchLen;
  259.                        
  260.                         // insert remainder of second match into search tree
  261.                         // example: (*=inserted already)
  262.                         //
  263.                         // X X+1 X+2 X+3 X+4
  264.                         // * *
  265.                         // nextmatchlen=3
  266.                         // bufPos
  267.                         //
  268.                         // If nextMatchLen == 3, we want to perform 2
  269.                         // insertions (at X+2 and X+3). However, first we must
  270.                         // inc bufPos.
  271.                         //
  272.                         bufPos++;
  273.                         // now points to X+2
  274.                         matchLen = nextMatchLen;
  275.                         InsertStrings(ref hash, matchLen);
  276.                     }
  277.                     else {
  278.                         // match at X is better, so take it
  279.                         match.State = MatchState.HasMatch;
  280.                         match.Position = matchPos;
  281.                         match.Length = matchLen;
  282.                        
  283.                         // Insert remainder of first match into search tree, minus the first
  284.                         // two locations, which were inserted by the FindMatch() calls.
  285.                         //
  286.                         // For example, if matchLen == 3, then we've inserted at X and X+1
  287.                         // already (and bufPos is now pointing at X+1), and now we need to insert
  288.                         // only at X+2.
  289.                         //
  290.                         matchLen--;
  291.                         bufPos++;
  292.                         // now bufPos points to X+2
  293.                         InsertStrings(ref hash, matchLen);
  294.                     }
  295.                 }
  296.                 else {
  297.                     // match_length >= good_match
  298.                     // in assertion: bufPos points to X+1, location X inserted already
  299.                     // first match is so good that we're not even going to check at X+1
  300.                     match.State = MatchState.HasMatch;
  301.                     match.Position = matchPos;
  302.                     match.Length = matchLen;
  303.                    
  304.                     // insert remainder of match at X into search tree
  305.                     InsertStrings(ref hash, matchLen);
  306.                 }
  307.             }
  308.            
  309.             if (bufPos == 2 * FastEncoderWindowSize) {
  310.                 MoveWindows();
  311.             }
  312.             return true;
  313.         }
  314.        
  315.         //
  316.         // Find a match starting at specified position and return length of match
  317.         // Arguments:
  318.         // search : where to start searching
  319.         // matchPos : return match position here
  320.         // searchDepth : # links to traverse
  321.         // NiceLength : stop immediately if we find a match >= NiceLength
  322.         //
  323.         int FindMatch(int search, out int matchPos, int searchDepth, int niceLength)
  324.         {
  325.             Debug.Assert(bufPos >= 0 && bufPos < 2 * FastEncoderWindowSize, "Invalid Buffer position!");
  326.             Debug.Assert(search < bufPos, "Invalid starting search point!");
  327.             Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos));
  328.            
  329.             int bestMatch = 0;
  330.             // best match length found so far
  331.             int bestMatchPos = 0;
  332.             // absolute match position of best match found
  333.             // the earliest we can look
  334.             int earliest = bufPos - FastEncoderWindowSize;
  335.             Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!");
  336.            
  337.             byte wantChar = window[bufPos];
  338.             while (search > earliest) {
  339.                 // make sure all our hash links are valid
  340.                 Debug.Assert(RecalculateHash((int)search) == RecalculateHash(bufPos), "Corrupted hash link!");
  341.                
  342.                 // Start by checking the character that would allow us to increase the match
  343.                 // length by one. This improves performance quite a bit.
  344.                 if (window[search + bestMatch] == wantChar) {
  345.                     int j;
  346.                    
  347.                     // Now make sure that all the other characters are correct
  348.                     for (j = 0; j < MaxMatch; j++) {
  349.                         if (window[bufPos + j] != window[search + j])
  350.                             break;
  351.                     }
  352.                    
  353.                     if (j > bestMatch) {
  354.                         bestMatch = j;
  355.                         bestMatchPos = search;
  356.                         // absolute position
  357.                         if (j > NiceLength)
  358.                             break;
  359.                         wantChar = window[bufPos + j];
  360.                     }
  361.                 }
  362.                
  363.                 if (--searchDepth == 0) {
  364.                     break;
  365.                 }
  366.                
  367.                 Debug.Assert(prev[search & FastEncoderWindowMask] < search, "we should always go backwards!");
  368.                
  369.                 search = prev[search & FastEncoderWindowMask];
  370.             }
  371.            
  372.             // doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch
  373.             matchPos = bufPos - bestMatchPos - 1;
  374.             // convert absolute to relative position
  375.             // don't allow match length 3's which are too far away to be worthwhile
  376.             if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold) {
  377.                 return 0;
  378.             }
  379.            
  380.             Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize");
  381.             return bestMatch;
  382.         }
  383.        
  384.        
  385.         [Conditional("DEBUG")]
  386.         void VerifyHashes()
  387.         {
  388.             for (int i = 0; i < FastEncoderHashtableSize; i++) {
  389.                 ushort where = lookup[i];
  390.                 ushort nextWhere;
  391.                
  392.                 while (where != 0 && bufPos - where < FastEncoderWindowSize) {
  393.                     Debug.Assert(RecalculateHash(where) == i, "Incorrect Hashcode!");
  394.                     nextWhere = prev[where & FastEncoderWindowMask];
  395.                     if (bufPos - nextWhere >= FastEncoderWindowSize) {
  396.                         break;
  397.                     }
  398.                    
  399.                     Debug.Assert(nextWhere < where, "pointer is messed up!");
  400.                     where = nextWhere;
  401.                 }
  402.             }
  403.         }
  404.        
  405.         // can't use conditional attribute here.
  406.         uint RecalculateHash(int position)
  407.         {
  408.             return (uint)(((window[position] << (2 * FastEncoderHashShift)) ^ (window[position + 1] << FastEncoderHashShift) ^ (window[position + 2])) & FastEncoderHashMask);
  409.         }
  410.     }
  411. }

Developer Fusion