UltraScan III
us_gzip.cpp
Go to the documentation of this file.
1 /* This is a highly customized version of GNU Gzip. It has
2  * been converted to Qt and C++.
3  * Assumptions include that only a regular file is passed.
4  * Compression is always -9.
5  *
6  * Since this file is derived from a GPLed application, this file is
7  * also licensed under the GPL.
8  *
9  * Bruce Dubbs
10  * Univerity of Texas Health Science Center
11 */
12 
13 #include <QFileInfo>
14 #include <QDataStream>
15 #include <QDateTime>
16 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <time.h>
20 #include <fcntl.h>
21 
22 #ifdef Q_WS_X11
23 #include <unistd.h>
24 #endif
25 
26 #ifdef WIN32
27 # include <io.h>
28 # include <sys/utime.h>
29 # include <sys/stat.h>
30 # include <qdatetime.h>
31 # define utime _utime
32 # define open _open
33 # define read _read
34 # define write _write
35 # define close _close
36 # define fstat _fstat
37 # define stat _stat
38 # define utimbuf _utimbuf
39 #else
40 # include <utime.h>
41 # include <sys/stat.h> // For OSX
42 # define O_BINARY 0
43 #endif
44 
45 #include <iostream>
46 using namespace std;
47 
48 #include "us_gzip.h"
49 
50 #ifdef WIN32
51 # define ssize_t long
52 # define bzero(p, size) (void)memset((p), 0, (size))
53 #endif
54 
55 namespace gzip_data
56 {
57 /* ========================================================================
58  * Table of CRC-32's of all single-byte values (made by makecrc.c) */
60  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
61  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
62  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
63  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
64  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
65  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
66  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
67  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
68  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
69  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
70  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
71  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
72  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
73  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
74  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
75  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
76  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
77  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
78  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
79  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
80  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
81  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
82  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
83  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
84  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
85  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
86  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
87  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
88  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
89  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
90  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
91  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
92  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
93  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
94  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
95  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
96  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
97  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
98  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
99  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
100  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
101  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
102  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
103  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
104  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
105  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
106  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
107  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
108  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
109  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
110  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
111  0x2d02ef8dL
112  };
113 
114  /* Tables for deflate from PKZIP's appnote.txt. */
115  unsigned border[] = /* Order of the bit length code lengths */
116  {
117  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
118  };
119 
120  ush cplens[] = /* Copy lengths for literal codes 257..285 */
121  {
122  3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
123  35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
124  };
125  /* note: see note #13 above about the 258 in this list. */
126 
127  ush cplext[]= /* Extra bits for literal codes 257..285 */
128  {
129  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
130  3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99 /* 99==invalid */
131  };
132 
133  ush cpdist[] = /* Copy offsets for distance codes 0..29 */
134  {
135  1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
136  257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
137  8193, 12289, 16385, 24577
138  };
139 
140  ush cpdext[] = /* Extra bits for distance codes */
141  {
142  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
143  7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
144  12, 12, 13, 13
145  };
146 
148  {
149  0x0000,
150  0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
151  0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
152  };
153 
155  {
156  /* good lazy nice chain */
157  /* 0 */ {0, 0, 0, 0}, /* store only */
158  /* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */
159  /* 2 */ {4, 5, 16, 8},
160  /* 3 */ {4, 6, 32, 32},
161  /* 4 */ {4, 4, 16, 16}, /* lazy matches */
162  /* 5 */ {8, 16, 32, 32},
163  /* 6 */ {8, 16, 128, 128},
164  /* 7 */ {8, 32, 128, 256},
165  /* 8 */ {32, 128, 258, 1024},
166  /* 9 */ {32, 258, 258, 4096} /* maximum compression */
167  };
168 
169  int extra_lbits[] = /* extra bits for each length code */
170  {
171  0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
172  };
173 
174  int extra_dbits[] = /* extra bits for each distance code */
175  {
176  0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
177  };
178 
179  int extra_blbits[] = /* extra bits for each bit length code */
180  {
181  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7
182  };
183 
184  unsigned char bl_order[] =
185  {
186  16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
187  };
188  /* The lengths of the bit length codes are sent in order of decreasing
189  * probability, to avoid transmitting the lengths for unused bit length codes.
190  */
191 
192 }
193 
194 using namespace gzip_data;
195 
197 {
198  static_dtree[ 0 ].Len = 0;
199 }
200 
201 int US_Gzip::gzip( const QString& filename )
202 {
203  bytes_out = 0;
204  return treat_file( filename, FALSE );
205 }
206 
207 int US_Gzip::gunzip( const QString& filename )
208 {
209  bytes_out = 0;
210  return treat_file( filename, TRUE );
211 }
212 
213 int US_Gzip::treat_file( const QString& iname, bool decompress )
214 {
215  //unsigned int crc;
216  time_t filetime;
217  QFileInfo filename( iname );
218 
219  // Check if the input file is present and valid
220  if ( ! filename.exists() ) return GZIP_NOEXIST;
221  if ( ! filename.isFile() ) return GZIP_NOTFILE;
222  if ( filename.isSymLink() ) return GZIP_NOTFILE;
223  if ( ! filename.isReadable() ) return GZIP_NOREAD;
224 
225  // Get the times and other data
226  QDateTime lastRead = filename.lastRead();
227  QDateTime lastMod = filename.lastModified();
228 
229  filetime = lastMod.toTime_t();
230 
231  ifd = open( iname.toAscii().constData(), O_RDONLY | O_BINARY );
232  if ( ifd < 0 ) return GZIP_READERROR;
233 
234  // Generate output file name.
235  QString oname = make_ofname( iname, decompress );
236  if ( oname == "" ) return GZIP_NOTGZ;
237 
238  // Open the input file and determine decompression method.
239 
240  if ( decompress )
241  {
242  ssize_t count;
243 
244  // Two byte signature
245  char signature[2];
246  count = read( ifd, signature, 2 );
247 
248  if ( count != 2 )
249  {
250  close( ifd );
251  return GZIP_READERROR;
252  }
253 
254  if ( signature[0] != (char) 0x1f ||
255  signature[1] != (char) 0x8b ) return GZIP_NOTGZ;
256 
257  // One byte method. Only 0x08, deflate, is supported
258  char method;
259  count = read( ifd, &method, 1 );
260 
261  if ( count != 1 )
262  {
263  close( ifd );
264  return GZIP_READERROR;
265  }
266 
267  // One byte flags. 00111111. Only bit 3 ( file name present ) is supported
268  // Bit 0 is ignored. Bits 1,2,4,5 (multi-part, extra field, comment,
269  // encrypyion) are flagged as unsupported.
270 
271  char flags;
272  count = read( ifd, &flags, 1 );
273 
274  if ( count != 1 )
275  {
276  close( ifd );
277  return GZIP_READERROR;
278  }
279 
280  if ( flags & 0x36 ) return GZIP_OPTIONNOTSUPPORTED;
281 
282  // Four bytes. File modification time in Unix format.
283  count = read( ifd, (char*) &filetime, 4 );
284 
285  if ( count != 4 )
286  {
287  close( ifd );
288  return GZIP_READERROR;
289  }
290 
291  char timestring[40];
292 #ifdef WIN32
293  //ctime_s( timestring, sizeof( timestring ), &filetime );
294  int stringCount = sprintf( timestring, "%s", ctime( &filetime ) );
295 #else
296  ctime_r( &filetime, timestring );
297 #endif
298 
299  timestring[24] = 0;
300 
301  // One byte. Extra flags (depend on compression method).
302  // FAST = 4 ( -1 compression ) or SLOW = 2 ( -9 compression )
303 
304  char extra;
305  // QString extraString;
306 
307  count = read( ifd, &extra, 1 );
308  if ( count != 1 )
309  {
310  close( ifd );
311  return GZIP_READERROR;
312  }
313 
314  // One byte OS. 0x03 = UNIX, 0x0b = WIN32
315  // This parameter can be ignored.
316 
317  char OS;
318  count = read( ifd, &OS, 1 );
319 
320  if ( count != 1 )
321  {
322  close( ifd );
323  return GZIP_READERROR;
324  }
325 
326  // Variable bytes Optional original file name, zero terminated.
327 
328  QString embedded_name( "" );
329  char c;
330 
331  if ( flags & 0x08 ) // Filename present
332  {
333  while ( TRUE )
334  {
335  count = read( ifd, &c, 1 );
336 
337  if ( count != 1 )
338  {
339  close( ifd );
340  return GZIP_READERROR;
341  }
342 
343  if ( c == 0 ) break;
344  embedded_name.append( c );
345  }
346 
347  oname = embedded_name;
348  }
349 
350  // Open the output file but check that it doesn't exist first
351  QFileInfo output_file( oname );
352 
353  if ( output_file.exists() ) return GZIP_OUTFILEEXISTS;
354 
355  ofd = open( oname.toAscii().constData(),
356  O_CREAT | O_WRONLY | O_BINARY , 0664 );
357  outcnt = 0;
358 
359  updcrc( NULL, 0 ); /* initialize crc */
360 
361  // Expand compressd data
362  try
363  {
364  inflate();
365  }
366  catch ( int inflate_error )
367  {
368  close( ifd );
369  close( ofd );
370  unlink( oname.toAscii().constData() );
371  return inflate_error;
372  }
373 
374 #define wp outcnt
375 #define GETBYTE() (inptr < insize ? inbuf[inptr++] : (wp = w, fill_inbuf(0)))
376  unsigned w = 0; // Dummy to make define above work
377 
378  // CRC
379  unsigned char buf[ 4 ];
380 
381  for ( int i = 0; i < 4; i++ ) buf[ i ] = GETBYTE();
382 
383  crc = buf[ 3 ] << 24 | buf[ 2 ] << 16 | buf[ 1 ] << 8 | buf[ 0 ];
384 
385  /* Validate decompression */
386  if ( crc != updcrc(outbuf, 0) )
387  {
388  close( ifd );
389  close( ofd );
390  unlink( oname.toAscii().constData() );
391  return GZIP_CRCERROR;
392  }
393 
394  // uncompressed input size modulo 2^32
395 
396  for ( int i = 0; i < 4; i++ ) buf[ i ] = GETBYTE();
397 
398  ulg size = buf[ 3 ] << 24 | buf[ 2 ] << 16 | buf[ 1 ] << 8 | buf[ 0 ];
399 
400  if ( size != (unsigned int) bytes_out )
401  {
402  close( ifd );
403  close( ofd );
404  unlink( oname.toAscii().constData() );
405  return GZIP_LENGTHERROR;
406  }
407  }
409  else // compress the input file
410  {
411  // Check output file -- exists? writable?
412  QFileInfo filename( oname );
414 
415  ofd = open( oname.toAscii().constData(),
416  O_CREAT | O_WRONLY | O_BINARY, 0664 );
417  if ( ofd < 0 ) return GZIP_WRITEERROR;
418 
419  outcnt = 0;
420  bytes_in = 0;
421 
422 #define flush_output(w) (wp=(w),flush_window())
423 
424 #define put_byte(c) \
425  { \
426  outbuf[ outcnt++ ] = (uch)(c); \
427  if ( outcnt == OUTBUFSIZ ) flush_outbuf();\
428  }
429 
430 /* Output a 16 bit value, lsb first */
431 #define put_short(w) \
432  { \
433  if ( outcnt < OUTBUFSIZ-2 ) \
434  { \
435  outbuf[ outcnt++ ] = (uch) ((w) & 0xff); \
436  outbuf[ outcnt++ ] = (uch) ((ush)(w) >> 8); \
437  } \
438  else \
439  { \
440  put_byte( (uch) ( (w) & 0xff) ); \
441  put_byte( (uch) ( (ush) (w) >> 8 ) ); \
442  } \
443  }
444 
445 /* Output a 32 bit value to the bit stream, lsb first */
446 #define put_long(n) \
447  { \
448  put_short( (n) & 0xffff ); \
449  put_short( ( (ulg) (n) ) >> 16) ; \
450  }
451 
452 #define GZIP_MAGIC "\037\213" /* Magic header for gzip files, 1F 8B */
453 #define DEFLATED 8
454 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */
455 
456  try
457  {
458  /* Write the header to the gzip file. See algorithm.doc for the format */
459  put_byte( GZIP_MAGIC[0] ); /* magic header */
460  put_byte( GZIP_MAGIC[1] );
461  put_byte( DEFLATED ); /* compression method */
462 
463  uch flags = ORIG_NAME; /* general purpose bit flags */
464  put_byte( flags ); /* general flags */
465 
466  /* original time stamp (modification time) */
467  time_t time_stamp = lastMod.toTime_t();
468  put_long( (ulg) time_stamp == ( time_stamp & 0xffffffff ) ?
469  (ulg) time_stamp : (ulg) 0);
470 
471 #define SLOW 2
472  uch deflate_flags = SLOW; // Compression level 9
473  /* Write deflated file to zip file */
474  put_byte( (uch) deflate_flags ); /* extra flags */
475 
476 #ifdef WIN32
477 # define OS_CODE 0x0b
478 #else
479 # define OS_CODE 0x03
480 #endif
481 
482  put_byte( OS_CODE ); /* OS identifier */
483 
484  if ( strlen( iname.toAscii().constData() ) > 255 )
485  return GZIP_FILENAMEERROR;
486 
487  char f[256];
488  strcpy( f, iname.toAscii().constData() );
489  char* p = base_name( f ); /* Don't save the directory part. */
490  do { put_byte( *p ); } while ( *p++ );
491 
492  crc = updcrc( 0, 0 );
493 
494  bi_init();
495  ct_init();
496  lm_init();
497  deflate();
498 
499  /* Write the crc and uncompressed size */
500  put_long( crc );
501  put_long( (ulg) bytes_in );
502  }
503  catch ( int error )
504  {
505  close( ifd );
506  close( ofd );
507  return error;
508  }
509 
510  flush_outbuf();
511  }
512 
513  // Get input file data
514  struct stat ifstat;
515  fstat( ifd, &ifstat );
516 
517  // Close files
518  close( ifd );
519  close( ofd );
520 
521  // Set the permissions
522  chmod( oname.toAscii().constData(), ifstat.st_mode & 07777 );
523 
524 #ifndef WIN32
525  // Change the ownership (may fail if not root)
526  chown( oname.toAscii().constData(), ifstat.st_uid, ifstat.st_gid );
527 #endif
528 
529  // Reset oname metadata to ifile metadata
530 
531  struct utimbuf timep;
532 
533  timep.actime = ifstat.st_atime;
534  timep.modtime = filetime;
535  utime( oname.toAscii().constData(), &timep );
536 
537  // Now delete the input file
538  unlink( iname.toAscii().constData() );
539 
540  return GZIP_OK;
541 }
542 
543 /* ========================================================================
544  * Generate ofname given filename.
545 */
546 
547 QString US_Gzip::make_ofname( const QString& filename, bool decompress )
548 {
549  QString ofile = filename;
550 
551  if ( decompress )
552  { // Decompress: strip ".gz" or change ".svgz" to ".svg"
553  if ( filename.endsWith( ".gz" ) )
554  return ofile.section( ".", 0, 2 );
555  else if ( filename.endsWith( ".svgz" ) )
556  return ofile.section( ".", 0, 2 ) + ".svg";
557  else
558  return QString( "" );
559  }
560 
561  // Compress
562  if ( filename.endsWith( ".svg" ) )
563  return ofile.section( ".", 0, -2 ) + ".svgz";
564  else
565  return ofile + ".gz";
566 }
567 
569 /* Inflate deflated data
570 
571  Copyright (C) 1997, 1998, 1999, 2002 Free Software Foundation, Inc.
572  Updated for c++ and qt by Bruce Dubbs - 2008
573 */
574 
575 /* Not copyrighted 1992 by Mark Adler
576  version c10p1, 10 January 1993 */
577 
578 /* You can do whatever you like with this source file, though I would
579  prefer that if you modify it and redistribute it that you include
580  comments to that effect with your name and the date. Thank you.
581  [The history has been moved to the file ChangeLog.]
582  */
583 
584 /*
585  Inflate deflated (PKZIP's method 8 compressed) data. The compression
586  method searches for as much of the current string of bytes (up to a
587  length of 258) in the previous 32K bytes. If it doesn't find any
588  matches (of at least length 3), it codes the next byte. Otherwise, it
589  codes the length of the matched string and its distance backwards from
590  the current position. There is a single Huffman code that codes both
591  single bytes (called "literals") and match lengths. A second Huffman
592  code codes the distance information, which follows a length code. Each
593  length or distance code actually represents a base value and a number
594  of "extra" (sometimes zero) bits to get to add to the base value. At
595  the end of each deflated block is a special end-of-block (EOB) literal/
596  length code. The decoding process is basically: get a literal/length
597  code; if EOB then done; if a literal, emit the decoded byte; if a
598  length then get the distance and emit the referred-to bytes from the
599  sliding window of previously emitted data.
600 
601  There are (currently) three kinds of inflate blocks: stored, fixed, and
602  dynamic. The compressor deals with some chunk of data at a time, and
603  decides which method to use on a chunk-by-chunk basis. A chunk might
604  typically be 32K or 64K. If the chunk is uncompressible, then the
605  "stored" method is used. In this case, the bytes are simply stored as
606  is, eight bits per byte, with none of the above coding. The bytes are
607  preceded by a count, since there is no longer an EOB code.
608 
609  If the data is compressible, then either the fixed or dynamic methods
610  are used. In the dynamic method, the compressed data is preceded by
611  an encoding of the literal/length and distance Huffman codes that are
612  to be used to decode this block. The representation is itself Huffman
613  coded, and so is preceded by a description of that code. These code
614  descriptions take up a little space, and so for small blocks, there is
615  a predefined set of codes, called the fixed codes. The fixed method is
616  used if the block codes up smaller that way (usually for quite small
617  chunks), otherwise the dynamic method is used. In the latter case, the
618  codes are customized to the probabilities in the current block, and so
619  can code it much better than the pre-determined fixed codes.
620 
621  The Huffman codes themselves are decoded using a mutli-level table
622  lookup, in order to maximize the speed of decoding plus the speed of
623  building the decoding tables. See the comments below that precede the
624  lbits and dbits tuning parameters.
625  */
626 
627 
628 /*
629  Notes beyond the 1.93a appnote.txt:
630 
631  1. Distance pointers never point before the beginning of the output
632  stream.
633  2. Distance pointers can point back across blocks, up to 32k away.
634  3. There is an implied maximum of 7 bits for the bit length table and
635  15 bits for the actual data.
636  4. If only one code exists, then it is encoded using one bit. (Zero
637  would be more efficient, but perhaps a little confusing.) If two
638  codes exist, they are coded using one bit each (0 and 1).
639  5. There is no way of sending zero distance codes--a dummy must be
640  sent if there are none. (History: a pre 2.0 version of PKZIP would
641  store blocks with no distance codes, but this was discovered to be
642  too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
643  zero distance codes, which is sent as one code of zero bits in
644  length.
645  6. There are up to 286 literal/length codes. Code 256 represents the
646  end-of-block. Note however that the static length tree defines
647  288 codes just to fill out the Huffman codes. Codes 286 and 287
648  cannot be used though, since there is no length base or extra bits
649  defined for them. Similarly, there are up to 30 distance codes.
650  However, static trees define 32 codes (all 5 bits) to fill out the
651  Huffman codes, but the last two had better not show up in the data.
652  7. Unzip can check dynamic Huffman blocks for complete code sets.
653  The exception is that a single code would not be complete (see #4).
654  8. The five bits following the block type is really the number of
655  literal codes sent minus 257.
656  9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
657  (1+6+6). Therefore, to output three times the length, you output
658  three codes (1+1+1), whereas to output four times the same length,
659  you only need two codes (1+3). Hmm.
660  10. In the tree reconstruction algorithm, Code = Code + Increment
661  only if BitLength(i) is not zero. (Pretty obvious.)
662  11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
663  12. Note: length code 284 can represent 227-258, but length code 285
664  really is 258. The last length deserves its own, short code
665  since it gets used a lot in very redundant files. The length
666  258 is special since 258 - 3 (the min match length) is 255.
667  13. The literal/length and distance code bit lengths are read as a
668  single stream of lengths. It is possible (and advantageous) for
669  a repeat code (16, 17, or 18) to go across the boundary between
670  the two sets of lengths.
671  */
672 
673 #define slide window
674 
675 /* The inflate algorithm uses a sliding 32K byte window on the uncompressed
676  stream to find repeated byte strings. This is implemented here as a
677  circular buffer. The index is updated simply by incrementing and then
678  and'ing with 0x7fff (32K-1). */
679 /* It is left to other modules to supply the 32K area. It is assumed
680  to be usable as if it were declared "uch slide[32768];" or as just
681  "uch *slide;" and then malloc'ed in the latter case. The definition
682  must be in unzip.h, included above. */
683 /* unsigned wp; current position in slide */
684 
685 
686 /* Macros for inflate() bit peeking and grabbing.
687  The usage is:
688 
689  NEEDBITS(j)
690  x = b & mask_bits[j];
691  DUMPBITS(j)
692 
693  where NEEDBITS makes sure that b has at least j bits in it, and
694  DUMPBITS removes the bits from b. The macros use the variable k
695  for the number of bits in b. Normally, b and k are register
696  variables for speed, and are initialized at the beginning of a
697  routine that uses these macros from a global bit buffer and count.
698  The macros also use the variable w, which is a cached copy of wp.
699 
700  If we assume that EOB will be the longest code, then we will never
701  ask for bits with NEEDBITS that are beyond the end of the stream.
702  So, NEEDBITS should not read any more bytes than are needed to
703  meet the request. Then no bytes need to be "returned" to the buffer
704  at the end of the last block.
705 
706  However, this assumption is not true for fixed blocks--the EOB code
707  is 7 bits, but the other literal/length codes can be 8 or 9 bits.
708  (The EOB code is shorter than other codes because fixed blocks are
709  generally short. So, while a block always has an EOB, many other
710  literal/length codes have a significantly lower probability of
711  showing up at all.) However, by making the first table have a
712  lookup of seven bits, the EOB code will be found in that first
713  lookup, and so will not require that too many bits be pulled from
714  the stream.
715  */
716 
717 
718 //#ifdef CRYPT
719 // uch cc;
720 /*# define NEXTBYTE() \
721  (decrypt ? (cc = GETBYTE(), zdecode(cc), cc) : GETBYTE())
722 */
723 //#else
724 # define NEXTBYTE() (uch)GETBYTE()
725 //#endif
726 
727 #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
728 
729 /*
730 #define NEEDBITS(n)
731 {
732  while ( k < (n) )
733  {
734  b |= ( (ulg)NEXTBYTE()) << k;
735  k += 8;
736  }
737 }
738 
739 */
740 
741 /*
742 #define NEEDBITS(n)
743 {
744  while ( k < (n) )
745  {
746  b |= ( (ulg) (uch)( inptr < insize ?
747  inbuf[ inptr++ ] :
748  ( wp = w, fill_inbuf( 0 ) ) ) ) << k;
749  k += 8;
750  }
751 }
752 
753 */
754 
755 #define DUMPBITS(n) {b>>=(n);k-=(n);}
756 
757 /*
758 
759 #define DUMPBITS(n)
760 {
761  b >>= (n);
762  k -= (n);
763 }
764 
765 */
766 
767 
768 /* Huffman code decoding is performed using a multi-level table lookup.
769  The fastest way to decode is to simply build a lookup table whose
770  size is determined by the longest code. However, the time it takes
771  to build this table can also be a factor if the data being decoded
772  is not very long. The most common codes are necessarily the
773  shortest codes, so those codes dominate the decoding time, and hence
774  the speed. The idea is you can have a shorter table that decodes the
775  shorter, more probable codes, and then point to subsidiary tables for
776  the longer codes. The time it costs to decode the longer codes is
777  then traded against the time it takes to make longer tables.
778 
779  This results of this trade are in the variables lbits and dbits
780  below. lbits is the number of bits the first level table for literal/
781  length codes can decode in one step, and dbits is the same thing for
782  the distance codes. Subsequent tables are also less than or equal to
783  those sizes. These values may be adjusted either when all of the
784  codes are shorter than that, in which case the longest code length in
785  bits is used, or when the shortest code is *longer* than the requested
786  table size, in which case the length of the shortest code in bits is
787  used.
788 
789  There are two different values for the two tables, since they code a
790  different number of possibilities each. The literal/length table
791  codes 286 possible values, or in a flat code, a little over eight
792  bits. The distance table codes 30 possible values, or a little less
793  than five bits, flat. The optimum values for speed end up being
794  about one bit more than those, so lbits is 8+1 and dbits is 5+1.
795  The optimum values may differ though from machine to machine, and
796  possibly even between compilers. Your mileage may vary.
797  */
798 
799 
800 //int lbits = 9; /* bits in base literal/length lookup table */
801 #define lbits 9
802 //int dbits = 6; /* bits in base distance lookup table */
803 #define dbits 6
804 
805 /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
806 #define BMAX 16 /* maximum bit length of any code (16 for explode) */
807 #define N_MAX 288 /* maximum number of codes in any set */
808 
809 
810 //unsigned hufts; /* track memory usage */
811 
812 #define memzero(s, n) bzero((s), (n))
813 
815  unsigned* b, /* code lengths in bits (all assumed <= BMAX) */
816  unsigned n, /* number of codes (assumed <= N_MAX) */
817  unsigned s, /* number of simple-valued codes (0..s-1) */
818  ush* d, /* list of base values for non-simple codes */
819  ush* e, /* list of extra bits for non-simple codes */
820  struct huft** t, /* result: starting table */
821  int* m ) /* maximum lookup bits, returns actual */
822 
823 /* Given a list of code lengths and a maximum table size, make a set of
824  tables to decode that set of codes. Return zero on success, one if
825  the given code set is incomplete (the tables are still built in this
826  case), two if the input is invalid (all zero length codes or an
827  oversubscribed set of lengths), and three if not enough memory. */
828 {
829  unsigned a; /* counter for codes of length k */
830  unsigned c[BMAX+1]; /* bit length count table */
831  unsigned f; /* i repeats in table every f entries */
832  int g; /* maximum code length */
833  int h; /* table level */
834  register unsigned i; /* counter, current code */
835  register unsigned j; /* counter */
836  register int k; /* number of bits in current code */
837  int l; /* bits per table (returned in m) */
838  register unsigned* p; /* pointer into c[], b[], or v[] */
839  register struct huft* q; /* points to current table */
840  struct huft r; /* table entry for structure assignment */
841  struct huft* u[BMAX]; /* table stack */
842  unsigned v[N_MAX]; /* values in order of bit length */
843  register int w; /* bits before this table == (l * h) */
844  unsigned x[BMAX+1]; /* bit offsets, then code stack */
845  unsigned* xp; /* pointer into x */
846  int y; /* number of dummy codes added */
847  unsigned z; /* number of entries in current table */
848 
849 
850  /* Generate counts for each bit length */
851  memzero( c, sizeof(c) );
852  p = b;
853  i = n;
854 
855  do
856  {
857  //Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" :"0x%x %d\n"),
858  // n-i, *p));
859  c[*p]++; /* assume all entries <= BMAX */
860  p++; /* Can't combine with above line (Solaris bug) */
861  } while ( --i );
862 
863  if ( c[0] == n ) /* null input--all zero length codes */
864  {
865  *t = (struct huft*) NULL;
866  *m = 0;
867  return 0;
868  }
869 
870 
871  /* Find minimum and maximum length, bound *m by those */
872  l = *m;
873 
874  for ( j = 1; j <= BMAX; j++ )
875  {
876  if ( c[j] ) break;
877  }
878 
879  k = j; /* minimum code length */
880 
881  if ( (unsigned)l < j ) l = j;
882 
883  for ( i = BMAX; i; i-- )
884  {
885  if ( c[i] ) break;
886  }
887 
888  g = i; /* maximum code length */
889 
890  if ((unsigned)l > i) l = i;
891 
892  *m = l;
893 
894 
895  /* Adjust last length count to fill out codes, if needed */
896  for (y = 1 << j; j < i; j++, y <<= 1)
897  {
898  if ( (y -= c[j]) < 0 ) return 2; /* bad input: more codes than bits */
899  }
900 
901  if ( ( y -= c[i] ) < 0 ) return 2;
902 
903  c[i] += y;
904 
905 
906  /* Generate starting offsets into the value table for each length */
907  x[1] = 0;
908  j = 0;
909  p = c + 1;
910  xp = x + 2;
911 
912  while ( --i )
913  { /* note that i == g from above */
914  *xp++ = (j += *p++);
915  }
916 
917  /* Make a table of values in order of bit lengths */
918  p = b;
919  i = 0;
920 
921  do
922  {
923  if ( (j = *p++) != 0 ) v[x[j]++] = i;
924  } while ( ++i < n );
925 
926  n = x[g]; /* set n to length of v */
927 
928 
929  /* Generate the Huffman codes and for each, make the table entries */
930 
931  x[0] = 0;
932  i = 0; /* first Huffman code is zero */
933  p = v; /* grab values in bit order */
934  h = -1; /* no tables yet--level -1 */
935  w = -l; /* bits decoded == (l * h) */
936  u[0] = (struct huft*) NULL; /* just to keep compilers happy */
937  q = (struct huft*) NULL; /* ditto */
938  z = 0; /* ditto */
939 
940  /* go through the bit lengths (k already is bits in shortest code) */
941  for ( ; k <= g; k++ )
942  {
943  a = c[k];
944  while ( a-- )
945  {
946  /* here i is the Huffman code of length k bits for value *p */
947  /* make tables up to required level */
948  while ( k > w + l )
949  {
950  h++;
951  w += l; /* previous table always l bits */
952 
953  /* compute minimum size table less than or equal to l bits */
954  z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */
955 
956  if ( (f = 1 << (j = k - w)) > a + 1 ) /* try a k-w bit table */
957  { /* too few codes for k-w bit table */
958  f -= a + 1; /* deduct codes from patterns left */
959  xp = c + k;
960  if ( j < z )
961  {
962  while ( ++j < z ) /* try smaller tables up to z bits */
963  {
964  if ( (f <<= 1) <= *++xp )
965  {
966  break; /* enough codes to use up j bits */
967  }
968 
969  f -= *xp; /* else deduct codes from patterns */
970  }
971  }
972  }
973 
974  z = 1 << j; /* table entries for j-bit table */
975 
976  /* allocate and link in new table */
977  if ( (q = (struct huft*) malloc( (z + 1) * sizeof(struct huft)) ) ==
978  (struct huft *) NULL )
979  {
980  if ( h ) huft_free( u[0] );
981  return 3; /* not enough memory */
982  }
983 
984  hufts += z + 1; /* track memory usage */
985  *t = q + 1; /* link to list for huft_free() */
986  *(t = &(q->v.t)) = (struct huft *)NULL;
987  u[h] = ++q; /* table starts after link */
988 
989  /* connect to last table, if there is one */
990  if ( h )
991  {
992  x[h] = i; /* save pattern for backing up */
993  r.b = (uch)l; /* bits to dump before this table */
994  r.e = (uch)(16 + j); /* bits in this table */
995  r.v.t = q; /* pointer to this table */
996  j = i >> (w - l); /* (get around Turbo C bug) */
997  u[h-1][j] = r; /* connect to last table */
998  }
999  }
1000 
1001  /* set up table entry in r */
1002  r.b = (uch)(k - w);
1003 
1004  if (p >= v + n)
1005  {
1006  r.e = 99; /* out of values--invalid code */
1007  }
1008  else if (*p < s)
1009  {
1010  r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */
1011  r.v.n = (ush)(*p); /* simple code is just the value */
1012  p++; /* one compiler does not like *p++ */
1013  }
1014  else
1015  {
1016  r.e = (uch) e[*p - s]; /* non-simple--look up in lists */
1017  r.v.n = d[*p++ - s];
1018  }
1019 
1020  /* fill code-like entries with r */
1021  f = 1 << (k - w);
1022 
1023  for ( j = i >> w; j < z; j += f )
1024  {
1025  q[j] = r;
1026  }
1027 
1028  /* backwards increment the k-bit code i */
1029  for ( j = 1 << (k - 1); i & j; j >>= 1 )
1030  {
1031  i ^= j;
1032  }
1033 
1034  i ^= j;
1035 
1036  /* backup over finished tables */
1037  while ( (i & ((1 << w) - 1)) != x[h] )
1038  {
1039  h--; /* don't need to update q */
1040  w -= l;
1041  }
1042  }
1043  }
1044 
1045  /* Return true (1) if we were given an incomplete table */
1046  return y != 0 && g != 1;
1047 }
1048 
1049 int US_Gzip::huft_free( struct huft* t ) /* table to free */
1050 /* Free the malloc'ed tables built by huft_build(), which makes a linked
1051  list of the tables it made, with the links in a dummy first entry of
1052  each table. */
1053 {
1054  register struct huft* p;
1055  register struct huft* q;
1056 
1057  /* Go through linked list, freeing from the malloced (t[-1]) address. */
1058  p = t;
1059 
1060  while ( p != (struct huft*) NULL )
1061  {
1062  q = (--p)->v.t;
1063  free( (char*) p );
1064  p = q;
1065  }
1066 
1067  return 0;
1068 }
1069 
1070 
1072  struct huft* tl,
1073  struct huft* td, /* literal/length and distance decoder tables */
1074  int bl,
1075  int bd /* number of bits decoded by tl[] and td[] */
1076  )
1077 /* inflate (decompress) the codes in a deflated (compressed) block.
1078  Return an error code or zero if it all goes ok. */
1079 {
1080  register unsigned e; /* table entry flag/number of extra bits */
1081  unsigned n;
1082  unsigned d; /* length and index for copy */
1083  unsigned w; /* current window position */
1084  struct huft* t; /* pointer to table entry */
1085  unsigned ml;
1086  unsigned md; /* masks for bl and bd bits */
1087  register ulg b; /* bit buffer */
1088  register unsigned k; /* number of bits in bit buffer */
1089 
1090  /* make local copies of globals */
1091  b = bb; /* initialize bit buffer */
1092  k = bk;
1093  w = wp; /* initialize window position */
1094 
1095  /* inflate the coded data */
1096  ml = mask_bits [ bl ]; /* precompute masks for speed */
1097  md = mask_bits [ bd ];
1098 
1099  for ( ; ; ) /* do until end of block */
1100  {
1101  NEEDBITS( (unsigned)bl )
1102 
1103  if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
1104  {
1105  do
1106  {
1107  if (e == 99) return 1;
1108  DUMPBITS( t->b )
1109  e -= 16;
1110  NEEDBITS( e )
1111  } while ( (e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16 );
1112  }
1113 
1114  DUMPBITS(t->b)
1115 
1116  if (e == 16) /* then it's a literal */
1117  {
1118  slide[w++] = (uch)t->v.n;
1119  //Tracevv((stderr, "%c", slide[w-1]));
1120 
1121  if (w == WSIZE)
1122  {
1123  flush_output(w);
1124  w = 0;
1125  }
1126  }
1127  else /* it's an EOB or a length */
1128  {
1129  /* exit if end of block */
1130  if (e == 15) break;
1131 
1132  /* get length of block to copy */
1133  NEEDBITS( e )
1134  n = t->v.n + ( (unsigned)b & mask_bits[e] );
1135  DUMPBITS( e );
1136 
1137  /* decode distance of block to copy */
1138  NEEDBITS( (unsigned) bd )
1139 
1140  if ( (e = (t = td + ((unsigned)b & md))->e) > 16 )
1141  {
1142  do
1143  {
1144  if ( e == 99 ) return 1;
1145  DUMPBITS(t->b)
1146  e -= 16;
1147  NEEDBITS(e)
1148  } while ( (e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16 );
1149  }
1150 
1151  DUMPBITS( t->b )
1152  NEEDBITS( e )
1153  d = w - t->v.n - ( (unsigned)b & mask_bits[e] );
1154  DUMPBITS( e )
1155  //Tracevv((stderr,"\\[%d,%d]", w-d, n));
1156 
1157  /* do the copy */
1158  do
1159  {
1160  n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
1161 
1162  if (w - d >= e) /* (this test assumes unsigned comparison) */
1163  {
1164  memcpy( slide + w, slide + d, e );
1165  w += e;
1166  d += e;
1167  }
1168  else /* do it slow to avoid memcpy() overlap */
1169  {
1170  do
1171  {
1172  slide[w++] = slide[d++];
1173  //Tracevv((stderr, "%c", slide[w-1]));
1174  } while (--e);
1175  }
1176 
1177  if ( w == WSIZE )
1178  {
1179  flush_output(w);
1180  w = 0;
1181  }
1182  } while ( n );
1183 
1184  }
1185  }
1186 
1187  /* restore the globals from the locals */
1188  wp = w; /* restore global window pointer */
1189  bb = b; /* restore global bit buffer */
1190  bk = k;
1191 
1192  /* done */
1193  return 0;
1194 }
1195 
1196 
1197 
1199 /* "decompress" an inflated type 0 (stored) block. */
1200 {
1201  unsigned n; /* number of bytes in block */
1202  unsigned w; /* current window position */
1203  register ulg b; /* bit buffer */
1204  register unsigned k; /* number of bits in bit buffer */
1205 
1206 
1207  /* make local copies of globals */
1208  b = bb; /* initialize bit buffer */
1209  k = bk;
1210  w = wp; /* initialize window position */
1211 
1212 
1213  /* go to byte boundary */
1214  n = k & 7;
1215  DUMPBITS( n );
1216 
1217 
1218  /* get the length and its complement */
1219  NEEDBITS( 16 )
1220  n = ( (unsigned) b & 0xffff );
1221  DUMPBITS( 16 )
1222 
1223  NEEDBITS( 16 )
1224  if ( n != (unsigned)( (~b) & 0xffff ) )
1225  {
1226  return 1; /* error in compressed data */
1227  }
1228  DUMPBITS(16)
1229 
1230 
1231  /* read and output the compressed data */
1232  while ( n-- )
1233  {
1234  NEEDBITS( 8 )
1235  slide [ w++ ] = (uch) b;
1236  if ( w == WSIZE )
1237  {
1238  flush_output( w );
1239  w = 0;
1240  }
1241  DUMPBITS( 8 )
1242  }
1243 
1244  /* restore the globals from the locals */
1245  wp = w; /* restore global window pointer */
1246  bb = b; /* restore global bit buffer */
1247  bk = k;
1248  return 0;
1249 }
1250 
1251 
1252 
1254 /* decompress an inflated type 1 (fixed Huffman codes) block. We should
1255  either replace this with a custom decoder, or at least precompute the
1256  Huffman tables. */
1257 {
1258  int i; /* temporary variable */
1259  struct huft* tl; /* literal/length code table */
1260  struct huft* td; /* distance code table */
1261  int bl; /* lookup bits for tl */
1262  int bd; /* lookup bits for td */
1263  unsigned l[288]; /* length list for huft_build */
1264 
1265 
1266  /* set up literal table */
1267  for ( i = 0; i < 144; i++ ) l [ i ] = 8;
1268  for ( ; i < 256; i++ ) l [ i ] = 9;
1269  for ( ; i < 280; i++ ) l [ i ] = 7;
1270 
1271  /* make a complete, but wrong code set */
1272  for ( ; i < 288; i++ ) l [ i ] = 8;
1273 
1274  bl = 7;
1275 
1276  if ( ( i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl ) ) != 0 )
1277  {
1278  return i;
1279  }
1280 
1281 
1282  /* set up distance table */
1283  for ( i = 0; i < 30; i++ ) l [ i ] = 5; /* make an incomplete code set */
1284 
1285  bd = 5;
1286 
1287  if ( ( i = huft_build( l, 30, 0, cpdist, cpdext, &td, &bd ) ) > 1 )
1288  {
1289  huft_free( tl );
1290  return i;
1291  }
1292 
1293 
1294  /* decompress until an end-of-block code */
1295  if ( inflate_codes( tl, td, bl, bd ) )
1296  {
1297  return 1;
1298  }
1299 
1300  /* free the decoding tables, return */
1301  huft_free( tl );
1302  huft_free( td );
1303  return 0;
1304 }
1305 
1306 
1307 
1309 /* decompress an inflated type 2 (dynamic Huffman codes) block. */
1310 {
1311  int i; /* temporary variables */
1312  unsigned j;
1313  unsigned l; /* last length */
1314  unsigned m; /* mask for bit lengths table */
1315  unsigned n; /* number of lengths to get */
1316  unsigned w; /* current window position */
1317  struct huft* tl; /* literal/length code table */
1318  struct huft* td; /* distance code table */
1319  int bl; /* lookup bits for tl */
1320  int bd; /* lookup bits for td */
1321  unsigned nb; /* number of bit length codes */
1322  unsigned nl; /* number of literal/length codes */
1323  unsigned nd; /* number of distance codes */
1324  unsigned ll[286+30]; /* literal/length and distance code lengths */
1325  register ulg b; /* bit buffer */
1326  register unsigned k; /* number of bits in bit buffer */
1327 
1328  static unsigned border[] = { /* Order of the bit length code lengths */
1329  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
1330 
1331 
1332  /* make local bit buffer */
1333  b = bb;
1334  k = bk;
1335  w = wp;
1336 
1337  /* read in table lengths */
1338  NEEDBITS( 5 )
1339  nl = 257 + ( (unsigned) b & 0x1f ); /* number of literal/length codes */
1340  DUMPBITS( 5 )
1341 
1342  NEEDBITS( 5 )
1343  nd = 1 + ((unsigned) b & 0x1f ); /* number of distance codes */
1344  DUMPBITS( 5 )
1345 
1346  NEEDBITS( 4 )
1347  nb = 4 + ( (unsigned) b & 0xf ); /* number of bit length codes */
1348  DUMPBITS( 4 )
1349 
1350  if ( nl > 286 || nd > 30 ) return 1; /* bad lengths */
1351 
1352 
1353  /* read in bit-length-code lengths */
1354  for ( j = 0; j < nb; j++ )
1355  {
1356  NEEDBITS( 3 )
1357  ll [ border[j] ] = (unsigned) b & 7;
1358  DUMPBITS( 3 )
1359  }
1360 
1361  for ( ; j < 19; j++ ) ll[ border[j] ] = 0;
1362 
1363  /* build decoding table for trees--single level, 7 bit lookup */
1364  bl = 7;
1365 
1366  if ( ( i = huft_build( ll, 19, 19, NULL, NULL, &tl, &bl ) ) != 0 )
1367  {
1368  if ( i == 1 )
1369  huft_free( tl );
1370  return i; /* incomplete code set */
1371  }
1372 
1373  if ( tl == NULL ) /* Grrrhhh */
1374  return 2;
1375 
1376  /* read in literal and distance code lengths */
1377  n = nl + nd;
1378  m = mask_bits[bl];
1379  i = 0;
1380  l = 0;
1381 
1382  while ( (unsigned) i < n )
1383  {
1384  NEEDBITS( (unsigned) bl )
1385  j = ( td = tl + ( (unsigned) b & m ) ) -> b;
1386  DUMPBITS( j )
1387 
1388  j = td->v.n;
1389  if ( j < 16 ) /* length of code in bits (0..15) */
1390  {
1391  ll [ i++ ] = l = j; /* save last length in l */
1392  }
1393  else if ( j == 16 ) /* repeat last length 3 to 6 times */
1394  {
1395  NEEDBITS( 2 )
1396  j = 3 + ((unsigned)b & 3);
1397  DUMPBITS( 2 )
1398 
1399  if ( (unsigned) i + j > n) return 1;
1400 
1401  while ( j-- ) ll [ i++ ] = l;
1402  }
1403  else if ( j == 17 ) /* 3 to 10 zero length codes */
1404  {
1405  NEEDBITS( 3 )
1406  j = 3 + ( (unsigned) b & 7 );
1407  DUMPBITS( 3 )
1408 
1409  if ( (unsigned) i + j > n ) return 1;
1410 
1411  while ( j-- ) ll[ i++ ] = 0;
1412  l = 0;
1413  }
1414  else /* j == 18: 11 to 138 zero length codes */
1415  {
1416  NEEDBITS( 7 )
1417  j = 11 + ( (unsigned) b & 0x7f );
1418  DUMPBITS( 7 )
1419 
1420  if ( (unsigned) i + j > n ) return 1;
1421 
1422  while ( j-- ) ll[i++] = 0;
1423  l = 0;
1424  }
1425  }
1426 
1427 
1428  /* free decoding table for trees */
1429  huft_free(tl);
1430 
1431 
1432  /* restore the global bit buffer */
1433  bb = b;
1434  bk = k;
1435 
1436 
1437  /* build the decoding tables for literal/length and distance codes */
1438  bl = lbits;
1439  if ( ( i = huft_build( ll, nl, 257, cplens, cplext, &tl, &bl ) ) != 0 )
1440  {
1441  if ( i == 1 )
1442  {
1443  huft_free( tl );
1444  }
1445 
1446  return i; /* incomplete code set */
1447  }
1448 
1449  bd = dbits;
1450  if ( ( i = huft_build( ll + nl, nd, 0, cpdist, cpdext, &td, &bd ) ) != 0 )
1451  {
1452  if (i == 1)
1453  {
1454  huft_free( td );
1455  }
1456 
1457  huft_free( tl );
1458  return i; /* incomplete code set */
1459  }
1460 
1461 
1462  /* decompress until an end-of-block code */
1463  if ( inflate_codes( tl, td, bl, bd ) ) return 1;
1464 
1465 
1466  /* free the decoding tables, return */
1467  huft_free( tl );
1468  huft_free( td );
1469  return 0;
1470 }
1471 
1472 
1473 
1475 //int *e; /* last block flag */
1476 /* decompress an inflated block */
1477 {
1478  unsigned t; /* block type */
1479  unsigned w; /* current window position */
1480  register ulg b; /* bit buffer */
1481  register unsigned k; /* number of bits in bit buffer */
1482 
1483  /* make local bit buffer */
1484  b = bb;
1485  k = bk;
1486  w = wp;
1487 
1488  /* read in last block bit */
1489  NEEDBITS( 1 )
1490 
1491  *e = (int) b & 1;
1492  DUMPBITS( 1 )
1493 
1494 
1495  /* read in block type */
1496  NEEDBITS( 2 )
1497  t = (unsigned) b & 3;
1498  DUMPBITS( 2 )
1499 
1500 
1501  /* restore the global bit buffer */
1502  bb = b;
1503  bk = k;
1504 
1505  /* inflate that block type */
1506  if ( t == 2 ) return inflate_dynamic();
1507  if ( t == 0 ) return inflate_stored();
1508  if ( t == 1 ) return inflate_fixed();
1509 
1510  /* bad block type */
1511  return 2;
1512 }
1513 
1514 
1515 
1517 /* decompress an inflated entry */
1518 {
1519  int e; /* last block flag */
1520  int r; /* result code */
1521  unsigned h; /* maximum struct huft's malloc'ed */
1522 
1523  /* initialize window, bit buffer */
1524  wp = 0;
1525  bk = 0;
1526  bb = 0;
1527 
1528  /* decompress until the last block */
1529  h = 0;
1530 
1531  do
1532  {
1533  hufts = 0;
1534  if ( (r = inflate_block( &e ) ) != 0 ) return r;
1535  if ( hufts > h ) h = hufts;
1536  } while ( ! e );
1537 
1538  /* Undo too much lookahead. The next read will be byte aligned so we
1539  * can discard unused bits in the last meaningful byte.
1540  */
1541 
1542  while ( bk >= 8 )
1543  {
1544  bk -= 8;
1545  inptr--;
1546  }
1547 
1548  /* flush out slide */
1549  flush_output( wp );
1550 
1551  /* return success */
1552  return 0;
1553 }
1554 
1555 /* ===========================================================================
1556  * * Fill the input buffer. This is called only when the buffer is empty.
1557  * */
1558 int US_Gzip::fill_inbuf( int eof_ok )
1559 //int eof_ok; /* set if EOF acceptable as a result */
1560 {
1561  int len;
1562 
1563  /* Read as much as possible */
1564  insize = 0;
1565  do
1566  {
1567  len = read ( ifd, (char*) inbuf + insize, INBUFSIZ - insize );
1568  if ( len == 0 ) break;
1569  if ( len == -1 )
1570  {
1571  throw GZIP_READERROR; //read_error();
1572  break;
1573  }
1574 
1575  insize += len;
1576  } while ( insize < INBUFSIZ );
1577 
1578  if ( insize == 0 )
1579  {
1580  if ( eof_ok ) return EOF;
1581  flush_window();
1582  throw GZIP_READERROR; //read_error();
1583  }
1584 
1585  bytes_in += (off_t) insize;
1586  inptr = 1;
1587  return inbuf[ 0 ];
1588 }
1589 
1590 /* ===========================================================================
1591  * Write the output window window[0..outcnt-1] and update crc and bytes_out.
1592  * (Used for the decompressed data only.)
1593  */
1595 {
1596  if ( outcnt == 0 ) return;
1597 
1598  updcrc( window, outcnt );
1599  write_buf( ofd, (char *) window, outcnt );
1600 
1601  bytes_out += (off_t) outcnt;
1602  outcnt = 0;
1603 }
1604 
1605 /* ===========================================================================
1606  * Run a set of bytes through the crc shift register. If s is a NULL
1607  * pointer, then initialize the crc shift register contents instead.
1608  * Return the current crc in either case.
1609  */
1610 ulg US_Gzip::updcrc( uch* s, unsigned n )
1611 //uch *s; /* pointer to bytes to pump through */
1612 //unsigned n; /* number of bytes in s[] */
1613 {
1614  register ulg c; /* temporary variable */
1615 
1616  static ulg crc_internal = (ulg) 0xffffffffL; /* shift register contents */
1617 
1618  if ( s == NULL)
1619  {
1620  c = 0xffffffffL;
1621  }
1622  else
1623  {
1624  c = crc_internal;
1625  if ( n ) do
1626  {
1627  c = crc_32_tab[ ( (int) c ^ ( *s++ ) ) & 0xff ] ^ ( c >> 8 );
1628  } while ( --n );
1629  }
1630 
1631  crc_internal = c;
1632  return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */
1633 }
1634 
1635 
1636 /* ===========================================================================
1637  * Does the same as write(), but also handles partial pipe writes and checks
1638  * for error return.
1639  */
1640 
1641 void US_Gzip::write_buf( int fd, void* buf, unsigned cnt )
1642  // int fd;
1643  // voidp buf;
1644  // unsigned cnt;
1645 {
1646  unsigned n;
1647 
1648  while ( ( n = write( fd, buf, cnt) ) != cnt)
1649  {
1650  if ( n == (unsigned) (-1) )
1651  {
1652  throw GZIP_WRITEERROR;
1653  }
1654 
1655  cnt -= n;
1656  buf = (void*) ( (char*) buf + n );
1657  }
1658 }
1659 
1660 /* ========================================================================
1661  * Return the base name of a file (remove any directory prefix and
1662  * any version suffix). For systems with file names that are not
1663  * case sensitive, force the base name to lower case.
1664  */
1665 
1666 #define PATH_SEP '/'
1667 
1668 #ifdef WIN32 /* Windows NT */
1669 # define PATH_SEP2 '\\'
1670 # define PATH_SEP3 ':'
1671 #endif
1672 
1673 char* US_Gzip::base_name( char* fname )
1674 {
1675  char* p;
1676  char* f = fname;
1677 
1678  if ( ( p = strrchr( f, PATH_SEP ) ) != NULL ) f = p + 1;
1679 
1680 #ifdef PATH_SEP2
1681  if ( ( p = strrchr( f, PATH_SEP2 ) ) != NULL ) f = p + 1;
1682 #endif
1683 
1684 #ifdef PATH_SEP3
1685  if ( ( p = strrchr( f, PATH_SEP3 ) ) != NULL ) f = p + 1;
1686 #endif
1687 
1688  return f;
1689 }
1690 
1691 /* ===========================================================================
1692  * Write the output buffer outbuf[0..outcnt-1] and update bytes_out.
1693  * (used for the compressed data only)
1694  */
1696 {
1697  if ( outcnt == 0) return;
1698 
1699  write_buf( ofd, (char *) outbuf, outcnt );
1700  bytes_out += (off_t) outcnt;
1701  outcnt = 0;
1702 }
1703 
1704 /* ===========================================================================
1705  * We use a lazy evaluation for matches: a match is finally adopted only if
1706  * there is no better match at the next window position. */
1707 
1708 #define HASH_BITS 15
1709  /* For portability to 16 bit machines, do not use values above 15. */
1710 
1711 #define MIN_LOOKAHEAD ( MAX_MATCH + MIN_MATCH + 1 )
1712 /* Minimum amount of lookahead, except at the end of the input file.
1713  * See deflate.c for comments about the MIN_MATCH + 1. */
1714 
1715 #define MAX_DIST ( WSIZE - MIN_LOOKAHEAD )
1716 /* In order to simplify the code, particularly on 16 bit machines, match
1717  * distances are limited to MAX_DIST instead of WSIZE. */
1718 
1719 #define HASH_SIZE (unsigned) ( 1 << HASH_BITS )
1720 #define HASH_MASK ( HASH_SIZE - 1 )
1721 #define WMASK ( WSIZE - 1 )
1722 /* HASH_SIZE and WSIZE must be powers of two */
1723 
1724 
1725 
1726 #define H_SHIFT ( (HASH_BITS + MIN_MATCH - 1) / MIN_MATCH )
1727 /* Number of bits by which ins_h and del_h must be shifted at each
1728  * input step. It must be such that after MIN_MATCH steps, the oldest
1729  * byte no longer takes part in the hash key, that is:
1730  * H_SHIFT * MIN_MATCH >= HASH_BITS */
1731 
1732 
1733 /* ===========================================================================
1734  * Update a hash value with the given input byte
1735  * IN assertion: all calls to to UPDATE_HASH are made with consecutive
1736  * input characters, so that a running hash key can be computed from the
1737  * previous key instead of complete recalculation each time. */
1738 
1739 #define UPDATE_HASH(h,c) ( h = ( ( (h) << H_SHIFT ) ^ (c)) & HASH_MASK )
1740 
1741 
1742 /* ===========================================================================
1743  * Insert string s in the dictionary and set match_head to the previous head
1744  * of the hash chain (the most recent string with same hash key). Return
1745  * the previous length of the hash chain.
1746  * IN assertion: all calls to to INSERT_STRING are made with consecutive
1747  * input characters and the first MIN_MATCH bytes of s are valid
1748  * (except for the last MIN_MATCH-1 bytes of the input file). */
1749 
1750 #define INSERT_STRING(s, match_head) \
1751  ( UPDATE_HASH( ins_h, window[ (s) + MIN_MATCH - 1 ] ), \
1752  prev[ (s) & WMASK ] = match_head = head[ ins_h ], \
1753  head[ ins_h ] = (s) )
1754 
1755 /* ===========================================================================
1756  * Flush the current block, with given end-of-file flag.
1757  * IN assertion: strstart is set to the end of the current match. */
1758 
1759 #define FLUSH_BLOCK(eof) \
1760  flush_block( block_start >= 0L ? (char*) &window[ (unsigned) block_start ] : \
1761  (char*) NULL, (long) strstart - block_start, (eof) )
1762 
1763 
1764 off_t US_Gzip::deflate( void )
1765 {
1766  IPos hash_head; /* head of hash chain */
1767  IPos prev_match; /* previous match */
1768  int flush; /* set if current block must be flushed */
1769  int match_available = 0; /* set if previous match exists */
1770  register unsigned match_length = MIN_MATCH - 1; /* length of best match */
1771 
1772  /* Process the input block. */
1773  while ( lookahead != 0 )
1774  {
1775  /* Insert the string window[strstart .. strstart+2] in the
1776  * dictionary, and set hash_head to the head of the hash chain: */
1777 
1778  INSERT_STRING( strstart, hash_head );
1779 
1780  /* Find the longest match, discarding those <= prev_length. */
1781 
1782  prev_length = match_length, prev_match = match_start;
1783  match_length = MIN_MATCH-1;
1784 
1785  if ( hash_head != (ush) 0 &&
1786  prev_length < max_lazy_match &&
1787  strstart - hash_head <= MAX_DIST )
1788  {
1789  /* To simplify the code, we prevent matches with the string
1790  * of window index 0 (in particular we have to avoid a match
1791  * of the string with itself at the start of the input file). */
1792 
1793  match_length = longest_match( hash_head );
1794 
1795  /* longest_match() sets match_start */
1796 
1797  if ( match_length > lookahead ) match_length = lookahead;
1798 
1799  /* Ignore a length 3 match if it is too distant: */
1800 
1801  #define TOO_FAR 4096
1802  if ( match_length == MIN_MATCH && strstart - match_start > TOO_FAR )
1803  {
1804  /* If prev_match is also MIN_MATCH, match_start is garbage
1805  * but we will ignore the current match anyway. */
1806 
1807  match_length--;
1808  }
1809  }
1810 
1811  /* If there was a match at the previous step and the current
1812  * match is not better, output the previous match: */
1813 
1814  if ( prev_length >= MIN_MATCH && match_length <= prev_length )
1815  {
1816  flush = ct_tally( strstart - 1 - prev_match, prev_length - MIN_MATCH );
1817 
1818  /* Insert in hash table all strings up to the end of the match.
1819  * strstart-1 and strstart are already inserted. */
1820 
1821  lookahead -= prev_length - 1;
1822  prev_length -= 2;
1823 
1824  do
1825  {
1826  strstart++;
1827  INSERT_STRING(strstart, hash_head);
1828 
1829  /* strstart never exceeds WSIZE-MAX_MATCH, so there are
1830  * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
1831  * these bytes are garbage, but it does not matter since the
1832  * next lookahead bytes will always be emitted as literals. */
1833  } while ( --prev_length != 0 );
1834 
1835  match_available = 0;
1836  match_length = MIN_MATCH - 1;
1837  strstart++;
1838 
1839  if ( flush ) FLUSH_BLOCK(0), block_start = strstart;
1840 
1841  }
1842  else if ( match_available )
1843  {
1844  /* If there was no match at the previous position, output a
1845  * single literal. If there was a match but the current match
1846  * is longer, truncate the previous match to a single literal. */
1847 
1848  if ( ct_tally ( 0, window[ strstart - 1 ] ) )
1849  {
1850  FLUSH_BLOCK(0), block_start = strstart;
1851  }
1852 
1853  strstart++;
1854  lookahead--;
1855  }
1856  else
1857  {
1858  /* There is no previous match to compare with, wait for
1859  * the next step to decide. */
1860 
1861  match_available = 1;
1862  strstart++;
1863  lookahead--;
1864 
1865  /* Make sure that we always have enough lookahead, except
1866  * at the end of the input file. We need MAX_MATCH bytes
1867  * for the next match, plus MIN_MATCH bytes to insert the
1868  * string following the next match. */
1869 
1870  } while ( lookahead < MIN_LOOKAHEAD && ! eofile ) fill_window();
1871  }
1872 
1873  if ( match_available ) ct_tally ( 0, window[strstart -1 ] );
1874 
1875  return FLUSH_BLOCK(1); /* eof */
1876 }
1877 
1878 /* ===========================================================================
1879  * * Initialize the "longest match" routines for a new file
1880  * */
1881 void US_Gzip::lm_init( void )
1882 // Paramaters are not needed here
1883 // int pack_level; /* 0: store, 1: best speed, 9: best compression */
1884 // ush *flags; /* general purpose bit flag */
1885 {
1886 
1887 
1888  // While we are at it, initilize other variables needed
1889  // These were globals inthe original
1890 
1891  /* tree_desc l_desc =
1892  {
1893  dyn_ltree, static_ltree, extra_lbits, LITERALS + 1, L_CODES, MAX_BITS, 0
1894  }; */
1895 
1896  l_desc.dyn_tree = dyn_ltree;
1897  l_desc.static_tree = static_ltree;
1898  l_desc.extra_bits = extra_lbits;
1899  l_desc.extra_base = LITERALS + 1;
1900  l_desc.elems = L_CODES;
1901  l_desc.max_length = MAX_BITS;
1902  l_desc.max_code = 0;
1903 
1904  /* tree_desc xd_desc =
1905  {
1906  dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0
1907  }; */
1908 
1909  d_desc.dyn_tree = dyn_dtree;
1910  d_desc.static_tree = static_dtree;
1911  d_desc.extra_bits = extra_dbits;
1912  d_desc.extra_base = 0;
1913  d_desc.elems = D_CODES;
1914  d_desc.max_length = MAX_BITS;
1915  d_desc.max_code = 0;
1916 
1917  /* tree_desc xbl_desc =
1918  {
1919  bl_tree, (ct_data*)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0
1920  };*/
1921 
1922  bl_desc.dyn_tree = bl_tree;
1923  bl_desc.static_tree = 0;
1924  bl_desc.extra_bits = extra_blbits;
1925  bl_desc.extra_base = 0;
1926  bl_desc.elems = BL_CODES;
1927  bl_desc.max_length = MAX_BL_BITS;
1928  bl_desc.max_code = 0;
1929 
1931 #define pack_level 9
1932 
1933  /* Initialize the hash table. */
1934  memzero( (char*) head, HASH_SIZE * sizeof( *head ) );
1935 
1936  /* prev will be initialized on the fly */
1937 
1938  /* Set the default configuration parameters: */
1939  max_lazy_match = configuration_table[ pack_level ].max_lazy;
1940  good_match = configuration_table[ pack_level ].good_length;
1941  nice_match = configuration_table[ pack_level ].nice_length;
1942  max_chain_length = configuration_table[ pack_level ].max_chain;
1943 
1944  strstart = 0;
1945  block_start = 0L;
1946 
1947  updcrc( NULL, 0 ); /* initialize crc */
1948  lookahead = file_read( (char*) window, 2 * WSIZE );
1949 
1950  if ( lookahead == 0 )
1951  {
1952  eofile = 1;
1953  lookahead = 0;
1954  return;
1955  }
1956 
1957  eofile = 0;
1958 
1959  /* Make sure that we always have enough lookahead. This is important
1960  * if input comes from a device such as a tty. */
1961 
1962  while ( lookahead < MIN_LOOKAHEAD && ! eofile ) fill_window();
1963 
1964  ins_h = 0;
1965 
1966  register unsigned j;
1967  for ( j = 0; j < MIN_MATCH - 1; j++ ) UPDATE_HASH( ins_h, window[ j ] );
1968 
1969  /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
1970  * not important since only literal bytes will be emitted. */
1971 }
1972 
1973 /* ===========================================================================
1974  * Read a new buffer from the current input file, perform end-of-line
1975  * translation, and update the crc and input file size.
1976  * IN assertion: size >= 2 (for end-of-line translation) */
1977 int US_Gzip::file_read( char* buf, unsigned int size )
1978 {
1979  unsigned len;
1980 
1981  len = read( ifd, buf, size );
1982  if ( len == 0 ) return (int) len;
1983 
1984  if ( len == (unsigned) -1 )
1985  {
1986  throw GZIP_READERROR;
1987  }
1988 
1989  crc = updcrc( (uch*)buf, len );
1990  bytes_in += (off_t)len;
1991 
1992  return (int) len;
1993 }
1994 
1995 /* ===========================================================================
1996  * Fill the window when the lookahead becomes insufficient.
1997  * Updates strstart and lookahead, and sets eofile if end of input file.
1998  * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
1999  * OUT assertions: at least one byte has been read, or eofile is set;
2000  * file reads are performed for at least two bytes (required for the
2001  * translate_eol option). */
2002 
2004 {
2005  register unsigned n;
2006  register unsigned m;
2007 
2008  unsigned more = (unsigned) ( 2L * WSIZE - (ulg) lookahead - (ulg) strstart );
2009  /* Amount of free space at the end of the window. */
2010 
2011  /* If the window is almost full and there is insufficient lookahead,
2012  * * move the upper half to the lower one to make room in the upper half.
2013  * */
2014  if ( more == (unsigned) EOF )
2015  {
2016  /* Very unlikely, but possible on 16 bit machine if strstart == 0
2017  * and lookahead == 1 (input done one byte at time) */
2018  more--;
2019  }
2020  else
2021  {
2022  if ( strstart >= WSIZE + MAX_DIST )
2023  {
2024  /* By the IN assertion, the window is not empty so we can't confuse
2025  * more == 0 with more == 64K on a 16 bit machine. */
2026 
2027  memcpy( (char*) window, (char*) window + WSIZE, (unsigned) WSIZE );
2028 
2029  match_start -= WSIZE;
2030  strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
2031 
2032  block_start -= (long) WSIZE;
2033 
2034  for ( n = 0; n < HASH_SIZE; n++ )
2035  {
2036  m = head[n];
2037  head[n] = (Pos)(m >= WSIZE ? m - WSIZE : 0 );
2038  }
2039 
2040  for ( n = 0; n < WSIZE; n++ )
2041  {
2042  m = prev[n];
2043  prev[n] = (Pos)(m >= WSIZE ? m - WSIZE : 0 );
2044  /* If n is not on any hash chain, prev[n] is garbage but
2045  * its value will never be used. */
2046  }
2047 
2048  more += WSIZE;
2049  }
2050  }
2051 
2052  /* At this point, more >= 2 */
2053  if ( ! eofile )
2054  {
2055  n = file_read( (char*) window + strstart + lookahead, more );
2056  if ( n == 0 || n == (unsigned) EOF)
2057  {
2058  eofile = 1;
2059  }
2060  else
2061  {
2062  lookahead += n;
2063  }
2064  }
2065 }
2066 
2068 // IPos cur_match; /* current match */
2069 {
2070  unsigned chain_length = max_chain_length; /* max hash chain length */
2071  register uch* scan = window + strstart; /* current string */
2072  register uch* match; /* matched string */
2073  register int len; /* length of current match */
2074  int best_len = prev_length; /* best match length so far */
2075 
2076  IPos limit = strstart > (IPos) MAX_DIST ? strstart - (IPos) MAX_DIST : 0;
2077 
2078  /* Stop when cur_match becomes <= limit. To simplify the code,
2079  * we prevent matches with the string of window index 0. */
2080 
2081  /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
2082  * It is easy to get rid of this optimization if necessary. */
2083 
2084 #ifdef UNALIGNED_OK
2085  /* Compare two bytes at a time. Note: this is not always beneficial.
2086  * Try with and without -DUNALIGNED_OK to check. */
2087 
2088  register uch* strend = window + strstart + MAX_MATCH - 1;
2089  register ush scan_start = *(ush*) scan;
2090  register ush scan_end = *(ush*) (scan + best_len - 1 );
2091 #else
2092  register uch* strend = window + strstart + MAX_MATCH;
2093  register uch scan_end1 = scan[ best_len - 1 ];
2094  register uch scan_end = scan[ best_len ];
2095 #endif
2096 
2097  /* Do not waste too much time if we already have a good match: */
2098 
2099  if ( prev_length >= good_match )
2100  {
2101  chain_length >>= 2;
2102  }
2103 
2104  do
2105  {
2106  match = window + cur_match;
2107 
2108  /* Skip to next match if the match length cannot increase
2109  * or if the match length is less than 2: */
2110 
2111 #if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
2112  /* This code assumes sizeof(unsigned short) == 2. Do not use
2113  * UNALIGNED_OK if your compiler uses a different size. */
2114 
2115  if ( *(ush*)( match + best_len - 1 ) != scan_end ||
2116  *(ush*) match != scan_start) continue;
2117 
2118  /* It is not necessary to compare scan[2] and match[2] since they are
2119  * always equal when the other bytes match, given that the hash keys
2120  * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
2121  * strstart+3, +5, ... up to strstart+257. We check for insufficient
2122  * lookahead only every 4th comparison; the 128th check will be made
2123  * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
2124  * necessary to put more guard bytes at the end of the window, or
2125  * to check more often for insufficient lookahead. */
2126 
2127  scan++;
2128  match++;
2129 
2130  do
2131  {
2132  } while ( *(ush*) ( scan += 2 ) == *(ush*) ( match += 2 ) &&
2133  *(ush*) ( scan += 2 ) == *(ush*) ( match += 2 ) &&
2134  *(ush*) ( scan += 2 ) == *(ush*) ( match += 2 ) &&
2135  *(ush*) ( scan += 2 ) == *(ush*) ( match += 2 ) &&
2136  scan < strend);
2137 
2138  /* The funny "do {}" generates better code on most compilers */
2139 
2140  /* Here, scan <= window+strstart + 257 */
2141  if ( *scan == *match ) scan++;
2142 
2143  len = ( MAX_MATCH - 1 ) - (int) ( strend - scan );
2144  scan = strend - ( MAX_MATCH - 1 );
2145 
2146 #else /* UNALIGNED_OK */
2147  if ( match[ best_len ] != scan_end ||
2148  match[ best_len - 1 ] != scan_end1 ||
2149  *match != *scan ||
2150  * ++match != scan[ 1 ] ) continue;
2151 
2152  /* The check at best_len-1 can be removed because it will be made
2153  * again later. (This heuristic is not always a win.)
2154  * It is not necessary to compare scan[2] and match[2] since they
2155  * are always equal when the other bytes match, given that
2156  * the hash keys are equal and that HASH_BITS >= 8. */
2157 
2158  scan += 2, match++;
2159 
2160  /* We check for insufficient lookahead only every 8th comparison;
2161  * the 256th check will be made at strstart+258. */
2162 
2163  do
2164  {
2165  } while ( * ++scan == * ++match && * ++scan == * ++match &&
2166  * ++scan == * ++match && * ++scan == * ++match &&
2167  * ++scan == * ++match && * ++scan == * ++match &&
2168  * ++scan == * ++match && * ++scan == * ++match &&
2169  scan < strend );
2170 
2171  len = MAX_MATCH - (int)( strend - scan );
2172  scan = strend - MAX_MATCH;
2173 
2174 #endif /* UNALIGNED_OK */
2175 
2176  if ( len > best_len )
2177  {
2178  match_start = cur_match;
2179  best_len = len;
2180  if ( len >= nice_match ) break;
2181 #ifdef UNALIGNED_OK
2182  scan_end = *(ush*) ( scan + best_len - 1 );
2183 #else
2184  scan_end1 = scan[ best_len - 1 ];
2185  scan_end = scan[ best_len ];
2186 #endif
2187  }
2188  } while ( ( cur_match = prev[ cur_match & WMASK ] ) > limit
2189  && --chain_length != 0 );
2190 
2191  return best_len;
2192 }
2193 
2194 /* ===========================================================================
2195  * Initialize the bit string routines. */
2196 
2197 void US_Gzip::bi_init ( void )
2198 // file_t zipfile; /* output zip file, NO_FILE for in-memory compression */
2199 {
2200  bi_buf = 0;
2201  bi_valid = 0;
2202 
2203  /* Set the defaults for file compression. They are set by memcompress
2204  * for in-memory compression. */
2205 }
2206 
2207 #define d_code(dist) \
2208  ( (dist) < 256 ? dist_code[ dist ] : dist_code[ 256 + ( ( dist ) >> 7 ) ] )
2209  /* Mapping from a distance to a distance code. dist is the distance - 1 and
2210  * must not have side effects. dist_code[256] and dist_code[257] are never
2211  * used. */
2212 
2213 #define STORED_BLOCK 0
2214 #define STORED STORED_BLOCK
2215 #define STATIC_TREES 1
2216 #define DYN_TREES 2
2217 /* The three kinds of block type */
2218 
2219 #ifndef DIST_BUFSIZE
2220 # define DIST_BUFSIZE LIT_BUFSIZE
2221 #endif
2222  /* Sizes of match buffers for literals/lengths and distances. There are
2223  * 4 reasons for limiting LIT_BUFSIZE to 64K:
2224  * - frequencies can be kept in 16 bit counters
2225  * - if compression is not successful for the first block, all input data is
2226  * still in the window so we can still emit a stored block even when input
2227  * comes from standard input. (This can also be done for all blocks if
2228  * LIT_BUFSIZE is not greater than 32K.)
2229  * - if compression is not successful for a file smaller than 64K, we can
2230  * even emit a stored file instead of a stored block (saving 5 bytes).
2231  * - creating new Huffman trees less frequently may not provide fast
2232  * adaptation to changes in the input data statistics. (Take for
2233  * example a binary file with poorly compressible code followed by
2234  * a highly compressible string table.) Smaller buffer sizes give
2235  * fast adaptation but have of course the overhead of transmitting trees
2236  * more frequently.
2237  * - I can't count above 4
2238  * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
2239  * memory at the expense of compression). Some optimizations would be possible
2240  * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */
2241 
2242 /* ===========================================================================
2243  * Allocate the match buffer, initialize the various tables and save the
2244  * location of the internal file attribute (ascii/binary) and method
2245  * (DEFLATE/STORE).
2246  */
2247 
2248 void US_Gzip::ct_init ( void )
2249 {
2250  int n; /* iterates over tree elements */
2251  int bits; /* bit counter */
2252  int length; /* length value */
2253  int code; /* code value */
2254  int dist; /* distance index */
2255 
2256  compressed_len = 0L;
2257 
2258  if ( static_dtree[ 0 ].Len != 0 ) return; /* ct_init already called */
2259 
2260  /* Initialize the mapping length (0..255) -> length code (0..28) */
2261  length = 0;
2262  for ( code = 0; code < LENGTH_CODES - 1; code++ )
2263  {
2264  base_length[code] = length;
2265  for ( n = 0; n < ( 1 << extra_lbits[ code ] ); n++ )
2266  {
2267  length_code[length++] = (uch)code;
2268  }
2269  }
2270 
2271  /* Note that the length 255 (match length 258) can be represented
2272  * in two different ways: code 284 + 5 bits or code 285, so we
2273  * overwrite length_code[255] to use the best encoding:
2274  */
2275  length_code[ length - 1 ] = (uch)code;
2276 
2277  /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
2278  dist = 0;
2279  for ( code = 0 ; code < 16; code++ )
2280  {
2281  base_dist[ code ] = dist;
2282  for ( n = 0; n < ( 1 << extra_dbits[ code ] ); n++ )
2283  {
2284  dist_code[ dist++ ] = (uch) code;
2285  }
2286  }
2287 
2288  dist >>= 7; /* from now on, all distances are divided by 128 */
2289  for ( ; code < D_CODES; code++ )
2290  {
2291  base_dist[ code ] = dist << 7;
2292  for ( n = 0; n < ( 1 << ( extra_dbits[ code ] - 7 ) ); n++ )
2293  {
2294  dist_code[256 + dist++] = (uch)code;
2295  }
2296  }
2297 
2298  /* Construct the codes of the static literal tree */
2299  for ( bits = 0; bits <= MAX_BITS; bits++ ) bl_count[ bits ] = 0;
2300 
2301  n = 0;
2302  while ( n <= 143 ) static_ltree[ n++ ].Len = 8, bl_count[ 8 ]++;
2303  while ( n <= 255 ) static_ltree[ n++ ].Len = 9, bl_count[ 9 ]++;
2304  while ( n <= 279 ) static_ltree[ n++ ].Len = 7, bl_count[ 7 ]++;
2305  while ( n <= 287 ) static_ltree[ n++ ].Len = 8, bl_count[ 8 ]++;
2306 
2307  /* Codes 286 and 287 do not exist, but we must include them in the
2308  * tree construction to get a canonical Huffman tree (longest code
2309  * all ones)
2310  */
2311  gen_codes( (ct_data*) static_ltree, L_CODES + 1 );
2312 
2313  /* The static distance tree is trivial: */
2314  for ( n = 0; n < D_CODES; n++ )
2315  {
2316  static_dtree[ n ].Len = 5;
2317  static_dtree[ n ].Code = bi_reverse( n, 5 );
2318  }
2319 
2320  /* Initialize the first block of the first file: */
2321  init_block();
2322 }
2323 
2324 /* ===========================================================================
2325  * Save the match info and tally the frequency counts. Return true if
2326  * the current block must be flushed. */
2327 
2328 int US_Gzip::ct_tally ( int dist, int lc )
2329 // int dist; /* distance of matched string */
2330 // int lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
2331 {
2332  inbuf[ last_lit++ ] = (uch) lc;
2333 
2334  if ( dist == 0)
2335  {
2336  /* lc is the unmatched char */
2337  dyn_ltree[ lc ].Freq++;
2338  }
2339  else
2340  {
2341  /* Here, lc is the match length - MIN_MATCH */
2342  dist--; /* dist = match distance - 1 */
2343 
2344  dyn_ltree[ length_code[ lc ] + LITERALS + 1 ].Freq++;
2345  dyn_dtree[ d_code( dist ) ].Freq++;
2346 
2347  d_buf[ last_dist++ ] = (ush) dist;
2348  flags |= flag_bit;
2349  }
2350 
2351  flag_bit <<= 1;
2352 
2353  /* Output the flags if they fill a byte: */
2354  if ( ( last_lit & 7 ) == 0 )
2355  {
2356  flag_buf[ last_flags++ ] = flags;
2357  flags = 0;
2358  flag_bit = 1;
2359  }
2360 
2361  /* Try to guess if it is profitable to stop the current block here */
2362 #define level 9
2363  if ( level > 2 && ( last_lit & 0xfff ) == 0)
2364  {
2365  /* Compute an upper bound for the compressed length */
2366  ulg out_length = (ulg) last_lit * 8L;
2367  ulg in_length = (ulg) strstart - block_start;
2368  int dcode;
2369 
2370  for ( dcode = 0; dcode < D_CODES; dcode++ )
2371  {
2372  out_length += (ulg)dyn_dtree[dcode].Freq*(5L+extra_dbits[dcode]);
2373  }
2374 
2375  out_length >>= 3;
2376  if ( last_dist < last_lit / 2 && out_length < in_length / 2) return 1;
2377  }
2378 
2379  return ( last_lit == LIT_BUFSIZE - 1 || last_dist == DIST_BUFSIZE );
2380  /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K
2381  * on 16 bit machines and because stored blocks are restricted to
2382  * 64K - 1 bytes. */
2383 }
2384 
2385 /* ===========================================================================
2386  * Determine the best encoding for the current block: dynamic trees, static
2387  * trees or store, and output the encoded block to the zip file. This function
2388  * returns the total compressed length for the file so far. */
2389 
2390 off_t US_Gzip::flush_block( char* buf, ulg stored_len, int eof )
2391 // char *buf; /* input block, or NULL if too old */
2392 // ulg stored_len; /* length of input block */
2393 // int eof; /* true if this is the last block for a file */
2394 {
2395  ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
2396  int max_blindex; /* index of last bit length code of non zero freq */
2397 
2398  flag_buf[ last_flags ] = flags; /* Save the flags for the last 8 items */
2399 
2400  /* Check if the file is ascii or binary */
2401  //if ( *file_type == (ush) UNKNOWN) set_file_type();
2402 
2403  /* Construct the literal and distance trees */
2404  build_tree( (tree_desc*) ( &l_desc ) );
2405 
2406  build_tree( (tree_desc*) ( &d_desc ) );
2407 
2408  /* At this point, opt_len and static_len are the total bit lengths of
2409  * the compressed block data, excluding the tree representations. */
2410 
2411  /* Build the bit length tree for the above two trees, and get the index
2412  * in bl_order of the last bit length code to send. */
2413 
2414  max_blindex = build_bl_tree();
2415 
2416  /* Determine the best encoding. Compute first the block length in bytes */
2417  opt_lenb = ( opt_len + 3 + 7 ) >> 3;
2418  static_lenb = ( static_len + 3 + 7 ) >> 3;
2419  //input_len += stored_len; /* for debugging only */
2420 
2421  if ( static_lenb <= opt_lenb ) opt_lenb = static_lenb;
2422 
2423  /* If compression failed and this is the first and last block,
2424  * and if the zip file can be seeked (to rewrite the local header),
2425  * the whole file is transformed into a stored file: */
2426 
2427 #define seekable() 0 /* force sequential output */
2428 
2429 #ifdef FORCE_METHOD
2430  if (level == 1 && eof && compressed_len == 0L) { /* force stored file */
2431 #else
2432  if ( stored_len <= opt_lenb && eof && compressed_len == 0L && seekable() )
2433  {
2434 #endif
2435 
2436  /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
2437  if ( buf == (char*) 0) throw GZIP_INTERNAL;
2438 
2439  copy_block( buf, (unsigned) stored_len, 0 ); /* without header */
2440  compressed_len = stored_len << 3;
2441  *file_method = STORED;
2442 
2443 #ifdef FORCE_METHOD
2444  }
2445  else
2446  if (level == 2 && buf != (char*)0)
2447  { /* force stored block */
2448 #else
2449  }
2450  else
2451  if ( stored_len + 4 <= opt_lenb && buf != (char*) 0 )
2452  {
2453  /* 4: two words for the lengths */
2454 #endif
2455  /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
2456  * Otherwise we can't have processed more than WSIZE input bytes since
2457  * the last block flush, because compression would have been
2458  * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
2459  * transform a block into a stored block. */
2460 
2461  send_bits( ( STORED_BLOCK << 1 ) + eof, 3 ); /* send block type */
2462  compressed_len = ( compressed_len + 3 + 7 ) & ~7L;
2463  compressed_len += ( stored_len + 4 ) << 3;
2464 
2465  copy_block( buf, (unsigned) stored_len, 1 ); /* with header */
2466 
2467 #ifdef FORCE_METHOD
2468  }
2469  else
2470  if ( level == 3 )
2471  { /* force static trees */
2472 #else
2473  } else
2474  if ( static_lenb == opt_lenb )
2475  {
2476 #endif
2477  send_bits( ( STATIC_TREES << 1 ) + eof, 3 );
2478  compress_block( (ct_data*) static_ltree, ( ct_data*) static_dtree );
2479  compressed_len += 3 + static_len;
2480  } else
2481  {
2482  send_bits( (DYN_TREES << 1 ) + eof, 3 );
2483  send_all_trees( l_desc.max_code + 1, d_desc.max_code + 1, max_blindex + 1 );
2484  compress_block( (ct_data*) dyn_ltree, (ct_data*) dyn_dtree );
2485  compressed_len += 3 + opt_len;
2486  }
2487 
2488  init_block();
2489 
2490  if ( eof )
2491  {
2492  bi_windup();
2493  compressed_len += 7; /* align on byte boundary */
2494  }
2495 
2496  return compressed_len >> 3;
2497 }
2498 
2499 /* ===========================================================================
2500  * Initialize a new block. */
2501 
2503 {
2504  int n; /* iterates over tree elements */
2505 
2506  /* Initialize the trees. */
2507  for ( n = 0; n < L_CODES; n++ ) dyn_ltree[ n ].Freq = 0;
2508  for ( n = 0; n < D_CODES; n++ ) dyn_dtree[ n ].Freq = 0;
2509  for ( n = 0; n < BL_CODES; n++ ) bl_tree [ n ].Freq = 0;
2510 
2511  dyn_ltree[ END_BLOCK ].Freq = 1;
2512  opt_len = static_len = 0L;
2513  last_lit = last_dist = last_flags = 0;
2514  flags = 0; flag_bit = 1;
2515 }
2516 
2517 #define send_code(c, tree) send_bits( tree[ c ].Code, tree[ c ].Len )
2518  /* Send a code of the given tree. c and tree must not have side effects */
2519 
2520 #define l_buf inbuf
2521 
2522 
2523 /* ===========================================================================
2524  * Send the block data compressed using the given Huffman trees */
2526 // ct_data* ltree; /* literal tree */
2527 // ct_data* dtree; /* distance tree */
2528 {
2529  unsigned dist; /* distance of matched string */
2530  int lc; /* match length or unmatched char (if dist == 0) */
2531  unsigned lx = 0; /* running index in l_buf */
2532  unsigned dx = 0; /* running index in d_buf */
2533  unsigned fx = 0; /* running index in flag_buf */
2534  uch flag = 0; /* current flags */
2535  unsigned code; /* the code to send */
2536  int extra; /* number of extra bits to send */
2537 
2538  if ( last_lit != 0 )
2539  do
2540  {
2541  if ( ( lx & 7 ) == 0 ) flag = flag_buf[ fx++ ];
2542  lc = l_buf[ lx++ ];
2543 
2544  if ( ( flag & 1 ) == 0 )
2545  {
2546  send_code( lc, ltree ); /* send a literal byte */
2547  }
2548  else
2549  {
2550  /* Here, lc is the match length - MIN_MATCH */
2551  code = length_code[ lc ];
2552  send_code( code + LITERALS + 1, ltree ); /* send the length code */
2553  extra = extra_lbits[ code ];
2554 
2555  if ( extra != 0 )
2556  {
2557  lc -= base_length[ code ];
2558  send_bits( lc, extra ); /* send the extra length bits */
2559  }
2560 
2561  dist = d_buf[ dx++ ];
2562 
2563  /* Here, dist is the match distance - 1 */
2564  code = d_code( dist );
2565 
2566  send_code( code, dtree ); /* send the distance code */
2567  extra = extra_dbits[ code ];
2568 
2569  if ( extra != 0 )
2570  {
2571  dist -= base_dist[ code ];
2572  send_bits( dist, extra ); /* send the extra distance bits */
2573  }
2574  } /* literal or match pair ? */
2575 
2576  flag >>= 1;
2577 
2578  } while ( lx < last_lit );
2579 
2580  send_code( END_BLOCK, ltree );
2581 }
2582 
2583 /* ===========================================================================
2584  * Send a value on a given number of bits.
2585  * IN assertion: length <= 16 and value fits in length bits. */
2586 
2587 void US_Gzip::send_bits( int value, int length )
2588 // int value; /* value to send */
2589 // int length; /* number of bits */
2590 {
2591  /* If not enough room in bi_buf, use (valid) bits from bi_buf and
2592  * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
2593  * unused bits in value. */
2594 
2595  if ( bi_valid > (int) Buf_size - length )
2596  {
2597  bi_buf |= (value << bi_valid);
2598  put_short( bi_buf );
2599  bi_buf = (ush)value >> ( (ush) Buf_size - bi_valid );
2600  bi_valid += length - Buf_size;
2601  }
2602  else
2603  {
2604  bi_buf |= value << bi_valid;
2605  bi_valid += length;
2606  }
2607 }
2608 
2609 /* ===========================================================================
2610  * Reverse the first len bits of a code, using straightforward code (a faster
2611  * method would use a table)
2612  * IN assertion: 1 <= len <= 15 */
2613 
2614 unsigned US_Gzip::bi_reverse( unsigned code, int len )
2615 // unsigned code; /* the value to invert */
2616 // int len; /* its bit length */
2617 {
2618  register unsigned res = 0;
2619 
2620  do
2621  {
2622  res |= code & 1;
2623  code >>= 1;
2624  res <<= 1;
2625  } while ( --len > 0 );
2626 
2627  return res >> 1;
2628 }
2629 
2630 /* ===========================================================================
2631  * Write out any remaining bits in an incomplete byte. */
2633 {
2634  if ( bi_valid > 8 )
2635  {
2636  put_short( bi_buf );
2637  }
2638  else if ( bi_valid > 0 )
2639  {
2640  put_byte( bi_buf );
2641  }
2642 
2643  bi_buf = 0;
2644  bi_valid = 0;
2645 }
2646 
2647 /* ===========================================================================
2648  * * Copy a stored block to the zip file, storing first the length and its
2649  * * one's complement if requested.
2650  * */
2651 void US_Gzip::copy_block( char* buf, unsigned len, int header )
2652 // char *buf; /* the input data */
2653 // unsigned len; /* its length */
2654 // int header; /* true if block header must be written */
2655 {
2656  bi_windup(); /* align on byte boundary */
2657 
2658  if ( header )
2659  {
2660  put_short( (ush) len );
2661  put_short( (ush) ~len );
2662  }
2663 
2664  while ( len-- )
2665  {
2666  put_byte( *buf++ );
2667  }
2668 }
2669 
2670 #define SMALLEST 1
2671 /* Index within the heap array of least frequent node in the Huffman tree */
2672 
2673 /* ===========================================================================
2674  * Remove the smallest element from the heap and recreate the heap with
2675  * one less element. Updates heap and heap_len. */
2676 #define pqremove(tree, top) \
2677 {\
2678  top = heap[ SMALLEST ]; \
2679  heap[ SMALLEST ] = heap[ heap_len--] ; \
2680  pqdownheap( tree, SMALLEST ); \
2681 }
2682 
2683 #define MAX(a,b) ( a >= b ? a : b )
2684 /* the arguments must not have side effects */
2685 
2686 /* ===========================================================================
2687  * Construct one Huffman tree and assigns the code bit strings and lengths.
2688  * Update the total bit length for the current block.
2689  * IN assertion: the field freq is set for all tree elements.
2690  * OUT assertions: the fields len and code are set to the optimal bit length
2691  * and corresponding code. The length opt_len is updated; static_len is
2692  * also updated if stree is not null. The field max_code is set. */
2693 
2695 // tree_desc near *desc; /* the tree descriptor */
2696 {
2697  ct_data* tree = desc->dyn_tree;
2698  ct_data* stree = desc->static_tree;
2699  int elems = desc->elems;
2700  int n, m; /* iterate over heap elements */
2701  int max_code = -1; /* largest code with non zero frequency */
2702  int node = elems; /* next internal node of the tree */
2703 
2704  /* Construct the initial heap, with least frequent element in
2705  * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
2706  * heap[0] is not used. */
2707 
2708  heap_len = 0;
2709  heap_max = HEAP_SIZE;
2710 
2711  for ( n = 0; n < elems; n++ )
2712  {
2713  if ( tree[ n ].Freq != 0 )
2714  {
2715  heap[ ++heap_len ] = max_code = n;
2716  depth[ n ] = 0;
2717  }
2718  else
2719  {
2720  tree[ n ].Len = 0;
2721  }
2722  }
2723 
2724  /* The pkzip format requires that at least one distance code exists,
2725  * and that at least one bit should be sent even if there is only one
2726  * possible code. So to avoid special checks later on we force at least
2727  * two codes of non zero frequency. */
2728 
2729  while ( heap_len < 2 )
2730  {
2731  int new1 = heap[ ++heap_len ] = ( max_code < 2 ? ++max_code : 0 );
2732 
2733  tree [ new1 ].Freq = 1;
2734  depth[ new1 ] = 0;
2735  opt_len--;
2736  if ( stree ) static_len -= stree[ new1 ].Len;
2737  /* new is 0 or 1 so it does not have extra bits */
2738  }
2739 
2740  desc->max_code = max_code;
2741 
2742  /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
2743  * establish sub-heaps of increasing lengths: */
2744 
2745  for ( n = heap_len / 2; n >= 1; n-- ) pqdownheap( tree, n );
2746 
2747  /* Construct the Huffman tree by repeatedly combining the least two
2748  * frequent nodes. */
2749 
2750  do
2751  {
2752  pqremove( tree, n ); /* n = node of least frequency */
2753  m = heap[ SMALLEST ]; /* m = node of next least frequency */
2754 
2755  heap[ --heap_max ] = n; /* keep the nodes sorted by frequency */
2756  heap[ --heap_max ] = m;
2757 
2758  /* Create a new node father of n and m */
2759  tree [ node ].Freq = tree[ n ].Freq + tree[ m ].Freq;
2760  depth[ node ] = (uch) ( MAX( depth[ n ], depth[ m ] ) + 1 );
2761  tree [ n ].Dad = tree[ m ].Dad = (ush) node;
2762 
2763  /* and insert the new node in the heap */
2764  heap[ SMALLEST ] = node++;
2765  pqdownheap( tree, SMALLEST );
2766 
2767  } while ( heap_len >= 2 );
2768 
2769  heap[ --heap_max ] = heap[ SMALLEST ];
2770 
2771  /* At this point, the fields freq and dad are set. We can now
2772  * generate the bit lengths. */
2773 
2774  gen_bitlen( (tree_desc*) desc );
2775 
2776  /* The field len is now set, we can generate the bit codes */
2777  gen_codes ( (ct_data*) tree, max_code );
2778 }
2779 
2780 
2781 /* ===========================================================================
2782  * Construct the Huffman tree for the bit lengths and return the index in
2783  * bl_order of the last bit length code to send. */
2784 
2786 {
2787  int max_blindex; /* index of last bit length code of non zero freq */
2788 
2789  /* Determine the bit length frequencies for literal and distance trees */
2790  scan_tree( (ct_data*) dyn_ltree, l_desc.max_code );
2791  scan_tree( (ct_data*) dyn_dtree, d_desc.max_code );
2792 
2793  /* Build the bit length tree: */
2794  build_tree( ( tree_desc*) ( &bl_desc ) );
2795 
2796  /* opt_len now includes the length of the tree representations, except
2797  * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. */
2798 
2799  /* Determine the number of bit length codes to send. The pkzip format
2800  * requires that at least 4 bit length codes be sent. (appnote.txt says
2801  * 3 but the actual value used is 4.) */
2802 
2803  for ( max_blindex = BL_CODES - 1; max_blindex >= 3; max_blindex-- )
2804  {
2805  if ( bl_tree[ bl_order[ max_blindex ] ].Len != 0 ) break;
2806  }
2807 
2808  /* Update opt_len to include the bit length tree and counts */
2809  opt_len += 3 * ( max_blindex + 1 ) + 5 + 5 + 4;
2810 
2811  return max_blindex;
2812 }
2813 /* ===========================================================================
2814  * Compute the optimal bit lengths for a tree and update the total bit length
2815  * for the current block.
2816  * IN assertion: the fields freq and dad are set, heap[heap_max] and
2817  * above are the tree nodes sorted by increasing frequency.
2818  * OUT assertions: the field len is set to the optimal bit length, the
2819  * array bl_count contains the frequencies for each bit length.
2820  * The length opt_len is updated; static_len is also updated if stree is
2821  * not null. */
2822 
2824 // tree_desc near *desc; /* the tree descriptor */
2825 {
2826  ct_data* tree = desc->dyn_tree;
2827  int* extra = desc->extra_bits;
2828  int base = desc->extra_base;
2829  int max_code = desc->max_code;
2830  int max_length = desc->max_length;
2831  ct_data* stree = desc->static_tree;
2832  int h; /* heap index */
2833  int n;
2834  int m; /* iterate over the tree elements */
2835  int bits; /* bit length */
2836  int xbits; /* extra bits */
2837  ush f; /* frequency */
2838  int overflow = 0; /* number of elements with bit length too large */
2839 
2840  for ( bits = 0; bits <= MAX_BITS; bits++ ) bl_count[ bits ] = 0;
2841 
2842  /* In a first pass, compute the optimal bit lengths (which may
2843  * overflow in the case of the bit length tree). */
2844 
2845  tree[ heap[ heap_max ] ].Len = 0; /* root of the heap */
2846 
2847  for ( h = heap_max + 1; h < HEAP_SIZE; h++ )
2848  {
2849  n = heap[ h ];
2850  bits = tree[ tree[ n ].Dad ].Len + 1;
2851 
2852  if ( bits > max_length)
2853  {
2854  bits = max_length;
2855  overflow++;
2856  }
2857 
2858  tree[ n ].Len = (ush) bits;
2859  /* We overwrite tree[ n ].Dad which is no longer needed */
2860 
2861  if ( n > max_code ) continue; /* not a leaf node */
2862 
2863  bl_count[ bits ]++;
2864  xbits = 0;
2865 
2866  if ( n >= base ) xbits = extra[ n - base ];
2867 
2868  f = tree[ n ].Freq;
2869  opt_len += (ulg) f * ( bits + xbits );
2870 
2871  if ( stree ) static_len += (ulg) f * ( stree[ n ].Len + xbits );
2872  }
2873 
2874  if ( overflow == 0) return;
2875 
2876  /* Find the first bit length which could increase: */
2877  do
2878  {
2879  bits = max_length-1;
2880 
2881  while ( bl_count[ bits ] == 0 ) bits--;
2882 
2883  bl_count[ bits ]--; /* move one leaf down the tree */
2884  bl_count[ bits + 1 ] += 2; /* move one overflow item as its brother */
2885  bl_count[ max_length ]--;
2886 
2887  /* The brother of the overflow item also moves one step up,
2888  * but this does not affect bl_count[max_length] */
2889 
2890  overflow -= 2;
2891 
2892  } while ( overflow > 0 );
2893 
2894  /* Now recompute all bit lengths, scanning in increasing frequency.
2895  * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
2896  * lengths instead of fixing only the wrong ones. This idea is taken
2897  * from 'ar' written by Haruhiko Okumura.) */
2898 
2899  for ( bits = max_length; bits != 0; bits-- )
2900  {
2901  n = bl_count[bits];
2902 
2903  while ( n != 0 )
2904  {
2905  m = heap[--h];
2906 
2907  if ( m > max_code ) continue;
2908 
2909  if ( tree[ m ].Len != (unsigned) bits )
2910  {
2911  opt_len += ( (long) bits - (long) tree[ m ].Len ) * (long) tree[ m ].Freq;
2912  tree[ m ].Len = (ush) bits;
2913  }
2914  n--;
2915  }
2916  }
2917 }
2918 
2919 /* ===========================================================================
2920  * Generate the codes for a given tree and bit counts (which need not be
2921  * optimal).
2922  * IN assertion: the array bl_count contains the bit length statistics for
2923  * the given tree and the field len is set for all tree elements.
2924  * OUT assertion: the field code is set for all tree elements of non
2925  * zero code length. */
2926 
2927 void US_Gzip::gen_codes ( ct_data* tree, int max_code )
2928 // ct_data near *tree; /* the tree to decorate */
2929 // int max_code; /* largest code with non zero frequency */
2930 {
2931  ush next_code[ MAX_BITS + 1 ]; /* next code value for each bit length */
2932  ush code = 0; /* running code value */
2933  int bits; /* bit index */
2934  int n; /* code index */
2935 
2936  /* The distribution counts are first used to generate the code values
2937  * without bit reversal. */
2938 
2939  for ( bits = 1; bits <= MAX_BITS; bits++ )
2940  {
2941  next_code[ bits ] = code = ( code + bl_count[ bits - 1 ] ) << 1;
2942  }
2943 
2944  /* Check that the bit counts in bl_count are consistent. The last code
2945  * must be all ones. */
2946 
2947  for ( n = 0; n <= max_code; n++ )
2948  {
2949  int len = tree[ n ].Len;
2950 
2951  if ( len == 0 ) continue;
2952 
2953  /* Now reverse the bits */
2954  tree[ n ].Code = bi_reverse( next_code[ len ]++, len );
2955  }
2956 }
2957 
2958 /* ===========================================================================
2959  * Compares to subtrees, using the tree depth as tie breaker when
2960  * the subtrees have equal frequency. This minimizes the worst case length. */
2961 
2962 #define smaller( tree, n, m ) \
2963  ( tree[ n ].Freq < tree[ m ].Freq || \
2964  ( tree[ n ].Freq == tree[ m ].Freq && depth[ n ] <= depth[ m ] ) )
2965 
2966 /* ===========================================================================
2967  * Restore the heap property by moving down the tree starting at node k,
2968  * exchanging a node with the smallest of its two sons if necessary, stopping
2969  * when the heap property is re-established (each father smaller than its
2970  * two sons). */
2971 
2972 void US_Gzip::pqdownheap( ct_data* tree, int k )
2973 // ct_data near *tree; /* the tree to restore */
2974 // int k; /* node to move down */
2975 {
2976  int v = heap[ k ];
2977  int j = k << 1; /* left son of k */
2978 
2979  while ( j <= heap_len )
2980  {
2981  /* Set j to the smallest of the two sons: */
2982  if ( j < heap_len && smaller( tree, heap[ j + 1 ], heap[ j ] ) ) j++;
2983 
2984  /* Exit if v is smaller than both sons */
2985  if ( smaller( tree, v, heap[ j ] ) ) break;
2986 
2987  /* Exchange v with the smallest son */
2988  heap[ k ] = heap[ j ];
2989  k = j;
2990 
2991  /* And continue down the tree, setting j to the left son of k */
2992  j <<= 1;
2993  }
2994  heap[ k ] = v;
2995 }
2996 
2997 /* ===========================================================================
2998  * Send the header for a block using dynamic Huffman trees: the counts, the
2999  * lengths of the bit length codes, the literal tree and the distance tree.
3000  * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. */
3001 
3002 void US_Gzip::send_all_trees( int lcodes, int dcodes, int blcodes )
3003 // int lcodes, dcodes, blcodes; /* number of codes for each tree */
3004 {
3005  int rank; /* index in bl_order */
3006 
3007  send_bits( lcodes - 257, 5 ); /* not +255 as stated in appnote.txt */
3008  send_bits( dcodes - 1, 5 );
3009  send_bits( blcodes - 4, 4 ); /* not -3 as stated in appnote.txt */
3010 
3011  for ( rank = 0; rank < blcodes; rank++ )
3012  {
3013  send_bits( bl_tree[ bl_order[ rank ] ].Len, 3 );
3014  }
3015 
3016  send_tree( (ct_data*) dyn_ltree, lcodes - 1 ); /* send the literal tree */
3017  send_tree(( ct_data*) dyn_dtree, dcodes - 1 ); /* send the distance tree */
3018 }
3019 
3020 #define REP_3_6 16
3021 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
3022 
3023 #define REPZ_3_10 17
3024 /* repeat a zero length 3-10 times (3 bits of repeat count) */
3025 
3026 #define REPZ_11_138 18
3027 /* repeat a zero length 11-138 times (7 bits of repeat count) */
3028 
3029 /* ===========================================================================
3030  * Scan a literal or distance tree to determine the frequencies of the codes
3031  * in the bit length tree. Updates opt_len to take into account the repeat
3032  * counts. (The contribution of the bit length codes will be added later
3033  * during the construction of bl_tree.) */
3034 
3035 void US_Gzip::scan_tree( ct_data* tree, int max_code )
3036 // ct_data near *tree; /* the tree to be scanned */
3037 // int max_code; /* and its largest code of non zero frequency */
3038 {
3039  int n; /* iterates over all tree elements */
3040  int prevlen = -1; /* last emitted length */
3041  int curlen; /* length of current code */
3042  int nextlen = tree[0].Len; /* length of next code */
3043  int count = 0; /* repeat count of the current code */
3044  int max_count = 7; /* max repeat count */
3045  int min_count = 4; /* min repeat count */
3046 
3047  if ( nextlen == 0 )
3048  {
3049  max_count = 138;
3050  min_count = 3;
3051  }
3052 
3053  tree[ max_code + 1 ].Len = (ush) 0xffff; /* guard */
3054 
3055  for ( n = 0; n <= max_code; n++ )
3056  {
3057  curlen = nextlen;
3058  nextlen = tree[ n + 1 ].Len;
3059 
3060  if ( ++count < max_count && curlen == nextlen )
3061  {
3062  continue;
3063  }
3064  else if ( count < min_count )
3065  {
3066  bl_tree[ curlen ].Freq += count;
3067  }
3068  else if ( curlen != 0 )
3069  {
3070  if ( curlen != prevlen ) bl_tree[ curlen ].Freq++;
3071  bl_tree[REP_3_6].Freq++;
3072  }
3073  else if ( count <= 10 )
3074  {
3075  bl_tree[ REPZ_3_10 ].Freq++;
3076  }
3077  else
3078  {
3079  bl_tree[ REPZ_11_138 ].Freq++;
3080  }
3081 
3082  count = 0;
3083  prevlen = curlen;
3084 
3085  if ( nextlen == 0 )
3086  {
3087  max_count = 138;
3088  min_count = 3;
3089  }
3090  else if ( curlen == nextlen )
3091  {
3092  max_count = 6;
3093  min_count = 3;
3094  }
3095  else
3096  {
3097  max_count = 7;
3098  min_count = 4;
3099  }
3100  }
3101 }
3102 
3103 /* ===========================================================================
3104  * Send a literal or distance tree in compressed form, using the codes in
3105  * bl_tree. */
3106 
3107 void US_Gzip::send_tree( ct_data* tree, int max_code )
3108 // ct_data near *tree; /* the tree to be scanned */
3109 // int max_code; /* and its largest code of non zero frequency */
3110 {
3111  int n; /* iterates over all tree elements */
3112  int prevlen = -1; /* last emitted length */
3113  int curlen; /* length of current code */
3114  int nextlen = tree[0].Len; /* length of next code */
3115  int count = 0; /* repeat count of the current code */
3116  int max_count = 7; /* max repeat count */
3117  int min_count = 4; /* min repeat count */
3118 
3119  /* tree[max_code+1].Len = -1; */ /* guard already set */
3120 
3121  if ( nextlen == 0 )
3122  {
3123  max_count = 138;
3124  min_count = 3;
3125  }
3126 
3127  for ( n = 0; n <= max_code; n++ )
3128  {
3129  curlen = nextlen;
3130  nextlen = tree[ n + 1 ].Len;
3131 
3132  if ( ++count < max_count && curlen == nextlen )
3133  {
3134  continue;
3135  }
3136  else if ( count < min_count )
3137  {
3138  do
3139  {
3140  send_code( curlen, bl_tree );
3141  } while ( --count != 0 );
3142  }
3143  else if ( curlen != 0 )
3144  {
3145  if ( curlen != prevlen )
3146  {
3147  send_code( curlen, bl_tree );
3148  count--;
3149  }
3150 
3151  send_code( REP_3_6, bl_tree );
3152  send_bits( count - 3, 2 );
3153  }
3154  else if ( count <= 10 )
3155  {
3156  send_code( REPZ_3_10, bl_tree );
3157  send_bits( count - 3, 3 );
3158  }
3159  else
3160  {
3161  send_code( REPZ_11_138, bl_tree );
3162  send_bits( count - 11, 7 );
3163  }
3164 
3165  count = 0;
3166  prevlen = curlen;
3167 
3168  if ( nextlen == 0 )
3169  {
3170  max_count = 138;
3171  min_count = 3;
3172  }
3173  else if ( curlen == nextlen )
3174  {
3175  max_count = 6;
3176  min_count = 3;
3177  }
3178  else
3179  {
3180  max_count = 7;
3181  min_count = 4;
3182  }
3183  }
3184 }
3185 
3187 QString US_Gzip::explain( const int error )
3188 {
3189  QString explanation;
3190  switch ( error )
3191  {
3192  case GZIP_OK:
3193  explanation = "The g(un)zip operation was succesful.";
3194  break;
3195 
3196  case GZIP_NOEXIST:
3197  explanation = "Could not find the input file." ;
3198  break;
3199 
3200  case GZIP_NOTFILE:
3201  explanation = "The input file name is not a regular file." ;
3202  break;
3203 
3204  case GZIP_NOREAD:
3205  explanation = "The input file is not readable by the user." ;
3206  break;
3207 
3209  explanation = "An unsupported option is included in the compressed file.";
3210  break;
3211 
3212  case GZIP_OUTFILEEXISTS:
3213  explanation = "The output file already exists." ;
3214  break;
3215 
3216  case GZIP_CRCERROR:
3217  explanation = "The compressed file failed an internal validation." ;
3218  break;
3219 
3220  case GZIP_READERROR:
3221  explanation = "Could not read the input file." ;
3222  break;
3223 
3224  case GZIP_WRITEERROR:
3225  explanation = "Could not write the output file." ;
3226  break;
3227 
3228  case GZIP_LENGTHERROR:
3229  explanation = "The output data does not match the internal file size." ;
3230  break;
3231 
3232  case GZIP_FILENAMEERROR:
3233  explanation = "The file/path name is too long." ;
3234  break;
3235 
3236  case GZIP_INTERNAL:
3237  explanation = "In internal gzip error occured." ;
3238  break;
3239 
3240  default:
3241  explanation = "Unknown return code: " + error;
3242  }
3243 
3244  return explanation;
3245 }