AdaptiveHuffman.Mod 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. MODULE AdaptiveHuffman; (** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
  2. (* adaptive Huffman compression, Vitter's algorithm *)
  3. IMPORT Streams;
  4. CONST
  5. AlphabetSize* = 256; (* byte *)
  6. BlockSize* = 8*1024;
  7. ScaleSize = 2*1024;
  8. Encode = 0; Decode = 1;
  9. TYPE
  10. Index = INTEGER; (* 16-bit integer to keep the table small *)
  11. Symbol = LONGINT;
  12. Node = RECORD
  13. weight: LONGINT; (* node weight *)
  14. up: Index; (* next node up the tree *)
  15. down: Index; (* pair of down nodes *)
  16. pattern: Symbol (* node pattern *)
  17. END;
  18. HuffmanCoder = OBJECT
  19. VAR
  20. mode: SHORTINT; (* encode, decode *)
  21. in: Streams.Reader; (* input from archive *)
  22. out: Streams.Writer; (* output to archive *)
  23. curByte, curBit: LONGINT;
  24. esc: Index; (* the current tree height *)
  25. root: Index; (* the root of the tree *)
  26. map: ARRAY AlphabetSize OF Index; (* mapping for symbols to nodes *)
  27. table: ARRAY 2*AlphabetSize + 4 OF Node; (* the coding table *)
  28. PROCEDURE &New( m: SHORTINT; input: Streams.Reader; output: Streams.Writer );
  29. BEGIN
  30. ASSERT( m IN {Encode, Decode} );
  31. mode := m;
  32. IF mode = Encode THEN out := output ELSE in := input END;
  33. END New;
  34. PROCEDURE Initialize;
  35. VAR i: Index;
  36. BEGIN
  37. root := 2*AlphabetSize + 4 - 1; curByte := 0;
  38. IF mode = Encode THEN curByte := 0; curBit := 0 ELSE curBit := -1 END;
  39. FOR i := 0 TO root DO
  40. table[i].up := 0; table[i].down := 0; table[i].pattern := 0; table[i].weight := 0
  41. END;
  42. FOR i := 0 TO AlphabetSize - 1 DO map[i] := 0 END;
  43. esc := root;
  44. END Initialize;
  45. PROCEDURE Finish; (* flush last few bits *)
  46. BEGIN
  47. ASSERT( mode = Encode );
  48. WHILE curBit # 0 DO PutBit( 0 ) END;
  49. out.Update
  50. END Finish;
  51. PROCEDURE GetBit( ): LONGINT;
  52. VAR
  53. bit: LONGINT; ch: CHAR;
  54. BEGIN
  55. IF curBit < 0 THEN
  56. in.Char( ch ); curByte := ORD( ch ); curBit := 7
  57. END;
  58. bit := ASH( curByte, -curBit ) MOD 2;
  59. DEC( curBit );
  60. RETURN bit
  61. END GetBit;
  62. PROCEDURE PutBit( bit: LONGINT );
  63. BEGIN
  64. curByte := 2*curByte + bit;
  65. INC( curBit );
  66. IF curBit > 7 THEN
  67. out.Char( CHR( curByte ) ); curByte := 0; curBit := 0
  68. END
  69. END PutBit;
  70. PROCEDURE GetPattern( ): Symbol;
  71. VAR sym, i, bit: LONGINT;
  72. BEGIN
  73. sym := 0; bit := 1;
  74. FOR i := 1 TO 8 DO
  75. IF GetBit() = 1 THEN sym := sym + bit END;
  76. bit := bit * 2;
  77. END;
  78. RETURN sym
  79. END GetPattern;
  80. (* send the bits for an escaped symbol *)
  81. PROCEDURE PutPattern( sym: Symbol );
  82. VAR i: LONGINT;
  83. BEGIN
  84. FOR i := 1 TO 8 DO
  85. PutBit( sym MOD 2 ); sym := sym DIV 2;
  86. END
  87. END PutPattern;
  88. (* split escape node to incorporate new symbol *)
  89. PROCEDURE AddSymbol( sym: Symbol ): Index;
  90. VAR pair, node: Index;
  91. BEGIN
  92. ASSERT( esc > 1 );
  93. pair := esc; node := esc - 1; esc := esc - 2 ;
  94. table[pair].down := node; table[pair].weight := 1;
  95. table[node].up := pair;
  96. table[node].down := 0; table[node].weight := 0; table[node].pattern := sym;
  97. table[esc].up := pair;
  98. table[esc].down := 0; table[esc].weight := 0;
  99. map[sym] := node;
  100. RETURN node;
  101. END AddSymbol;
  102. (* swap leaf to group leader position return symbol's new node *)
  103. PROCEDURE GroupLeader( node: Index ): Index;
  104. VAR
  105. leader: Index;
  106. weight: LONGINT;
  107. sym, prev: Symbol;
  108. BEGIN
  109. weight := table[node].weight; leader := node;
  110. WHILE (leader < root) & (weight = table[leader + 1].weight) DO INC( leader ) END;
  111. IF leader # node THEN
  112. (* swap the leaf nodes *)
  113. sym := table[node].pattern;
  114. prev := table[leader].pattern;
  115. table[leader].pattern := sym;
  116. table[node].pattern := prev;
  117. map[sym] := leader;
  118. map[prev] := node;
  119. END;
  120. RETURN leader
  121. END GroupLeader;
  122. (* slide internal node up over all leaves of equal weight;
  123. or exchange leaf with next smaller weight internal node.
  124. return node's new position *)
  125. PROCEDURE SlideNode( node: Index ): Index;
  126. VAR next: Index; swap: Node;
  127. BEGIN
  128. swap := table[node]; next := node + 1;
  129. (* if we're sliding an internal node, find the highest possible leaf to exchange with *)
  130. IF ODD( swap.weight ) THEN
  131. WHILE swap.weight > table[next+1].weight DO INC( next ) END
  132. END;
  133. (* swap the two nodes *)
  134. table[node] := table[next]; table[next] := swap;
  135. table[next].up := table[node].up; table[node].up := swap.up;
  136. (* repair the symbol map and tree structure *)
  137. IF ODD( swap.weight ) THEN
  138. table[swap.down].up := next;
  139. table[swap.down - 1].up := next;
  140. map[table[node].pattern] := node;
  141. ELSE
  142. table[table[node].down - 1].up := node;
  143. table[table[node].down].up := node;
  144. map[swap.pattern] := next;
  145. END;
  146. RETURN next;
  147. END SlideNode;
  148. (* increment symbol weight and re balance the tree. *)
  149. PROCEDURE IncWeight( node: Index );
  150. VAR up: Index;
  151. BEGIN
  152. (* obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
  153. otherwise, promote leaf to group leader position in the tree *)
  154. IF table[node].up = node + 1 THEN
  155. INC( table[node].weight, 2 ); INC( node );
  156. ELSE
  157. node := GroupLeader( node )
  158. END;
  159. (* increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
  160. internal nodes work upwards from their initial positions; while symbol nodes slide over first,
  161. then work up from their final positions. *)
  162. INC( table[node].weight, 2 ); up := table[node].up;
  163. WHILE up # 0 DO
  164. WHILE table[node].weight > table[node + 1].weight DO node := SlideNode( node) END;
  165. IF ODD( table[node].weight) THEN
  166. node := up
  167. ELSE
  168. node := table[node].up
  169. END;
  170. INC( table[node].weight, 2 ); up := table[node].up
  171. END;
  172. END IncWeight;
  173. (* scale all weights and rebalance the tree,
  174. zero weight nodes are removed from the tree by sliding them out the rank list *)
  175. PROCEDURE Scale;
  176. VAR node, prev: Index; weight: LONGINT;
  177. PROCEDURE Weight( idx: Index ): LONGINT;
  178. VAR w: LONGINT;
  179. BEGIN
  180. w := table[idx].weight;
  181. IF ODD( w ) THEN RETURN w - 1 ELSE RETURN w END
  182. END Weight;
  183. BEGIN
  184. node := esc + 1;
  185. (* work up the tree from the escape node scaling weights by the value of bits *)
  186. WHILE node <= root DO
  187. (* recompute the weight of internal nodes; slide down and out any unused ones *)
  188. weight := table[node].weight;
  189. IF ODD( weight ) THEN
  190. weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
  191. ELSE
  192. (* remove zero weight leaves by incrementing esc and removing them from the symbol map *)
  193. weight := weight DIV 2;
  194. IF ODD( weight ) THEN DEC( weight ) END;
  195. IF weight = 0 THEN
  196. map[table[node].pattern] := 0; INC( esc, 2 );
  197. END
  198. END;
  199. (* slide the scaled node back down over any previous nodes with larger weights *)
  200. table[node].weight := weight;
  201. prev := node - 1;
  202. WHILE weight < table[prev].weight DO IGNORE SlideNode( prev ); DEC( prev ) END;
  203. INC( node )
  204. END;
  205. (* prepare a new escape node *)
  206. table[esc].down := 0; table[esc].weight := 0;
  207. END Scale;
  208. (* encode the next symbol *)
  209. PROCEDURE EncodeByte( ch: CHAR );
  210. VAR
  211. code, n: LONGINT;
  212. up, idx, node: Index; sym: Symbol;
  213. BEGIN
  214. ASSERT( mode = Encode );
  215. (* for a new symbol, direct the receiver to the escape node *)
  216. sym := ORD( ch );
  217. node := map[sym]; idx := node;
  218. IF idx = 0 THEN idx := esc END;
  219. ASSERT( idx # 0 );
  220. (* accumulate the code bits by working up the tree from the node to the root *)
  221. code := 1; n := 0; up := table[idx].up;
  222. WHILE up # 0 DO
  223. code := code * 2; INC( n );
  224. IF ODD( idx ) THEN INC( code ) END;
  225. idx := up; up := table[idx].up
  226. END;
  227. (* send the code, root selector bit first *)
  228. WHILE n > 0 DO
  229. PutBit( code MOD 2 ); code := code DIV 2;
  230. DEC( n )
  231. END;
  232. (* send pattern and incorporate it into the tree *)
  233. IF node = 0 THEN
  234. PutPattern( sym );
  235. node := AddSymbol( sym );
  236. END;
  237. (* adjust and re-balance the tree *)
  238. IncWeight( node );
  239. END EncodeByte;
  240. (* decode the next symbol *)
  241. PROCEDURE DecodeByte( ): CHAR;
  242. VAR
  243. node, down: Index;
  244. sym: Symbol;
  245. BEGIN
  246. ASSERT( mode = Decode );
  247. (* work down the tree from the root until reaching either a leaf or the escape node.
  248. A one bit means go left, a zero means go right. *)
  249. node := root; down := table[node].down;
  250. WHILE down # 0 DO
  251. IF GetBit( ) = 1 THEN
  252. node := down-1
  253. ELSE
  254. node := down;
  255. END;
  256. down := table[node].down;
  257. END;
  258. (* sent to the escape node *)
  259. IF node = esc THEN
  260. sym := GetPattern( );
  261. node := AddSymbol( sym );
  262. ELSE
  263. sym := table[node].pattern
  264. END;
  265. (* adjust and re-balance the tree *)
  266. IncWeight( node );
  267. RETURN CHR( sym );
  268. END DecodeByte;
  269. END HuffmanCoder;
  270. TYPE
  271. Encoder* =OBJECT
  272. VAR
  273. huff: HuffmanCoder;
  274. out: Streams.Writer;
  275. PROCEDURE & New*( archive: Streams.Writer );
  276. BEGIN
  277. NEW( huff, Encode, NIL, archive );
  278. out := archive
  279. END New;
  280. PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
  281. VAR i: LONGINT;
  282. BEGIN
  283. out.RawNum( len );
  284. huff.Initialize;
  285. i := 0;
  286. WHILE i < len DO
  287. huff.EncodeByte( source[i] ); INC( i );
  288. IF (i MOD ScaleSize = 0) & ((len-i) >= ScaleSize) THEN huff.Scale END
  289. END;
  290. huff.Finish
  291. END CompressBlock;
  292. END Encoder;
  293. TYPE
  294. Decoder* =OBJECT
  295. VAR
  296. huff: HuffmanCoder;
  297. input: Streams.Reader;
  298. PROCEDURE & New*( archive: Streams.Reader );
  299. BEGIN
  300. NEW( huff, Decode, archive, NIL );
  301. input := archive
  302. END New;
  303. PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
  304. VAR i, blockSize: LONGINT;
  305. BEGIN
  306. input.RawNum( blockSize );
  307. IF blockSize = 0 THEN (* end of stream *) RETURN 0 END;
  308. huff.Initialize;
  309. i := 0;
  310. WHILE i < blockSize DO
  311. buf[i] := huff.DecodeByte( ); INC( i );
  312. IF (i MOD ScaleSize = 0) & ((blockSize-i) >= ScaleSize) THEN huff.Scale END
  313. END;
  314. RETURN blockSize
  315. END ExtractBlock;
  316. END Decoder;
  317. END AdaptiveHuffman.