AdaptiveHuffman.Mod 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. MODULE AdaptiveHuffman; (** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
  2. (* adaptive Huffman compression, Vitter's FGK algorithm *)
  3. IMPORT Streams;
  4. CONST
  5. AlphabetSize = 256; (* byte *)
  6. BlockSize* = 8*1024;
  7. ScaleLimit = 4*1024;
  8. Encode = 0; Decode = 1;
  9. TYPE
  10. BitReader = OBJECT
  11. VAR
  12. in: Streams.Reader;
  13. curByte, curBit: LONGINT;
  14. PROCEDURE &New( r: Streams.Reader );
  15. BEGIN
  16. in := r; curBit := -1; curByte := 0
  17. END New;
  18. PROCEDURE Initialize;
  19. BEGIN
  20. curBit := -1; curByte := 0
  21. END Initialize;
  22. PROCEDURE Bit( ): LONGINT;
  23. VAR
  24. bit: LONGINT; ch: CHAR;
  25. BEGIN
  26. IF curBit < 0 THEN
  27. in.Char( ch ); curByte := ORD( ch ); curBit := 7
  28. END;
  29. bit := ASH( curByte, -curBit ) MOD 2;
  30. DEC( curBit );
  31. RETURN bit
  32. END Bit;
  33. END BitReader;
  34. TYPE
  35. BitWriter = OBJECT
  36. VAR
  37. out: Streams.Writer;
  38. curByte, curBit: LONGINT;
  39. PROCEDURE &New( w: Streams.Writer );
  40. BEGIN
  41. out := w; curBit := 0; curByte := 0
  42. END New;
  43. PROCEDURE Bit( bit: LONGINT );
  44. BEGIN
  45. curByte := 2*curByte + bit;
  46. INC( curBit );
  47. IF curBit > 7 THEN
  48. out.Char( CHR( curByte ) ); curByte := 0; curBit := 0
  49. END
  50. END Bit;
  51. PROCEDURE Finish; (* flush last few bits *)
  52. BEGIN
  53. WHILE curBit # 0 DO Bit( 0 ) END;
  54. out.Update
  55. END Finish;
  56. END BitWriter;
  57. TYPE
  58. HuffmanCoder = OBJECT
  59. TYPE
  60. Index = INTEGER; (* 16-bit integer to keep the table small *)
  61. Pattern = INTEGER;
  62. Node = RECORD
  63. weight: INTEGER; (* node weight *)
  64. pattern: Pattern; (* node pattern (if weight is even, leaf node) *)
  65. up: Index; (* next node up the tree *)
  66. down: Index (* pair of down nodes (if weight is odd, internal node) *)
  67. END;
  68. VAR
  69. mode: SHORTINT; (* Encode, Decode *)
  70. in: BitReader; (* input from archive *)
  71. out: BitWriter; (* output to archive *)
  72. esc: Index; (* the current escape node *)
  73. root: Index; (* the root of the tree *)
  74. map: ARRAY AlphabetSize OF Index; (* mapping of patterns to nodes *)
  75. table: ARRAY 2*AlphabetSize + 2 OF Node; (* the Huffman tree *)
  76. PROCEDURE &New( m: SHORTINT; input: Streams.Reader; output: Streams.Writer );
  77. BEGIN
  78. ASSERT( m IN {Encode, Decode} );
  79. mode := m;
  80. IF mode = Encode THEN NEW( out, output ) ELSE NEW( in, input ) END;
  81. END New;
  82. PROCEDURE Initialize;
  83. VAR i: Index;
  84. BEGIN
  85. root := 2*AlphabetSize + 2 - 1;
  86. FOR i := 0 TO root DO
  87. table[i].up := 0; table[i].down := 0; table[i].pattern := 0; table[i].weight := 0
  88. END;
  89. FOR i := 0 TO AlphabetSize - 1 DO map[i] := 0 END;
  90. esc := root;
  91. IF mode = Decode THEN in.Initialize END
  92. END Initialize;
  93. PROCEDURE Finish;
  94. BEGIN
  95. ASSERT( mode = Encode );
  96. out.Finish (* flush last few bits *)
  97. END Finish;
  98. PROCEDURE GetPattern( ): Pattern;
  99. VAR i: INTEGER; patt: Pattern;
  100. BEGIN
  101. patt := 0;
  102. FOR i := 0 TO 7 DO
  103. IF in.Bit() = 1 THEN patt := patt + INTEGER(ASH( 1, i )) END;
  104. END;
  105. RETURN patt
  106. END GetPattern;
  107. PROCEDURE PutPattern( patt: Pattern );
  108. VAR i: LONGINT;
  109. BEGIN
  110. FOR i := 0 TO 7 DO
  111. out.Bit( patt MOD 2 ); patt := patt DIV 2;
  112. END
  113. END PutPattern;
  114. (* split escape node to incorporate a new pattern *)
  115. PROCEDURE AddPattern( patt: Pattern ): Index;
  116. VAR pair, node: Index;
  117. BEGIN
  118. ASSERT( esc > 1 );
  119. pair := esc; node := esc - 1; esc := esc - 2 ;
  120. table[pair].down := node; table[pair].weight := 1;
  121. table[node].up := pair;
  122. table[node].down := 0; table[node].weight := 0; table[node].pattern := patt;
  123. table[esc].up := pair;
  124. table[esc].down := 0; table[esc].weight := 0;
  125. map[patt] := node;
  126. RETURN node;
  127. END AddPattern;
  128. (* swap leaf to group leader position, return pattern's new node *)
  129. PROCEDURE GroupLeader( node: Index ): Index;
  130. VAR
  131. leader: Index;
  132. weight: LONGINT;
  133. patt, prev: Pattern;
  134. BEGIN
  135. weight := table[node].weight; leader := node;
  136. WHILE (leader < root) & (weight = table[leader + 1].weight) DO INC( leader ) END;
  137. IF leader # node THEN
  138. (* swap the leaf nodes *)
  139. patt := table[node].pattern;
  140. prev := table[leader].pattern;
  141. table[leader].pattern := patt;
  142. table[node].pattern := prev;
  143. map[patt] := leader;
  144. map[prev] := node;
  145. END;
  146. RETURN leader
  147. END GroupLeader;
  148. (* slide internal node up over all leaves of equal weight
  149. or exchange leaf with next smaller weight internal node.
  150. return node's new position *)
  151. PROCEDURE SlideNode( node: Index ): Index;
  152. VAR next: Index; swap: Node;
  153. BEGIN
  154. swap := table[node]; next := node + 1;
  155. (* if we're sliding an internal node, find the highest possible leaf to exchange with *)
  156. IF ODD( swap.weight ) THEN
  157. WHILE swap.weight > table[next+1].weight DO INC( next ) END
  158. END;
  159. (* swap the two nodes *)
  160. table[node] := table[next]; table[next] := swap;
  161. table[next].up := table[node].up; table[node].up := swap.up;
  162. (* repair the pattern map and tree structure *)
  163. IF ODD( swap.weight ) THEN
  164. table[swap.down].up := next;
  165. table[swap.down - 1].up := next;
  166. map[table[node].pattern] := node;
  167. ELSE
  168. table[table[node].down - 1].up := node;
  169. table[table[node].down].up := node;
  170. map[swap.pattern] := next;
  171. END;
  172. RETURN next;
  173. END SlideNode;
  174. (* increment pattern weight and re balance the tree. *)
  175. PROCEDURE IncrementWeight( node: Index );
  176. VAR up: Index;
  177. BEGIN
  178. (* obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
  179. otherwise, promote leaf to group leader position in the tree *)
  180. IF table[node].up = node + 1 THEN
  181. INC( table[node].weight, 2 ); INC( node );
  182. ELSE
  183. node := GroupLeader( node )
  184. END;
  185. (* increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
  186. internal nodes work upwards from their initial positions; while pattern nodes slide over first,
  187. then work up from their final positions. *)
  188. INC( table[node].weight, 2 ); up := table[node].up;
  189. WHILE up # 0 DO
  190. WHILE table[node].weight > table[node + 1].weight DO node := SlideNode( node) END;
  191. IF ODD( table[node].weight) THEN
  192. node := up
  193. ELSE
  194. node := table[node].up
  195. END;
  196. INC( table[node].weight, 2 ); up := table[node].up
  197. END;
  198. END IncrementWeight;
  199. (* scale all weights and rebalance the tree,
  200. zero weight nodes are removed from the tree *)
  201. PROCEDURE Scale;
  202. VAR node, prev: Index; weight: INTEGER;
  203. PROCEDURE Weight( idx: Index ): INTEGER;
  204. VAR w: INTEGER;
  205. BEGIN
  206. w := table[idx].weight;
  207. IF ODD( w ) THEN RETURN w - 1 ELSE RETURN w END
  208. END Weight;
  209. BEGIN
  210. node := esc + 1;
  211. (* work up the tree from the escape node scaling weights *)
  212. WHILE node <= root DO
  213. weight := table[node].weight;
  214. IF ODD( weight ) THEN
  215. (* recompute the weight of internal nodes *)
  216. weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
  217. ELSE
  218. (* remove zero weight leaves and remove them from the pattern map *)
  219. weight := weight DIV 2;
  220. IF ODD( weight ) THEN DEC( weight ) END;
  221. IF weight = 0 THEN
  222. map[table[node].pattern] := 0; INC( esc, 2 );
  223. END
  224. END;
  225. (* slide the scaled node back down over any previous nodes with larger weights *)
  226. table[node].weight := weight;
  227. prev := node - 1;
  228. WHILE weight < table[prev].weight DO IGNORE SlideNode( prev ); DEC( prev ) END;
  229. INC( node )
  230. END;
  231. (* prepare a new escape node *)
  232. table[esc].down := 0; table[esc].weight := 0;
  233. END Scale;
  234. PROCEDURE EncodeByte( ch: CHAR );
  235. VAR
  236. code, bits: LONGINT;
  237. cur, node: Index; patt: Pattern;
  238. BEGIN
  239. patt := ORD( ch ); node := map[patt];
  240. (* accumulate the code bits by working up from the node to the root *)
  241. cur := node; code := 0; bits := 0;
  242. IF cur = 0 THEN cur := esc END;
  243. WHILE table[cur].up # 0 DO
  244. IF ODD( cur ) THEN code := code*2 + 1 ELSE code := code*2 END;
  245. INC( bits ); cur := table[cur].up
  246. END;
  247. (* send the code *)
  248. WHILE bits > 0 DO
  249. out.Bit( code MOD 2 ); code := code DIV 2;
  250. DEC( bits )
  251. END;
  252. IF node = 0 THEN
  253. (* send the new pattern and incorporate it into the tree *)
  254. PutPattern( patt ); node := AddPattern( patt );
  255. END;
  256. IncrementWeight( node );
  257. END EncodeByte;
  258. PROCEDURE ExtractByte( ): CHAR;
  259. VAR
  260. node, down: Index;
  261. patt: Pattern;
  262. BEGIN
  263. (* work down the tree from the root until reaching either a leaf or the escape node *)
  264. node := root; down := table[node].down;
  265. WHILE down # 0 DO
  266. IF in.Bit( ) = 1 THEN node := down - 1 ELSE node := down END;
  267. down := table[node].down;
  268. END;
  269. IF node = esc THEN
  270. (* add the new pattern to the tree *)
  271. patt := GetPattern( ); node := AddPattern( patt )
  272. ELSE
  273. patt := table[node].pattern
  274. END;
  275. IncrementWeight( node );
  276. RETURN CHR( patt );
  277. END ExtractByte;
  278. END HuffmanCoder;
  279. TYPE
  280. Encoder* =OBJECT
  281. VAR
  282. huff: HuffmanCoder;
  283. PROCEDURE & New*( archive: Streams.Writer );
  284. BEGIN
  285. NEW( huff, Encode, NIL, archive );
  286. END New;
  287. PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
  288. VAR i: LONGINT;
  289. BEGIN
  290. huff.Initialize;
  291. i := 0;
  292. WHILE i < len DO
  293. huff.EncodeByte( source[i] ); INC( i );
  294. IF (i MOD ScaleLimit = 0) & ((len-i) >= ScaleLimit) THEN huff.Scale END
  295. END;
  296. huff.Finish
  297. END CompressBlock;
  298. END Encoder;
  299. TYPE
  300. Decoder* =OBJECT
  301. VAR
  302. huff: HuffmanCoder;
  303. PROCEDURE & New*( archive: Streams.Reader );
  304. BEGIN
  305. NEW( huff, Decode, archive, NIL );
  306. END New;
  307. PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT );
  308. VAR i: LONGINT;
  309. BEGIN
  310. huff.Initialize;
  311. i := 0;
  312. WHILE i < len DO
  313. buf[i] := huff.ExtractByte( ); INC( i );
  314. IF (i MOD ScaleLimit = 0) & ((len - i) >= ScaleLimit) THEN huff.Scale END
  315. END;
  316. END ExtractBlock;
  317. END Decoder;
  318. END AdaptiveHuffman.