Huffman.Mod 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. MODULE Huffman; (** AUTHOR GF; PURPOSE "Huffman compression"; *)
  2. IMPORT Streams;
  3. CONST
  4. BlockSize = 8*1024;
  5. TYPE
  6. Pattern = RECORD pattern, freq: LONGINT END;
  7. PatternCounts = ARRAY 256 OF Pattern;
  8. PatternFrequencies = POINTER TO ARRAY OF Pattern; (* ordered by frequency *)
  9. Node = OBJECT
  10. VAR
  11. freq: LONGINT;
  12. left, right: Node; (* both NIL in case of leaf *)
  13. pattern: LONGINT;
  14. PROCEDURE & Init( patt, f: LONGINT );
  15. BEGIN
  16. pattern := patt; freq := f; left := NIL; right := NIL
  17. END Init;
  18. PROCEDURE AddChildren( l, r: Node );
  19. BEGIN
  20. left := l; right := r; freq := l.freq + r.freq
  21. END AddChildren;
  22. END Node;
  23. Encoder* = OBJECT
  24. TYPE
  25. HCode = RECORD len, val: LONGINT END;
  26. VAR
  27. w: Streams.Writer;
  28. codeTable: ARRAY 256 OF HCode;
  29. buffer: ARRAY 2*BlockSize OF CHAR;
  30. byte, curBit, bpos: LONGINT;
  31. PROCEDURE &New*( output: Streams.Writer );
  32. BEGIN
  33. w := output;
  34. END New;
  35. PROCEDURE Initialize( CONST source: ARRAY OF CHAR; len: LONGINT );
  36. VAR pf: PatternFrequencies;
  37. BEGIN
  38. pf := CountPatterns( source, len );
  39. WriteFrequencies( pf );
  40. BuildCodeTable( BuildTree( pf ) );
  41. byte := 0; bpos := 0; curBit := 7;
  42. END Initialize;
  43. PROCEDURE WriteFrequencies( pf: PatternFrequencies );
  44. VAR i, n: LONGINT;
  45. a: ARRAY 256 OF LONGINT;
  46. BEGIN
  47. n := LEN( pf^ );
  48. IF n < 128 THEN
  49. w.Char( CHR( n ) );
  50. FOR i := 0 TO n - 1 DO
  51. w.RawNum( pf[i].freq ); w.Char( CHR( pf[i].pattern ) )
  52. END
  53. ELSE
  54. w.Char( 0X );
  55. FOR i := 0 TO 255 DO a[i] := 0 END;
  56. FOR i := 0 TO n -1 DO a[pf[i].pattern] := pf[i].freq END;
  57. FOR i := 0 TO 255 DO w.RawNum( a[i] ) END
  58. END
  59. END WriteFrequencies;
  60. PROCEDURE CountPatterns( CONST source: ARRAY OF CHAR; len: LONGINT ): PatternFrequencies;
  61. VAR
  62. i: LONGINT; a: PatternCounts;
  63. BEGIN
  64. FOR i := 0 TO 255 DO a[i].pattern := i; a[i].freq := 0 END;
  65. FOR i := 0 TO len - 1 DO INC( a[ORD( source[i] )].freq ) END;
  66. FOR i := 0 TO 255 DO
  67. IF a[i].freq > 0 THEN (* scale => [1..101H] *)
  68. a[i].freq := 100H * a[i].freq DIV len + 1;
  69. END
  70. END;
  71. RETURN SortPatterns( a )
  72. END CountPatterns;
  73. PROCEDURE BuildCodeTable( tree: Node );
  74. VAR
  75. initval: HCode; i: LONGINT;
  76. PROCEDURE Traverse( node: Node; code: HCode );
  77. BEGIN
  78. IF node.left = NIL THEN (* leaf *)
  79. codeTable[node.pattern] := code;
  80. ELSE
  81. INC( code.len );
  82. code.val := 2*code.val; Traverse( node.right, code ); (* ..xx0 *)
  83. code.val := code.val + 1; Traverse( node.left, code ); (* ..xx1 *)
  84. END;
  85. END Traverse;
  86. BEGIN
  87. FOR i := 0 TO 255 DO codeTable[i].len := 0; codeTable[i].val := 0 END;
  88. initval.len := 0; initval.val := 0;
  89. Traverse( tree, initval );
  90. END BuildCodeTable;
  91. PROCEDURE AppendBit( bit: LONGINT );
  92. BEGIN
  93. IF bit # 0 THEN byte := byte + ASH( 1, curBit ) END;
  94. DEC( curBit );
  95. IF curBit < 0 THEN
  96. buffer[bpos] := CHR( byte ); INC( bpos );
  97. byte := 0; curBit := 7
  98. END
  99. END AppendBit;
  100. PROCEDURE Append( code: HCode );
  101. VAR len, val: LONGINT;
  102. BEGIN
  103. len := code.len; val := code.val;
  104. WHILE len > 0 DO
  105. DEC( len ); AppendBit( ASH( val, -len ) MOD 2 )
  106. END
  107. END Append;
  108. PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
  109. VAR i, codesize: LONGINT;
  110. BEGIN
  111. Initialize( source, len );
  112. FOR i := 0 TO len - 1 DO Append( codeTable[ORD( source[i] )] ) END;
  113. codesize := 8*bpos;
  114. IF curBit # 7 THEN
  115. INC( codesize, (7 - curBit) );
  116. buffer[bpos] := CHR( byte ); INC( bpos );
  117. END;
  118. w.RawNum( codesize );
  119. FOR i := 0 TO bpos - 1 DO w.Char( buffer[i] ) END;
  120. w.Update
  121. END CompressBlock;
  122. END Encoder;
  123. Decoder* = OBJECT
  124. VAR
  125. codesize: LONGINT; (* bits! *)
  126. r: Streams.Reader;
  127. tree: Node;
  128. byte, curBit: LONGINT;
  129. PROCEDURE &New*( input: Streams.Reader );
  130. BEGIN
  131. r := input;
  132. END New;
  133. PROCEDURE Initialize;
  134. VAR pf: PatternFrequencies;
  135. BEGIN
  136. pf := ReadFrequencies( r );
  137. tree := BuildTree( pf );
  138. r.RawNum( codesize );
  139. curBit := -1
  140. END Initialize;
  141. PROCEDURE ReadFrequencies( r: Streams.Reader ): PatternFrequencies;
  142. VAR i, n: LONGINT; c: CHAR;
  143. pf: PatternFrequencies;
  144. a: PatternCounts;
  145. BEGIN
  146. r.Char( c ); n := ORD( c );
  147. IF n > 0 THEN
  148. NEW( pf, n );
  149. FOR i := 0 TO n - 1 DO r.RawNum( pf[i].freq ); r.Char( c ); pf[i].pattern := ORD( c ) END
  150. ELSE
  151. FOR i := 0 TO 255 DO a[i].pattern := i; r.RawNum( a[i].freq ) END;
  152. pf := SortPatterns( a )
  153. END;
  154. RETURN pf
  155. END ReadFrequencies;
  156. PROCEDURE GetBit( ): LONGINT;
  157. VAR bit: LONGINT; c: CHAR;
  158. BEGIN
  159. IF curBit < 0 THEN
  160. r.Char( c ); byte := ORD( c ); curBit := 7
  161. END;
  162. bit := ASH( byte, -curBit ) MOD 2; DEC( curBit );
  163. RETURN bit
  164. END GetBit;
  165. PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; VAR len: LONGINT );
  166. VAR
  167. i: LONGINT; n: Node;
  168. BEGIN
  169. Initialize; i := 0; len := 0;
  170. REPEAT
  171. n := tree;
  172. REPEAT
  173. IF GetBit() # 0 THEN n := n.left ELSE n := n.right END;
  174. INC( i )
  175. UNTIL n.left = NIL; (* leaf *)
  176. buf[len] := CHR( n.pattern ); INC( len )
  177. UNTIL i >= codesize;
  178. END ExtractBlock;
  179. END Decoder;
  180. (* sort patterns by frequency, omit unused patterns *)
  181. PROCEDURE SortPatterns( VAR a: PatternCounts ): PatternFrequencies;
  182. VAR
  183. i, n, start: LONGINT; pf: PatternFrequencies;
  184. PROCEDURE Sort( low, high: LONGINT );
  185. VAR
  186. i, j, m: LONGINT; tmp: Pattern;
  187. BEGIN
  188. IF low < high THEN
  189. i := low; j := high; m := (i + j) DIV 2;
  190. REPEAT
  191. WHILE a[i].freq < a[m].freq DO INC( i ) END;
  192. WHILE a[j].freq > a[m].freq DO DEC( j ) END;
  193. IF i <= j THEN
  194. IF i = m THEN m := j ELSIF j = m THEN m := i END;
  195. tmp := a[i]; a[i] := a[j]; a[j] := tmp;
  196. INC( i ); DEC( j )
  197. END;
  198. UNTIL i > j;
  199. Sort( low, j ); Sort( i, high )
  200. END
  201. END Sort;
  202. BEGIN
  203. Sort( 0, 255 ); (* sort patterns by frequency *)
  204. i := 0;
  205. WHILE a[i].freq = 0 DO INC( i ) END; (* skip unused patterns *)
  206. n := 256 - i; start := i;
  207. NEW( pf, n );
  208. FOR i := 0 TO n - 1 DO pf[i] := a[start + i] END;
  209. RETURN pf
  210. END SortPatterns;
  211. PROCEDURE BuildTree( pf: PatternFrequencies ): Node;
  212. VAR
  213. i, start, top: LONGINT; node, n2: Node;
  214. a: POINTER TO ARRAY OF Node;
  215. patt: LONGINT;
  216. BEGIN
  217. NEW( a, LEN( pf^ ) ); top := LEN( pf^ ) - 1;
  218. FOR i := 0 TO top DO NEW( a[i], pf[i].pattern, pf[i].freq ) END;
  219. IF top = 0 THEN
  220. (* the whole, probably last small block contains only a single pattern *)
  221. patt := (a[0].pattern + 1) MOD 256; (* some different pattern *)
  222. NEW( node, 0, 0 ); NEW( n2, patt, 0 ); node.AddChildren( n2, a[0] );
  223. ELSE
  224. start := 0;
  225. WHILE start < top DO
  226. NEW( node, 0, 0 ); node.AddChildren( a[start], a[start+1] );
  227. i := start + 1;
  228. WHILE (i < top) & (a[i+1].freq < node.freq) DO a[i] := a[i+1]; INC( i ) END;
  229. a[i] := node;
  230. INC( start );
  231. END
  232. END;
  233. RETURN node
  234. END BuildTree;
  235. END Huffman.