2
0

Huffman.mod 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. module Huffman; (** AUTHOR GF; PURPOSE "files and streams compression"; *)
  2. import Streams, Commands, Files, Strings, Kernel;
  3. const
  4. BlockSize = 8*1024;
  5. HTag = 00FF00F1H ;
  6. type
  7. HuffmanNode = object
  8. var
  9. frequency: longint;
  10. left, right: HuffmanNode; (* both nil in case of leaf *)
  11. pattern: char;
  12. procedure & Init( patt: char; freq: longint );
  13. begin
  14. pattern := patt; frequency := freq; left := nil; right := nil
  15. end Init;
  16. procedure AddChildren( l, r: HuffmanNode );
  17. begin
  18. left := l; right := r; frequency := l.frequency + r.frequency
  19. end AddChildren;
  20. end HuffmanNode;
  21. Codebits = record
  22. bitsize: longint;
  23. val: longint
  24. end;
  25. HuffmanCode = object
  26. var
  27. wsize, bitsize: longint;
  28. buffer: pointer to array BlockSize div 2 of longint;
  29. lastval, lastbits: longint;
  30. procedure &Init;
  31. begin
  32. new( buffer ); Clear
  33. end Init;
  34. procedure Clear;
  35. begin
  36. wsize := 0; lastval := 0; lastbits := 0
  37. end Clear;
  38. procedure Append( const bits: Codebits );
  39. var
  40. bitsize, val, addval, addbits, shift: longint;
  41. begin
  42. bitsize := bits.bitsize; val := bits.val;
  43. if lastbits + bitsize > 32 then
  44. addbits := 32 - lastbits; shift := bitsize - addbits;
  45. addval := lsh( val, -shift );
  46. lastval := lsh( lastval, addbits ) + addval;
  47. dec( bitsize, addbits ); dec( val, lsh( addval, shift ) );
  48. buffer[wsize] := lastval; inc( wsize ); lastval := 0; lastbits := 0
  49. end;
  50. lastval := lsh( lastval, bitsize ) + val; inc( lastbits, bitsize );
  51. if lastbits = 32 then
  52. buffer[wsize] := lastval; inc( wsize ); lastval := 0; lastbits := 0
  53. end
  54. end Append;
  55. procedure WriteCode( w: Streams.Writer );
  56. var i: longint;
  57. begin
  58. bitsize := 32*wsize + lastbits;
  59. if lastbits > 0 then
  60. buffer[wsize] := ash( lastval, 32 - lastbits ); inc( wsize );
  61. end;
  62. w.RawLInt( bitsize );
  63. for i := 0 to wsize - 1 do w.RawLInt( buffer[i] ) end;
  64. w.Update
  65. end WriteCode;
  66. procedure ReadCode( r: Streams.Reader );
  67. var i, n: longint;
  68. begin
  69. Clear;
  70. r.RawLInt( bitsize ); n := (bitsize + 31) div 32;
  71. for i := 0 to n - 1 do r.RawLInt( buffer[i] ) end
  72. end ReadCode;
  73. procedure Decode( tree: HuffmanNode; w: Streams.Writer );
  74. var i, x: longint; n: HuffmanNode;
  75. begin
  76. i := 0;
  77. repeat
  78. n := tree;
  79. repeat
  80. if i mod 32 = 0 then x := buffer[i div 32] end;
  81. if ash( x, i mod 32 ) < 0 then n := n.left else n := n.right end;
  82. inc( i )
  83. until n.left = nil; (* leaf *)
  84. w.Char( n.pattern )
  85. until i >= bitsize;
  86. w.Update
  87. end Decode;
  88. end HuffmanCode;
  89. Pattern = record
  90. frequency: longint;
  91. pattern: char
  92. end;
  93. PatternFrequencies = pointer to array of Pattern; (* ordered by frequency *)
  94. procedure Encode*( r: Streams.Reader; w: Streams.Writer );
  95. var
  96. buffer: HuffmanCode; i, n, needed, ofs, got, chunksize, timeout: longint;
  97. codeTable: array 256 of Codebits;
  98. pf: PatternFrequencies;
  99. plaintext: array BlockSize of char;
  100. timer: Kernel.Timer;
  101. begin
  102. new( buffer ); new( timer );
  103. w.RawLInt( HTag );
  104. loop
  105. if r is Files.Reader then
  106. r.Bytes( plaintext, 0, BlockSize, chunksize );
  107. else
  108. (* give reader some time (~3 sec) to accumulate data *)
  109. timeout := 100; ofs := 0; needed := BlockSize;
  110. repeat n := r.Available( );
  111. if n > 0 then
  112. if n > needed then n := needed end;
  113. r.Bytes( plaintext, ofs, n, got ); inc( ofs, got ); dec( needed, got )
  114. end;
  115. if needed > 0 then
  116. if timeout <= 1600 then timer.Sleep( timeout ); timeout := 2*timeout
  117. else needed := 0
  118. end;
  119. end;
  120. until needed = 0;
  121. chunksize := ofs
  122. end;
  123. if chunksize < 1 then exit end;
  124. pf := CountPatterns( plaintext, chunksize );
  125. InitCodeTable( codeTable, NewHuffmanTree( pf ) );
  126. buffer.Clear;
  127. for i := 0 to chunksize - 1 do
  128. buffer.Append( codeTable[ord( plaintext[i] )] );
  129. end;
  130. WriteFrequencies( pf, w );
  131. buffer.WriteCode( w );
  132. w.Update
  133. end
  134. end Encode;
  135. procedure Decode*( r: Streams.Reader; w: Streams.Writer; var msg: array of char ): boolean;
  136. var
  137. tree: HuffmanNode;
  138. buffer: HuffmanCode;
  139. tag: longint;
  140. begin
  141. r.RawLInt( tag );
  142. if tag # HTag then
  143. msg := "Huffman.Decode: bad input (compressed stream expected)";
  144. return false
  145. end;
  146. new( buffer );
  147. while r.Available( ) >= 11 do
  148. tree := NewHuffmanTree( ReadFrequencies( r ) );
  149. buffer.ReadCode( r );
  150. buffer.Decode( tree, w )
  151. end;
  152. return true
  153. end Decode;
  154. procedure CountPatterns( const block: array of char; blksize: longint ): PatternFrequencies;
  155. var
  156. i, n, start: longint;
  157. a: array 256 of Pattern;
  158. pf: PatternFrequencies;
  159. procedure Quicksort( low, high: longint );
  160. var
  161. i, j, m: longint; tmp: Pattern;
  162. begin
  163. if low < high then
  164. i := low; j := high; m := (i + j) div 2;
  165. repeat
  166. while a[i].frequency < a[m].frequency do inc( i ) end;
  167. while a[j].frequency > a[m].frequency do dec( j ) end;
  168. if i <= j then
  169. if i = m then m := j
  170. elsif j = m then m := i
  171. end;
  172. tmp := a[i]; a[i] := a[j]; a[j] := tmp;
  173. inc( i ); dec( j )
  174. end;
  175. until i > j;
  176. Quicksort( low, j ); Quicksort( i, high )
  177. end
  178. end Quicksort;
  179. begin
  180. for i := 0 to 255 do a[i].pattern := chr( i ); a[i].frequency := 0 end;
  181. for i := 0 to blksize - 1 do inc( a[ord( block[i] )].frequency ) end;
  182. Quicksort( 0, 255 ); (* sort patterns by frequency *)
  183. i := 0;
  184. while a[i].frequency = 0 do inc( i ) end; (* skip unused patterns *)
  185. n := 256 - i; start := i;
  186. new( pf, n );
  187. for i := 0 to n - 1 do pf[i] := a[start + i] end;
  188. return pf
  189. end CountPatterns;
  190. procedure NewHuffmanTree( pf: PatternFrequencies ): HuffmanNode;
  191. var
  192. i, start, top: longint; n, n2: HuffmanNode;
  193. a: pointer to array of HuffmanNode;
  194. patt: char;
  195. begin
  196. new( a, len( pf^ ) ); top := len( pf^ ) - 1;
  197. for i := 0 to top do new( a[i], pf[i].pattern, pf[i].frequency ) end;
  198. if top = 0 then
  199. (* the whole, probably last small block contains only one pattern *)
  200. patt := chr( (ord( a[0].pattern ) + 1) mod 256 ); (* some different pattern *)
  201. new( n, 0X, 0 ); new( n2, patt, 0 ); n.AddChildren( n2, a[0] );
  202. else
  203. start := 0;
  204. while start < top do
  205. new( n, 0X, 0 ); n.AddChildren( a[start], a[start+1] );
  206. i := start + 1;
  207. while (i < top) & (a[i+1].frequency < n.frequency) do a[i] := a[i+1]; inc( i ) end;
  208. a[i] := n;
  209. inc( start );
  210. end
  211. end;
  212. return n
  213. end NewHuffmanTree;
  214. procedure InitCodeTable( var table: array of Codebits; huffmanTree: HuffmanNode );
  215. var
  216. start: Codebits;
  217. procedure Traverse( node: HuffmanNode; bits: Codebits );
  218. begin
  219. if node.left = nil then (* leaf *)
  220. table[ord( node.pattern )] := bits;
  221. else
  222. inc( bits.bitsize );
  223. bits.val := 2*bits.val; Traverse( node.right, bits ); (* ..xx0 *)
  224. bits.val := bits.val + 1; Traverse( node.left, bits ); (* ..xx1 *)
  225. end;
  226. end Traverse;
  227. begin
  228. start.bitsize := 0; start.val := 0;
  229. Traverse( huffmanTree, start );
  230. end InitCodeTable;
  231. procedure ReadFrequencies( r: Streams.Reader ): PatternFrequencies;
  232. var
  233. i, n: longint;
  234. pf: PatternFrequencies;
  235. begin
  236. r.RawNum( n );
  237. new( pf, n );
  238. for i := 0 to n - 1 do
  239. r.RawNum( pf[i].frequency ); r.Char( pf[i].pattern );
  240. end;
  241. return pf
  242. end ReadFrequencies;
  243. procedure WriteFrequencies( pf: PatternFrequencies; w: Streams.Writer );
  244. var i, n: longint;
  245. begin
  246. n := len( pf^ );
  247. w.RawNum( n );
  248. for i := 0 to n - 1 do
  249. w.RawNum( pf[i].frequency ); w.Char( pf[i].pattern );
  250. end;
  251. end WriteFrequencies;
  252. procedure OpenNewFile( const name: array of char ): Files.File;
  253. var
  254. name2: array 128 of char; res: longint;
  255. begin
  256. if Files.Old( name ) # nil then
  257. copy( name, name2); Strings.Append( name2, ".Bak" );
  258. Files.Rename( name, name2, res )
  259. end;
  260. return Files.New( name )
  261. end OpenNewFile;
  262. procedure EncodeFile*( c: Commands.Context );
  263. var
  264. f1, f2: Files.File;
  265. r: Files.Reader; w: Files.Writer;
  266. name1, name2: array 128 of char;
  267. begin
  268. if c.arg.GetString( name1 ) then
  269. if ~c.arg.GetString( name2 ) then
  270. name2 := name1;
  271. Strings.Append( name2, ".hc" )
  272. end;
  273. f1 := Files.Old( name1 );
  274. if f1 # nil then
  275. Files.OpenReader( r, f1, 0 );
  276. f2 := OpenNewFile( name2 ); Files.OpenWriter( w, f2, 0 );
  277. Encode( r, w );
  278. w.Update;
  279. Files.Register( f2 )
  280. else
  281. c.error.String( "could not open file " ); c.error.String( name1 ); c.error.Ln
  282. end
  283. else
  284. c.error.String( "usage: Huffman.EncodeFile filename [filename] ~ " ); c.error.Ln;
  285. end;
  286. c.error.Update
  287. end EncodeFile;
  288. procedure DecodeFile*( c: Commands.Context );
  289. var
  290. f1, f2: Files.File;
  291. r: Files.Reader; w: Files.Writer;
  292. name1, name2, msg: array 128 of char;
  293. begin
  294. if c.arg.GetString( name1 ) then
  295. if ~c.arg.GetString( name2 ) then
  296. name2 := name1;
  297. if Strings.EndsWith( ".hc", name2 ) then name2[Strings.Length( name2 ) - 3] := 0X
  298. else Strings.Append( name2, ".uncomp" )
  299. end;
  300. end;
  301. f1 := Files.Old( name1 );
  302. if f1 # nil then
  303. Files.OpenReader( r, f1, 0 );
  304. f2 := OpenNewFile( name2 ); Files.OpenWriter( w, f2, 0 );
  305. if Decode( r, w, msg ) then
  306. w.Update;
  307. Files.Register( f2 )
  308. else
  309. c.error.String( msg ); c.error.Ln
  310. end
  311. else
  312. c.error.String( "could not open file " ); c.error.String( name1 ); c.error.Ln
  313. end
  314. else
  315. c.error.String( "usage: Huffman.DecodeFile filename [filename] ~ " ); c.error.Ln;
  316. end;
  317. c.error.Update
  318. end DecodeFile;
  319. end Huffman.
  320. Huffman.EncodeFile Huffman.mod ~
  321. Huffman.EncodeFile Huffman.Obj ~
  322. Huffman.EncodeFile uebung01.pdf ~
  323. Huffman.DecodeFile Huffman.mod.hc TTT.mod ~
  324. Huffman.DecodeFile Huffman.Obj.hc TTT.Obj ~
  325. Huffman.DecodeFile uebung01.pdf.hc TTT.pdf ~
  326. SystemTools.Free Huffman ~