123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- module Huffman; (** AUTHOR GF; PURPOSE "files and streams compression"; *)
- import Streams, Commands, Files, Strings, Kernel;
- const
- BlockSize = 8*1024;
- HTag = 00FF00F1H ;
-
- type
- HuffmanNode = object
- var
- frequency: longint;
- left, right: HuffmanNode; (* both nil in case of leaf *)
- pattern: char;
-
- procedure & Init( patt: char; freq: longint );
- begin
- pattern := patt; frequency := freq; left := nil; right := nil
- end Init;
-
- procedure AddChildren( l, r: HuffmanNode );
- begin
- left := l; right := r; frequency := l.frequency + r.frequency
- end AddChildren;
-
- end HuffmanNode;
-
-
- Codebits = record
- bitsize: longint;
- val: longint
- end;
-
-
- HuffmanCode = object
- var
- wsize, bitsize: longint;
- buffer: pointer to array BlockSize div 2 of longint;
- lastval, lastbits: longint;
-
- procedure &Init;
- begin
- new( buffer ); Clear
- end Init;
-
- procedure Clear;
- begin
- wsize := 0; lastval := 0; lastbits := 0
- end Clear;
-
-
- procedure Append( const bits: Codebits );
- var
- bitsize, val, addval, addbits, shift: longint;
- begin
- bitsize := bits.bitsize; val := bits.val;
- if lastbits + bitsize > 32 then
- addbits := 32 - lastbits; shift := bitsize - addbits;
- addval := lsh( val, -shift );
- lastval := lsh( lastval, addbits ) + addval;
- dec( bitsize, addbits ); dec( val, lsh( addval, shift ) );
- buffer[wsize] := lastval; inc( wsize ); lastval := 0; lastbits := 0
- end;
- lastval := lsh( lastval, bitsize ) + val; inc( lastbits, bitsize );
- if lastbits = 32 then
- buffer[wsize] := lastval; inc( wsize ); lastval := 0; lastbits := 0
- end
- end Append;
-
-
- procedure WriteCode( w: Streams.Writer );
- var i: longint;
- begin
- bitsize := 32*wsize + lastbits;
- if lastbits > 0 then
- buffer[wsize] := ash( lastval, 32 - lastbits ); inc( wsize );
- end;
-
- w.RawLInt( bitsize );
- for i := 0 to wsize - 1 do w.RawLInt( buffer[i] ) end;
- w.Update
- end WriteCode;
-
-
- procedure ReadCode( r: Streams.Reader );
- var i, n: longint;
- begin
- Clear;
- r.RawLInt( bitsize ); n := (bitsize + 31) div 32;
- for i := 0 to n - 1 do r.RawLInt( buffer[i] ) end
- end ReadCode;
-
-
- procedure Decode( tree: HuffmanNode; w: Streams.Writer );
- var i, x: longint; n: HuffmanNode;
- begin
- i := 0;
- repeat
- n := tree;
- repeat
- if i mod 32 = 0 then x := buffer[i div 32] end;
- if ash( x, i mod 32 ) < 0 then n := n.left else n := n.right end;
- inc( i )
- until n.left = nil; (* leaf *)
- w.Char( n.pattern )
- until i >= bitsize;
- w.Update
- end Decode;
-
- end HuffmanCode;
-
- Pattern = record
- frequency: longint;
- pattern: char
- end;
-
- PatternFrequencies = pointer to array of Pattern; (* ordered by frequency *)
-
-
-
- procedure Encode*( r: Streams.Reader; w: Streams.Writer );
- var
- buffer: HuffmanCode; i, n, needed, ofs, got, chunksize, timeout: longint;
- codeTable: array 256 of Codebits;
- pf: PatternFrequencies;
- plaintext: array BlockSize of char;
- timer: Kernel.Timer;
- begin
- new( buffer ); new( timer );
- w.RawLInt( HTag );
- loop
- if r is Files.Reader then
- r.Bytes( plaintext, 0, BlockSize, chunksize );
- else
- (* give reader some time (~3 sec) to accumulate data *)
- timeout := 100; ofs := 0; needed := BlockSize;
- repeat n := r.Available( );
- if n > 0 then
- if n > needed then n := needed end;
- r.Bytes( plaintext, ofs, n, got ); inc( ofs, got ); dec( needed, got )
- end;
- if needed > 0 then
- if timeout <= 1600 then timer.Sleep( timeout ); timeout := 2*timeout
- else needed := 0
- end;
- end;
- until needed = 0;
- chunksize := ofs
- end;
- if chunksize < 1 then exit end;
- pf := CountPatterns( plaintext, chunksize );
- InitCodeTable( codeTable, NewHuffmanTree( pf ) );
- buffer.Clear;
- for i := 0 to chunksize - 1 do
- buffer.Append( codeTable[ord( plaintext[i] )] );
- end;
- WriteFrequencies( pf, w );
- buffer.WriteCode( w );
- w.Update
- end
- end Encode;
-
-
- procedure Decode*( r: Streams.Reader; w: Streams.Writer; var msg: array of char ): boolean;
- var
- tree: HuffmanNode;
- buffer: HuffmanCode;
- tag: longint;
- begin
- r.RawLInt( tag );
- if tag # HTag then
- msg := "Huffman.Decode: bad input (compressed stream expected)";
- return false
- end;
- new( buffer );
- while r.Available( ) >= 11 do
- tree := NewHuffmanTree( ReadFrequencies( r ) );
- buffer.ReadCode( r );
- buffer.Decode( tree, w )
- end;
- return true
- end Decode;
-
-
- procedure CountPatterns( const block: array of char; blksize: longint ): PatternFrequencies;
- var
- i, n, start: longint;
- a: array 256 of Pattern;
- pf: PatternFrequencies;
-
- procedure Quicksort( low, high: longint );
- var
- i, j, m: longint; tmp: Pattern;
- begin
- if low < high then
- i := low; j := high; m := (i + j) div 2;
- repeat
- while a[i].frequency < a[m].frequency do inc( i ) end;
- while a[j].frequency > a[m].frequency do dec( j ) end;
- if i <= j then
- if i = m then m := j
- elsif j = m then m := i
- end;
- tmp := a[i]; a[i] := a[j]; a[j] := tmp;
- inc( i ); dec( j )
- end;
- until i > j;
- Quicksort( low, j ); Quicksort( i, high )
- end
- end Quicksort;
-
- begin
- for i := 0 to 255 do a[i].pattern := chr( i ); a[i].frequency := 0 end;
- for i := 0 to blksize - 1 do inc( a[ord( block[i] )].frequency ) end;
- Quicksort( 0, 255 ); (* sort patterns by frequency *)
- i := 0;
- while a[i].frequency = 0 do inc( i ) end; (* skip unused patterns *)
- n := 256 - i; start := i;
- new( pf, n );
- for i := 0 to n - 1 do pf[i] := a[start + i] end;
- return pf
- end CountPatterns;
-
-
-
- procedure NewHuffmanTree( pf: PatternFrequencies ): HuffmanNode;
- var
- i, start, top: longint; n, n2: HuffmanNode;
- a: pointer to array of HuffmanNode;
- patt: char;
- begin
- new( a, len( pf^ ) ); top := len( pf^ ) - 1;
- for i := 0 to top do new( a[i], pf[i].pattern, pf[i].frequency ) end;
- if top = 0 then
- (* the whole, probably last small block contains only one pattern *)
- patt := chr( (ord( a[0].pattern ) + 1) mod 256 ); (* some different pattern *)
- new( n, 0X, 0 ); new( n2, patt, 0 ); n.AddChildren( n2, a[0] );
- else
- start := 0;
- while start < top do
- new( n, 0X, 0 ); n.AddChildren( a[start], a[start+1] );
- i := start + 1;
- while (i < top) & (a[i+1].frequency < n.frequency) do a[i] := a[i+1]; inc( i ) end;
- a[i] := n;
- inc( start );
- end
- end;
- return n
- end NewHuffmanTree;
-
-
- procedure InitCodeTable( var table: array of Codebits; huffmanTree: HuffmanNode );
- var
- start: Codebits;
-
- procedure Traverse( node: HuffmanNode; bits: Codebits );
- begin
- if node.left = nil then (* leaf *)
- table[ord( node.pattern )] := bits;
- else
- inc( bits.bitsize );
- bits.val := 2*bits.val; Traverse( node.right, bits ); (* ..xx0 *)
- bits.val := bits.val + 1; Traverse( node.left, bits ); (* ..xx1 *)
- end;
- end Traverse;
-
- begin
- start.bitsize := 0; start.val := 0;
- Traverse( huffmanTree, start );
- end InitCodeTable;
-
-
- procedure ReadFrequencies( r: Streams.Reader ): PatternFrequencies;
- var
- i, n: longint;
- pf: PatternFrequencies;
- begin
- r.RawNum( n );
- new( pf, n );
- for i := 0 to n - 1 do
- r.RawNum( pf[i].frequency ); r.Char( pf[i].pattern );
- end;
- return pf
- end ReadFrequencies;
-
- procedure WriteFrequencies( pf: PatternFrequencies; w: Streams.Writer );
- var i, n: longint;
- begin
- n := len( pf^ );
- w.RawNum( n );
- for i := 0 to n - 1 do
- w.RawNum( pf[i].frequency ); w.Char( pf[i].pattern );
- end;
- end WriteFrequencies;
-
-
- procedure OpenNewFile( const name: array of char ): Files.File;
- var
- name2: array 128 of char; res: longint;
- begin
- if Files.Old( name ) # nil then
- copy( name, name2); Strings.Append( name2, ".Bak" );
- Files.Rename( name, name2, res )
- end;
- return Files.New( name )
- end OpenNewFile;
-
-
- procedure EncodeFile*( c: Commands.Context );
- var
- f1, f2: Files.File;
- r: Files.Reader; w: Files.Writer;
- name1, name2: array 128 of char;
- begin
- if c.arg.GetString( name1 ) then
- if ~c.arg.GetString( name2 ) then
- name2 := name1;
- Strings.Append( name2, ".hc" )
- end;
- f1 := Files.Old( name1 );
- if f1 # nil then
- Files.OpenReader( r, f1, 0 );
- f2 := OpenNewFile( name2 ); Files.OpenWriter( w, f2, 0 );
- Encode( r, w );
- w.Update;
- Files.Register( f2 )
- else
- c.error.String( "could not open file " ); c.error.String( name1 ); c.error.Ln
- end
- else
- c.error.String( "usage: Huffman.EncodeFile filename [filename] ~ " ); c.error.Ln;
- end;
- c.error.Update
- end EncodeFile;
-
-
- procedure DecodeFile*( c: Commands.Context );
- var
- f1, f2: Files.File;
- r: Files.Reader; w: Files.Writer;
- name1, name2, msg: array 128 of char;
- begin
- if c.arg.GetString( name1 ) then
- if ~c.arg.GetString( name2 ) then
- name2 := name1;
- if Strings.EndsWith( ".hc", name2 ) then name2[Strings.Length( name2 ) - 3] := 0X
- else Strings.Append( name2, ".uncomp" )
- end;
- end;
- f1 := Files.Old( name1 );
- if f1 # nil then
- Files.OpenReader( r, f1, 0 );
- f2 := OpenNewFile( name2 ); Files.OpenWriter( w, f2, 0 );
- if Decode( r, w, msg ) then
- w.Update;
- Files.Register( f2 )
- else
- c.error.String( msg ); c.error.Ln
- end
- else
- c.error.String( "could not open file " ); c.error.String( name1 ); c.error.Ln
- end
- else
- c.error.String( "usage: Huffman.DecodeFile filename [filename] ~ " ); c.error.Ln;
- end;
- c.error.Update
- end DecodeFile;
-
- end Huffman.
- Huffman.EncodeFile Huffman.mod ~
- Huffman.EncodeFile Huffman.Obj ~
- Huffman.EncodeFile uebung01.pdf ~
-
- Huffman.DecodeFile Huffman.mod.hc TTT.mod ~
- Huffman.DecodeFile Huffman.Obj.hc TTT.Obj ~
- Huffman.DecodeFile uebung01.pdf.hc TTT.pdf ~
-
- SystemTools.Free Huffman ~
|