/* Illustrative example of minimum-redundancy code calculation. Program reads a file of symbol frequencies -- one per line -- ascending frequency -- first line indicates upper bound on how many frequencies appear; calculate the codeword lengths for a minimum-redundancy code; print out the codeword lengths; print out the average cost per symbol and the entropy. The method for calculating codelengths in function calculate_minimum_redundancy is described in @inproceedings{mk95:wads, author = "A. Moffat and J. Katajainen", title = "In-place calculation of minimum-redundancy codes", booktitle = "Proc. Workshop on Algorithms and Data Structures", address = "Queen's University, Kingston, Ontario", publisher = "LNCS 955, Springer-Verlag", Month = aug, year = 1995, editor = "S.G. Akl and F. Dehne and J.-R. Sack", pages = "393-402", } The abstract of that paper may be fetched from http://www.cs.mu.oz.au/~alistair/abstracts/wads95.html A revised version is currently being prepared. Written by Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk November 1996. */ #include #include #include #define log2(x) (log10((double)(x))/log10(2.0)) void calculate_minimum_redundancy(int *, int); #define LIMIT 1000000 /* maximum value for n */ int main(int argc, char **argv) { int *A; /* the array of freqs */ int *B; /* a copy of A */ int n; /* number of symbols */ int totfreq; /* total frequency */ int bits; /* total bits in codewords */ double ent; /* entropy */ int i; /* claim space for arrays */ scanf("%d", &n); if (n <= 0 || n > LIMIT) { fprintf(stderr, "%s: n should be at least 0 and less than %d\n", argv[0], LIMIT); exit(1); } if ((A = (int *)malloc(n*sizeof(int))) == (int *)NULL || (B = (int *)malloc(n*sizeof(int))) == (int *)NULL) { fprintf(stderr, "%s: unable to allocate memory\n", argv[0]); exit(1); } /* read symbol frequencies */ for (i=0; i0 && A[i-1] > A[i])) { fprintf(stderr, "%s: %s %s\n", argv[0], "input frequencies must be non-negative", "and non-decreasing"); exit(1); } } if (totfreq <= 0) { fprintf(stderr, "%s: sum of frequencies must be positive\n", argv[0]); exit(1); } ent = 0.0; for (i=0; i 0.0) printf("inefficiency = %5.2f%%\n", 100*(double)bits/totfreq/ent - 100.0); free(A); free(B); return(0); } /*** Function to calculate in-place a minimum-redundancy code Parameters: A array of n symbol frequencies, non-decreasing n number of symbols in A */ void calculate_minimum_redundancy(int A[], int n) { int root; /* next root node to be used */ int leaf; /* next leaf to be used */ int next; /* next value to be assigned */ int avbl; /* number of available nodes */ int used; /* number of internal nodes */ int dpth; /* current depth of leaves */ /* check for pathological cases */ if (n==0) { return; } if (n==1) { A[0] = 0; return; } /* first pass, left to right, setting parent pointers */ A[0] += A[1]; root = 0; leaf = 2; for (next=1; next < n-1; next++) { /* select first item for a pairing */ if (leaf>=n || A[root]=n || (root=0; next--) A[next] = A[A[next]]+1; /* third pass, right to left, setting leaf depths */ avbl = 1; used = dpth = 0; root = n-2; next = n-1; while (avbl>0) { while (root>=0 && A[root]==dpth) { used++; root--; } while (avbl>used) { A[next--] = dpth; avbl--; } avbl = 2*used; dpth++; used = 0; } }