summaryrefslogtreecommitdiff
path: root/xdelta3/xdelta3-djw.h
diff options
context:
space:
mode:
Diffstat (limited to 'xdelta3/xdelta3-djw.h')
-rw-r--r--xdelta3/xdelta3-djw.h188
1 files changed, 127 insertions, 61 deletions
diff --git a/xdelta3/xdelta3-djw.h b/xdelta3/xdelta3-djw.h
index 154c679..f503492 100644
--- a/xdelta3/xdelta3-djw.h
+++ b/xdelta3/xdelta3-djw.h
@@ -666,17 +666,27 @@ djw_encode_prefix (xd3_stream *stream,
666 666
667 /* Compute number of extra codes beyond basic ones for this template. */ 667 /* Compute number of extra codes beyond basic ones for this template. */
668 num_to_encode = DJW_TOTAL_CODES; 668 num_to_encode = DJW_TOTAL_CODES;
669 while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; } 669 while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0)
670 {
671 num_to_encode -= 1;
672 }
670 XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); 673 XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
671 674
672 /* Encode: # of extra codes */ 675 /* Encode: # of extra codes */
673 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, 676 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
674 num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; } 677 num_to_encode - DJW_EXTRA_12OFFSET)))
678 {
679 return ret;
680 }
675 681
676 /* Encode: MTF code lengths */ 682 /* Encode: MTF code lengths */
677 for (i = 0; i < num_to_encode; i += 1) 683 for (i = 0; i < num_to_encode; i += 1)
678 { 684 {
679 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; } 685 if ((ret = xd3_encode_bits (stream, output, bstate,
686 DJW_CLCLEN_BITS, clclen[i])))
687 {
688 return ret;
689 }
680 } 690 }
681 691
682 /* Encode: CLEN code lengths */ 692 /* Encode: CLEN code lengths */
@@ -686,7 +696,10 @@ djw_encode_prefix (xd3_stream *stream,
686 usize_t bits = clclen[mtf_sym]; 696 usize_t bits = clclen[mtf_sym];
687 usize_t code = clcode[mtf_sym]; 697 usize_t code = clcode[mtf_sym];
688 698
689 if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; } 699 if ((ret = xd3_encode_bits (stream, output, bstate, bits, code)))
700 {
701 return ret;
702 }
690 } 703 }
691 704
692 return 0; 705 return 0;
@@ -821,7 +834,7 @@ xd3_encode_huff (xd3_stream *stream,
821 usize_t groups, sector_size; 834 usize_t groups, sector_size;
822 bit_state bstate = BIT_STATE_ENCODE_INIT; 835 bit_state bstate = BIT_STATE_ENCODE_INIT;
823 xd3_output *in; 836 xd3_output *in;
824 int encode_bits; 837 int output_bits;
825 usize_t input_bits; 838 usize_t input_bits;
826 usize_t input_bytes; 839 usize_t input_bytes;
827 usize_t initial_offset = output->next; 840 usize_t initial_offset = output->next;
@@ -842,13 +855,15 @@ xd3_encode_huff (xd3_stream *stream,
842 if (0) 855 if (0)
843 { 856 {
844 regroup: 857 regroup:
845 /* Sometimes we dynamically decide there are too many groups. Arrive here. */ 858 /* Sometimes we dynamically decide there are too many groups. Arrive
859 * here. */
846 output->next = initial_offset; 860 output->next = initial_offset;
847 xd3_bit_state_encode_init (& bstate); 861 xd3_bit_state_encode_init (& bstate);
848 } 862 }
849 863
850 /* Encode: # of groups (3 bits) */ 864 /* Encode: # of groups (3 bits) */
851 if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; } 865 if ((ret = xd3_encode_bits (stream, & output, & bstate,
866 DJW_GROUP_BITS, groups-1))) { goto failure; }
852 867
853 if (groups == 1) 868 if (groups == 1)
854 { 869 {
@@ -858,21 +873,30 @@ xd3_encode_huff (xd3_stream *stream,
858 uint8_t prefix_mtfsym[ALPHABET_SIZE]; 873 uint8_t prefix_mtfsym[ALPHABET_SIZE];
859 djw_prefix prefix; 874 djw_prefix prefix;
860 875
861 encode_bits = 876 output_bits =
862 djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); 877 djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
863 djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); 878 djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
864 879
865 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } 880 if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
881 {
882 goto nosecond;
883 }
866 884
867 /* Encode: prefix */ 885 /* Encode: prefix */
868 prefix.mtfsym = prefix_mtfsym; 886 prefix.mtfsym = prefix_mtfsym;
869 prefix.symbol = clen; 887 prefix.symbol = clen;
870 prefix.scount = ALPHABET_SIZE; 888 prefix.scount = ALPHABET_SIZE;
871 889
872 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } 890 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
891 {
892 goto failure;
893 }
873 894
874 if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= 895 if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=
875 input_bits && ! cfg->inefficient) { goto nosecond; } 896 input_bits && ! cfg->inefficient)
897 {
898 goto nosecond;
899 }
876 900
877 /* Encode: data */ 901 /* Encode: data */
878 for (in = input; in; in = in->next_page) 902 for (in = input; in; in = in->next_page)
@@ -885,14 +909,18 @@ xd3_encode_huff (xd3_stream *stream,
885 usize_t sym = *p++; 909 usize_t sym = *p++;
886 usize_t bits = clen[sym]; 910 usize_t bits = clen[sym];
887 911
888 IF_DEBUG (encode_bits -= bits); 912 IF_DEBUG (output_bits -= bits);
889 913
890 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; } 914 if ((ret = xd3_encode_bits (stream, & output,
915 & bstate, bits, code[sym])))
916 {
917 goto failure;
918 }
891 } 919 }
892 while (p < p_max); 920 while (p < p_max);
893 } 921 }
894 922
895 XD3_ASSERT (encode_bits == 0); 923 XD3_ASSERT (output_bits == 0);
896 } 924 }
897 else 925 else
898 { 926 {
@@ -916,17 +944,29 @@ xd3_encode_huff (xd3_stream *stream,
916 944
917 /* Encode: sector size (5 bits) */ 945 /* Encode: sector size (5 bits) */
918 if ((ret = xd3_encode_bits (stream, & output, & bstate, 946 if ((ret = xd3_encode_bits (stream, & output, & bstate,
919 DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; } 947 DJW_SECTORSZ_BITS,
948 (sector_size/DJW_SECTORSZ_MULT)-1)))
949 {
950 goto failure;
951 }
920 952
921 /* Dynamic allocation. */ 953 /* Dynamic allocation. */
922 if (gbest == NULL) 954 if (gbest == NULL)
923 { 955 {
924 if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; } 956 if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL)
957 {
958 ret = ENOMEM;
959 goto failure;
960 }
925 } 961 }
926 962
927 if (gbest_mtf == NULL) 963 if (gbest_mtf == NULL)
928 { 964 {
929 if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; } 965 if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL)
966 {
967 ret = ENOMEM;
968 goto failure;
969 }
930 } 970 }
931 971
932 /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ 972 /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
@@ -939,13 +979,14 @@ xd3_encode_huff (xd3_stream *stream,
939 979
940 IF_DEBUG1 (usize_t nz = 0); 980 IF_DEBUG1 (usize_t nz = 0);
941 981
942 /* Due to the single-code granularity of this distribution, it may be that we 982 /* Due to the single-code granularity of this distribution, it may
943 * can't generate a distribution for each group. In that case subtract one 983 * be that we can't generate a distribution for each group. In that
944 * group and try again. If (inefficient), we're testing group behavior, so 984 * case subtract one group and try again. If (inefficient), we're
945 * don't mess things up. */ 985 * testing group behavior, so don't mess things up. */
946 if (goal == 0 && !cfg->inefficient) 986 if (goal == 0 && !cfg->inefficient)
947 { 987 {
948 IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n", groups)); 988 IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n",
989 groups));
949 groups -= 1; 990 groups -= 1;
950 goto regroup; 991 goto regroup;
951 } 992 }
@@ -958,8 +999,10 @@ xd3_encode_huff (xd3_stream *stream,
958 sum += real_freq[sym2++]; 999 sum += real_freq[sym2++];
959 } 1000 }
960 1001
961 IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n", 1002 IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) "
962 gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes);); 1003 "(%u/%u = %.3f)\n",
1004 gp, sym1, sym2, nz, sum,
1005 input_bytes, sum / (double)input_bytes););
963 1006
964 for (s = 0; s < ALPHABET_SIZE; s += 1) 1007 for (s = 0; s < ALPHABET_SIZE; s += 1)
965 { 1008 {
@@ -1040,7 +1083,7 @@ xd3_encode_huff (xd3_stream *stream,
1040 XD3_ASSERT (gbest_no == gbest_max); 1083 XD3_ASSERT (gbest_no == gbest_max);
1041 1084
1042 /* Recompute code lengths. */ 1085 /* Recompute code lengths. */
1043 encode_bits = 0; 1086 output_bits = 0;
1044 for (gp = 0; gp < groups; gp += 1) 1087 for (gp = 0; gp < groups; gp += 1)
1045 { 1088 {
1046 int i; 1089 int i;
@@ -1049,11 +1092,12 @@ xd3_encode_huff (xd3_stream *stream,
1049 1092
1050 memset (evolve_zero, 0, sizeof (evolve_zero)); 1093 memset (evolve_zero, 0, sizeof (evolve_zero));
1051 1094
1052 /* Cannot allow a zero clen when the real frequency is non-zero. Note: this 1095 /* Cannot allow a zero clen when the real frequency is non-zero.
1053 * means we are going to encode a fairly long code for these unused entries. An 1096 * Note: this means we are going to encode a fairly long code for
1054 * improvement would be to implement a NOTUSED code for when these are actually 1097 * these unused entries. An improvement would be to implement a
1055 * zero, but this requires another data structure (evolve_zero) since we don't 1098 * NOTUSED code for when these are actually zero, but this requires
1056 * know when evolve_freq[i] == 0... Briefly tested, looked worse. */ 1099 * another data structure (evolve_zero) since we don't know when
1100 * evolve_freq[i] == 0... Briefly tested, looked worse. */
1057 for (i = 0; i < ALPHABET_SIZE; i += 1) 1101 for (i = 0; i < ALPHABET_SIZE; i += 1)
1058 { 1102 {
1059 if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) 1103 if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
@@ -1064,46 +1108,53 @@ xd3_encode_huff (xd3_stream *stream,
1064 } 1108 }
1065 } 1109 }
1066 1110
1067 encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN); 1111 output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],
1112 ALPHABET_SIZE, DJW_MAX_CODELEN);
1068 1113
1069 /* The above faking of frequencies does not matter for the last iteration, but 1114 /* The above faking of frequencies does not matter for the last
1070 * we don't know when that is yet. However, it also breaks the encode_bits 1115 * iteration, but we don't know when that is yet. However, it also
1071 * computation. Necessary for accuracy, and for the (encode_bits==0) assert 1116 * breaks the output_bits computation. Necessary for accuracy, and
1072 * after all bits are output. */ 1117 * for the (output_bits==0) assert after all bits are output. */
1073 if (any_zeros) 1118 if (any_zeros)
1074 { 1119 {
1075 IF_DEBUG1 (usize_t save_total = encode_bits); 1120 IF_DEBUG1 (usize_t save_total = output_bits);
1076 1121
1077 for (i = 0; i < ALPHABET_SIZE; i += 1) 1122 for (i = 0; i < ALPHABET_SIZE; i += 1)
1078 { 1123 {
1079 if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; } 1124 if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }
1080 } 1125 }
1081 1126
1082 IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp)); 1127 IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n",
1128 save_total - output_bits, gp));
1083 } 1129 }
1084 } 1130 }
1085 1131
1086 IF_DEBUG1( 1132 IF_DEBUG1(
1087 DP(RINT "pass %u total bits: %u group uses: ", niter, encode_bits); 1133 DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits);
1088 for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); } 1134 for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
1089 DP(RINT "\n");); 1135 DP(RINT "\n");
1136 );
1090 1137
1091 /* End iteration. (The following assertion proved invalid.) */ 1138 /* End iteration. */
1092 /*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/
1093 1139
1094 IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) { 1140 IF_DEBUG1 (if (niter > 1 && best_bits < output_bits) {
1095 DP(RINT "iteration lost %u bits\n", encode_bits - best_bits); }); 1141 DP(RINT "iteration lost %u bits\n", output_bits - best_bits); });
1096 1142
1097 if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT)) 1143 if (niter == 1 || (niter < DJW_MAX_ITER &&
1144 (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))
1098 { 1145 {
1099 best_bits = encode_bits; 1146 best_bits = output_bits;
1100 goto repeat; 1147 goto repeat;
1101 } 1148 }
1102 1149
1103 /* Efficiency check. */ 1150 /* Efficiency check. */
1104 if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } 1151 if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
1152 {
1153 goto nosecond;
1154 }
1105 1155
1106 IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0)); 1156 IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n",
1157 input_bytes, output_bits / 8.0));
1107 1158
1108 /* Encode: prefix */ 1159 /* Encode: prefix */
1109 { 1160 {
@@ -1117,7 +1168,10 @@ xd3_encode_huff (xd3_stream *stream,
1117 prefix.repcnt = prefix_repcnt; 1168 prefix.repcnt = prefix_repcnt;
1118 1169
1119 djw_compute_multi_prefix (groups, evolve_clen, & prefix); 1170 djw_compute_multi_prefix (groups, evolve_clen, & prefix);
1120 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } 1171 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
1172 {
1173 goto failure;
1174 }
1121 } 1175 }
1122 1176
1123 /* Encode: selector frequencies */ 1177 /* Encode: selector frequencies */
@@ -1139,7 +1193,10 @@ xd3_encode_huff (xd3_stream *stream,
1139 for (i = 0; i < groups+1; i += 1) 1193 for (i = 0; i < groups+1; i += 1)
1140 { 1194 {
1141 if ((ret = xd3_encode_bits (stream, & output, & bstate, 1195 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1142 DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; } 1196 DJW_GBCLEN_BITS, gbest_clen[i])))
1197 {
1198 goto failure;
1199 }
1143 } 1200 }
1144 1201
1145 for (i = 0; i < gbest_prefix.mcount; i += 1) 1202 for (i = 0; i < gbest_prefix.mcount; i += 1)
@@ -1151,7 +1208,10 @@ xd3_encode_huff (xd3_stream *stream,
1151 XD3_ASSERT (gp_mtf < groups+1); 1208 XD3_ASSERT (gp_mtf < groups+1);
1152 1209
1153 if ((ret = xd3_encode_bits (stream, & output, & bstate, 1210 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1154 gp_sel_bits, gp_sel_code))) { goto failure; } 1211 gp_sel_bits, gp_sel_code)))
1212 {
1213 goto failure;
1214 }
1155 1215
1156 IF_DEBUG (select_bits -= gp_sel_bits); 1216 IF_DEBUG (select_bits -= gp_sel_bits);
1157 } 1217 }
@@ -1160,8 +1220,11 @@ xd3_encode_huff (xd3_stream *stream,
1160 } 1220 }
1161 1221
1162 /* Efficiency check. */ 1222 /* Efficiency check. */
1163 if (encode_bits + select_bits + (8 * output->next) + 1223 if (output_bits + select_bits + (8 * output->next) +
1164 EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } 1224 EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
1225 {
1226 goto nosecond;
1227 }
1165 1228
1166 /* Encode: data */ 1229 /* Encode: data */
1167 { 1230 {
@@ -1171,7 +1234,8 @@ xd3_encode_huff (xd3_stream *stream,
1171 /* Build code tables for each group. */ 1234 /* Build code tables for each group. */
1172 for (gp = 0; gp < groups; gp += 1) 1235 for (gp = 0; gp < groups; gp += 1)
1173 { 1236 {
1174 djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN); 1237 djw_build_codes (evolve_code[gp], evolve_clen[gp],
1238 ALPHABET_SIZE, DJW_MAX_CODELEN);
1175 } 1239 }
1176 1240
1177 /* Now loop over the input. */ 1241 /* Now loop over the input. */
@@ -1196,9 +1260,13 @@ xd3_encode_huff (xd3_stream *stream,
1196 usize_t bits = gp_clens[sym]; 1260 usize_t bits = gp_clens[sym];
1197 usize_t code = gp_codes[sym]; 1261 usize_t code = gp_codes[sym];
1198 1262
1199 IF_DEBUG (encode_bits -= bits); 1263 IF_DEBUG (output_bits -= bits);
1200 1264
1201 if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; } 1265 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1266 bits, code)))
1267 {
1268 goto failure;
1269 }
1202 1270
1203 GP_PAGE (); 1271 GP_PAGE ();
1204 } 1272 }
@@ -1206,9 +1274,7 @@ xd3_encode_huff (xd3_stream *stream,
1206 while (in != NULL); 1274 while (in != NULL);
1207 1275
1208 XD3_ASSERT (select_bits == 0); 1276 XD3_ASSERT (select_bits == 0);
1209 XD3_ASSERT (encode_bits == 0); 1277 XD3_ASSERT (output_bits == 0);
1210
1211#undef evolve_clen
1212 } 1278 }
1213 } 1279 }
1214 1280