/* -*- Mode: C;-*- * * This file is part of XDelta - A binary delta generator. * * Copyright (C) 1997, 1998, 1999, 2001 Josh MacDonald * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Author: Josh MacDonald * * $Id: xdelta.c 1.4.1.50.1.3 Sun, 24 Feb 2002 11:30:34 -0800 jmacd $ */ #define G_DISABLE_ASSERT #include #include #include #include "xdelta.h" #include "xdeltapriv.h" /* $Format: "const guint xdelta_major_version = $ReleaseMajorVersion$;" $ */ const guint xdelta_major_version = 1; /* $Format: "const guint xdelta_minor_version = $ReleaseMinorVersion$;" $ */ const guint xdelta_minor_version = 1; /* $Format: "const guint xdelta_micro_version = $ReleaseMicroVersion$;" $ */ const guint xdelta_micro_version = 4; /* Control functions. */ static XdeltaControl* control_version_0 (SerialVersion0Control* cont) ; static void control_copy (XdeltaControl* cont, XdeltaSource* src, guint from, guint to); static gboolean control_add_info (XdeltaControl* cont, XdeltaSource* src, const guint8* md5, guint len); #ifndef XDELTA_HARDCODE_SIZE int QUERY_SIZE = 0; int QUERY_SIZE_POW = 0; int QUERY_SIZE_MASK = 0; #endif int xdp_set_query_size_pow (int size_pow) { #ifdef XDELTA_HARDCODE_SIZE return XDP_QUERY_HARDCODED; #else int x = 1; int size_log = 0; for (; x != 0; x <<= 1, size_log += 1) { if (x == size_pow) goto good; } return XDP_QUERY_POW2; good: QUERY_SIZE = size_log; QUERY_SIZE_POW = size_pow; QUERY_SIZE_MASK = size_pow-1; return 0; #endif } int xdp_blocksize () { if (QUERY_SIZE == 0) { xdp_set_query_size_pow (1<low = low; cksum->high = high; } /* Generate checksums */ static gboolean generate_checksums (XdeltaStream *stream, XdeltaSource *source) { gint total_checksums = handle_length (stream) / QUERY_SIZE_POW; gint checksum_index = 0; XdeltaChecksum cksum; XdeltaChecksum *result; const guint8* segment = NULL, *segment_pointer; gint segment_len, orig_segment_len; guint segment_page = 0; guint pages; #ifdef DEBUG_CKSUM g_print ("Total base checksums: %d\n", total_checksums); #endif source->ck_count = total_checksums; if (total_checksums == 0) return TRUE; /* This is the in-core FROM checksum table. */ result = g_new (XdeltaChecksum, total_checksums); source->cksums = result; for (pages = handle_pages (stream); segment_page <= pages; segment_page += 1) { segment_len = handle_map_page (stream, segment_page, &segment); if (segment_len < 0) return FALSE; orig_segment_len = segment_len; segment_len >>= QUERY_SIZE; for (segment_pointer = segment; segment_len != 0; segment_len -= 1, segment_pointer += QUERY_SIZE_POW) { /* note: cheating at the boundaries */ init_query_checksum (segment_pointer, &cksum); #ifdef DEBUG_CKSUM g_print ("New cksum %04x %04x indices %d-%d\n", cksum.low, cksum.high, checksum_index * QUERY_SIZE_POW, (checksum_index * QUERY_SIZE_POW) + QUERY_SIZE_POW - 1); #endif result[checksum_index++] = cksum; } if (! handle_unmap_page (stream, segment_page, &segment)) return FALSE; } return TRUE; } /* $Format: "#define XDELTA_REQUIRED_VERSION \"$ReleaseMajorVersion$.$ReleaseMinorVersion$.\"" $ */ #define XDELTA_REQUIRED_VERSION "1.1." XdeltaGenerator* __xdp_generator_new (const char* version) { XdeltaGenerator* xg; if (strncmp (version, XDELTA_REQUIRED_VERSION, strlen (XDELTA_REQUIRED_VERSION)) != 0) g_error ("XDelta library version mismatched, compiled for %s, running %s\n", version, XDELTA_VERSION); xg = g_new0 (XdeltaGenerator, 1); xg->sources = g_ptr_array_new (); xg->data_source = g_new0 (XdeltaSource, 1); xg->data_source->name = "(patch data)"; g_ptr_array_add (xg->sources, xg->data_source); return xg; } void xdp_generator_free (XdeltaGenerator *gen) { int i; for (i = 0; i < gen->sources->len; i += 1) xdp_source_free (gen->sources->pdata[i]); g_ptr_array_free (gen->sources, TRUE); g_free ((void*) gen->table); g_free (gen); } void init_pos (XdeltaStream* str, XdeltaPos* pos) { g_assert (str); memset (pos, 0, sizeof (*pos)); pos->page_size = handle_pagesize (str); } gboolean check_stream_integrity (XdeltaStream* str, const guint8* md5, guint len) { const guint8* act_md5; if (len != handle_length (str)) { xd_generate_handleintint_event (EC_XdStreamLengthFailed, str, len, handle_length (str)); return FALSE; } act_md5 = handle_checksum_md5 (str); if (! act_md5) return FALSE; if (memcmp (md5, act_md5, 16) != 0) { char exp[33], rec[33]; edsio_md5_to_string (md5, exp); edsio_md5_to_string (act_md5, rec); xd_generate_handlestringstring_event (EC_XdStreamChecksumFailed, str, exp, rec); g_free ((void*) act_md5); return FALSE; } g_free ((void*) act_md5); return TRUE; } static gboolean xdp_source_index_read (XdeltaSource *xs, XdeltaStream *index_in) { SerialSource *ss = handle_source (index_in); XdeltaIndex *index; if (! ss) return FALSE; /* TODO: free ss */ if (! unserialize_xdeltaindex (ss, &index)) return FALSE; if (! check_stream_integrity (xs->source_in, index->file_md5, index->file_len)) return FALSE; xs->ck_count = index->index_len; xs->cksums = index->index; return TRUE; } static gboolean xdp_source_index_internal (XdeltaSource *init, XdeltaStream *source_in, XdeltaOutStream *index_out) { if (! generate_checksums (source_in, init)) return FALSE; if (index_out) { const guint8* source_in_md5; SerialSink* sink = handle_sink (index_out, NULL, NULL, NULL, NULL); if (! sink) return FALSE; if (! (source_in_md5 = handle_checksum_md5 (source_in))) return FALSE; if (! serialize_xdeltaindex (sink, handle_length (source_in), source_in_md5, init->ck_count, init->cksums)) { g_free ((void*) source_in_md5); return FALSE; } g_free ((void*) source_in_md5); if (! handle_close (index_out, 0)) return FALSE; } return TRUE; } gboolean xdp_source_index (XdeltaStream *source_in, XdeltaOutStream *index_out) { XdeltaSource* xs = xdp_source_new ("(ignored)", source_in, NULL, index_out); if (xs) { xdp_source_free (xs); return TRUE; } return FALSE; } XdeltaSource* xdp_source_new (const char *name, XdeltaStream *source_in, XdeltaStream *index_in, XdeltaOutStream *index_out) { XdeltaSource* xs = g_new0 (XdeltaSource, 1); xs->source_in = source_in; xs->name = name; g_return_val_if_fail (source_in, NULL); g_return_val_if_fail (index_in ? ! index_out : TRUE, NULL); xs->index_in = index_in; xs->index_out = index_out; xs->source_pos.page_size = handle_pagesize (source_in); return xs; } static gboolean xdp_source_check_index (XdeltaSource *xs) { if (xs->source_index == 0) return TRUE; if (! xs->index_in) return xdp_source_index_internal (xs, xs->source_in, xs->index_out); else return xdp_source_index_read (xs, xs->index_in); } void xdp_source_free (XdeltaSource* xs) { if (xs) { /* if (xs->ckarray) @@@ this is troublesome now g_free (xs->ckarray);*/ g_free (xs); } } void xdp_source_add (XdeltaGenerator *gen, XdeltaSource *src) { if (gen->table) { g_free ((gpointer)gen->table); gen->table = NULL; } g_ptr_array_add (gen->sources, src); } guint c_hash (const XdeltaChecksum* c) { const guint high = c->high; const guint low = c->low; const guint it = (high >> 2) + (low << 3) + (high << 16); return (it ^ high ^ low); } static gboolean region_insert (XdeltaGenerator* gen, const XdeltaPos *xpos, guint len) { /* This is a little bit cryptic: the xpos.mem + EXPR expression * computes the offset into the current page, which is guaranteed * to be correct since map_page has not occured yet. */ const guint8* buf = xpos->mem + (gen->to_output_pos % xpos->page_size); if (len == 0) return TRUE; #ifdef DEBUG_INST g_print ("insert %d at %d\n", len, gen->to_output_pos); #endif if (! handle_write (gen->data_out, buf, len)) return FALSE; control_copy (gen->control, gen->data_source, gen->data_output_pos, gen->data_output_pos + len); gen->to_output_pos += len; gen->data_output_pos += len; return TRUE; } static gboolean region_copy (XdeltaGenerator* gen, XdeltaSource* src, guint from, guint to) { #ifdef DEBUG_INST g_print ("copy %d - %d (%d) to %d\n", from, to, to-from, gen->to_output_pos); #endif control_copy (gen->control, src, from, to); gen->to_output_pos += (to-from); return TRUE; } gboolean unmap_page (XdeltaStream* stream, XdeltaPos* pos) { if (! pos->mem) return TRUE; if (handle_unmap_page (stream, pos->mem_page, &pos->mem)) { pos->mem = NULL; return TRUE; } return FALSE; } gboolean map_page (XdeltaStream* stream, XdeltaPos* pos) { gint on_page; if (pos->mem && pos->mem_page == pos->page) return TRUE; if (pos->mem) { if (! handle_unmap_page (stream, pos->mem_page, &pos->mem)) return FALSE; pos->mem = NULL; } pos->mem_page = pos->page; on_page = handle_map_page (stream, pos->mem_page, &pos->mem); if (on_page >= 0) { pos->mem_rem = on_page; return TRUE; } return FALSE; } static gboolean try_match (XdeltaGenerator *gen, XdeltaStream *in, XdeltaPos *xpos_ptr, XdeltaSource *src, guint src_offset, gboolean *found) { XdeltaPos xpos = *xpos_ptr; XdeltaPos ypos = src->source_pos; gint rem, remsave; gint match_forward = 0; gint match_backward = 0; gint match_forward_max; gint match_backward_max; guint to_offset = XPOS (xpos); gboolean one_insert = FALSE; *found = FALSE; ypos.page = src_offset / ypos.page_size; ypos.off = src_offset % ypos.page_size; match_forward_max = MIN (handle_length (in) - to_offset, handle_length (src->source_in) - src_offset); match_backward_max = MIN (src_offset, to_offset - gen->to_output_pos); /* Don't allow backward paging */ match_backward_max = MIN (match_backward_max, xpos.off); /* We're testing against the negative below. */ match_backward_max = - match_backward_max; for (; match_backward > match_backward_max; ) { g_assert (xpos.off != 0); if (ypos.off == 0) { ypos.off = ypos.page_size; ypos.page -= 1; } if (! map_page (src->source_in, &ypos)) goto bail; rem = MIN (xpos.off, ypos.off); rem = MIN (match_backward - match_backward_max, rem); for (; rem > 0; rem -= 1, match_backward -= 1) { if (xpos.mem[xpos.off-1] != ypos.mem[ypos.off-1]) goto doneback; xpos.off -= 1; ypos.off -= 1; } } doneback: xpos.page = to_offset / xpos.page_size; xpos.off = to_offset % xpos.page_size; ypos.page = src_offset / ypos.page_size; ypos.off = src_offset % ypos.page_size; for (; match_forward < match_forward_max; ) { if (! map_page (src->source_in, &ypos)) goto bail; /* Fortunately, if this map happens it means that the match must * be long enough to succeed below, therefore it is safe to write * the insert out now. */ if (! one_insert && xpos.page != xpos.mem_page) { one_insert = TRUE; if (! region_insert (gen, &xpos, (to_offset + match_backward) - gen->to_output_pos)) goto bail; } if (! map_page (in, &xpos)) goto bail; rem = MIN (xpos.mem_rem - xpos.off, ypos.mem_rem - ypos.off); rem = MIN (match_forward_max - match_forward, rem); /* Do a int-wise comparison if the regions are aligned. */ if (rem > (4*sizeof(int)) && (xpos.off % sizeof (int)) == (ypos.off % sizeof(int))) { gint is; const int *xi, *yi; for (; xpos.off % sizeof(int); rem -= 1, match_forward += 1) { if (xpos.mem[xpos.off] != ypos.mem[ypos.off]) goto done; xpos.off += 1; ypos.off += 1; } remsave = rem; rem /= sizeof(int); xi = (const int*) (xpos.mem + xpos.off); yi = (const int*) (ypos.mem + ypos.off); is = rem; for (; rem > 0 && *xi == *yi; ) { rem -= 1; xi += 1; yi += 1; } is -= rem; match_forward += is * sizeof(int); xpos.off += is * sizeof(int); ypos.off += is * sizeof(int); rem = (rem * sizeof(int)) + (remsave % sizeof(int)); } for (; rem > 0; rem -= 1, match_forward += 1) { if (xpos.mem[xpos.off] != ypos.mem[ypos.off]) goto done; xpos.off += 1; ypos.off += 1; } FLIP_FORWARD (xpos); FLIP_FORWARD (ypos); } done: if (match_forward - match_backward >= QUERY_SIZE_POW) { *found = TRUE; if (! one_insert) { if (! region_insert (gen, &xpos, (to_offset + match_backward) - gen->to_output_pos)) goto bail; } if (! region_copy (gen, src, src_offset + match_backward, src_offset + match_forward)) goto bail; } else { g_assert (! one_insert); } *xpos_ptr = xpos; src->source_pos = ypos; return TRUE; bail: *xpos_ptr = xpos; src->source_pos = ypos; return FALSE; } static gboolean compute_copies (XdeltaGenerator* gen, XdeltaStream* stream) { XdeltaChecksum cksum; const XdeltaChecksum *source_cksum; const guint8 *segment_pointer; guint source_offset, segment_index, index, prime = gen->table_size; guint source_index; const guint32* table = gen->table; guint16 old_c, new_c; guint save_page, save_off; #ifdef DEBUG_MATCH_PRINT guint i; #endif XdeltaPos xpos; gboolean found; gboolean ret = TRUE; if (handle_length (stream) < QUERY_SIZE_POW) return TRUE; init_pos (stream, &xpos); while (XPOS (xpos) <= (handle_length (stream) - QUERY_SIZE_POW)) { if (!map_page (stream, &xpos)) return FALSE; g_assert (xpos.mem_rem > xpos.off); segment_index = (xpos.mem_rem - xpos.off); if (segment_index < QUERY_SIZE_POW) goto nextpage; segment_index -= QUERY_SIZE_POW; segment_pointer = xpos.mem + xpos.off; init_query_checksum (segment_pointer, &cksum); for (; ; segment_pointer += 1) { #ifdef DEBUG_CKSUM_UPDATE XdeltaChecksum cktest; init_query_checksum (segment_pointer, &cktest); if (cktest.high != cksum.high || cktest.low != cktest.low) abort (); #endif index = c_hash (&cksum) % prime; #ifdef DEBUG_MATCH_PRINT g_print ("%d: searching for match \"", XPOS(xpos)); for (i = 0; i < QUERY_SIZE_POW; i += 1) { if (isprint (segment_pointer[i])) g_print ("%c", segment_pointer[i]); else g_print ("\\0%o", segment_pointer[i]); } g_print ("\"... %s\n", table[index] ? "found" : "notfound"); #endif if (table[index]) { source_index = (table[index] & QUERY_SIZE_MASK) - 1; source_offset = (table[index] >> QUERY_SIZE); source_cksum = ((XdeltaSource*)gen->sources->pdata[source_index])->cksums + source_offset; if (cksum.high == source_cksum->high && cksum.low == source_cksum->low) { save_page = xpos.page; save_off = xpos.off; if (! try_match (gen, stream, &xpos, gen->sources->pdata[source_index], source_offset << QUERY_SIZE, &found)) { ret = FALSE; goto bail; } if (found) { g_assert (xpos.page*xpos.page_size+xpos.off == gen->to_output_pos); goto reenter; } else { xpos.page = save_page; xpos.off = save_off; } } } if (segment_index == 0) goto nextpage; segment_index -= 1; xpos.off += 1; old_c = CHEW(segment_pointer[0]); new_c = CHEW(segment_pointer[QUERY_SIZE_POW]); cksum.low -= old_c; cksum.low += new_c; cksum.high -= old_c << QUERY_SIZE; cksum.high += cksum.low; } nextpage: if (xpos.mem_rem < xpos.page_size) break; xpos.page += 1; xpos.off = 0; if (xpos.page != xpos.mem_page) { if (! region_insert (gen, &xpos, XPOS (xpos) - gen->to_output_pos)) return FALSE; } reenter: (void) 0; } xpos.off = gen->to_output_pos % handle_pagesize (stream); while (gen->to_output_pos < handle_length (stream)) { if (! map_page (stream, &xpos)) return FALSE; if (! region_insert (gen, &xpos, xpos.mem_rem - xpos.off)) ret = FALSE; xpos.off = 0; xpos.page += 1; } bail: if (! unmap_page (stream, &xpos)) return FALSE; return ret; } static gboolean just_output (XdeltaGenerator *gen, XdeltaStream *in) { XdeltaPos pos; init_pos (in, &pos); while (gen->to_output_pos < handle_length (in)) { if (! map_page (in, &pos)) return FALSE; if (! region_insert (gen, &pos, pos.mem_rem - pos.off)) return FALSE; pos.off = 0; pos.page += 1; } if (! unmap_page (in, &pos)) return FALSE; return TRUE; } /* Clobbering decision (see below): * * Algorithm A: Clobber it always (its fast!). The problem * is that this prefers matches at the front of * the file and leads to poor matches at the back * of the file (assuming I insert going backwards). * * Algorithm B: Keep a table of how many times there has * been a clobber at each index i, C[i]. * With probability 1/(C[i]+1), replace the * previous entry. This gives a uniform * probability of each entry surviving. * The problem (supposed) with this * algorithm is that the probabilities * should not be uniform (though uniform is * better than A) because there are more * chances to match a segment at the end of * the file than at the beginning. * * Algorithm C: Give a linear weight to each match * according to it's position in the file * -- number the segments from N down to 1 * starting at the beginning. Same as the * above but with a different weight. The * weight for segment i, match at checksum * offset k, follows. The total number of * checksums in the segment is C_i, * therefore the total checksum count is * C = sum_i (C_i). * Now the interior weight is the (C_i-k) * (the linear assumption) and the total * interior weight is sum_{j=1}^{N}{j}=(N)(N+1)/2 * so the kth segment has interior weight * * [2 (C_i - k)] / [(C_i) (C_i + 1)] * * add in the exterior weight (after * cancelling a C_i): * * w(i,k) = [2 (C_i - k)] / [(C_i + 1) (C)] * * Now, as above, we will compute whether to * keep or replace the current value at the j-th * decision. Let R_j be the running sum of * weights considered so far. R_0 = 0. At the * j-th decision, * * P_ikj(use new value) = w(i,k)/R_{j+1} * R_{j+1} = R_j + w(i,k) */ static gboolean xdp_generate_delta_int (XdeltaGenerator *gen, XdeltaStream *in, XdeltaOutStream *control_out, XdeltaOutStream *data_out) { gint i, j, total_from_ck_count = 0, prime = 0, index = 0; gint total_from_len = 0; guint32* table = NULL; if (QUERY_SIZE == 0) { xdp_set_query_size_pow (1<sources->len == 0) { xd_generate_void_event (EC_XdTooFewSources); return FALSE; } for (i = 0; i < gen->sources->len; i += 1) { XdeltaSource* src = gen->sources->pdata[i]; src->used = FALSE; src->sequential = TRUE; src->position = 0; src->source_index = i; if (src->source_index != 0) total_from_len += handle_length (src->source_in); } /* QUERY_SIZE_POW is the number of elements freed in the cksum hash * table for storing segment number + (offset/QUERY_SIZE_POW) in. 1 * for the zero + 1 for the data segment */ if (gen->sources->len > (QUERY_SIZE_POW-2)) { xd_generate_void_event (EC_XdTooManySources); return FALSE; } if (handle_length (in) < QUERY_SIZE_POW || total_from_len < QUERY_SIZE_POW) { if (! just_output (gen, in)) return FALSE; } else { for (i = 0; i < gen->sources->len; i += 1) { XdeltaSource* xs = (XdeltaSource*)gen->sources->pdata[i]; if (! xdp_source_check_index (xs)) return FALSE; total_from_ck_count += xs->ck_count; } prime = g_spaced_primes_closest (total_from_ck_count); gen->table = table = g_new0 (guint32, prime); gen->table_size = prime; for (i = 0; i < gen->sources->len; i += 1) { XdeltaSource* xs = (XdeltaSource*)gen->sources->pdata[i]; for (j = xs->ck_count-1; j >= 0; j -= 1) { index = c_hash (xs->cksums + j) % prime; #ifdef DEBUG_HASH gen->hash_entries += 1; if (table[index]) { gen->hash_conflicts += 1; /*regions_similar (gen, i, j, (table[index] & QUERY_SIZE_MASK) - 1, table[index] >> QUERY_SIZE);*/ } #endif /* This is the real code */ table[index] = (j << QUERY_SIZE) + 1 + i; } } #ifdef DEBUG_HASH for (i = 0; i < prime; i += 1) { if (gen->table[i]) gen->hash_fill += 1; } g_print ("*** Hash stats:\n"); g_print ("Hash conflicts: %d\n", gen->hash_conflicts); g_print ("Hash real conflicts: %d\n", gen->hash_real_conflicts); g_print ("Hash real real conflicts: %d\n", gen->hash_real_real_conflicts); g_print ("Hash fill: %d\n", gen->hash_fill); g_print ("Hash size: %d\n", gen->table_size); g_print ("Hash entries: %d\n", gen->hash_entries); g_print ("Hash fill/entries: %f\n", (float)gen->hash_fill/(float)gen->hash_entries); #endif if (! compute_copies (gen, in)) return FALSE; } return TRUE; } XdeltaControl* xdp_generate_delta (XdeltaGenerator *gen, XdeltaStream *in, XdeltaOutStream *control_out, XdeltaOutStream *data_out) { gint i; const guint8* in_md5; const guint8* data_out_md5; gen->data_out = data_out; gen->control_out = control_out; gen->control = control_new (); if (! xdp_generate_delta_int (gen, in, control_out, data_out)) return NULL; if (! handle_close (data_out, 0)) return NULL; if (! (in_md5 = handle_checksum_md5 (in))) return FALSE; if (! (data_out_md5 = handle_checksum_md5 (data_out))) return FALSE; gen->control->has_data = gen->data_source->used; gen->control->inst = &g_array_index (gen->control->inst_array, XdeltaInstruction, 0); gen->control->inst_len = gen->control->inst_array->len; gen->control->to_len = handle_length (in); memcpy (gen->control->to_md5, in_md5, 16); for (i = 0; i < gen->sources->len; i += 1) { XdeltaSource* src = gen->sources->pdata[i]; const guint8* md5; guint len; if (src->source_in) { if (! (md5 = handle_checksum_md5 (src->source_in))) return FALSE; len = handle_length (src->source_in); } else { len = handle_length (data_out); md5 = data_out_md5; } if (! control_add_info (gen->control, src, md5, len)) return NULL; } gen->control->source_info = (XdeltaSourceInfo**) gen->control->source_info_array->pdata; gen->control->source_info_len = gen->control->source_info_array->len; if (control_out && ! xdp_control_write (gen->control, control_out)) return NULL; return gen->control; } /* Below here boring details mostly to do with reading and writing * control. */ XdeltaControl* control_new (void) { XdeltaControl* it = g_new0 (XdeltaControl, 1); it->inst_array = g_array_new (FALSE, FALSE, sizeof (XdeltaInstruction)); it->source_info_array = g_ptr_array_new (); return it; } static void control_reindex (XdeltaControl* cont, XdeltaSource* src) { gint i; gint new_index = cont->source_info_array->len; for (i = 0; i < cont->inst_len; i += 1) { XdeltaInstruction* inst = cont->inst + i; if (inst->index == src->source_index) inst->index = new_index; } } gboolean control_add_info (XdeltaControl* cont, XdeltaSource* src, const guint8* md5, guint len) { XdeltaSourceInfo* si; if (! src->used) return TRUE; si = g_new0 (XdeltaSourceInfo, 1); si->name = src->name; si->sequential = src->sequential; si->len = len; si->isdata = (src->source_index == 0); memcpy (si->md5, md5, 16); control_reindex (cont, src); g_ptr_array_add (cont->source_info_array, si); return TRUE; } void xdp_control_free (XdeltaControl* cont) { if (cont->source_info_array) g_ptr_array_free (cont->source_info_array, TRUE); if (cont->inst_array) g_array_free (cont->inst_array, TRUE); g_free (cont); } void control_copy (XdeltaControl* cont, XdeltaSource* src, guint from, guint to) { XdeltaInstruction i; if (cont->inst_array->len > 0) { XdeltaInstruction* oi = & g_array_index (cont->inst_array, XdeltaInstruction, cont->inst_array->len-1); if (oi->index == src->source_index && (oi->offset + oi->length) == from) { oi->length += (to - from); return; } } i.index = src->source_index; i.offset = from; i.length = to - from; src->used = TRUE; if (src->position != from) src->sequential = FALSE; src->position = to; g_array_append_val (cont->inst_array, i); } #ifdef DEBUG_CONT static void print_info (XdeltaSourceInfo* si) { char md5str[33]; edsio_md5_to_string (si->md5, md5str); g_print (" ** info\n"); g_print (" md5: %s\n", md5str); g_print (" len: %d\n", si->length); } static void print_inst (XdeltaInstruction* i) { switch (i->type) { case INST_TYPE_COPY: g_print (" copy (%c) %d-%d (%d) from %d\n", i->type, i->offset, i->offset + i->length, i->length, i->index); break; case INST_TYPE_INSERT: g_print (" insert %d\n", i->length); break; } } static void xdp_print_control (XdeltaControl *cont) { gint i; g_print ("*** control\n"); g_print (" data len: %d\n", cont->data_len); print_info (&cont->to_info); g_print (" source info len: %d\n", cont->source_info_len); for (i = 0; i < cont->source_info_len; i += 1) print_info (cont->source_info[i]); g_print (" inst len: %d\n", cont->inst_len); for (i = 0; i < cont->inst_len; i += 1) print_inst (cont->inst + i); } #endif #ifdef DEBUG_CHECK_CONTROL void check_control (XdeltaControl* cont) { gint i; for (i = 0; i < cont->inst_len; i += 1) { switch (cont->inst[i].type) { case INST_TYPE_NCOPY: case INST_TYPE_COPY: if (cont->inst[i].index >= cont->source_info_len) g_error ("control has a too high instruction index\n"); } } } #endif static gboolean unpack_instructions (XdeltaControl* cont) { gint i; guint output_pos = 0; for (i = 0; i < cont->source_info_len; i += 1) { XdeltaSourceInfo* info = cont->source_info[i]; info->position = 0; info->copies = 0; info->copy_length = 0; } for (i = 0; i < cont->inst_len; i += 1) { XdeltaSourceInfo* info; XdeltaInstruction *inst = cont->inst + i; if (inst->index >= cont->source_info_len) { xd_generate_int_event (EC_XdOutOfRangeSourceIndex, inst->index); return FALSE; } info = cont->source_info[inst->index]; if (info->sequential) { inst->offset = info->position; info->position = inst->offset + inst->length; } inst->output_start = output_pos; output_pos += inst->length; info->copies += 1; info->copy_length += inst->length; } return TRUE; } static gboolean pack_instructions (XdeltaControl* cont) { gint i; for (i = 0; i < cont->source_info_len; i += 1) { XdeltaSourceInfo* info = cont->source_info[i]; info->position = 0; info->copies = 0; info->copy_length = 0; } for (i = 0; i < cont->inst_len; i += 1) { XdeltaSourceInfo* info; XdeltaInstruction *inst = cont->inst + i; if (inst->index >= cont->source_info_len) { xd_generate_int_event (EC_XdOutOfRangeSourceIndex, inst->index); return FALSE; } info = cont->source_info[inst->index]; if (info->sequential) { g_assert (info->position == inst->offset); info->position += inst->length; inst->offset = 0; } info->copies += 1; info->copy_length += inst->length; } return TRUE; } gboolean xdp_control_write (XdeltaControl *cont, XdeltaOutStream *cont_out) { SerialSink* sink = handle_sink (cont_out, NULL, NULL, NULL, NULL); #ifdef DEBUG_CONT xdp_print_control (cont); #endif #ifdef DEBUG_CHECK_CONTROL check_control (cont); #endif if (! sink) return FALSE; if (! pack_instructions (cont)) return FALSE; /* @@@ think about how the count function overcounts on the instruction * array by a factor of 2 or more. */ if (! serialize_xdeltacontrol_obj (sink, cont)) return FALSE; if (! handle_close (cont_out, 0)) return FALSE; return TRUE; } XdeltaControl* xdp_control_read (XdeltaStream *cont_in) { SerialSource* src = handle_source (cont_in); XdeltaControl* cont; SerialType type; if (! src) return NULL; /* TODO: free src */ if (! serializeio_unserialize_generic_acceptable (src, ST_XdeltaControl | ST_Version0Control, & type, (void**) & cont)) { /* TODO: the warning below was added in 1.1.5 after a bug report * regarding an invalid delta, which would fail in patch here: * It's unclear whether this is the "right" place to print the * error. */ g_warning ("patch parse error\n"); return NULL; } if (type == ST_Version0Control) { SerialVersion0Control *ocont = (SerialVersion0Control*) cont; xd_generate_string_event (EC_XdBackwardCompatibilityMode, "1.0"); cont = control_version_0 (ocont); g_free (ocont); } if (! unpack_instructions (cont)) return NULL; #ifdef DEBUG_CHECK_CONTROL check_control (cont); #endif return cont; } XdeltaControl* control_version_0 (SerialVersion0Control* ocont) { XdeltaControl* cont = g_new0 (XdeltaControl, 1); gint i; XdeltaSourceInfo* dinfo; g_assert (! ocont->normalized); memcpy (cont->to_md5, ocont->to_info.real_md5, 16); cont->to_len = ocont->to_info.length; cont->has_data = TRUE; cont->source_info_len = ocont->source_info_len + 1; cont->source_info = g_new (XdeltaSourceInfo*, cont->source_info_len); cont->source_info[0] = dinfo = g_new0 (XdeltaSourceInfo, 1); dinfo->name = "(patch data)"; memcpy (dinfo->md5, ocont->to_info.md5, 15); dinfo->len = ocont->data_len; dinfo->isdata = TRUE; dinfo->sequential = FALSE; for (i = 0; i < ocont->source_info_len; i += 1) { XdeltaSourceInfo* info = g_new0 (XdeltaSourceInfo, 1); SerialVersion0SourceInfo* oinfo = ocont->source_info[i]; cont->source_info[i+1] = info; info->name = "unnamed"; memcpy (info->md5, oinfo->md5, 16); info->len = oinfo->length; info->isdata = FALSE; info->sequential = FALSE; } /* The old unpack */ #define OLD_QUERY_SIZE 4 for (i = 0; i < ocont->inst_len; i += 1) { switch (ocont->inst[i].length & 3) { case 0: ocont->inst[i].type = 'N'; break; case 1: ocont->inst[i].type = 'E'; break; case 2: ocont->inst[i].type = 'C'; break; case 3: ocont->inst[i].type = 'I'; break; } ocont->inst[i].length >>= 2; ocont->inst[i].index = ocont->inst[i].length & OLD_QUERY_SIZE; ocont->inst[i].length >>= OLD_QUERY_SIZE; } cont->inst_len = ocont->inst_len; cont->inst = g_new (XdeltaInstruction, cont->inst_len); for (i = 0; i < cont->inst_len; i += 1) { cont->inst[i].length = ocont->inst[i].length; cont->inst[i].offset = ocont->inst[i].offset; switch (ocont->inst[i].type) { case 'N': case 'E': abort (); break; case 'C': g_assert (ocont->inst[i].index == 0); cont->inst[i].index = 1; break; case 'I': cont->inst[i].index = 0; break; } } return cont; }