summaryrefslogtreecommitdiff
path: root/xdelta1/xdelta.c
diff options
context:
space:
mode:
Diffstat (limited to 'xdelta1/xdelta.c')
-rw-r--r--xdelta1/xdelta.c1541
1 files changed, 0 insertions, 1541 deletions
diff --git a/xdelta1/xdelta.c b/xdelta1/xdelta.c
deleted file mode 100644
index 890643d..0000000
--- a/xdelta1/xdelta.c
+++ /dev/null
@@ -1,1541 +0,0 @@
1/* -*- Mode: C;-*-
2 *
3 * This file is part of XDelta - A binary delta generator.
4 *
5 * Copyright (C) 1997, 1998, 1999, 2001 Josh MacDonald
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Author: Josh MacDonald <jmacd@CS.Berkeley.EDU>
22 *
23 * $Id: xdelta.c 1.4.1.50.1.3 Sun, 24 Feb 2002 11:30:34 -0800 jmacd $
24 */
25
26#define G_DISABLE_ASSERT
27
28#include <string.h>
29#include <stdlib.h>
30#include <ctype.h>
31
32#include "xdelta.h"
33#include "xdeltapriv.h"
34
35/* $Format: "const guint xdelta_major_version = $ReleaseMajorVersion$;" $ */
36const guint xdelta_major_version = 1;
37/* $Format: "const guint xdelta_minor_version = $ReleaseMinorVersion$;" $ */
38const guint xdelta_minor_version = 1;
39/* $Format: "const guint xdelta_micro_version = $ReleaseMicroVersion$;" $ */
40const guint xdelta_micro_version = 4;
41
42/* Control functions.
43 */
44
45static XdeltaControl* control_version_0 (SerialVersion0Control* cont) ;
46static void control_copy (XdeltaControl* cont, XdeltaSource* src, guint from, guint to);
47static gboolean control_add_info (XdeltaControl* cont, XdeltaSource* src, const guint8* md5, guint len);
48
49#ifndef XDELTA_HARDCODE_SIZE
50int QUERY_SIZE = 0;
51int QUERY_SIZE_POW = 0;
52int QUERY_SIZE_MASK = 0;
53#endif
54
55int xdp_set_query_size_pow (int size_pow)
56{
57#ifdef XDELTA_HARDCODE_SIZE
58 return XDP_QUERY_HARDCODED;
59#else
60
61 int x = 1;
62 int size_log = 0;
63
64 for (; x != 0; x <<= 1, size_log += 1)
65 {
66 if (x == size_pow)
67 goto good;
68 }
69
70 return XDP_QUERY_POW2;
71
72 good:
73
74 QUERY_SIZE = size_log;
75 QUERY_SIZE_POW = size_pow;
76 QUERY_SIZE_MASK = size_pow-1;
77
78 return 0;
79
80#endif
81}
82
83int
84xdp_blocksize ()
85{
86 if (QUERY_SIZE == 0)
87 {
88 xdp_set_query_size_pow (1<<QUERY_SIZE_DEFAULT);
89 }
90
91 return QUERY_SIZE_POW;
92}
93
94const char*
95xdp_errno (int errval)
96{
97 switch (errval)
98 {
99 case XDP_QUERY_HARDCODED:
100 return "query size hardcoded";
101 case XDP_QUERY_POW2:
102 return "query size must be a power of 2";
103 }
104
105 return g_strerror (errval);
106}
107
108const guint16 single_hash[256] =
109{
110 /* Random numbers generated using SLIB's pseudo-random number
111 * generator. */
112 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
113 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
114 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
115 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
116 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
117 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
118 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
119 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
120 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
121 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
122 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
123 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
124 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
125 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
126 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
127 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
128 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
129 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
130 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
131 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
132 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
133 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
134 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
135 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
136 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
137 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
138 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
139 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
140 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
141 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
142 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
143 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
144};
145
146/* Compute the hash of length len for buf, a single byte array of
147 * values, by first indexing the random array.
148 */
149
150static void
151init_query_checksum (const guint8 *buf, XdeltaChecksum *cksum)
152{
153 gint i = QUERY_SIZE_POW;
154 guint16 low = 0;
155 guint16 high = 0;
156
157 for (; i != 0; i -= 1)
158 {
159 low += CHEW(*buf++);
160 high += low;
161 }
162
163 cksum->low = low;
164 cksum->high = high;
165}
166
167/* Generate checksums
168 */
169
170static gboolean
171generate_checksums (XdeltaStream *stream,
172 XdeltaSource *source)
173{
174 gint total_checksums = handle_length (stream) / QUERY_SIZE_POW;
175 gint checksum_index = 0;
176 XdeltaChecksum cksum;
177 XdeltaChecksum *result;
178 const guint8* segment = NULL, *segment_pointer;
179 gint segment_len, orig_segment_len;
180 guint segment_page = 0;
181 guint pages;
182
183#ifdef DEBUG_CKSUM
184 g_print ("Total base checksums: %d\n", total_checksums);
185#endif
186
187 source->ck_count = total_checksums;
188
189 if (total_checksums == 0)
190 return TRUE;
191
192 /* This is the in-core FROM checksum table. */
193 result = g_new (XdeltaChecksum, total_checksums);
194 source->cksums = result;
195
196 for (pages = handle_pages (stream); segment_page <= pages; segment_page += 1)
197 {
198 segment_len = handle_map_page (stream, segment_page, &segment);
199
200 if (segment_len < 0)
201 return FALSE;
202
203 orig_segment_len = segment_len;
204
205 segment_len >>= QUERY_SIZE;
206
207 for (segment_pointer = segment;
208 segment_len != 0;
209 segment_len -= 1, segment_pointer += QUERY_SIZE_POW)
210 {
211 /* note: cheating at the boundaries */
212 init_query_checksum (segment_pointer, &cksum);
213
214#ifdef DEBUG_CKSUM
215 g_print ("New cksum %04x %04x indices %d-%d\n",
216 cksum.low, cksum.high,
217 checksum_index * QUERY_SIZE_POW, (checksum_index * QUERY_SIZE_POW) + QUERY_SIZE_POW - 1);
218#endif
219
220 result[checksum_index++] = cksum;
221 }
222
223 if (! handle_unmap_page (stream, segment_page, &segment))
224 return FALSE;
225 }
226
227 return TRUE;
228}
229
230/* $Format: "#define XDELTA_REQUIRED_VERSION \"$ReleaseMajorVersion$.$ReleaseMinorVersion$.\"" $ */
231#define XDELTA_REQUIRED_VERSION "1.1."
232
233XdeltaGenerator*
234__xdp_generator_new (const char* version)
235{
236 XdeltaGenerator* xg;
237
238 if (strncmp (version, XDELTA_REQUIRED_VERSION, strlen (XDELTA_REQUIRED_VERSION)) != 0)
239 g_error ("XDelta library version mismatched, compiled for %s, running %s\n", version, XDELTA_VERSION);
240
241 xg = g_new0 (XdeltaGenerator, 1);
242
243 xg->sources = g_ptr_array_new ();
244
245 xg->data_source = g_new0 (XdeltaSource, 1);
246 xg->data_source->name = "(patch data)";
247
248 g_ptr_array_add (xg->sources, xg->data_source);
249
250 return xg;
251}
252
253void
254xdp_generator_free (XdeltaGenerator *gen)
255{
256 int i;
257
258 for (i = 0; i < gen->sources->len; i += 1)
259 xdp_source_free (gen->sources->pdata[i]);
260
261 g_ptr_array_free (gen->sources, TRUE);
262 g_free ((void*) gen->table);
263 g_free (gen);
264}
265
266void
267init_pos (XdeltaStream* str, XdeltaPos* pos)
268{
269 g_assert (str);
270
271 memset (pos, 0, sizeof (*pos));
272
273 pos->page_size = handle_pagesize (str);
274}
275
276gboolean
277check_stream_integrity (XdeltaStream* str, const guint8* md5, guint len)
278{
279 const guint8* act_md5;
280
281 if (len != handle_length (str))
282 {
283 xd_generate_handleintint_event (EC_XdStreamLengthFailed, str, len, handle_length (str));
284 return FALSE;
285 }
286
287 act_md5 = handle_checksum_md5 (str);
288
289 if (! act_md5)
290 return FALSE;
291
292 if (memcmp (md5, act_md5, 16) != 0)
293 {
294 char exp[33], rec[33];
295
296 edsio_md5_to_string (md5, exp);
297 edsio_md5_to_string (act_md5, rec);
298
299 xd_generate_handlestringstring_event (EC_XdStreamChecksumFailed, str, exp, rec);
300 g_free ((void*) act_md5);
301 return FALSE;
302 }
303
304 g_free ((void*) act_md5);
305
306 return TRUE;
307}
308
309static gboolean
310xdp_source_index_read (XdeltaSource *xs,
311 XdeltaStream *index_in)
312{
313 SerialSource *ss = handle_source (index_in);
314 XdeltaIndex *index;
315
316 if (! ss)
317 return FALSE;
318
319 /* TODO: free ss */
320
321 if (! unserialize_xdeltaindex (ss, &index))
322 return FALSE;
323
324 if (! check_stream_integrity (xs->source_in, index->file_md5, index->file_len))
325 return FALSE;
326
327 xs->ck_count = index->index_len;
328 xs->cksums = index->index;
329
330 return TRUE;
331}
332
333static gboolean
334xdp_source_index_internal (XdeltaSource *init,
335 XdeltaStream *source_in,
336 XdeltaOutStream *index_out)
337{
338 if (! generate_checksums (source_in, init))
339 return FALSE;
340
341 if (index_out)
342 {
343 const guint8* source_in_md5;
344 SerialSink* sink = handle_sink (index_out, NULL, NULL, NULL, NULL);
345
346 if (! sink)
347 return FALSE;
348
349 if (! (source_in_md5 = handle_checksum_md5 (source_in)))
350 return FALSE;
351
352 if (! serialize_xdeltaindex (sink,
353 handle_length (source_in),
354 source_in_md5,
355 init->ck_count,
356 init->cksums))
357 {
358 g_free ((void*) source_in_md5);
359 return FALSE;
360 }
361
362 g_free ((void*) source_in_md5);
363
364 if (! handle_close (index_out, 0))
365 return FALSE;
366 }
367
368 return TRUE;
369}
370
371gboolean
372xdp_source_index (XdeltaStream *source_in,
373 XdeltaOutStream *index_out)
374{
375 XdeltaSource* xs = xdp_source_new ("(ignored)", source_in, NULL, index_out);
376
377 if (xs)
378 {
379 xdp_source_free (xs);
380 return TRUE;
381 }
382
383 return FALSE;
384}
385
386XdeltaSource*
387xdp_source_new (const char *name,
388 XdeltaStream *source_in,
389 XdeltaStream *index_in,
390 XdeltaOutStream *index_out)
391{
392 XdeltaSource* xs = g_new0 (XdeltaSource, 1);
393
394 xs->source_in = source_in;
395 xs->name = name;
396
397 g_return_val_if_fail (source_in, NULL);
398 g_return_val_if_fail (index_in ? ! index_out : TRUE, NULL);
399
400 xs->index_in = index_in;
401 xs->index_out = index_out;
402 xs->source_pos.page_size = handle_pagesize (source_in);
403
404 return xs;
405}
406
407static gboolean
408xdp_source_check_index (XdeltaSource *xs)
409{
410 if (xs->source_index == 0)
411 return TRUE;
412
413 if (! xs->index_in)
414 return xdp_source_index_internal (xs, xs->source_in, xs->index_out);
415 else
416 return xdp_source_index_read (xs, xs->index_in);
417}
418
419void
420xdp_source_free (XdeltaSource* xs)
421{
422 if (xs)
423 {
424 /* if (xs->ckarray) @@@ this is troublesome now
425 g_free (xs->ckarray);*/
426
427 g_free (xs);
428 }
429}
430
431void
432xdp_source_add (XdeltaGenerator *gen,
433 XdeltaSource *src)
434{
435 if (gen->table)
436 {
437 g_free ((gpointer)gen->table);
438 gen->table = NULL;
439 }
440
441 g_ptr_array_add (gen->sources, src);
442}
443
444guint
445c_hash (const XdeltaChecksum* c)
446{
447 const guint high = c->high;
448 const guint low = c->low;
449 const guint it = (high >> 2) + (low << 3) + (high << 16);
450
451 return (it ^ high ^ low);
452}
453
454static gboolean
455region_insert (XdeltaGenerator* gen, const XdeltaPos *xpos, guint len)
456{
457 /* This is a little bit cryptic: the xpos.mem + EXPR expression
458 * computes the offset into the current page, which is guaranteed
459 * to be correct since map_page has not occured yet. */
460 const guint8* buf = xpos->mem + (gen->to_output_pos % xpos->page_size);
461
462 if (len == 0)
463 return TRUE;
464
465#ifdef DEBUG_INST
466 g_print ("insert %d at %d\n", len, gen->to_output_pos);
467#endif
468
469 if (! handle_write (gen->data_out, buf, len))
470 return FALSE;
471
472 control_copy (gen->control, gen->data_source, gen->data_output_pos, gen->data_output_pos + len);
473
474 gen->to_output_pos += len;
475 gen->data_output_pos += len;
476
477 return TRUE;
478}
479
480static gboolean
481region_copy (XdeltaGenerator* gen, XdeltaSource* src, guint from, guint to)
482{
483#ifdef DEBUG_INST
484 g_print ("copy %d - %d (%d) to %d\n", from, to, to-from, gen->to_output_pos);
485#endif
486
487 control_copy (gen->control, src, from, to);
488
489 gen->to_output_pos += (to-from);
490
491 return TRUE;
492}
493
494gboolean
495unmap_page (XdeltaStream* stream, XdeltaPos* pos)
496{
497 if (! pos->mem)
498 return TRUE;
499
500 if (handle_unmap_page (stream,
501 pos->mem_page,
502 &pos->mem))
503 {
504 pos->mem = NULL;
505 return TRUE;
506 }
507
508 return FALSE;
509}
510
511gboolean
512map_page (XdeltaStream* stream, XdeltaPos* pos)
513{
514 gint on_page;
515
516 if (pos->mem && pos->mem_page == pos->page)
517 return TRUE;
518
519 if (pos->mem)
520 {
521 if (! handle_unmap_page (stream,
522 pos->mem_page,
523 &pos->mem))
524 return FALSE;
525
526 pos->mem = NULL;
527 }
528
529 pos->mem_page = pos->page;
530
531 on_page = handle_map_page (stream,
532 pos->mem_page,
533 &pos->mem);
534
535 if (on_page >= 0)
536 {
537 pos->mem_rem = on_page;
538 return TRUE;
539 }
540
541 return FALSE;
542}
543
544static gboolean
545try_match (XdeltaGenerator *gen,
546 XdeltaStream *in,
547 XdeltaPos *xpos_ptr,
548 XdeltaSource *src,
549 guint src_offset,
550 gboolean *found)
551{
552 XdeltaPos xpos = *xpos_ptr;
553 XdeltaPos ypos = src->source_pos;
554 gint rem, remsave;
555 gint match_forward = 0;
556 gint match_backward = 0;
557 gint match_forward_max;
558 gint match_backward_max;
559 guint to_offset = XPOS (xpos);
560 gboolean one_insert = FALSE;
561
562 *found = FALSE;
563
564 ypos.page = src_offset / ypos.page_size;
565 ypos.off = src_offset % ypos.page_size;
566
567 match_forward_max = MIN (handle_length (in) - to_offset,
568 handle_length (src->source_in) - src_offset);
569 match_backward_max = MIN (src_offset, to_offset - gen->to_output_pos);
570
571 /* Don't allow backward paging */
572 match_backward_max = MIN (match_backward_max, xpos.off);
573
574 /* We're testing against the negative below. */
575 match_backward_max = - match_backward_max;
576
577 for (; match_backward > match_backward_max; )
578 {
579 g_assert (xpos.off != 0);
580
581 if (ypos.off == 0)
582 {
583 ypos.off = ypos.page_size;
584 ypos.page -= 1;
585 }
586
587 if (! map_page (src->source_in, &ypos))
588 goto bail;
589
590 rem = MIN (xpos.off, ypos.off);
591 rem = MIN (match_backward - match_backward_max, rem);
592
593 for (; rem > 0; rem -= 1, match_backward -= 1)
594 {
595 if (xpos.mem[xpos.off-1] != ypos.mem[ypos.off-1])
596 goto doneback;
597
598 xpos.off -= 1;
599 ypos.off -= 1;
600 }
601 }
602
603doneback:
604
605 xpos.page = to_offset / xpos.page_size;
606 xpos.off = to_offset % xpos.page_size;
607
608 ypos.page = src_offset / ypos.page_size;
609 ypos.off = src_offset % ypos.page_size;
610
611 for (; match_forward < match_forward_max; )
612 {
613 if (! map_page (src->source_in, &ypos))
614 goto bail;
615
616 /* Fortunately, if this map happens it means that the match must
617 * be long enough to succeed below, therefore it is safe to write
618 * the insert out now. */
619 if (! one_insert && xpos.page != xpos.mem_page)
620 {
621 one_insert = TRUE;
622
623 if (! region_insert (gen, &xpos, (to_offset + match_backward) - gen->to_output_pos))
624 goto bail;
625 }
626
627 if (! map_page (in, &xpos))
628 goto bail;
629
630 rem = MIN (xpos.mem_rem - xpos.off, ypos.mem_rem - ypos.off);
631 rem = MIN (match_forward_max - match_forward, rem);
632
633 /* Do a int-wise comparison if the regions are aligned. */
634 if (rem > (4*sizeof(int)) && (xpos.off % sizeof (int)) == (ypos.off % sizeof(int)))
635 {
636 gint is;
637 const int *xi, *yi;
638
639 for (; xpos.off % sizeof(int); rem -= 1, match_forward += 1)
640 {
641 if (xpos.mem[xpos.off] != ypos.mem[ypos.off])
642 goto done;
643
644 xpos.off += 1;
645 ypos.off += 1;
646 }
647
648 remsave = rem;
649 rem /= sizeof(int);
650
651 xi = (const int*) (xpos.mem + xpos.off);
652 yi = (const int*) (ypos.mem + ypos.off);
653
654 is = rem;
655
656 for (; rem > 0 && *xi == *yi; )
657 {
658 rem -= 1;
659 xi += 1;
660 yi += 1;
661 }
662
663 is -= rem;
664
665 match_forward += is * sizeof(int);
666 xpos.off += is * sizeof(int);
667 ypos.off += is * sizeof(int);
668
669 rem = (rem * sizeof(int)) + (remsave % sizeof(int));
670 }
671
672 for (; rem > 0; rem -= 1, match_forward += 1)
673 {
674 if (xpos.mem[xpos.off] != ypos.mem[ypos.off])
675 goto done;
676
677 xpos.off += 1;
678 ypos.off += 1;
679 }
680
681 FLIP_FORWARD (xpos);
682 FLIP_FORWARD (ypos);
683 }
684
685done:
686
687 if (match_forward - match_backward >= QUERY_SIZE_POW)
688 {
689 *found = TRUE;
690
691 if (! one_insert)
692 {
693 if (! region_insert (gen, &xpos, (to_offset + match_backward) - gen->to_output_pos))
694 goto bail;
695 }
696
697 if (! region_copy (gen, src, src_offset + match_backward, src_offset + match_forward))
698 goto bail;
699 }
700 else
701 {
702 g_assert (! one_insert);
703 }
704
705 *xpos_ptr = xpos;
706 src->source_pos = ypos;
707 return TRUE;
708
709bail:
710 *xpos_ptr = xpos;
711 src->source_pos = ypos;
712 return FALSE;
713}
714
715static gboolean
716compute_copies (XdeltaGenerator* gen, XdeltaStream* stream)
717{
718 XdeltaChecksum cksum;
719 const XdeltaChecksum *source_cksum;
720 const guint8 *segment_pointer;
721 guint source_offset, segment_index, index, prime = gen->table_size;
722 guint source_index;
723 const guint32* table = gen->table;
724 guint16 old_c, new_c;
725 guint save_page, save_off;
726#ifdef DEBUG_MATCH_PRINT
727 guint i;
728#endif
729 XdeltaPos xpos;
730 gboolean found;
731 gboolean ret = TRUE;
732
733 if (handle_length (stream) < QUERY_SIZE_POW)
734 return TRUE;
735
736 init_pos (stream, &xpos);
737
738 while (XPOS (xpos) <= (handle_length (stream) - QUERY_SIZE_POW))
739 {
740 if (!map_page (stream, &xpos))
741 return FALSE;
742
743 g_assert (xpos.mem_rem > xpos.off);
744
745 segment_index = (xpos.mem_rem - xpos.off);
746
747 if (segment_index < QUERY_SIZE_POW)
748 goto nextpage;
749
750 segment_index -= QUERY_SIZE_POW;
751
752 segment_pointer = xpos.mem + xpos.off;
753
754 init_query_checksum (segment_pointer, &cksum);
755
756 for (; ; segment_pointer += 1)
757 {
758#ifdef DEBUG_CKSUM_UPDATE
759 XdeltaChecksum cktest;
760
761 init_query_checksum (segment_pointer, &cktest);
762
763 if (cktest.high != cksum.high || cktest.low != cktest.low)
764 abort ();
765#endif
766
767 index = c_hash (&cksum) % prime;
768
769#ifdef DEBUG_MATCH_PRINT
770 g_print ("%d: searching for match \"", XPOS(xpos));
771 for (i = 0; i < QUERY_SIZE_POW; i += 1)
772 {
773 if (isprint (segment_pointer[i]))
774 g_print ("%c", segment_pointer[i]);
775 else
776 g_print ("\\0%o", segment_pointer[i]);
777 }
778 g_print ("\"... %s\n", table[index] ? "found" : "notfound");
779#endif
780
781 if (table[index])
782 {
783 source_index = (table[index] & QUERY_SIZE_MASK) - 1;
784 source_offset = (table[index] >> QUERY_SIZE);
785
786 source_cksum = ((XdeltaSource*)gen->sources->pdata[source_index])->cksums + source_offset;
787
788 if (cksum.high == source_cksum->high &&
789 cksum.low == source_cksum->low)
790 {
791 save_page = xpos.page;
792 save_off = xpos.off;
793
794 if (! try_match (gen,
795 stream,
796 &xpos,
797 gen->sources->pdata[source_index],
798 source_offset << QUERY_SIZE,
799 &found))
800 {
801 ret = FALSE;
802 goto bail;
803 }
804
805 if (found)
806 {
807 g_assert (xpos.page*xpos.page_size+xpos.off == gen->to_output_pos);
808
809 goto reenter;
810 }
811 else
812 {
813 xpos.page = save_page;
814 xpos.off = save_off;
815 }
816 }
817 }
818
819 if (segment_index == 0)
820 goto nextpage;
821
822 segment_index -= 1;
823 xpos.off += 1;
824
825 old_c = CHEW(segment_pointer[0]);
826 new_c = CHEW(segment_pointer[QUERY_SIZE_POW]);
827
828 cksum.low -= old_c;
829 cksum.low += new_c;
830
831 cksum.high -= old_c << QUERY_SIZE;
832 cksum.high += cksum.low;
833 }
834
835 nextpage:
836
837 if (xpos.mem_rem < xpos.page_size)
838 break;
839
840 xpos.page += 1;
841 xpos.off = 0;
842
843 if (xpos.page != xpos.mem_page)
844 {
845 if (! region_insert (gen, &xpos, XPOS (xpos) - gen->to_output_pos))
846 return FALSE;
847 }
848
849 reenter:
850 (void) 0;
851 }
852
853 xpos.off = gen->to_output_pos % handle_pagesize (stream);
854
855 while (gen->to_output_pos < handle_length (stream))
856 {
857 if (! map_page (stream, &xpos))
858 return FALSE;
859
860 if (! region_insert (gen, &xpos, xpos.mem_rem - xpos.off))
861 ret = FALSE;
862
863 xpos.off = 0;
864 xpos.page += 1;
865 }
866
867bail:
868
869 if (! unmap_page (stream, &xpos))
870 return FALSE;
871
872 return ret;
873}
874
875static gboolean
876just_output (XdeltaGenerator *gen,
877 XdeltaStream *in)
878{
879 XdeltaPos pos;
880
881 init_pos (in, &pos);
882
883 while (gen->to_output_pos < handle_length (in))
884 {
885 if (! map_page (in, &pos))
886 return FALSE;
887
888 if (! region_insert (gen, &pos, pos.mem_rem - pos.off))
889 return FALSE;
890
891 pos.off = 0;
892 pos.page += 1;
893 }
894
895 if (! unmap_page (in, &pos))
896 return FALSE;
897
898 return TRUE;
899}
900
901/* Clobbering decision (see below):
902 *
903 * Algorithm A: Clobber it always (its fast!). The problem
904 * is that this prefers matches at the front of
905 * the file and leads to poor matches at the back
906 * of the file (assuming I insert going backwards).
907 *
908 * Algorithm B: Keep a table of how many times there has
909 * been a clobber at each index i, C[i].
910 * With probability 1/(C[i]+1), replace the
911 * previous entry. This gives a uniform
912 * probability of each entry surviving.
913 * The problem (supposed) with this
914 * algorithm is that the probabilities
915 * should not be uniform (though uniform is
916 * better than A) because there are more
917 * chances to match a segment at the end of
918 * the file than at the beginning.
919 *
920 * Algorithm C: Give a linear weight to each match
921 * according to it's position in the file
922 * -- number the segments from N down to 1
923 * starting at the beginning. Same as the
924 * above but with a different weight. The
925 * weight for segment i, match at checksum
926 * offset k, follows. The total number of
927 * checksums in the segment is C_i,
928 * therefore the total checksum count is
929 * C = sum_i (C_i).
930 * Now the interior weight is the (C_i-k)
931 * (the linear assumption) and the total
932 * interior weight is sum_{j=1}^{N}{j}=(N)(N+1)/2
933 * so the kth segment has interior weight
934 *
935 * [2 (C_i - k)] / [(C_i) (C_i + 1)]
936 *
937 * add in the exterior weight (after
938 * cancelling a C_i):
939 *
940 * w(i,k) = [2 (C_i - k)] / [(C_i + 1) (C)]
941 *
942 * Now, as above, we will compute whether to
943 * keep or replace the current value at the j-th
944 * decision. Let R_j be the running sum of
945 * weights considered so far. R_0 = 0. At the
946 * j-th decision,
947 *
948 * P_ikj(use new value) = w(i,k)/R_{j+1}
949 * R_{j+1} = R_j + w(i,k)
950 */
951
952static gboolean
953xdp_generate_delta_int (XdeltaGenerator *gen,
954 XdeltaStream *in,
955 XdeltaOutStream *control_out,
956 XdeltaOutStream *data_out)
957{
958 gint i, j, total_from_ck_count = 0, prime = 0, index = 0;
959 gint total_from_len = 0;
960 guint32* table = NULL;
961
962 if (QUERY_SIZE == 0)
963 {
964 xdp_set_query_size_pow (1<<QUERY_SIZE_DEFAULT);
965 }
966
967 if (gen->sources->len == 0)
968 {
969 xd_generate_void_event (EC_XdTooFewSources);
970 return FALSE;
971 }
972
973 for (i = 0; i < gen->sources->len; i += 1)
974 {
975 XdeltaSource* src = gen->sources->pdata[i];
976
977 src->used = FALSE;
978 src->sequential = TRUE;
979 src->position = 0;
980 src->source_index = i;
981
982 if (src->source_index != 0)
983 total_from_len += handle_length (src->source_in);
984 }
985
986 /* QUERY_SIZE_POW is the number of elements freed in the cksum hash
987 * table for storing segment number + (offset/QUERY_SIZE_POW) in. 1
988 * for the zero + 1 for the data segment */
989 if (gen->sources->len > (QUERY_SIZE_POW-2))
990 {
991 xd_generate_void_event (EC_XdTooManySources);
992 return FALSE;
993 }
994
995 if (handle_length (in) < QUERY_SIZE_POW || total_from_len < QUERY_SIZE_POW)
996 {
997 if (! just_output (gen, in))
998 return FALSE;
999 }
1000 else
1001 {
1002 for (i = 0; i < gen->sources->len; i += 1)
1003 {
1004 XdeltaSource* xs = (XdeltaSource*)gen->sources->pdata[i];
1005
1006 if (! xdp_source_check_index (xs))
1007 return FALSE;
1008
1009 total_from_ck_count += xs->ck_count;
1010 }
1011
1012 prime = g_spaced_primes_closest (total_from_ck_count);
1013
1014 gen->table = table = g_new0 (guint32, prime);
1015 gen->table_size = prime;
1016
1017 for (i = 0; i < gen->sources->len; i += 1)
1018 {
1019 XdeltaSource* xs = (XdeltaSource*)gen->sources->pdata[i];
1020
1021 for (j = xs->ck_count-1; j >= 0; j -= 1)
1022 {
1023 index = c_hash (xs->cksums + j) % prime;
1024
1025#ifdef DEBUG_HASH
1026 gen->hash_entries += 1;
1027
1028 if (table[index])
1029 {
1030 gen->hash_conflicts += 1;
1031
1032 /*regions_similar (gen,
1033 i,
1034 j,
1035 (table[index] & QUERY_SIZE_MASK) - 1,
1036 table[index] >> QUERY_SIZE);*/
1037 }
1038#endif
1039
1040 /* This is the real code */
1041 table[index] = (j << QUERY_SIZE) + 1 + i;
1042 }
1043 }
1044
1045#ifdef DEBUG_HASH
1046 for (i = 0; i < prime; i += 1)
1047 {
1048 if (gen->table[i])
1049 gen->hash_fill += 1;
1050 }
1051
1052 g_print ("*** Hash stats:\n");
1053 g_print ("Hash conflicts: %d\n", gen->hash_conflicts);
1054 g_print ("Hash real conflicts: %d\n", gen->hash_real_conflicts);
1055 g_print ("Hash real real conflicts: %d\n", gen->hash_real_real_conflicts);
1056 g_print ("Hash fill: %d\n", gen->hash_fill);
1057 g_print ("Hash size: %d\n", gen->table_size);
1058 g_print ("Hash entries: %d\n", gen->hash_entries);
1059 g_print ("Hash fill/entries: %f\n", (float)gen->hash_fill/(float)gen->hash_entries);
1060#endif
1061
1062 if (! compute_copies (gen, in))
1063 return FALSE;
1064 }
1065
1066 return TRUE;
1067}
1068
1069XdeltaControl*
1070xdp_generate_delta (XdeltaGenerator *gen,
1071 XdeltaStream *in,
1072 XdeltaOutStream *control_out,
1073 XdeltaOutStream *data_out)
1074{
1075 gint i;
1076 const guint8* in_md5;
1077 const guint8* data_out_md5;
1078
1079 gen->data_out = data_out;
1080 gen->control_out = control_out;
1081 gen->control = control_new ();
1082
1083 if (! xdp_generate_delta_int (gen, in, control_out, data_out))
1084 return NULL;
1085
1086 if (! handle_close (data_out, 0))
1087 return NULL;
1088
1089 if (! (in_md5 = handle_checksum_md5 (in)))
1090 return FALSE;
1091
1092 if (! (data_out_md5 = handle_checksum_md5 (data_out)))
1093 return FALSE;
1094
1095 gen->control->has_data = gen->data_source->used;
1096 gen->control->inst = &g_array_index (gen->control->inst_array, XdeltaInstruction, 0);
1097 gen->control->inst_len = gen->control->inst_array->len;
1098
1099 gen->control->to_len = handle_length (in);
1100 memcpy (gen->control->to_md5, in_md5, 16);
1101
1102 for (i = 0; i < gen->sources->len; i += 1)
1103 {
1104 XdeltaSource* src = gen->sources->pdata[i];
1105 const guint8* md5;
1106 guint len;
1107
1108 if (src->source_in)
1109 {
1110 if (! (md5 = handle_checksum_md5 (src->source_in)))
1111 return FALSE;
1112
1113 len = handle_length (src->source_in);
1114 }
1115 else
1116 {
1117 len = handle_length (data_out);
1118 md5 = data_out_md5;
1119 }
1120
1121 if (! control_add_info (gen->control, src, md5, len))
1122 return NULL;
1123 }
1124
1125 gen->control->source_info = (XdeltaSourceInfo**) gen->control->source_info_array->pdata;
1126 gen->control->source_info_len = gen->control->source_info_array->len;
1127
1128 if (control_out && ! xdp_control_write (gen->control, control_out))
1129 return NULL;
1130
1131 return gen->control;
1132}
1133
1134/* Below here boring details mostly to do with reading and writing
1135 * control. */
1136
1137XdeltaControl*
1138control_new (void)
1139{
1140 XdeltaControl* it = g_new0 (XdeltaControl, 1);
1141
1142 it->inst_array = g_array_new (FALSE, FALSE, sizeof (XdeltaInstruction));
1143 it->source_info_array = g_ptr_array_new ();
1144
1145 return it;
1146}
1147
1148static void
1149control_reindex (XdeltaControl* cont, XdeltaSource* src)
1150{
1151 gint i;
1152 gint new_index = cont->source_info_array->len;
1153
1154 for (i = 0; i < cont->inst_len; i += 1)
1155 {
1156 XdeltaInstruction* inst = cont->inst + i;
1157
1158 if (inst->index == src->source_index)
1159 inst->index = new_index;
1160 }
1161}
1162
1163gboolean
1164control_add_info (XdeltaControl* cont, XdeltaSource* src, const guint8* md5, guint len)
1165{
1166 XdeltaSourceInfo* si;
1167
1168 if (! src->used)
1169 return TRUE;
1170
1171 si = g_new0 (XdeltaSourceInfo, 1);
1172
1173 si->name = src->name;
1174 si->sequential = src->sequential;
1175 si->len = len;
1176 si->isdata = (src->source_index == 0);
1177
1178 memcpy (si->md5, md5, 16);
1179
1180 control_reindex (cont, src);
1181
1182 g_ptr_array_add (cont->source_info_array, si);
1183
1184 return TRUE;
1185}
1186
1187void
1188xdp_control_free (XdeltaControl* cont)
1189{
1190 if (cont->source_info_array)
1191 g_ptr_array_free (cont->source_info_array, TRUE);
1192 if (cont->inst_array)
1193 g_array_free (cont->inst_array, TRUE);
1194 g_free (cont);
1195}
1196
1197void
1198control_copy (XdeltaControl* cont, XdeltaSource* src, guint from, guint to)
1199{
1200 XdeltaInstruction i;
1201
1202 if (cont->inst_array->len > 0)
1203 {
1204 XdeltaInstruction* oi = & g_array_index (cont->inst_array, XdeltaInstruction, cont->inst_array->len-1);
1205
1206 if (oi->index == src->source_index && (oi->offset + oi->length) == from)
1207 {
1208 oi->length += (to - from);
1209 return;
1210 }
1211 }
1212
1213 i.index = src->source_index;
1214 i.offset = from;
1215 i.length = to - from;
1216
1217 src->used = TRUE;
1218
1219 if (src->position != from)
1220 src->sequential = FALSE;
1221
1222 src->position = to;
1223
1224 g_array_append_val (cont->inst_array, i);
1225}
1226
1227#ifdef DEBUG_CONT
1228static void
1229print_info (XdeltaSourceInfo* si)
1230{
1231 char md5str[33];
1232
1233 edsio_md5_to_string (si->md5, md5str);
1234
1235 g_print (" ** info\n");
1236 g_print (" md5: %s\n", md5str);
1237 g_print (" len: %d\n", si->length);
1238}
1239
1240static void
1241print_inst (XdeltaInstruction* i)
1242{
1243 switch (i->type)
1244 {
1245 case INST_TYPE_COPY:
1246 g_print (" copy (%c) %d-%d (%d) from %d\n", i->type, i->offset, i->offset + i->length, i->length, i->index);
1247 break;
1248 case INST_TYPE_INSERT:
1249 g_print (" insert %d\n", i->length);
1250 break;
1251 }
1252}
1253
1254static void
1255xdp_print_control (XdeltaControl *cont)
1256{
1257 gint i;
1258
1259 g_print ("*** control\n");
1260
1261 g_print (" data len: %d\n", cont->data_len);
1262 print_info (&cont->to_info);
1263
1264 g_print (" source info len: %d\n", cont->source_info_len);
1265
1266 for (i = 0; i < cont->source_info_len; i += 1)
1267 print_info (cont->source_info[i]);
1268
1269 g_print (" inst len: %d\n", cont->inst_len);
1270
1271 for (i = 0; i < cont->inst_len; i += 1)
1272 print_inst (cont->inst + i);
1273
1274}
1275#endif
1276
1277#ifdef DEBUG_CHECK_CONTROL
1278void
1279check_control (XdeltaControl* cont)
1280{
1281 gint i;
1282
1283 for (i = 0; i < cont->inst_len; i += 1)
1284 {
1285 switch (cont->inst[i].type)
1286 {
1287 case INST_TYPE_NCOPY:
1288 case INST_TYPE_COPY:
1289 if (cont->inst[i].index >= cont->source_info_len)
1290 g_error ("control has a too high instruction index\n");
1291 }
1292 }
1293}
1294#endif
1295
1296static gboolean
1297unpack_instructions (XdeltaControl* cont)
1298{
1299 gint i;
1300 guint output_pos = 0;
1301
1302 for (i = 0; i < cont->source_info_len; i += 1)
1303 {
1304 XdeltaSourceInfo* info = cont->source_info[i];
1305
1306 info->position = 0;
1307 info->copies = 0;
1308 info->copy_length = 0;
1309 }
1310
1311 for (i = 0; i < cont->inst_len; i += 1)
1312 {
1313 XdeltaSourceInfo* info;
1314 XdeltaInstruction *inst = cont->inst + i;
1315
1316 if (inst->index >= cont->source_info_len)
1317 {
1318 xd_generate_int_event (EC_XdOutOfRangeSourceIndex, inst->index);
1319 return FALSE;
1320 }
1321
1322 info = cont->source_info[inst->index];
1323
1324 if (info->sequential)
1325 {
1326 inst->offset = info->position;
1327 info->position = inst->offset + inst->length;
1328 }
1329
1330 inst->output_start = output_pos;
1331 output_pos += inst->length;
1332
1333 info->copies += 1;
1334 info->copy_length += inst->length;
1335 }
1336
1337 return TRUE;
1338}
1339
1340static gboolean
1341pack_instructions (XdeltaControl* cont)
1342{
1343 gint i;
1344
1345 for (i = 0; i < cont->source_info_len; i += 1)
1346 {
1347 XdeltaSourceInfo* info = cont->source_info[i];
1348
1349 info->position = 0;
1350 info->copies = 0;
1351 info->copy_length = 0;
1352 }
1353
1354 for (i = 0; i < cont->inst_len; i += 1)
1355 {
1356 XdeltaSourceInfo* info;
1357 XdeltaInstruction *inst = cont->inst + i;
1358
1359 if (inst->index >= cont->source_info_len)
1360 {
1361 xd_generate_int_event (EC_XdOutOfRangeSourceIndex, inst->index);
1362 return FALSE;
1363 }
1364
1365 info = cont->source_info[inst->index];
1366
1367 if (info->sequential)
1368 {
1369 g_assert (info->position == inst->offset);
1370 info->position += inst->length;
1371 inst->offset = 0;
1372 }
1373
1374 info->copies += 1;
1375 info->copy_length += inst->length;
1376 }
1377
1378 return TRUE;
1379}
1380
1381
1382gboolean
1383xdp_control_write (XdeltaControl *cont,
1384 XdeltaOutStream *cont_out)
1385{
1386 SerialSink* sink = handle_sink (cont_out, NULL, NULL, NULL, NULL);
1387
1388#ifdef DEBUG_CONT
1389 xdp_print_control (cont);
1390#endif
1391#ifdef DEBUG_CHECK_CONTROL
1392 check_control (cont);
1393#endif
1394
1395 if (! sink)
1396 return FALSE;
1397
1398 if (! pack_instructions (cont))
1399 return FALSE;
1400
1401 /* @@@ think about how the count function overcounts on the instruction
1402 * array by a factor of 2 or more. */
1403 if (! serialize_xdeltacontrol_obj (sink, cont))
1404 return FALSE;
1405
1406 if (! handle_close (cont_out, 0))
1407 return FALSE;
1408
1409 return TRUE;
1410}
1411
1412XdeltaControl*
1413xdp_control_read (XdeltaStream *cont_in)
1414{
1415 SerialSource* src = handle_source (cont_in);
1416 XdeltaControl* cont;
1417 SerialType type;
1418
1419 if (! src)
1420 return NULL;
1421
1422 /* TODO: free src */
1423
1424 if (! serializeio_unserialize_generic_acceptable (src, ST_XdeltaControl | ST_Version0Control, & type, (void**) & cont))
1425 {
1426 /* TODO: the warning below was added in 1.1.5 after a bug report
1427 * regarding an invalid delta, which would fail in patch here:
1428 * It's unclear whether this is the "right" place to print the
1429 * error. */
1430 g_warning ("patch parse error\n");
1431 return NULL;
1432 }
1433
1434 if (type == ST_Version0Control)
1435 {
1436 SerialVersion0Control *ocont = (SerialVersion0Control*) cont;
1437
1438 xd_generate_string_event (EC_XdBackwardCompatibilityMode, "1.0");
1439
1440 cont = control_version_0 (ocont);
1441
1442 g_free (ocont);
1443 }
1444
1445 if (! unpack_instructions (cont))
1446 return NULL;
1447
1448#ifdef DEBUG_CHECK_CONTROL
1449 check_control (cont);
1450#endif
1451
1452 return cont;
1453}
1454
1455XdeltaControl*
1456control_version_0 (SerialVersion0Control* ocont)
1457{
1458 XdeltaControl* cont = g_new0 (XdeltaControl, 1);
1459 gint i;
1460 XdeltaSourceInfo* dinfo;
1461
1462 g_assert (! ocont->normalized);
1463
1464 memcpy (cont->to_md5, ocont->to_info.real_md5, 16);
1465
1466 cont->to_len = ocont->to_info.length;
1467
1468 cont->has_data = TRUE;
1469
1470 cont->source_info_len = ocont->source_info_len + 1;
1471 cont->source_info = g_new (XdeltaSourceInfo*, cont->source_info_len);
1472
1473 cont->source_info[0] = dinfo = g_new0 (XdeltaSourceInfo, 1);
1474
1475 dinfo->name = "(patch data)";
1476 memcpy (dinfo->md5, ocont->to_info.md5, 15);
1477 dinfo->len = ocont->data_len;
1478 dinfo->isdata = TRUE;
1479 dinfo->sequential = FALSE;
1480
1481 for (i = 0; i < ocont->source_info_len; i += 1)
1482 {
1483 XdeltaSourceInfo* info = g_new0 (XdeltaSourceInfo, 1);
1484 SerialVersion0SourceInfo* oinfo = ocont->source_info[i];
1485
1486 cont->source_info[i+1] = info;
1487
1488 info->name = "unnamed";
1489 memcpy (info->md5, oinfo->md5, 16);
1490 info->len = oinfo->length;
1491 info->isdata = FALSE;
1492 info->sequential = FALSE;
1493 }
1494
1495 /* The old unpack */
1496#define OLD_QUERY_SIZE 4
1497 for (i = 0; i < ocont->inst_len; i += 1)
1498 {
1499 switch (ocont->inst[i].length & 3)
1500 {
1501 case 0: ocont->inst[i].type = 'N'; break;
1502 case 1: ocont->inst[i].type = 'E'; break;
1503 case 2: ocont->inst[i].type = 'C'; break;
1504 case 3: ocont->inst[i].type = 'I'; break;
1505 }
1506
1507 ocont->inst[i].length >>= 2;
1508 ocont->inst[i].index = ocont->inst[i].length & OLD_QUERY_SIZE;
1509 ocont->inst[i].length >>= OLD_QUERY_SIZE;
1510 }
1511
1512 cont->inst_len = ocont->inst_len;
1513 cont->inst = g_new (XdeltaInstruction, cont->inst_len);
1514
1515 for (i = 0; i < cont->inst_len; i += 1)
1516 {
1517 cont->inst[i].length = ocont->inst[i].length;
1518 cont->inst[i].offset = ocont->inst[i].offset;
1519
1520 switch (ocont->inst[i].type)
1521 {
1522 case 'N':
1523 case 'E':
1524 abort ();
1525 break;
1526
1527 case 'C':
1528 g_assert (ocont->inst[i].index == 0);
1529
1530 cont->inst[i].index = 1;
1531 break;
1532
1533 case 'I':
1534 cont->inst[i].index = 0;
1535 break;
1536 }
1537
1538 }
1539
1540 return cont;
1541}