summaryrefslogtreecommitdiff
path: root/xdelta3/xdelta3.h
blob: 1f20c1045712d6e4821d7e5fbd064ef486bf29b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
/* xdelta 3 - delta compression tools and library
 * Copyright (C) 2001, 2003, 2004, 2005, 2006.  Joshua P. MacDonald
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/* Welcome to Xdelta.  If you want to know more about Xdelta, start by reading xdelta3.c.
 * If you are ready to use the API, continue reading here.  There are two interfaces --
 * xd3_encode_input and xd3_decode_input -- plus a dozen or so related calls.  This
 * interface is styled after Zlib. */

#ifndef _XDELTA3_H_
#define _XDELTA3_H_

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/**********************************************************************/

/* Default configured value of stream->winsize.  If the program supplies
 * xd3_encode_input() with data smaller than winsize the stream will
 * automatically buffer the input, otherwise the input buffer is used directly.
 */
#ifndef XD3_DEFAULT_WINSIZE
#define XD3_DEFAULT_WINSIZE (1U << 23)
#endif

/* Default total size of the source window used in xdelta3-main.h */
#ifndef XD3_DEFAULT_SRCWINSZ
#define XD3_DEFAULT_SRCWINSZ (1U << 26)
#endif

/* When Xdelta requests a memory allocation for certain buffers, it rounds up to units of
 * at least this size.  The code assumes (and asserts) that this is a power-of-two. */
#ifndef XD3_ALLOCSIZE
#define XD3_ALLOCSIZE (1U<<14)
#endif

/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect decoders against
 * malicious files.  The decoder will never decode a window larger than this.  If the file
 * specifies VCD_TARGET the decoder may require two buffers of this size.
 *
 * 8-16MB is reasonable, probably don't need to go larger. */
#ifndef XD3_HARDMAXWINSIZE
#define XD3_HARDMAXWINSIZE (1U<<24)
#endif

/* The XD3_NODECOMPRESSSIZE parameter tells the xdelta main routine not to try to
 * externally-decompress source inputs that are too large.  Since these files must be
 * seekable, they are decompressed to a temporary file location and the user may not wish
 * for this. */
#ifndef XD3_NODECOMPRESSSIZE
#define XD3_NODECOMPRESSSIZE (1U<<28)
#endif

/* The IOPT_SIZE value sets the size of a buffer used to batch overlapping copy
 * instructions before they are optimized by picking the best non-overlapping ranges.  The
 * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off.
 * Setting this value to 0 causes an unlimited buffer to be used. */
#ifndef XD3_DEFAULT_IOPT_SIZE
#define XD3_DEFAULT_IOPT_SIZE    (1U<<15)
#endif

/* The maximum distance backward to search for small matches */
#ifndef XD3_DEFAULT_SPREVSZ
#define XD3_DEFAULT_SPREVSZ (1U<<18)
#endif

/* The default compression level
 */
#ifndef XD3_DEFAULT_LEVEL
#define XD3_DEFAULT_LEVEL 1
#endif

/* Sizes and addresses within VCDIFF windows are represented as usize_t
 *
 * For source-file offsets and total file sizes, total input and output counts, the xoff_t
 * type is used.  The decoder and encoder generally check for overflow of the xoff_t size,
 * and this is tested at the 32bit boundary [xdelta3-test.h].
 */
#ifndef _WIN32
typedef unsigned int    usize_t;
typedef u_int8_t        uint8_t;
typedef u_int16_t       uint16_t;

#ifndef __uint32_t_defined  /* Note: Cygwin compat */
typedef u_int32_t       uint32_t;
#endif

typedef u_int64_t       uint64_t;
#else
// CRT_SECURE_NO_DEPRECATE silences warnings about sprintf() and strerror()
#define _CRT_SECURE_NO_DEPRECATE 1
#define _CRT_SECURE_COPP_OVERLOAD_STANDARD_NAMES 1
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define inline
typedef unsigned int   uint;
typedef unsigned int   usize_t;
typedef unsigned char  uint8_t;
typedef unsigned short uint16_t;
typedef unsigned long  uint32_t;
typedef ULONGLONG      uint64_t;
#endif

#define SIZEOF_USIZE_T 4

#ifndef XD3_USE_LARGEFILE64
#define XD3_USE_LARGEFILE64 1
#endif

#if XD3_USE_LARGEFILE64
#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
typedef uint64_t xoff_t;
#define SIZEOF_XOFF_T 8
#else
typedef uint32_t xoff_t;
#define SIZEOF_XOFF_T 4
#endif

#define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
#define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8 || REGRESSION_TEST)

/**********************************************************************/

#ifndef INLINE
#define INLINE inline
#endif

/* Whether to build the encoder, otherwise only build the decoder. */
#ifndef XD3_ENCODER
#define XD3_ENCODER 1
#endif

/* The code returned when main() fails, also defined in system includes. */
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif

/* REGRESSION TEST enables the "xdelta3 test" command, which runs a series of self-tests. */
#ifndef REGRESSION_TEST
#define REGRESSION_TEST 0
#endif

/* XD3_DEBUG=1 enables assertions and various statistics.  Levels > 1 enable some
 * additional output only useful during development and debugging. */
#ifndef XD3_DEBUG
#define XD3_DEBUG 0
#endif

#ifndef PYTHON_MODULE
#define PYTHON_MODULE 0
#endif

#ifndef SWIG_MODULE
#define SWIG_MODULE 0
#endif

/* There are three string matching functions supplied: one fast, one slow (default), and
 * one soft-configurable.  To disable any of these, use the following definitions. */
#ifndef XD3_BUILD_SLOW
#define XD3_BUILD_SLOW 1
#endif
#ifndef XD3_BUILD_FAST
#define XD3_BUILD_FAST 1
#endif
#ifndef XD3_BUILD_FASTEST
#define XD3_BUILD_FASTEST 1
#endif
#ifndef XD3_BUILD_SOFT
#define XD3_BUILD_SOFT 1
#endif
#ifndef XD3_BUILD_DEFAULT
#define XD3_BUILD_DEFAULT 1
#endif

#if XD3_DEBUG
#include <stdio.h>
#endif

/* XPRINT.  Debug output and VCDIFF_TOOLS functions report to stderr.  I have used an
 * irregular style to abbreviate [fprintf(stderr, "] as [P(RINT "]. */
#define P    fprintf
#define RINT stderr,

typedef struct _xd3_stream             xd3_stream;
typedef struct _xd3_source             xd3_source;
typedef struct _xd3_hash_cfg           xd3_hash_cfg;
typedef struct _xd3_smatcher           xd3_smatcher;
typedef struct _xd3_rinst              xd3_rinst;
typedef struct _xd3_dinst              xd3_dinst;
typedef struct _xd3_hinst              xd3_hinst;
typedef struct _xd3_rpage              xd3_rpage;
typedef struct _xd3_addr_cache         xd3_addr_cache;
typedef struct _xd3_output             xd3_output;
typedef struct _xd3_desect             xd3_desect;
typedef struct _xd3_iopt_buflist       xd3_iopt_buflist;
typedef struct _xd3_rlist              xd3_rlist;
typedef struct _xd3_sec_type           xd3_sec_type;
typedef struct _xd3_sec_cfg            xd3_sec_cfg;
typedef struct _xd3_sec_stream         xd3_sec_stream;
typedef struct _xd3_config             xd3_config;
typedef struct _xd3_code_table_desc    xd3_code_table_desc;
typedef struct _xd3_code_table_sizes   xd3_code_table_sizes;
typedef struct _xd3_slist              xd3_slist;

/* The stream configuration has three callbacks functions, all of which may be supplied
 * with NULL values.  If config->getblk is provided as NULL, the stream returns
 * XD3_GETSRCBLK. */

typedef void*  (xd3_alloc_func)    (void       *opaque,
				    usize_t      items,
				    usize_t      size);
typedef void   (xd3_free_func)     (void       *opaque,
				    void       *address);

typedef int    (xd3_getblk_func)   (xd3_stream *stream,
				    xd3_source *source,
				    xoff_t      blkno);

/* These are internal functions to delay construction of encoding tables and support
 * alternate code tables.  See the comments & code enabled by GENERIC_ENCODE_TABLES. */

typedef const xd3_dinst* (xd3_code_table_func) (void);
typedef int              (xd3_comp_table_func) (xd3_stream *stream,
						const uint8_t **data,
						usize_t *size);


/* Some junk. */

#ifndef XD3_ASSERT
#if XD3_DEBUG
#define XD3_ASSERT(x) \
    do { if (! (x)) { P(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
    abort (); } } while (0)
#else
#define XD3_ASSERT(x) (void)0
#endif
#endif

#ifdef __GNUC__
/* As seen on linux-kernel. */
#ifndef max
#define max(x,y) ({ \
	const typeof(x) _x = (x);	\
	const typeof(y) _y = (y);	\
	(void) (&_x == &_y);		\
	_x > _y ? _x : _y; })
#endif

#ifndef min
#define min(x,y) ({ \
	const typeof(x) _x = (x);	\
	const typeof(y) _y = (y);	\
	(void) (&_x == &_y);		\
	_x < _y ? _x : _y; })
#endif
#else
#ifndef max
#define max(x,y) ((x) < (y) ? (y) : (x))
#endif
#ifndef min
#define min(x,y) ((x) < (y) ? (x) : (y))
#endif
#endif

/******************************************************************************************
 PUBLIC ENUMS
 ******************************************************************************************/

/* These are the five ordinary status codes returned by the xd3_encode_input() and
 * xd3_decode_input() state machines. */
typedef enum {

  /* An application must be prepared to handle these five return values from either
   * xd3_encode_input or xd3_decode_input, except in the case of no-source compression, in
   * which case XD3_GETSRCBLK is never returned.  More detailed comments for these are
   * given in xd3_encode_input and xd3_decode_input comments, below. */
  XD3_INPUT     = -17703, /* need input */
  XD3_OUTPUT    = -17704, /* have output */
  XD3_GETSRCBLK = -17705, /* need a block of source input (with no xd3_getblk function),
			   * a chance to do non-blocking read. */
  XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & first window header */
  XD3_WINSTART  = -17707, /* notification: returned before a window is processed, giving a
			   * chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. */
  XD3_WINFINISH  = -17708, /* notification: returned after encode/decode & output for a window */
  XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by getblk() if the block is too old */
  XD3_INTERNAL   = -17710, /* internal error */
  XD3_INVALID    = -17711, /* invalid config */
  XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */

} xd3_rvalues;

/* special values in config->flags */
typedef enum
{
  XD3_JUST_HDR       = (1 << 1),   /* used by VCDIFF tools, see xdelta3-main.h. */
  XD3_SKIP_WINDOW    = (1 << 2),   /* used by VCDIFF tools, see xdelta3-main.h. */
  XD3_SKIP_EMIT      = (1 << 3),   /* used by VCDIFF tools, see xdelta3-main.h. */
  XD3_FLUSH          = (1 << 4),   /* flush the stream buffer to prepare for xd3_stream_close(). */

  XD3_SEC_DJW        = (1 << 5),   /* use DJW static huffman */
  XD3_SEC_FGK        = (1 << 6),   /* use FGK adaptive huffman */
  XD3_SEC_TYPE       = (XD3_SEC_DJW | XD3_SEC_FGK),

  XD3_SEC_NODATA     = (1 << 7),   /* disable secondary compression of the data section. */
  XD3_SEC_NOINST     = (1 << 8),   /* disable secondary compression of the inst section. */
  XD3_SEC_NOADDR     = (1 << 9),   /* disable secondary compression of the addr section. */

  XD3_SEC_OTHER      = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),

  XD3_ADLER32        = (1 << 10),  /* enable checksum computation in the encoder. */
  XD3_ADLER32_NOVER  = (1 << 11),  /* disable checksum verification in the decoder. */

  XD3_ALT_CODE_TABLE = (1 << 12),  /* for testing the alternate code table encoding. */

  XD3_NOCOMPRESS     = (1 << 13),  /* disable ordinary data compression feature,
				    * only search the source, not the target. */
  XD3_BEGREEDY       = (1 << 14),  /* disable the "1.5-pass algorithm", instead use
				    * greedy matching.  Greedy is off by default. */

  /* 4 bits to set the compression level the same as the command-line
   * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag,
   * and is independent of compression level).  This is for
   * convenience, especially with xd3_encode_memory(). */

  XD3_COMPLEVEL_SHIFT = 20,  /* 20 - 24 */
  XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT),
  XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT),
  XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT),
  XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT),
  XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT),

} xd3_flags;

/* The values of this enumeration are set in xd3_config using the
 * smatch_cfg variable.  It can be set to default, slow, fast, etc.,
 * and soft. */
typedef enum
{
  XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits, else default. */
  XD3_SMATCH_SLOW    = 1,
  XD3_SMATCH_FAST    = 2,
  XD3_SMATCH_FASTEST = 3,
  XD3_SMATCH_SOFT    = 4,
} xd3_smatch_cfg;

/******************************************************************************************
 PRIVATE ENUMS
 ******************************************************************************************/

/* stream->match_state is part of the xd3_encode_input state machine for source matching:
 *
 *  1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
 *  2. this state spans encoder windows: a match and end-of-window will continue in the next
 *  3. the initial target byte and source byte are a presumed match, to avoid some computation
 *  in case the inputs are identical.
 */
typedef enum {

  MATCH_TARGET    = 0, /* in this state, attempt to match the start of the target with the
			* previously set source address (initially 0). */
  MATCH_BACKWARD  = 1, /* currently expanding a match backward in the source/target. */
  MATCH_FORWARD   = 2, /* currently expanding a match forward in the source/target. */
  MATCH_SEARCHING = 3, /* currently searching for a match. */

} xd3_match_state;

/* The xd3_encode_input state machine steps through these states in the following order.
 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data.  After
 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
 */
typedef enum {

  ENC_INIT      = 0, /* xd3_encode_input has never been called. */
  ENC_INPUT     = 1, /* waiting for xd3_avail_input () to be called. */
  ENC_SEARCH    = 2, /* currently searching for matches. */
  ENC_FLUSH     = 3, /* currently emitting output. */
  ENC_POSTOUT   = 4, /* after an output section. */
  ENC_POSTWIN   = 5, /* after all output sections. */
  ENC_ABORTED   = 6, /* abort. */
} xd3_encode_state;

/* The xd3_decode_input state machine steps through these states in the following order.
 * The matcher is reentrant and returns XD3_INPUT whenever it requires more data.  After
 * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
 *
 * 0-8:   the VCDIFF header
 * 9-18:  the VCDIFF window header
 * 19-21: the three primary sections: data (which I think should have gone last), inst, addr
 * 22:    producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
 * 23:    return XD3_WINFINISH, set state=9 to decode more input
 */
typedef enum {

  DEC_VCHEAD   = 0, /* VCDIFF header */
  DEC_HDRIND   = 1, /* header indicator */

  DEC_SECONDID = 2, /* secondary compressor ID */

  DEC_TABLEN   = 3, /* code table length */
  DEC_NEAR     = 4, /* code table near */
  DEC_SAME     = 5, /* code table same */
  DEC_TABDAT   = 6, /* code table data */

  DEC_APPLEN   = 7, /* application data length */
  DEC_APPDAT   = 8, /* application data */

  DEC_WININD   = 9, /* window indicator */

  DEC_CPYLEN   = 10, /* copy window length */
  DEC_CPYOFF   = 11, /* copy window offset */

  DEC_ENCLEN   = 12, /* length of delta encoding */
  DEC_TGTLEN   = 13, /* length of target window */
  DEC_DELIND   = 14, /* delta indicator */

  DEC_DATALEN  = 15, /* length of ADD+RUN data */
  DEC_INSTLEN  = 16, /* length of instruction data */
  DEC_ADDRLEN  = 17, /* length of address data */

  DEC_CKSUM    = 18, /* window checksum */

  DEC_DATA     = 19, /* data section */
  DEC_INST     = 20, /* instruction section */
  DEC_ADDR     = 21, /* address section */

  DEC_EMIT     = 22, /* producing data */

  DEC_FINISH   = 23, /* window finished */

  DEC_ABORTED  = 24, /* xd3_abort_stream */
} xd3_decode_state;

/* An application never sees these internal codes: */  
typedef enum {
  XD3_NOSECOND  = -17708, /* when secondary compression finds no improvement. */
} xd3_pvalues;

/******************************************************************************************
 internal types
 ******************************************************************************************/

/* instruction lists used in the IOPT buffer */
struct _xd3_rlist
{
  xd3_rlist  *next;
  xd3_rlist  *prev;
};

/* the raw encoding of an instruction used in the IOPT buffer */
struct _xd3_rinst
{
  uint8_t     type;
  uint8_t     xtra;
  uint8_t     code1;
  uint8_t     code2;
  usize_t      pos;
  usize_t      size;
  xoff_t      addr;
  xd3_rlist   link;
};

/* the code-table form of an single- or double-instruction */
struct _xd3_dinst
{
  uint8_t     type1;
  uint8_t     size1;
  uint8_t     type2;
  uint8_t     size2;
};

/* the decoded form of a single (half) instruction. */
struct _xd3_hinst
{
  uint8_t     type;
  uint32_t    size;
  uint32_t    addr;
};

/* used by the encoder to buffer output in sections.  list of blocks. */
struct _xd3_output
{
  uint8_t    *base;
  usize_t      next;
  usize_t      avail;
  xd3_output *next_page;
};

/* the VCDIFF address cache, see the RFC */
struct _xd3_addr_cache
{
  uint     s_near;
  uint     s_same;
  usize_t  next_slot;  /* the circular index for near */
  usize_t *near_array; /* array of size s_near        */
  usize_t *same_array; /* array of size s_same*256    */
};

/* the IOPT buffer list is just a list of buffers, which may be allocated
 * during encode when using an unlimited buffer. */
struct _xd3_iopt_buflist
{
  xd3_rinst *buffer;
  xd3_iopt_buflist *next;
};

/* This is the record of a pre-compiled configuration, a subset of xd3_config. */
struct _xd3_smatcher
{
  const char        *name;
  int             (*string_match) (xd3_stream  *stream);
  uint               large_look;
  uint               large_step;
  uint               small_look;
  uint               small_chain;
  uint               small_lchain;
  uint               max_lazy;
  uint               long_enough;
};

/* hash table size & power-of-two hash function. */
struct _xd3_hash_cfg
{
  usize_t           size;
  usize_t           shift;
  usize_t           mask;
};

/* the sprev list */
struct _xd3_slist
{
  usize_t     last_pos;
};

/* a decoder section (data, inst, or addr).  there is an optimization to avoid copying
 * these sections if all the input is available, related to the copied field below.
 * secondation compression uses the copied2 field. */
struct _xd3_desect
{
  const uint8_t *buf;
  const uint8_t *buf_max;
  uint32_t       size;
  usize_t        pos;
  uint8_t       *copied1;
  usize_t        alloc1;
  uint8_t       *copied2;
  usize_t        alloc2;
};

/******************************************************************************************
 public types
 ******************************************************************************************/

/* Settings for the secondary compressor. */
struct _xd3_sec_cfg
{
  int                data_type;     /* Which section. (set automatically) */
  int                ngroups;       /* Number of DJW Huffman groups. */
  int                sector_size;   /* Sector size. */
  int                inefficient;   /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
};

/* This is the user-visible stream configuration. */
struct _xd3_config
{
  usize_t             winsize;       /* The encoder window size. */
  usize_t             sprevsz;       /* How far back small string matching goes */
  usize_t             iopt_size;     /* entries in the instruction-optimizing buffer */
  usize_t             srcwin_maxsz;  /* srcwin_size grows by a factor of 2 when no matches are found */

  xd3_getblk_func   *getblk;        /* The three callbacks. */
  xd3_alloc_func    *alloc;
  xd3_free_func     *freef;
  void              *opaque;        /* Not used. */
  int                flags;         /* stream->flags are initialized from xd3_config &
				     * never modified by the library.  Use xd3_set_flags
				     * to modify flags settings mid-stream. */

  xd3_sec_cfg       sec_data;       /* Secondary compressor config: data */
  xd3_sec_cfg       sec_inst;       /* Secondary compressor config: inst */
  xd3_sec_cfg       sec_addr;       /* Secondary compressor config: addr */

  xd3_smatch_cfg     smatch_cfg;    /* See enum: use fields below for soft config */
  xd3_smatcher       smatcher_soft;
};

/* The primary source file object. You create one of these objects and initialize the
 * first four fields.  This library maintains the next 5 fields.  The configured getblk
 * implementation is responsible for setting the final 3 fields when called (and/or when
 * XD3_GETSRCBLK is returned).
 */
struct _xd3_source
{
  /* you set */
  xoff_t              size;          /* size of this source */
  usize_t             blksize;       /* block size */
  const char         *name;          /* its name, for debug/print purposes */
  void               *ioh;           /* opaque handle */

  /* getblk sets */
  xoff_t              curblkno;      /* current block number: client sets after getblk request */
  usize_t             onblk;         /* number of bytes on current block: client sets, xd3 verifies */
  const uint8_t      *curblk;        /* current block array: client sets after getblk request */

  /* xd3 sets */
  usize_t             srclen;        /* length of this source window */
  xoff_t              srcbase;       /* offset of this source window in the source itself */
  xoff_t              blocks;        /* the total number of blocks in this source */
  usize_t             cpyoff_blocks; /* offset of copy window in blocks */
  usize_t             cpyoff_blkoff; /* offset of copy window in blocks, remainder */
  xoff_t              getblkno;      /* request block number: xd3 sets current getblk request */
};

/* The primary xd3_stream object, used for encoding and decoding.  You may access only two
 * fields: avail_out, next_out.  Use the methods above to operate on xd3_stream. */
struct _xd3_stream
{
  /* input state */
  const uint8_t    *next_in;          /* next input byte */
  usize_t           avail_in;         /* number of bytes available at next_in */
  xoff_t            total_in;         /* how many bytes in */

  /* output state */
  uint8_t          *next_out;         /* next output byte */
  usize_t           avail_out;        /* number of bytes available at next_out */
  usize_t           space_out;        /* total out space */
  xoff_t            current_window;   /* number of windows encoded/decoded */
  xoff_t            total_out;        /* how many bytes out */

  /* to indicate an error, xd3 sets */
  const char       *msg;              /* last error message, NULL if no error */

  /* source configuration */
  xd3_source       *src;              /* source array */

  /* encoder memory configuration */
  usize_t           winsize;          /* suggested window size */
  usize_t           sprevsz;          /* small string, previous window size (power of 2) */
  usize_t           sprevmask;        /* small string, previous window size mask */
  uint              iopt_size;
  uint              iopt_unlimited;
  uint              srcwin_maxsz;

  /* general configuration */
  xd3_getblk_func  *getblk;           /* set nxtblk, nxtblkno to scanblkno */
  xd3_alloc_func   *alloc;            /* malloc function */
  xd3_free_func    *free;             /* free function */
  void*             opaque;           /* private data object passed to alloc, free, and getblk */
  int               flags;            /* various options */
  
  /* secondary compressor configuration */
  xd3_sec_cfg       sec_data;         /* Secondary compressor config: data */
  xd3_sec_cfg       sec_inst;         /* Secondary compressor config: inst */
  xd3_sec_cfg       sec_addr;         /* Secondary compressor config: addr */

  xd3_smatcher      smatcher;

  usize_t           *large_table;      /* table of large checksums */
  xd3_hash_cfg       large_hash;       /* large hash config */

  usize_t           *small_table;      /* table of small checksums */
  xd3_slist         *small_prev;       /* table of previous offsets, circular linked list */
  int                small_reset;      /* true if small table should be reset */

  xd3_hash_cfg       small_hash;       /* small hash config */
  xd3_addr_cache     acache;           /* the vcdiff address cache */
  xd3_encode_state   enc_state;        /* state of the encoder */

  usize_t            taroff;           /* base offset of the target input */
  usize_t            input_position;   /* current input position */
  usize_t            min_match;        /* current minimum match length, avoids redundent matches */
  usize_t            unencoded_offset; /* current input, first unencoded offset. this value is <= the first
				       * instruction's position in the iopt buffer, if there is at least one
				       * match in the buffer. */

  // SRCWIN
  // these variables plus srcwin_maxsz above (set by config)
  int                srcwin_decided;    /* boolean: true if the srclen,srcbase have been decided. */
  xoff_t             srcwin_cksum_pos;  /* Source checksum position */

  // MATCH
  xd3_match_state    match_state;      /* encoder match state */
  xoff_t             match_srcpos;     /* current match source position relative to srcbase */
  xoff_t             match_minaddr;    /* smallest matching address to set window params
				       * (reset each window xd3_encode_reset) */
  xoff_t             match_maxaddr;    /* largest matching address to set window params
				       * (reset each window xd3_encode_reset) */
  usize_t            match_back;       /* match extends back so far */
  usize_t            match_maxback;    /* match extends back maximum */
  usize_t            match_fwd;        /* match extends forward so far */
  usize_t            match_maxfwd;     /* match extends forward maximum */

  xoff_t             maxsrcaddr;      /* address of the last source match (across windows) */

  uint8_t          *buf_in;           /* for saving buffered input */
  usize_t            buf_avail;        /* amount of saved input */
  const uint8_t    *buf_leftover;     /* leftover content of next_in (i.e., user's buffer) */
  usize_t            buf_leftavail;    /* amount of leftover content */

  xd3_output       *enc_current;      /* current output buffer */
  xd3_output       *enc_free;         /* free output buffers */
  xd3_output       *enc_heads[4];     /* array of encoded outputs: head of chain */
  xd3_output       *enc_tails[4];     /* array of encoded outputs: tail of chain */

  xd3_rlist         iopt_used;        /* instruction optimizing buffer */
  xd3_rlist         iopt_free;
  xd3_rinst        *iout;             /* next single instruction */
  xd3_iopt_buflist *iopt_alloc;

  const uint8_t    *enc_appheader;    /* application header to encode */
  usize_t            enc_appheadsz;    /* application header size */

  /* decoder stuff */
  xd3_decode_state  dec_state;        /* current DEC_XXX value */
  uint              dec_hdr_ind;      /* VCDIFF header indicator */
  uint              dec_win_ind;      /* VCDIFF window indicator */
  uint              dec_del_ind;      /* VCDIFF delta indicator */

  uint8_t           dec_magic[4];     /* First four bytes */
  usize_t           dec_magicbytes;   /* Magic position. */

  uint              dec_secondid;     /* Optional secondary compressor ID. */

  uint32_t          dec_codetblsz;    /* Optional code table: length. */
  uint8_t          *dec_codetbl;      /* Optional code table: storage. */
  usize_t           dec_codetblbytes; /* Optional code table: position. */

  uint32_t          dec_appheadsz;    /* Optional application header: size. */
  uint8_t          *dec_appheader;    /* Optional application header: storage */
  usize_t           dec_appheadbytes; /* Optional application header: position. */

  usize_t            dec_cksumbytes;   /* Optional checksum: position. */
  uint8_t           dec_cksum[4];     /* Optional checksum: storage. */
  uint32_t          dec_adler32;      /* Optional checksum: value. */

  uint32_t           dec_cpylen;       /* length of copy window (VCD_SOURCE or VCD_TARGET) */
  xoff_t             dec_cpyoff;       /* offset of copy window (VCD_SOURCE or VCD_TARGET)  */
  uint32_t           dec_enclen;       /* length of delta encoding */
  uint32_t           dec_tgtlen;       /* length of target window */

#if USE_UINT64
  uint64_t          dec_64part;       /* part of a decoded uint64_t */
#endif
#if USE_UINT32
  uint32_t          dec_32part;       /* part of a decoded uint32_t */
#endif

  xoff_t            dec_winstart;     /* offset of the start of current target window */
  xoff_t            dec_window_count; /* == current_window + 1 in DEC_FINISH */
  usize_t            dec_winbytes;     /* bytes of the three sections so far consumed */
  usize_t            dec_hdrsize;      /* VCDIFF + app header size */

  const uint8_t    *dec_tgtaddrbase;  /* Base of decoded target addresses (addr >= dec_cpylen). */
  const uint8_t    *dec_cpyaddrbase;  /* Base of decoded copy addresses (addr < dec_cpylen). */

  usize_t            dec_position;     /* current decoder position counting the cpylen offset */
  usize_t            dec_maxpos;       /* maximum decoder position counting the cpylen offset */
  xd3_hinst         dec_current1;     /* current instruction */
  xd3_hinst         dec_current2;     /* current instruction */

  uint8_t          *dec_buffer;       /* Decode buffer */
  uint8_t          *dec_lastwin;      /* In case of VCD_TARGET, the last target window. */
  usize_t            dec_lastlen;      /* length of the last target window */
  xoff_t            dec_laststart;    /* offset of the start of last target window */
  usize_t            dec_lastspace;    /* allocated space of last target window, for reuse */

  xd3_desect        inst_sect;        /* staging area for decoding window sections */
  xd3_desect        addr_sect;
  xd3_desect        data_sect;

  xd3_code_table_func       *code_table_func;
  xd3_comp_table_func       *comp_table_func;
  const xd3_dinst           *code_table;
  const xd3_code_table_desc *code_table_desc;
  xd3_dinst                 *code_table_alloc;

  /* secondary compression */
  const xd3_sec_type *sec_type;
  xd3_sec_stream     *sec_stream_d;
  xd3_sec_stream     *sec_stream_i;
  xd3_sec_stream     *sec_stream_a;

  /* statistics */
  xoff_t            n_scpy;
  xoff_t            n_tcpy;
  xoff_t            n_add;
  xoff_t            n_run;

  xoff_t            l_scpy;
  xoff_t            l_tcpy;
  xoff_t            l_add;
  xoff_t            l_run;

  usize_t           i_slots_used;

#if XD3_DEBUG
  usize_t            large_ckcnt;

  /* memory usage */
  usize_t            alloc_cnt;
  usize_t            free_cnt;

  xoff_t            n_emit;
#endif
};

/******************************************************************************************
 PUBLIC FUNCTIONS
 ******************************************************************************************/

/* This function configures an xd3_stream using the provided in-memory input buffer,
 * source buffer, output buffer, and flags.  The output array must be large enough or else
 * ENOSPC will be returned.  This is the simplest in-memory encoding interface. */
int     xd3_encode_memory (const uint8_t *input,
			   usize_t        input_size,
			   const uint8_t *source,
			   usize_t        source_size,
			   uint8_t       *output_buffer,
			   usize_t       *output_size,
			   usize_t        avail_output,
			   int            flags);

/* The reverse of xd3_encode_memory. */
int     xd3_decode_memory (const uint8_t *input,
			   usize_t        input_size,
			   const uint8_t *source,
			   usize_t        source_size,
			   uint8_t       *output_buf,
			   usize_t       *output_size,
			   usize_t        avail_output,
			   int            flags);

/* This function encodes an in-memory input.  Everything else about the xd3_stream is
 * configurable.  The output array must be large enough to hold the output or else ENOSPC
 * is returned.  The source (if any) should be set using xd3_set_source() with a
 * single-block xd3_source.  This calls the underlying non-blocking interface,
 * xd3_encode_input(), handling the necessary input/output states.  This method be
 * considered a reference for any application using xd3_encode_input() directly.
 *
 *   xd3_stream stream;
 *   xd3_config config;
 *   xd3_source src;
 * 
 *   memset (& src, 0, sizeof (src));
 *   memset (& stream, 0, sizeof (stream));
 *   memset (& config, 0, sizeof (config));
 *
 *   if (source != NULL)
 *     {
 *       src.size = source_size;
 *       src.blksize = source_size;
 *       src.curblkno = 0;
 *       src.onblk = source_size;
 *       src.curblk = source;
 *       xd3_set_source(&stream, &src);
 *     }
 *
 *   config.flags = flags;
 *   config.srcwin_maxsz = source_size;
 *   config.winsize = input_size;
 *
 *   ... set smatcher, appheader, encoding-table, compression-level, etc.
 *
 *   xd3_config_stream(&stream, &config);
 *   xd3_encode_stream(&stream, ...);
 *   xd3_free_stream(&stream);
 *
 * DO NOT USE except for testing. These methods are allocate bad buffer sizes.
 */
int     xd3_encode_stream (xd3_stream    *stream,
			   const uint8_t *input,
			   usize_t         input_size,
			   uint8_t       *output,
			   usize_t        *output_size,
			   usize_t         avail_output);

/* The reverse of xd3_encode_stream. */
int     xd3_decode_stream (xd3_stream    *stream,
			   const uint8_t *input,
			   usize_t        input_size,
			   uint8_t       *output,
			   usize_t       *output_size,
			   usize_t        avail_size);

/* This is the non-blocking interface.
 *
 * Handling input and output states is the same for encoding or decoding using the
 * xd3_avail_input() and xd3_consume_output() routines, inlined below.
 *
 * Return values:
 *
 *   XD3_INPUT:  the process requires more input: call xd3_avail_input() then repeat
 *   XD3_OUTPUT: the process has more output: read stream->next_out, stream->avail_out,
 *               then call xd3_consume_output(), then repeat
 *   XD3_GOTHEADER: (decoder-only) notification returned following the VCDIFF header and
 *               first window header.  the decoder may use the header to configure itself.
 *   XD3_WINSTART: a general notification returned once for each window except the 0-th
 *               window, which is implied by XD3_GOTHEADER.  It is recommended to
 *               use a switch-stmt such as:
 *                 ...
 *               again:
 *                 switch ((ret = xd3_decode_input (stream))) {
 *                    case XD3_GOTHEADER: {
 *                      assert(stream->current_window == 0);
 *                      stuff;
 *                    }
 *                    // fallthrough 
 *                    case XD3_WINSTART: {
 *                      something(stream->current_window);
 *                      goto again;
 *                    }
 *                    ...
 *   XD3_WINFINISH: a general notification, following the complete input & output of a
 *               window.  at this point, stream->total_in and stream->total_out are
 *               consistent for either encoding or decoding.
 *   XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value is returned to
 *               initiate a non-blocking source read.
 */
int     xd3_decode_input  (xd3_stream    *stream);
int     xd3_encode_input  (xd3_stream    *stream);

/* The xd3_config structure is used to initialize a stream - all data is copied into
 * stream so config may be a temporary variable.  See the [documentation] or comments on
 * the xd3_config structure. */
int     xd3_config_stream (xd3_stream    *stream,
			   xd3_config    *config);

/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an error check that the
 * stream is in a proper state to be closed: this means the encoder is flushed and the
 * decoder is at a window boundary.  The application is responsible for freeing any of the
 * resources it supplied. */
int     xd3_close_stream (xd3_stream    *stream);

/* This unconditionally closes/frees the stream, future close() will succeed. */
void    xd3_abort_stream (xd3_stream    *stream);

/* xd3_free_stream frees all memory allocated for the stream.  The application is
 * responsible for freeing any of the resources it supplied. */
void    xd3_free_stream   (xd3_stream    *stream);

/* This function informs the encoder or decoder that source matching (i.e.,
 * delta-compression) is possible.  For encoding, this should be called before the first
 * xd3_encode_input.  A NULL source is ignored.  For decoding, this should be called
 * before the first window is decoded, but the appheader may be read first
 * (XD3_GOTHEADER).  At this point, consult xd3_decoder_needs_source(), inlined below, to
 * determine if a source is expected by the decoder. */
int     xd3_set_source    (xd3_stream    *stream,
			   xd3_source    *source);

/* This should be called before the first call to xd3_encode_input() to include
 * application-specific data in the VCDIFF header. */
void    xd3_set_appheader (xd3_stream    *stream,
			   const uint8_t *data,
			   usize_t        size);

/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.  For convenience,
 * the decoder always adds a single byte padding to the end of the application header,
 * which is set to zero in case the application header is a string. */
int     xd3_get_appheader (xd3_stream     *stream,
			   uint8_t       **data,
			   usize_t        *size);

/* After receiving XD3_GOTHEADER, the decoder should check this function which returns 1
 * if the decoder will require source data. */
int     xd3_decoder_needs_source (xd3_stream *stream);

/* Gives an error string for xdelta3-speficic errors, returns NULL for system errors */
const char* xd3_strerror (int ret);

/* For convenience, zero & initialize the xd3_config structure with specified flags. */
static inline
void    xd3_init_config (xd3_config *config,
			 int         flags)
{
  memset (config, 0, sizeof (*config));
  config->flags = flags;
}

/* This supplies some input to the stream. */
static inline
void    xd3_avail_input  (xd3_stream    *stream,
			  const uint8_t *idata,
			  usize_t         isize)
{
  /* Even if isize is zero, the code expects a non-NULL idata.  Why?  It uses this value
   * to determine whether xd3_avail_input has ever been called.  If xd3_encode_input is
   * called before xd3_avail_input it will return XD3_INPUT right away without allocating
   * a stream->winsize buffer.  This is to avoid an unwanted allocation. */
  XD3_ASSERT (idata != NULL);

  stream->next_in  = idata;
  stream->avail_in = isize;
}

/* This acknowledges receipt of output data, must be called after any XD3_OUTPUT
 * return. */
static inline
void xd3_consume_output (xd3_stream  *stream)
{
  stream->avail_out  = 0;
}

/* These are set for each XD3_WINFINISH return. */
static inline
int     xd3_encoder_used_source (xd3_stream *stream) { return stream->src != NULL && stream->src->srclen > 0; }
static inline
xoff_t  xd3_encoder_srcbase (xd3_stream *stream) { return stream->src->srcbase; }
static inline
usize_t  xd3_encoder_srclen (xd3_stream *stream) { return stream->src->srclen; }

/* Checks for legal flag changes. */
static inline
void xd3_set_flags (xd3_stream *stream, int flags)
{
  /* The bitwise difference should contain only XD3_FLUSH or XD3_SKIP_WINDOW */
  XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
  stream->flags = flags;
}

/* Gives some extra information about the latest library error, if any is known. */
static inline
const char* xd3_errstring (xd3_stream  *stream)
{
  return stream->msg ? stream->msg : "";
}

/* This function tells the number of bytes expected to be set in source->onblk after a
 * getblk request.  This is for convenience of handling a partial last block. */
static inline
usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno)
{
  XD3_ASSERT (blkno < source->blocks);

  if (blkno != source->blocks - 1)
    {
      return source->blksize;
    }

  return (usize_t)((source->size - 1) % source->blksize) + 1;
}

#endif /* _XDELTA3_H_ */