source src/indexer.c
Line | Flow | Count | Block(s) | Source |
---|---|---|---|---|
1 | - | /* | ||
2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
3 | - | * | ||
4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
6 | - | */ | ||
7 | - | |||
8 | - | #include "indexer.h" | ||
9 | - | |||
10 | - | #include "git2/indexer.h" | ||
11 | - | #include "git2/object.h" | ||
12 | - | |||
13 | - | #include "commit.h" | ||
14 | - | #include "tree.h" | ||
15 | - | #include "tag.h" | ||
16 | - | #include "pack.h" | ||
17 | - | #include "mwindow.h" | ||
18 | - | #include "posix.h" | ||
19 | - | #include "pack.h" | ||
20 | - | #include "filebuf.h" | ||
21 | - | #include "oid.h" | ||
22 | - | #include "oidarray.h" | ||
23 | - | #include "oidmap.h" | ||
24 | - | #include "zstream.h" | ||
25 | - | #include "object.h" | ||
26 | - | |||
27 | - | extern git_mutex git__mwindow_mutex; | ||
28 | - | |||
29 | - | size_t git_indexer__max_objects = UINT32_MAX; | ||
30 | - | |||
31 | - | #define UINT31_MAX (0x7FFFFFFF) | ||
32 | - | |||
33 | - | struct entry { | ||
34 | - | git_oid oid; | ||
35 | - | uint32_t crc; | ||
36 | - | uint32_t offset; | ||
37 | - | uint64_t offset_long; | ||
38 | - | }; | ||
39 | - | |||
40 | - | struct git_indexer { | ||
41 | - | unsigned int parsed_header :1, | ||
42 | - | pack_committed :1, | ||
43 | - | have_stream :1, | ||
44 | - | have_delta :1, | ||
45 | - | do_fsync :1, | ||
46 | - | do_verify :1; | ||
47 | - | struct git_pack_header hdr; | ||
48 | - | struct git_pack_file *pack; | ||
49 | - | unsigned int mode; | ||
50 | - | off64_t off; | ||
51 | - | off64_t entry_start; | ||
52 | - | git_object_t entry_type; | ||
53 | - | git_buf entry_data; | ||
54 | - | git_packfile_stream stream; | ||
55 | - | size_t nr_objects; | ||
56 | - | git_vector objects; | ||
57 | - | git_vector deltas; | ||
58 | - | unsigned int fanout[256]; | ||
59 | - | git_hash_ctx hash_ctx; | ||
60 | - | git_oid hash; | ||
61 | - | git_indexer_progress_cb progress_cb; | ||
62 | - | void *progress_payload; | ||
63 | - | char objbuf[8*1024]; | ||
64 | - | |||
65 | - | /* OIDs referenced from pack objects. Used for verification. */ | ||
66 | - | git_oidmap *expected_oids; | ||
67 | - | |||
68 | - | /* Needed to look up objects which we want to inject to fix a thin pack */ | ||
69 | - | git_odb *odb; | ||
70 | - | |||
71 | - | /* Fields for calculating the packfile trailer (hash of everything before it) */ | ||
72 | - | char inbuf[GIT_OID_RAWSZ]; | ||
73 | - | size_t inbuf_len; | ||
74 | - | git_hash_ctx trailer; | ||
75 | - | }; | ||
76 | - | |||
77 | - | struct delta_info { | ||
78 | - | off64_t delta_off; | ||
79 | - | }; | ||
80 | - | |||
81 | 17 | 2 | const git_oid *git_indexer_hash(const git_indexer *idx) | |
82 | - | { | ||
83 | 17 | 2 | return &idx->hash; | |
84 | - | } | ||
85 | - | |||
86 | 98 | 2 | static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) | |
87 | - | { | ||
88 | - | int error; | ||
89 | - | git_map map; | ||
90 | - | |||
91 | 98 | 2,3 | if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0) | |
92 | ##### | 4 | return error; | |
93 | - | |||
94 | 98 | 5 | memcpy(hdr, map.data, sizeof(*hdr)); | |
95 | 98 | 5 | p_munmap(&map); | |
96 | - | |||
97 | - | /* Verify we recognize this pack file format. */ | ||
98 | 98 | 6,7 | if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) { | |
99 | ##### | 8 | git_error_set(GIT_ERROR_INDEXER, "wrong pack signature"); | |
100 | ##### | 9 | return -1; | |
101 | - | } | ||
102 | - | |||
103 | 98 | 10-13 | if (!pack_version_ok(hdr->hdr_version)) { | |
104 | ##### | 14 | git_error_set(GIT_ERROR_INDEXER, "wrong pack version"); | |
105 | ##### | 15 | return -1; | |
106 | - | } | ||
107 | - | |||
108 | 98 | 16 | return 0; | |
109 | - | } | ||
110 | - | |||
111 | 37066 | 2 | static int objects_cmp(const void *a, const void *b) | |
112 | - | { | ||
113 | 37066 | 2 | const struct entry *entrya = a; | |
114 | 37066 | 2 | const struct entry *entryb = b; | |
115 | - | |||
116 | 37066 | 2 | return git_oid__cmp(&entrya->oid, &entryb->oid); | |
117 | - | } | ||
118 | - | |||
119 | ##### | 2 | int git_indexer_options_init(git_indexer_options *opts, unsigned int version) | |
120 | - | { | ||
121 | ##### | 2-4 | GIT_INIT_STRUCTURE_FROM_TEMPLATE( | |
122 | - | opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT); | ||
123 | ##### | 5 | return 0; | |
124 | - | } | ||
125 | - | |||
126 | - | #ifndef GIT_DEPRECATE_HARD | ||
127 | ##### | 2 | int git_indexer_init_options(git_indexer_options *opts, unsigned int version) | |
128 | - | { | ||
129 | ##### | 2 | return git_indexer_options_init(opts, version); | |
130 | - | } | ||
131 | - | #endif | ||
132 | - | |||
133 | 99 | 2 | int git_indexer_new( | |
134 | - | git_indexer **out, | ||
135 | - | const char *prefix, | ||
136 | - | unsigned int mode, | ||
137 | - | git_odb *odb, | ||
138 | - | git_indexer_options *in_opts) | ||
139 | - | { | ||
140 | 99 | 2 | git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; | |
141 | - | git_indexer *idx; | ||
142 | 99 | 2 | git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT; | |
143 | - | static const char suff[] = "/pack"; | ||
144 | 99 | 2 | int error, fd = -1; | |
145 | - | |||
146 | 99 | 2 | if (in_opts) | |
147 | 89 | 3 | memcpy(&opts, in_opts, sizeof(opts)); | |
148 | - | |||
149 | 99 | 4 | idx = git__calloc(1, sizeof(git_indexer)); | |
150 | 99 | 5,6 | GIT_ERROR_CHECK_ALLOC(idx); | |
151 | 99 | 7 | idx->odb = odb; | |
152 | 99 | 7 | idx->progress_cb = opts.progress_cb; | |
153 | 99 | 7 | idx->progress_payload = opts.progress_cb_payload; | |
154 | 99 | 7-9 | idx->mode = mode ? mode : GIT_PACK_FILE_MODE; | |
155 | 99 | 10 | git_buf_init(&idx->entry_data, 0); | |
156 | - | |||
157 | 99 | 11-14 | if ((error = git_hash_ctx_init(&idx->hash_ctx)) < 0 || | |
158 | 99 | 13,15,16 | (error = git_hash_ctx_init(&idx->trailer)) < 0 || | |
159 | 99 | 15 | (error = git_oidmap_new(&idx->expected_oids)) < 0) | |
160 | - | goto cleanup; | ||
161 | - | |||
162 | 99 | 17 | idx->do_verify = opts.verify; | |
163 | - | |||
164 | 99 | 17 | if (git_repository__fsync_gitdir) | |
165 | 1 | 18 | idx->do_fsync = 1; | |
166 | - | |||
167 | 99 | 19 | error = git_buf_joinpath(&path, prefix, suff); | |
168 | 99 | 20 | if (error < 0) | |
169 | ##### | 21 | goto cleanup; | |
170 | - | |||
171 | 99 | 22,23 | fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode); | |
172 | 99 | 24 | git_buf_dispose(&path); | |
173 | 99 | 25 | if (fd < 0) | |
174 | ##### | 26 | goto cleanup; | |
175 | - | |||
176 | 99 | 27,28 | error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path)); | |
177 | 99 | 29 | git_buf_dispose(&tmp_path); | |
178 | - | |||
179 | 99 | 30 | if (error < 0) | |
180 | ##### | 31 | goto cleanup; | |
181 | - | |||
182 | 99 | 32 | idx->pack->mwf.fd = fd; | |
183 | 99 | 32,33 | if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0) | |
184 | ##### | 34 | goto cleanup; | |
185 | - | |||
186 | 99 | 35 | *out = idx; | |
187 | 99 | 35 | return 0; | |
188 | - | |||
189 | - | cleanup: | ||
190 | ##### | 36 | if (fd != -1) | |
191 | ##### | 37 | p_close(fd); | |
192 | - | |||
193 | ##### | 38,39 | if (git_buf_len(&tmp_path) > 0) | |
194 | ##### | 40,41 | p_unlink(git_buf_cstr(&tmp_path)); | |
195 | - | |||
196 | ##### | 42 | if (idx->pack != NULL) | |
197 | ##### | 43 | p_unlink(idx->pack->pack_name); | |
198 | - | |||
199 | ##### | 44 | git_buf_dispose(&path); | |
200 | ##### | 45 | git_buf_dispose(&tmp_path); | |
201 | ##### | 46 | git__free(idx); | |
202 | ##### | 47 | return -1; | |
203 | - | } | ||
204 | - | |||
205 | 1 | 2 | void git_indexer__set_fsync(git_indexer *idx, int do_fsync) | |
206 | - | { | ||
207 | 1 | 2 | idx->do_fsync = !!do_fsync; | |
208 | 1 | 2 | } | |
209 | - | |||
210 | - | /* Try to store the delta so we can try to resolve it later */ | ||
211 | 696 | 2 | static int store_delta(git_indexer *idx) | |
212 | - | { | ||
213 | - | struct delta_info *delta; | ||
214 | - | |||
215 | 696 | 2 | delta = git__calloc(1, sizeof(struct delta_info)); | |
216 | 696 | 3,4 | GIT_ERROR_CHECK_ALLOC(delta); | |
217 | 696 | 5 | delta->delta_off = idx->entry_start; | |
218 | - | |||
219 | 696 | 5,6 | if (git_vector_insert(&idx->deltas, delta) < 0) | |
220 | ##### | 7 | return -1; | |
221 | - | |||
222 | 696 | 8 | return 0; | |
223 | - | } | ||
224 | - | |||
225 | 4585 | 2 | static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type) | |
226 | - | { | ||
227 | - | char buffer[64]; | ||
228 | - | size_t hdrlen; | ||
229 | - | int error; | ||
230 | - | |||
231 | 4585 | 2,3 | if ((error = git_odb__format_object_header(&hdrlen, | |
232 | - | buffer, sizeof(buffer), (size_t)len, type)) < 0) | ||
233 | ##### | 4 | return error; | |
234 | - | |||
235 | 4585 | 5 | return git_hash_update(ctx, buffer, hdrlen); | |
236 | - | } | ||
237 | - | |||
238 | 4593 | 2 | static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) | |
239 | - | { | ||
240 | - | ssize_t read; | ||
241 | - | |||
242 | 4593 | 2-4 | assert(idx && stream); | |
243 | - | |||
244 | - | do { | ||
245 | 9230 | 5,6 | if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) | |
246 | 8 | 7 | break; | |
247 | - | |||
248 | 9222 | 8 | if (idx->do_verify) | |
249 | 6 | 9 | git_buf_put(&idx->entry_data, idx->objbuf, read); | |
250 | - | |||
251 | 9222 | 10 | git_hash_update(&idx->hash_ctx, idx->objbuf, read); | |
252 | 9222 | 11 | } while (read > 0); | |
253 | - | |||
254 | 4593 | 12 | if (read < 0) | |
255 | 8 | 13 | return (int)read; | |
256 | - | |||
257 | 4585 | 14 | return 0; | |
258 | - | } | ||
259 | - | |||
260 | - | /* In order to create the packfile stream, we need to skip over the delta base description */ | ||
261 | 697 | 2 | static int advance_delta_offset(git_indexer *idx, git_object_t type) | |
262 | - | { | ||
263 | 697 | 2 | git_mwindow *w = NULL; | |
264 | - | |||
265 | 697 | 2-4 | assert(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA); | |
266 | - | |||
267 | 697 | 5 | if (type == GIT_OBJECT_REF_DELTA) { | |
268 | 138 | 6 | idx->off += GIT_OID_RAWSZ; | |
269 | - | } else { | ||
270 | - | off64_t base_off; | ||
271 | 559 | 7 | int error = get_delta_base(&base_off, idx->pack, &w, &idx->off, type, idx->entry_start); | |
272 | 559 | 8 | git_mwindow_close(&w); | |
273 | 559 | 9 | if (error < 0) | |
274 | 559 | 10,11 | return error; | |
275 | - | } | ||
276 | - | |||
277 | 697 | 12 | return 0; | |
278 | - | } | ||
279 | - | |||
280 | - | /* Read from the stream and discard any output */ | ||
281 | 821 | 2 | static int read_object_stream(git_indexer *idx, git_packfile_stream *stream) | |
282 | - | { | ||
283 | - | ssize_t read; | ||
284 | - | |||
285 | 821 | 2,3 | assert(stream); | |
286 | - | |||
287 | - | do { | ||
288 | 1528 | 4 | read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf)); | |
289 | 1528 | 5 | } while (read > 0); | |
290 | - | |||
291 | 821 | 6 | if (read < 0) | |
292 | 125 | 7 | return (int)read; | |
293 | - | |||
294 | 696 | 8 | return 0; | |
295 | - | } | ||
296 | - | |||
297 | 5276 | 2 | static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size) | |
298 | - | { | ||
299 | - | void *ptr; | ||
300 | - | uint32_t crc; | ||
301 | - | unsigned int left, len; | ||
302 | 5276 | 2 | git_mwindow *w = NULL; | |
303 | - | |||
304 | 5276 | 2 | crc = crc32(0L, Z_NULL, 0); | |
305 | 10552 | 3,9 | while (size) { | |
306 | 5276 | 4 | ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left); | |
307 | 5276 | 5 | if (ptr == NULL) | |
308 | ##### | 6 | return -1; | |
309 | - | |||
310 | 5276 | 7 | len = min(left, (unsigned int)size); | |
311 | 5276 | 7 | crc = crc32(crc, ptr, len); | |
312 | 5276 | 8 | size -= len; | |
313 | 5276 | 8 | start += len; | |
314 | 5276 | 8 | git_mwindow_close(&w); | |
315 | - | } | ||
316 | - | |||
317 | 5276 | 10 | *crc_out = htonl(crc); | |
318 | 5276 | 11 | return 0; | |
319 | - | } | ||
320 | - | |||
321 | 2 | 2 | static int add_expected_oid(git_indexer *idx, const git_oid *oid) | |
322 | - | { | ||
323 | - | /* | ||
324 | - | * If we know about that object because it is stored in our ODB or | ||
325 | - | * because we have already processed it as part of our pack file, we do | ||
326 | - | * not have to expect it. | ||
327 | - | */ | ||
328 | 2 | 2-4,6 | if ((!idx->odb || !git_odb_exists(idx->odb, oid)) && | |
329 | 2 | 5,8 | !git_oidmap_exists(idx->pack->idx_cache, oid) && | |
330 | 2 | 7 | !git_oidmap_exists(idx->expected_oids, oid)) { | |
331 | 2 | 9 | git_oid *dup = git__malloc(sizeof(*oid)); | |
332 | 2 | 10,11 | GIT_ERROR_CHECK_ALLOC(dup); | |
333 | 2 | 12 | git_oid_cpy(dup, oid); | |
334 | 2 | 13 | return git_oidmap_set(idx->expected_oids, dup, dup); | |
335 | - | } | ||
336 | - | |||
337 | ##### | 14 | return 0; | |
338 | - | } | ||
339 | - | |||
340 | 5 | 2 | static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) | |
341 | - | { | ||
342 | - | git_object *object; | ||
343 | - | git_oid *expected; | ||
344 | - | int error; | ||
345 | - | |||
346 | 5 | 2,3 | if (obj->type != GIT_OBJECT_BLOB && | |
347 | 1 | 3,4 | obj->type != GIT_OBJECT_TREE && | |
348 | ##### | 4,5 | obj->type != GIT_OBJECT_COMMIT && | |
349 | ##### | 5 | obj->type != GIT_OBJECT_TAG) | |
350 | ##### | 6 | return 0; | |
351 | - | |||
352 | 5 | 7,8 | if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) | |
353 | ##### | 9 | goto out; | |
354 | - | |||
355 | 5 | 10,11 | if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) { | |
356 | 1 | 12 | git_oidmap_delete(idx->expected_oids, &object->cached.oid); | |
357 | 1 | 13 | git__free(expected); | |
358 | - | } | ||
359 | - | |||
360 | - | /* | ||
361 | - | * Check whether this is a known object. If so, we can just continue as | ||
362 | - | * we assume that the ODB has a complete graph. | ||
363 | - | */ | ||
364 | 5 | 14-16 | if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) | |
365 | ##### | 17 | return 0; | |
366 | - | |||
367 | 5 | 18 | switch (obj->type) { | |
368 | - | case GIT_OBJECT_TREE: | ||
369 | - | { | ||
370 | 1 | 19 | git_tree *tree = (git_tree *) object; | |
371 | - | git_tree_entry *entry; | ||
372 | - | size_t i; | ||
373 | - | |||
374 | 3 | 19,23-25 | git_array_foreach(tree->entries, i, entry) | |
375 | 2 | 20,21 | if (add_expected_oid(idx, entry->oid) < 0) | |
376 | ##### | 22 | goto out; | |
377 | - | |||
378 | 1 | 26 | break; | |
379 | - | } | ||
380 | - | case GIT_OBJECT_COMMIT: | ||
381 | - | { | ||
382 | ##### | 27 | git_commit *commit = (git_commit *) object; | |
383 | - | git_oid *parent_oid; | ||
384 | - | size_t i; | ||
385 | - | |||
386 | ##### | 27,31-33 | git_array_foreach(commit->parent_ids, i, parent_oid) | |
387 | ##### | 28,29 | if (add_expected_oid(idx, parent_oid) < 0) | |
388 | ##### | 30 | goto out; | |
389 | - | |||
390 | ##### | 34,35 | if (add_expected_oid(idx, &commit->tree_id) < 0) | |
391 | ##### | 36 | goto out; | |
392 | - | |||
393 | ##### | 37 | break; | |
394 | - | } | ||
395 | - | case GIT_OBJECT_TAG: | ||
396 | - | { | ||
397 | ##### | 38 | git_tag *tag = (git_tag *) object; | |
398 | - | |||
399 | ##### | 38,39 | if (add_expected_oid(idx, &tag->target) < 0) | |
400 | ##### | 40 | goto out; | |
401 | - | |||
402 | ##### | 41 | break; | |
403 | - | } | ||
404 | - | case GIT_OBJECT_BLOB: | ||
405 | - | default: | ||
406 | 4 | 42 | break; | |
407 | - | } | ||
408 | - | |||
409 | - | out: | ||
410 | 5 | 43 | git_object_free(object); | |
411 | - | |||
412 | 5 | 44 | return error; | |
413 | - | } | ||
414 | - | |||
415 | 4585 | 2 | static int store_object(git_indexer *idx) | |
416 | - | { | ||
417 | - | int i, error; | ||
418 | - | git_oid oid; | ||
419 | - | struct entry *entry; | ||
420 | - | off64_t entry_size; | ||
421 | - | struct git_pack_entry *pentry; | ||
422 | 4585 | 2 | off64_t entry_start = idx->entry_start; | |
423 | - | |||
424 | 4585 | 2 | entry = git__calloc(1, sizeof(*entry)); | |
425 | 4585 | 3,4 | GIT_ERROR_CHECK_ALLOC(entry); | |
426 | - | |||
427 | 4585 | 5 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); | |
428 | 4585 | 6,7 | GIT_ERROR_CHECK_ALLOC(pentry); | |
429 | - | |||
430 | 4585 | 8 | git_hash_final(&oid, &idx->hash_ctx); | |
431 | 4585 | 9 | entry_size = idx->off - entry_start; | |
432 | 4585 | 9 | if (entry_start > UINT31_MAX) { | |
433 | ##### | 10 | entry->offset = UINT32_MAX; | |
434 | ##### | 10 | entry->offset_long = entry_start; | |
435 | - | } else { | ||
436 | 4585 | 11 | entry->offset = (uint32_t)entry_start; | |
437 | - | } | ||
438 | - | |||
439 | 4585 | 12 | if (idx->do_verify) { | |
440 | 3 | 13,13,13 | git_rawobj rawobj = { | |
441 | 3 | 13 | idx->entry_data.ptr, | |
442 | 3 | 13 | idx->entry_data.size, | |
443 | 3 | 13 | idx->entry_type | |
444 | - | }; | ||
445 | - | |||
446 | 3 | 13,14 | if ((error = check_object_connectivity(idx, &rawobj)) < 0) | |
447 | 3 | 15,16 | goto on_error; | |
448 | - | } | ||
449 | - | |||
450 | 4585 | 17 | git_oid_cpy(&pentry->sha1, &oid); | |
451 | 4585 | 18 | pentry->offset = entry_start; | |
452 | - | |||
453 | 4585 | 18,19 | if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) { | |
454 | ##### | 20,21 | git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1)); | |
455 | ##### | 22 | git__free(pentry); | |
456 | ##### | 39 | goto on_error; | |
457 | - | } | ||
458 | - | |||
459 | 4585 | 23,24 | if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) { | |
460 | ##### | 25 | git__free(pentry); | |
461 | ##### | 26 | git_error_set_oom(); | |
462 | ##### | 27 | goto on_error; | |
463 | - | } | ||
464 | - | |||
465 | 4585 | 28 | git_oid_cpy(&entry->oid, &oid); | |
466 | - | |||
467 | 4585 | 29,30 | if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) | |
468 | ##### | 31 | goto on_error; | |
469 | - | |||
470 | - | /* Add the object to the list */ | ||
471 | 4585 | 32,33 | if (git_vector_insert(&idx->objects, entry) < 0) | |
472 | ##### | 34 | goto on_error; | |
473 | - | |||
474 | 606709 | 35-37 | for (i = oid.id[0]; i < 256; ++i) { | |
475 | 602124 | 36 | idx->fanout[i]++; | |
476 | - | } | ||
477 | - | |||
478 | 4585 | 38 | return 0; | |
479 | - | |||
480 | - | on_error: | ||
481 | ##### | 40 | git__free(entry); | |
482 | - | |||
483 | ##### | 41 | return -1; | |
484 | - | } | ||
485 | - | |||
486 | 2 | 2 | GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id) | |
487 | - | { | ||
488 | 2 | 2 | return git_oidmap_exists(idx->pack->idx_cache, id); | |
489 | - | } | ||
490 | - | |||
491 | 693 | 2 | static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start) | |
492 | - | { | ||
493 | - | int i; | ||
494 | - | |||
495 | 693 | 2 | if (entry_start > UINT31_MAX) { | |
496 | ##### | 3 | entry->offset = UINT32_MAX; | |
497 | ##### | 3 | entry->offset_long = entry_start; | |
498 | - | } else { | ||
499 | 693 | 4 | entry->offset = (uint32_t)entry_start; | |
500 | - | } | ||
501 | - | |||
502 | 693 | 5 | pentry->offset = entry_start; | |
503 | - | |||
504 | 693 | 5,6,8 | if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) || | |
505 | 693 | 7 | git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) { | |
506 | ##### | 9 | git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack"); | |
507 | ##### | 10 | return -1; | |
508 | - | } | ||
509 | - | |||
510 | - | /* Add the object to the list */ | ||
511 | 693 | 11,12 | if (git_vector_insert(&idx->objects, entry) < 0) | |
512 | ##### | 13 | return -1; | |
513 | - | |||
514 | 82996 | 14-16 | for (i = entry->oid.id[0]; i < 256; ++i) { | |
515 | 82303 | 15 | idx->fanout[i]++; | |
516 | - | } | ||
517 | - | |||
518 | 693 | 17 | return 0; | |
519 | - | } | ||
520 | - | |||
521 | 691 | 2 | static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start) | |
522 | - | { | ||
523 | - | git_oid oid; | ||
524 | - | size_t entry_size; | ||
525 | - | struct entry *entry; | ||
526 | 691 | 2 | struct git_pack_entry *pentry = NULL; | |
527 | - | |||
528 | 691 | 2 | entry = git__calloc(1, sizeof(*entry)); | |
529 | 691 | 3,4 | GIT_ERROR_CHECK_ALLOC(entry); | |
530 | - | |||
531 | 691 | 5,6 | if (git_odb__hashobj(&oid, obj) < 0) { | |
532 | ##### | 7 | git_error_set(GIT_ERROR_INDEXER, "failed to hash object"); | |
533 | ##### | 18 | goto on_error; | |
534 | - | } | ||
535 | - | |||
536 | 691 | 8 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); | |
537 | 691 | 9,10 | GIT_ERROR_CHECK_ALLOC(pentry); | |
538 | - | |||
539 | 691 | 11 | git_oid_cpy(&pentry->sha1, &oid); | |
540 | 691 | 12 | git_oid_cpy(&entry->oid, &oid); | |
541 | 691 | 13 | entry->crc = crc32(0L, Z_NULL, 0); | |
542 | - | |||
543 | 691 | 14 | entry_size = (size_t)(idx->off - entry_start); | |
544 | 691 | 14,15 | if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) | |
545 | ##### | 16 | goto on_error; | |
546 | - | |||
547 | 691 | 17 | return save_entry(idx, entry, pentry, entry_start); | |
548 | - | |||
549 | - | on_error: | ||
550 | ##### | 19 | git__free(pentry); | |
551 | ##### | 20 | git__free(entry); | |
552 | ##### | 21 | git__free(obj->data); | |
553 | ##### | 22 | return -1; | |
554 | - | } | ||
555 | - | |||
556 | 6070 | 2 | static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats) | |
557 | - | { | ||
558 | 6070 | 2 | if (idx->progress_cb) | |
559 | 1035 | 3,4 | return git_error_set_after_callback_function( | |
560 | 1035 | 3 | idx->progress_cb(stats, idx->progress_payload), | |
561 | - | "indexer progress"); | ||
562 | 5035 | 5 | return 0; | |
563 | - | } | ||
564 | - | |||
565 | - | /* Hash everything but the last 20B of input */ | ||
566 | 4784 | 2 | static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size) | |
567 | - | { | ||
568 | - | size_t to_expell, to_keep; | ||
569 | - | |||
570 | 4784 | 2 | if (size == 0) | |
571 | ##### | 3 | return; | |
572 | - | |||
573 | - | /* Easy case, dump the buffer and the data minus the last 20 bytes */ | ||
574 | 4784 | 4 | if (size >= GIT_OID_RAWSZ) { | |
575 | 1988 | 5 | git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len); | |
576 | 1988 | 6 | git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ); | |
577 | - | |||
578 | 1988 | 7 | data += size - GIT_OID_RAWSZ; | |
579 | 1988 | 7 | memcpy(idx->inbuf, data, GIT_OID_RAWSZ); | |
580 | 1988 | 7 | idx->inbuf_len = GIT_OID_RAWSZ; | |
581 | 1988 | 7 | return; | |
582 | - | } | ||
583 | - | |||
584 | - | /* We can just append */ | ||
585 | 2796 | 8 | if (idx->inbuf_len + size <= GIT_OID_RAWSZ) { | |
586 | 130 | 9 | memcpy(idx->inbuf + idx->inbuf_len, data, size); | |
587 | 130 | 9 | idx->inbuf_len += size; | |
588 | 130 | 9 | return; | |
589 | - | } | ||
590 | - | |||
591 | - | /* We need to partially drain the buffer and then append */ | ||
592 | 2666 | 10 | to_keep = GIT_OID_RAWSZ - size; | |
593 | 2666 | 10 | to_expell = idx->inbuf_len - to_keep; | |
594 | - | |||
595 | 2666 | 10 | git_hash_update(&idx->trailer, idx->inbuf, to_expell); | |
596 | - | |||
597 | 2666 | 11 | memmove(idx->inbuf, idx->inbuf + to_expell, to_keep); | |
598 | 2666 | 11 | memcpy(idx->inbuf + to_keep, data, size); | |
599 | 2666 | 11 | idx->inbuf_len += size - to_expell; | |
600 | - | } | ||
601 | - | |||
602 | 4791 | 2 | static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size) | |
603 | - | { | ||
604 | 4791 | 2 | git_file fd = idx->pack->mwf.fd; | |
605 | - | size_t mmap_alignment; | ||
606 | - | size_t page_offset; | ||
607 | - | off64_t page_start; | ||
608 | - | unsigned char *map_data; | ||
609 | - | git_map map; | ||
610 | - | int error; | ||
611 | - | |||
612 | 4791 | 2-4 | assert(data && size); | |
613 | - | |||
614 | 4791 | 5,6 | if ((error = git__mmap_alignment(&mmap_alignment)) < 0) | |
615 | ##### | 7 | return error; | |
616 | - | |||
617 | - | /* the offset needs to be at the mmap boundary for the platform */ | ||
618 | 4791 | 8 | page_offset = offset % mmap_alignment; | |
619 | 4791 | 8 | page_start = offset - page_offset; | |
620 | - | |||
621 | 4791 | 8,9 | if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0) | |
622 | ##### | 10 | return error; | |
623 | - | |||
624 | 4791 | 11 | map_data = (unsigned char *)map.data; | |
625 | 4791 | 11 | memcpy(map_data + page_offset, data, size); | |
626 | 4791 | 11 | p_munmap(&map); | |
627 | - | |||
628 | 4791 | 12 | return 0; | |
629 | - | } | ||
630 | - | |||
631 | 4789 | 2 | static int append_to_pack(git_indexer *idx, const void *data, size_t size) | |
632 | - | { | ||
633 | - | off64_t new_size; | ||
634 | - | size_t mmap_alignment; | ||
635 | - | size_t page_offset; | ||
636 | - | off64_t page_start; | ||
637 | 4789 | 2 | off64_t current_size = idx->pack->mwf.size; | |
638 | 4789 | 2 | int fd = idx->pack->mwf.fd; | |
639 | - | int error; | ||
640 | - | |||
641 | 4789 | 2 | if (!size) | |
642 | ##### | 3 | return 0; | |
643 | - | |||
644 | 4789 | 4,5 | if ((error = git__mmap_alignment(&mmap_alignment)) < 0) | |
645 | ##### | 6 | return error; | |
646 | - | |||
647 | - | /* Write a single byte to force the file system to allocate space now or | ||
648 | - | * report an error, since we can't report errors when writing using mmap. | ||
649 | - | * Round the size up to the nearest page so that we only need to perform file | ||
650 | - | * I/O when we add a page, instead of whenever we write even a single byte. */ | ||
651 | 4789 | 7 | new_size = current_size + size; | |
652 | 4789 | 7 | page_offset = new_size % mmap_alignment; | |
653 | 4789 | 7 | page_start = new_size - page_offset; | |
654 | - | |||
655 | 4789 | 7,8,10 | if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 || | |
656 | 4789 | 9 | p_write(idx->pack->mwf.fd, data, 1) < 0) { | |
657 | ##### | 11 | git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name); | |
658 | ##### | 12 | return -1; | |
659 | - | } | ||
660 | - | |||
661 | 4789 | 13 | return write_at(idx, data, idx->pack->mwf.size, size); | |
662 | - | } | ||
663 | - | |||
664 | 10026 | 2 | static int read_stream_object(git_indexer *idx, git_indexer_progress *stats) | |
665 | - | { | ||
666 | 10026 | 2 | git_packfile_stream *stream = &idx->stream; | |
667 | 10026 | 2 | off64_t entry_start = idx->off; | |
668 | - | size_t entry_size; | ||
669 | - | git_object_t type; | ||
670 | 10026 | 2 | git_mwindow *w = NULL; | |
671 | - | int error; | ||
672 | - | |||
673 | 10026 | 2 | if (idx->pack->mwf.size <= idx->off + 20) | |
674 | 4612 | 3 | return GIT_EBUFS; | |
675 | - | |||
676 | 5414 | 4 | if (!idx->have_stream) { | |
677 | 5282 | 5 | error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off); | |
678 | 5282 | 6 | if (error == GIT_EBUFS) { | |
679 | ##### | 7 | idx->off = entry_start; | |
680 | ##### | 7 | return error; | |
681 | - | } | ||
682 | 5282 | 8 | if (error < 0) | |
683 | ##### | 9 | return error; | |
684 | - | |||
685 | 5282 | 10 | git_mwindow_close(&w); | |
686 | 5282 | 11 | idx->entry_start = entry_start; | |
687 | 5282 | 11 | git_hash_init(&idx->hash_ctx); | |
688 | 5282 | 12 | git_buf_clear(&idx->entry_data); | |
689 | - | |||
690 | 5282 | 13,14 | if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) { | |
691 | 697 | 15 | error = advance_delta_offset(idx, type); | |
692 | 697 | 16 | if (error == GIT_EBUFS) { | |
693 | ##### | 17 | idx->off = entry_start; | |
694 | ##### | 17 | return error; | |
695 | - | } | ||
696 | 697 | 18 | if (error < 0) | |
697 | ##### | 19 | return error; | |
698 | - | |||
699 | 697 | 20 | idx->have_delta = 1; | |
700 | - | } else { | ||
701 | 4585 | 21 | idx->have_delta = 0; | |
702 | - | |||
703 | 4585 | 21 | error = hash_header(&idx->hash_ctx, entry_size, type); | |
704 | 4585 | 22 | if (error < 0) | |
705 | ##### | 23 | return error; | |
706 | - | } | ||
707 | - | |||
708 | 5282 | 24 | idx->have_stream = 1; | |
709 | 5282 | 24 | idx->entry_type = type; | |
710 | - | |||
711 | 5282 | 24 | error = git_packfile_stream_open(stream, idx->pack, idx->off); | |
712 | 5282 | 25 | if (error < 0) | |
713 | ##### | 26 | return error; | |
714 | - | } | ||
715 | - | |||
716 | 5414 | 27 | if (idx->have_delta) { | |
717 | 821 | 28 | error = read_object_stream(idx, stream); | |
718 | - | } else { | ||
719 | 4593 | 29 | error = hash_object_stream(idx, stream); | |
720 | - | } | ||
721 | - | |||
722 | 5414 | 30 | idx->off = stream->curpos; | |
723 | 5414 | 30 | if (error == GIT_EBUFS) | |
724 | 133 | 31 | return error; | |
725 | - | |||
726 | - | /* We want to free the stream reasorces no matter what here */ | ||
727 | 5281 | 32 | idx->have_stream = 0; | |
728 | 5281 | 32 | git_packfile_stream_dispose(stream); | |
729 | - | |||
730 | 5281 | 33 | if (error < 0) | |
731 | ##### | 34 | return error; | |
732 | - | |||
733 | 5281 | 35 | if (idx->have_delta) { | |
734 | 696 | 36 | error = store_delta(idx); | |
735 | - | } else { | ||
736 | 4585 | 37 | error = store_object(idx); | |
737 | - | } | ||
738 | - | |||
739 | 5281 | 38 | if (error < 0) | |
740 | ##### | 39 | return error; | |
741 | - | |||
742 | 5281 | 40 | if (!idx->have_delta) { | |
743 | 4585 | 41 | stats->indexed_objects++; | |
744 | - | } | ||
745 | 5281 | 42 | stats->received_objects++; | |
746 | - | |||
747 | 5281 | 42,43 | if ((error = do_progress_callback(idx, stats)) != 0) | |
748 | 3 | 44 | return error; | |
749 | - | |||
750 | 5278 | 45 | return 0; | |
751 | - | } | ||
752 | - | |||
753 | 4783 | 2 | int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats) | |
754 | - | { | ||
755 | 4783 | 2 | int error = -1; | |
756 | 4783 | 2 | struct git_pack_header *hdr = &idx->hdr; | |
757 | 4783 | 2 | git_mwindow_file *mwf = &idx->pack->mwf; | |
758 | - | |||
759 | 4783 | 2-5 | assert(idx && data && stats); | |
760 | - | |||
761 | 4783 | 6,7 | if ((error = append_to_pack(idx, data, size)) < 0) | |
762 | ##### | 8 | return error; | |
763 | - | |||
764 | 4783 | 9 | hash_partially(idx, data, (int)size); | |
765 | - | |||
766 | - | /* Make sure we set the new size of the pack */ | ||
767 | 4783 | 10 | idx->pack->mwf.size += size; | |
768 | - | |||
769 | 4783 | 10 | if (!idx->parsed_header) { | |
770 | - | unsigned int total_objects; | ||
771 | - | |||
772 | 122 | 11 | if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) | |
773 | 24 | 12 | return 0; | |
774 | - | |||
775 | 98 | 13,14 | if ((error = parse_header(&idx->hdr, idx->pack)) < 0) | |
776 | ##### | 15 | return error; | |
777 | - | |||
778 | 98 | 16 | idx->parsed_header = 1; | |
779 | 98 | 16 | idx->nr_objects = ntohl(hdr->hdr_entries); | |
780 | 98 | 17 | idx->off = sizeof(struct git_pack_header); | |
781 | - | |||
782 | 98 | 17 | if (idx->nr_objects <= git_indexer__max_objects) { | |
783 | 98 | 18 | total_objects = (unsigned int)idx->nr_objects; | |
784 | - | } else { | ||
785 | ##### | 19 | git_error_set(GIT_ERROR_INDEXER, "too many objects"); | |
786 | ##### | 20 | return -1; | |
787 | - | } | ||
788 | - | |||
789 | 98 | 21,22 | if (git_oidmap_new(&idx->pack->idx_cache) < 0) | |
790 | ##### | 23 | return -1; | |
791 | - | |||
792 | 98 | 24 | idx->pack->has_cache = 1; | |
793 | 98 | 24,25 | if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0) | |
794 | ##### | 26 | return -1; | |
795 | - | |||
796 | 98 | 27,28 | if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0) | |
797 | ##### | 29 | return -1; | |
798 | - | |||
799 | 98 | 30 | stats->received_objects = 0; | |
800 | 98 | 30 | stats->local_objects = 0; | |
801 | 98 | 30 | stats->total_deltas = 0; | |
802 | 98 | 30 | stats->indexed_deltas = 0; | |
803 | 98 | 30 | stats->indexed_objects = 0; | |
804 | 98 | 30 | stats->total_objects = total_objects; | |
805 | - | |||
806 | 98 | 30,31 | if ((error = do_progress_callback(idx, stats)) != 0) | |
807 | 1 | 32 | return error; | |
808 | - | } | ||
809 | - | |||
810 | - | /* Now that we have data in the pack, let's try to parse it */ | ||
811 | - | |||
812 | - | /* As the file grows any windows we try to use will be out of date */ | ||
813 | 4758 | 33 | git_mwindow_free_all(mwf); | |
814 | - | |||
815 | 10036 | 39,40 | while (stats->indexed_objects < idx->nr_objects) { | |
816 | 10026 | 34,35 | if ((error = read_stream_object(idx, stats)) != 0) { | |
817 | 4748 | 36 | if (error == GIT_EBUFS) | |
818 | 4745 | 37 | break; | |
819 | - | else | ||
820 | 3 | 38 | goto on_error; | |
821 | - | } | ||
822 | - | } | ||
823 | - | |||
824 | 4755 | 41 | return 0; | |
825 | - | |||
826 | - | on_error: | ||
827 | 3 | 42 | git_mwindow_free_all(mwf); | |
828 | 3 | 43 | return error; | |
829 | - | } | ||
830 | - | |||
831 | 180 | 2 | static int index_path(git_buf *path, git_indexer *idx, const char *suffix) | |
832 | - | { | ||
833 | 180 | 2 | const char prefix[] = "pack-"; | |
834 | 180 | 2 | size_t slash = (size_t)path->size; | |
835 | - | |||
836 | - | /* search backwards for '/' */ | ||
837 | 5940 | 2,4,5 | while (slash > 0 && path->ptr[slash - 1] != '/') | |
838 | 5760 | 3 | slash--; | |
839 | - | |||
840 | 180 | 6,7 | if (git_buf_grow(path, slash + 1 + strlen(prefix) + | |
841 | 180 | 6 | GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) | |
842 | ##### | 8 | return -1; | |
843 | - | |||
844 | 180 | 9 | git_buf_truncate(path, slash); | |
845 | 180 | 10 | git_buf_puts(path, prefix); | |
846 | 180 | 11,12 | git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash); | |
847 | 180 | 13 | path->size += GIT_OID_HEXSZ; | |
848 | 180 | 13 | git_buf_puts(path, suffix); | |
849 | - | |||
850 | 180 | 14 | return git_buf_oom(path) ? -1 : 0; | |
851 | - | } | ||
852 | - | |||
853 | - | /** | ||
854 | - | * Rewind the packfile by the trailer, as we might need to fix the | ||
855 | - | * packfile by injecting objects at the tail and must overwrite it. | ||
856 | - | */ | ||
857 | 2 | 2 | static void seek_back_trailer(git_indexer *idx) | |
858 | - | { | ||
859 | 2 | 2 | idx->pack->mwf.size -= GIT_OID_RAWSZ; | |
860 | 2 | 2 | git_mwindow_free_all(&idx->pack->mwf); | |
861 | 2 | 3 | } | |
862 | - | |||
863 | 2 | 2 | static int inject_object(git_indexer *idx, git_oid *id) | |
864 | - | { | ||
865 | - | git_odb_object *obj; | ||
866 | - | struct entry *entry; | ||
867 | 2 | 2 | struct git_pack_entry *pentry = NULL; | |
868 | 2 | 2 | git_oid foo = {{0}}; | |
869 | - | unsigned char hdr[64]; | ||
870 | 2 | 2 | git_buf buf = GIT_BUF_INIT; | |
871 | - | off64_t entry_start; | ||
872 | - | const void *data; | ||
873 | - | size_t len, hdr_len; | ||
874 | - | int error; | ||
875 | - | |||
876 | 2 | 2 | seek_back_trailer(idx); | |
877 | 2 | 3 | entry_start = idx->pack->mwf.size; | |
878 | - | |||
879 | 2 | 3,4 | if (git_odb_read(&obj, idx->odb, id) < 0) { | |
880 | ##### | 5 | git_error_set(GIT_ERROR_INDEXER, "missing delta bases"); | |
881 | ##### | 6 | return -1; | |
882 | - | } | ||
883 | - | |||
884 | 2 | 7 | data = git_odb_object_data(obj); | |
885 | 2 | 8 | len = git_odb_object_size(obj); | |
886 | - | |||
887 | 2 | 9 | entry = git__calloc(1, sizeof(*entry)); | |
888 | 2 | 10,11 | GIT_ERROR_CHECK_ALLOC(entry); | |
889 | - | |||
890 | 2 | 12 | entry->crc = crc32(0L, Z_NULL, 0); | |
891 | - | |||
892 | - | /* Write out the object header */ | ||
893 | 2 | 13,14 | hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj)); | |
894 | 2 | 15,16 | if ((error = append_to_pack(idx, hdr, hdr_len)) < 0) | |
895 | ##### | 17 | goto cleanup; | |
896 | - | |||
897 | 2 | 18 | idx->pack->mwf.size += hdr_len; | |
898 | 2 | 18 | entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len); | |
899 | - | |||
900 | 2 | 19,20 | if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0) | |
901 | ##### | 21 | goto cleanup; | |
902 | - | |||
903 | - | /* And then the compressed object */ | ||
904 | 2 | 22,23 | if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0) | |
905 | ##### | 24 | goto cleanup; | |
906 | - | |||
907 | 2 | 25 | idx->pack->mwf.size += buf.size; | |
908 | 2 | 25,26 | entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size)); | |
909 | 2 | 27 | git_buf_dispose(&buf); | |
910 | - | |||
911 | - | /* Write a fake trailer so the pack functions play ball */ | ||
912 | - | |||
913 | 2 | 28,29 | if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0) | |
914 | ##### | 30 | goto cleanup; | |
915 | - | |||
916 | 2 | 31 | idx->pack->mwf.size += GIT_OID_RAWSZ; | |
917 | - | |||
918 | 2 | 31 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); | |
919 | 2 | 32,33 | GIT_ERROR_CHECK_ALLOC(pentry); | |
920 | - | |||
921 | 2 | 34 | git_oid_cpy(&pentry->sha1, id); | |
922 | 2 | 35 | git_oid_cpy(&entry->oid, id); | |
923 | 2 | 36 | idx->off = entry_start + hdr_len + len; | |
924 | - | |||
925 | 2 | 36 | error = save_entry(idx, entry, pentry, entry_start); | |
926 | - | |||
927 | - | cleanup: | ||
928 | 2 | 37 | if (error) { | |
929 | ##### | 38 | git__free(entry); | |
930 | ##### | 39 | git__free(pentry); | |
931 | - | } | ||
932 | - | |||
933 | 2 | 40 | git_odb_object_free(obj); | |
934 | 2 | 41 | return error; | |
935 | - | } | ||
936 | - | |||
937 | 2 | 2 | static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats) | |
938 | - | { | ||
939 | 2 | 2 | int error, found_ref_delta = 0; | |
940 | - | unsigned int i; | ||
941 | - | struct delta_info *delta; | ||
942 | - | size_t size; | ||
943 | - | git_object_t type; | ||
944 | 2 | 2 | git_mwindow *w = NULL; | |
945 | 2 | 2 | off64_t curpos = 0; | |
946 | - | unsigned char *base_info; | ||
947 | 2 | 2 | unsigned int left = 0; | |
948 | - | git_oid base; | ||
949 | - | |||
950 | 2 | 2-4 | assert(git_vector_length(&idx->deltas) > 0); | |
951 | - | |||
952 | 2 | 5 | if (idx->odb == NULL) { | |
953 | ##### | 6 | git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB"); | |
954 | ##### | 7 | return -1; | |
955 | - | } | ||
956 | - | |||
957 | - | /* Loop until we find the first REF delta */ | ||
958 | 2 | 8,16-18 | git_vector_foreach(&idx->deltas, i, delta) { | |
959 | 2 | 9 | if (!delta) | |
960 | ##### | 10 | continue; | |
961 | - | |||
962 | 2 | 11 | curpos = delta->delta_off; | |
963 | 2 | 11 | error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos); | |
964 | 2 | 12 | if (error < 0) | |
965 | ##### | 13 | return error; | |
966 | - | |||
967 | 2 | 14 | if (type == GIT_OBJECT_REF_DELTA) { | |
968 | 2 | 15 | found_ref_delta = 1; | |
969 | 2 | 15 | break; | |
970 | - | } | ||
971 | - | } | ||
972 | - | |||
973 | 2 | 19 | if (!found_ref_delta) { | |
974 | ##### | 20 | git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object"); | |
975 | ##### | 21 | return -1; | |
976 | - | } | ||
977 | - | |||
978 | - | /* curpos now points to the base information, which is an OID */ | ||
979 | 2 | 22 | base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); | |
980 | 2 | 23 | if (base_info == NULL) { | |
981 | ##### | 24 | git_error_set(GIT_ERROR_INDEXER, "failed to map delta information"); | |
982 | ##### | 25 | return -1; | |
983 | - | } | ||
984 | - | |||
985 | 2 | 26 | git_oid_fromraw(&base, base_info); | |
986 | 2 | 27 | git_mwindow_close(&w); | |
987 | - | |||
988 | 2 | 28,29 | if (has_entry(idx, &base)) | |
989 | ##### | 30 | return 0; | |
990 | - | |||
991 | 2 | 31,32 | if (inject_object(idx, &base) < 0) | |
992 | ##### | 33 | return -1; | |
993 | - | |||
994 | 2 | 34 | stats->local_objects++; | |
995 | - | |||
996 | 2 | 34 | return 0; | |
997 | - | } | ||
998 | - | |||
999 | 92 | 2 | static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats) | |
1000 | - | { | ||
1001 | - | unsigned int i; | ||
1002 | - | int error; | ||
1003 | - | struct delta_info *delta; | ||
1004 | 92 | 2 | int progressed = 0, non_null = 0, progress_cb_result; | |
1005 | - | |||
1006 | 181 | 2,36 | while (idx->deltas.length > 0) { | |
1007 | 175 | 3 | progressed = 0; | |
1008 | 175 | 3 | non_null = 0; | |
1009 | 1563 | 3,27-29 | git_vector_foreach(&idx->deltas, i, delta) { | |
1010 | 1389 | 4 | git_rawobj obj = {0}; | |
1011 | - | |||
1012 | 1389 | 4 | if (!delta) | |
1013 | 697 | 5,25 | continue; | |
1014 | - | |||
1015 | 696 | 6 | non_null = 1; | |
1016 | 696 | 6 | idx->off = delta->delta_off; | |
1017 | 696 | 6,7 | if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) { | |
1018 | 5 | 8 | if (error == GIT_PASSTHROUGH) { | |
1019 | - | /* We have not seen the base object, we'll try again later. */ | ||
1020 | 4 | 9 | continue; | |
1021 | - | } | ||
1022 | 1 | 10,26 | return -1; | |
1023 | - | } | ||
1024 | - | |||
1025 | 691 | 11-13 | if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) | |
1026 | - | /* TODO: error? continue? */ | ||
1027 | ##### | 14 | continue; | |
1028 | - | |||
1029 | 691 | 15,16 | if (hash_and_save(idx, &obj, delta->delta_off) < 0) | |
1030 | ##### | 17 | continue; | |
1031 | - | |||
1032 | 691 | 18 | git__free(obj.data); | |
1033 | 691 | 19 | stats->indexed_objects++; | |
1034 | 691 | 19 | stats->indexed_deltas++; | |
1035 | 691 | 19 | progressed = 1; | |
1036 | 691 | 19,20 | if ((progress_cb_result = do_progress_callback(idx, stats)) < 0) | |
1037 | ##### | 21 | return progress_cb_result; | |
1038 | - | |||
1039 | - | /* remove from the list */ | ||
1040 | 691 | 22 | git_vector_set(NULL, &idx->deltas, i, NULL); | |
1041 | 691 | 23,24 | git__free(delta); | |
1042 | - | } | ||
1043 | - | |||
1044 | - | /* if none were actually set, we're done */ | ||
1045 | 174 | 30 | if (!non_null) | |
1046 | 85 | 31 | break; | |
1047 | - | |||
1048 | 89 | 32-34 | if (!progressed && (fix_thin_pack(idx, stats) < 0)) { | |
1049 | ##### | 35 | return -1; | |
1050 | - | } | ||
1051 | - | } | ||
1052 | - | |||
1053 | 91 | 37 | return 0; | |
1054 | - | } | ||
1055 | - | |||
1056 | 1 | 2 | static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats) | |
1057 | - | { | ||
1058 | - | void *ptr; | ||
1059 | 1 | 2 | size_t chunk = 1024*1024; | |
1060 | 1 | 2 | off64_t hashed = 0; | |
1061 | 1 | 2 | git_mwindow *w = NULL; | |
1062 | - | git_mwindow_file *mwf; | ||
1063 | - | unsigned int left; | ||
1064 | - | |||
1065 | 1 | 2 | mwf = &idx->pack->mwf; | |
1066 | - | |||
1067 | 1 | 2 | git_hash_init(&idx->trailer); | |
1068 | - | |||
1069 | - | |||
1070 | - | /* Update the header to include the numer of local objects we injected */ | ||
1071 | 1 | 3 | idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); | |
1072 | 1 | 4,5 | if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0) | |
1073 | ##### | 6 | return -1; | |
1074 | - | |||
1075 | - | /* | ||
1076 | - | * We now use the same technique as before to determine the | ||
1077 | - | * hash. We keep reading up to the end and let | ||
1078 | - | * hash_partially() keep the existing trailer out of the | ||
1079 | - | * calculation. | ||
1080 | - | */ | ||
1081 | 1 | 7 | git_mwindow_free_all(mwf); | |
1082 | 1 | 8 | idx->inbuf_len = 0; | |
1083 | 2 | 8,14 | while (hashed < mwf->size) { | |
1084 | 1 | 9 | ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); | |
1085 | 1 | 10 | if (ptr == NULL) | |
1086 | ##### | 11 | return -1; | |
1087 | - | |||
1088 | 1 | 12 | hash_partially(idx, ptr, left); | |
1089 | 1 | 13 | hashed += left; | |
1090 | - | |||
1091 | 1 | 13 | git_mwindow_close(&w); | |
1092 | - | } | ||
1093 | - | |||
1094 | 1 | 15 | return 0; | |
1095 | - | } | ||
1096 | - | |||
1097 | 94 | 2 | int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) | |
1098 | - | { | ||
1099 | 94 | 2 | git_mwindow *w = NULL; | |
1100 | 94 | 2 | unsigned int i, long_offsets = 0, left; | |
1101 | - | int error; | ||
1102 | - | struct git_pack_idx_header hdr; | ||
1103 | 94 | 2 | git_buf filename = GIT_BUF_INIT; | |
1104 | - | struct entry *entry; | ||
1105 | - | git_oid trailer_hash, file_hash; | ||
1106 | 94 | 2 | git_filebuf index_file = {0}; | |
1107 | - | void *packfile_trailer; | ||
1108 | - | |||
1109 | 94 | 2 | if (!idx->parsed_header) { | |
1110 | ##### | 3 | git_error_set(GIT_ERROR_INDEXER, "incomplete pack header"); | |
1111 | ##### | 4 | return -1; | |
1112 | - | } | ||
1113 | - | |||
1114 | - | /* Test for this before resolve_deltas(), as it plays with idx->off */ | ||
1115 | 94 | 5 | if (idx->off + 20 < idx->pack->mwf.size) { | |
1116 | ##### | 6 | git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack"); | |
1117 | ##### | 7 | return -1; | |
1118 | - | } | ||
1119 | 94 | 8 | if (idx->off + 20 > idx->pack->mwf.size) { | |
1120 | 2 | 9 | git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack"); | |
1121 | 2 | 10 | return -1; | |
1122 | - | } | ||
1123 | - | |||
1124 | 92 | 11 | packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); | |
1125 | 92 | 12 | if (packfile_trailer == NULL) { | |
1126 | ##### | 13 | git_mwindow_close(&w); | |
1127 | ##### | 131 | goto on_error; | |
1128 | - | } | ||
1129 | - | |||
1130 | - | /* Compare the packfile trailer as it was sent to us and what we calculated */ | ||
1131 | 92 | 14 | git_oid_fromraw(&file_hash, packfile_trailer); | |
1132 | 92 | 15 | git_mwindow_close(&w); | |
1133 | - | |||
1134 | 92 | 16 | git_hash_final(&trailer_hash, &idx->trailer); | |
1135 | 92 | 17,18 | if (git_oid_cmp(&file_hash, &trailer_hash)) { | |
1136 | ##### | 19 | git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch"); | |
1137 | ##### | 20 | return -1; | |
1138 | - | } | ||
1139 | - | |||
1140 | - | /* Freeze the number of deltas */ | ||
1141 | 92 | 21 | stats->total_deltas = stats->total_objects - stats->indexed_objects; | |
1142 | - | |||
1143 | 92 | 21,22 | if ((error = resolve_deltas(idx, stats)) < 0) | |
1144 | 1 | 23 | return error; | |
1145 | - | |||
1146 | 91 | 24 | if (stats->indexed_objects != stats->total_objects) { | |
1147 | ##### | 25 | git_error_set(GIT_ERROR_INDEXER, "early EOF"); | |
1148 | ##### | 26 | return -1; | |
1149 | - | } | ||
1150 | - | |||
1151 | 91 | 27 | if (stats->local_objects > 0) { | |
1152 | 1 | 28,29 | if (update_header_and_rehash(idx, stats) < 0) | |
1153 | ##### | 30 | return -1; | |
1154 | - | |||
1155 | 1 | 31 | git_hash_final(&trailer_hash, &idx->trailer); | |
1156 | 1 | 32 | write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); | |
1157 | - | } | ||
1158 | - | |||
1159 | - | /* | ||
1160 | - | * Is the resulting graph fully connected or are we still | ||
1161 | - | * missing some objects? In the second case, we can | ||
1162 | - | * bail out due to an incomplete and thus corrupt | ||
1163 | - | * packfile. | ||
1164 | - | */ | ||
1165 | 91 | 33,34 | if (git_oidmap_size(idx->expected_oids) > 0) { | |
1166 | 1 | 35,36 | git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects", | |
1167 | - | git_oidmap_size(idx->expected_oids)); | ||
1168 | 1 | 37 | return -1; | |
1169 | - | } | ||
1170 | - | |||
1171 | 90 | 38 | git_vector_sort(&idx->objects); | |
1172 | - | |||
1173 | - | /* Use the trailer hash as the pack file name to ensure | ||
1174 | - | * files with different contents have different names */ | ||
1175 | 90 | 39 | git_oid_cpy(&idx->hash, &trailer_hash); | |
1176 | - | |||
1177 | 90 | 40 | git_buf_sets(&filename, idx->pack->pack_name); | |
1178 | 90 | 41 | git_buf_shorten(&filename, strlen("pack")); | |
1179 | 90 | 42 | git_buf_puts(&filename, "idx"); | |
1180 | 90 | 43,44 | if (git_buf_oom(&filename)) | |
1181 | ##### | 45 | return -1; | |
1182 | - | |||
1183 | 90 | 46,46-50 | if (git_filebuf_open(&index_file, filename.ptr, | |
1184 | - | GIT_FILEBUF_HASH_CONTENTS | | ||
1185 | 90 | 46 | (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0), | |
1186 | - | idx->mode) < 0) | ||
1187 | ##### | 51 | goto on_error; | |
1188 | - | |||
1189 | - | /* Write out the header */ | ||
1190 | 90 | 52 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
1191 | 90 | 53 | hdr.idx_version = htonl(2); | |
1192 | 90 | 54 | git_filebuf_write(&index_file, &hdr, sizeof(hdr)); | |
1193 | - | |||
1194 | - | /* Write out the fanout table */ | ||
1195 | 23130 | 55,58,59 | for (i = 0; i < 256; ++i) { | |
1196 | 23040 | 56 | uint32_t n = htonl(idx->fanout[i]); | |
1197 | 23040 | 57 | git_filebuf_write(&index_file, &n, sizeof(n)); | |
1198 | - | } | ||
1199 | - | |||
1200 | - | /* Write out the object names (SHA-1 hashes) */ | ||
1201 | 5297 | 60,62-64 | git_vector_foreach(&idx->objects, i, entry) { | |
1202 | 5207 | 61 | git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid)); | |
1203 | - | } | ||
1204 | - | |||
1205 | - | /* Write out the CRC32 values */ | ||
1206 | 5297 | 65,67-69 | git_vector_foreach(&idx->objects, i, entry) { | |
1207 | 5207 | 66 | git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t)); | |
1208 | - | } | ||
1209 | - | |||
1210 | - | /* Write out the offsets */ | ||
1211 | 5297 | 70,77-79 | git_vector_foreach(&idx->objects, i, entry) { | |
1212 | - | uint32_t n; | ||
1213 | - | |||
1214 | 5207 | 71 | if (entry->offset == UINT32_MAX) | |
1215 | ##### | 72,73 | n = htonl(0x80000000 | long_offsets++); | |
1216 | - | else | ||
1217 | 5207 | 74,75 | n = htonl(entry->offset); | |
1218 | - | |||
1219 | 5207 | 76 | git_filebuf_write(&index_file, &n, sizeof(uint32_t)); | |
1220 | - | } | ||
1221 | - | |||
1222 | - | /* Write out the long offsets */ | ||
1223 | 5297 | 80,87-89 | git_vector_foreach(&idx->objects, i, entry) { | |
1224 | - | uint32_t split[2]; | ||
1225 | - | |||
1226 | 5207 | 81 | if (entry->offset != UINT32_MAX) | |
1227 | 5207 | 82 | continue; | |
1228 | - | |||
1229 | ##### | 83 | split[0] = htonl(entry->offset_long >> 32); | |
1230 | ##### | 84 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
1231 | - | |||
1232 | ##### | 85,86 | git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2); | |
1233 | - | } | ||
1234 | - | |||
1235 | - | /* Write out the packfile trailer to the index */ | ||
1236 | 90 | 90,91 | if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0) | |
1237 | ##### | 92 | goto on_error; | |
1238 | - | |||
1239 | - | /* Write out the hash of the idx */ | ||
1240 | 90 | 93,94 | if (git_filebuf_hash(&trailer_hash, &index_file) < 0) | |
1241 | ##### | 95 | goto on_error; | |
1242 | - | |||
1243 | 90 | 96 | git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid)); | |
1244 | - | |||
1245 | - | /* Figure out what the final name should be */ | ||
1246 | 90 | 97,98 | if (index_path(&filename, idx, ".idx") < 0) | |
1247 | ##### | 99 | goto on_error; | |
1248 | - | |||
1249 | - | /* Commit file */ | ||
1250 | 90 | 100,101 | if (git_filebuf_commit_at(&index_file, filename.ptr) < 0) | |
1251 | ##### | 102 | goto on_error; | |
1252 | - | |||
1253 | 90 | 103 | git_mwindow_free_all(&idx->pack->mwf); | |
1254 | - | |||
1255 | - | /* Truncate file to undo rounding up to next page_size in append_to_pack */ | ||
1256 | 90 | 104,105 | if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) { | |
1257 | ##### | 106 | git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name); | |
1258 | ##### | 107 | return -1; | |
1259 | - | } | ||
1260 | - | |||
1261 | 90 | 108-110 | if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) { | |
1262 | ##### | 111 | git_error_set(GIT_ERROR_OS, "failed to fsync packfile"); | |
1263 | ##### | 112 | goto on_error; | |
1264 | - | } | ||
1265 | - | |||
1266 | - | /* We need to close the descriptor here so Windows doesn't choke on commit_at */ | ||
1267 | 90 | 113,114 | if (p_close(idx->pack->mwf.fd) < 0) { | |
1268 | ##### | 115 | git_error_set(GIT_ERROR_OS, "failed to close packfile"); | |
1269 | ##### | 116 | goto on_error; | |
1270 | - | } | ||
1271 | - | |||
1272 | 90 | 117 | idx->pack->mwf.fd = -1; | |
1273 | - | |||
1274 | 90 | 117,118 | if (index_path(&filename, idx, ".pack") < 0) | |
1275 | ##### | 119 | goto on_error; | |
1276 | - | |||
1277 | - | /* And don't forget to rename the packfile to its new place. */ | ||
1278 | 90 | 120-122 | if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0) | |
1279 | ##### | 123 | goto on_error; | |
1280 | - | |||
1281 | - | /* And fsync the parent directory if we're asked to. */ | ||
1282 | 90 | 124,127 | if (idx->do_fsync && | |
1283 | 2 | 125,126 | git_futils_fsync_parent(git_buf_cstr(&filename)) < 0) | |
1284 | ##### | 128 | goto on_error; | |
1285 | - | |||
1286 | 90 | 129 | idx->pack_committed = 1; | |
1287 | - | |||
1288 | 90 | 129 | git_buf_dispose(&filename); | |
1289 | 90 | 130 | return 0; | |
1290 | - | |||
1291 | - | on_error: | ||
1292 | ##### | 132 | git_mwindow_free_all(&idx->pack->mwf); | |
1293 | ##### | 133 | git_filebuf_cleanup(&index_file); | |
1294 | ##### | 134 | git_buf_dispose(&filename); | |
1295 | ##### | 135 | return -1; | |
1296 | - | } | ||
1297 | - | |||
1298 | 110 | 2 | void git_indexer_free(git_indexer *idx) | |
1299 | - | { | ||
1300 | - | const git_oid *key; | ||
1301 | - | git_oid *value; | ||
1302 | - | size_t iter; | ||
1303 | - | |||
1304 | 110 | 2 | if (idx == NULL) | |
1305 | 110 | 3,31 | return; | |
1306 | - | |||
1307 | 99 | 4 | if (idx->have_stream) | |
1308 | 1 | 5 | git_packfile_stream_dispose(&idx->stream); | |
1309 | - | |||
1310 | 99 | 6 | git_vector_free_deep(&idx->objects); | |
1311 | - | |||
1312 | 99 | 7 | if (idx->pack->idx_cache) { | |
1313 | - | struct git_pack_entry *pentry; | ||
1314 | 5376 | 8-11 | git_oidmap_foreach_value(idx->pack->idx_cache, pentry, { | |
1315 | - | git__free(pentry); | ||
1316 | - | }); | ||
1317 | - | |||
1318 | 98 | 12,13 | git_oidmap_free(idx->pack->idx_cache); | |
1319 | - | } | ||
1320 | - | |||
1321 | 99 | 14 | git_vector_free_deep(&idx->deltas); | |
1322 | - | |||
1323 | 99 | 15,16 | if (!git_mutex_lock(&git__mwindow_mutex)) { | |
1324 | 99 | 17 | if (!idx->pack_committed) | |
1325 | 9 | 18 | git_packfile_close(idx->pack, true); | |
1326 | - | |||
1327 | 99 | 19 | git_packfile_free(idx->pack); | |
1328 | 99 | 20 | git_mutex_unlock(&git__mwindow_mutex); | |
1329 | - | } | ||
1330 | - | |||
1331 | 99 | 21 | iter = 0; | |
1332 | 100 | 21,23,24 | while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0) | |
1333 | 1 | 22 | git__free(value); | |
1334 | - | |||
1335 | 99 | 25 | git_hash_ctx_cleanup(&idx->trailer); | |
1336 | 99 | 26 | git_hash_ctx_cleanup(&idx->hash_ctx); | |
1337 | 99 | 27 | git_buf_dispose(&idx->entry_data); | |
1338 | 99 | 28 | git_oidmap_free(idx->expected_oids); | |
1339 | 99 | 29 | git__free(idx); | |
1340 | - | } |