source src/tsort.c
Line | Flow | Count | Block(s) | Source |
---|---|---|---|---|
1 | - | /* | ||
2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
3 | - | * | ||
4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
6 | - | */ | ||
7 | - | |||
8 | - | #include "common.h" | ||
9 | - | |||
10 | - | /** | ||
11 | - | * An array-of-pointers implementation of Python's Timsort | ||
12 | - | * Based on code by Christopher Swenson under the MIT license | ||
13 | - | * | ||
14 | - | * Copyright (c) 2010 Christopher Swenson | ||
15 | - | * Copyright (c) 2011 Vicent Marti | ||
16 | - | */ | ||
17 | - | |||
18 | - | #ifndef MAX | ||
19 | - | # define MAX(x,y) (((x) > (y) ? (x) : (y))) | ||
20 | - | #endif | ||
21 | - | |||
22 | - | #ifndef MIN | ||
23 | - | # define MIN(x,y) (((x) < (y) ? (x) : (y))) | ||
24 | - | #endif | ||
25 | - | |||
26 | 32128 | 2 | static int binsearch( | |
27 | - | void **dst, const void *x, size_t size, git__sort_r_cmp cmp, void *payload) | ||
28 | - | { | ||
29 | - | int l, c, r; | ||
30 | - | void *lx, *cx; | ||
31 | - | |||
32 | 32128 | 2,3 | assert(size > 0); | |
33 | - | |||
34 | 32128 | 4 | l = 0; | |
35 | 32128 | 4 | r = (int)size - 1; | |
36 | 32128 | 4 | c = r >> 1; | |
37 | 32128 | 4 | lx = dst[l]; | |
38 | - | |||
39 | - | /* check for beginning conditions */ | ||
40 | 32130 | 4,5 | if (cmp(x, lx, payload) < 0) | |
41 | 9389 | 6 | return 0; | |
42 | - | |||
43 | 22741 | 7,8 | else if (cmp(x, lx, payload) == 0) { | |
44 | 31 | 9 | int i = 1; | |
45 | 32 | 9,11,12 | while (cmp(x, dst[i], payload) == 0) | |
46 | 1 | 10 | i++; | |
47 | 31 | 13 | return i; | |
48 | - | } | ||
49 | - | |||
50 | - | /* guaranteed not to be >= rx */ | ||
51 | 22709 | 14 | cx = dst[c]; | |
52 | - | while (1) { | ||
53 | 78550 | 15 | const int val = cmp(x, cx, payload); | |
54 | 78549 | 16 | if (val < 0) { | |
55 | 33567 | 17,18 | if (c - l <= 1) return c; | |
56 | 22792 | 19 | r = c; | |
57 | 44982 | 20 | } else if (val > 0) { | |
58 | 44745 | 21,22 | if (r - c <= 1) return c + 1; | |
59 | 33049 | 23 | l = c; | |
60 | 33049 | 23 | lx = cx; | |
61 | - | } else { | ||
62 | - | do { | ||
63 | 237 | 24 | cx = dst[++c]; | |
64 | 238 | 24,25 | } while (cmp(x, cx, payload) == 0); | |
65 | 238 | 26 | return c; | |
66 | - | } | ||
67 | 55841 | 27 | c = l + ((r - l) >> 1); | |
68 | 55841 | 27 | cx = dst[c]; | |
69 | 55841 | 27 | } | |
70 | - | } | ||
71 | - | |||
72 | - | /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */ | ||
73 | 12558 | 2 | static void bisort( | |
74 | - | void **dst, size_t start, size_t size, git__sort_r_cmp cmp, void *payload) | ||
75 | - | { | ||
76 | - | size_t i; | ||
77 | - | void *x; | ||
78 | - | int location; | ||
79 | - | |||
80 | 71054 | 2,11,12 | for (i = start; i < size; i++) { | |
81 | - | int j; | ||
82 | - | /* If this entry is already correct, just move along */ | ||
83 | 58494 | 3,4 | if (cmp(dst[i - 1], dst[i], payload) <= 0) | |
84 | 26367 | 5 | continue; | |
85 | - | |||
86 | - | /* Else we need to find the right place, shift everything over, and squeeze in */ | ||
87 | 32127 | 6 | x = dst[i]; | |
88 | 32127 | 6 | location = binsearch(dst, x, i, cmp, payload); | |
89 | 252507 | 7-9 | for (j = (int)i - 1; j >= location; j--) { | |
90 | 220378 | 8 | dst[j + 1] = dst[j]; | |
91 | - | } | ||
92 | 32129 | 10 | dst[location] = x; | |
93 | - | } | ||
94 | 12571 | 13 | } | |
95 | - | |||
96 | - | |||
97 | - | /* timsort implementation, based on timsort.txt */ | ||
98 | - | struct tsort_run { | ||
99 | - | ssize_t start; | ||
100 | - | ssize_t length; | ||
101 | - | }; | ||
102 | - | |||
103 | - | struct tsort_store { | ||
104 | - | size_t alloc; | ||
105 | - | git__sort_r_cmp cmp; | ||
106 | - | void *payload; | ||
107 | - | void **storage; | ||
108 | - | }; | ||
109 | - | |||
110 | 113 | 2 | static void reverse_elements(void **dst, ssize_t start, ssize_t end) | |
111 | - | { | ||
112 | 236 | 2,4 | while (start < end) { | |
113 | 123 | 3 | void *tmp = dst[start]; | |
114 | 123 | 3 | dst[start] = dst[end]; | |
115 | 123 | 3 | dst[end] = tmp; | |
116 | - | |||
117 | 123 | 3 | start++; | |
118 | 123 | 3 | end--; | |
119 | - | } | ||
120 | 113 | 5 | } | |
121 | - | |||
122 | 864 | 2 | static ssize_t count_run( | |
123 | - | void **dst, ssize_t start, ssize_t size, struct tsort_store *store) | ||
124 | - | { | ||
125 | 864 | 2 | ssize_t curr = start + 2; | |
126 | - | |||
127 | 864 | 2 | if (size - start == 1) | |
128 | 309 | 3 | return 1; | |
129 | - | |||
130 | 555 | 4 | if (start >= size - 2) { | |
131 | ##### | 5,6 | if (store->cmp(dst[size - 2], dst[size - 1], store->payload) > 0) { | |
132 | ##### | 7 | void *tmp = dst[size - 1]; | |
133 | ##### | 7 | dst[size - 1] = dst[size - 2]; | |
134 | ##### | 7 | dst[size - 2] = tmp; | |
135 | - | } | ||
136 | - | |||
137 | ##### | 8 | return 2; | |
138 | - | } | ||
139 | - | |||
140 | 567 | 9,10 | if (store->cmp(dst[start], dst[start + 1], store->payload) <= 0) { | |
141 | 25027 | 11,13,15 | while (curr < size - 1 && | |
142 | 24707 | 14 | store->cmp(dst[curr - 1], dst[curr], store->payload) <= 0) | |
143 | 24573 | 12 | curr++; | |
144 | - | |||
145 | 442 | 16 | return curr - start; | |
146 | - | } else { | ||
147 | 166 | 17,19,21 | while (curr < size - 1 && | |
148 | 166 | 20 | store->cmp(dst[curr - 1], dst[curr], store->payload) > 0) | |
149 | 53 | 18 | curr++; | |
150 | - | |||
151 | - | /* reverse in-place */ | ||
152 | 113 | 22 | reverse_elements(dst, start, curr - 1); | |
153 | 113 | 23 | return curr - start; | |
154 | - | } | ||
155 | - | } | ||
156 | - | |||
157 | 351 | 2 | static size_t compute_minrun(size_t n) | |
158 | - | { | ||
159 | 351 | 2 | int r = 0; | |
160 | 735 | 2,4 | while (n >= 64) { | |
161 | 384 | 3 | r |= n & 1; | |
162 | 384 | 3 | n >>= 1; | |
163 | - | } | ||
164 | 351 | 5 | return n + r; | |
165 | - | } | ||
166 | - | |||
167 | 236 | 2 | static int check_invariant(struct tsort_run *stack, ssize_t stack_curr) | |
168 | - | { | ||
169 | 236 | 2 | if (stack_curr < 2) | |
170 | 25 | 3 | return 1; | |
171 | - | |||
172 | 211 | 4 | else if (stack_curr == 2) { | |
173 | 69 | 5 | const ssize_t A = stack[stack_curr - 2].length; | |
174 | 69 | 5 | const ssize_t B = stack[stack_curr - 1].length; | |
175 | 69 | 5 | return (A > B); | |
176 | - | } else { | ||
177 | 142 | 6 | const ssize_t A = stack[stack_curr - 3].length; | |
178 | 142 | 6 | const ssize_t B = stack[stack_curr - 2].length; | |
179 | 142 | 6 | const ssize_t C = stack[stack_curr - 1].length; | |
180 | 142 | 6 | return !((A <= B + C) || (B <= C)); | |
181 | - | } | ||
182 | - | } | ||
183 | - | |||
184 | 513 | 2 | static int resize(struct tsort_store *store, size_t new_size) | |
185 | - | { | ||
186 | 513 | 2 | if (store->alloc < new_size) { | |
187 | - | void **tempstore; | ||
188 | - | |||
189 | 369 | 3 | tempstore = git__reallocarray(store->storage, new_size, sizeof(void *)); | |
190 | - | |||
191 | - | /** | ||
192 | - | * Do not propagate on OOM; this will abort the sort and | ||
193 | - | * leave the array unsorted, but no error code will be | ||
194 | - | * raised | ||
195 | - | */ | ||
196 | 369 | 4 | if (tempstore == NULL) | |
197 | ##### | 5 | return -1; | |
198 | - | |||
199 | 369 | 6 | store->storage = tempstore; | |
200 | 369 | 6 | store->alloc = new_size; | |
201 | - | } | ||
202 | - | |||
203 | 513 | 7 | return 0; | |
204 | - | } | ||
205 | - | |||
206 | 513 | 2 | static void merge(void **dst, const struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store) | |
207 | - | { | ||
208 | 513 | 2 | const ssize_t A = stack[stack_curr - 2].length; | |
209 | 513 | 2 | const ssize_t B = stack[stack_curr - 1].length; | |
210 | 513 | 2 | const ssize_t curr = stack[stack_curr - 2].start; | |
211 | - | |||
212 | - | void **storage; | ||
213 | - | ssize_t i, j, k; | ||
214 | - | |||
215 | 513 | 2,3 | if (resize(store, MIN(A, B)) < 0) | |
216 | 496 | 4,32 | return; | |
217 | - | |||
218 | 513 | 5 | storage = store->storage; | |
219 | - | |||
220 | - | /* left merge */ | ||
221 | 513 | 5 | if (A < B) { | |
222 | 7 | 6 | memcpy(storage, &dst[curr], A * sizeof(void *)); | |
223 | 7 | 6 | i = 0; | |
224 | 7 | 6 | j = curr + A; | |
225 | - | |||
226 | 1138 | 6,17,18 | for (k = curr; k < curr + A + B; k++) { | |
227 | 1131 | 7,8 | if ((i < A) && (j < curr + A + B)) { | |
228 | 1124 | 9,10,13 | if (store->cmp(storage[i], dst[j], store->payload) <= 0) | |
229 | 370 | 11 | dst[k] = storage[i++]; | |
230 | - | else | ||
231 | 754 | 12 | dst[k] = dst[j++]; | |
232 | 7 | 14 | } else if (i < A) { | |
233 | 7 | 15 | dst[k] = storage[i++]; | |
234 | - | } else | ||
235 | ##### | 16 | dst[k] = dst[j++]; | |
236 | - | } | ||
237 | - | } else { | ||
238 | 506 | 19 | memcpy(storage, &dst[curr + A], B * sizeof(void *)); | |
239 | 506 | 19 | i = B - 1; | |
240 | 506 | 19 | j = curr + A - 1; | |
241 | - | |||
242 | 73405 | 19,30,31 | for (k = curr + A + B - 1; k >= curr; k--) { | |
243 | 72916 | 20,21 | if ((i >= 0) && (j >= curr)) { | |
244 | 50626 | 22,23,26 | if (store->cmp(dst[j], storage[i], store->payload) > 0) | |
245 | 30134 | 24 | dst[k] = dst[j--]; | |
246 | - | else | ||
247 | 20475 | 25 | dst[k] = storage[i--]; | |
248 | 22290 | 27 | } else if (i >= 0) | |
249 | 130 | 28 | dst[k] = storage[i--]; | |
250 | - | else | ||
251 | 22160 | 29 | dst[k] = dst[j--]; | |
252 | - | } | ||
253 | - | } | ||
254 | - | } | ||
255 | - | |||
256 | 200 | 2 | static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size) | |
257 | - | { | ||
258 | - | ssize_t A, B, C; | ||
259 | - | |||
260 | - | while (1) { | ||
261 | - | /* if the stack only has one thing on it, we are done with the collapse */ | ||
262 | 200 | 2 | if (stack_curr <= 1) | |
263 | ##### | 3 | break; | |
264 | - | |||
265 | - | /* if this is the last merge, just do it */ | ||
266 | 200 | 4,5 | if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { | |
267 | ##### | 6 | merge(dst, stack, stack_curr, store); | |
268 | ##### | 7 | stack[0].length += stack[1].length; | |
269 | ##### | 7 | stack_curr--; | |
270 | ##### | 7 | break; | |
271 | - | } | ||
272 | - | |||
273 | - | /* check if the invariant is off for a stack of 2 elements */ | ||
274 | 200 | 8,9 | else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { | |
275 | 25 | 10 | merge(dst, stack, stack_curr, store); | |
276 | 25 | 11 | stack[0].length += stack[1].length; | |
277 | 25 | 11 | stack_curr--; | |
278 | 25 | 11 | break; | |
279 | - | } | ||
280 | 175 | 12 | else if (stack_curr == 2) | |
281 | 44 | 13 | break; | |
282 | - | |||
283 | 131 | 14 | A = stack[stack_curr - 3].length; | |
284 | 131 | 14 | B = stack[stack_curr - 2].length; | |
285 | 131 | 14 | C = stack[stack_curr - 1].length; | |
286 | - | |||
287 | - | /* check first invariant */ | ||
288 | 131 | 14 | if (A <= B + C) { | |
289 | 52 | 15 | if (A < C) { | |
290 | ##### | 16 | merge(dst, stack, stack_curr - 1, store); | |
291 | ##### | 17 | stack[stack_curr - 3].length += stack[stack_curr - 2].length; | |
292 | ##### | 17 | stack[stack_curr - 2] = stack[stack_curr - 1]; | |
293 | ##### | 17 | stack_curr--; | |
294 | - | } else { | ||
295 | 52 | 18 | merge(dst, stack, stack_curr, store); | |
296 | 52 | 19 | stack[stack_curr - 2].length += stack[stack_curr - 1].length; | |
297 | 52 | 19,20 | stack_curr--; | |
298 | - | } | ||
299 | 79 | 21 | } else if (B <= C) { | |
300 | 67 | 22 | merge(dst, stack, stack_curr, store); | |
301 | 67 | 23 | stack[stack_curr - 2].length += stack[stack_curr - 1].length; | |
302 | 67 | 23 | stack_curr--; | |
303 | - | } else | ||
304 | 12 | 24 | break; | |
305 | 119 | 25 | } | |
306 | - | |||
307 | 81 | 26 | return stack_curr; | |
308 | - | } | ||
309 | - | |||
310 | - | #define PUSH_NEXT() do {\ | ||
311 | - | len = count_run(dst, curr, size, store);\ | ||
312 | - | run = minrun;\ | ||
313 | - | if (run > (ssize_t)size - curr) run = size - curr;\ | ||
314 | - | if (run > len) {\ | ||
315 | - | bisort(&dst[curr], len, run, cmp, payload);\ | ||
316 | - | len = run;\ | ||
317 | - | }\ | ||
318 | - | run_stack[stack_curr].start = curr;\ | ||
319 | - | run_stack[stack_curr++].length = len;\ | ||
320 | - | curr += len;\ | ||
321 | - | if (curr == (ssize_t)size) {\ | ||
322 | - | /* finish up */ \ | ||
323 | - | while (stack_curr > 1) { \ | ||
324 | - | merge(dst, run_stack, stack_curr, store); \ | ||
325 | - | run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \ | ||
326 | - | stack_curr--; \ | ||
327 | - | } \ | ||
328 | - | if (store->storage != NULL) {\ | ||
329 | - | git__free(store->storage);\ | ||
330 | - | store->storage = NULL;\ | ||
331 | - | }\ | ||
332 | - | return;\ | ||
333 | - | }\ | ||
334 | - | }\ | ||
335 | - | while (0) | ||
336 | - | |||
337 | 12675 | 2 | void git__tsort_r( | |
338 | - | void **dst, size_t size, git__sort_r_cmp cmp, void *payload) | ||
339 | - | { | ||
340 | 12675 | 2 | struct tsort_store _store, *store = &_store; | |
341 | - | struct tsort_run run_stack[128]; | ||
342 | - | |||
343 | 12675 | 2 | ssize_t stack_curr = 0; | |
344 | - | ssize_t len, run; | ||
345 | 12675 | 2 | ssize_t curr = 0; | |
346 | - | ssize_t minrun; | ||
347 | - | |||
348 | 12675 | 2 | if (size < 64) { | |
349 | 12325 | 3 | bisort(dst, 1, size, cmp, payload); | |
350 | 12321 | 70 | return; | |
351 | - | } | ||
352 | - | |||
353 | - | /* compute the minimum run length */ | ||
354 | 350 | 4 | minrun = (ssize_t)compute_minrun(size); | |
355 | - | |||
356 | - | /* temporary storage for merges */ | ||
357 | 351 | 5 | store->alloc = 0; | |
358 | 351 | 5 | store->storage = NULL; | |
359 | 351 | 5 | store->cmp = cmp; | |
360 | 351 | 5 | store->payload = payload; | |
361 | - | |||
362 | 353 | 5-19 | PUSH_NEXT(); | |
363 | 688 | 20-34 | PUSH_NEXT(); | |
364 | 7 | 35-49 | PUSH_NEXT(); | |
365 | - | |||
366 | - | while (1) { | ||
367 | 236 | 50,51 | if (!check_invariant(run_stack, stack_curr)) { | |
368 | 81 | 52 | stack_curr = collapse(dst, run_stack, stack_curr, store, size); | |
369 | 81 | 53 | continue; | |
370 | - | } | ||
371 | - | |||
372 | 155 | 54-68 | PUSH_NEXT(); | |
373 | 229 | 69 | } | |
374 | - | } | ||
375 | - | |||
376 | 268921 | 2 | static int tsort_r_cmp(const void *a, const void *b, void *payload) | |
377 | - | { | ||
378 | 268921 | 2 | return ((git__tsort_cmp)payload)(a, b); | |
379 | - | } | ||
380 | - | |||
381 | 12680 | 2 | void git__tsort(void **dst, size_t size, git__tsort_cmp cmp) | |
382 | - | { | ||
383 | 12680 | 2 | git__tsort_r(dst, size, tsort_r_cmp, cmp); | |
384 | 12673 | 3 | } |