source src/diff_tform.c
| Line | Flow | Count | Block(s) | Source |
|---|---|---|---|---|
| 1 | - | /* | ||
| 2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
| 3 | - | * | ||
| 4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
| 5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
| 6 | - | */ | ||
| 7 | - | |||
| 8 | - | #include "diff_tform.h" | ||
| 9 | - | |||
| 10 | - | #include "git2/config.h" | ||
| 11 | - | #include "git2/blob.h" | ||
| 12 | - | #include "git2/sys/hashsig.h" | ||
| 13 | - | |||
| 14 | - | #include "diff.h" | ||
| 15 | - | #include "diff_generate.h" | ||
| 16 | - | #include "path.h" | ||
| 17 | - | #include "futils.h" | ||
| 18 | - | #include "config.h" | ||
| 19 | - | |||
| 20 | ![]() |
298 | 2 | git_diff_delta *git_diff__delta_dup( |
| 21 | - | const git_diff_delta *d, git_pool *pool) | ||
| 22 | - | { | ||
| 23 | 298 | 2 | git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); | |
| 24 | 298 | 3 | if (!delta) | |
| 25 | ##### | 4 | return NULL; | |
| 26 | - | |||
| 27 | 298 | 5 | memcpy(delta, d, sizeof(git_diff_delta)); | |
| 28 | 298 | 5 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
| 29 | - | |||
| 30 | 298 | 5 | if (d->old_file.path != NULL) { | |
| 31 | 298 | 6 | delta->old_file.path = git_pool_strdup(pool, d->old_file.path); | |
| 32 | 298 | 7 | if (delta->old_file.path == NULL) | |
| 33 | ##### | 8 | goto fail; | |
| 34 | - | } | ||
| 35 | - | |||
| 36 | 298 | 9,10 | if (d->new_file.path != d->old_file.path && d->new_file.path != NULL) { | |
| 37 | 1 | 11 | delta->new_file.path = git_pool_strdup(pool, d->new_file.path); | |
| 38 | 1 | 12,14 | if (delta->new_file.path == NULL) | |
| 39 | ##### | 13 | goto fail; | |
| 40 | - | } else { | ||
| 41 | 297 | 15 | delta->new_file.path = delta->old_file.path; | |
| 42 | - | } | ||
| 43 | - | |||
| 44 | 298 | 16 | return delta; | |
| 45 | - | |||
| 46 | - | fail: | ||
| 47 | ##### | 17 | git__free(delta); | |
| 48 | ##### | 18 | return NULL; | |
| 49 | - | } | ||
| 50 | - | |||
| 51 | ![]() |
75 | 2 | git_diff_delta *git_diff__merge_like_cgit( |
| 52 | - | const git_diff_delta *a, | ||
| 53 | - | const git_diff_delta *b, | ||
| 54 | - | git_pool *pool) | ||
| 55 | - | { | ||
| 56 | - | git_diff_delta *dup; | ||
| 57 | - | |||
| 58 | - | /* Emulate C git for merging two diffs (a la 'git diff <sha>'). | ||
| 59 | - | * | ||
| 60 | - | * When C git does a diff between the work dir and a tree, it actually | ||
| 61 | - | * diffs with the index but uses the workdir contents. This emulates | ||
| 62 | - | * those choices so we can emulate the type of diff. | ||
| 63 | - | * | ||
| 64 | - | * We have three file descriptions here, let's call them: | ||
| 65 | - | * f1 = a->old_file | ||
| 66 | - | * f2 = a->new_file AND b->old_file | ||
| 67 | - | * f3 = b->new_file | ||
| 68 | - | */ | ||
| 69 | - | |||
| 70 | - | /* If one of the diffs is a conflict, just dup it */ | ||
| 71 | 75 | 2 | if (b->status == GIT_DELTA_CONFLICTED) | |
| 72 | 1 | 3 | return git_diff__delta_dup(b, pool); | |
| 73 | 74 | 4 | if (a->status == GIT_DELTA_CONFLICTED) | |
| 74 | ##### | 5 | return git_diff__delta_dup(a, pool); | |
| 75 | - | |||
| 76 | - | /* if f2 == f3 or f2 is deleted, then just dup the 'a' diff */ | ||
| 77 | 74 | 6,7 | if (b->status == GIT_DELTA_UNMODIFIED || a->status == GIT_DELTA_DELETED) | |
| 78 | 2 | 8 | return git_diff__delta_dup(a, pool); | |
| 79 | - | |||
| 80 | - | /* otherwise, base this diff on the 'b' diff */ | ||
| 81 | 72 | 9,10 | if ((dup = git_diff__delta_dup(b, pool)) == NULL) | |
| 82 | ##### | 11 | return NULL; | |
| 83 | - | |||
| 84 | - | /* If 'a' status is uninteresting, then we're done */ | ||
| 85 | 72 | 12,13 | if (a->status == GIT_DELTA_UNMODIFIED || | |
| 86 | 72 | 13,14 | a->status == GIT_DELTA_UNTRACKED || | |
| 87 | 71 | 14 | a->status == GIT_DELTA_UNREADABLE) | |
| 88 | 1 | 15 | return dup; | |
| 89 | - | |||
| 90 | 71 | 16,17 | assert(b->status != GIT_DELTA_UNMODIFIED); | |
| 91 | - | |||
| 92 | - | /* A cgit exception is that the diff of a file that is only in the | ||
| 93 | - | * index (i.e. not in HEAD nor workdir) is given as empty. | ||
| 94 | - | */ | ||
| 95 | 71 | 18 | if (dup->status == GIT_DELTA_DELETED) { | |
| 96 | 11 | 19 | if (a->status == GIT_DELTA_ADDED) { | |
| 97 | 6 | 20 | dup->status = GIT_DELTA_UNMODIFIED; | |
| 98 | 11 | 20,21 | dup->nfiles = 2; | |
| 99 | - | } | ||
| 100 | - | /* else don't overwrite DELETE status */ | ||
| 101 | - | } else { | ||
| 102 | 60 | 22 | dup->status = a->status; | |
| 103 | 60 | 22 | dup->nfiles = a->nfiles; | |
| 104 | - | } | ||
| 105 | - | |||
| 106 | 71 | 23 | git_oid_cpy(&dup->old_file.id, &a->old_file.id); | |
| 107 | 71 | 24 | dup->old_file.mode = a->old_file.mode; | |
| 108 | 71 | 24 | dup->old_file.size = a->old_file.size; | |
| 109 | 71 | 24 | dup->old_file.flags = a->old_file.flags; | |
| 110 | - | |||
| 111 | 71 | 24 | return dup; | |
| 112 | - | } | ||
| 113 | - | |||
| 114 | ![]() |
68 | 2 | int git_diff__merge( |
| 115 | - | git_diff *onto, const git_diff *from, git_diff__merge_cb cb) | ||
| 116 | - | { | ||
| 117 | 68 | 2 | int error = 0; | |
| 118 | - | git_pool onto_pool; | ||
| 119 | - | git_vector onto_new; | ||
| 120 | - | git_diff_delta *delta; | ||
| 121 | - | bool ignore_case, reversed; | ||
| 122 | - | unsigned int i, j; | ||
| 123 | - | |||
| 124 | 68 | 2-4 | assert(onto && from); | |
| 125 | - | |||
| 126 | 68 | 5 | if (!from->deltas.length) | |
| 127 | 3 | 6 | return 0; | |
| 128 | - | |||
| 129 | 65 | 7 | ignore_case = ((onto->opts.flags & GIT_DIFF_IGNORE_CASE) != 0); | |
| 130 | 65 | 7 | reversed = ((onto->opts.flags & GIT_DIFF_REVERSE) != 0); | |
| 131 | - | |||
| 132 | 65 | 7,8 | if (ignore_case != ((from->opts.flags & GIT_DIFF_IGNORE_CASE) != 0) || | |
| 133 | 65 | 8 | reversed != ((from->opts.flags & GIT_DIFF_REVERSE) != 0)) { | |
| 134 | ##### | 9 | git_error_set(GIT_ERROR_INVALID, | |
| 135 | - | "attempt to merge diffs created with conflicting options"); | ||
| 136 | ##### | 10 | return -1; | |
| 137 | - | } | ||
| 138 | - | |||
| 139 | 65 | 11,12,14 | if (git_vector_init(&onto_new, onto->deltas.length, git_diff_delta__cmp) < 0 || | |
| 140 | 65 | 13 | git_pool_init(&onto_pool, 1) < 0) | |
| 141 | ##### | 15 | return -1; | |
| 142 | - | |||
| 143 | 357 | 16,56,57 | for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { | |
| 144 | 292 | 17-19 | git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); | |
| 145 | 292 | 20-22 | const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); | |
| 146 | 292 | 23,24,28-31 | int cmp = !f ? -1 : !o ? 1 : | |
| 147 | 200 | 25-27 | STRCMP_CASESELECT(ignore_case, o->old_file.path, f->old_file.path); | |
| 148 | - | |||
| 149 | 292 | 32 | if (cmp < 0) { | |
| 150 | 93 | 33 | delta = git_diff__delta_dup(o, &onto_pool); | |
| 151 | 93 | 34 | i++; | |
| 152 | 199 | 35 | } else if (cmp > 0) { | |
| 153 | 123 | 36 | delta = git_diff__delta_dup(f, &onto_pool); | |
| 154 | 123 | 37 | j++; | |
| 155 | - | } else { | ||
| 156 | 76 | 38-40 | const git_diff_delta *left = reversed ? f : o; | |
| 157 | 76 | 41-43 | const git_diff_delta *right = reversed ? o : f; | |
| 158 | - | |||
| 159 | 76 | 44 | delta = cb(left, right, &onto_pool); | |
| 160 | 76 | 45 | i++; | |
| 161 | 76 | 45 | j++; | |
| 162 | - | } | ||
| 163 | - | |||
| 164 | - | /* the ignore rules for the target may not match the source | ||
| 165 | - | * or the result of a merged delta could be skippable... | ||
| 166 | - | */ | ||
| 167 | 292 | 46-48 | if (delta && git_diff_delta__should_skip(&onto->opts, delta)) { | |
| 168 | 6 | 49 | git__free(delta); | |
| 169 | 6 | 50 | continue; | |
| 170 | - | } | ||
| 171 | - | |||
| 172 | 286 | 51-54 | if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) | |
| 173 | ##### | 55 | break; | |
| 174 | - | } | ||
| 175 | - | |||
| 176 | 65 | 58 | if (!error) { | |
| 177 | 65 | 59 | git_vector_swap(&onto->deltas, &onto_new); | |
| 178 | 65 | 60 | git_pool_swap(&onto->pool, &onto_pool); | |
| 179 | - | |||
| 180 | 65 | 61 | if ((onto->opts.flags & GIT_DIFF_REVERSE) != 0) | |
| 181 | 1 | 62 | onto->old_src = from->old_src; | |
| 182 | - | else | ||
| 183 | 64 | 63 | onto->new_src = from->new_src; | |
| 184 | - | |||
| 185 | - | /* prefix strings also come from old pool, so recreate those.*/ | ||
| 186 | 65 | 65 | onto->opts.old_prefix = | |
| 187 | 65 | 64 | git_pool_strdup_safe(&onto->pool, onto->opts.old_prefix); | |
| 188 | 65 | 66 | onto->opts.new_prefix = | |
| 189 | 65 | 65 | git_pool_strdup_safe(&onto->pool, onto->opts.new_prefix); | |
| 190 | - | } | ||
| 191 | - | |||
| 192 | 65 | 67 | git_vector_free_deep(&onto_new); | |
| 193 | 65 | 68 | git_pool_clear(&onto_pool); | |
| 194 | - | |||
| 195 | 65 | 69 | return error; | |
| 196 | - | } | ||
| 197 | - | |||
| 198 | 8 | 2 | int git_diff_merge(git_diff *onto, const git_diff *from) | |
| 199 | - | { | ||
| 200 | 8 | 2 | return git_diff__merge(onto, from, git_diff__merge_like_cgit); | |
| 201 | - | } | ||
| 202 | - | |||
| 203 | 46 | 2 | int git_diff_find_similar__hashsig_for_file( | |
| 204 | - | void **out, const git_diff_file *f, const char *path, void *p) | ||
| 205 | - | { | ||
| 206 | 46 | 2 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; | |
| 207 | - | |||
| 208 | - | GIT_UNUSED(f); | ||
| 209 | 46 | 2 | return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); | |
| 210 | - | } | ||
| 211 | - | |||
| 212 | 3591 | 2 | int git_diff_find_similar__hashsig_for_buf( | |
| 213 | - | void **out, const git_diff_file *f, const char *buf, size_t len, void *p) | ||
| 214 | - | { | ||
| 215 | 3591 | 2 | git_hashsig_option_t opt = (git_hashsig_option_t)(intptr_t)p; | |
| 216 | - | |||
| 217 | - | GIT_UNUSED(f); | ||
| 218 | 3591 | 2 | return git_hashsig_create((git_hashsig **)out, buf, len, opt); | |
| 219 | - | } | ||
| 220 | - | |||
| 221 | 1425 | 2 | void git_diff_find_similar__hashsig_free(void *sig, void *payload) | |
| 222 | - | { | ||
| 223 | - | GIT_UNUSED(payload); | ||
| 224 | 1425 | 2 | git_hashsig_free(sig); | |
| 225 | 1425 | 3 | } | |
| 226 | - | |||
/*
 * Built-in similarity metric: compare two hash signatures.
 *
 * git_hashsig_compare() reports either a negative error code or a
 * non-negative score in a single return value; this splits the two apart.
 * On success the score is written to `*score` and 0 is returned; on
 * failure the negative code is returned and `*score` is left untouched.
 * `payload` is not used.
 */
int git_diff_find_similar__calc_similarity(
	int *score, void *siga, void *sigb, void *payload)
{
	int result;

	GIT_UNUSED(payload);

	result = git_hashsig_compare(siga, sigb);

	if (result >= 0) {
		*score = result;
		return 0;
	}

	return result;
}
| 240 | - | |||
| 241 | - | #define DEFAULT_THRESHOLD 50 | ||
| 242 | - | #define DEFAULT_BREAK_REWRITE_THRESHOLD 60 | ||
| 243 | - | #define DEFAULT_RENAME_LIMIT 200 | ||
| 244 | - | |||
| 245 | ![]() |
332 | 2 | static int normalize_find_opts( |
| 246 | - | git_diff *diff, | ||
| 247 | - | git_diff_find_options *opts, | ||
| 248 | - | const git_diff_find_options *given) | ||
| 249 | - | { | ||
| 250 | 332 | 2 | git_config *cfg = NULL; | |
| 251 | - | git_hashsig_option_t hashsig_opts; | ||
| 252 | - | |||
| 253 | 332 | 2-4 | GIT_ERROR_CHECK_VERSION(given, GIT_DIFF_FIND_OPTIONS_VERSION, "git_diff_find_options"); | |
| 254 | - | |||
| 255 | 330 | 5,7 | if (diff->repo != NULL && | |
| 256 | 330 | 6 | git_repository_config__weakptr(&cfg, diff->repo) < 0) | |
| 257 | ##### | 8 | return -1; | |
| 258 | - | |||
| 259 | 330 | 9 | if (given) | |
| 260 | 324 | 10 | memcpy(opts, given, sizeof(*opts)); | |
| 261 | - | |||
| 262 | 330 | 11,12 | if (!given || | |
| 263 | 324 | 12 | (given->flags & GIT_DIFF_FIND_ALL) == GIT_DIFF_FIND_BY_CONFIG) | |
| 264 | - | { | ||
| 265 | 12 | 13 | if (cfg) { | |
| 266 | 12 | 14 | char *rule = | |
| 267 | 12 | 14 | git_config__get_string_force(cfg, "diff.renames", "true"); | |
| 268 | - | int boolval; | ||
| 269 | - | |||
| 270 | 12 | 15-18 | if (!git__parse_bool(&boolval, rule) && !boolval) | |
| 271 | - | /* don't set FIND_RENAMES if bool value is false */; | ||
| 272 | 11 | 19,20 | else if (!strcasecmp(rule, "copies") || !strcasecmp(rule, "copy")) | |
| 273 | 5 | 21 | opts->flags |= GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES; | |
| 274 | - | else | ||
| 275 | 6 | 22 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
| 276 | - | |||
| 277 | 12 | 23,24 | git__free(rule); | |
| 278 | - | } else { | ||
| 279 | - | /* set default flag */ | ||
| 280 | ##### | 25 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
| 281 | - | } | ||
| 282 | - | } | ||
| 283 | - | |||
| 284 | - | /* some flags imply others */ | ||
| 285 | - | |||
| 286 | 330 | 26 | if (opts->flags & GIT_DIFF_FIND_EXACT_MATCH_ONLY) { | |
| 287 | - | /* if we are only looking for exact matches, then don't turn | ||
| 288 | - | * MODIFIED items into ADD/DELETE pairs because it's too picky | ||
| 289 | - | */ | ||
| 290 | 5 | 27 | opts->flags &= ~(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES); | |
| 291 | - | |||
| 292 | - | /* similarly, don't look for self-rewrites to split */ | ||
| 293 | 5 | 27 | opts->flags &= ~GIT_DIFF_FIND_RENAMES_FROM_REWRITES; | |
| 294 | - | } | ||
| 295 | - | |||
| 296 | 330 | 28 | if (opts->flags & GIT_DIFF_FIND_RENAMES_FROM_REWRITES) | |
| 297 | 36 | 29 | opts->flags |= GIT_DIFF_FIND_RENAMES; | |
| 298 | - | |||
| 299 | 330 | 30 | if (opts->flags & GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED) | |
| 300 | 21 | 31 | opts->flags |= GIT_DIFF_FIND_COPIES; | |
| 301 | - | |||
| 302 | 330 | 32 | if (opts->flags & GIT_DIFF_BREAK_REWRITES) | |
| 303 | 34 | 33 | opts->flags |= GIT_DIFF_FIND_REWRITES; | |
| 304 | - | |||
| 305 | - | #define USE_DEFAULT(X) ((X) == 0 || (X) > 100) | ||
| 306 | - | |||
| 307 | 330 | 34,35 | if (USE_DEFAULT(opts->rename_threshold)) | |
| 308 | 327 | 36 | opts->rename_threshold = DEFAULT_THRESHOLD; | |
| 309 | - | |||
| 310 | 330 | 37,38 | if (USE_DEFAULT(opts->rename_from_rewrite_threshold)) | |
| 311 | 330 | 39 | opts->rename_from_rewrite_threshold = DEFAULT_THRESHOLD; | |
| 312 | - | |||
| 313 | 330 | 40,41 | if (USE_DEFAULT(opts->copy_threshold)) | |
| 314 | 328 | 42 | opts->copy_threshold = DEFAULT_THRESHOLD; | |
| 315 | - | |||
| 316 | 330 | 43,44 | if (USE_DEFAULT(opts->break_rewrite_threshold)) | |
| 317 | 326 | 45 | opts->break_rewrite_threshold = DEFAULT_BREAK_REWRITE_THRESHOLD; | |
| 318 | - | |||
| 319 | - | #undef USE_DEFAULT | ||
| 320 | - | |||
| 321 | 330 | 46 | if (!opts->rename_limit) { | |
| 322 | 330 | 47 | if (cfg) { | |
| 323 | 330 | 48,49 | opts->rename_limit = git_config__get_int_force( | |
| 324 | - | cfg, "diff.renamelimit", DEFAULT_RENAME_LIMIT); | ||
| 325 | - | } | ||
| 326 | - | |||
| 327 | 330 | 50 | if (opts->rename_limit <= 0) | |
| 328 | ##### | 51 | opts->rename_limit = DEFAULT_RENAME_LIMIT; | |
| 329 | - | } | ||
| 330 | - | |||
| 331 | - | /* assign the internal metric with whitespace flag as payload */ | ||
| 332 | 330 | 52 | if (!opts->metric) { | |
| 333 | 330 | 53 | opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); | |
| 334 | 330 | 54,55 | GIT_ERROR_CHECK_ALLOC(opts->metric); | |
| 335 | - | |||
| 336 | 330 | 56 | opts->metric->file_signature = git_diff_find_similar__hashsig_for_file; | |
| 337 | 330 | 56 | opts->metric->buffer_signature = git_diff_find_similar__hashsig_for_buf; | |
| 338 | 330 | 56 | opts->metric->free_signature = git_diff_find_similar__hashsig_free; | |
| 339 | 330 | 56 | opts->metric->similarity = git_diff_find_similar__calc_similarity; | |
| 340 | - | |||
| 341 | 330 | 56 | if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) | |
| 342 | 7 | 57 | hashsig_opts = GIT_HASHSIG_IGNORE_WHITESPACE; | |
| 343 | 323 | 58 | else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) | |
| 344 | 6 | 59 | hashsig_opts = GIT_HASHSIG_NORMAL; | |
| 345 | - | else | ||
| 346 | 317 | 60 | hashsig_opts = GIT_HASHSIG_SMART_WHITESPACE; | |
| 347 | 330 | 61 | hashsig_opts |= GIT_HASHSIG_ALLOW_SMALL_FILES; | |
| 348 | 330 | 61 | opts->metric->payload = (void *)hashsig_opts; | |
| 349 | - | } | ||
| 350 | - | |||
| 351 | 330 | 62 | return 0; | |
| 352 | - | } | ||
| 353 | - | |||
| 354 | 6 | 2 | static int insert_delete_side_of_split( | |
| 355 | - | git_diff *diff, git_vector *onto, const git_diff_delta *delta) | ||
| 356 | - | { | ||
| 357 | - | /* make new record for DELETED side of split */ | ||
| 358 | 6 | 2 | git_diff_delta *deleted = git_diff__delta_dup(delta, &diff->pool); | |
| 359 | 6 | 3,4 | GIT_ERROR_CHECK_ALLOC(deleted); | |
| 360 | - | |||
| 361 | 6 | 5 | deleted->status = GIT_DELTA_DELETED; | |
| 362 | 6 | 5 | deleted->nfiles = 1; | |
| 363 | 6 | 5 | memset(&deleted->new_file, 0, sizeof(deleted->new_file)); | |
| 364 | 6 | 5 | deleted->new_file.path = deleted->old_file.path; | |
| 365 | 6 | 5 | deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 366 | - | |||
| 367 | 6 | 5 | return git_vector_insert(onto, deleted); | |
| 368 | - | } | ||
| 369 | - | |||
| 370 | ![]() |
87 | 2 | static int apply_splits_and_deletes( |
| 371 | - | git_diff *diff, size_t expected_size, bool actually_split) | ||
| 372 | - | { | ||
| 373 | 87 | 2 | git_vector onto = GIT_VECTOR_INIT; | |
| 374 | - | size_t i; | ||
| 375 | - | git_diff_delta *delta; | ||
| 376 | - | |||
| 377 | 87 | 2,3 | if (git_vector_init(&onto, expected_size, git_diff_delta__cmp) < 0) | |
| 378 | ##### | 4 | return -1; | |
| 379 | - | |||
| 380 | - | /* build new delta list without TO_DELETE and splitting TO_SPLIT */ | ||
| 381 | 1936 | 5,25-27 | git_vector_foreach(&diff->deltas, i, delta) { | |
| 382 | 1849 | 6 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) | |
| 383 | 248 | 7 | continue; | |
| 384 | - | |||
| 385 | 1601 | 8,9 | if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) { | |
| 386 | 4 | 10 | delta->similarity = 0; | |
| 387 | - | |||
| 388 | 4 | 10,11 | if (insert_delete_side_of_split(diff, &onto, delta) < 0) | |
| 389 | ##### | 12 | goto on_error; | |
| 390 | - | |||
| 391 | 4 | 13 | if (diff->new_src == GIT_ITERATOR_WORKDIR) | |
| 392 | 1 | 14 | delta->status = GIT_DELTA_UNTRACKED; | |
| 393 | - | else | ||
| 394 | 3 | 15 | delta->status = GIT_DELTA_ADDED; | |
| 395 | 4 | 16 | delta->nfiles = 1; | |
| 396 | 4 | 16 | memset(&delta->old_file, 0, sizeof(delta->old_file)); | |
| 397 | 4 | 16 | delta->old_file.path = delta->new_file.path; | |
| 398 | 4 | 16 | delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 399 | - | } | ||
| 400 | - | |||
| 401 | - | /* clean up delta before inserting into new list */ | ||
| 402 | 1601 | 17 | GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags); | |
| 403 | - | |||
| 404 | 1601 | 17,18 | if (delta->status != GIT_DELTA_COPIED && | |
| 405 | 1587 | 18,19 | delta->status != GIT_DELTA_RENAMED && | |
| 406 | 1318 | 19,20 | (delta->status != GIT_DELTA_MODIFIED || actually_split)) | |
| 407 | 415 | 21 | delta->similarity = 0; | |
| 408 | - | |||
| 409 | - | /* insert into new list */ | ||
| 410 | 1601 | 22,23 | if (git_vector_insert(&onto, delta) < 0) | |
| 411 | ##### | 24 | goto on_error; | |
| 412 | - | } | ||
| 413 | - | |||
| 414 | - | /* cannot return an error past this point */ | ||
| 415 | - | |||
| 416 | - | /* free deltas from old list that didn't make it to the new one */ | ||
| 417 | 1936 | 28,31-33 | git_vector_foreach(&diff->deltas, i, delta) { | |
| 418 | 1849 | 29 | if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) | |
| 419 | 248 | 30 | git__free(delta); | |
| 420 | - | } | ||
| 421 | - | |||
| 422 | - | /* swap new delta list into place */ | ||
| 423 | 87 | 34 | git_vector_swap(&diff->deltas, &onto); | |
| 424 | 87 | 35 | git_vector_free(&onto); | |
| 425 | 87 | 36 | git_vector_sort(&diff->deltas); | |
| 426 | - | |||
| 427 | 87 | 37 | return 0; | |
| 428 | - | |||
| 429 | - | on_error: | ||
| 430 | ##### | 38 | git_vector_free_deep(&onto); | |
| 431 | - | |||
| 432 | ##### | 39 | return -1; | |
| 433 | - | } | ||
| 434 | - | |||
| 435 | 81663 | 2 | GIT_INLINE(git_diff_file *) similarity_get_file(git_diff *diff, size_t idx) | |
| 436 | - | { | ||
| 437 | 81663 | 2 | git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); | |
| 438 | 81663 | 3 | return (idx & 1) ? &delta->new_file : &delta->old_file; | |
| 439 | - | } | ||
| 440 | - | |||
| 441 | - | typedef struct { | ||
| 442 | - | size_t idx; | ||
| 443 | - | git_iterator_t src; | ||
| 444 | - | git_repository *repo; | ||
| 445 | - | git_diff_file *file; | ||
| 446 | - | git_buf data; | ||
| 447 | - | git_odb_object *odb_obj; | ||
| 448 | - | git_blob *blob; | ||
| 449 | - | } similarity_info; | ||
| 450 | - | |||
| 451 | ![]() |
1909 | 2 | static int similarity_init( |
| 452 | - | similarity_info *info, git_diff *diff, size_t file_idx) | ||
| 453 | - | { | ||
| 454 | 1909 | 2 | info->idx = file_idx; | |
| 455 | 1909 | 2-4 | info->src = (file_idx & 1) ? diff->new_src : diff->old_src; | |
| 456 | 1909 | 5 | info->repo = diff->repo; | |
| 457 | 1909 | 5 | info->file = similarity_get_file(diff, file_idx); | |
| 458 | 1909 | 6 | info->odb_obj = NULL; | |
| 459 | 1909 | 6 | info->blob = NULL; | |
| 460 | 1909 | 6 | git_buf_init(&info->data, 0); | |
| 461 | - | |||
| 462 | 1909 | 7,8 | if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR) | |
| 463 | 705 | 9 | return 0; | |
| 464 | - | |||
| 465 | 1204 | 10 | return git_diff_file__resolve_zero_size( | |
| 466 | - | info->file, &info->odb_obj, info->repo); | ||
| 467 | - | } | ||
| 468 | - | |||
| 469 | ![]() |
1237 | 2 | static int similarity_sig( |
| 470 | - | similarity_info *info, | ||
| 471 | - | const git_diff_find_options *opts, | ||
| 472 | - | void **cache) | ||
| 473 | - | { | ||
| 474 | 1237 | 2 | int error = 0; | |
| 475 | 1237 | 2 | git_diff_file *file = info->file; | |
| 476 | - | |||
| 477 | 1237 | 2 | if (info->src == GIT_ITERATOR_WORKDIR) { | |
| 478 | 46 | 3,3-5 | if ((error = git_buf_joinpath( | |
| 479 | 46 | 3 | &info->data, git_repository_workdir(info->repo), file->path)) < 0) | |
| 480 | ##### | 6 | return error; | |
| 481 | - | |||
| 482 | - | /* if path is not a regular file, just skip this item */ | ||
| 483 | 46 | 7,8 | if (!git_path_isfile(info->data.ptr)) | |
| 484 | ##### | 9 | return 0; | |
| 485 | - | |||
| 486 | - | /* TODO: apply wd-to-odb filters to file data if necessary */ | ||
| 487 | - | |||
| 488 | 46 | 10,10,10 | error = opts->metric->file_signature( | |
| 489 | 46 | 10 | &cache[info->idx], info->file, | |
| 490 | 46 | 10,10 | info->data.ptr, opts->metric->payload); | |
| 491 | - | } else { | ||
| 492 | - | /* if we didn't initially know the size, we might have an odb_obj | ||
| 493 | - | * around from earlier, so convert that, otherwise load the blob now | ||
| 494 | - | */ | ||
| 495 | 1191 | 11 | if (info->odb_obj != NULL) | |
| 496 | ##### | 12,12 | error = git_object__from_odb_object( | |
| 497 | ##### | 12 | (git_object **)&info->blob, info->repo, | |
| 498 | - | info->odb_obj, GIT_OBJECT_BLOB); | ||
| 499 | - | else | ||
| 500 | 1191 | 13 | error = git_blob_lookup(&info->blob, info->repo, &file->id); | |
| 501 | - | |||
| 502 | 1191 | 14 | if (error < 0) { | |
| 503 | - | /* if lookup fails, just skip this item in similarity calc */ | ||
| 504 | ##### | 15 | git_error_clear(); | |
| 505 | - | } else { | ||
| 506 | - | size_t sz; | ||
| 507 | - | |||
| 508 | - | /* index size may not be actual blob size if filtered */ | ||
| 509 | 1191 | 16,17 | if (file->size != git_blob_rawsize(info->blob)) | |
| 510 | ##### | 18,19 | file->size = git_blob_rawsize(info->blob); | |
| 511 | - | |||
| 512 | 1191 | 20-23 | sz = git__is_sizet(file->size) ? (size_t)file->size : (size_t)-1; | |
| 513 | - | |||
| 514 | 1191 | 24,24,25 | error = opts->metric->buffer_signature( | |
| 515 | 1191 | 25 | &cache[info->idx], info->file, | |
| 516 | 1191 | 24,24 | git_blob_rawcontent(info->blob), sz, opts->metric->payload); | |
| 517 | - | } | ||
| 518 | - | } | ||
| 519 | - | |||
| 520 | 1237 | 26 | return error; | |
| 521 | - | } | ||
| 522 | - | |||
| 523 | 79086 | 2 | static void similarity_unload(similarity_info *info) | |
| 524 | - | { | ||
| 525 | 79086 | 2 | if (info->odb_obj) | |
| 526 | ##### | 3 | git_odb_object_free(info->odb_obj); | |
| 527 | - | |||
| 528 | 79086 | 4 | if (info->blob) | |
| 529 | 1191 | 5 | git_blob_free(info->blob); | |
| 530 | - | else | ||
| 531 | 77895 | 6 | git_buf_dispose(&info->data); | |
| 532 | 79086 | 7 | } | |
| 533 | - | |||
| 534 | - | #define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0) | ||
| 535 | - | |||
| 536 | - | /* - score < 0 means files cannot be compared | ||
| 537 | - | * - score >= 100 means files are exact match | ||
| 538 | - | * - score == 0 means files are completely different | ||
| 539 | - | */ | ||
| 540 | ![]() |
39877 | 2 | static int similarity_measure( |
| 541 | - | int *score, | ||
| 542 | - | git_diff *diff, | ||
| 543 | - | const git_diff_find_options *opts, | ||
| 544 | - | void **cache, | ||
| 545 | - | size_t a_idx, | ||
| 546 | - | size_t b_idx) | ||
| 547 | - | { | ||
| 548 | 39877 | 2 | git_diff_file *a_file = similarity_get_file(diff, a_idx); | |
| 549 | 39877 | 3 | git_diff_file *b_file = similarity_get_file(diff, b_idx); | |
| 550 | 39877 | 4 | bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY); | |
| 551 | 39877 | 4 | int error = 0; | |
| 552 | - | similarity_info a_info, b_info; | ||
| 553 | - | |||
| 554 | 39877 | 4 | *score = -1; | |
| 555 | - | |||
| 556 | - | /* don't try to compare things that aren't files */ | ||
| 557 | 39877 | 4,5 | if (!GIT_MODE_ISBLOB(a_file->mode) || !GIT_MODE_ISBLOB(b_file->mode)) | |
| 558 | ##### | 6 | return 0; | |
| 559 | - | |||
| 560 | - | /* if exact match is requested, force calculation of missing OIDs now */ | ||
| 561 | 39877 | 7 | if (exact_match) { | |
| 562 | 30 | 8-10 | if (git_oid_is_zero(&a_file->id) && | |
| 563 | ##### | 10,12 | diff->old_src == GIT_ITERATOR_WORKDIR && | |
| 564 | ##### | 11,11 | !git_diff__oid_for_file(&a_file->id, | |
| 565 | ##### | 11 | diff, a_file->path, a_file->mode, a_file->size)) | |
| 566 | ##### | 13 | a_file->flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 567 | - | |||
| 568 | 30 | 14-16 | if (git_oid_is_zero(&b_file->id) && | |
| 569 | 6 | 16,18 | diff->new_src == GIT_ITERATOR_WORKDIR && | |
| 570 | 6 | 17,17 | !git_diff__oid_for_file(&b_file->id, | |
| 571 | 6 | 17 | diff, b_file->path, b_file->mode, b_file->size)) | |
| 572 | 6 | 19 | b_file->flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 573 | - | } | ||
| 574 | - | |||
| 575 | - | /* check OID match as a quick test */ | ||
| 576 | 39877 | 20,21 | if (git_oid__cmp(&a_file->id, &b_file->id) == 0) { | |
| 577 | 312 | 22 | *score = 100; | |
| 578 | 312 | 22 | return 0; | |
| 579 | - | } | ||
| 580 | - | |||
| 581 | - | /* don't calculate signatures if we are doing exact match */ | ||
| 582 | 39565 | 23 | if (exact_match) { | |
| 583 | 22 | 24 | *score = 0; | |
| 584 | 22 | 24 | return 0; | |
| 585 | - | } | ||
| 586 | - | |||
| 587 | 39543 | 25 | memset(&a_info, 0, sizeof(a_info)); | |
| 588 | 39543 | 25 | memset(&b_info, 0, sizeof(b_info)); | |
| 589 | - | |||
| 590 | - | /* set up similarity data (will try to update missing file sizes) */ | ||
| 591 | 39543 | 25-27 | if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0) | |
| 592 | ##### | 28 | return error; | |
| 593 | 39543 | 29-31 | if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0) | |
| 594 | ##### | 32 | goto cleanup; | |
| 595 | - | |||
| 596 | - | /* check if file sizes are nowhere near each other */ | ||
| 597 | 39543 | 33,34 | if (a_file->size > 127 && | |
| 598 | 18252 | 34,35 | b_file->size > 127 && | |
| 599 | 10215 | 35,36 | (a_file->size > (b_file->size << 3) || | |
| 600 | 8411 | 36 | b_file->size > (a_file->size << 3))) | |
| 601 | - | goto cleanup; | ||
| 602 | - | |||
| 603 | - | /* update signature cache if needed */ | ||
| 604 | 36299 | 37 | if (!cache[a_idx]) { | |
| 605 | 367 | 38,39 | if ((error = similarity_sig(&a_info, opts, cache)) < 0) | |
| 606 | ##### | 40 | goto cleanup; | |
| 607 | - | } | ||
| 608 | 36299 | 41 | if (!cache[b_idx]) { | |
| 609 | 870 | 42,43 | if ((error = similarity_sig(&b_info, opts, cache)) < 0) | |
| 610 | ##### | 44 | goto cleanup; | |
| 611 | - | } | ||
| 612 | - | |||
| 613 | - | /* calculate similarity provided that the metric choose to process | ||
| 614 | - | * both the a and b files (some may not if file is too big, etc). | ||
| 615 | - | */ | ||
| 616 | 36299 | 45,46 | if (cache[a_idx] && cache[b_idx]) | |
| 617 | 36299 | 47,47,47,47 | error = opts->metric->similarity( | |
| 618 | 36299 | 47,47,47 | score, cache[a_idx], cache[b_idx], opts->metric->payload); | |
| 619 | - | |||
| 620 | - | cleanup: | ||
| 621 | 39543 | 48 | similarity_unload(&a_info); | |
| 622 | 39543 | 49 | similarity_unload(&b_info); | |
| 623 | - | |||
| 624 | 39543 | 50 | return error; | |
| 625 | - | } | ||
| 626 | - | |||
| 627 | ![]() |
51 | 2 | static int calc_self_similarity( |
| 628 | - | git_diff *diff, | ||
| 629 | - | const git_diff_find_options *opts, | ||
| 630 | - | size_t delta_idx, | ||
| 631 | - | void **cache) | ||
| 632 | - | { | ||
| 633 | 51 | 2 | int error, similarity = -1; | |
| 634 | 51 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
| 635 | - | |||
| 636 | 51 | 5 | if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0) | |
| 637 | 16 | 6 | return 0; | |
| 638 | - | |||
| 639 | 35 | 7 | error = similarity_measure( | |
| 640 | 35 | 7 | &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1); | |
| 641 | 35 | 8 | if (error < 0) | |
| 642 | ##### | 9 | return error; | |
| 643 | - | |||
| 644 | 35 | 10 | if (similarity >= 0) { | |
| 645 | 35 | 11 | delta->similarity = (uint16_t)similarity; | |
| 646 | 35 | 11 | delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY; | |
| 647 | - | } | ||
| 648 | - | |||
| 649 | 35 | 12 | return 0; | |
| 650 | - | } | ||
| 651 | - | |||
| 652 | ![]() |
7107 | 2 | static bool is_rename_target( |
| 653 | - | git_diff *diff, | ||
| 654 | - | const git_diff_find_options *opts, | ||
| 655 | - | size_t delta_idx, | ||
| 656 | - | void **cache) | ||
| 657 | - | { | ||
| 658 | 7107 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
| 659 | - | |||
| 660 | - | /* skip things that aren't plain blobs */ | ||
| 661 | 7107 | 5 | if (!GIT_MODE_ISBLOB(delta->new_file.mode)) | |
| 662 | 349 | 6 | return false; | |
| 663 | - | |||
| 664 | - | /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as | ||
| 665 | - | * targets; maybe include UNTRACKED if requested. | ||
| 666 | - | */ | ||
| 667 | 6758 | 7 | switch (delta->status) { | |
| 668 | - | case GIT_DELTA_UNMODIFIED: | ||
| 669 | - | case GIT_DELTA_DELETED: | ||
| 670 | - | case GIT_DELTA_IGNORED: | ||
| 671 | - | case GIT_DELTA_CONFLICTED: | ||
| 672 | 27 | 8 | return false; | |
| 673 | - | |||
| 674 | - | case GIT_DELTA_MODIFIED: | ||
| 675 | 3954 | 9,10 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
| 676 | 3921 | 10 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
| 677 | 3919 | 11 | return false; | |
| 678 | - | |||
| 679 | 35 | 12,13 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
| 680 | ##### | 14 | return false; | |
| 681 | - | |||
| 682 | 35 | 15,16 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
| 683 | 33 | 16 | delta->similarity < opts->break_rewrite_threshold) { | |
| 684 | 28 | 17 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
| 685 | 28 | 17 | break; | |
| 686 | - | } | ||
| 687 | 7 | 18,19 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
| 688 | 7 | 19 | delta->similarity < opts->rename_from_rewrite_threshold) { | |
| 689 | 2 | 20 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
| 690 | 2 | 20 | break; | |
| 691 | - | } | ||
| 692 | - | |||
| 693 | 5 | 21 | return false; | |
| 694 | - | |||
| 695 | - | case GIT_DELTA_UNTRACKED: | ||
| 696 | 41 | 22 | if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED)) | |
| 697 | ##### | 23 | return false; | |
| 698 | 41 | 24 | break; | |
| 699 | - | |||
| 700 | - | default: /* all other status values should be checked */ | ||
| 701 | 2736 | 25 | break; | |
| 702 | - | } | ||
| 703 | - | |||
| 704 | 2807 | 26 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET; | |
| 705 | 2807 | 26 | return true; | |
| 706 | - | } | ||
| 707 | - | |||
| 708 | ![]() | 7107 | 2 | static bool is_rename_source( |
| 709 | - | git_diff *diff, | ||
| 710 | - | const git_diff_find_options *opts, | ||
| 711 | - | size_t delta_idx, | ||
| 712 | - | void **cache) | ||
| 713 | - | { | ||
| 714 | 7107 | 2-4 | git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx); | |
| 715 | - | |||
| 716 | - | /* skip things that aren't blobs */ | ||
| 717 | 7107 | 5 | if (!GIT_MODE_ISBLOB(delta->old_file.mode)) | |
| 718 | 2798 | 6 | return false; | |
| 719 | - | |||
| 720 | 4309 | 7 | switch (delta->status) { | |
| 721 | - | case GIT_DELTA_ADDED: | ||
| 722 | - | case GIT_DELTA_UNTRACKED: | ||
| 723 | - | case GIT_DELTA_UNREADABLE: | ||
| 724 | - | case GIT_DELTA_IGNORED: | ||
| 725 | - | case GIT_DELTA_CONFLICTED: | ||
| 726 | ##### | 8 | return false; | |
| 727 | - | |||
| 728 | - | case GIT_DELTA_DELETED: | ||
| 729 | - | case GIT_DELTA_TYPECHANGE: | ||
| 730 | 328 | 9 | break; | |
| 731 | - | |||
| 732 | - | case GIT_DELTA_UNMODIFIED: | ||
| 733 | 27 | 10 | if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) | |
| 734 | 10 | 11 | return false; | |
| 735 | 17 | 12 | if (FLAG_SET(opts, GIT_DIFF_FIND_REMOVE_UNMODIFIED)) | |
| 736 | 3 | 13 | delta->flags |= GIT_DIFF_FLAG__TO_DELETE; | |
| 737 | 17 | 14 | break; | |
| 738 | - | |||
| 739 | - | default: /* MODIFIED, RENAMED, COPIED */ | ||
| 740 | - | /* if we're finding copies, this could be a source */ | ||
| 741 | 3954 | 15 | if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) | |
| 742 | 37 | 16 | break; | |
| 743 | - | |||
| 744 | - | /* otherwise, this is only a source if we can split it */ | ||
| 745 | 3917 | 17,18 | if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) && | |
| 746 | 3903 | 18 | !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES)) | |
| 747 | 3901 | 19 | return false; | |
| 748 | - | |||
| 749 | 16 | 20,21 | if (calc_self_similarity(diff, opts, delta_idx, cache) < 0) | |
| 750 | ##### | 22 | return false; | |
| 751 | - | |||
| 752 | 16 | 23,24 | if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) && | |
| 753 | 14 | 24 | delta->similarity < opts->break_rewrite_threshold) { | |
| 754 | 13 | 25 | delta->flags |= GIT_DIFF_FLAG__TO_SPLIT; | |
| 755 | 13 | 25 | break; | |
| 756 | - | } | ||
| 757 | - | |||
| 758 | 3 | 26,27 | if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && | |
| 759 | 3 | 27 | delta->similarity < opts->rename_from_rewrite_threshold) | |
| 760 | 2 | 28 | break; | |
| 761 | - | |||
| 762 | 1 | 29 | return false; | |
| 763 | - | } | ||
| 764 | - | |||
| 765 | 397 | 30 | delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE; | |
| 766 | 397 | 30 | return true; | |
| 767 | - | } | ||
| 768 | - | |||
| 769 | 80 | 2 | GIT_INLINE(bool) delta_is_split(git_diff_delta *delta) | |
| 770 | - | { | ||
| 771 | 80 | 2,3 | return (delta->status == GIT_DELTA_TYPECHANGE || | |
| 772 | 77 | 3 | (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0); | |
| 773 | - | } | ||
| 774 | - | |||
| 775 | ![]() | 316 | 2 | GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta) |
| 776 | - | { | ||
| 777 | 316 | 2,3 | return (delta->status == GIT_DELTA_ADDED || | |
| 778 | 48 | 3,4 | delta->status == GIT_DELTA_UNTRACKED || | |
| 779 | 316 | 2,4-7 | delta->status == GIT_DELTA_UNREADABLE || | |
| 780 | 18 | 5 | delta->status == GIT_DELTA_IGNORED); | |
| 781 | - | } | ||
| 782 | - | |||
| 783 | 263 | 2 | GIT_INLINE(void) delta_make_rename( | |
| 784 | - | git_diff_delta *to, const git_diff_delta *from, uint16_t similarity) | ||
| 785 | - | { | ||
| 786 | 263 | 2 | to->status = GIT_DELTA_RENAMED; | |
| 787 | 263 | 2 | to->similarity = similarity; | |
| 788 | 263 | 2 | to->nfiles = 2; | |
| 789 | 263 | 2 | memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); | |
| 790 | 263 | 2 | to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
| 791 | 263 | 2 | } | |
| 792 | - | |||
| 793 | - | typedef struct { | ||
| 794 | - | size_t idx; | ||
| 795 | - | uint16_t similarity; | ||
| 796 | - | } diff_find_match; | ||
| 797 | - | |||
| 798 | ![]() | 332 | 2 | int git_diff_find_similar( |
| 799 | - | git_diff *diff, | ||
| 800 | - | const git_diff_find_options *given_opts) | ||
| 801 | - | { | ||
| 802 | - | size_t s, t; | ||
| 803 | 332 | 2 | int error = 0, result; | |
| 804 | - | uint16_t similarity; | ||
| 805 | - | git_diff_delta *src, *tgt; | ||
| 806 | 332 | 2 | git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT; | |
| 807 | 332 | 2 | size_t num_deltas, num_srcs = 0, num_tgts = 0; | |
| 808 | 332 | 2 | size_t tried_srcs = 0, tried_tgts = 0; | |
| 809 | 332 | 2 | size_t num_rewrites = 0, num_updates = 0, num_bumped = 0; | |
| 810 | - | size_t sigcache_size; | ||
| 811 | 332 | 2 | void **sigcache = NULL; /* cache of similarity metric file signatures */ | |
| 812 | 332 | 2 | diff_find_match *tgt2src = NULL; | |
| 813 | 332 | 2 | diff_find_match *src2tgt = NULL; | |
| 814 | 332 | 2 | diff_find_match *tgt2src_copy = NULL; | |
| 815 | - | diff_find_match *best_match; | ||
| 816 | - | git_diff_file swap; | ||
| 817 | - | |||
| 818 | 332 | 2,3 | assert(diff); | |
| 819 | - | |||
| 820 | 332 | 4,5 | if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) | |
| 821 | 2 | 6 | return error; | |
| 822 | - | |||
| 823 | 330 | 7 | num_deltas = diff->deltas.length; | |
| 824 | - | |||
| 825 | - | /* TODO: maybe abort if deltas.length > rename_limit ??? */ | ||
| 826 | 330 | 7-9 | if (!num_deltas || !git__is_uint32(num_deltas)) | |
| 827 | - | goto cleanup; | ||
| 828 | - | |||
| 829 | - | /* No flags set; nothing to do */ | ||
| 830 | 328 | 10 | if ((opts.flags & GIT_DIFF_FIND_ALL) == 0) | |
| 831 | 1 | 11 | goto cleanup; | |
| 832 | - | |||
| 833 | 327 | 12-18 | GIT_ERROR_CHECK_ALLOC_MULTIPLY(&sigcache_size, num_deltas, 2); | |
| 834 | 327 | 19 | sigcache = git__calloc(sigcache_size, sizeof(void *)); | |
| 835 | 327 | 20,21 | GIT_ERROR_CHECK_ALLOC(sigcache); | |
| 836 | - | |||
| 837 | - | /* Label rename sources and targets | ||
| 838 | - | * | ||
| 839 | - | * This will also set self-similarity scores for MODIFIED files and | ||
| 840 | - | * mark them for splitting if break-rewrites is enabled | ||
| 841 | - | */ | ||
| 842 | 7434 | 22,31-33 | git_vector_foreach(&diff->deltas, t, tgt) { | |
| 843 | 7107 | 23,24 | if (is_rename_source(diff, &opts, t, sigcache)) | |
| 844 | 397 | 25 | ++num_srcs; | |
| 845 | - | |||
| 846 | 7107 | 26,27 | if (is_rename_target(diff, &opts, t, sigcache)) | |
| 847 | 2807 | 28 | ++num_tgts; | |
| 848 | - | |||
| 849 | 7107 | 29 | if ((tgt->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) | |
| 850 | 30 | 30 | num_rewrites++; | |
| 851 | - | } | ||
| 852 | - | |||
| 853 | - | /* if there are no candidate srcs or tgts, we're done */ | ||
| 854 | 327 | 34,35 | if (!num_srcs || !num_tgts) | |
| 855 | - | goto cleanup; | ||
| 856 | - | |||
| 857 | 99 | 36 | src2tgt = git__calloc(num_deltas, sizeof(diff_find_match)); | |
| 858 | 99 | 37,38 | GIT_ERROR_CHECK_ALLOC(src2tgt); | |
| 859 | 99 | 39 | tgt2src = git__calloc(num_deltas, sizeof(diff_find_match)); | |
| 860 | 99 | 40,41 | GIT_ERROR_CHECK_ALLOC(tgt2src); | |
| 861 | - | |||
| 862 | 99 | 42 | if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
| 863 | 29 | 43 | tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match)); | |
| 864 | 29 | 44,45 | GIT_ERROR_CHECK_ALLOC(tgt2src_copy); | |
| 865 | - | } | ||
| 866 | - | |||
| 867 | - | /* | ||
| 868 | - | * Find best-fit matches for rename / copy candidates | ||
| 869 | - | */ | ||
| 870 | - | |||
| 871 | - | find_best_matches: | ||
| 872 | 134 | 46 | tried_tgts = num_bumped = 0; | |
| 873 | - | |||
| 874 | 4137 | 46,78-80 | git_vector_foreach(&diff->deltas, t, tgt) { | |
| 875 | - | /* skip things that are not rename targets */ | ||
| 876 | 4137 | 47 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) | |
| 877 | 2424 | 48 | continue; | |
| 878 | - | |||
| 879 | 1713 | 49 | tried_srcs = 0; | |
| 880 | - | |||
| 881 | 340999 | 49,73-75 | git_vector_foreach(&diff->deltas, s, src) { | |
| 882 | - | /* skip things that are not rename sources */ | ||
| 883 | 340336 | 50 | if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0) | |
| 884 | 300451 | 51 | continue; | |
| 885 | - | |||
| 886 | - | /* calculate similarity for this pair and find best match */ | ||
| 887 | 39885 | 52 | if (s == t) | |
| 888 | 43 | 53 | result = -1; /* don't measure self-similarity here */ | |
| 889 | 39842 | 54,55 | else if ((error = similarity_measure( | |
| 890 | 39842 | 54 | &result, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0) | |
| 891 | ##### | 56 | goto cleanup; | |
| 892 | - | |||
| 893 | 39885 | 57 | if (result < 0) | |
| 894 | 3287 | 58 | continue; | |
| 895 | 36598 | 59 | similarity = (uint16_t)result; | |
| 896 | - | |||
| 897 | - | /* is this a better rename? */ | ||
| 898 | 36598 | 59,60 | if (tgt2src[t].similarity < similarity && | |
| 899 | 1752 | 60 | src2tgt[s].similarity < similarity) | |
| 900 | - | { | ||
| 901 | - | /* eject old mapping */ | ||
| 902 | 437 | 61 | if (src2tgt[s].similarity > 0) { | |
| 903 | 72 | 62 | tgt2src[src2tgt[s].idx].similarity = 0; | |
| 904 | 72 | 62 | num_bumped++; | |
| 905 | - | } | ||
| 906 | 437 | 63 | if (tgt2src[t].similarity > 0) { | |
| 907 | 41 | 64 | src2tgt[tgt2src[t].idx].similarity = 0; | |
| 908 | 41 | 64 | num_bumped++; | |
| 909 | - | } | ||
| 910 | - | |||
| 911 | - | /* write new mapping */ | ||
| 912 | 437 | 65 | tgt2src[t].idx = s; | |
| 913 | 437 | 65 | tgt2src[t].similarity = similarity; | |
| 914 | 437 | 65 | src2tgt[s].idx = t; | |
| 915 | 437 | 65 | src2tgt[s].similarity = similarity; | |
| 916 | - | } | ||
| 917 | - | |||
| 918 | - | /* keep best absolute match for copies */ | ||
| 919 | 36598 | 66,67 | if (tgt2src_copy != NULL && | |
| 920 | 225 | 67 | tgt2src_copy[t].similarity < similarity) | |
| 921 | - | { | ||
| 922 | 64 | 68 | tgt2src_copy[t].idx = s; | |
| 923 | 64 | 68 | tgt2src_copy[t].similarity = similarity; | |
| 924 | - | } | ||
| 925 | - | |||
| 926 | 36598 | 69 | if (++tried_srcs >= num_srcs) | |
| 927 | 1050 | 70 | break; | |
| 928 | - | |||
| 929 | - | /* cap on maximum targets we'll examine (per "tgt" file) */ | ||
| 930 | 35548 | 71 | if (tried_srcs > opts.rename_limit) | |
| 931 | ##### | 72 | break; | |
| 932 | - | } | ||
| 933 | - | |||
| 934 | 1713 | 76 | if (++tried_tgts >= num_tgts) | |
| 935 | 134 | 77 | break; | |
| 936 | - | } | ||
| 937 | - | |||
| 938 | 134 | 81 | if (num_bumped > 0) /* try again if we bumped some items */ | |
| 939 | 35 | 82 | goto find_best_matches; | |
| 940 | - | |||
| 941 | - | /* | ||
| 942 | - | * Rewrite the diffs with renames / copies | ||
| 943 | - | */ | ||
| 944 | - | |||
| 945 | 2770 | 83,157-159 | git_vector_foreach(&diff->deltas, t, tgt) { | |
| 946 | - | /* skip things that are not rename targets */ | ||
| 947 | 2671 | 84 | if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0) | |
| 948 | 1694 | 85 | continue; | |
| 949 | - | |||
| 950 | - | /* check if this delta was the target of a similarity */ | ||
| 951 | 977 | 86 | if (tgt2src[t].similarity) | |
| 952 | 318 | 87 | best_match = &tgt2src[t]; | |
| 953 | 659 | 88,89 | else if (tgt2src_copy && tgt2src_copy[t].similarity) | |
| 954 | 2 | 90 | best_match = &tgt2src_copy[t]; | |
| 955 | - | else | ||
| 956 | 657 | 91 | continue; | |
| 957 | - | |||
| 958 | 320 | 92 | s = best_match->idx; | |
| 959 | 320 | 92-94 | src = GIT_VECTOR_GET(&diff->deltas, s); | |
| 960 | - | |||
| 961 | - | /* possible scenarios: | ||
| 962 | - | * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME | ||
| 963 | - | * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE | ||
| 964 | - | * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME | ||
| 965 | - | * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT | ||
| 966 | - | * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY | ||
| 967 | - | */ | ||
| 968 | - | |||
| 969 | 320 | 95 | if (src->status == GIT_DELTA_DELETED) { | |
| 970 | - | |||
| 971 | 288 | 96,97 | if (delta_is_new_only(tgt)) { | |
| 972 | - | |||
| 973 | 283 | 98 | if (best_match->similarity < opts.rename_threshold) | |
| 974 | 38 | 99 | continue; | |
| 975 | - | |||
| 976 | 245 | 100 | delta_make_rename(tgt, src, best_match->similarity); | |
| 977 | - | |||
| 978 | 245 | 101 | src->flags |= GIT_DIFF_FLAG__TO_DELETE; | |
| 979 | 245 | 101 | num_rewrites++; | |
| 980 | - | } else { | ||
| 981 | 5 | 102-104 | assert(delta_is_split(tgt)); | |
| 982 | - | |||
| 983 | 5 | 105 | if (best_match->similarity < opts.rename_from_rewrite_threshold) | |
| 984 | 1 | 106 | continue; | |
| 985 | - | |||
| 986 | 4 | 107 | memcpy(&swap, &tgt->old_file, sizeof(swap)); | |
| 987 | - | |||
| 988 | 4 | 107 | delta_make_rename(tgt, src, best_match->similarity); | |
| 989 | 4 | 108 | num_rewrites--; | |
| 990 | - | |||
| 991 | 4 | 108,109 | assert(src->status == GIT_DELTA_DELETED); | |
| 992 | 4 | 110 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); | |
| 993 | 4 | 110 | memset(&src->new_file, 0, sizeof(src->new_file)); | |
| 994 | 4 | 110 | src->new_file.path = src->old_file.path; | |
| 995 | 4 | 110 | src->new_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 996 | - | |||
| 997 | 4 | 110 | num_updates++; | |
| 998 | - | |||
| 999 | 4 | 110,111 | if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
| 1000 | - | /* what used to be at src t is now at src s */ | ||
| 1001 | 249 | 112,113 | tgt2src[src2tgt[t].idx].idx = s; | |
| 1002 | - | } | ||
| 1003 | - | } | ||
| 1004 | - | } | ||
| 1005 | - | |||
| 1006 | 32 | 114,115 | else if (delta_is_split(src)) { | |
| 1007 | - | |||
| 1008 | 14 | 116,117 | if (delta_is_new_only(tgt)) { | |
| 1009 | - | |||
| 1010 | 3 | 118 | if (best_match->similarity < opts.rename_threshold) | |
| 1011 | ##### | 119 | continue; | |
| 1012 | - | |||
| 1013 | 3 | 120 | delta_make_rename(tgt, src, best_match->similarity); | |
| 1014 | - | |||
| 1015 | 3 | 121-123 | src->status = (diff->new_src == GIT_ITERATOR_WORKDIR) ? | |
| 1016 | - | GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED; | ||
| 1017 | 3 | 124 | src->nfiles = 1; | |
| 1018 | 3 | 124 | memset(&src->old_file, 0, sizeof(src->old_file)); | |
| 1019 | 3 | 124 | src->old_file.path = src->new_file.path; | |
| 1020 | 3 | 124 | src->old_file.flags |= GIT_DIFF_FLAG_VALID_ID; | |
| 1021 | - | |||
| 1022 | 3 | 124 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
| 1023 | 3 | 124 | num_rewrites--; | |
| 1024 | - | |||
| 1025 | 3 | 124 | num_updates++; | |
| 1026 | - | } else { | ||
| 1027 | 11 | 125-127 | assert(delta_is_split(src)); | |
| 1028 | - | |||
| 1029 | 11 | 128 | if (best_match->similarity < opts.rename_from_rewrite_threshold) | |
| 1030 | ##### | 129 | continue; | |
| 1031 | - | |||
| 1032 | 11 | 130 | memcpy(&swap, &tgt->old_file, sizeof(swap)); | |
| 1033 | - | |||
| 1034 | 11 | 130 | delta_make_rename(tgt, src, best_match->similarity); | |
| 1035 | 11 | 131 | num_rewrites--; | |
| 1036 | 11 | 131 | num_updates++; | |
| 1037 | - | |||
| 1038 | 11 | 131 | memcpy(&src->old_file, &swap, sizeof(src->old_file)); | |
| 1039 | - | |||
| 1040 | - | /* if we've just swapped the new element into the correct | ||
| 1041 | - | * place, clear the SPLIT flag | ||
| 1042 | - | */ | ||
| 1043 | 11 | 131,132 | if (tgt2src[s].idx == t && | |
| 1044 | 7 | 132 | tgt2src[s].similarity > | |
| 1045 | 7 | 132 | opts.rename_from_rewrite_threshold) { | |
| 1046 | 6 | 133 | src->status = GIT_DELTA_RENAMED; | |
| 1047 | 6 | 133 | src->similarity = tgt2src[s].similarity; | |
| 1048 | 6 | 133 | tgt2src[s].similarity = 0; | |
| 1049 | 6 | 133 | src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
| 1050 | 6 | 133 | num_rewrites--; | |
| 1051 | - | } | ||
| 1052 | - | /* otherwise, if we just overwrote a source, update mapping */ | ||
| 1053 | 5 | 134,135 | else if (src2tgt[t].similarity > 0 && src2tgt[t].idx > t) { | |
| 1054 | - | /* what used to be at src t is now at src s */ | ||
| 1055 | 3 | 136 | tgt2src[src2tgt[t].idx].idx = s; | |
| 1056 | - | } | ||
| 1057 | - | |||
| 1058 | 14 | 137,138 | num_updates++; | |
| 1059 | - | } | ||
| 1060 | - | } | ||
| 1061 | - | |||
| 1062 | 18 | 139 | else if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) { | |
| 1063 | 18 | 140 | if (tgt2src_copy[t].similarity < opts.copy_threshold) | |
| 1064 | 2 | 141 | continue; | |
| 1065 | - | |||
| 1066 | - | /* always use best possible source for copy */ | ||
| 1067 | 16 | 142 | best_match = &tgt2src_copy[t]; | |
| 1068 | 16 | 142-144 | src = GIT_VECTOR_GET(&diff->deltas, best_match->idx); | |
| 1069 | - | |||
| 1070 | 16 | 145,146 | if (delta_is_split(tgt)) { | |
| 1071 | 2 | 147 | error = insert_delete_side_of_split(diff, &diff->deltas, tgt); | |
| 1072 | 2 | 148 | if (error < 0) | |
| 1073 | ##### | 149 | goto cleanup; | |
| 1074 | 2 | 150 | num_rewrites--; | |
| 1075 | - | } | ||
| 1076 | - | |||
| 1077 | 16 | 151-154 | if (!delta_is_split(tgt) && !delta_is_new_only(tgt)) | |
| 1078 | 2 | 155 | continue; | |
| 1079 | - | |||
| 1080 | 14 | 156 | tgt->status = GIT_DELTA_COPIED; | |
| 1081 | 14 | 156 | tgt->similarity = best_match->similarity; | |
| 1082 | 14 | 156 | tgt->nfiles = 2; | |
| 1083 | 14 | 156 | memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file)); | |
| 1084 | 14 | 156 | tgt->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; | |
| 1085 | - | |||
| 1086 | 14 | 156 | num_updates++; | |
| 1087 | - | } | ||
| 1088 | - | } | ||
| 1089 | - | |||
| 1090 | - | /* | ||
| 1091 | - | * Actually split and delete entries as needed | ||
| 1092 | - | */ | ||
| 1093 | - | |||
| 1094 | 99 | 160,161 | if (num_rewrites > 0 || num_updates > 0) | |
| 1095 | 87 | 166,166 | error = apply_splits_and_deletes( | |
| 1096 | 87 | 166 | diff, diff->deltas.length - num_rewrites, | |
| 1097 | 87 | 162-165 | FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES) && | |
| 1098 | 29 | 163 | !FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY)); | |
| 1099 | - | |||
| 1100 | - | cleanup: | ||
| 1101 | 330 | 167 | git__free(tgt2src); | |
| 1102 | 330 | 168 | git__free(src2tgt); | |
| 1103 | 330 | 169 | git__free(tgt2src_copy); | |
| 1104 | - | |||
| 1105 | 330 | 170 | if (sigcache) { | |
| 1106 | 14541 | 171,174,175 | for (t = 0; t < num_deltas * 2; ++t) { | |
| 1107 | 14214 | 172 | if (sigcache[t] != NULL) | |
| 1108 | 1237 | 173 | opts.metric->free_signature(sigcache[t], opts.metric->payload); | |
| 1109 | - | } | ||
| 1110 | 327 | 176 | git__free(sigcache); | |
| 1111 | - | } | ||
| 1112 | - | |||
| 1113 | 330 | 177,178 | if (!given_opts || !given_opts->metric) | |
| 1114 | 330 | 179 | git__free(opts.metric); | |
| 1115 | - | |||
| 1116 | 330 | 180 | return error; | |
| 1117 | - | } | ||
| 1118 | - | |||
| 1119 | - | #undef FLAG_SET |