source src/crlf.c
Line | Flow | Count | Block(s) | Source |
---|---|---|---|---|
1 | - | /* | ||
2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
3 | - | * | ||
4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
6 | - | */ | ||
7 | - | |||
8 | - | #include "common.h" | ||
9 | - | |||
10 | - | #include "git2/attr.h" | ||
11 | - | #include "git2/blob.h" | ||
12 | - | #include "git2/index.h" | ||
13 | - | #include "git2/sys/filter.h" | ||
14 | - | |||
15 | - | #include "futils.h" | ||
16 | - | #include "hash.h" | ||
17 | - | #include "filter.h" | ||
18 | - | #include "buf_text.h" | ||
19 | - | #include "repository.h" | ||
20 | - | |||
/*
 * Line-ending conversion action selected for a path.  The plain values
 * come straight from attribute parsing (see check_crlf); the _INPUT and
 * _CRLF variants are produced once an eol preference has been combined
 * with them (see convert_attrs).
 */
typedef enum {
	GIT_CRLF_UNDEFINED   = 0, /* nothing decided yet */
	GIT_CRLF_BINARY      = 1, /* never convert */
	GIT_CRLF_TEXT        = 2, /* text, eol still to be resolved */
	GIT_CRLF_TEXT_INPUT  = 3, /* text, LF output */
	GIT_CRLF_TEXT_CRLF   = 4, /* text, CRLF output */
	GIT_CRLF_AUTO        = 5, /* auto-detect, eol still to be resolved */
	GIT_CRLF_AUTO_INPUT  = 6, /* auto-detect, LF output */
	GIT_CRLF_AUTO_CRLF   = 7, /* auto-detect, CRLF output */
} git_crlf_t;
31 | - | |||
/*
 * Per-path line-ending settings.  Populated by convert_attrs() and then
 * copied into the filter payload by crlf_check().
 */
struct crlf_attrs {
	int attr_action; /* action derived from the attributes alone */
	int crlf_action; /* final action, after falling back to core.autocrlf */

	int auto_crlf; /* looked up via GIT_CONFIGMAP_AUTO_CRLF */
	int safe_crlf; /* looked up via GIT_CONFIGMAP_SAFE_CRLF */
	int core_eol; /* looked up via GIT_CONFIGMAP_EOL */
};
40 | - | |||
/*
 * The CRLF filter object.  It carries no state of its own beyond the
 * embedded generic filter, which is its first (and only) member.
 */
struct crlf_filter {
	git_filter f;
};
44 | - | |||
45 | 21699 | 2 | static git_crlf_t check_crlf(const char *value) | |
46 | - | { | ||
47 | 21699 | 2,3 | if (GIT_ATTR_IS_TRUE(value)) | |
48 | 1066 | 4 | return GIT_CRLF_TEXT; | |
49 | 20632 | 5,6 | else if (GIT_ATTR_IS_FALSE(value)) | |
50 | 514 | 7 | return GIT_CRLF_BINARY; | |
51 | 20118 | 8,9 | else if (GIT_ATTR_IS_UNSPECIFIED(value)) | |
52 | - | ; | ||
53 | 807 | 10 | else if (strcmp(value, "input") == 0) | |
54 | ##### | 11 | return GIT_CRLF_TEXT_INPUT; | |
55 | 807 | 12 | else if (strcmp(value, "auto") == 0) | |
56 | 807 | 13 | return GIT_CRLF_AUTO; | |
57 | - | |||
58 | 19311 | 14 | return GIT_CRLF_UNDEFINED; | |
59 | - | } | ||
60 | - | |||
61 | 11414 | 2 | static git_configmap_value check_eol(const char *value) | |
62 | - | { | ||
63 | 11414 | 2,3 | if (GIT_ATTR_IS_UNSPECIFIED(value)) | |
64 | - | ; | ||
65 | 1761 | 4 | else if (strcmp(value, "lf") == 0) | |
66 | 1064 | 5 | return GIT_EOL_LF; | |
67 | 697 | 6 | else if (strcmp(value, "crlf") == 0) | |
68 | 697 | 7 | return GIT_EOL_CRLF; | |
69 | - | |||
70 | 9653 | 8 | return GIT_EOL_UNSET; | |
71 | - | } | ||
72 | - | |||
73 | 488 | 2 | static int has_cr_in_index(const git_filter_source *src) | |
74 | - | { | ||
75 | 488 | 2 | git_repository *repo = git_filter_source_repo(src); | |
76 | 488 | 3 | const char *path = git_filter_source_path(src); | |
77 | - | git_index *index; | ||
78 | - | const git_index_entry *entry; | ||
79 | - | git_blob *blob; | ||
80 | - | const void *blobcontent; | ||
81 | - | git_object_size_t blobsize; | ||
82 | - | bool found_cr; | ||
83 | - | |||
84 | 488 | 4 | if (!path) | |
85 | ##### | 5 | return false; | |
86 | - | |||
87 | 488 | 6,7 | if (git_repository_index__weakptr(&index, repo) < 0) { | |
88 | ##### | 8 | git_error_clear(); | |
89 | ##### | 9 | return false; | |
90 | - | } | ||
91 | - | |||
92 | 488 | 10-13 | if (!(entry = git_index_get_bypath(index, path, 0)) && | |
93 | 428 | 12 | !(entry = git_index_get_bypath(index, path, 1))) | |
94 | 428 | 14 | return false; | |
95 | - | |||
96 | 60 | 15 | if (!S_ISREG(entry->mode)) /* don't crlf filter non-blobs */ | |
97 | ##### | 16 | return true; | |
98 | - | |||
99 | 60 | 17,18 | if (git_blob_lookup(&blob, repo, &entry->id) < 0) | |
100 | ##### | 19 | return false; | |
101 | - | |||
102 | 60 | 20 | blobcontent = git_blob_rawcontent(blob); | |
103 | 60 | 21 | blobsize = git_blob_rawsize(blob); | |
104 | 60 | 22,23 | if (!git__is_sizet(blobsize)) | |
105 | ##### | 24 | blobsize = (size_t)-1; | |
106 | - | |||
107 | 60 | 26,30 | found_cr = (blobcontent != NULL && | |
108 | 60 | 25,27-29 | blobsize > 0 && | |
109 | 60 | 27 | memchr(blobcontent, '\r', (size_t)blobsize) != NULL); | |
110 | - | |||
111 | 60 | 30 | git_blob_free(blob); | |
112 | 60 | 31 | return found_cr; | |
113 | - | } | ||
114 | - | |||
115 | 468 | 2 | static int text_eol_is_crlf(struct crlf_attrs *ca) | |
116 | - | { | ||
117 | 468 | 2 | if (ca->auto_crlf == GIT_AUTO_CRLF_TRUE) | |
118 | 168 | 3 | return 1; | |
119 | 300 | 4 | else if (ca->auto_crlf == GIT_AUTO_CRLF_INPUT) | |
120 | 138 | 5 | return 0; | |
121 | - | |||
122 | 162 | 6 | if (ca->core_eol == GIT_EOL_CRLF) | |
123 | ##### | 7 | return 1; | |
124 | - | if (ca->core_eol == GIT_EOL_UNSET && GIT_EOL_NATIVE == GIT_EOL_CRLF) | ||
125 | - | return 1; | ||
126 | - | |||
127 | 162 | 8 | return 0; | |
128 | - | } | ||
129 | - | |||
130 | 2024 | 2 | static git_configmap_value output_eol(struct crlf_attrs *ca) | |
131 | - | { | ||
132 | 2024 | 2 | switch (ca->crlf_action) { | |
133 | - | case GIT_CRLF_BINARY: | ||
134 | ##### | 3 | return GIT_EOL_UNSET; | |
135 | - | case GIT_CRLF_TEXT_CRLF: | ||
136 | 642 | 4 | return GIT_EOL_CRLF; | |
137 | - | case GIT_CRLF_TEXT_INPUT: | ||
138 | 575 | 5 | return GIT_EOL_LF; | |
139 | - | case GIT_CRLF_UNDEFINED: | ||
140 | - | case GIT_CRLF_AUTO_CRLF: | ||
141 | 388 | 6 | return GIT_EOL_CRLF; | |
142 | - | case GIT_CRLF_AUTO_INPUT: | ||
143 | 206 | 7 | return GIT_EOL_LF; | |
144 | - | case GIT_CRLF_TEXT: | ||
145 | - | case GIT_CRLF_AUTO: | ||
146 | 213 | 8-12 | return text_eol_is_crlf(ca) ? GIT_EOL_CRLF : GIT_EOL_LF; | |
147 | - | } | ||
148 | - | |||
149 | - | /* TODO: warn when available */ | ||
150 | ##### | 13 | return ca->core_eol; | |
151 | - | } | ||
152 | - | |||
153 | 1486 | 2 | GIT_INLINE(int) check_safecrlf( | |
154 | - | struct crlf_attrs *ca, | ||
155 | - | const git_filter_source *src, | ||
156 | - | git_buf_text_stats *stats) | ||
157 | - | { | ||
158 | 1486 | 2 | const char *filename = git_filter_source_path(src); | |
159 | - | |||
160 | 1486 | 3 | if (!ca->safe_crlf) | |
161 | 665 | 4 | return 0; | |
162 | - | |||
163 | 821 | 5,6 | if (output_eol(ca) == GIT_EOL_LF) { | |
164 | - | /* | ||
165 | - | * CRLFs would not be restored by checkout: | ||
166 | - | * check if we'd remove CRLFs | ||
167 | - | */ | ||
168 | 436 | 7 | if (stats->crlf) { | |
169 | 274 | 8 | if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { | |
170 | - | /* TODO: issue a warning when available */ | ||
171 | - | } else { | ||
172 | 138 | 9,10 | if (filename && *filename) | |
173 | 138 | 11 | git_error_set( | |
174 | - | GIT_ERROR_FILTER, "CRLF would be replaced by LF in '%s'", | ||
175 | - | filename); | ||
176 | - | else | ||
177 | ##### | 12 | git_error_set( | |
178 | - | GIT_ERROR_FILTER, "CRLF would be replaced by LF"); | ||
179 | - | |||
180 | 138 | 13 | return -1; | |
181 | - | } | ||
182 | - | } | ||
183 | 385 | 14,15 | } else if (output_eol(ca) == GIT_EOL_CRLF) { | |
184 | - | /* | ||
185 | - | * CRLFs would be added by checkout: | ||
186 | - | * check if we have "naked" LFs | ||
187 | - | */ | ||
188 | 385 | 16 | if (stats->crlf != stats->lf) { | |
189 | 271 | 17 | if (ca->safe_crlf == GIT_SAFE_CRLF_WARN) { | |
190 | - | /* TODO: issue a warning when available */ | ||
191 | - | } else { | ||
192 | 136 | 18,19 | if (filename && *filename) | |
193 | 134 | 20 | git_error_set( | |
194 | - | GIT_ERROR_FILTER, "LF would be replaced by CRLF in '%s'", | ||
195 | - | filename); | ||
196 | - | else | ||
197 | 2 | 21 | git_error_set( | |
198 | - | GIT_ERROR_FILTER, "LF would be replaced by CRLF"); | ||
199 | - | |||
200 | 136 | 22 | return -1; | |
201 | - | } | ||
202 | - | } | ||
203 | - | } | ||
204 | - | |||
205 | 547 | 23 | return 0; | |
206 | - | } | ||
207 | - | |||
208 | 1815 | 2 | static int crlf_apply_to_odb( | |
209 | - | struct crlf_attrs *ca, | ||
210 | - | git_buf *to, | ||
211 | - | const git_buf *from, | ||
212 | - | const git_filter_source *src) | ||
213 | - | { | ||
214 | - | git_buf_text_stats stats; | ||
215 | - | bool is_binary; | ||
216 | - | int error; | ||
217 | - | |||
218 | - | /* Binary attribute? Empty file? Nothing to do */ | ||
219 | 1815 | 2-4 | if (ca->crlf_action == GIT_CRLF_BINARY || !git_buf_len(from)) | |
220 | 84 | 5 | return GIT_PASSTHROUGH; | |
221 | - | |||
222 | 1731 | 6 | is_binary = git_buf_text_gather_stats(&stats, from, false); | |
223 | - | |||
224 | - | /* Heuristics to see if we can skip the conversion. | ||
225 | - | * Straight from Core Git. | ||
226 | - | */ | ||
227 | 1731 | 7,8 | if (ca->crlf_action == GIT_CRLF_AUTO || | |
228 | 1548 | 8,9 | ca->crlf_action == GIT_CRLF_AUTO_INPUT || | |
229 | 1329 | 9 | ca->crlf_action == GIT_CRLF_AUTO_CRLF) { | |
230 | - | |||
231 | 699 | 10 | if (is_binary) | |
232 | 211 | 11 | return GIT_PASSTHROUGH; | |
233 | - | |||
234 | - | /* | ||
235 | - | * If the file in the index has any CR in it, do not convert. | ||
236 | - | * This is the new safer autocrlf handling. | ||
237 | - | */ | ||
238 | 488 | 12,13 | if (has_cr_in_index(src)) | |
239 | 34 | 14 | return GIT_PASSTHROUGH; | |
240 | - | } | ||
241 | - | |||
242 | 1486 | 15,16 | if ((error = check_safecrlf(ca, src, &stats)) < 0) | |
243 | 274 | 17 | return error; | |
244 | - | |||
245 | - | /* If there are no CR characters to filter out, then just pass */ | ||
246 | 1212 | 18 | if (!stats.crlf) | |
247 | 618 | 19 | return GIT_PASSTHROUGH; | |
248 | - | |||
249 | - | /* Actually drop the carriage returns */ | ||
250 | 594 | 20 | return git_buf_text_crlf_to_lf(to, from); | |
251 | - | } | ||
252 | - | |||
253 | 860 | 2 | static int crlf_apply_to_workdir( | |
254 | - | struct crlf_attrs *ca, | ||
255 | - | git_buf *to, | ||
256 | - | const git_buf *from) | ||
257 | - | { | ||
258 | - | git_buf_text_stats stats; | ||
259 | - | bool is_binary; | ||
260 | - | |||
261 | - | /* Empty file? Nothing to do. */ | ||
262 | 860 | 2-5 | if (git_buf_len(from) == 0 || output_eol(ca) != GIT_EOL_CRLF) | |
263 | 516 | 6 | return GIT_PASSTHROUGH; | |
264 | - | |||
265 | 344 | 7 | is_binary = git_buf_text_gather_stats(&stats, from, false); | |
266 | - | |||
267 | - | /* If there are no LFs, or all LFs are part of a CRLF, nothing to do */ | ||
268 | 344 | 8,9 | if (stats.lf == 0 || stats.lf == stats.crlf) | |
269 | 93 | 10 | return GIT_PASSTHROUGH; | |
270 | - | |||
271 | 251 | 11,12 | if (ca->crlf_action == GIT_CRLF_AUTO || | |
272 | 225 | 12,13 | ca->crlf_action == GIT_CRLF_AUTO_INPUT || | |
273 | 225 | 13 | ca->crlf_action == GIT_CRLF_AUTO_CRLF) { | |
274 | - | |||
275 | - | /* If we have any existing CR or CRLF line endings, do nothing */ | ||
276 | 150 | 14 | if (stats.cr > 0) | |
277 | 90 | 15 | return GIT_PASSTHROUGH; | |
278 | - | |||
279 | - | /* Don't filter binary files */ | ||
280 | 60 | 16 | if (is_binary) | |
281 | 11 | 17 | return GIT_PASSTHROUGH; | |
282 | - | } | ||
283 | - | |||
284 | 150 | 18 | return git_buf_text_lf_to_crlf(to, from); | |
285 | - | } | ||
286 | - | |||
287 | 11937 | 2 | static int convert_attrs( | |
288 | - | struct crlf_attrs *ca, | ||
289 | - | const char **attr_values, | ||
290 | - | const git_filter_source *src) | ||
291 | - | { | ||
292 | - | int error; | ||
293 | - | |||
294 | 11937 | 2 | memset(ca, 0, sizeof(struct crlf_attrs)); | |
295 | - | |||
296 | 11937 | 2-4 | if ((error = git_repository__configmap_lookup(&ca->auto_crlf, | |
297 | 11937 | 6,7 | git_filter_source_repo(src), GIT_CONFIGMAP_AUTO_CRLF)) < 0 || | |
298 | 11937 | 5 | (error = git_repository__configmap_lookup(&ca->safe_crlf, | |
299 | 11937 | 9,10 | git_filter_source_repo(src), GIT_CONFIGMAP_SAFE_CRLF)) < 0 || | |
300 | 11937 | 8 | (error = git_repository__configmap_lookup(&ca->core_eol, | |
301 | - | git_filter_source_repo(src), GIT_CONFIGMAP_EOL)) < 0) | ||
302 | ##### | 11 | return error; | |
303 | - | |||
304 | - | /* downgrade FAIL to WARN if ALLOW_UNSAFE option is used */ | ||
305 | 11937 | 12-14 | if ((git_filter_source_flags(src) & GIT_FILTER_ALLOW_UNSAFE) && | |
306 | 5058 | 14 | ca->safe_crlf == GIT_SAFE_CRLF_FAIL) | |
307 | 1 | 15 | ca->safe_crlf = GIT_SAFE_CRLF_WARN; | |
308 | - | |||
309 | 11937 | 16 | if (attr_values) { | |
310 | - | /* load the text attribute */ | ||
311 | 11929 | 17 | ca->crlf_action = check_crlf(attr_values[2]); /* text */ | |
312 | - | |||
313 | 11929 | 18 | if (ca->crlf_action == GIT_CRLF_UNDEFINED) | |
314 | 9770 | 19,20 | ca->crlf_action = check_crlf(attr_values[0]); /* crlf */ | |
315 | - | |||
316 | 11928 | 21 | if (ca->crlf_action != GIT_CRLF_BINARY) { | |
317 | - | /* load the eol attribute */ | ||
318 | 11414 | 22 | int eol_attr = check_eol(attr_values[1]); | |
319 | - | |||
320 | 11414 | 23,24 | if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_LF) | |
321 | 266 | 25 | ca->crlf_action = GIT_CRLF_AUTO_INPUT; | |
322 | 11148 | 26,27 | else if (ca->crlf_action == GIT_CRLF_AUTO && eol_attr == GIT_EOL_CRLF) | |
323 | 228 | 28 | ca->crlf_action = GIT_CRLF_AUTO_CRLF; | |
324 | 10920 | 29 | else if (eol_attr == GIT_EOL_LF) | |
325 | 798 | 30 | ca->crlf_action = GIT_CRLF_TEXT_INPUT; | |
326 | 10122 | 31 | else if (eol_attr == GIT_EOL_CRLF) | |
327 | 469 | 32 | ca->crlf_action = GIT_CRLF_TEXT_CRLF; | |
328 | - | } | ||
329 | - | |||
330 | 11928 | 33 | ca->attr_action = ca->crlf_action; | |
331 | - | } else { | ||
332 | 8 | 34 | ca->crlf_action = GIT_CRLF_UNDEFINED; | |
333 | - | } | ||
334 | - | |||
335 | 11936 | 35 | if (ca->crlf_action == GIT_CRLF_TEXT) | |
336 | 255 | 36-40 | ca->crlf_action = text_eol_is_crlf(ca) ? GIT_CRLF_TEXT_CRLF : GIT_CRLF_TEXT_INPUT; | |
337 | 11936 | 41,42 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_FALSE) | |
338 | 8756 | 43 | ca->crlf_action = GIT_CRLF_BINARY; | |
339 | 11936 | 44,45 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_TRUE) | |
340 | 258 | 46 | ca->crlf_action = GIT_CRLF_AUTO_CRLF; | |
341 | 11936 | 47,48 | if (ca->crlf_action == GIT_CRLF_UNDEFINED && ca->auto_crlf == GIT_AUTO_CRLF_INPUT) | |
342 | 79 | 49 | ca->crlf_action = GIT_CRLF_AUTO_INPUT; | |
343 | - | |||
344 | 11936 | 50 | return 0; | |
345 | - | } | ||
346 | - | |||
347 | 11937 | 2 | static int crlf_check( | |
348 | - | git_filter *self, | ||
349 | - | void **payload, /* points to NULL ptr on entry, may be set */ | ||
350 | - | const git_filter_source *src, | ||
351 | - | const char **attr_values) | ||
352 | - | { | ||
353 | - | struct crlf_attrs ca; | ||
354 | - | |||
355 | - | GIT_UNUSED(self); | ||
356 | - | |||
357 | 11937 | 2 | convert_attrs(&ca, attr_values, src); | |
358 | - | |||
359 | 11936 | 3 | if (ca.crlf_action == GIT_CRLF_BINARY) | |
360 | 9270 | 4 | return GIT_PASSTHROUGH; | |
361 | - | |||
362 | 2666 | 5 | *payload = git__malloc(sizeof(ca)); | |
363 | 2666 | 6,7 | GIT_ERROR_CHECK_ALLOC(*payload); | |
364 | 2666 | 8 | memcpy(*payload, &ca, sizeof(ca)); | |
365 | - | |||
366 | 2666 | 8 | return 0; | |
367 | - | } | ||
368 | - | |||
369 | 2675 | 2 | static int crlf_apply( | |
370 | - | git_filter *self, | ||
371 | - | void **payload, /* may be read and/or set */ | ||
372 | - | git_buf *to, | ||
373 | - | const git_buf *from, | ||
374 | - | const git_filter_source *src) | ||
375 | - | { | ||
376 | - | /* initialize payload in case `check` was bypassed */ | ||
377 | 2675 | 2 | if (!*payload) { | |
378 | 8 | 3 | int error = crlf_check(self, payload, src, NULL); | |
379 | - | |||
380 | 8 | 4 | if (error < 0) | |
381 | ##### | 5 | return error; | |
382 | - | } | ||
383 | - | |||
384 | 2675 | 6,7 | if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) | |
385 | 860 | 8 | return crlf_apply_to_workdir(*payload, to, from); | |
386 | - | else | ||
387 | 1815 | 9 | return crlf_apply_to_odb(*payload, to, from, src); | |
388 | - | } | ||
389 | - | |||
390 | 2666 | 2 | static void crlf_cleanup( | |
391 | - | git_filter *self, | ||
392 | - | void *payload) | ||
393 | - | { | ||
394 | - | GIT_UNUSED(self); | ||
395 | 2666 | 2 | git__free(payload); | |
396 | 2666 | 3 | } | |
397 | - | |||
398 | 9 | 2 | git_filter *git_crlf_filter_new(void) | |
399 | - | { | ||
400 | 9 | 2 | struct crlf_filter *f = git__calloc(1, sizeof(struct crlf_filter)); | |
401 | 9 | 3 | if (f == NULL) | |
402 | ##### | 4 | return NULL; | |
403 | - | |||
404 | 9 | 5 | f->f.version = GIT_FILTER_VERSION; | |
405 | 9 | 5 | f->f.attributes = "crlf eol text"; | |
406 | 9 | 5 | f->f.initialize = NULL; | |
407 | 9 | 5 | f->f.shutdown = git_filter_free; | |
408 | 9 | 5 | f->f.check = crlf_check; | |
409 | 9 | 5 | f->f.apply = crlf_apply; | |
410 | 9 | 5 | f->f.cleanup = crlf_cleanup; | |
411 | - | |||
412 | 9 | 5 | return (git_filter *)f; | |
413 | - | } |