source src/buf_text.c
Line | Flow | Count | Block(s) | Source |
---|---|---|---|---|
1 | - | /* | ||
2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
3 | - | * | ||
4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
6 | - | */ | ||
7 | - | #include "buf_text.h" | ||
8 | - | |||
9 | 57 | 2 | int git_buf_text_puts_escaped( | |
10 | - | git_buf *buf, | ||
11 | - | const char *string, | ||
12 | - | const char *esc_chars, | ||
13 | - | const char *esc_with) | ||
14 | - | { | ||
15 | - | const char *scan; | ||
16 | 57 | 2 | size_t total = 0, esc_len = strlen(esc_with), count, alloclen; | |
17 | - | |||
18 | 57 | 2 | if (!string) | |
19 | ##### | 3 | return 0; | |
20 | - | |||
21 | 176 | 4,6 | for (scan = string; *scan; ) { | |
22 | - | /* count run of non-escaped characters */ | ||
23 | 119 | 5 | count = strcspn(scan, esc_chars); | |
24 | 119 | 5 | total += count; | |
25 | 119 | 5 | scan += count; | |
26 | - | /* count run of escaped characters */ | ||
27 | 119 | 5 | count = strspn(scan, esc_chars); | |
28 | 119 | 5 | total += count * (esc_len + 1); | |
29 | 119 | 5 | scan += count; | |
30 | - | } | ||
31 | - | |||
32 | 57 | 7-13 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1); | |
33 | 57 | 14,15 | if (git_buf_grow_by(buf, alloclen) < 0) | |
34 | ##### | 16 | return -1; | |
35 | - | |||
36 | 176 | 17,21 | for (scan = string; *scan; ) { | |
37 | 119 | 18 | count = strcspn(scan, esc_chars); | |
38 | - | |||
39 | 119 | 18 | memmove(buf->ptr + buf->size, scan, count); | |
40 | 119 | 18 | scan += count; | |
41 | 119 | 18 | buf->size += count; | |
42 | - | |||
43 | 188 | 18-20 | for (count = strspn(scan, esc_chars); count > 0; --count) { | |
44 | - | /* copy escape sequence */ | ||
45 | 69 | 19 | memmove(buf->ptr + buf->size, esc_with, esc_len); | |
46 | 69 | 19 | buf->size += esc_len; | |
47 | - | /* copy character to be escaped */ | ||
48 | 69 | 19 | buf->ptr[buf->size] = *scan; | |
49 | 69 | 19 | buf->size++; | |
50 | 69 | 19 | scan++; | |
51 | - | } | ||
52 | - | } | ||
53 | - | |||
54 | 57 | 22 | buf->ptr[buf->size] = '\0'; | |
55 | - | |||
56 | 57 | 22 | return 0; | |
57 | - | } | ||
58 | - | |||
59 | 3082 | 2 | void git_buf_text_unescape(git_buf *buf) | |
60 | - | { | ||
61 | 3082 | 2 | buf->size = git__unescape(buf->ptr); | |
62 | 3082 | 3 | } | |
63 | - | |||
64 | 606 | 2 | int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) | |
65 | - | { | ||
66 | 606 | 2 | const char *scan = src->ptr; | |
67 | 606 | 2 | const char *scan_end = src->ptr + src->size; | |
68 | 606 | 2 | const char *next = memchr(scan, '\r', src->size); | |
69 | - | size_t new_size; | ||
70 | - | char *out; | ||
71 | - | |||
72 | 606 | 2,3 | assert(tgt != src); | |
73 | - | |||
74 | 606 | 4 | if (!next) | |
75 | 3 | 5 | return git_buf_set(tgt, src->ptr, src->size); | |
76 | - | |||
77 | - | /* reduce reallocs while in the loop */ | ||
78 | 603 | 6-12 | GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1); | |
79 | 603 | 13,14 | if (git_buf_grow(tgt, new_size) < 0) | |
80 | ##### | 15 | return -1; | |
81 | - | |||
82 | 603 | 16 | out = tgt->ptr; | |
83 | 603 | 16 | tgt->size = 0; | |
84 | - | |||
85 | - | /* Find the next \r and copy whole chunk up to there to tgt */ | ||
86 | 3750 | 16,22,23 | for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { | |
87 | 3147 | 17 | if (next > scan) { | |
88 | 3143 | 18 | size_t copylen = (size_t)(next - scan); | |
89 | 3143 | 18 | memcpy(out, scan, copylen); | |
90 | 3143 | 18 | out += copylen; | |
91 | - | } | ||
92 | - | |||
93 | - | /* Do not drop \r unless it is followed by \n */ | ||
94 | 3147 | 19,20 | if (next + 1 == scan_end || next[1] != '\n') | |
95 | 131 | 21 | *out++ = '\r'; | |
96 | - | } | ||
97 | - | |||
98 | - | /* Copy remaining input into dest */ | ||
99 | 603 | 24 | if (scan < scan_end) { | |
100 | 539 | 25 | size_t remaining = (size_t)(scan_end - scan); | |
101 | 539 | 25 | memcpy(out, scan, remaining); | |
102 | 539 | 25 | out += remaining; | |
103 | - | } | ||
104 | - | |||
105 | 603 | 26 | tgt->size = (size_t)(out - tgt->ptr); | |
106 | 603 | 26 | tgt->ptr[tgt->size] = '\0'; | |
107 | - | |||
108 | 603 | 26 | return 0; | |
109 | - | } | ||
110 | - | |||
111 | 164 | 2 | int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) | |
112 | - | { | ||
113 | 164 | 2 | const char *start = src->ptr; | |
114 | 164 | 2 | const char *end = start + src->size; | |
115 | 164 | 2 | const char *scan = start; | |
116 | 164 | 2 | const char *next = memchr(scan, '\n', src->size); | |
117 | - | size_t alloclen; | ||
118 | - | |||
119 | 164 | 2,3 | assert(tgt != src); | |
120 | - | |||
121 | 164 | 4 | if (!next) | |
122 | 1 | 5 | return git_buf_set(tgt, src->ptr, src->size); | |
123 | - | |||
124 | - | /* attempt to reduce reallocs while in the loop */ | ||
125 | 163 | 6-12 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4); | |
126 | 163 | 13-19 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1); | |
127 | 163 | 20,21 | if (git_buf_grow(tgt, alloclen) < 0) | |
128 | ##### | 22 | return -1; | |
129 | 163 | 23 | tgt->size = 0; | |
130 | - | |||
131 | 1199 | 23,39,40 | for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { | |
132 | 1036 | 24 | size_t copylen = next - scan; | |
133 | - | |||
134 | - | /* if we find mixed line endings, carry on */ | ||
135 | 1036 | 24,25 | if (copylen && next[-1] == '\r') | |
136 | 109 | 26 | copylen--; | |
137 | - | |||
138 | 1036 | 27-33 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3); | |
139 | 1036 | 34,35 | if (git_buf_grow_by(tgt, alloclen) < 0) | |
140 | ##### | 36 | return -1; | |
141 | - | |||
142 | 1036 | 37 | if (copylen) { | |
143 | 1017 | 38 | memcpy(tgt->ptr + tgt->size, scan, copylen); | |
144 | 1017 | 38 | tgt->size += copylen; | |
145 | - | } | ||
146 | - | |||
147 | 1036 | 39 | tgt->ptr[tgt->size++] = '\r'; | |
148 | 1036 | 39 | tgt->ptr[tgt->size++] = '\n'; | |
149 | - | } | ||
150 | - | |||
151 | 163 | 41 | tgt->ptr[tgt->size] = '\0'; | |
152 | 163 | 41 | return git_buf_put(tgt, scan, end - scan); | |
153 | - | } | ||
154 | - | |||
155 | 3505 | 2 | int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) | |
156 | - | { | ||
157 | - | size_t i; | ||
158 | - | const char *str, *pfx; | ||
159 | - | |||
160 | 3505 | 2 | git_buf_clear(buf); | |
161 | - | |||
162 | 3505 | 3,4 | if (!strings || !strings->count) | |
163 | ##### | 5 | return 0; | |
164 | - | |||
165 | - | /* initialize common prefix to first string */ | ||
166 | 3505 | 6,7 | if (git_buf_sets(buf, strings->strings[0]) < 0) | |
167 | ##### | 8 | return -1; | |
168 | - | |||
169 | - | /* go through the rest of the strings, truncating to shared prefix */ | ||
170 | 3529 | 9,17,18 | for (i = 1; i < strings->count; ++i) { | |
171 | - | |||
172 | 588 | 10,12 | for (str = strings->strings[i], pfx = buf->ptr; | |
173 | 583 | 11,13 | *str && *str == *pfx; str++, pfx++) | |
174 | - | /* scanning */; | ||
175 | - | |||
176 | 436 | 14 | git_buf_truncate(buf, pfx - buf->ptr); | |
177 | - | |||
178 | 436 | 15 | if (!buf->size) | |
179 | 412 | 16 | break; | |
180 | - | } | ||
181 | - | |||
182 | 3505 | 19 | return 0; | |
183 | - | } | ||
184 | - | |||
185 | 618 | 2 | bool git_buf_text_is_binary(const git_buf *buf) | |
186 | - | { | ||
187 | 618 | 2 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
188 | - | git_bom_t bom; | ||
189 | 618 | 2 | int printable = 0, nonprintable = 0; | |
190 | - | |||
191 | 618 | 2 | scan += git_buf_text_detect_bom(&bom, buf); | |
192 | - | |||
193 | 618 | 3 | if (bom > GIT_BOM_UTF8) | |
194 | ##### | 4 | return 1; | |
195 | - | |||
196 | 116228 | 5,17 | while (scan < end) { | |
197 | 115613 | 6 | unsigned char c = *scan++; | |
198 | - | |||
199 | - | /* Printable characters are those above SPACE (0x1F) excluding DEL, | ||
200 | - | * and including BS, ESC and FF. | ||
201 | - | */ | ||
202 | 115613 | 6-10 | if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') | |
203 | 110275 | 11 | printable++; | |
204 | 5338 | 12 | else if (c == '\0') | |
205 | 3 | 13 | return true; | |
206 | 5335 | 14,15 | else if (!git__isspace(c)) | |
207 | 1 | 16 | nonprintable++; | |
208 | - | } | ||
209 | - | |||
210 | 615 | 18 | return ((printable >> 7) < nonprintable); | |
211 | - | } | ||
212 | - | |||
213 | 2094 | 2 | bool git_buf_text_contains_nul(const git_buf *buf) | |
214 | - | { | ||
215 | 2094 | 2 | return (memchr(buf->ptr, '\0', buf->size) != NULL); | |
216 | - | } | ||
217 | - | |||
218 | 201220 | 2 | int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf) | |
219 | - | { | ||
220 | - | const char *ptr; | ||
221 | - | size_t len; | ||
222 | - | |||
223 | 201220 | 2 | *bom = GIT_BOM_NONE; | |
224 | - | /* need at least 2 bytes to look for any BOM */ | ||
225 | 201220 | 2 | if (buf->size < 2) | |
226 | 174833 | 3 | return 0; | |
227 | - | |||
228 | 26387 | 4 | ptr = buf->ptr; | |
229 | 26387 | 4 | len = buf->size; | |
230 | - | |||
231 | 26387 | 4 | switch (*ptr++) { | |
232 | - | case 0: | ||
233 | 1 | 5-8 | if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { | |
234 | ##### | 9 | *bom = GIT_BOM_UTF32_BE; | |
235 | ##### | 9 | return 4; | |
236 | - | } | ||
237 | 1 | 10 | break; | |
238 | - | case '\xEF': | ||
239 | 407 | 11-13 | if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { | |
240 | 407 | 14 | *bom = GIT_BOM_UTF8; | |
241 | 407 | 14 | return 3; | |
242 | - | } | ||
243 | ##### | 15 | break; | |
244 | - | case '\xFE': | ||
245 | 3 | 16 | if (*ptr == '\xFF') { | |
246 | 3 | 17 | *bom = GIT_BOM_UTF16_BE; | |
247 | 3 | 17 | return 2; | |
248 | - | } | ||
249 | ##### | 18 | break; | |
250 | - | case '\xFF': | ||
251 | 2 | 19 | if (*ptr != '\xFE') | |
252 | 1 | 20 | break; | |
253 | 1 | 21-23 | if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { | |
254 | ##### | 24 | *bom = GIT_BOM_UTF32_LE; | |
255 | ##### | 24 | return 4; | |
256 | - | } else { | ||
257 | 1 | 25 | *bom = GIT_BOM_UTF16_LE; | |
258 | 1 | 25 | return 2; | |
259 | - | } | ||
260 | - | break; | ||
261 | - | default: | ||
262 | 25974 | 26 | break; | |
263 | - | } | ||
264 | - | |||
265 | 25976 | 27 | return 0; | |
266 | - | } | ||
267 | - | |||
268 | 2084 | 2 | bool git_buf_text_gather_stats( | |
269 | - | git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) | ||
270 | - | { | ||
271 | 2084 | 2 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
272 | - | int skip; | ||
273 | - | |||
274 | 2084 | 2 | memset(stats, 0, sizeof(*stats)); | |
275 | - | |||
276 | - | /* BOM detection */ | ||
277 | 2084 | 2 | skip = git_buf_text_detect_bom(&stats->bom, buf); | |
278 | 2084 | 3 | if (skip_bom) | |
279 | ##### | 4 | scan += skip; | |
280 | - | |||
281 | - | /* Ignore EOF character */ | ||
282 | 2084 | 5,6 | if (buf->size > 0 && end[-1] == '\032') | |
283 | ##### | 7 | end--; | |
284 | - | |||
285 | - | /* Counting loop */ | ||
286 | 199241 | 8,21 | while (scan < end) { | |
287 | 197157 | 9 | unsigned char c = *scan++; | |
288 | - | |||
289 | 197157 | 9,10 | if (c > 0x1F && c != 0x7F) | |
290 | 175434 | 11 | stats->printable++; | |
291 | 21723 | 12 | else switch (c) { | |
292 | - | case '\0': | ||
293 | 425 | 13 | stats->nul++; | |
294 | 425 | 13 | stats->nonprintable++; | |
295 | 425 | 13 | break; | |
296 | - | case '\n': | ||
297 | 12362 | 14 | stats->lf++; | |
298 | 12362 | 14 | break; | |
299 | - | case '\r': | ||
300 | 5956 | 15 | stats->cr++; | |
301 | 5956 | 15,16 | if (scan < end && *scan == '\n') | |
302 | 5052 | 17 | stats->crlf++; | |
303 | 5956 | 18 | break; | |
304 | - | case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ | ||
305 | 431 | 19 | stats->printable++; | |
306 | 431 | 19 | break; | |
307 | - | default: | ||
308 | 2549 | 20 | stats->nonprintable++; | |
309 | 2549 | 20 | break; | |
310 | - | } | ||
311 | - | } | ||
312 | - | |||
313 | - | /* Treat files with a bare CR as binary */ | ||
314 | 2084 | 22-24 | return (stats->cr != stats->crlf || stats->nul > 0 || | |
315 | 1677 | 24 | ((stats->printable >> 7) < stats->nonprintable)); | |
316 | - | } |