Skip to content

Commit 72d0fb2

Browse files
committed
Add trim/0 that trims leading and trailing whitespace
1 parent 913b264 commit 72d0fb2

File tree

7 files changed

+114
-1
lines changed

7 files changed

+114
-1
lines changed

docs/content/manual/manual.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,21 @@ sections:
17721772
input: '["fo", "foo", "barfoo", "foobar", "foob"]'
17731773
output: ['["fo","","bar","foobar","foob"]']
17741774

1775+
- title: "`trim`"
1776+
body: |
1777+
1778+
Trim leading and trailing whitespace in input.
1779+
1780+
Whitespace characters are the usual `" "`, `"\n"`, `"\t"`, `"\r"`
1781+
and also all characters in the Unicode character database with the
1782+
whitespace property. Note that what is considered whitespace might
1783+
change in the future.
1784+
1785+
examples:
1786+
- program: 'trim'
1787+
input: '" abc "'
1788+
output: ['"abc"']
1789+
17751790
- title: "`explode`"
17761791
body: |
17771792

jq.1.prebuilt

Lines changed: 19 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/builtin.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,47 @@ static jv f_string_indexes(jq_state *jq, jv a, jv b) {
11841184
return jv_string_indexes(a, b);
11851185
}
11861186

1187+
/*
 * Implementation of the `trim` builtin: strips leading and trailing
 * whitespace codepoints (as decided by jvp_codepoint_is_whitespace) from
 * the string `a`.
 *
 * Returns an error value carrying "trim input must be a string" when `a`
 * is not a string. Returns `a` itself when there is nothing to strip;
 * otherwise returns a newly built string and frees `a`.
 */
static jv f_string_trim(jq_state *jq, jv a) {
  if (jv_get_kind(a) != JV_KIND_STRING) {
    return ret_error(a, jv_string("trim input must be a string"));
  }

  // The length query is handed a copy so that `a` is still ours afterwards.
  int nbytes = jv_string_length_bytes(jv_copy(a));
  const char *begin = jv_string_value(a);
  const char *limit = begin + nbytes;
  const char *head = begin;  // first byte that survives trimming
  const char *tail = limit;  // one past the last byte that survives trimming
  const char *step;
  int cp;

  // Walk forward one codepoint at a time while it is whitespace. The decoder
  // yields NULL once `head` has reached `limit`, which also ends the walk.
  while ((step = jvp_utf8_next(head, limit, &cp)) != NULL &&
         jvp_codepoint_is_whitespace(cp)) {
    head = step;
  }

  // Walk backward from the end. Skipped entirely when the string was empty
  // or the forward walk already consumed everything (tail == head).
  while (tail > head) {
    // Locate the first byte of the codepoint that ends just before `tail`.
    step = jvp_utf8_backtrack(tail - 1, head, NULL);
    if (step == head) {
      // Backed into the codepoint the forward walk stopped on; it is kept.
      break;
    }
    jvp_utf8_next(step, tail, &cp);
    if (!jvp_codepoint_is_whitespace(cp)) {
      break;
    }
    tail = step;
  }

  // Nothing was stripped on either side: hand the input straight back.
  if (head == begin && tail == limit) {
    return a;
  }

  // Build the result from the surviving byte range before releasing `a`,
  // since `head`/`tail` point into its storage.
  jv trimmed = jv_string_sized(head, tail - head);
  jv_free(a);
  return trimmed;
}
1227+
11871228
static jv f_string_implode(jq_state *jq, jv a) {
11881229
if (jv_get_kind(a) != JV_KIND_ARRAY) {
11891230
return ret_error(a, jv_string("implode input must be an array"));
@@ -1702,6 +1743,7 @@ BINOPS
17021743
{f_string_explode, "explode", 1},
17031744
{f_string_implode, "implode", 1},
17041745
{f_string_indexes, "_strindices", 2},
1746+
{f_string_trim, "trim", 1},
17051747
{f_setpath, "setpath", 3}, // FIXME typechecking
17061748
{f_getpath, "getpath", 2},
17071749
{f_delpaths, "delpaths", 2},

src/jv_unicode.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,21 @@ int jvp_utf8_encode(int codepoint, char* out) {
118118
assert(out - start == jvp_utf8_encode_length(codepoint));
119119
return out - start;
120120
}
121+
122+
// Reports whether codepoint c carries the White_Space property, per
// https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
// Returns 1 for a whitespace codepoint and 0 for anything else.
int jvp_codepoint_is_whitespace(int c) {
  // Contiguous ranges first.
  if (c >= 0x0009 && c <= 0x000D) {  // <control-0009>..<control-000D>
    return 1;
  }
  if (c >= 0x2000 && c <= 0x200A) {  // EN QUAD..HAIR SPACE
    return 1;
  }

  // Remaining isolated codepoints.
  switch (c) {
  case 0x0020:  // SPACE
  case 0x0085:  // <control-0085>
  case 0x00A0:  // NO-BREAK SPACE
  case 0x1680:  // OGHAM SPACE MARK
  case 0x2028:  // LINE SEPARATOR
  case 0x2029:  // PARAGRAPH SEPARATOR
  case 0x202F:  // NARROW NO-BREAK SPACE
  case 0x205F:  // MEDIUM MATHEMATICAL SPACE
  case 0x3000:  // IDEOGRAPHIC SPACE
    return 1;
  default:
    return 0;
  }
}

src/jv_unicode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ int jvp_utf8_decode_length(char startchar);
99

1010
int jvp_utf8_encode_length(int codepoint);
1111
int jvp_utf8_encode(int codepoint, char* out);
12+
13+
// Returns non-zero when codepoint c is one of the characters that
// jv_unicode.c lists as having the Unicode White_Space property,
// and zero otherwise.
int jvp_codepoint_is_whitespace(int c);
1214
#endif

tests/jq.test

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,20 @@ split("")
13291329
"xababababax"
13301330
[1,7,[1,3,5,7]]
13311331

1332+
# trim
1333+
# \u000b is vertical tab (\v not supported by json)
1334+
map(trim)
1335+
[" \n\t\r\f\u000b", ""," ", "a", " a ", "abc", " abc ", " abc", "abc "]
1336+
["", "","", "a", "a", "abc", "abc", "abc", "abc"]
1337+
1338+
trim
1339+
"\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000abc\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000"
1340+
"abc"
1341+
1342+
try trim catch .
1343+
123
1344+
"trim input must be a string"
1345+
13321346
indices(1)
13331347
[0,1,1,2,3,4,1,5]
13341348
[1,2,6]

tests/man.test

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)