Skip to content

Commit 9f2ea9a

Browse files
committed
Add trim/0, ltrim/0 and rtrim/0 that trims leading and trailing whitespace
1 parent bc96146 commit 9f2ea9a

File tree

7 files changed

+145
-1
lines changed

7 files changed

+145
-1
lines changed

docs/content/manual/manual.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,6 +1772,25 @@ sections:
17721772
input: '["fo", "foo", "barfoo", "foobar", "foob"]'
17731773
output: ['["fo","","bar","foobar","foob"]']
17741774

1775+
- title: "`trim`, `ltrim`, `rtrim`"
1776+
body: |
1777+
1778+
`trim` trims both leading and trailing whitespace.
1779+
1780+
`ltrim` trims only leading (left side) whitespace.
1781+
1782+
`rtrim` trims only trailing (right side) whitespace.
1783+
1784+
Whitespace characters are the usual `" "`, `"\n"`, `"\t"`, `"\r"`
1785+
and also all characters in the Unicode character database with the
1786+
whitespace property. Note that what is considered whitespace might
1787+
change in the future.
1788+
1789+
examples:
1790+
- program: 'trim, ltrim, rtrim'
1791+
input: '" abc "'
1792+
output: ['"abc"', '"abc "', '" abc"']
1793+
17751794
- title: "`explode`"
17761795
body: |
17771796

jq.1.prebuilt

Lines changed: 25 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/builtin.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,58 @@ static jv f_string_indexes(jq_state *jq, jv a, jv b) {
11971197
return jv_string_indexes(a, b);
11981198
}
11991199

1200+
// Bit flags selecting which side(s) of a string string_trim() strips.
// They are combined with bitwise OR and tested with bitwise AND.
enum trim_op {
  TRIM_LEFT  = 0x1,  // strip leading whitespace
  TRIM_RIGHT = 0x2   // strip trailing whitespace
};
1204+
1205+
// Strip whitespace code points from one or both ends of the string `a`.
//
// `op` is a bitmask of TRIM_LEFT / TRIM_RIGHT choosing the side(s) to trim.
// A non-string input yields the error "trim input must be a string".
// When nothing is trimmed, `a` itself is returned; otherwise a new string
// holding the trimmed byte range is built and `a` is freed.
static jv string_trim(jv a, int op) {
  if (jv_get_kind(a) != JV_KIND_STRING) {
    return ret_error(a, jv_string("trim input must be a string"));
  }

  const int nbytes = jv_string_length_bytes(jv_copy(a));
  const char *const first = jv_string_value(a);
  const char *const last = first + nbytes;
  const char *lo = first;  // start of the part to keep
  const char *hi = last;   // one past the end of the part to keep
  int cp;

  if (op & TRIM_LEFT) {
    // Advance one code point at a time; stop when the decoder returns NULL
    // or the decoded code point is not whitespace.
    const char *next;
    while ((next = jvp_utf8_next(lo, last, &cp)) != NULL &&
           jvp_codepoint_is_whitespace(cp)) {
      lo = next;
    }
  }

  // Skip the right-hand pass for an empty string, or when the left-hand pass
  // has already consumed everything.
  if ((op & TRIM_RIGHT) && hi > lo) {
    do {
      // Find where the last remaining code point starts, then decode it.
      const char *prev = jvp_utf8_backtrack(hi - 1, lo, NULL);
      jvp_utf8_next(prev, hi, &cp);
      if (!jvp_codepoint_is_whitespace(cp)) {
        break;
      }
      hi = prev;
    } while (hi != lo);
  }

  // Nothing was trimmed: hand the input back instead of building a copy.
  if (lo == first && hi == last) {
    return a;
  }

  jv trimmed = jv_string_sized(lo, hi - lo);
  jv_free(a);
  return trimmed;
}
1247+
1248+
// Builtin wrapper: trims whitespace from both ends of `a`.
static jv f_string_trim(jq_state *jq, jv a) {
  const int both_sides = TRIM_LEFT | TRIM_RIGHT;
  return string_trim(a, both_sides);
}
1249+
// Builtin wrapper: trims leading (left side) whitespace from `a`.
static jv f_string_ltrim(jq_state *jq, jv a) {
  const int left_only = TRIM_LEFT;
  return string_trim(a, left_only);
}
1250+
// Builtin wrapper: trims trailing (right side) whitespace from `a`.
static jv f_string_rtrim(jq_state *jq, jv a) {
  const int right_only = TRIM_RIGHT;
  return string_trim(a, right_only);
}
1251+
12001252
static jv f_string_implode(jq_state *jq, jv a) {
12011253
if (jv_get_kind(a) != JV_KIND_ARRAY) {
12021254
return ret_error(a, jv_string("implode input must be an array"));
@@ -1721,6 +1773,9 @@ BINOPS
17211773
{f_string_explode, "explode", 1},
17221774
{f_string_implode, "implode", 1},
17231775
{f_string_indexes, "_strindices", 2},
1776+
{f_string_trim, "trim", 1},
1777+
{f_string_ltrim, "ltrim", 1},
1778+
{f_string_rtrim, "rtrim", 1},
17241779
{f_setpath, "setpath", 3}, // FIXME typechecking
17251780
{f_getpath, "getpath", 2},
17261781
{f_delpaths, "delpaths", 2},

src/jv_unicode.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,21 @@ int jvp_utf8_encode(int codepoint, char* out) {
118118
assert(out - start == jvp_utf8_encode_length(codepoint));
119119
return out - start;
120120
}
121+
122+
// Report whether code point `c` is one of the characters listed with the
// White_Space property in:
// https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
// Returns 1 for a whitespace code point and 0 for anything else.
int jvp_codepoint_is_whitespace(int c) {
  // Contiguous ranges first.
  if (c >= 0x0009 && c <= 0x000D) {  // <control-0009>..<control-000D>
    return 1;
  }
  if (c >= 0x2000 && c <= 0x200A) {  // EN QUAD..HAIR SPACE
    return 1;
  }

  // Remaining isolated code points.
  switch (c) {
    case 0x0020:  // SPACE
    case 0x0085:  // <control-0085>
    case 0x00A0:  // NO-BREAK SPACE
    case 0x1680:  // OGHAM SPACE MARK
    case 0x2028:  // LINE SEPARATOR
    case 0x2029:  // PARAGRAPH SEPARATOR
    case 0x202F:  // NARROW NO-BREAK SPACE
    case 0x205F:  // MEDIUM MATHEMATICAL SPACE
    case 0x3000:  // IDEOGRAPHIC SPACE
      return 1;
    default:
      return 0;
  }
}

src/jv_unicode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ int jvp_utf8_decode_length(char startchar);
99

1010
int jvp_utf8_encode_length(int codepoint);
1111
int jvp_utf8_encode(int codepoint, char* out);
12+
13+
// Returns non-zero when code point c has the Unicode White_Space property
// (see PropList.txt in the Unicode Character Database), zero otherwise.
int jvp_codepoint_is_whitespace(int c);
1214
#endif

tests/jq.test

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,6 +1334,26 @@ split("")
13341334
"xababababax"
13351335
[1,7,[1,3,5,7]]
13361336

1337+
# trim
1338+
# \u000b is vertical tab (\v not supported by json)
1339+
map(trim), map(ltrim), map(rtrim)
1340+
[" \n\t\r\f\u000b", ""," ", "a", " a ", "abc", " abc ", " abc", "abc "]
1341+
["", "", "", "a", "a", "abc", "abc", "abc", "abc"]
1342+
["", "", "", "a", "a ", "abc", "abc ", "abc", "abc "]
1343+
["", "", "", "a", " a", "abc", " abc", " abc", "abc"]
1344+
1345+
trim, ltrim, rtrim
1346+
"\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000abc\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000"
1347+
"abc"
1348+
"abc\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000"
1349+
"\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000abc"
1350+
1351+
try trim catch ., try ltrim catch ., try rtrim catch .
1352+
123
1353+
"trim input must be a string"
1354+
"trim input must be a string"
1355+
"trim input must be a string"
1356+
13371357
indices(1)
13381358
[0,1,1,2,3,4,1,5]
13391359
[1,2,6]

tests/man.test

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)