Skip to content

Commit 3a7d7ba

Browse files
authored
Add MultipartDecodingMode to configure filename decoding in multipart request (#6465)
Motivation: UTF-8 is the de facto standard for encoding the filename parameter in multipart/form-data, but Armeria uses ISO-8859-1. Also, other clients might use percent-encoding. https://github.com/helidon-io/helidon/blob/7dce029dcbe0cdda36b1b84eb24ba1eb9f9da2eb/http/http/src/main/java/io/helidon/http/ContentDisposition.java#L250-L256 Modifications: - Introduced the `MultipartFilenameDecodingMode` enum with three distinct strategies: UTF_8, ISO_8859_1, and URL_DECODING. - Added the `defaultMultipartFilenameDecodingMode` flag to `Flags`, which determines the default strategy by reading the `com.linecorp.armeria.defaultMultipartFilenameDecodingMode` JVM system property. - Additionally, the annotated service for multipart file uploads has been updated to use a UUID-based filename on the server side. This is a defensive measure to: - Prevent potential filename corruption. - Avoid issues where a long filename might exceed operating system path length limits. Result: - Server administrators can now explicitly configure the decoding strategy. - [Breaking Change] The default decoding mode is now explicitly UTF-8 to align with the de facto standard of modern web clients. If you want to use the previous behaviour, you can restore it by setting the following JVM system property: `-Dcom.linecorp.armeria.defaultMultipartFilenameDecodingMode=ISO_8859_1`.
1 parent 414a352 commit 3a7d7ba

File tree

11 files changed

+624
-89
lines changed

11 files changed

+624
-89
lines changed

core/src/main/java/com/linecorp/armeria/common/ContentDisposition.java

Lines changed: 202 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,25 @@
3838

3939
import java.io.ByteArrayOutputStream;
4040
import java.io.UnsupportedEncodingException;
41+
import java.net.URLDecoder;
4142
import java.nio.charset.Charset;
4243
import java.nio.charset.StandardCharsets;
44+
import java.util.Base64;
45+
import java.util.BitSet;
4346
import java.util.List;
44-
import java.util.Map;
47+
import java.util.Locale;
4548
import java.util.Objects;
49+
import java.util.regex.Matcher;
50+
import java.util.regex.Pattern;
51+
52+
import org.slf4j.Logger;
53+
import org.slf4j.LoggerFactory;
4654

4755
import com.google.common.base.Ascii;
4856
import com.google.common.collect.ImmutableList;
49-
import com.google.common.collect.ImmutableMap;
5057

5158
import com.linecorp.armeria.common.annotation.Nullable;
59+
import com.linecorp.armeria.common.multipart.MultipartFilenameDecodingMode;
5260
import com.linecorp.armeria.internal.common.util.TemporaryThreadLocals;
5361

5462
/**
@@ -62,12 +70,34 @@
6270
*/
6371
public final class ContentDisposition {
6472

65-
// Forked from https://github.com/spring-projects/spring-framework/blob/d9ccd618ea9cbf339eb5639d24d5a5fabe8157b5/spring-web/src/main/java/org/springframework/http/ContentDisposition.java
73+
// Forked from https://github.com/spring-projects/spring-framework/blob/e5fccd1fbbf09f1e253b10ebfc12ad339d0196b5/spring-web/src/main/java/org/springframework/http/ContentDisposition.java
74+
75+
private static final Logger logger = LoggerFactory.getLogger(ContentDisposition.class);
6676

6777
private static final ContentDisposition EMPTY = new ContentDisposition("", null, null, null);
6878

69-
private static final Map<String, Charset> supportedCharsets =
70-
ImmutableMap.of("utf-8", UTF_8, "iso-8859-1", ISO_8859_1);
79+
private static final Pattern BASE64_ENCODED_PATTERN =
80+
Pattern.compile("=\\?([0-9a-zA-Z-_]+)\\?B\\?([+/0-9a-zA-Z]+=*)\\?=");
81+
82+
// Printable ASCII other than "?" or SPACE
83+
private static final Pattern QUOTED_PRINTABLE_ENCODED_PATTERN =
84+
Pattern.compile("=\\?([0-9a-zA-Z-_]+)\\?Q\\?([!->@-~]+)\\?=");
85+
86+
private static final MultipartFilenameDecodingMode MULTIPART_FILENAME_DECODING_MODE =
87+
Flags.defaultMultipartFilenameDecodingMode();
88+
89+
private static final BitSet PRINTABLE = new BitSet(256);
90+
91+
static {
    // RFC 2045, Section 6.7, and RFC 2047, Section 4.2:
    // start from every visible ASCII character (33..126; the upper bound of set() is exclusive) ...
    PRINTABLE.set(33, 127);
    // ... then drop the characters that must always be escaped inside an encoded word.
    PRINTABLE.clear('"');
    PRINTABLE.clear('=');
    PRINTABLE.clear('?');
    PRINTABLE.clear('_');
}
71101

72102
/**
73103
* Returns a new {@link ContentDispositionBuilder} with the specified {@code type}.
@@ -146,7 +176,7 @@ public static ContentDisposition parse(String contentDisposition) {
146176
final String part = parts.get(i);
147177
final int eqIndex = part.indexOf('=');
148178
if (eqIndex != -1) {
149-
final String attribute = part.substring(0, eqIndex);
179+
final String attribute = part.substring(0, eqIndex).toLowerCase(Locale.ROOT);
150180
final String value;
151181
if (part.startsWith("\"", eqIndex + 1) && part.endsWith("\"")) {
152182
value = part.substring(eqIndex + 2, part.length() - 1);
@@ -161,14 +191,61 @@ public static ContentDisposition parse(String contentDisposition) {
161191
final int idx2 = value.indexOf('\'', idx1 + 1);
162192
if (idx1 != -1 && idx2 != -1) {
163193
final String charsetString = value.substring(0, idx1).trim();
164-
charset = supportedCharsets.getOrDefault(Ascii.toLowerCase(charsetString), ISO_8859_1);
194+
charset = Charset.forName(charsetString);
195+
if (UTF_8 != charset && ISO_8859_1 != charset) {
196+
throw new IllegalArgumentException("Charset must be UTF-8 or ISO-8859-1" +
197+
" for filename*: " + charsetString);
198+
}
199+
165200
filename = decodeFilename(value.substring(idx2 + 1), charset);
166201
} else {
167202
// US ASCII
168203
filename = decodeFilename(value, StandardCharsets.US_ASCII);
169204
}
170205
} else if ("filename".equals(attribute) && (filename == null)) {
171-
filename = value;
206+
if (value.startsWith("=?")) {
207+
Matcher matcher = BASE64_ENCODED_PATTERN.matcher(value);
208+
if (matcher.find()) {
209+
final Base64.Decoder decoder = Base64.getDecoder();
210+
final StringBuilder builder = new StringBuilder();
211+
do {
212+
charset = Charset.forName(matcher.group(1));
213+
final byte[] decoded = decoder.decode(matcher.group(2));
214+
builder.append(new String(decoded, charset));
215+
}
216+
while (matcher.find());
217+
218+
filename = builder.toString();
219+
} else {
220+
matcher = QUOTED_PRINTABLE_ENCODED_PATTERN.matcher(value);
221+
if (matcher.find()) {
222+
final StringBuilder builder = new StringBuilder();
223+
do {
224+
charset = Charset.forName(matcher.group(1));
225+
final String decoded =
226+
decodeQuotedPrintableFilename(matcher.group(2), charset);
227+
builder.append(decoded);
228+
}
229+
while (matcher.find());
230+
231+
filename = builder.toString();
232+
} else {
233+
filename = value;
234+
}
235+
}
236+
} else if (value.indexOf('\\') != -1) {
237+
filename = decodeQuotedPairs(value);
238+
} else if (MULTIPART_FILENAME_DECODING_MODE == MultipartFilenameDecodingMode.URL_DECODING) {
239+
try {
240+
filename = URLDecoder.decode(value, "UTF-8");
241+
} catch (Exception e) {
242+
logger.debug("Failed to URL decode filename: {}, contentDisposition: {}",
243+
value, contentDisposition, e);
244+
filename = value;
245+
}
246+
} else {
247+
filename = value;
248+
}
172249
}
173250
} else {
174251
throw new IllegalArgumentException("Invalid content disposition format: " + contentDisposition);
@@ -303,6 +380,10 @@ private static String decodeFilename(String filename, Charset charset) {
303380
filename + " (charset: " + charset + ')');
304381
}
305382
}
383+
return copyToString(baos, charset);
384+
}
385+
386+
private static String copyToString(ByteArrayOutputStream baos, Charset charset) {
306387
try {
307388
return baos.toString(charset.name());
308389
} catch (UnsupportedEncodingException e) {
@@ -364,6 +445,46 @@ private static void encodeFilename(StringBuilder sb, String input, Charset chars
364445
}
365446
}
366447

448+
private static String decodeQuotedPrintableFilename(String filename, Charset charset) {
449+
final byte[] value = filename.getBytes(StandardCharsets.US_ASCII);
450+
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
451+
int index = 0;
452+
while (index < value.length) {
453+
final byte b = value[index];
454+
if (b == '_') { // RFC 2047, section 4.2, rule (2)
455+
baos.write(' ');
456+
index++;
457+
} else if (b == '=' && index < value.length - 2) {
458+
final char[] array = {(char) value[index + 1], (char) value[index + 2]};
459+
baos.write(Integer.parseInt(String.valueOf(array), 16));
460+
index += 3;
461+
} else {
462+
baos.write(b);
463+
index++;
464+
}
465+
}
466+
return copyToString(baos, charset);
467+
}
468+
469+
private static String decodeQuotedPairs(String filename) {
470+
final StringBuilder sb = new StringBuilder();
471+
final int length = filename.length();
472+
for (int i = 0; i < length; i++) {
473+
final char c = filename.charAt(i);
474+
if (filename.charAt(i) == '\\' && i + 1 < length) {
475+
i++;
476+
final char next = filename.charAt(i);
477+
if (next != '"' && next != '\\') {
478+
sb.append(c);
479+
}
480+
sb.append(next);
481+
} else {
482+
sb.append(c);
483+
}
484+
}
485+
return sb.toString();
486+
}
487+
367488
@Override
368489
public boolean equals(@Nullable Object other) {
369490
if (this == other) {
@@ -405,17 +526,87 @@ public String asHeaderValue() {
405526
if (filename != null) {
406527
if (charset == null || StandardCharsets.US_ASCII.equals(charset)) {
407528
sb.append("; filename=\"");
408-
escapeQuotationsInFilename(sb, filename);
409-
sb.append('\"');
529+
sb.append(encodeQuotedPairs(this.filename)).append('\"');
410530
} else {
531+
sb.append("; filename=\"");
532+
sb.append(encodeQuotedPrintableFilename(filename, charset)).append('\"');
411533
sb.append("; filename*=");
412-
encodeFilename(sb, filename, charset);
534+
sb.append(encodeRfc5987Filename(filename, charset));
413535
}
414536
}
415537
return strVal = sb.toString();
416538
}
417539
}
418540

541+
private static String encodeQuotedPairs(String filename) {
542+
if (filename.indexOf('"') == -1 && filename.indexOf('\\') == -1) {
543+
return filename;
544+
}
545+
final StringBuilder sb = new StringBuilder();
546+
for (int i = 0; i < filename.length(); i++) {
547+
final char c = filename.charAt(i);
548+
if (c == '"' || c == '\\') {
549+
sb.append('\\');
550+
}
551+
sb.append(c);
552+
}
553+
return sb.toString();
554+
}
555+
556+
/**
557+
* Encode the given header field param as described in RFC 2047.
558+
*
559+
* @see <a href="https://datatracker.ietf.org/doc/html/rfc2047">RFC 2047</a>
560+
*/
561+
private static String encodeQuotedPrintableFilename(String filename, Charset charset) {
562+
final byte[] source = filename.getBytes(charset);
563+
final StringBuilder sb = new StringBuilder(source.length << 1);
564+
sb.append("=?");
565+
sb.append(charset.name());
566+
sb.append("?Q?");
567+
for (byte b : source) {
568+
if (b == 32) { // RFC 2047, section 4.2, rule (2)
569+
sb.append('_');
570+
} else if (isPrintable(b)) {
571+
sb.append((char) b);
572+
} else {
573+
sb.append('=');
574+
sb.append(String.format("%02X", b & 0xFF));
575+
}
576+
}
577+
sb.append("?=");
578+
return sb.toString();
579+
}
580+
581+
private static boolean isPrintable(byte c) {
582+
int b = c;
583+
if (b < 0) {
584+
b = 256 + b;
585+
}
586+
return PRINTABLE.get(b);
587+
}
588+
589+
/**
590+
* Encode the given header field param as describe in RFC 5987.
591+
*
592+
* @see <a href="https://datatracker.ietf.org/doc/html/rfc5987">RFC 5987</a>
593+
*/
594+
private static String encodeRfc5987Filename(String input, Charset charset) {
595+
final byte[] source = input.getBytes(charset);
596+
final StringBuilder sb = new StringBuilder(source.length << 1);
597+
sb.append(charset.name());
598+
sb.append("''");
599+
for (byte b : source) {
600+
if (isRFC5987AttrChar(b)) {
601+
sb.append((char) b);
602+
} else {
603+
sb.append('%');
604+
sb.append(String.format("%02X", b & 0xFF));
605+
}
606+
}
607+
return sb.toString();
608+
}
609+
419610
/**
420611
* Returns the header value for this content disposition as defined in RFC 6266.
421612
*/

core/src/main/java/com/linecorp/armeria/common/DefaultFlagsProvider.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.google.common.collect.ImmutableSet;
2828

2929
import com.linecorp.armeria.client.ResponseTimeoutMode;
30+
import com.linecorp.armeria.common.multipart.MultipartFilenameDecodingMode;
3031
import com.linecorp.armeria.common.util.Sampler;
3132
import com.linecorp.armeria.common.util.TlsEngineType;
3233
import com.linecorp.armeria.common.util.TransportType;
@@ -480,6 +481,11 @@ public Boolean allowSemicolonInPathComponent() {
480481
return false;
481482
}
482483

484+
@Override
485+
public MultipartFilenameDecodingMode defaultMultipartFilenameDecodingMode() {
486+
return MultipartFilenameDecodingMode.UTF_8;
487+
}
488+
483489
@Override
484490
public Path defaultMultipartUploadsLocation() {
485491
return Paths.get(System.getProperty("java.io.tmpdir") +

core/src/main/java/com/linecorp/armeria/common/Flags.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import com.linecorp.armeria.client.retry.RetryingRpcClient;
5656
import com.linecorp.armeria.common.annotation.Nullable;
5757
import com.linecorp.armeria.common.annotation.UnstableApi;
58+
import com.linecorp.armeria.common.multipart.MultipartFilenameDecodingMode;
5859
import com.linecorp.armeria.common.util.Exceptions;
5960
import com.linecorp.armeria.common.util.Sampler;
6061
import com.linecorp.armeria.common.util.SystemInfo;
@@ -431,6 +432,10 @@ private static boolean validateTransportType(TransportType transportType, String
431432
private static final boolean ALLOW_SEMICOLON_IN_PATH_COMPONENT =
432433
getValue(FlagsProvider::allowSemicolonInPathComponent, "allowSemicolonInPathComponent");
433434

435+
private static final MultipartFilenameDecodingMode DEFAULT_MULTIPART_DECODING_MODE =
436+
getValue(FlagsProvider::defaultMultipartFilenameDecodingMode,
437+
"defaultMultipartFilenameDecodingMode");
438+
434439
private static final Path DEFAULT_MULTIPART_UPLOADS_LOCATION =
435440
getValue(FlagsProvider::defaultMultipartUploadsLocation, "defaultMultipartUploadsLocation");
436441

@@ -1532,6 +1537,32 @@ public static boolean useLegacyRouteDecoratorOrdering() {
15321537
return USE_LEGACY_ROUTE_DECORATOR_ORDERING;
15331538
}
15341539

1540+
/**
1541+
* Returns the default decoding mode for a {@code filename} parameter in a
1542+
* {@link HttpHeaderNames#CONTENT_DISPOSITION} header of a multipart request.
1543+
*
1544+
* <p>This flag determines how a server interprets the raw bytes of a {@code filename} parameter when it
1545+
* contains non-ASCII characters.
1546+
* <ul>
1547+
* <li>{@link MultipartFilenameDecodingMode#UTF_8}:
1548+
* (Default) Interprets the filename as a raw UTF-8 string</li>
1549+
* <li>{@link MultipartFilenameDecodingMode#ISO_8859_1}:
1550+
* Interprets the filename as a raw ISO-8859-1 string</li>
1551+
* <li>{@link MultipartFilenameDecodingMode#URL_DECODING}:
1552+
* URL-decodes the filename using the UTF-8 charset.
1553+
* Use this for compatibility with clients that percent-encode the filename.</li>
1554+
* </ul>
1555+
*
1556+
* <p>The default value of this flag is {@link MultipartFilenameDecodingMode#UTF_8}.
1557+
* Specify the
1558+
* {@code -Dcom.linecorp.armeria.defaultMultipartFilenameDecodingMode=<UTF_8|ISO_8859_1|URL_DECODING>}
1559+
* JVM option to override the default value.
1560+
*/
1561+
@UnstableApi
1562+
public static MultipartFilenameDecodingMode defaultMultipartFilenameDecodingMode() {
1563+
return DEFAULT_MULTIPART_DECODING_MODE;
1564+
}
1565+
15351566
/**
15361567
* Returns the {@link Path} that is used to store the files uploaded from {@code multipart/form-data}
15371568
* requests.

core/src/main/java/com/linecorp/armeria/common/FlagsProvider.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import com.linecorp.armeria.client.retry.RetryingRpcClient;
4141
import com.linecorp.armeria.common.annotation.Nullable;
4242
import com.linecorp.armeria.common.annotation.UnstableApi;
43+
import com.linecorp.armeria.common.multipart.MultipartFilenameDecodingMode;
4344
import com.linecorp.armeria.common.util.Exceptions;
4445
import com.linecorp.armeria.common.util.Sampler;
4546
import com.linecorp.armeria.common.util.SystemInfo;
@@ -1138,6 +1139,15 @@ default Boolean allowSemicolonInPathComponent() {
11381139
return null;
11391140
}
11401141

1142+
/**
1143+
* Returns the {@link MultipartFilenameDecodingMode} that is used to determine how to decode
1144+
* a {@code filename} parameter in a {@link HttpHeaderNames#CONTENT_DISPOSITION} header.
1145+
*/
1146+
@Nullable
1147+
default MultipartFilenameDecodingMode defaultMultipartFilenameDecodingMode() {
1148+
return null;
1149+
}
1150+
11411151
/**
11421152
* Returns the {@link Path} that is used to store the files uploaded from {@code multipart/form-data}
11431153
* requests.

0 commit comments

Comments
 (0)