1 /*
2 * Copyright 2009 Red Hat, Inc.
3 *
4 * Red Hat licenses this file to you under the Apache License, version 2.0
5 * (the "License"); you may not use this file except in compliance with the
6 * License. You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations
14 * under the License.
15 */
16 package org.jboss.netty.handler.codec.http2;
17
18 import java.io.UnsupportedEncodingException;
19 import java.net.URI;
20 import java.net.URLDecoder;
21 import java.nio.charset.Charset;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.LinkedHashMap;
25 import java.util.List;
26 import java.util.Map;
27
28 import org.jboss.netty.util.CharsetUtil;
29
30 /**
31 * Splits an HTTP query string into a path string and key-value parameter pairs.
32 * This decoder is for one time use only. Create a new instance for each URI:
33 * <pre>
34 * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world");
35 * assert decoder.getPath().equals("/hello");
36 * assert decoder.getParameters().get("recipient").equals("world");
37 * </pre>
38 *
39 * @author <a href="http://www.jboss.org/netty/">The Netty Project</a>
40 * @author Andy Taylor (andy.taylor@jboss.org)
41 * @author <a href="http://gleamynode.net/">Trustin Lee</a>
42 * @author <a href="http://tsunanet.net/">Benoit Sigoure</a>
43 * @version $Rev: 1107 $, $Date: 2012-04-15 19:00:57 +0200 (dim., 15 avr. 2012) $
44 *
45 * @see QueryStringEncoder
46 *
47 * @apiviz.stereotype utility
48 * @apiviz.has org.jboss.netty.handler.codec.http2.HttpRequest oneway - - decodes
49 */
50 public class QueryStringDecoder {
51
52 private static final int DEFAULT_MAX_PARAMS = 1024;
53
54 private final Charset charset;
55 private final String uri;
56 private final boolean hasPath;
57 private final int maxParams;
58 private String path;
59 private Map<String, List<String>> params;
60 private int nParams;
61
62 /**
63 * Creates a new decoder that decodes the specified URI. The decoder will
64 * assume that the query string is encoded in UTF-8.
65 */
66 public QueryStringDecoder(String uri) {
67 this(uri, HttpCodecUtil.DEFAULT_CHARSET);
68 }
69
70 /**
71 * Creates a new decoder that decodes the specified URI encoded in the
72 * specified charset.
73 */
74 public QueryStringDecoder(String uri, boolean hasPath) {
75 this(uri, HttpCodecUtil.DEFAULT_CHARSET, hasPath);
76 }
77
78 /**
79 * Creates a new decoder that decodes the specified URI encoded in the
80 * specified charset.
81 */
82 public QueryStringDecoder(String uri, Charset charset) {
83 this(uri, charset, true);
84 }
85
86 /**
87 * Creates a new decoder that decodes the specified URI encoded in the
88 * specified charset.
89 */
90 public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
91 this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
92 }
93
94 /**
95 * Creates a new decoder that decodes the specified URI encoded in the
96 * specified charset.
97 */
98 public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
99 if (uri == null) {
100 throw new NullPointerException("uri");
101 }
102 if (charset == null) {
103 throw new NullPointerException("charset");
104 }
105 if (maxParams <= 0) {
106 throw new IllegalArgumentException(
107 "maxParams: " + maxParams + " (expected: a positive integer)");
108 }
109
110 // http://en.wikipedia.org/wiki/Query_string
111 this.uri = uri.replace(';', '&');
112 this.charset = charset;
113 this.maxParams = maxParams;
114 this.hasPath = hasPath;
115 }
116
117 /**
118 * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
119 */
120 @Deprecated
121 public QueryStringDecoder(String uri, String charset) {
122 this(uri, Charset.forName(charset));
123 }
124
125 /**
126 * Creates a new decoder that decodes the specified URI. The decoder will
127 * assume that the query string is encoded in UTF-8.
128 */
129 public QueryStringDecoder(URI uri) {
130 this(uri, HttpCodecUtil.DEFAULT_CHARSET);
131 }
132
133 /**
134 * Creates a new decoder that decodes the specified URI encoded in the
135 * specified charset.
136 */
137 public QueryStringDecoder(URI uri, Charset charset) {
138 this(uri, charset, DEFAULT_MAX_PARAMS);
139 }
140
141 /**
142 * Creates a new decoder that decodes the specified URI encoded in the
143 * specified charset.
144 */
145 public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
146 if (uri == null) {
147 throw new NullPointerException("uri");
148 }
149 if (charset == null) {
150 throw new NullPointerException("charset");
151 }
152 if (maxParams <= 0) {
153 throw new IllegalArgumentException(
154 "maxParams: " + maxParams + " (expected: a positive integer)");
155 }
156
157 String rawPath = uri.getRawPath();
158 if (rawPath != null) {
159 hasPath = true;
160 } else {
161 rawPath = "";
162 hasPath = false;
163 }
164 // Also take care of cut of things like "http://localhost"
165 String newUri = rawPath + "?" + uri.getRawQuery();
166
167 // http://en.wikipedia.org/wiki/Query_string
168 this.uri = newUri.replace(';', '&');
169 this.charset = charset;
170 this.maxParams = maxParams;
171
172 }
173
174 /**
175 * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
176 */
177 @Deprecated
178 public QueryStringDecoder(URI uri, String charset) {
179 this(uri, Charset.forName(charset));
180 }
181
182 /**
183 * Returns the decoded path string of the URI.
184 */
185 public String getPath() {
186 if (path == null) {
187 if (!hasPath) {
188 return path = "";
189 }
190
191 int pathEndPos = uri.indexOf('?');
192 if (pathEndPos < 0) {
193 path = uri;
194 } else {
195 return path = uri.substring(0, pathEndPos);
196 }
197 }
198 return path;
199 }
200
201 /**
202 * Returns the decoded key-value parameter pairs of the URI.
203 */
204 public Map<String, List<String>> getParameters() {
205 if (params == null) {
206 if (hasPath) {
207 int pathLength = getPath().length();
208 if (uri.length() == pathLength) {
209 return Collections.emptyMap();
210 }
211 decodeParams(uri.substring(pathLength + 1));
212 } else {
213 if (uri.length() == 0) {
214 return Collections.emptyMap();
215 }
216 decodeParams(uri);
217 }
218 }
219 return params;
220 }
221
222 private void decodeParams(String s) {
223 Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
224 nParams = 0;
225 String name = null;
226 int pos = 0; // Beginning of the unprocessed region
227 int i; // End of the unprocessed region
228 char c = 0; // Current character
229 for (i = 0; i < s.length(); i++) {
230 c = s.charAt(i);
231 if (c == '=' && name == null) {
232 if (pos != i) {
233 name = decodeComponent(s.substring(pos, i), charset);
234 }
235 pos = i + 1;
236 } else if (c == '&') {
237 if (name == null && pos != i) {
238 // We haven't seen an `=' so far but moved forward.
239 // Must be a param of the form '&a&' so add it with
240 // an empty value.
241 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
242 return;
243 }
244 } else if (name != null) {
245 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
246 return;
247 }
248 name = null;
249 }
250 pos = i + 1;
251 }
252 }
253
254 if (pos != i) { // Are there characters we haven't dealt with?
255 if (name == null) { // Yes and we haven't seen any `='.
256 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
257 return;
258 }
259 } else { // Yes and this must be the last value.
260 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
261 return;
262 }
263 }
264 } else if (name != null) { // Have we seen a name without value?
265 if (!addParam(params, name, "")) {
266 return;
267 }
268 }
269 }
270
271 private boolean addParam(Map<String, List<String>> params, String name, String value) {
272 if (nParams >= maxParams) {
273 return false;
274 }
275
276 List<String> values = params.get(name);
277 if (values == null) {
278 values = new ArrayList<String>(1); // Often there's only 1 value.
279 params.put(name, values);
280 }
281 values.add(value);
282 nParams ++;
283 return true;
284 }
285
286 /**
287 * Decodes a bit of an URL encoded by a browser.
288 * <p>
289 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
290 * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
291 * @param s The string to decode (can be empty).
292 * @return The decoded string, or {@code s} if there's nothing to decode.
293 * If the string to decode is {@code null}, returns an empty string.
294 * @throws IllegalArgumentException if the string contains a malformed
295 * escape sequence.
296 */
297 public static String decodeComponent(final String s) {
298 return decodeComponent(s, HttpCodecUtil.DEFAULT_CHARSET);
299 }
300
301 /**
302 * Decodes a bit of an URL encoded by a browser.
303 * <p>
304 * The string is expected to be encoded as per RFC 3986, Section 2.
305 * This is the encoding used by JavaScript functions {@code encodeURI}
306 * and {@code encodeURIComponent}, but not {@code escape}. For example
307 * in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
308 * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
309 * <p>
310 * This is essentially equivalent to calling
311 * <code>{@link URLDecoder#decode(String, String) URLDecoder.decode}(s, charset.name())</code>
312 * except that it's over 2x faster and generates less garbage for the GC.
313 * Actually this function doesn't allocate any memory if there's nothing
314 * to decode, the argument itself is returned.
315 * @param s The string to decode (can be empty).
316 * @param charset The charset to use to decode the string (should really
317 * be {@link CharsetUtil#UTF_8}.
318 * @return The decoded string, or {@code s} if there's nothing to decode.
319 * If the string to decode is {@code null}, returns an empty string.
320 * @throws IllegalArgumentException if the string contains a malformed
321 * escape sequence.
322 */
323 @SuppressWarnings("fallthrough")
324 public static String decodeComponent(final String s,
325 final Charset charset) {
326 if (s == null) {
327 return "";
328 }
329 final int size = s.length();
330 boolean modified = false;
331 for (int i = 0; i < size; i++) {
332 final char c = s.charAt(i);
333 switch (c) {
334 case '%':
335 i++; // We can skip at least one char, e.g. `%%'.
336 // Fall through.
337 case '+':
338 modified = true;
339 break;
340 }
341 }
342 if (!modified) {
343 return s;
344 }
345 final byte[] buf = new byte[size];
346 int pos = 0; // position in `buf'.
347 for (int i = 0; i < size; i++) {
348 char c = s.charAt(i);
349 switch (c) {
350 case '+':
351 buf[pos++] = ' '; // "+" -> " "
352 break;
353 case '%':
354 if (i == size - 1) {
355 throw new IllegalArgumentException("unterminated escape"
356 + " sequence at end of string: " + s);
357 }
358 c = s.charAt(++i);
359 if (c == '%') {
360 buf[pos++] = '%'; // "%%" -> "%"
361 break;
362 } else if (i == size - 1) {
363 throw new IllegalArgumentException("partial escape"
364 + " sequence at end of string: " + s);
365 }
366 c = decodeHexNibble(c);
367 final char c2 = decodeHexNibble(s.charAt(++i));
368 if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
369 throw new IllegalArgumentException(
370 "invalid escape sequence `%" + s.charAt(i - 1)
371 + s.charAt(i) + "' at index " + (i - 2)
372 + " of: " + s);
373 }
374 c = (char) (c * 16 + c2);
375 // Fall through.
376 default:
377 buf[pos++] = (byte) c;
378 break;
379 }
380 }
381 try {
382 return new String(buf, 0, pos, charset.name());
383 } catch (UnsupportedEncodingException e) {
384 throw new IllegalArgumentException("unsupported encoding: " + charset.name());
385 }
386 }
387
388 /**
389 * Helper to decode half of a hexadecimal number from a string.
390 * @param c The ASCII character of the hexadecimal number to decode.
391 * Must be in the range {@code [0-9a-fA-F]}.
392 * @return The hexadecimal value represented in the ASCII character
393 * given, or {@link Character#MAX_VALUE} if the character is invalid.
394 */
395 private static char decodeHexNibble(final char c) {
396 if ('0' <= c && c <= '9') {
397 return (char) (c - '0');
398 } else if ('a' <= c && c <= 'f') {
399 return (char) (c - 'a' + 10);
400 } else if ('A' <= c && c <= 'F') {
401 return (char) (c - 'A' + 10);
402 } else {
403 return Character.MAX_VALUE;
404 }
405 }
406 }