View Javadoc

1   /*
2    * Copyright 2009 Red Hat, Inc.
3    *
4    * Red Hat licenses this file to you under the Apache License, version 2.0
5    * (the "License"); you may not use this file except in compliance with the
6    * License.  You may obtain a copy of the License at:
7    *
8    *    http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
13   * License for the specific language governing permissions and limitations
14   * under the License.
15   */
16  package org.jboss.netty.handler.codec.http2;
17  
18  import java.io.UnsupportedEncodingException;
19  import java.net.URI;
20  import java.net.URLDecoder;
21  import java.nio.charset.Charset;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.LinkedHashMap;
25  import java.util.List;
26  import java.util.Map;
27  
28  import org.jboss.netty.util.CharsetUtil;
29  
30  /**
31   * Splits an HTTP query string into a path string and key-value parameter pairs.
32   * This decoder is for one time use only.  Create a new instance for each URI:
33   * <pre>
34   * {@link QueryStringDecoder} decoder = new {@link QueryStringDecoder}("/hello?recipient=world");
35   * assert decoder.getPath().equals("/hello");
36   * assert decoder.getParameters().get("recipient").get(0).equals("world");
37   * </pre>
38   *
39   * @author <a href="http://www.jboss.org/netty/">The Netty Project</a>
40   * @author Andy Taylor (andy.taylor@jboss.org)
41   * @author <a href="http://gleamynode.net/">Trustin Lee</a>
42   * @author <a href="http://tsunanet.net/">Benoit Sigoure</a>
43   * @version $Rev: 1107 $, $Date: 2012-04-15 19:00:57 +0200 (dim., 15 avr. 2012) $
44   *
45   * @see QueryStringEncoder
46   *
47   * @apiviz.stereotype utility
48   * @apiviz.has        org.jboss.netty.handler.codec.http2.HttpRequest oneway - - decodes
49   */
50  public class QueryStringDecoder {
51  
52      private static final int DEFAULT_MAX_PARAMS = 1024;
53      
54      private final Charset charset;
55      private final String uri;
56      private final boolean hasPath;
57      private final int maxParams;
58      private String path;
59      private Map<String, List<String>> params;
60      private int nParams;
61  
62      /**
63       * Creates a new decoder that decodes the specified URI. The decoder will
64       * assume that the query string is encoded in UTF-8.
65       */
66      public QueryStringDecoder(String uri) {
67          this(uri, HttpCodecUtil.DEFAULT_CHARSET);
68      }
69  
70      /**
71       * Creates a new decoder that decodes the specified URI encoded in the
72       * specified charset.
73       */
74      public QueryStringDecoder(String uri, boolean hasPath) {
75          this(uri, HttpCodecUtil.DEFAULT_CHARSET, hasPath);
76      }
77  
78      /**
79       * Creates a new decoder that decodes the specified URI encoded in the
80       * specified charset.
81       */
82      public QueryStringDecoder(String uri, Charset charset) {
83          this(uri, charset, true);
84      }
85  
86      /**
87       * Creates a new decoder that decodes the specified URI encoded in the
88       * specified charset.
89       */
90      public QueryStringDecoder(String uri, Charset charset, boolean hasPath) {
91          this(uri, charset, hasPath, DEFAULT_MAX_PARAMS);
92      }
93  
94      /**
95       * Creates a new decoder that decodes the specified URI encoded in the
96       * specified charset.
97       */
98      public QueryStringDecoder(String uri, Charset charset, boolean hasPath, int maxParams) {
99          if (uri == null) {
100             throw new NullPointerException("uri");
101         }
102         if (charset == null) {
103             throw new NullPointerException("charset");
104         }
105         if (maxParams <= 0) {
106             throw new IllegalArgumentException(
107                     "maxParams: " + maxParams + " (expected: a positive integer)");
108         }
109 
110         // http://en.wikipedia.org/wiki/Query_string
111         this.uri = uri.replace(';', '&');
112         this.charset = charset;
113         this.maxParams = maxParams;
114         this.hasPath = hasPath;
115     }
116 
117     /**
118      * @deprecated Use {@link #QueryStringDecoder(String, Charset)} instead.
119      */
120     @Deprecated
121     public QueryStringDecoder(String uri, String charset) {
122         this(uri, Charset.forName(charset));
123     }
124 
125     /**
126      * Creates a new decoder that decodes the specified URI. The decoder will
127      * assume that the query string is encoded in UTF-8.
128      */
129     public QueryStringDecoder(URI uri) {
130         this(uri, HttpCodecUtil.DEFAULT_CHARSET);
131     }
132 
133     /**
134      * Creates a new decoder that decodes the specified URI encoded in the
135      * specified charset.
136      */
137     public QueryStringDecoder(URI uri, Charset charset) {
138         this(uri, charset, DEFAULT_MAX_PARAMS);
139     }
140     
141     /**
142      * Creates a new decoder that decodes the specified URI encoded in the
143      * specified charset.
144      */
145     public QueryStringDecoder(URI uri, Charset charset, int maxParams) {
146         if (uri == null) {
147             throw new NullPointerException("uri");
148         }
149         if (charset == null) {
150             throw new NullPointerException("charset");
151         }
152         if (maxParams <= 0) {
153             throw new IllegalArgumentException(
154                     "maxParams: " + maxParams + " (expected: a positive integer)");
155         }
156         
157         String rawPath = uri.getRawPath();
158         if (rawPath != null) {
159             hasPath = true;
160         } else {
161             rawPath = "";
162             hasPath = false;
163         }
164         // Also take care of cut of things like "http://localhost" 
165         String newUri = rawPath + "?" + uri.getRawQuery();
166 
167         // http://en.wikipedia.org/wiki/Query_string
168         this.uri = newUri.replace(';', '&');
169         this.charset = charset;
170         this.maxParams = maxParams;
171 
172     }
173 
174     /**
175      * @deprecated Use {@link #QueryStringDecoder(URI, Charset)} instead.
176      */
177     @Deprecated
178     public QueryStringDecoder(URI uri, String charset) {
179         this(uri, Charset.forName(charset));
180     }
181 
182     /**
183      * Returns the decoded path string of the URI.
184      */
185     public String getPath() {
186         if (path == null) {
187             if (!hasPath) {
188                 return path = "";
189             }
190 
191             int pathEndPos = uri.indexOf('?');
192             if (pathEndPos < 0) {
193                 path = uri;
194             } else {
195                 return path = uri.substring(0, pathEndPos);
196             }
197         }
198         return path;
199     }
200 
201     /**
202      * Returns the decoded key-value parameter pairs of the URI.
203      */
204     public Map<String, List<String>> getParameters() {
205         if (params == null) {
206             if (hasPath) {
207                 int pathLength = getPath().length();
208                 if (uri.length() == pathLength) {
209                     return Collections.emptyMap();
210                 }
211                 decodeParams(uri.substring(pathLength + 1));
212             } else {
213                 if (uri.length() == 0) {
214                     return Collections.emptyMap();
215                 }
216                 decodeParams(uri);
217             }
218         }
219         return params;
220     }
221 
222     private void decodeParams(String s) {
223         Map<String, List<String>> params = this.params = new LinkedHashMap<String, List<String>>();
224         nParams = 0;
225         String name = null;
226         int pos = 0; // Beginning of the unprocessed region
227         int i;       // End of the unprocessed region
228         char c = 0;  // Current character
229         for (i = 0; i < s.length(); i++) {
230             c = s.charAt(i);
231             if (c == '=' && name == null) {
232                 if (pos != i) {
233                     name = decodeComponent(s.substring(pos, i), charset);
234                 }
235                 pos = i + 1;
236             } else if (c == '&') {
237                 if (name == null && pos != i) {
238                     // We haven't seen an `=' so far but moved forward.
239                     // Must be a param of the form '&a&' so add it with
240                     // an empty value.
241                     if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
242                         return;
243                     }
244                 } else if (name != null) {
245                     if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
246                         return;
247                     }
248                     name = null;
249                 }
250                 pos = i + 1;
251             }
252         }
253 
254         if (pos != i) {  // Are there characters we haven't dealt with?
255             if (name == null) {     // Yes and we haven't seen any `='.
256                 if (!addParam(params, decodeComponent(s.substring(pos, i), charset), "")) {
257                     return;
258                 }
259             } else {                // Yes and this must be the last value.
260                 if (!addParam(params, name, decodeComponent(s.substring(pos, i), charset))) {
261                     return;
262                 }
263             }
264         } else if (name != null) {  // Have we seen a name without value?
265             if (!addParam(params, name, "")) {
266                 return;
267             }
268         }
269     }
270 
271     private boolean addParam(Map<String, List<String>> params, String name, String value) {
272         if (nParams >= maxParams) {
273             return false;
274         }
275 
276         List<String> values = params.get(name);
277         if (values == null) {
278             values = new ArrayList<String>(1);  // Often there's only 1 value.
279             params.put(name, values);
280         }
281         values.add(value);
282         nParams ++;
283         return true;
284     }
285 
286     /**
287      * Decodes a bit of an URL encoded by a browser.
288      * <p>
289      * This is equivalent to calling {@link #decodeComponent(String, Charset)}
290      * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
291      * @param s The string to decode (can be empty).
292      * @return The decoded string, or {@code s} if there's nothing to decode.
293      * If the string to decode is {@code null}, returns an empty string.
294      * @throws IllegalArgumentException if the string contains a malformed
295      * escape sequence.
296      */
297     public static String decodeComponent(final String s) {
298         return decodeComponent(s, HttpCodecUtil.DEFAULT_CHARSET);
299     }
300 
301     /**
302      * Decodes a bit of an URL encoded by a browser.
303      * <p>
304      * The string is expected to be encoded as per RFC 3986, Section 2.
305      * This is the encoding used by JavaScript functions {@code encodeURI}
306      * and {@code encodeURIComponent}, but not {@code escape}.  For example
307      * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
308      * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
309      * <p>
310      * This is essentially equivalent to calling
311      *   <code>{@link URLDecoder#decode(String, String) URLDecoder.decode}(s, charset.name())</code>
312      * except that it's over 2x faster and generates less garbage for the GC.
313      * Actually this function doesn't allocate any memory if there's nothing
314      * to decode, the argument itself is returned.
315      * @param s The string to decode (can be empty).
316      * @param charset The charset to use to decode the string (should really
317      * be {@link CharsetUtil#UTF_8}.
318      * @return The decoded string, or {@code s} if there's nothing to decode.
319      * If the string to decode is {@code null}, returns an empty string.
320      * @throws IllegalArgumentException if the string contains a malformed
321      * escape sequence.
322      */
323     @SuppressWarnings("fallthrough")
324     public static String decodeComponent(final String s,
325                                          final Charset charset) {
326         if (s == null) {
327             return "";
328         }
329         final int size = s.length();
330         boolean modified = false;
331         for (int i = 0; i < size; i++) {
332             final char c = s.charAt(i);
333             switch (c) {
334                 case '%':
335                     i++;  // We can skip at least one char, e.g. `%%'.
336                     // Fall through.
337                 case '+':
338                     modified = true;
339                     break;
340             }
341         }
342         if (!modified) {
343             return s;
344         }
345         final byte[] buf = new byte[size];
346         int pos = 0;  // position in `buf'.
347         for (int i = 0; i < size; i++) {
348             char c = s.charAt(i);
349             switch (c) {
350                 case '+':
351                     buf[pos++] = ' ';  // "+" -> " "
352                     break;
353                 case '%':
354                     if (i == size - 1) {
355                         throw new IllegalArgumentException("unterminated escape"
356                                 + " sequence at end of string: " + s);
357                     }
358                     c = s.charAt(++i);
359                     if (c == '%') {
360                         buf[pos++] = '%';  // "%%" -> "%"
361                         break;
362                     } else if (i == size - 1) {
363                         throw new IllegalArgumentException("partial escape"
364                                 + " sequence at end of string: " + s);
365                     }
366                     c = decodeHexNibble(c);
367                     final char c2 = decodeHexNibble(s.charAt(++i));
368                     if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
369                         throw new IllegalArgumentException(
370                                 "invalid escape sequence `%" + s.charAt(i - 1)
371                                 + s.charAt(i) + "' at index " + (i - 2)
372                                 + " of: " + s);
373                     }
374                     c = (char) (c * 16 + c2);
375                     // Fall through.
376                 default:
377                     buf[pos++] = (byte) c;
378                     break;
379             }
380         }
381         try {
382             return new String(buf, 0, pos, charset.name());
383         } catch (UnsupportedEncodingException e) {
384             throw new IllegalArgumentException("unsupported encoding: " + charset.name());
385         }
386     }
387 
388     /**
389      * Helper to decode half of a hexadecimal number from a string.
390      * @param c The ASCII character of the hexadecimal number to decode.
391      * Must be in the range {@code [0-9a-fA-F]}.
392      * @return The hexadecimal value represented in the ASCII character
393      * given, or {@link Character#MAX_VALUE} if the character is invalid.
394      */
395     private static char decodeHexNibble(final char c) {
396         if ('0' <= c && c <= '9') {
397             return (char) (c - '0');
398         } else if ('a' <= c && c <= 'f') {
399             return (char) (c - 'a' + 10);
400         } else if ('A' <= c && c <= 'F') {
401             return (char) (c - 'A' + 10);
402         } else {
403             return Character.MAX_VALUE;
404         }
405     }
406 }