jpayne@69
|
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
|
jpayne@69
|
21
|
jpayne@69
|
22 #pragma once
|
jpayne@69
|
23
|
jpayne@69
|
24 #include <kj/string.h>
|
jpayne@69
|
25 #include <kj/vector.h>
|
jpayne@69
|
26 #include <inttypes.h>
|
jpayne@69
|
27
|
jpayne@69
|
28 KJ_BEGIN_HEADER
|
jpayne@69
|
29
|
jpayne@69
|
30 namespace kj {
|
jpayne@69
|
31
|
jpayne@69
|
struct UrlOptions {
  // A bag of options that you can pass to Url::parse()/tryParse() to customize the parser's
  // behavior.
  //
  // A copy of this options struct will be stored in the parsed Url object, at which point it
  // controls the behavior of the serializer in Url::toString().
  //
  // The struct is a plain aggregate of flags with in-class defaults, so a default-constructed
  // value (`UrlOptions {}`) selects percent-decoding and the removal of empty components.

  bool percentDecode = true;
  // True if URL components should be automatically percent-decoded during parsing, and
  // percent-encoded during serialization.

  bool allowEmpty = false;
  // Whether or not to allow empty path and query components when parsing; otherwise, they are
  // silently removed. In other words, setting this false causes consecutive slashes in the path
  // or consecutive ampersands in the query to be collapsed into one, whereas if true then they
  // produce empty components.
};
|
jpayne@69
|
49
|
jpayne@69
|
50 struct Url {
|
jpayne@69
|
51 // Represents a URL (or, more accurately, a URI, but whatever).
|
jpayne@69
|
52 //
|
jpayne@69
|
53 // Can be parsed from a string and composed back into a string.
|
jpayne@69
|
54
|
jpayne@69
|
55 String scheme;
|
jpayne@69
|
56 // E.g. "http", "https".
|
jpayne@69
|
57
|
jpayne@69
|
58 struct UserInfo {
|
jpayne@69
|
59 String username;
|
jpayne@69
|
60 Maybe<String> password;
|
jpayne@69
|
61 };
|
jpayne@69
|
62
|
jpayne@69
|
63 Maybe<UserInfo> userInfo;
|
jpayne@69
|
64 // Username / password.
|
jpayne@69
|
65
|
jpayne@69
|
66 String host;
|
jpayne@69
|
67 // Hostname, including port if specified. We choose not to parse out the port because KJ's
|
jpayne@69
|
68 // network address parsing functions already accept addresses containing port numbers, and
|
jpayne@69
|
69 // because most web standards don't actually want to separate host and port.
|
jpayne@69
|
70
|
jpayne@69
|
71 Vector<String> path;
|
jpayne@69
|
72 bool hasTrailingSlash = false;
|
jpayne@69
|
73 // Path, split on '/' characters. Note that the individual components of `path` could contain
|
jpayne@69
|
74 // '/' characters if they were percent-encoded in the original URL.
|
jpayne@69
|
75 //
|
jpayne@69
|
76 // No component of the path is allowed to be "", ".", nor ".."; if such components are present,
|
jpayne@69
|
77 // toString() will throw. Note that parse() and parseRelative() automatically resolve such
|
jpayne@69
|
78 // components.
|
jpayne@69
|
79
|
jpayne@69
|
80 struct QueryParam {
|
jpayne@69
|
81 String name;
|
jpayne@69
|
82 String value;
|
jpayne@69
|
83 };
|
jpayne@69
|
84 Vector<QueryParam> query;
|
jpayne@69
|
85 // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign,
|
jpayne@69
|
86 // it will be parsed as a key with a null value, and later serialized with no '=' sign if you call
|
jpayne@69
|
87 // Url::toString().
|
jpayne@69
|
88 //
|
jpayne@69
|
89 // To distinguish between null-valued and empty-valued query parameters, we test whether
|
jpayne@69
|
90 // QueryParam::value is an allocated or unallocated string. For example:
|
jpayne@69
|
91 //
|
jpayne@69
|
92 // QueryParam { kj::str("name"), nullptr } // Null-valued; will not have an '=' sign.
|
jpayne@69
|
93 // QueryParam { kj::str("name"), kj::str("") } // Empty-valued; WILL have an '=' sign.
|
jpayne@69
|
94
|
jpayne@69
|
95 Maybe<String> fragment;
|
jpayne@69
|
96 // The stuff after the '#' character (not including the '#' character itself), if present.
|
jpayne@69
|
97
|
jpayne@69
|
98 using Options = UrlOptions;
|
jpayne@69
|
99 Options options;
|
jpayne@69
|
100
|
jpayne@69
|
101 // ---------------------------------------------------------------------------
|
jpayne@69
|
102
|
jpayne@69
|
103 Url() = default;
|
jpayne@69
|
104 Url(Url&&) = default;
|
jpayne@69
|
105 ~Url() noexcept(false);
|
jpayne@69
|
106 Url& operator=(Url&&) = default;
|
jpayne@69
|
107
|
jpayne@69
|
108 inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path,
|
jpayne@69
|
109 bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment,
|
jpayne@69
|
110 UrlOptions options)
|
jpayne@69
|
111 : scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)),
|
jpayne@69
|
112 hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)),
|
jpayne@69
|
113 options(options) {}
|
jpayne@69
|
114 // This constructor makes brace initialization work in C++11 and C++20 -- but is technically not
|
jpayne@69
|
115 // needed in C++14 nor C++17. Go figure.
|
jpayne@69
|
116
|
jpayne@69
|
117 Url clone() const;
|
jpayne@69
|
118
|
jpayne@69
|
119 enum Context {
|
jpayne@69
|
120 REMOTE_HREF,
|
jpayne@69
|
121 // A link to a remote resource. Requires an authority (hostname) section, hence this will
|
jpayne@69
|
122 // reject things like "mailto:" and "data:". This is the default context.
|
jpayne@69
|
123
|
jpayne@69
|
124 HTTP_PROXY_REQUEST,
|
jpayne@69
|
125 // The URL to place in the first line of an HTTP proxy request. This includes scheme, host,
|
jpayne@69
|
126 // path, and query, but omits userInfo (which should be used to construct the Authorization
|
jpayne@69
|
127 // header) and fragment (which should not be transmitted).
|
jpayne@69
|
128
|
jpayne@69
|
129 HTTP_REQUEST
|
jpayne@69
|
130 // The path to place in the first line of a regular HTTP request. This includes only the path
|
jpayne@69
|
131 // and query. Scheme, user, host, and fragment are omitted.
|
jpayne@69
|
132
|
jpayne@69
|
133 // TODO(someday): Add context(s) that supports things like "mailto:", "data:", "blob:". These
|
jpayne@69
|
134 // don't have an authority section.
|
jpayne@69
|
135 };
|
jpayne@69
|
136
|
jpayne@69
|
137 kj::String toString(Context context = REMOTE_HREF) const;
|
jpayne@69
|
138 // Convert the URL to a string.
|
jpayne@69
|
139
|
jpayne@69
|
140 static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
|
jpayne@69
|
141 static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
|
jpayne@69
|
142 // Parse an absolute URL.
|
jpayne@69
|
143
|
jpayne@69
|
144 Url parseRelative(StringPtr relative) const;
|
jpayne@69
|
145 Maybe<Url> tryParseRelative(StringPtr relative) const;
|
jpayne@69
|
146 // Parse a relative URL string with this URL as the base.
|
jpayne@69
|
147 };
|
jpayne@69
|
148
|
jpayne@69
|
149 } // namespace kj
|
jpayne@69
|
150
|
jpayne@69
|
151 KJ_END_HEADER
|