jpayne@69: // Copyright (c) 2017 Cloudflare, Inc. and contributors jpayne@69: // Licensed under the MIT License: jpayne@69: // jpayne@69: // Permission is hereby granted, free of charge, to any person obtaining a copy jpayne@69: // of this software and associated documentation files (the "Software"), to deal jpayne@69: // in the Software without restriction, including without limitation the rights jpayne@69: // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell jpayne@69: // copies of the Software, and to permit persons to whom the Software is jpayne@69: // furnished to do so, subject to the following conditions: jpayne@69: // jpayne@69: // The above copyright notice and this permission notice shall be included in jpayne@69: // all copies or substantial portions of the Software. jpayne@69: // jpayne@69: // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR jpayne@69: // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, jpayne@69: // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE jpayne@69: // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER jpayne@69: // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, jpayne@69: // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN jpayne@69: // THE SOFTWARE. jpayne@69: jpayne@69: #pragma once jpayne@69: jpayne@69: #include jpayne@69: #include jpayne@69: #include jpayne@69: jpayne@69: KJ_BEGIN_HEADER jpayne@69: jpayne@69: namespace kj { jpayne@69: jpayne@69: struct UrlOptions { jpayne@69: // A bag of options that you can pass to Url::parse()/tryParse() to customize the parser's jpayne@69: // behavior. jpayne@69: // jpayne@69: // A copy of this options struct will be stored in the parsed Url object, at which point it jpayne@69: // controls the behavior of the serializer in Url::toString(). jpayne@69: jpayne@69: bool percentDecode = true; jpayne@69: // True if URL components should be automatically percent-decoded during parsing, and jpayne@69: // percent-encoded during serialization. jpayne@69: jpayne@69: bool allowEmpty = false; jpayne@69: // Whether or not to allow empty path and query components when parsing; otherwise, they are jpayne@69: // silently removed. In other words, setting this false causes consecutive slashes in the path or jpayne@69: // consecutive ampersands in the query to be collapsed into one, whereas if true then they jpayne@69: // produce empty components. jpayne@69: }; jpayne@69: jpayne@69: struct Url { jpayne@69: // Represents a URL (or, more accurately, a URI, but whatever). jpayne@69: // jpayne@69: // Can be parsed from a string and composed back into a string. jpayne@69: jpayne@69: String scheme; jpayne@69: // E.g. "http", "https". jpayne@69: jpayne@69: struct UserInfo { jpayne@69: String username; jpayne@69: Maybe password; jpayne@69: }; jpayne@69: jpayne@69: Maybe userInfo; jpayne@69: // Username / password. jpayne@69: jpayne@69: String host; jpayne@69: // Hostname, including port if specified. We choose not to parse out the port because KJ's jpayne@69: // network address parsing functions already accept addresses containing port numbers, and jpayne@69: // because most web standards don't actually want to separate host and port. jpayne@69: jpayne@69: Vector path; jpayne@69: bool hasTrailingSlash = false; jpayne@69: // Path, split on '/' characters. Note that the individual components of `path` could contain jpayne@69: // '/' characters if they were percent-encoded in the original URL. jpayne@69: // jpayne@69: // No component of the path is allowed to be "", ".", nor ".."; if such components are present, jpayne@69: // toString() will throw. Note that parse() and parseRelative() automatically resolve such jpayne@69: // components. jpayne@69: jpayne@69: struct QueryParam { jpayne@69: String name; jpayne@69: String value; jpayne@69: }; jpayne@69: Vector query; jpayne@69: // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign, jpayne@69: // it will be parsed as a key with a null value, and later serialized with no '=' sign if you call jpayne@69: // Url::toString(). jpayne@69: // jpayne@69: // To distinguish between null-valued and empty-valued query parameters, we test whether jpayne@69: // QueryParam::value is an allocated or unallocated string. For example: jpayne@69: // jpayne@69: // QueryParam { kj::str("name"), nullptr } // Null-valued; will not have an '=' sign. jpayne@69: // QueryParam { kj::str("name"), kj::str("") } // Empty-valued; WILL have an '=' sign. jpayne@69: jpayne@69: Maybe fragment; jpayne@69: // The stuff after the '#' character (not including the '#' character itself), if present. jpayne@69: jpayne@69: using Options = UrlOptions; jpayne@69: Options options; jpayne@69: jpayne@69: // --------------------------------------------------------------------------- jpayne@69: jpayne@69: Url() = default; jpayne@69: Url(Url&&) = default; jpayne@69: ~Url() noexcept(false); jpayne@69: Url& operator=(Url&&) = default; jpayne@69: jpayne@69: inline Url(String&& scheme, Maybe&& userInfo, String&& host, Vector&& path, jpayne@69: bool hasTrailingSlash, Vector&& query, Maybe&& fragment, jpayne@69: UrlOptions options) jpayne@69: : scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)), jpayne@69: hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)), jpayne@69: options(options) {} jpayne@69: // This constructor makes brace initialization work in C++11 and C++20 -- but is technically not jpayne@69: // needed in C++14 nor C++17. Go figure. jpayne@69: jpayne@69: Url clone() const; jpayne@69: jpayne@69: enum Context { jpayne@69: REMOTE_HREF, jpayne@69: // A link to a remote resource. Requires an authority (hostname) section, hence this will jpayne@69: // reject things like "mailto:" and "data:". This is the default context. jpayne@69: jpayne@69: HTTP_PROXY_REQUEST, jpayne@69: // The URL to place in the first line of an HTTP proxy request. This includes scheme, host, jpayne@69: // path, and query, but omits userInfo (which should be used to construct the Authorization jpayne@69: // header) and fragment (which should not be transmitted). jpayne@69: jpayne@69: HTTP_REQUEST jpayne@69: // The path to place in the first line of a regular HTTP request. This includes only the path jpayne@69: // and query. Scheme, user, host, and fragment are omitted. jpayne@69: jpayne@69: // TODO(someday): Add context(s) that supports things like "mailto:", "data:", "blob:". These jpayne@69: // don't have an authority section. jpayne@69: }; jpayne@69: jpayne@69: kj::String toString(Context context = REMOTE_HREF) const; jpayne@69: // Convert the URL to a string. jpayne@69: jpayne@69: static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {}); jpayne@69: static Maybe tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {}); jpayne@69: // Parse an absolute URL. jpayne@69: jpayne@69: Url parseRelative(StringPtr relative) const; jpayne@69: Maybe tryParseRelative(StringPtr relative) const; jpayne@69: // Parse a relative URL string with this URL as the base. jpayne@69: }; jpayne@69: jpayne@69: } // namespace kj jpayne@69: jpayne@69: KJ_END_HEADER