123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334 |
- import parsePath from 'parse-path';
// Defaults that a data URL may leave implicit; the normalizer drops them when
// they are spelled out explicitly.
// Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs

// MIME type that is omitted from a normalized data URL when nothing else is declared.
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';

// `charset` value that is omitted from a normalized data URL.
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
/**
 * Checks whether `name` is matched by at least one entry of `filters`.
 *
 * A `RegExp` entry matches when its `.test(name)` succeeds; any other entry
 * matches only when it is strictly equal to `name`.
 *
 * @param {string} name - The value to check.
 * @param {Array<RegExp|string>} filters - Patterns or literal values to compare against.
 * @returns {boolean} `true` as soon as one filter matches, otherwise `false`.
 */
const testParameter = (name, filters) => filters.some(candidate => {
    if (candidate instanceof RegExp) {
        return candidate.test(name);
    }

    return candidate === name;
});
/**
 * Normalizes a `data:` URL.
 *
 * The MIME type and any `charset` value are lowercased, attribute keys and
 * values are trimmed, a `charset` equal to the default charset is dropped,
 * and the MIME type is omitted when it is the default one and nothing else
 * (attribute or `base64` marker) is present. Base64 payloads are trimmed.
 *
 * @param {string} urlString - The data URL to normalize.
 * @param {{stripHash: boolean}} options - When `stripHash` is truthy the fragment is removed.
 * @returns {string} The normalized data URL.
 * @throws {Error} If `urlString` does not have the `data:<type>,<data>` shape.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
    const parsed = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString);
    if (parsed === null) {
        throw new Error(`Invalid URL: ${urlString}`);
    }

    const {type, data, hash} = parsed.groups;

    // The media type is `mime;attr=value;...` optionally terminated by `base64`.
    const segments = type.split(';');
    const isBase64 = segments[segments.length - 1] === 'base64';
    if (isBase64) {
        segments.pop();
    }

    const [rawMimeType, ...rawAttributes] = segments;
    const mimeType = (rawMimeType || '').toLowerCase();

    const mediaTypeParts = [];
    for (const rawAttribute of rawAttributes) {
        const [key, rawValue = ''] = rawAttribute.split('=').map(piece => piece.trim());
        const isCharset = key === 'charset';
        const value = isCharset ? rawValue.toLowerCase() : rawValue;

        // The default charset carries no information, so it is left out.
        if (isCharset && value === DATA_URL_DEFAULT_CHARSET) {
            continue;
        }

        const rendered = value ? `${key}=${value}` : `${key}`;
        if (rendered) {
            mediaTypeParts.push(rendered);
        }
    }

    if (isBase64) {
        mediaTypeParts.push('base64');
    }

    // The MIME type is only emitted when something follows it or when it is
    // not the implicit default.
    const needsMimeType = mediaTypeParts.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE);
    if (needsMimeType) {
        mediaTypeParts.unshift(mimeType);
    }

    const payload = isBase64 ? data.trim() : data;
    const fragment = !stripHash && hash ? `#${hash}` : '';

    return `data:${mediaTypeParts.join(';')},${payload}${fragment}`;
};
/**
 * Normalizes a URL string.
 *
 * Caller-supplied `options` are merged over these defaults:
 * `defaultProtocol: 'http:'`, `normalizeProtocol: true`, `forceHttp: false`,
 * `forceHttps: false`, `stripAuthentication: true`, `stripHash: false`,
 * `stripTextFragment: true`, `stripWWW: true`,
 * `removeQueryParameters: [/^utm_\w+/i]`, `removeTrailingSlash: true`,
 * `removeSingleSlash: true`, `removeDirectoryIndex: false`,
 * `sortQueryParameters: true`. `stripProtocol` is also honored when given.
 *
 * `data:` URLs are delegated to `normalizeDataURL`.
 *
 * @param {string} urlString - The URL to normalize.
 * @param {object} [options] - Overrides for the defaults listed above.
 * @returns {string} The normalized URL.
 * @throws {Error} For `view-source:` URLs, or when both `forceHttp` and
 *   `forceHttps` are set. The `URL` constructor throws for unparsable input.
 */
function normalizeUrl(urlString, options) {
    const settings = {
        defaultProtocol: 'http:',
        normalizeProtocol: true,
        forceHttp: false,
        forceHttps: false,
        stripAuthentication: true,
        stripHash: false,
        stripTextFragment: true,
        stripWWW: true,
        removeQueryParameters: [/^utm_\w+/i],
        removeTrailingSlash: true,
        removeSingleSlash: true,
        removeDirectoryIndex: false,
        sortQueryParameters: true,
        ...options,
    };

    const trimmedInput = urlString.trim();

    // Data URLs follow their own rules.
    if (/^data:/i.test(trimmedInput)) {
        return normalizeDataURL(trimmedInput, settings);
    }

    if (/^view-source:/i.test(trimmedInput)) {
        throw new Error('`view-source:` is not supported as it is a non-standard protocol');
    }

    const hasRelativeProtocol = trimmedInput.startsWith('//');
    const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(trimmedInput);

    // Put the default protocol in front of inputs that have no scheme, and
    // in place of a leading `//`.
    const prefixedInput = isRelativeUrl
        ? trimmedInput
        : trimmedInput.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, settings.defaultProtocol);

    const urlObject = new URL(prefixedInput);

    if (settings.forceHttp && settings.forceHttps) {
        throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
    }

    if (settings.forceHttp && urlObject.protocol === 'https:') {
        urlObject.protocol = 'http:';
    }

    if (settings.forceHttps && urlObject.protocol === 'http:') {
        urlObject.protocol = 'https:';
    }

    // Drop credentials.
    if (settings.stripAuthentication) {
        urlObject.username = '';
        urlObject.password = '';
    }

    // Drop the whole fragment, or only a text-fragment directive.
    if (settings.stripHash) {
        urlObject.hash = '';
    } else if (settings.stripTextFragment) {
        urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, '');
    }

    // Collapse runs of slashes in the path, except the `//` that directly
    // follows an embedded protocol (e.g. a URL nested inside the path).
    // NOTE: A single negative-lookbehind regex could do this, but that is
    // avoided for compatibility with engines lacking lookbehind support.
    // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind.
    const originalPath = urlObject.pathname;
    if (originalPath) {
        const protocolPattern = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g;
        const squeezeSlashes = text => text.replace(/\/{2,}/g, '/');

        let rebuiltPath = '';
        let cursor = 0;
        let hit = protocolPattern.exec(originalPath);
        while (hit !== null) {
            // Text before the protocol gets squeezed; the protocol is copied verbatim.
            rebuiltPath += squeezeSlashes(originalPath.slice(cursor, hit.index));
            rebuiltPath += hit[0];
            cursor = hit.index + hit[0].length;
            hit = protocolPattern.exec(originalPath);
        }

        rebuiltPath += squeezeSlashes(originalPath.slice(cursor));
        urlObject.pathname = rebuiltPath;
    }

    // Decode URI octets; a malformed sequence leaves the path as it is.
    if (urlObject.pathname) {
        try {
            urlObject.pathname = decodeURI(urlObject.pathname);
        } catch {}
    }

    // Remove a directory index file name from the end of the path.
    const directoryIndexFilters = settings.removeDirectoryIndex === true
        ? [/^index\.[a-z]+$/]
        : settings.removeDirectoryIndex;
    if (Array.isArray(directoryIndexFilters) && directoryIndexFilters.length > 0) {
        const segments = urlObject.pathname.split('/');
        if (testParameter(segments[segments.length - 1], directoryIndexFilters)) {
            // Skip the empty leading segment and the matched last segment.
            urlObject.pathname = `${segments.slice(1, -1).join('/')}/`;
        }
    }

    if (urlObject.hostname) {
        // Remove a trailing dot.
        urlObject.hostname = urlObject.hostname.replace(/\.$/, '');

        // Remove `www.` only for hosts of the form `www.<label>.<rest>`.
        // Each label should be max 63 at length (min: 1).
        // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
        // Each TLD should be up to 63 characters long (min: 2).
        // It is technically possible to have a single character TLD, but none currently exist.
        const wwwHostPattern = /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/;
        if (settings.stripWWW && wwwHostPattern.test(urlObject.hostname)) {
            urlObject.hostname = urlObject.hostname.replace(/^www\./, '');
        }
    }

    // Remove unwanted query parameters: a filter list removes matching keys,
    // `true` removes the whole query string.
    if (Array.isArray(settings.removeQueryParameters)) {
        // Iterate over a copy of the keys because entries are deleted while looping.
        const keySnapshot = Array.from(urlObject.searchParams.keys());
        for (const key of keySnapshot) {
            if (testParameter(key, settings.removeQueryParameters)) {
                urlObject.searchParams.delete(key);
            }
        }
    } else if (settings.removeQueryParameters === true) {
        urlObject.search = '';
    }

    // Sort query parameters.
    if (settings.sortQueryParameters) {
        urlObject.searchParams.sort();

        // Calling `.sort()` encodes the search parameters, so they are decoded again.
        try {
            urlObject.search = decodeURIComponent(urlObject.search);
        } catch {}
    }

    if (settings.removeTrailingSlash) {
        urlObject.pathname = urlObject.pathname.replace(/\/$/, '');
    }

    // Serialize through `URL` to pick up its own normalizations.
    let normalized = urlObject.toString();

    // A final `/` is only ever removed when there is no fragment after it.
    if (urlObject.hash === '') {
        const pathIsRoot = urlObject.pathname === '/';
        const shouldDropSlash = settings.removeSingleSlash
            ? (settings.removeTrailingSlash || pathIsRoot)
            : (pathIsRoot && !prefixedInput.endsWith('/'));
        if (shouldDropSlash) {
            normalized = normalized.replace(/\/$/, '');
        }
    }

    // Restore the relative protocol, if applicable.
    if (hasRelativeProtocol && !settings.normalizeProtocol) {
        normalized = normalized.replace(/^http:\/\//, '//');
    }

    // Remove http/https.
    if (settings.stripProtocol) {
        normalized = normalized.replace(/^(?:https?:)?\/\//, '');
    }

    return normalized;
}
- // Dependencies
/**
 * parseUrl
 * Parses the input url.
 *
 * **Note**: This *throws* if invalid urls are provided. The thrown error
 * carries the offending input in its `subject_url` property.
 *
 * @name parseUrl
 * @function
 * @param {String} url The input url.
 * @param {Boolean|Object} normalize Whether to normalize the url or not.
 *                         Default is `false`. If an object, it is passed as the
 *                         options object to [`normalize-url`](https://github.com/sindresorhus/normalize-url).
 *                         Any other truthy value normalizes with
 *                         `{ stripHash: false }`.
 *
 *                         For SSH urls, normalize won't work.
 *
 * @return {Object} An object containing the following fields:
 *
 *    - `protocols` (Array): An array with the url protocols (usually it has one element).
 *    - `protocol` (String): The first protocol, `"ssh"` (if the url is a ssh url) or `"file"`.
 *    - `port` (null|Number): The domain port.
 *    - `resource` (String): The url domain (including subdomains).
 *    - `user` (String): The authentication user (usually for ssh urls).
 *    - `pathname` (String): The url pathname.
 *    - `hash` (String): The url hash.
 *    - `search` (String): The url querystring value.
 *    - `href` (String): The input url.
 *    - `query` (Object): The url querystring, parsed as object.
 *    - `parse_failed` (Boolean): Whether the parsing failed or not.
 */
const parseUrl = (url, normalize = false) => {
    // Shape of git-ssh style urls (`user@host:path`) that the generic parser rejects.
    const GIT_RE = /^(?:([a-z_][a-z0-9_-]{0,31})@|https?:\/\/)([\w\.\-@]+)[\/:]([\~,\.\w,\-,\_,\/]+?(?:\.git|\/)?)$/;

    // Throws an Error that records the input url on `subject_url`.
    const throwErr = msg => {
        throw Object.assign(new Error(msg), { subject_url: url });
    };

    const isBlank = typeof url !== "string" || !url.trim();
    if (isBlank) {
        throwErr("Invalid url.");
    }

    if (url.length > parseUrl.MAX_INPUT_LENGTH) {
        throwErr("Input exceeds maximum length. If needed, change the value of parseUrl.MAX_INPUT_LENGTH.");
    }

    if (normalize) {
        // Any truthy non-object value selects the built-in normalization options.
        const normalizeOptions = typeof normalize === "object" ? normalize : { stripHash: false };
        normalize = normalizeOptions;
        url = normalizeUrl(url, normalizeOptions);
    }

    const parsed = parsePath(url);
    if (!parsed.parse_failed) {
        return parsed;
    }

    // Potential git-ssh urls: fall back to the regex when generic parsing failed.
    const matched = parsed.href.match(GIT_RE);
    if (!matched) {
        throwErr("URL parsing failed.");
    }

    const [, user, host, path] = matched;
    Object.assign(parsed, {
        protocols: ["ssh"],
        protocol: "ssh",
        resource: host,
        host,
        user,
        pathname: `/${path}`,
        parse_failed: false,
    });

    return parsed;
};

// Inputs longer than this are rejected before any parsing; callers may change it.
parseUrl.MAX_INPUT_LENGTH = 2048;
- export { parseUrl as default };
|