1 /*
2 Copyright (c) 2016-2021 Eugene Wissner
3 
4 Boost Software License - Version 1.0 - August 17th, 2003
5 
6 Permission is hereby granted, free of charge, to any person or organization
7 obtaining a copy of the software and accompanying documentation covered by
8 this license (the "Software") to use, reproduce, display, distribute,
9 execute, and transmit the Software, and to prepare derivative works of the
10 Software, and to permit third-parties to whom the Software is furnished to
11 do so, all subject to the following:
12 
13 The copyright notices in the Software and this entire statement, including
14 the above license grant, this restriction and the following disclaimer,
15 must be included in all copies of the Software, in whole or in part, and
16 all derivative works of the Software, unless such copies or derivative
17 works are solely in the form of machine-executable object code generated by
18 a source language processor.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
23 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
24 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
25 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.
27 */
28 
29 /**
30  * URL parser
31  *
32  * Copyright: Eugene Wissner 2016-2021.
33  * License: $(LINK2 boost.org/LICENSE_1_0.txt, Boost License 1.0).
34  * Authors: Eugene Wissner
35  */
36 module dlib.network.url;
37 
38 import std.ascii : isAlphaNum, isDigit;
39 import std.traits : isSomeString;
40 import std.uni : isAlpha, isNumber;
41 import std.uri;
42 
version (unittest)
{
    private import std.typecons;

    /*
     * Fixture table for the URL parser tests. Each element is a tuple of:
     *   [0] the input URL string,
     *   [1] the expected components keyed by field name
     *       ("scheme", "user", "pass", "host", "path", "query", "fragment"),
     *   [2] the expected port number.
     * The table is filled by the module constructor below.
     */
    private static Tuple!(string, string[string], ushort)[] URLTests;
}
48 
/*
 * Fills URLTests with the parser fixtures.
 *
 * The whole constructor is gated by version (unittest), matching the
 * declaration of URLTests itself, so that regular builds do not carry an
 * empty module constructor.
 *
 * Each entry is tuple(input URL, expected components, expected port). The
 * component map only lists the fields that appear in the input; the port is
 * ushort(0) when the input carries none.
 */
version (unittest) static this()
{
    URLTests = [
        // Bare addresses and host names, with and without scheme and port.
        tuple(`127.0.0.1`, [
            "path": "127.0.0.1",
        ], ushort(0)),

        tuple(`http://127.0.0.1`, [
            "scheme": "http",
            "host": "127.0.0.1",
        ], ushort(0)),

        tuple(`http://127.0.0.1/`, [
            "scheme": "http",
            "host": "127.0.0.1",
            "path": "/",
        ], ushort(0)),

        tuple(`127.0.0.1/`, [
            "path": "127.0.0.1/",
        ], ushort(0)),

        tuple(`127.0.0.1:60000/`, [
            "host": "127.0.0.1",
            "path": "/",
        ], ushort(60000)),

        tuple(`example.org`, [
            "path": "example.org",
        ], ushort(0)),

        tuple(`example.org/`, [
            "path": "example.org/",
        ], ushort(0)),

        tuple(`http://example.org`, [
            "scheme": "http",
            "host": "example.org",
        ], ushort(0)),

        tuple(`http://example.org/`, [
            "scheme": "http",
            "host": "example.org",
            "path": "/",
        ], ushort(0)),

        tuple(`www.example.org`, [
            "path": "www.example.org",
        ], ushort(0)),

        tuple(`www.example.org/`, [
            "path": "www.example.org/",
        ], ushort(0)),

        tuple(`http://www.example.org`, [
            "scheme": "http",
            "host": "www.example.org",
        ], ushort(0)),

        tuple(`http://www.example.org/`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
        ], ushort(0)),

        tuple(`www.example.org:2`, [
            "host": "www.example.org",
        ], ushort(2)),

        tuple(`http://www.example.org:80`, [
            "scheme": "http",
            "host": "www.example.org",
        ], ushort(80)),

        tuple(`http://www.example.org:80/`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
        ], ushort(80)),

        tuple(`http://www.example.org/index.html`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
        ], ushort(0)),

        // Empty query strings.
        tuple(`www.example.org/?`, [
            "path": "www.example.org/",
            "query": "",
        ], ushort(0)),

        tuple(`www.example.org:80/?`, [
            "host": "www.example.org",
            "path": "/",
            "query": "",
        ], ushort(80)),

        tuple(`http://www.example.org/?`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "",
        ], ushort(0)),

        tuple(`http://www.example.org:80/?`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "",
        ], ushort(80)),

        // Paths of various depth.
        tuple(`http://www.example.org:80/index.html`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
        ], ushort(80)),

        tuple(`http://www.example.org:80/foo/bar/index.html`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/foo/bar/index.html",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/file.png`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/is/a/very/deep/directory/structure/and/file.png",
        ], ushort(80)),

        tuple(`http://www.example.org:80/deep/directory/structure/and/file.png?lots=1&of=2&parameters=3&too=4`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/deep/directory/structure/and/file.png",
            "query": "lots=1&of=2&parameters=3&too=4",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/is/a/very/deep/directory/structure/and/",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/file.php`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/is/a/very/deep/directory/structure/and/file.php",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/../a/../deep/directory`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/../a/../deep/directory",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/../a/../deep/directory/`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/../a/../deep/directory/",
        ], ushort(80)),

        tuple(`http://www.example.org:80/this/is/a/very/deep/directory/../image.png`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/this/is/a/very/deep/directory/../image.png",
        ], ushort(80)),

        tuple(`http://www.example.org:80/index.html`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
        ], ushort(80)),

        // Queries and fragments.
        tuple(`http://www.example.org:80/index.html?`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "",
        ], ushort(80)),

        tuple(`http://www.example.org:80/#foo`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "fragment": "foo",
        ], ushort(80)),

        tuple(`http://www.example.org:80/?#`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "",
            "fragment": "",
        ], ushort(80)),

        tuple(`http://www.example.org:80/?test=1`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "test=1",
        ], ushort(80)),

        tuple(`http://www.example.org/?test=1&`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "test=1&",
        ], ushort(0)),

        tuple(`http://www.example.org:80/?&`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/",
            "query": "&",
        ], ushort(80)),

        tuple(`http://www.example.org:80/index.html?test=1&`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "test=1&",
        ], ushort(80)),

        tuple(`http://www.example.org/index.html?&`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "&",
        ], ushort(0)),

        tuple(`http://www.example.org:80/index.html?foo&`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "foo&",
        ], ushort(80)),

        tuple(`http://www.example.org/index.html?&foo`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "&foo",
        ], ushort(0)),

        tuple(`http://www.example.org:80/index.html?test=1&test2=char`, [
            "scheme": "http",
            "host": "www.example.org",
            "path": "/index.html",
            "query": "test=1&test2=char",
        ], ushort(80)),

        tuple(`www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
            "host": "www.example.org",
            "path": "/index.html",
            "query": "test=1&test2=char",
            "fragment": "some_ref123",
        ], ushort(80)),

        // User information.
        tuple(`http://secret@www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
            "scheme": "http",
            "host": "www.example.org",
            "user": "secret",
            "path": "/index.html",
            "query": "test=1&test2=char",
            "fragment": "some_ref123",
        ], ushort(80)),

        tuple(`http://secret:@www.example.org/index.html?test=1&test2=char#some_ref123`, [
            "scheme": "http",
            "host": "www.example.org",
            "user": "secret",
            "pass": "",
            "path": "/index.html",
            "query": "test=1&test2=char",
            "fragment": "some_ref123",
        ], ushort(0)),

        tuple(`http://:hideout@www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
            "scheme": "http",
            "host": "www.example.org",
            "user": "",
            "pass": "hideout",
            "path": "/index.html",
            "query": "test=1&test2=char",
            "fragment": "some_ref123",
        ], ushort(80)),

        tuple(`http://secret:hideout@www.example.org/index.html?test=1&test2=char#some_ref123`, [
            "scheme": "http",
            "host": "www.example.org",
            "user": "secret",
            "pass": "hideout",
            "path": "/index.html",
            "query": "test=1&test2=char",
            "fragment": "some_ref123",
        ], ushort(0)),

        tuple(`http://secret:hid:out@www.example.org:80/index.html?test=1&test2=int#some_ref123`, [
            "scheme": "http",
            "host": "www.example.org",
            "user": "secret",
            "pass": "hid:out",
            "path": "/index.html",
            "query": "test=1&test2=int",
            "fragment": "some_ref123",
        ], ushort(80)),

        // Other schemes, including ones without "//" after the colon.
        tuple(`nntp://news.example.org`, [
            "scheme": "nntp",
            "host": "news.example.org",
        ], ushort(0)),

        tuple(`ftp://ftp.gnu.org/gnu/glic/glibc.tar.gz`, [
            "scheme": "ftp",
            "host": "ftp.gnu.org",
            "path": "/gnu/glic/glibc.tar.gz",
        ], ushort(0)),

        tuple(`zlib:http://foo@bar`, [
            "scheme": "zlib",
            "path": "http://foo@bar",
        ], ushort(0)),

        tuple(`zlib:filename.txt`, [
            "scheme": "zlib",
            "path": "filename.txt",
        ], ushort(0)),

        tuple(`zlib:/path/to/my/file/file.txt`, [
            "scheme": "zlib",
            "path": "/path/to/my/file/file.txt",
        ], ushort(0)),

        tuple(`foo://foo@bar`, [
            "scheme": "foo",
            "host": "bar",
            "user": "foo",
        ], ushort(0)),

        tuple(`mailto:me@mydomain.com`, [
            "scheme": "mailto",
            "path": "me@mydomain.com",
        ], ushort(0)),

        tuple(`/foo.php?a=b&c=d`, [
            "path": "/foo.php",
            "query": "a=b&c=d",
        ], ushort(0)),

        tuple(`foo.php?a=b&c=d`, [
            "path": "foo.php",
            "query": "a=b&c=d",
        ], ushort(0)),

        tuple(`http://user:passwd@www.example.com:8080?bar=1&boom=0`, [
            "scheme": "http",
            "host": "www.example.com",
            "user": "user",
            "pass": "passwd",
            "query": "bar=1&boom=0",
        ], ushort(8080)),

        tuple(`file:///path/to/file`, [
            "scheme": "file",
            "path": "/path/to/file",
        ], ushort(0)),

        tuple(`file://path/to/file`, [
            "scheme": "file",
            "host": "path",
            "path": "/to/file",
        ], ushort(0)),

        tuple(`file:/path/to/file`, [
            "scheme": "file",
            "path": "/path/to/file",
        ], ushort(0)),

        tuple(`http://1.2.3.4:/abc.asp?a=1&b=2`, [
            "scheme": "http",
            "host": "1.2.3.4",
            "path": "/abc.asp",
            "query": "a=1&b=2",
        ], ushort(0)),

        tuple(`http://foo.com#bar`, [
            "scheme": "http",
            "host": "foo.com",
            "fragment": "bar",
        ], ushort(0)),

        tuple(`scheme:`, [
            "scheme": "scheme",
        ], ushort(0)),

        tuple(`foo+bar://baz@bang/bla`, [
            "scheme": "foo+bar",
            "host": "bang",
            "user": "baz",
            "path": "/bla",
        ], ushort(0)),

        tuple(`gg:9130731`, [
            "scheme": "gg",
            "path": "9130731",
        ], ushort(0)),

        // Colons, '?', '@' and brackets in unusual positions.
        tuple(`http://10.10.10.10/:80`, [
            "scheme": "http",
            "host": "10.10.10.10",
            "path": "/:80",
        ], ushort(0)),

        tuple(`http://x:?`, [
            "scheme": "http",
            "host": "x",
            "query": "",
        ], ushort(0)),

        tuple(`x:blah.com`, [
            "scheme": "x",
            "path": "blah.com",
        ], ushort(0)),

        tuple(`x:/blah.com`, [
            "scheme": "x",
            "path": "/blah.com",
        ], ushort(0)),

        tuple(`http://::?`, [
            "scheme": "http",
            "host": ":",
            "query": "",
        ], ushort(0)),

        tuple(`http://::#`, [
            "scheme": "http",
            "host": ":",
            "fragment": "",
        ], ushort(0)),

        tuple(`http://?:/`, [
            "scheme": "http",
            "host": "?",
            "path": "/",
        ], ushort(0)),

        tuple(`http://@?:/`, [
            "scheme": "http",
            "host": "?",
            "user": "",
            "path": "/",
        ], ushort(0)),

        tuple(`file:///:`, [
            "scheme": "file",
            "path": "/:",
        ], ushort(0)),

        tuple(`file:///a:/`, [
            "scheme": "file",
            "path": "a:/",
        ], ushort(0)),

        tuple(`file:///ab:/`, [
            "scheme": "file",
            "path": "/ab:/",
        ], ushort(0)),

        tuple(`file:///a:/`, [
            "scheme": "file",
            "path": "a:/",
        ], ushort(0)),

        tuple(`file:///@:/`, [
            "scheme": "file",
            "path": "@:/",
        ], ushort(0)),

        tuple(`file:///:80/`, [
            "scheme": "file",
            "path": "/:80/",
        ], ushort(0)),

        tuple(`[]`, [
            "path": "[]",
        ], ushort(0)),

        tuple(`http://[x:80]/`, [
            "scheme": "http",
            "host": "[x:80]",
            "path": "/",
        ], ushort(0)),

        tuple(``, [
            "path": "",
        ], ushort(0)),

        tuple(`/`, [
            "path": "/",
        ], ushort(0)),

        tuple(`/rest/Users?filter={"id":"789"}`, [
            "path": "/rest/Users",
            "query": `filter={"id":"789"}`,
        ], ushort(0)),

        tuple(`//example.org`, [
            "host": "example.org",
        ], ushort(0)),

        // A colon followed by digits inside the query is not a port.
        tuple(`/standard/?fq=B:20001`, [
            "path": "/standard/",
            "query": "fq=B:20001",
        ], ushort(0)),

        tuple(`/standard/?fq=B:200013`, [
            "path": "/standard/",
            "query": "fq=B:200013",
        ], ushort(0)),

        tuple(`/standard/?fq=home:012345`, [
            "path": "/standard/",
            "query": "fq=home:012345",
        ], ushort(0)),

        tuple(`/standard/?fq=home:01234`, [
            "path": "/standard/",
            "query": "fq=home:01234",
        ], ushort(0)),

        // Scheme-relative URLs ("//host...").
        tuple(`http://user:pass@host`, [
            "scheme": "http",
            "host": "host",
            "user": "user",
            "pass": "pass",
        ], ushort(0)),

        tuple(`//user:pass@host`, [
            "host": "host",
            "user": "user",
            "pass": "pass",
        ], ushort(0)),

        tuple(`//user@host`, [
            "host": "host",
            "user": "user",
        ], ushort(0)),

        tuple(`//example.org:99/hey?a=b#c=d`, [
            "host": "example.org",
            "path": "/hey",
            "query": "a=b",
            "fragment": "c=d",
        ], ushort(99)),

        tuple(`//example.org/hey?a=b#c=d`, [
            "host": "example.org",
            "path": "/hey",
            "query": "a=b",
            "fragment": "c=d",
        ], ushort(0)),

        tuple(`http://example.org/some/path.cgi?t=1#fragment?data`, [
            "scheme": "http",
            "host": "example.org",
            "path": "/some/path.cgi",
            "query": "t=1",
            "fragment": "fragment?data",
        ], ushort(0)),

        tuple(`http://example.org/some/path.cgi#fragment?data`, [
            "scheme": "http",
            "host": "example.org",
            "path": "/some/path.cgi",
            "fragment": "fragment?data",
        ], ushort(0)),

        // Entries with no expected components (string[string].init).
        // NOTE(review): presumably these are inputs the parser must reject;
        // the unittest consuming this table is not visible here - confirm.
        tuple(`x://::abc/?`, string[string].init, ushort(0)),
        tuple(`http:///blah.com`, string[string].init, ushort(0)),
        tuple(`http://:80`, string[string].init, ushort(0)),
        tuple(`http://user@:80`, string[string].init, ushort(0)),
        tuple(`http://user:pass@:80`, string[string].init, ushort(0)),
        tuple(`http://:`, string[string].init, ushort(0)),
        tuple(`http://@/`, string[string].init, ushort(0)),
        tuple(`http://@:/`, string[string].init, ushort(0)),
        tuple(`http://:/`, string[string].init, ushort(0)),
        tuple(`http://?`, string[string].init, ushort(0)),
        tuple(`http://#`, string[string].init, ushort(0)),
        tuple(`http://:?`, string[string].init, ushort(0)),
        tuple(`http://blah.com:123456`, string[string].init, ushort(0)),
        tuple(`http://blah.com:70000`, string[string].init, ushort(0)),
        tuple(`http://blah.com:abcdef`, string[string].init, ushort(0)),
        tuple(`http://secret@hideout@www.example.org:80/index.html?test=1&test2=char#some_ref123`,
              string[string].init,
              ushort(0)),
        tuple(`http://user:@pass@host/path?argument?value#etc`, string[string].init, ushort(0)),
        tuple(`http://foo.com\@bar.com`, string[string].init, ushort(0)),
        tuple(`http://email@address.com:pass@example.org`, string[string].init, ushort(0)),
        tuple(`:`, string[string].init, ushort(0)),
    ];
}
653 
654 /**
 * A Uniform Resource Locator.
656  *
657  * Params:
658  *     U = URL string type.
659  */
660 struct URL(U = string)
661     if (isSomeString!U)
662 {
663     /** The URL scheme. */
664     U scheme;
665 
666     /** The username. */
667     U user;
668 
669     /** The password. */
670     U pass;
671 
672     /** The hostname. */
673     U host;
674 
675     /** The port number. */
676     ushort port;
677 
678     /** The path. */
679     U path;
680 
681     /** The query string. */
682     U query;
683 
684     /** The anchor. */
685     U fragment;
686 
687     /**
     * Attempts to parse a URL from a string.
     * The output string fields (scheme, user, etc.) are just slices of the input string (i.e., no memory allocation or copying).
690      *
691      * Params:
692      *  source = The string containing the URL.
693      *
694      * Throws: $(D_PSYMBOL URIException) if the URL is malformed.
695      */
696     this(U source)
697     {
698         auto value = source;
699         ptrdiff_t pos = -1, endPos = value.length, start;
700 
701         foreach (i, ref c; source)
702         {
703             if (pos == -1 && c == ':')
704             {
705                 pos = i;
706             }
707             if (endPos == value.length && (c == '?' || c == '#'))
708             {
709                 endPos = i;
710             }
711         }
712 
713         // Check if the colon is a part of the scheme or the port and parse
714         // the appropriate part
715         if (value.length > 1 && value[0] == '/' && value[1] == '/')
716         {
717             // Relative scheme
718             start = 2;
719         }
720         else if (pos > 0)
721         {
722             // Validate scheme
723             // [ toLower(alpha) | digit | "+" | "-" | "." ]
724             foreach (ref c; value[0..pos])
725             {
726                 if (!c.isAlphaNum && c != '+' && c != '-' && c != '.')
727                 {
728                     if (endPos > pos)
729                     {
730                         if (!parsePort(value[pos..$]))
731                         {
732                             throw new URIException("Failed to parse port");
733                         }
734                     }
735                     goto ParsePath;
736                 }
737             }
738 
739             if (value.length == pos + 1) // only scheme is available
740             {
741                 scheme = value[0 .. $ - 1];
742                 return;
743             }
744             else if (value.length > pos + 1 && value[pos + 1] == '/')
745             {
746                 scheme = value[0..pos];
747 
748                 if (value.length > pos + 2 && value[pos + 2] == '/')
749                 {
750                     start = pos + 3;
751                     if (scheme == "file" && value.length > start && value[start] == '/')
752                     {
753                         // Windows drive letters
754                         if (value.length - start > 2 && value[start + 2] == ':')
755                         {
756                             ++start;
757                         }
758                         goto ParsePath;
759                     }
760                 }
761                 else
762                 {
763                     start = pos + 1;
764                     goto ParsePath;
765                 }
766             }
767             else // certain schemas like mailto: and zlib: may not have any / after them
768             {
769 
770                 if (!parsePort(value[pos..$]))
771                 {
772                     scheme = value[0..pos];
773                     start = pos + 1;
774                     goto ParsePath;
775                 }
776             }
777         }
778         else if (pos == 0 && parsePort(value[pos..$]))
779         {
780             // An URL shouldn't begin with a port number
781             throw new URIException("URL begins with port");
782         }
783         else
784         {
785             goto ParsePath;
786         }
787 
788         // Parse host
789         pos = -1;
790         for (ptrdiff_t i = start; i < value.length; ++i)
791         {
792             if (value[i] == '@')
793             {
794                 pos = i;
795             }
796             else if (value[i] == '/')
797             {
798                 endPos = i;
799                 break;
800             }
801         }
802 
803         // Check for login and password
804         if (pos != -1)
805         {
806             // *( unreserved / pct-encoded / sub-delims / ":" )
807             foreach (i, c; value[start..pos])
808             {
809                 if (c == ':')
810                 {
811                     if (user is null)
812                     {
813                         user = value[start .. start + i];
814                         pass = value[start + i + 1 .. pos];
815                     }
816                 }
817                 else if (!c.isAlpha &&
818                          !c.isNumber &&
819                          c != '!' &&
820                          c != ';' &&
821                          c != '=' &&
822                          c != '_' &&
823                          c != '~' &&
824                          !(c >= '$' && c <= '.'))
825                 {
826                     if (scheme !is null)
827                     {
828                         scheme = null;
829                     }
830                     if (user !is null)
831                     {
832                         user = null;
833                     }
834                     if (pass !is null)
835                     {
836                         pass = null;
837                     }
838                     throw new URIException("Restricted characters in user information");
839                 }
840             }
841             if (user is null)
842             {
843                 user = value[start..pos];
844             }
845 
846             start = ++pos;
847         }
848 
849         pos = endPos;
850         if (endPos <= 1 || value[start] != '[' || value[endPos - 1] != ']')
851         {
852             // Short circuit portscan
853             // IPv6 embedded address
854             for (ptrdiff_t i = endPos - 1; i >= start; --i)
855             {
856                 if (value[i] == ':')
857                 {
858                     pos = i;
859                     if  (port == 0 && !parsePort(value[i..endPos]))
860                     {
861                         if (scheme !is null)
862                         {
863                             scheme = null;
864                         }
865                         if (user !is null)
866                         {
867                             user = null;
868                         }
869                         if (pass !is null)
870                         {
871                             pass = null;
872                         }
873                         throw new URIException("Invalid port");
874                     }
875                     break;
876                 }
877             }
878         }
879 
880         // Check if we have a valid host, if we don't reject the string as url
881         if (pos <= start)
882         {
883             if (scheme !is null)
884             {
885                 scheme = null;
886             }
887             if (user !is null)
888             {
889                 user = null;
890             }
891             if (pass !is null)
892             {
893                 pass = null;
894             }
895             throw new URIException("Invalid host");
896         }
897 
898         host = value[start..pos];
899 
900         if (endPos == value.length)
901         {
902             return;
903         }
904 
905         start = endPos;
906 
907     ParsePath:
908         endPos = value.length;
909         pos = -1;
910         foreach (i, ref c; value[start..$])
911         {
912             if (c == '?' && pos == -1)
913             {
914                 pos = start + i;
915             }
916             else if (c == '#')
917             {
918                 endPos = start + i;
919                 break;
920             }
921         }
922         if (pos == -1)
923         {
924             pos = endPos;
925         }
926 
927         if (pos > start)
928         {
929             path = value[start..pos];
930         }
931         if (endPos >= ++pos)
932         {
933             query = value[pos..endPos];
934         }
935         if (++endPos <= value.length)
936         {
937             fragment = value[endPos..$];
938         }
939     }
940 
941     ~this()
942     {
943         if (scheme !is null)
944         {
945             scheme = null;
946         }
947         if (user !is null)
948         {
949             user = null;
950         }
951         if (pass !is null)
952         {
953             pass = null;
954         }
955         if (host !is null)
956         {
957             host = null;
958         }
959         if (path !is null)
960         {
961             path = null;
962         }
963         if (query !is null)
964         {
965             query = null;
966         }
967         if (fragment !is null)
968         {
969             fragment = null;
970         }
971     }
972 
973     /**
974      * Attempts to parse and set the port.
975      *
976      * Params:
977      *     port = String beginning with a colon followed by the port number and
978      *            an optional path (query string and/or fragment), like:
979      *            `:12345/some_path` or `:12345`.
980      *
981      * Returns: Whether the port could found.
982      */
983     private bool parsePort(U port) pure nothrow @safe @nogc
984     {
985         ptrdiff_t i = 1;
986         float lPort = 0;
987 
988         for (; i < port.length && port[i].isDigit() && i <= 6; ++i)
989         {
990             lPort += (port[i] - '0') / cast(float)(10 ^^ (i - 1));
991         }
992         if (i == 1 && (i == port.length || port[i] == '/'))
993         {
994             return true;
995         }
996         else if (i == port.length || port[i] == '/')
997         {
998             lPort *= 10 ^^ (i - 2);
999             if (lPort > ushort.max)
1000             {
1001                 return false;
1002             }
1003             this.port = cast(ushort)lPort;
1004             return true;
1005         }
1006         return false;
1007     }
1008 }
1009 
unittest
{
    // Input without a scheme ends up in the path.
    {
        auto bare = URL!()("example.org");
        assert(bare.path == "example.org");
    }
    {
        auto relative = URL!()("relative/path");
        assert(relative.path == "relative/path");
    }

    // Scheme and host only.
    {
        auto plain = URL!()("https://example.org");
        assert(plain.scheme == "https");
        assert(plain.host == "example.org");
        assert(plain.path is null);
        assert(plain.port == 0);
        assert(plain.fragment is null);
    }

    // Credentials, port and path.
    {
        auto full = URL!()("https://hilary:putnam@example.org:443/foo/bar");
        assert(full.scheme == "https");
        assert(full.host == "example.org");
        assert(full.path == "/foo/bar");
        assert(full.port == 443);
        assert(full.user == "hilary");
        assert(full.pass == "putnam");
        assert(full.fragment is null);
    }

    // Query string.
    {
        auto queried = URL!()("https://example.org/?login=true");
        assert(queried.scheme == "https");
        assert(queried.host == "example.org");
        assert(queried.path == "/");
        assert(queried.query == "login=true");
        assert(queried.fragment is null);
    }

    // Query string followed by a fragment.
    {
        auto labeled = URL!()("https://example.org/?login=false#label");
        assert(labeled.scheme == "https");
        assert(labeled.host == "example.org");
        assert(labeled.path == "/");
        assert(labeled.query == "login=false");
        assert(labeled.fragment == "label");
    }

    // All components at once.
    {
        auto complete = URL!()("redis://root:password@localhost:2201/path?query=value#fragment");
        assert(complete.scheme == "redis");
        assert(complete.user == "root");
        assert(complete.pass == "password");
        assert(complete.host == "localhost");
        assert(complete.port == 2201);
        assert(complete.path == "/path");
        assert(complete.query == "query=value");
        assert(complete.fragment == "fragment");
    }
}
1062 
private unittest
{
    // Compares a parsed component with the expectation stored under `key`.
    // A missing key means that the component has to be null.
    static void verify(string[string] expected, string key, string actual,
                       string input)
    {
        if (auto wanted = key in expected)
        {
            assert(actual == *wanted, input);
        }
        else
        {
            assert(actual is null, input);
        }
    }

    foreach (test; URLTests)
    {
        string input = test[0];
        auto expected = test[1];

        // Entries without any expected component and without a port describe
        // strings that have to be rejected.
        if (expected.length == 0 && test[2] == 0)
        {
            bool rejected = false;
            try
            {
                URL!()(input);
            }
            catch (URIException)
            {
                rejected = true;
            }
            assert(rejected);
            continue;
        }

        auto parsed = URL!()(input);
        verify(expected, "scheme", parsed.scheme, input);
        verify(expected, "user", parsed.user, input);
        verify(expected, "pass", parsed.pass, input);
        verify(expected, "host", parsed.host, input);
        assert(parsed.port == test[2], input);
        verify(expected, "path", parsed.path, input);
        verify(expected, "query", parsed.query, input);
        verify(expected, "fragment", parsed.fragment, input);
    }
}
1101 
1102 /**
1103  * Contains possible URL components that can be returned from
1104  * $(D_PSYMBOL parseURL).
1105  */
1106 enum Component : string
1107 {
1108     scheme = "scheme",
1109     host = "host",
1110     port = "port",
1111     user = "user",
1112     pass = "pass",
1113     path = "path",
1114     query = "query",
1115     fragment = "fragment",
1116 }
1117 
1118 /**
1119  * Attempts to parse an URL from a string.
1120  *
1121  * Params:
1122  *     T      = $(D_SYMBOL Component) member or $(D_KEYWORD null) for a
1123  *              struct with all components.
1124  *     U      = URL string type.
1125  *     source = The string containing the URL.
1126  *
1127  * Returns: Requested URL component(s).
1128  */
1129 U parseURL(string T, U)(in U source)
1130     if ((T == "scheme"
1131       || T =="host"
1132       || T == "user"
1133       || T == "pass"
1134       || T == "path"
1135       || T == "query"
1136       || T == "fragment") && isSomeString!U)
1137 {
1138     auto ret = URL!U(source);
1139     return mixin("ret." ~ T);
1140 }
1141 
/// ditto
auto parseURL(U)(in U source)
    if (isSomeString!U)
{
    // No component was requested: hand back the complete structure.
    auto parsed = URL!U(source);
    return parsed;
}
1148 
/// ditto
ushort parseURL(string T, U)(in U source)
    if (isSomeString!U && T == "port")
{
    // The port is the only numeric component, hence the separate overload
    // with its own return type.
    auto parsed = URL!U(source);
    return parsed.port;
}
1156 
unittest
{
    // A single component is selected through the template argument.
    immutable ushort port = parseURL!(Component.port)("http://example.org:5326");
    assert(port == 5326);

    // Without a component the whole structure is returned; its string type
    // follows the type of the argument.
    immutable dstring wide = "http://example.org:5326";
    alias Parsed = typeof(parseURL(wide));
    static assert(is(Parsed == URL!dstring));
}
1164 
private unittest
{
    // Compares a parsed component with the expectation stored under `key`.
    // A missing key means that the component has to be null.
    static void verify(string[string] expected, string key, string actual,
                       string input)
    {
        if (auto wanted = key in expected)
        {
            assert(actual == *wanted, input);
        }
        else
        {
            assert(actual is null, input);
        }
    }

    foreach (test; URLTests)
    {
        string input = test[0];
        auto expected = test[1];

        // Entries without any expected component and without a port describe
        // strings that have to be rejected.
        if (expected.length == 0 && test[2] == 0)
        {
            bool rejected = false;
            try
            {
                parseURL!(Component.port)(input);
                parseURL!(Component.user)(input);
                parseURL!(Component.pass)(input);
                parseURL!(Component.host)(input);
                parseURL!(Component.path)(input);
                parseURL!(Component.query)(input);
                parseURL!(Component.fragment)(input);
            }
            catch (URIException)
            {
                rejected = true;
            }
            assert(rejected);
            continue;
        }

        // The port is parsed first but compared after the host.
        immutable ushort port = parseURL!(Component.port)(input);

        verify(expected, "scheme", parseURL!(Component.scheme)(input), input);
        verify(expected, "user", parseURL!(Component.user)(input), input);
        verify(expected, "pass", parseURL!(Component.pass)(input), input);
        verify(expected, "host", parseURL!(Component.host)(input), input);
        assert(port == test[2], input);
        verify(expected, "path", parseURL!(Component.path)(input), input);
        verify(expected, "query", parseURL!(Component.query)(input), input);
        verify(expected, "fragment", parseURL!(Component.fragment)(input), input);
    }
}