/*
Copyright (c) 2016-2021 Eugene Wissner

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

/**
 * URL parser
 *
 * Copyright: Eugene Wissner 2016-2021.
 * License: $(LINK2 boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors: Eugene Wissner
 */
module dlib.network.url;

import std.ascii : isAlphaNum, isDigit;
import std.traits : isSomeString;
import std.uni : isAlpha, isNumber;
import std.uri;

version (unittest) private
{
    import std.typecons;
    // Table of (input URL, expected string components, expected port).
    // An entry with no components and port 0 is expected to be rejected.
    static Tuple!(string, string[string], ushort)[] URLTests;
}

static this()
{
    version (unittest)
    {
        URLTests = [
            tuple(`127.0.0.1`, [
                "path": "127.0.0.1",
            ], ushort(0)),

            tuple(`http://127.0.0.1`, [
                "scheme": "http",
                "host": "127.0.0.1",
            ], ushort(0)),

            tuple(`http://127.0.0.1/`, [
                "scheme": "http",
                "host": "127.0.0.1",
                "path": "/",
            ], ushort(0)),

            tuple(`127.0.0.1/`, [
                "path": "127.0.0.1/",
            ], ushort(0)),

            tuple(`127.0.0.1:60000/`, [
                "host": "127.0.0.1",
                "path": "/",
            ], ushort(60000)),

            tuple(`example.org`, [
                "path": "example.org",
            ], ushort(0)),

            tuple(`example.org/`, [
                "path": "example.org/",
            ], ushort(0)),

            tuple(`http://example.org`, [
                "scheme": "http",
                "host": "example.org",
            ], ushort(0)),

            tuple(`http://example.org/`, [
                "scheme": "http",
                "host": "example.org",
                "path": "/",
            ], ushort(0)),

            tuple(`www.example.org`, [
                "path": "www.example.org",
            ], ushort(0)),

            tuple(`www.example.org/`, [
                "path": "www.example.org/",
            ], ushort(0)),

            tuple(`http://www.example.org`, [
                "scheme": "http",
                "host": "www.example.org",
            ], ushort(0)),

            tuple(`http://www.example.org/`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
            ], ushort(0)),

            tuple(`www.example.org:2`, [
                "host": "www.example.org",
            ], ushort(2)),

            tuple(`http://www.example.org:80`, [
                "scheme": "http",
                "host": "www.example.org",
            ], ushort(80)),

            tuple(`http://www.example.org:80/`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
            ], ushort(80)),

            tuple(`http://www.example.org/index.html`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
            ], ushort(0)),

            tuple(`www.example.org/?`, [
                "path": "www.example.org/",
                "query": "",
            ], ushort(0)),

            tuple(`www.example.org:80/?`, [
                "host": "www.example.org",
                "path": "/",
                "query": "",
            ], ushort(80)),

            tuple(`http://www.example.org/?`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "",
            ], ushort(0)),

            tuple(`http://www.example.org:80/?`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "",
            ], ushort(80)),

            tuple(`http://www.example.org:80/index.html`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
            ], ushort(80)),

            tuple(`http://www.example.org:80/foo/bar/index.html`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/foo/bar/index.html",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/file.png`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/is/a/very/deep/directory/structure/and/file.png",
            ], ushort(80)),

            tuple(`http://www.example.org:80/deep/directory/structure/and/file.png?lots=1&of=2&parameters=3&too=4`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/deep/directory/structure/and/file.png",
                "query": "lots=1&of=2&parameters=3&too=4",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/is/a/very/deep/directory/structure/and/",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/is/a/very/deep/directory/structure/and/file.php`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/is/a/very/deep/directory/structure/and/file.php",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/../a/../deep/directory`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/../a/../deep/directory",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/../a/../deep/directory/`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/../a/../deep/directory/",
            ], ushort(80)),

            tuple(`http://www.example.org:80/this/is/a/very/deep/directory/../image.png`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/this/is/a/very/deep/directory/../image.png",
            ], ushort(80)),

            tuple(`http://www.example.org:80/index.html`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
            ], ushort(80)),

            tuple(`http://www.example.org:80/index.html?`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "",
            ], ushort(80)),

            tuple(`http://www.example.org:80/#foo`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "fragment": "foo",
            ], ushort(80)),

            tuple(`http://www.example.org:80/?#`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "",
                "fragment": "",
            ], ushort(80)),

            tuple(`http://www.example.org:80/?test=1`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "test=1",
            ], ushort(80)),

            tuple(`http://www.example.org/?test=1&`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "test=1&",
            ], ushort(0)),

            tuple(`http://www.example.org:80/?&`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/",
                "query": "&",
            ], ushort(80)),

            tuple(`http://www.example.org:80/index.html?test=1&`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "test=1&",
            ], ushort(80)),

            tuple(`http://www.example.org/index.html?&`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "&",
            ], ushort(0)),

            tuple(`http://www.example.org:80/index.html?foo&`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "foo&",
            ], ushort(80)),

            tuple(`http://www.example.org/index.html?&foo`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "&foo",
            ], ushort(0)),

            tuple(`http://www.example.org:80/index.html?test=1&test2=char`, [
                "scheme": "http",
                "host": "www.example.org",
                "path": "/index.html",
                "query": "test=1&test2=char",
            ], ushort(80)),

            tuple(`www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
                "host": "www.example.org",
                "path": "/index.html",
                "query": "test=1&test2=char",
                "fragment": "some_ref123",
            ], ushort(80)),

            tuple(`http://secret@www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
                "scheme": "http",
                "host": "www.example.org",
                "user": "secret",
                "path": "/index.html",
                "query": "test=1&test2=char",
                "fragment": "some_ref123",
            ], ushort(80)),

            tuple(`http://secret:@www.example.org/index.html?test=1&test2=char#some_ref123`, [
                "scheme": "http",
                "host": "www.example.org",
                "user": "secret",
                "pass": "",
                "path": "/index.html",
                "query": "test=1&test2=char",
                "fragment": "some_ref123",
            ], ushort(0)),

            tuple(`http://:hideout@www.example.org:80/index.html?test=1&test2=char#some_ref123`, [
                "scheme": "http",
                "host": "www.example.org",
                "user": "",
                "pass": "hideout",
                "path": "/index.html",
                "query": "test=1&test2=char",
                "fragment": "some_ref123",
            ], ushort(80)),

            tuple(`http://secret:hideout@www.example.org/index.html?test=1&test2=char#some_ref123`, [
                "scheme": "http",
                "host": "www.example.org",
                "user": "secret",
                "pass": "hideout",
                "path": "/index.html",
                "query": "test=1&test2=char",
                "fragment": "some_ref123",
            ], ushort(0)),

            tuple(`http://secret:hid:out@www.example.org:80/index.html?test=1&test2=int#some_ref123`, [
                "scheme": "http",
                "host": "www.example.org",
                "user": "secret",
                "pass": "hid:out",
                "path": "/index.html",
                "query": "test=1&test2=int",
                "fragment": "some_ref123",
            ], ushort(80)),

            tuple(`nntp://news.example.org`, [
                "scheme": "nntp",
                "host": "news.example.org",
            ], ushort(0)),

            tuple(`ftp://ftp.gnu.org/gnu/glic/glibc.tar.gz`, [
                "scheme": "ftp",
                "host": "ftp.gnu.org",
                "path": "/gnu/glic/glibc.tar.gz",
            ], ushort(0)),

            tuple(`zlib:http://foo@bar`, [
                "scheme": "zlib",
                "path": "http://foo@bar",
            ], ushort(0)),

            tuple(`zlib:filename.txt`, [
                "scheme": "zlib",
                "path": "filename.txt",
            ], ushort(0)),

            tuple(`zlib:/path/to/my/file/file.txt`, [
                "scheme": "zlib",
                "path": "/path/to/my/file/file.txt",
            ], ushort(0)),

            tuple(`foo://foo@bar`, [
                "scheme": "foo",
                "host": "bar",
                "user": "foo",
            ], ushort(0)),

            tuple(`mailto:me@mydomain.com`, [
                "scheme": "mailto",
                "path": "me@mydomain.com",
            ], ushort(0)),

            tuple(`/foo.php?a=b&c=d`, [
                "path": "/foo.php",
                "query": "a=b&c=d",
            ], ushort(0)),

            tuple(`foo.php?a=b&c=d`, [
                "path": "foo.php",
                "query": "a=b&c=d",
            ], ushort(0)),

            tuple(`http://user:passwd@www.example.com:8080?bar=1&boom=0`, [
                "scheme": "http",
                "host": "www.example.com",
                "user": "user",
                "pass": "passwd",
                "query": "bar=1&boom=0",
            ], ushort(8080)),

            tuple(`file:///path/to/file`, [
                "scheme": "file",
                "path": "/path/to/file",
            ], ushort(0)),

            tuple(`file://path/to/file`, [
                "scheme": "file",
                "host": "path",
                "path": "/to/file",
            ], ushort(0)),

            tuple(`file:/path/to/file`, [
                "scheme": "file",
                "path": "/path/to/file",
            ], ushort(0)),

            tuple(`http://1.2.3.4:/abc.asp?a=1&b=2`, [
                "scheme": "http",
                "host": "1.2.3.4",
                "path": "/abc.asp",
                "query": "a=1&b=2",
            ], ushort(0)),

            tuple(`http://foo.com#bar`, [
                "scheme": "http",
                "host": "foo.com",
                "fragment": "bar",
            ], ushort(0)),

            tuple(`scheme:`, [
                "scheme": "scheme",
            ], ushort(0)),

            tuple(`foo+bar://baz@bang/bla`, [
                "scheme": "foo+bar",
                "host": "bang",
                "user": "baz",
                "path": "/bla",
            ], ushort(0)),

            tuple(`gg:9130731`, [
                "scheme": "gg",
                "path": "9130731",
            ], ushort(0)),

            tuple(`http://10.10.10.10/:80`, [
                "scheme": "http",
                "host": "10.10.10.10",
                "path": "/:80",
            ], ushort(0)),

            tuple(`http://x:?`, [
                "scheme": "http",
                "host": "x",
                "query": "",
            ], ushort(0)),

            tuple(`x:blah.com`, [
                "scheme": "x",
                "path": "blah.com",
            ], ushort(0)),

            tuple(`x:/blah.com`, [
                "scheme": "x",
                "path": "/blah.com",
            ], ushort(0)),

            tuple(`http://::?`, [
                "scheme": "http",
                "host": ":",
                "query": "",
            ], ushort(0)),

            tuple(`http://::#`, [
                "scheme": "http",
                "host": ":",
                "fragment": "",
            ], ushort(0)),

            tuple(`http://?:/`, [
                "scheme": "http",
                "host": "?",
                "path": "/",
            ], ushort(0)),

            tuple(`http://@?:/`, [
                "scheme": "http",
                "host": "?",
                "user": "",
                "path": "/",
            ], ushort(0)),

            tuple(`file:///:`, [
                "scheme": "file",
                "path": "/:",
            ], ushort(0)),

            tuple(`file:///a:/`, [
                "scheme": "file",
                "path": "a:/",
            ], ushort(0)),

            tuple(`file:///ab:/`, [
                "scheme": "file",
                "path": "/ab:/",
            ], ushort(0)),

            tuple(`file:///a:/`, [
                "scheme": "file",
                "path": "a:/",
            ], ushort(0)),

            tuple(`file:///@:/`, [
                "scheme": "file",
                "path": "@:/",
            ], ushort(0)),

            tuple(`file:///:80/`, [
                "scheme": "file",
                "path": "/:80/",
            ], ushort(0)),

            tuple(`[]`, [
                "path": "[]",
            ], ushort(0)),

            tuple(`http://[x:80]/`, [
                "scheme": "http",
                "host": "[x:80]",
                "path": "/",
            ], ushort(0)),

            tuple(``, [
                "path": "",
            ], ushort(0)),

            tuple(`/`, [
                "path": "/",
            ], ushort(0)),

            tuple(`/rest/Users?filter={"id":"789"}`, [
                "path": "/rest/Users",
                "query": `filter={"id":"789"}`,
            ], ushort(0)),

            tuple(`//example.org`, [
                "host": "example.org",
            ], ushort(0)),

            tuple(`/standard/?fq=B:20001`, [
                "path": "/standard/",
                "query": "fq=B:20001",
            ], ushort(0)),

            tuple(`/standard/?fq=B:200013`, [
                "path": "/standard/",
                "query": "fq=B:200013",
            ], ushort(0)),

            tuple(`/standard/?fq=home:012345`, [
                "path": "/standard/",
                "query": "fq=home:012345",
            ], ushort(0)),

            tuple(`/standard/?fq=home:01234`, [
                "path": "/standard/",
                "query": "fq=home:01234",
            ], ushort(0)),

            tuple(`http://user:pass@host`, [
                "scheme": "http",
                "host": "host",
                "user": "user",
                "pass": "pass",
            ], ushort(0)),

            tuple(`//user:pass@host`, [
                "host": "host",
                "user": "user",
                "pass": "pass",
            ], ushort(0)),

            tuple(`//user@host`, [
                "host": "host",
                "user": "user",
            ], ushort(0)),

            tuple(`//example.org:99/hey?a=b#c=d`, [
                "host": "example.org",
                "path": "/hey",
                "query": "a=b",
                "fragment": "c=d",
            ], ushort(99)),

            tuple(`//example.org/hey?a=b#c=d`, [
                "host": "example.org",
                "path": "/hey",
                "query": "a=b",
                "fragment": "c=d",
            ], ushort(0)),

            tuple(`http://example.org/some/path.cgi?t=1#fragment?data`, [
                "scheme": "http",
                "host": "example.org",
                "path": "/some/path.cgi",
                "query": "t=1",
                "fragment": "fragment?data",
            ], ushort(0)),

            tuple(`http://example.org/some/path.cgi#fragment?data`, [
                "scheme": "http",
                "host": "example.org",
                "path": "/some/path.cgi",
                "fragment": "fragment?data",
            ], ushort(0)),

            // Malformed URLs: the constructor is expected to throw.
            tuple(`x://::abc/?`, string[string].init, ushort(0)),
            tuple(`http:///blah.com`, string[string].init, ushort(0)),
            tuple(`http://:80`, string[string].init, ushort(0)),
            tuple(`http://user@:80`, string[string].init, ushort(0)),
            tuple(`http://user:pass@:80`, string[string].init, ushort(0)),
            tuple(`http://:`, string[string].init, ushort(0)),
            tuple(`http://@/`, string[string].init, ushort(0)),
            tuple(`http://@:/`, string[string].init, ushort(0)),
            tuple(`http://:/`, string[string].init, ushort(0)),
            tuple(`http://?`, string[string].init, ushort(0)),
            tuple(`http://#`, string[string].init, ushort(0)),
            tuple(`http://:?`, string[string].init, ushort(0)),
            tuple(`http://blah.com:123456`, string[string].init, ushort(0)),
            tuple(`http://blah.com:70000`, string[string].init, ushort(0)),
            tuple(`http://blah.com:abcdef`, string[string].init, ushort(0)),
            tuple(`http://secret@hideout@www.example.org:80/index.html?test=1&test2=char#some_ref123`,
                  string[string].init,
                  ushort(0)),
            tuple(`http://user:@pass@host/path?argument?value#etc`, string[string].init, ushort(0)),
            tuple(`http://foo.com\@bar.com`, string[string].init, ushort(0)),
            tuple(`http://email@address.com:pass@example.org`, string[string].init, ushort(0)),
            tuple(`:`, string[string].init, ushort(0)),
        ];
    }
}

/**
 * A Uniform Resource Locator.
 *
 * Params:
 *  U = URL string type.
 */
struct URL(U = string)
    if (isSomeString!U)
{
    /** The URL scheme. */
    U scheme;

    /** The username. */
    U user;

    /** The password. */
    U pass;

    /** The hostname. */
    U host;

    /** The port number. */
    ushort port;

    /** The path. */
    U path;

    /** The query string. */
    U query;

    /** The anchor. */
    U fragment;

    /**
     * Attempts to parse a URL from a string.
     * Output string data (scheme, user, etc.) are just slices of input string (e.g., no memory allocation and copying).
     *
     * Params:
     *  source = The string containing the URL.
     *
     * Throws: $(D_PSYMBOL URIException) if the URL is malformed.
     */
    this(U source)
    {
        auto value = source;
        ptrdiff_t pos = -1, endPos = value.length, start;

        // Find the first colon (pos) and the first '?' or '#' (endPos)
        foreach (i, ref c; source)
        {
            if (pos == -1 && c == ':')
            {
                pos = i;
            }
            if (endPos == value.length && (c == '?' || c == '#'))
            {
                endPos = i;
            }
        }

        // Check if the colon is a part of the scheme or the port and parse
        // the appropriate part
        if (value.length > 1 && value[0] == '/' && value[1] == '/')
        {
            // Relative scheme
            start = 2;
        }
        else if (pos > 0)
        {
            // Validate scheme
            // [ toLower(alpha) | digit | "+" | "-" | "." ]
            foreach (ref c; value[0..pos])
            {
                if (!c.isAlphaNum && c != '+' && c != '-' && c != '.')
                {
                    // Not a scheme; the colon may still introduce a port
                    // if it precedes the query/fragment
                    if (endPos > pos)
                    {
                        if (!parsePort(value[pos..$]))
                        {
                            throw new URIException("Failed to parse port");
                        }
                    }
                    goto ParsePath;
                }
            }

            if (value.length == pos + 1) // only scheme is available
            {
                scheme = value[0 .. $ - 1];
                return;
            }
            else if (value.length > pos + 1 && value[pos + 1] == '/')
            {
                scheme = value[0..pos];

                if (value.length > pos + 2 && value[pos + 2] == '/')
                {
                    start = pos + 3;
                    if (scheme == "file" && value.length > start && value[start] == '/')
                    {
                        // Windows drive letters
                        if (value.length - start > 2 && value[start + 2] == ':')
                        {
                            ++start;
                        }
                        goto ParsePath;
                    }
                }
                else
                {
                    start = pos + 1;
                    goto ParsePath;
                }
            }
            else // certain schemas like mailto: and zlib: may not have any / after them
            {

                if (!parsePort(value[pos..$]))
                {
                    scheme = value[0..pos];
                    start = pos + 1;
                    goto ParsePath;
                }
            }
        }
        else if (pos == 0 && parsePort(value[pos..$]))
        {
            // An URL shouldn't begin with a port number
            throw new URIException("URL begins with port");
        }
        else
        {
            goto ParsePath;
        }

        // Parse host: remember the last '@' before the first '/' and stop
        // the authority at that '/'
        pos = -1;
        for (ptrdiff_t i = start; i < value.length; ++i)
        {
            if (value[i] == '@')
            {
                pos = i;
            }
            else if (value[i] == '/')
            {
                endPos = i;
                break;
            }
        }

        // Check for login and password
        if (pos != -1)
        {
            // *( unreserved / pct-encoded / sub-delims / ":" )
            foreach (i, c; value[start..pos])
            {
                if (c == ':')
                {
                    // Only the first colon separates user and password
                    if (user is null)
                    {
                        user = value[start .. start + i];
                        pass = value[start + i + 1 .. pos];
                    }
                }
                else if (!c.isAlpha &&
                         !c.isNumber &&
                         c != '!' &&
                         c != ';' &&
                         c != '=' &&
                         c != '_' &&
                         c != '~' &&
                         !(c >= '$' && c <= '.'))
                {
                    // No field cleanup is needed before throwing: a
                    // constructor that throws does not produce an object.
                    throw new URIException("Restricted characters in user information");
                }
            }
            if (user is null)
            {
                user = value[start..pos];
            }

            start = ++pos;
        }

        pos = endPos;
        // `start >= endPos` means the authority is empty ("http://", "//",
        // "http://user@"); checking it first keeps value[start] in range and
        // lets such input be rejected as an invalid host below.
        if (start >= endPos
            || endPos <= 1
            || value[start] != '['
            || value[endPos - 1] != ']')
        {
            // Short circuit portscan
            // IPv6 embedded address
            for (ptrdiff_t i = endPos - 1; i >= start; --i)
            {
                if (value[i] == ':')
                {
                    pos = i;
                    if (port == 0 && !parsePort(value[i..endPos]))
                    {
                        throw new URIException("Invalid port");
                    }
                    break;
                }
            }
        }

        // Check if we have a valid host, if we don't reject the string as url
        if (pos <= start)
        {
            throw new URIException("Invalid host");
        }

        host = value[start..pos];

        if (endPos == value.length)
        {
            return;
        }

        start = endPos;

    ParsePath:
        // Split the remainder into path, query (after the first '?') and
        // fragment (after the first '#')
        endPos = value.length;
        pos = -1;
        foreach (i, ref c; value[start..$])
        {
            if (c == '?' && pos == -1)
            {
                pos = start + i;
            }
            else if (c == '#')
            {
                endPos = start + i;
                break;
            }
        }
        if (pos == -1)
        {
            pos = endPos;
        }

        if (pos > start)
        {
            path = value[start..pos];
        }
        if (endPos >= ++pos)
        {
            query = value[pos..endPos];
        }
        if (++endPos <= value.length)
        {
            fragment = value[endPos..$];
        }
    }

    ~this()
    {
        if (scheme !is null)
        {
            scheme = null;
        }
        if (user !is null)
        {
            user = null;
        }
        if (pass !is null)
        {
            pass = null;
        }
        if (host !is null)
        {
            host = null;
        }
        if (path !is null)
        {
            path = null;
        }
        if (query !is null)
        {
            query = null;
        }
        if (fragment !is null)
        {
            fragment = null;
        }
    }

    /**
     * Attempts to parse and set the port.
     *
     * The number is accumulated with integer arithmetic so the stored value
     * is exact. At most six digits are read; a value above
     * $(D_PSYMBOL ushort.max) is rejected.
     *
     * Params:
     *  port = String beginning with a colon followed by the port number and
     *         an optional path (query string and/or fragment), like:
     *         `:12345/some_path` or `:12345`.
     *
     * Returns: Whether the port could be found. A colon followed by no digits
     *          (end of string or `/`) also yields $(D_KEYWORD true) and
     *          leaves the port untouched.
     */
    private bool parsePort(U port) pure nothrow @safe @nogc
    {
        size_t i = 1; // Skip the leading colon
        uint number = 0;

        for (; i < port.length && i <= 6 && port[i].isDigit(); ++i)
        {
            number = number * 10 + cast(uint) (port[i] - '0');
        }
        // The digits must be followed by the end of the string or a path
        if (i < port.length && port[i] != '/')
        {
            return false;
        }
        if (i == 1)
        {
            // Empty port, e.g. "host:" or "host:/path"
            return true;
        }
        if (number > ushort.max)
        {
            return false;
        }
        this.port = cast(ushort) number;
        return true;
    }
}

unittest
{
    auto u = URL!()("example.org");
    assert(u.path == "example.org");

    u = URL!()("relative/path");
    assert(u.path == "relative/path");

    // Host and scheme
    u = URL!()("https://example.org");
    assert(u.scheme == "https");
    assert(u.host == "example.org");
    assert(u.path is null);
    assert(u.port == 0);
    assert(u.fragment is null);

    // With user and port and path
    u = URL!()("https://hilary:putnam@example.org:443/foo/bar");
    assert(u.scheme == "https");
    assert(u.host == "example.org");
    assert(u.path == "/foo/bar");
    assert(u.port == 443);
    assert(u.user == "hilary");
    assert(u.pass == "putnam");
    assert(u.fragment is null);

    // With query string
    u = URL!()("https://example.org/?login=true");
    assert(u.scheme == "https");
    assert(u.host == "example.org");
    assert(u.path == "/");
    assert(u.query == "login=true");
    assert(u.fragment is null);

    // With query string and fragment
    u = URL!()("https://example.org/?login=false#label");
    assert(u.scheme == "https");
    assert(u.host == "example.org");
    assert(u.path == "/");
    assert(u.query == "login=false");
    assert(u.fragment == "label");

    u = URL!()("redis://root:password@localhost:2201/path?query=value#fragment");
    assert(u.scheme == "redis");
    assert(u.user == "root");
    assert(u.pass == "password");
    assert(u.host == "localhost");
    assert(u.port == 2201);
    assert(u.path == "/path");
    assert(u.query == "query=value");
    assert(u.fragment == "fragment");
}

private unittest
{
    foreach(t; URLTests)
    {
        if (t[1].length == 0 && t[2] == 0)
        {
            try
            {
                URL!()(t[0]);
                assert(0);
            }
            catch (URIException e)
            {
                assert(1);
            }
        }
        else
        {
            auto u = URL!()(t[0]);
            assert("scheme" in t[1] ? u.scheme == t[1]["scheme"] : u.scheme is null,
                   t[0]);
            assert("user" in t[1] ? u.user == t[1]["user"] : u.user is null, t[0]);
            assert("pass" in t[1] ? u.pass == t[1]["pass"] : u.pass is null, t[0]);
            assert("host" in t[1] ? u.host == t[1]["host"] : u.host is null, t[0]);
            assert(u.port == t[2], t[0]);
            assert("path" in t[1] ? u.path == t[1]["path"] : u.path is null, t[0]);
            assert("query" in t[1] ? u.query == t[1]["query"] : u.query is null, t[0]);
            if ("fragment" in t[1])
            {
                assert(u.fragment == t[1]["fragment"], t[0]);
            }
            else
            {
                assert(u.fragment is null, t[0]);
            }
        }
    }
}

/**
 * Contains possible URL components that can be returned from
 * $(D_PSYMBOL parseURL).
 */
enum Component : string
{
    scheme = "scheme",
    host = "host",
    port = "port",
    user = "user",
    pass = "pass",
    path = "path",
    query = "query",
    fragment = "fragment",
}

/**
 * Attempts to parse a URL from a string.
 *
 * Params:
 *  T      = $(D_SYMBOL Component) member; omit it to get a
 *           struct with all components.
 *  U      = URL string type.
 *  source = The string containing the URL.
 *
 * Returns: Requested URL component(s).
 *
 * Throws: $(D_PSYMBOL URIException) if the URL is malformed.
 */
U parseURL(string T, U)(in U source)
    if ((T == "scheme"
      || T =="host"
      || T == "user"
      || T == "pass"
      || T == "path"
      || T == "query"
      || T == "fragment") && isSomeString!U)
{
    auto ret = URL!U(source);
    return mixin("ret." ~ T);
}

/** ditto */
auto parseURL(U)(in U source)
    if (isSomeString!U)
{
    return URL!U(source);
}

/** ditto */
ushort parseURL(string T, U)(in U source)
    if (T == "port" && isSomeString!U)
{
    auto ret = URL!U(source);
    return ret.port;
}

unittest
{
    assert(parseURL!(Component.port)("http://example.org:5326") == 5326);

    immutable dstring url = "http://example.org:5326";
    static assert(is(typeof(parseURL(url)) == URL!dstring));
}

private unittest
{
    foreach(t; URLTests)
    {
        if (t[1].length == 0 && t[2] == 0)
        {
            try
            {
                parseURL!(Component.port)(t[0]);
                parseURL!(Component.user)(t[0]);
                parseURL!(Component.pass)(t[0]);
                parseURL!(Component.host)(t[0]);
                parseURL!(Component.path)(t[0]);
                parseURL!(Component.query)(t[0]);
                parseURL!(Component.fragment)(t[0]);
                assert(0);
            }
            catch (URIException e)
            {
                assert(1);
            }
        }
        else
        {
            ushort port = parseURL!(Component.port)(t[0]);
            string component = parseURL!(Component.scheme)(t[0]);
            assert("scheme" in t[1] ? component == t[1]["scheme"] : component is null,
                   t[0]);
            component = parseURL!(Component.user)(t[0]);
            assert("user" in t[1] ? component == t[1]["user"] : component is null,
                   t[0]);
            component = parseURL!(Component.pass)(t[0]);
            assert("pass" in t[1] ? component == t[1]["pass"] : component is null,
                   t[0]);
            component = parseURL!(Component.host)(t[0]);
            assert("host" in t[1] ? component == t[1]["host"] : component is null,
                   t[0]);
            assert(port == t[2], t[0]);
            component = parseURL!(Component.path)(t[0]);
            assert("path" in t[1] ? component == t[1]["path"] : component is null,
                   t[0]);
            component = parseURL!(Component.query)(t[0]);
            assert("query" in t[1] ? component == t[1]["query"] : component is null,
                   t[0]);
            component = parseURL!(Component.fragment)(t[0]);
            if ("fragment" in t[1])
            {
                assert(component == t[1]["fragment"], t[0]);
            }
            else
            {
                assert(component is null, t[0]);
            }
        }
    }
}