/*
Copyright (c) 2018-2021 Timur Gafarov

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

/**
 * GC-free UTF-8 string type
 *
 * Copyright: Timur Gafarov 2018-2021.
 * License: $(LINK2 https://boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors: Timur Gafarov
 */
module dlib.text.str;

import dlib.core.memory;
import dlib.container.array;
import dlib.text.utf8;
import dlib.core.stream;

/**
 * GC-free UTF-8 string type based on dlib.container.array.
 * Stores up to 128 bytes without dynamic memory allocation,
 * so short strings are processed very fast.
 * String is always zero-terminated and directly compatible with C.
 *
 * A default-initialized or freed String holds no bytes at all (not even the
 * terminator); `length` and `toString` treat that state as the empty string.
 */
struct String
{
    /**
     * Underlying array of characters (including the trailing zero byte
     * once the string has been constructed or appended to)
     */
    Array!(char, 128) data;

    /// Appends the terminating zero byte.
    private void addZero()
    {
        data.insertBack('\0');
    }

    /// Strips the terminating zero byte, if the array holds anything at all.
    private void removeZero()
    {
        // A default-initialized or freed string has no terminator to strip.
        if (data.length > 0)
            data.removeBack(1);
    }

    /**
     * Appends one Unicode code point to `data` as 1-4 UTF-8 bytes.
     * Does not touch the terminator; callers add it afterwards.
     */
    private void appendCodePoint(uint codePoint)
    {
        if (codePoint < 0x80)
        {
            data.insertBack(cast(char)codePoint);
        }
        else if (codePoint < 0x0800)
        {
            data.insertBack(cast(char)(0xC0 | ((codePoint >> 6) & 0x1F)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint < 0x010000)
        {
            data.insertBack(cast(char)(0xE0 | ((codePoint >> 12) & 0x0F)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 6) & 0x3F)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
        else
        {
            data.insertBack(cast(char)(0xF0 | ((codePoint >> 18) & 0x07)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 12) & 0x3F)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 6) & 0x3F)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
    }

    /**
     * Construct from D string
     *
     * Params:
     *   s = bytes to copy; a terminating zero is appended after them
     */
    this(string s)
    {
        data.insertBack(s);
        addZero();
    }

    /**
     * Construct from zero-terminated C string (ASCII or UTF8)
     *
     * Params:
     *   cStr = zero-terminated byte string; `null` yields an empty String
     */
    this(const(char)* cStr)
    {
        if (cStr !is null)
        {
            // Measure up to (not including) the terminator.
            size_t len = 0;
            while (cStr[len] != 0)
            {
                len++;
            }
            if (len > 0)
                data.insertBack(cStr[0..len]);
        }
        addZero();
    }

    /**
     * Construct from zero-terminated UTF-16 LE string
     *
     * Surrogate pairs are combined into a single code point and written as
     * one 4-byte UTF-8 sequence. An unpaired surrogate is encoded as-is
     * (3 bytes), i.e. it is not validated or replaced.
     *
     * Params:
     *   wStr = zero-terminated UTF-16 string; `null` yields an empty String
     */
    this(const(wchar)* wStr)
    {
        if (wStr !is null)
        {
            const(wchar)* cursor = wStr;
            while (*cursor != 0)
            {
                uint codePoint = *cursor;
                cursor++;

                // High surrogate followed by a low surrogate: merge the pair.
                // If the string ends here, *cursor is 0 and the test fails,
                // so the terminator is never skipped.
                if (codePoint >= 0xD800 && codePoint <= 0xDBFF &&
                    *cursor >= 0xDC00 && *cursor <= 0xDFFF)
                {
                    codePoint = 0x10000
                        + ((codePoint - 0xD800) << 10)
                        + (*cursor - 0xDC00);
                    cursor++;
                }

                appendCodePoint(codePoint);
            }
        }
        addZero();
    }

    /**
     * Construct from an InputStream
     *
     * Resizes the underlying array to `istrm.size` bytes (zero-filled),
     * passes that buffer to `istrm.fillArray`, then appends the terminator.
     */
    this(InputStream istrm)
    {
        data.resize(cast(size_t)istrm.size, 0);
        istrm.fillArray(data.data);
        addZero();
    }

    /**
     * Frees the underlying array. Afterwards `length` is 0.
     */
    void free()
    {
        data.free();
    }

    /**
     * Append a D string in place (`str ~= "text"`)
     */
    auto opOpAssign(string op)(string s) if (op == "~")
    {
        removeZero();
        data.insertBack(s);
        addZero();
        return this;
    }

    /**
     * Append a single byte in place (`str ~= 'c'`)
     */
    auto opOpAssign(string op)(char c) if (op == "~")
    {
        removeZero();
        data.insertBack(c);
        addZero();
        return this;
    }

    /**
     * Append the contents of another String in place (`str ~= other`)
     */
    auto opOpAssign(string op)(String s) if (op == "~")
    {
        removeZero();
        // Append only the payload of s; its own terminator is excluded
        // by toString, and ours is re-added below.
        data.insertBack(s.toString());
        addZero();
        return this;
    }

    /**
     * Forwards `amount` unchanged to the underlying array's `reserve`.
     */
    void reserve(size_t amount)
    {
        data.reserve(amount);
    }

    /**
     * Length in bytes, not counting the terminating zero
     */
    @property size_t length() const
    {
        if (data.length == 0)
            return 0;
        else
            return data.length - 1;
    }

    /**
     * View of the contents as a D string, without the terminating zero.
     *
     * No copy is made: the result is a slice of the underlying array's
     * read-only data, so it should not be used after the String is
     * modified or freed.
     */
    @property string toString() const
    {
        if (data.length == 0)
            return "";
        else
            return cast(string)data.readOnlyData[0..$-1];
    }

    alias toString this;

    /**
     * Pointer to the underlying read-only data, for passing to C APIs.
     * NOTE(review): for a default-initialized or freed String no terminator
     * has been written; confirm what the array exposes in that state.
     */
    @property const(char)* ptr() const
    {
        return data.readOnlyData.ptr;
    }

    /**
     * Returns the underlying array's `isDynamic` flag
     */
    @property bool isDynamic()
    {
        return data.isDynamic;
    }

    /**
     * Range interface that iterates the string by Unicode code point (dchar),
     * i.e., foreach(dchar c; str.decode)
     */
    auto decode()
    {
        return UTF8Decoder().decode(toString());
    }
}

unittest
{
    String s = "hello";
    s ~= ", world";
    s ~= '!';
    assert(!s.isDynamic);
    string dStr = s;
    assert(dStr == "hello, world!");
    s.free();
    assert(s.length == 0);
}

unittest
{
    // String ~= String appends in place and keeps a single terminator
    String a = "foo";
    String b = "bar";
    a ~= b;
    assert(a.toString == "foobar");
    assert(a.length == 6);
    assert(b.toString == "bar");
    a.free();
    b.free();
}

unittest
{
    // UTF-16 input: 1-, 2-, 3-byte sequences and a surrogate pair (4 bytes)
    const(wchar)* wide = "a\u00E9\u20AC\U0001F600"w.ptr;
    String s = String(wide);
    assert(s.toString == "a\u00E9\u20AC\U0001F600");
    assert(s.length == 1 + 2 + 3 + 4);
    s.free();
}

unittest
{
    // null C pointers produce an empty string
    String n = String(cast(const(char)*)null);
    assert(n.length == 0);
    assert(n.toString == "");
    n.free();

    String w = String(cast(const(wchar)*)null);
    assert(w.length == 0);
    w.free();

    // appending to a default-initialized string
    String e;
    e ~= "x";
    assert(e.toString == "x");
    e.free();
}