1 /*
2 Copyright (c) 2018-2021 Timur Gafarov
3 
4 Boost Software License - Version 1.0 - August 17th, 2003
5 
6 Permission is hereby granted, free of charge, to any person or organization
7 obtaining a copy of the software and accompanying documentation covered by
8 this license (the "Software") to use, reproduce, display, distribute,
9 execute, and transmit the Software, and to prepare derivative works of the
10 Software, and to permit third-parties to whom the Software is furnished to
11 do so, all subject to the following:
12 
13 The copyright notices in the Software and this entire statement, including
14 the above license grant, this restriction and the following disclaimer,
15 must be included in all copies of the Software, in whole or in part, and
16 all derivative works of the Software, unless such copies or derivative
17 works are solely in the form of machine-executable object code generated by
18 a source language processor.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
23 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
24 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
25 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.
27 */
28 
29 /**
30  * GC-free UTF-8 string type
31  *
32  * Copyright: Timur Gafarov 2018-2021.
33  * License: $(LINK2 https://boost.org/LICENSE_1_0.txt, Boost License 1.0).
34  * Authors: Timur Gafarov
35  */
36 module dlib.text.str;
37 
38 import dlib.core.memory;
39 import dlib.container.array;
40 import dlib.text.utf8;
41 import dlib.core.stream;
42 
43 /**
44  * GC-free UTF-8 string type based on dlib.container.array.
45  * Stores up to 128 bytes without dynamic memory allocation,
46  * so short strings are processed very fast.
47  * String is always zero-terminated and directly compatible with C.
48  */
struct String
{
    /**
     * Underlying array of characters.
     * When non-empty, the last element is the terminating zero byte.
     */
    Array!(char, 128) data;

    /// Appends the terminating zero byte.
    private void addZero()
    {
        data.insertBack('\0');
    }

    /// Drops the terminating zero byte, if there is one.
    private void removeZero()
    {
        // A default-initialized or freed String holds no terminator,
        // so there is nothing to remove in that case.
        if (data.length > 0)
            data.removeBack(1);
    }

    /**
     * Appends one Unicode code point encoded as UTF-8 (1 to 4 bytes).
     * Values of 0x110000 and above are outside Unicode and are ignored.
     */
    private void appendCodePoint(uint codePoint)
    {
        if (codePoint < 0x80)
        {
            data.insertBack(cast(char)codePoint);
        }
        else if (codePoint < 0x0800)
        {
            data.insertBack(cast(char)(0xC0 | (codePoint >> 6)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint < 0x010000)
        {
            data.insertBack(cast(char)(0xE0 | (codePoint >> 12)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 6) & 0x3F)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
        else if (codePoint < 0x110000)
        {
            data.insertBack(cast(char)(0xF0 | (codePoint >> 18)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 12) & 0x3F)));
            data.insertBack(cast(char)(0x80 | ((codePoint >> 6) & 0x3F)));
            data.insertBack(cast(char)(0x80 | (codePoint & 0x3F)));
        }
    }

    /**
     * Construct from D string
     */
    this(string s)
    {
        data.insertBack(s);
        addZero();
    }

    /**
     * Construct from zero-terminated C string (ASCII or UTF8).
     * A null pointer produces an empty string.
     */
    this(const(char)* cStr)
    {
        if (cStr !is null)
        {
            // Measure the C string up to (not including) its terminator
            size_t offset = 0;
            while(cStr[offset] != 0)
            {
                offset++;
            }
            if (offset > 0)
                data.insertBack(cStr[0..offset]);
        }
        addZero();
    }

    /**
     * Construct from zero-terminated UTF-16 LE string.
     * Surrogate pairs are combined into a single code point before
     * being encoded; an unpaired surrogate is replaced with U+FFFD.
     * A null pointer produces an empty string.
     */
    this(const(wchar)* wStr)
    {
        if (wStr !is null)
        {
            const(wchar)* cursor = wStr;
            while(*cursor != 0)
            {
                uint codePoint = *cursor;
                cursor++;

                if (codePoint >= 0xD800 && codePoint <= 0xDBFF)
                {
                    // High surrogate: peek at the next unit. If it is the
                    // terminator it is not consumed, so the loop still ends.
                    uint low = *cursor;
                    if (low >= 0xDC00 && low <= 0xDFFF)
                    {
                        codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                        cursor++;
                    }
                    else
                    {
                        codePoint = 0xFFFD;
                    }
                }
                else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF)
                {
                    // Low surrogate without a preceding high surrogate
                    codePoint = 0xFFFD;
                }

                appendCodePoint(codePoint);
            }
        }
        addZero();
    }

    /**
     * Construct from an InputStream.
     * The array is resized to istrm.size elements, handed to
     * istrm.fillArray, and then zero-terminated.
     */
    this(InputStream istrm)
    {
        data.resize(cast(size_t)istrm.size, 0);
        istrm.fillArray(data.data);
        addZero();
    }

    /**
     * Release the underlying array. The string is empty afterwards.
     */
    void free()
    {
        data.free();
    }

    /**
     * Append a D string (str ~= "text")
     */
    auto opOpAssign(string op)(string s) if (op == "~")
    {
        removeZero();
        data.insertBack(s);
        addZero();
        return this;
    }

    /**
     * Append a single char (str ~= 'c')
     */
    auto opOpAssign(string op)(char c) if (op == "~")
    {
        removeZero();
        data.insertBack(c);
        addZero();
        return this;
    }

    /**
     * Append the contents of another String (str ~= otherStr)
     */
    auto opOpAssign(string op)(String s) if (op == "~")
    {
        // Append to this string itself. The argument's text is taken via
        // toString() so that its own terminator is not copied and so that
        // this overload is not re-entered.
        // NOTE(review): s is a by-value copy of the argument; whether a
        // heap-backed copy shares its buffer with the source depends on
        // Array's copy semantics - confirm before relying on `s ~= s`
        // for strings that have gone dynamic.
        removeZero();
        data.insertBack(s.toString());
        addZero();
        return this;
    }

    /**
     * Forwards to the underlying array's reserve with the given amount
     */
    void reserve(size_t amount)
    {
        data.reserve(amount);
    }

    /**
     * Length of the string in bytes, not counting the terminating zero
     */
    @property size_t length() const
    {
        if (data.length == 0)
            return 0;
        else
            return data.length - 1;
    }

    /**
     * The contents as a D string, without the terminating zero.
     * The result is a slice of the internal storage, not a copy.
     */
    @property string toString() const
    {
        if (data.length == 0)
            return "";
        else
            return cast(string)data.readOnlyData[0..$-1];
    }

    alias toString this;

    /**
     * Pointer to the first byte of the underlying storage
     */
    @property const(char)* ptr() const
    {
        return data.readOnlyData.ptr;
    }

    /**
     * Reports the underlying array's isDynamic flag
     */
    @property bool isDynamic()
    {
        return data.isDynamic;
    }

    /**
     * Range interface that iterates the string by Unicode code point (dchar),
     * i.e., foreach(dchar c; str.decode)
     */
    auto decode()
    {
        return UTF8Decoder().decode(toString());
    }
}
214 
unittest
{
    // Build a short string piece by piece: first from a D string,
    // then by appending another D string and a single char.
    auto greeting = String("hello");
    greeting ~= ", world";
    greeting ~= '!';

    // The result is short, so no dynamic storage is expected.
    assert(!greeting.isDynamic);

    // Implicit conversion to a D string goes through alias this.
    string asDString = greeting;
    assert(asDString == "hello, world!");

    // A freed string reports zero length.
    greeting.free();
    assert(greeting.length == 0);
}