|  | // Written in the D programming language. | 
|  |  | 
|  | /++ | 
|  | Encode and decode UTF-8, UTF-16 and UTF-32 strings. | 
|  |  | 
|  | UTF character support is restricted to | 
|  | $(D '\u0000' <= character <= '\U0010FFFF'). | 
|  |  | 
|  | $(SCRIPT inhibitQuickIndex = 1;) | 
|  | $(DIVC quickindex, | 
|  | $(BOOKTABLE, | 
|  | $(TR $(TH Category) $(TH Functions)) | 
|  | $(TR $(TD Decode) $(TD | 
|  | $(LREF decode) | 
|  | $(LREF decodeFront) | 
|  | )) | 
|  | $(TR $(TD Lazy decode) $(TD | 
|  | $(LREF byCodeUnit) | 
|  | $(LREF byChar) | 
|  | $(LREF byWchar) | 
|  | $(LREF byDchar) | 
|  | $(LREF byUTF) | 
|  | )) | 
|  | $(TR $(TD Encode) $(TD | 
|  | $(LREF encode) | 
|  | $(LREF toUTF8) | 
|  | $(LREF toUTF16) | 
|  | $(LREF toUTF32) | 
|  | $(LREF toUTFz) | 
|  | $(LREF toUTF16z) | 
|  | )) | 
|  | $(TR $(TD Length) $(TD | 
|  | $(LREF codeLength) | 
|  | $(LREF count) | 
|  | $(LREF stride) | 
|  | $(LREF strideBack) | 
|  | )) | 
|  | $(TR $(TD Index) $(TD | 
|  | $(LREF toUCSindex) | 
|  | $(LREF toUTFindex) | 
|  | )) | 
|  | $(TR $(TD Validation) $(TD | 
|  | $(LREF isValidDchar) | 
|  | $(LREF isValidCodepoint) | 
|  | $(LREF validate) | 
|  | )) | 
|  | $(TR $(TD Miscellaneous) $(TD | 
|  | $(LREF replacementDchar) | 
|  | $(LREF UseReplacementDchar) | 
|  | $(LREF UTFException) | 
|  | )) | 
|  | )) | 
|  | See_Also: | 
|  | $(LINK2 http://en.wikipedia.org/wiki/Unicode, Wikipedia)<br> | 
|  | $(LINK http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8)<br> | 
|  | $(LINK http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335) | 
|  | Copyright: Copyright The D Language Foundation 2000 - 2012. | 
|  | License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). | 
|  | Authors:   $(HTTP digitalmars.com, Walter Bright) and | 
|  | $(HTTP jmdavisprog.com, Jonathan M Davis) | 
|  | Source:    $(PHOBOSSRC std/utf.d) | 
|  | +/ | 
|  | module std.utf; | 
|  |  | 
|  | import std.exception : basicExceptionCtors; | 
|  | import core.exception : UnicodeException; | 
|  | import std.meta : AliasSeq; | 
|  | import std.range; | 
|  | import std.traits : isAutodecodableString, isConvertibleToString, | 
|  | isSomeChar, isSomeString, isStaticArray, Unqual; | 
|  | import std.typecons : Flag, Yes, No; | 
|  |  | 
|  |  | 
|  | /++ | 
|  | Exception thrown on errors in std.utf functions. | 
|  | +/ | 
|  | class UTFException : UnicodeException | 
|  | { | 
|  | import core.internal.string : unsignedToTempString, UnsignedStringBuf; | 
|  |  | 
|  | // Code units of the offending sequence; only the first `len` entries are meaningful. | 
|  | uint[4] sequence; | 
|  | // Number of entries of `sequence` in use; 0 means no sequence was recorded. | 
|  | size_t  len; | 
|  |  | 
|  | // Stores up to four code units of the offending sequence and returns | 
|  | // this exception so the call can be chained. | 
|  | @safe pure nothrow @nogc | 
|  | UTFException setSequence(scope uint[] data...) return | 
|  | { | 
|  | assert(data.length <= 4); | 
|  |  | 
|  | // Clamp the count so the copy below never exceeds `sequence`, | 
|  | // even when the assert above is compiled out. | 
|  | len = data.length; | 
|  | if (len > 4) | 
|  | len = 4; | 
|  | sequence[0 .. len] = data[0 .. len]; | 
|  |  | 
|  | return this; | 
|  | } | 
|  |  | 
|  | // FIXME: Use std.exception.basicExceptionCtors here once | 
|  | // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed | 
|  |  | 
|  | /** | 
|  | Standard exception constructors. | 
|  | */ | 
|  | this(string msg, string file = __FILE__, size_t line = __LINE__, | 
|  | Throwable next = null) @nogc @safe pure nothrow | 
|  | { | 
|  | // No position is known here, so index 0 is handed to the base class. | 
|  | super(msg, 0, file, line, next); | 
|  | } | 
|  | /// ditto | 
|  | this(string msg, size_t index, string file = __FILE__, | 
|  | size_t line = __LINE__, Throwable next = null) @safe pure nothrow | 
|  | { | 
|  | // The index is appended to the message text as well as passed on. | 
|  | UnsignedStringBuf digits = void; | 
|  | msg ~= " (at index " ~ unsignedToTempString(index, digits) ~ ")"; | 
|  | super(msg, index, file, line, next); | 
|  | } | 
|  |  | 
|  | /** | 
|  | Returns: | 
|  | A `string` detailing the invalid UTF sequence. | 
|  | */ | 
|  | override string toString() const | 
|  | { | 
|  | if (!len) | 
|  | { | 
|  | /* No sequence was recorded, so use the inherited formatting. | 
|  | * Exception.toString() is not marked as const, although it is | 
|  | * const-compatible, hence the @trusted cast. | 
|  | */ | 
|  | auto unqualified = () @trusted { return cast(Exception) super; } (); | 
|  | return unqualified.toString(); | 
|  | } | 
|  |  | 
|  | string text = "Invalid UTF sequence:"; | 
|  |  | 
|  | foreach (unit; sequence[0 .. len]) | 
|  | { | 
|  | UnsignedStringBuf digits = void; | 
|  | auto hex = unsignedToTempString!16(unit, digits); | 
|  | text ~= ' '; | 
|  | // Pad single-digit values to two hex digits. | 
|  | if (hex.length == 1) | 
|  | text ~= '0'; | 
|  | text ~= hex; | 
|  | text ~= 'x'; | 
|  | } | 
|  |  | 
|  | // Append the message only when there is one. | 
|  | if (super.msg.length) | 
|  | { | 
|  | text ~= " - "; | 
|  | text ~= super.msg; | 
|  | } | 
|  |  | 
|  | return text; | 
|  | } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception : assertThrown; | 
|  |  | 
|  | // Each value below is either inside 0xD800 .. 0xDFFF or above 0x10FFFF; | 
|  | // every `encode` call is expected to throw a UTFException. | 
|  | char[4] buf; | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  | } | 
|  |  | 
|  | /* | 
|  | Provide array of invalidly encoded UTF strings. Useful for testing. | 
|  |  | 
|  | Params: | 
|  | Char = char, wchar, or dchar | 
|  |  | 
|  | Returns: | 
|  | an array of invalidly encoded UTF strings | 
|  | */ | 
|  |  | 
|  | package auto invalidUTFstrings(Char)() @safe pure @nogc nothrow | 
|  | if (isSomeChar!Char) | 
|  | { | 
|  | // Every branch returns a slice of a static immutable table, so the | 
|  | // result type is a dynamic array for all three character types. | 
|  | static if (is(Char == char)) | 
|  | { | 
|  | enum x = 0xDC00;         // invalid surrogate value | 
|  | enum y = 0x110000;       // out of range | 
|  |  | 
|  | static immutable string[8] result = | 
|  | [ | 
|  | "\x80",             // not a start byte | 
|  | "\xC0",             // truncated | 
|  | "\xC0\xC0",         // invalid continuation | 
|  | "\xF0\x82\x82\xAC", // overlong | 
|  | [ | 
|  | // 3 byte encoding of the surrogate value `x` | 
|  | 0xE0 | (x >> 12), | 
|  | 0x80 | ((x >> 6) & 0x3F), | 
|  | 0x80 | (x & 0x3F) | 
|  | ], | 
|  | [ | 
|  | // 4 byte encoding of the out-of-range value `y` | 
|  | cast(char)(0xF0 | (y >> 18)), | 
|  | cast(char)(0x80 | ((y >> 12) & 0x3F)), | 
|  | cast(char)(0x80 | ((y >> 6) & 0x3F)), | 
|  | cast(char)(0x80 | (y & 0x3F)) | 
|  | ], | 
|  | [ | 
|  | cast(char)(0xF8 | 3),     // 5 byte encoding | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | ], | 
|  | [ | 
|  | cast(char)(0xFC | 3),     // 6 byte encoding | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | cast(char)(0x80 | 3), | 
|  | ], | 
|  | ]; | 
|  |  | 
|  | return result[]; | 
|  | } | 
|  | else static if (is(Char == wchar)) | 
|  | { | 
|  | static immutable wstring[5] result = | 
|  | [ | 
|  | [ | 
|  | cast(wchar) 0xDC00,     // lone low surrogate | 
|  | ], | 
|  | [ | 
|  | cast(wchar) 0xDFFF,     // lone low surrogate | 
|  | ], | 
|  | [ | 
|  | cast(wchar) 0xDBFF,     // high surrogate followed by another high surrogate | 
|  | cast(wchar) 0xDBFF, | 
|  | ], | 
|  | [ | 
|  | cast(wchar) 0xDBFF,     // high surrogate followed by a non-surrogate | 
|  | cast(wchar) 0xE000, | 
|  | ], | 
|  | [ | 
|  | cast(wchar) 0xD800,     // lone high surrogate | 
|  | ], | 
|  | ]; | 
|  |  | 
|  | return result[]; | 
|  | } | 
|  | else static if (is(Char == dchar)) | 
|  | { | 
|  | static immutable dstring[3] result = | 
|  | [ | 
|  | [ cast(dchar) 0x110000 ], | 
|  | [ cast(dchar) 0x00D800 ], | 
|  | [ cast(dchar) 0x00DFFF ], | 
|  | ]; | 
|  |  | 
|  | // Slice, like the char and wchar branches, instead of returning | 
|  | // the static array by value. | 
|  | return result[]; | 
|  | } | 
|  | else | 
|  | static assert(0); | 
|  | } | 
|  |  | 
|  | /++ | 
|  | Check whether the given Unicode code point is valid. | 
|  |  | 
|  | Params: | 
|  | c = code point to check | 
|  |  | 
|  | Returns: | 
|  | `true` if and only if `c` is a valid Unicode code point | 
|  |  | 
|  | Note: | 
|  | `'\uFFFE'` and `'\uFFFF'` are considered valid by `isValidDchar`, | 
|  | as they are permitted for internal use by an application, but they are | 
|  | not allowed for interchange by the Unicode standard. | 
|  | +/ | 
|  | bool isValidDchar(dchar c) pure nothrow @safe @nogc | 
|  | { | 
|  | // Everything below the surrogate block is accepted outright. | 
|  | if (c < 0xD800) | 
|  | return true; | 
|  |  | 
|  | // Otherwise c must lie above the surrogate block (0xD800 .. 0xDFFF) | 
|  | // and not exceed 0x10FFFF. | 
|  | return 0xDFFF < c && c <= 0x10FFFF; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe @nogc pure nothrow unittest | 
|  | { | 
|  | // Two ordinary values are accepted; 0xD800 and a value above 0x10FFFF are rejected. | 
|  | assert( isValidDchar(cast(dchar) 0x41)); | 
|  | assert( isValidDchar(cast(dchar) 0x00)); | 
|  | assert(!isValidDchar(cast(dchar) 0xD800)); | 
|  | assert(!isValidDchar(cast(dchar) 0x11FFFF)); | 
|  | } | 
|  |  | 
|  | pure nothrow @safe @nogc unittest | 
|  | { | 
|  | import std.exception; | 
|  |  | 
|  | // The same checks are wrapped in assertCTFEable; they cover the boundaries | 
|  | // of the 0xD800 .. 0xDFFF block and of the 0x10FFFF upper limit. | 
|  | assertCTFEable!( | 
|  | { | 
|  | assert( isValidDchar(cast(dchar)'a') == true); | 
|  | assert( isValidDchar(cast(dchar) 0x1FFFFF) == false); | 
|  |  | 
|  | assert(!isValidDchar(cast(dchar) 0x00D800)); | 
|  | assert(!isValidDchar(cast(dchar) 0x00DBFF)); | 
|  | assert(!isValidDchar(cast(dchar) 0x00DC00)); | 
|  | assert(!isValidDchar(cast(dchar) 0x00DFFF)); | 
|  | // 0xFFFE and 0xFFFF are deliberately accepted by isValidDchar. | 
|  | assert( isValidDchar(cast(dchar) 0x00FFFE)); | 
|  | assert( isValidDchar(cast(dchar) 0x00FFFF)); | 
|  | assert( isValidDchar(cast(dchar) 0x01FFFF)); | 
|  | assert( isValidDchar(cast(dchar) 0x10FFFF)); | 
|  | assert(!isValidDchar(cast(dchar) 0x110000)); | 
|  | }); | 
|  | } | 
|  |  | 
|  | /** | 
|  | Checks if a single character forms a valid code point. | 
|  |  | 
|  | When standing alone, some characters are invalid code points. For | 
|  | example the `wchar` `0xD800` is a so called high surrogate, which can | 
|  | only be interpreted together with a low surrogate following it. As a | 
|  | standalone character it is considered invalid. | 
|  |  | 
|  | See $(LINK2 http://www.unicode.org/versions/Unicode13.0.0/, | 
|  | Unicode Standard, D90, D91 and D92) for more details. | 
|  |  | 
|  | Params: | 
|  | c = character to test | 
|  | Char = character type of `c` | 
|  |  | 
|  | Returns: | 
|  | `true`, if `c` forms a valid code point. | 
|  | */ | 
|  | bool isValidCodepoint(Char)(Char c) | 
|  | if (isSomeChar!Char) | 
|  | { | 
|  | // Dispatch on the character type with qualifiers stripped. | 
|  | alias Bare = Unqual!Char; | 
|  | static if (is(Bare == dchar)) | 
|  | { | 
|  | // UTF-32: defer to the code point check. | 
|  | return isValidDchar(c); | 
|  | } | 
|  | else static if (is(Bare == wchar)) | 
|  | { | 
|  | // UTF-16: anything outside 0xD800 .. 0xDFFF stands alone. | 
|  | return !(c > 0xD7FF && c < 0xE000); | 
|  | } | 
|  | else static if (is(Bare == char)) | 
|  | { | 
|  | // UTF-8: only values up to 0x7F stand alone. | 
|  | return !(c > 0x7F); | 
|  | } | 
|  | else | 
|  | static assert(false, "unknown character type: `" ~ Char.stringof ~ "`"); | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure nothrow unittest | 
|  | { | 
|  | // One accepted and one rejected value for each of char, wchar and dchar. | 
|  | assert( isValidCodepoint(cast(char) 0x40)); | 
|  | assert(!isValidCodepoint(cast(char) 0x80)); | 
|  | assert( isValidCodepoint(cast(wchar) 0x1234)); | 
|  | assert(!isValidCodepoint(cast(wchar) 0xD800)); | 
|  | assert( isValidCodepoint(cast(dchar) 0x0010FFFF)); | 
|  | assert(!isValidCodepoint(cast(dchar) 0x12345678)); | 
|  | } | 
|  |  | 
|  | /++ | 
|  | Calculate the length of the UTF sequence starting at `index` | 
|  | in `str`. | 
|  |  | 
|  | Params: | 
|  | str = $(REF_ALTTEXT input range, isInputRange, std,range,primitives) | 
|  | of UTF code units. Must be random access if `index` is passed | 
|  | index = starting index of UTF sequence (default: `0`) | 
|  |  | 
|  | Returns: | 
|  | The number of code units in the UTF sequence. For UTF-8, this is a | 
|  | value between 1 and 4 (as per $(HTTP tools.ietf.org/html/rfc3629#section-3, RFC 3629$(COMMA) section 3)). | 
|  | For UTF-16, it is either 1 or 2. For UTF-32, it is always 1. | 
|  |  | 
|  | Throws: | 
|  | May throw a `UTFException` if `str[index]` is not the start of a | 
|  | valid UTF sequence. | 
|  |  | 
|  | Note: | 
|  | `stride` will only analyze the first `str[index]` element. It | 
|  | will not fully verify the validity of the UTF sequence, nor even verify | 
|  | the presence of the sequence: it will not actually guarantee that | 
|  | $(D index + stride(str, index) <= str.length). | 
|  | +/ | 
|  | uint stride(S)(auto ref S str, size_t index) | 
|  | if (is(S : const char[]) || | 
|  | (isRandomAccessRange!S && is(immutable ElementType!S == immutable char))) | 
|  | { | 
|  | // The bounds check is only possible when the range exposes a length. | 
|  | static if (is(typeof(str.length) : ulong)) | 
|  | assert(index < str.length, "Past the end of the UTF-8 sequence"); | 
|  | immutable lead = str[index]; | 
|  |  | 
|  | // Values below 0x80 are one unit long; everything else is | 
|  | // classified (or rejected) by strideImpl. | 
|  | return lead < 0x80 ? 1 : strideImpl(lead, index); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint stride(S)(auto ref S str) | 
|  | if (is(S : const char[]) || | 
|  | (isInputRange!S && is(immutable ElementType!S == immutable char))) | 
|  | { | 
|  | // Arrays are indexed directly; other ranges are read through `front`. | 
|  | static if (!is(S : const char[])) | 
|  | immutable lead = str.front; | 
|  | else | 
|  | immutable lead = str[0]; | 
|  |  | 
|  | // Values below 0x80 are one unit long; everything else is | 
|  | // classified (or rejected) by strideImpl, reporting index 0. | 
|  | return lead < 0x80 ? 1 : strideImpl(lead, 0); | 
|  | } | 
|  |  | 
|  | @system unittest | 
|  | { | 
|  | // Exercises the UTF-8 `stride` overloads on strings and on the test range | 
|  | // types RandomCU, RefRandomCU, InputCU and RefBidirCU (defined outside this | 
|  | // view; presumably code-unit range wrappers, confirm at their definitions). | 
|  | import core.exception : AssertError; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  | import std.string : format; | 
|  | import std.traits : FunctionAttribute, functionAttributes, isSafe; | 
|  | // `c` is the code point expected to start at index `i` of `s`; the stride | 
|  | // must equal codeLength!char(c). `line` defaults to the caller's line so | 
|  | // the AssertError points at the failing test call. | 
|  | static void test(string s, dchar c, size_t i = 0, size_t line = __LINE__) | 
|  | { | 
|  | enforce(stride(s, i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure string: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(RandomCU!char(s), i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure range: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The length is compared before and after the call: stride must not | 
|  | // consume anything from the range it is given. | 
|  | auto refRandom = new RefRandomCU!char(s); | 
|  | immutable randLen = refRandom.length; | 
|  | enforce(stride(refRandom, i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure rand ref range: %s", s), __FILE__, line)); | 
|  | enforce(refRandom.length == randLen, | 
|  | new AssertError(format("Unit test failure rand ref range length: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The index-free overloads can only be checked at the start. | 
|  | if (i == 0) | 
|  | { | 
|  | enforce(stride(s) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure string 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(InputCU!char(s)) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure range 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | auto refBidir = new RefBidirCU!char(s); | 
|  | immutable bidirLen = refBidir.length; | 
|  | enforce(stride(refBidir) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure bidir ref range code length: %s", s), __FILE__, line)); | 
|  | enforce(refBidir.length == bidirLen, | 
|  | new AssertError(format("Unit test failure bidir ref range length: %s", s), __FILE__, line)); | 
|  | } | 
|  | } | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | test("a", 'a'); | 
|  | test(" ", ' '); | 
|  | test("\u2029", '\u2029'); //paraSep | 
|  | test("\u0100", '\u0100'); | 
|  | test("\u0430", '\u0430'); | 
|  | test("\U00010143", '\U00010143'); | 
|  | test("abcdefcdef", 'a'); | 
|  | // Indices are UTF-8 code unit offsets: "hello" occupies 0 .. 4, | 
|  | // \U00010143 takes 4 units and \u0100 takes 2. | 
|  | test("hello\U00010143\u0100\U00010143", 'h', 0); | 
|  | test("hello\U00010143\u0100\U00010143", 'e', 1); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 2); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 3); | 
|  | test("hello\U00010143\u0100\U00010143", 'o', 4); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 5); | 
|  | test("hello\U00010143\u0100\U00010143", '\u0100', 9); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 11); | 
|  |  | 
|  | // stride must be inferred @safe and pure for every mutability of char array. | 
|  | foreach (S; AliasSeq!(char[], const char[], string)) | 
|  | { | 
|  | enum str = to!S("hello world"); | 
|  | static assert(isSafe!({ stride(str, 0); })); | 
|  | static assert(isSafe!({ stride(str);    })); | 
|  | static assert((functionAttributes!({ stride(str, 0); }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ stride(str);    }) & FunctionAttribute.pure_) != 0); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | @safe unittest // invalid start bytes | 
|  | { | 
|  | import std.exception : assertThrown; | 
|  | // Bytes that cannot begin a sequence of 1 to 4 units: stride is | 
|  | // expected to throw a UTFException for each of them. | 
|  | immutable char[] invalidStartBytes = [ | 
|  | 0b1111_1000, // indicating a sequence length of 5 | 
|  | 0b1111_1100, // 6 | 
|  | 0b1111_1110, // 7 | 
|  | 0b1111_1111, // 8 | 
|  | 0b1000_0000, // continuation byte | 
|  | ]; | 
|  | foreach (c; invalidStartBytes) | 
|  | assertThrown!UTFException(stride([c])); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint stride(S)(auto ref S str, size_t index) | 
|  | if (is(S : const wchar[]) || | 
|  | (isRandomAccessRange!S && is(immutable ElementType!S == immutable wchar))) | 
|  | { | 
|  | // The bounds check is only possible when the range exposes a length. | 
|  | static if (is(typeof(str.length) : ulong)) | 
|  | assert(index < str.length, "Past the end of the UTF-16 sequence"); | 
|  | immutable uint unit = str[index]; | 
|  |  | 
|  | // A unit in 0xD800 .. 0xDBFF starts a two-unit sequence; | 
|  | // every other value counts as a single unit. | 
|  | immutable bool startsPair = unit >= 0xD800 && unit <= 0xDBFF; | 
|  | return startsPair ? 2 : 1; | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint stride(S)(auto ref S str) @safe pure | 
|  | if (is(S : const wchar[])) | 
|  | { | 
|  | // wchar arrays: forward to the indexed overload at position 0. | 
|  | return stride(str, 0); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint stride(S)(auto ref S str) | 
|  | if (isInputRange!S && is(immutable ElementType!S == immutable wchar) && | 
|  | !is(S : const wchar[])) | 
|  | { | 
|  | assert(!str.empty, "UTF-16 sequence is empty"); | 
|  | immutable uint unit = str.front; | 
|  |  | 
|  | // A unit in 0xD800 .. 0xDBFF starts a two-unit sequence; | 
|  | // every other value counts as a single unit. | 
|  | immutable bool startsPair = unit >= 0xD800 && unit <= 0xDBFF; | 
|  | return startsPair ? 2 : 1; | 
|  | } | 
|  |  | 
|  | @system unittest | 
|  | { | 
|  | // Exercises the UTF-16 `stride` overloads on wstrings and on the test range | 
|  | // types RandomCU, RefRandomCU, InputCU and RefBidirCU (defined outside this | 
|  | // view; presumably code-unit range wrappers, confirm at their definitions). | 
|  | import core.exception : AssertError; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  | import std.string : format; | 
|  | import std.traits : FunctionAttribute, functionAttributes, isSafe; | 
|  | // `c` is the code point expected to start at index `i` of `s`; the stride | 
|  | // must equal codeLength!wchar(c). `line` defaults to the caller's line so | 
|  | // the AssertError points at the failing test call. | 
|  | static void test(wstring s, dchar c, size_t i = 0, size_t line = __LINE__) | 
|  | { | 
|  | enforce(stride(s, i) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure string: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(RandomCU!wchar(s), i) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure range: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The length is compared before and after the call: stride must not | 
|  | // consume anything from the range it is given. | 
|  | auto refRandom = new RefRandomCU!wchar(s); | 
|  | immutable randLen = refRandom.length; | 
|  | enforce(stride(refRandom, i) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure rand ref range: %s", s), __FILE__, line)); | 
|  | enforce(refRandom.length == randLen, | 
|  | new AssertError(format("Unit test failure rand ref range length: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The index-free overloads can only be checked at the start. | 
|  | if (i == 0) | 
|  | { | 
|  | enforce(stride(s) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure string 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(InputCU!wchar(s)) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure range 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | auto refBidir = new RefBidirCU!wchar(s); | 
|  | immutable bidirLen = refBidir.length; | 
|  | enforce(stride(refBidir) == codeLength!wchar(c), | 
|  | new AssertError(format("Unit test failure bidir ref range code length: %s", s), __FILE__, line)); | 
|  | enforce(refBidir.length == bidirLen, | 
|  | new AssertError(format("Unit test failure bidir ref range length: %s", s), __FILE__, line)); | 
|  | } | 
|  | } | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | test("a", 'a'); | 
|  | test(" ", ' '); | 
|  | test("\u2029", '\u2029'); //paraSep | 
|  | test("\u0100", '\u0100'); | 
|  | test("\u0430", '\u0430'); | 
|  | test("\U00010143", '\U00010143'); | 
|  | test("abcdefcdef", 'a'); | 
|  | // Indices are UTF-16 code unit offsets: "hello" occupies 0 .. 4, | 
|  | // \U00010143 takes 2 units and \u0100 takes 1. | 
|  | test("hello\U00010143\u0100\U00010143", 'h', 0); | 
|  | test("hello\U00010143\u0100\U00010143", 'e', 1); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 2); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 3); | 
|  | test("hello\U00010143\u0100\U00010143", 'o', 4); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 5); | 
|  | test("hello\U00010143\u0100\U00010143", '\u0100', 7); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 8); | 
|  |  | 
|  | // stride must be inferred @safe and pure for every mutability of wchar array. | 
|  | foreach (S; AliasSeq!(wchar[], const wchar[], wstring)) | 
|  | { | 
|  | enum str = to!S("hello world"); | 
|  | static assert(isSafe!(() => stride(str, 0))); | 
|  | static assert(isSafe!(() => stride(str)   )); | 
|  | static assert((functionAttributes!(() => stride(str, 0)) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!(() => stride(str)   ) & FunctionAttribute.pure_) != 0); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint stride(S)(auto ref S str, size_t index = 0) | 
|  | if (is(S : const dchar[]) || | 
|  | (isInputRange!S && is(immutable ElementEncodingType!S == immutable dchar))) | 
|  | { | 
|  | // Only the precondition differs between range kinds: ranges with a | 
|  | // length are bounds-checked, all others are checked for emptiness. | 
|  | enum bool hasUsableLength = is(typeof(str.length) : ulong); | 
|  | static if (!hasUsableLength) | 
|  | assert(!str.empty, "UTF-32 sequence is empty."); | 
|  | else | 
|  | assert(index < str.length, "Past the end of the UTF-32 sequence"); | 
|  |  | 
|  | // A UTF-32 sequence is always exactly one code unit. | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  | // UTF-8 strides: 1 unit for "a", 2 for "λ", 4 for "𐐷"; | 
|  | // an explicit index selects which sequence is measured. | 
|  | assert("a".stride == 1); | 
|  | assert("λ".stride == 2); | 
|  | assert("aλ".stride == 1); | 
|  | assert("aλ".stride(1) == 2); | 
|  | assert("𐐷".stride == 4); | 
|  | } | 
|  |  | 
|  | @system unittest | 
|  | { | 
|  | // Exercises the UTF-32 `stride` overload on dstrings and on the test range | 
|  | // types RandomCU, RefRandomCU, InputCU and RefBidirCU (defined outside this | 
|  | // view; presumably code-unit range wrappers, confirm at their definitions). | 
|  | import core.exception : AssertError; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  | import std.string : format; | 
|  | import std.traits : FunctionAttribute, functionAttributes, isSafe; | 
|  | // `c` is the code point expected at index `i` of `s`; the stride must | 
|  | // equal codeLength!dchar(c). `line` defaults to the caller's line so | 
|  | // the AssertError points at the failing test call. | 
|  | static void test(dstring s, dchar c, size_t i = 0, size_t line = __LINE__) | 
|  | { | 
|  | enforce(stride(s, i) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure string: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(RandomCU!dchar(s), i) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure range: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The length is compared before and after the call: stride must not | 
|  | // consume anything from the range it is given. | 
|  | auto refRandom = new RefRandomCU!dchar(s); | 
|  | immutable randLen = refRandom.length; | 
|  | enforce(stride(refRandom, i) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure rand ref range: %s", s), __FILE__, line)); | 
|  | enforce(refRandom.length == randLen, | 
|  | new AssertError(format("Unit test failure rand ref range length: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The index-free calls can only be checked at the start. | 
|  | if (i == 0) | 
|  | { | 
|  | enforce(stride(s) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure string 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(stride(InputCU!dchar(s)) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure range 0: %s", s), __FILE__, line)); | 
|  |  | 
|  | auto refBidir = new RefBidirCU!dchar(s); | 
|  | immutable bidirLen = refBidir.length; | 
|  | enforce(stride(refBidir) == codeLength!dchar(c), | 
|  | new AssertError(format("Unit test failure bidir ref range code length: %s", s), __FILE__, line)); | 
|  | enforce(refBidir.length == bidirLen, | 
|  | new AssertError(format("Unit test failure bidir ref range length: %s", s), __FILE__, line)); | 
|  | } | 
|  | } | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | test("a", 'a'); | 
|  | test(" ", ' '); | 
|  | test("\u2029", '\u2029'); //paraSep | 
|  | test("\u0100", '\u0100'); | 
|  | test("\u0430", '\u0430'); | 
|  | test("\U00010143", '\U00010143'); | 
|  | test("abcdefcdef", 'a'); | 
|  | // In UTF-32 every code point is one unit, so indices count code points. | 
|  | test("hello\U00010143\u0100\U00010143", 'h', 0); | 
|  | test("hello\U00010143\u0100\U00010143", 'e', 1); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 2); | 
|  | test("hello\U00010143\u0100\U00010143", 'l', 3); | 
|  | test("hello\U00010143\u0100\U00010143", 'o', 4); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 5); | 
|  | test("hello\U00010143\u0100\U00010143", '\u0100', 6); | 
|  | test("hello\U00010143\u0100\U00010143", '\U00010143', 7); | 
|  |  | 
|  | // stride must be inferred @safe and pure for every mutability of dchar array. | 
|  | foreach (S; AliasSeq!(dchar[], const dchar[], dstring)) | 
|  | { | 
|  | enum str = to!S("hello world"); | 
|  | static assert(isSafe!(() => stride(str, 0))); | 
|  | static assert(isSafe!(() => stride(str)   )); | 
|  | static assert((functionAttributes!(() => stride(str, 0)) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!(() => stride(str)   ) & FunctionAttribute.pure_) != 0); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | private uint strideImpl(char c, size_t index) @trusted pure | 
|  | in { assert(c & 0x80); } | 
|  | do | 
|  | { | 
|  | import core.bitop : bsr; | 
|  | // Inverting the byte turns its leading 1 bits into 0s, so the position of | 
|  | // the highest set bit of the inverted value gives the leading-ones count. | 
|  | immutable leadingOnes = 7 - bsr((~uint(c)) & 0xFF); | 
|  |  | 
|  | // Only 2, 3 or 4 leading ones are accepted. 0xFF is tested first | 
|  | // because its inverted value is 0, so the count above is unusable. | 
|  | immutable bool accepted = c != 0xFF && leadingOnes >= 2 && leadingOnes <= 4; | 
|  | if (!accepted) | 
|  | throw new UTFException("Invalid UTF-8 sequence", index); | 
|  | return leadingOnes; | 
|  | } | 
|  |  | 
|  | /++ | 
|  | Calculate the length of the UTF sequence ending one code unit before | 
|  | `index` in `str`. | 
|  |  | 
|  | Params: | 
|  | str = bidirectional range of UTF code units. Must be random access if | 
|  | `index` is passed | 
|  | index = index one past end of UTF sequence (default: `str.length`) | 
|  |  | 
|  | Returns: | 
|  | The number of code units in the UTF sequence. For UTF-8, this is a | 
|  | value between 1 and 4 (as per $(HTTP tools.ietf.org/html/rfc3629#section-3, RFC 3629$(COMMA) section 3)). | 
|  | For UTF-16, it is either 1 or 2. For UTF-32, it is always 1. | 
|  |  | 
|  | Throws: | 
|  | May throw a `UTFException` if `index` is not one past the | 
|  | end of a valid UTF sequence. | 
|  |  | 
|  | Note: | 
|  | `strideBack` will only analyze the $(D str[index - 1]) | 
|  | element. It will not fully verify the validity of the UTF sequence, nor | 
|  | even verify the presence of the sequence: it will not actually | 
|  | guarantee that $(D strideBack(str, index) <= index). | 
|  | +/ | 
|  | uint strideBack(S)(auto ref S str, size_t index) | 
|  | if (is(S : const char[]) || | 
|  | (isRandomAccessRange!S && is(immutable ElementType!S == immutable char))) | 
|  | { | 
|  | static if (is(typeof(str.length) : ulong)) | 
|  | assert(index <= str.length, "Past the end of the UTF-8 sequence"); | 
|  | assert(index > 0, "Not the end of the UTF-8 sequence"); | 
|  |  | 
|  | // A code unit whose top two bits are `10` is treated as a continuation | 
|  | // byte; the first unit found while walking backwards that is not one | 
|  | // marks the start of the sequence. | 
|  | enum topTwoBits = 0b1100_0000; | 
|  | enum continuationTag = 0b1000_0000; | 
|  |  | 
|  | if ((str[index-1] & topTwoBits) != continuationTag) | 
|  | return 1; | 
|  |  | 
|  | if (index < 4) | 
|  | { | 
|  | // Fewer than four units precede `index`: guard every step back. | 
|  | static foreach (back; 2 .. 4) | 
|  | { | 
|  | if (index >= back && (str[index-back] & topTwoBits) != continuationTag) | 
|  | return back; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | // Most common case: at least four units are available, so no | 
|  | // per-step bounds test is needed. | 
|  | static foreach (back; 2 .. 5) | 
|  | { | 
|  | if ((str[index-back] & topTwoBits) != continuationTag) | 
|  | return back; | 
|  | } | 
|  | } | 
|  | throw new UTFException("Not the end of the UTF sequence", index); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint strideBack(S)(auto ref S str) | 
|  | if (is(S : const char[]) || | 
|  | (isRandomAccessRange!S && hasLength!S && is(immutable ElementType!S == immutable char))) | 
|  | { | 
|  | // Measure the sequence that ends at the very end of `str`. | 
|  | return strideBack(str, str.length); | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint strideBack(S)(auto ref S str) | 
|  | if (isBidirectionalRange!S && is(immutable ElementType!S == immutable char) && !isRandomAccessRange!S) | 
|  | { | 
|  | assert(!str.empty, "Past the end of the UTF-8 sequence"); | 
|  |  | 
|  | // Walk backwards on a saved copy so `str` itself is left untouched. | 
|  | auto probe = str.save; | 
|  | for (uint units = 1; units <= 4; ++units) | 
|  | { | 
|  | // The first unit that is not a continuation byte (10xx_xxxx) | 
|  | // is taken as the start of the sequence. | 
|  | if ((probe.back & 0b1100_0000) != 0b1000_0000) | 
|  | return units; | 
|  | probe.popBack(); | 
|  | if (probe.empty) | 
|  | break; | 
|  | } | 
|  | // Either four continuation bytes were seen or the range ran out first. | 
|  | throw new UTFException("The last code unit is not the end of the UTF-8 sequence"); | 
|  | } | 
|  |  | 
|  | @system unittest | 
|  | { | 
|  | // Exercises the UTF-8 `strideBack` overloads on strings and on the test range | 
|  | // types RandomCU, RefRandomCU, BidirCU and RefBidirCU (defined outside this | 
|  | // view; presumably code-unit range wrappers, confirm at their definitions). | 
|  | import core.exception : AssertError; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  | import std.string : format; | 
|  | import std.traits : FunctionAttribute, functionAttributes, isSafe; | 
|  | // `c` is the code point expected to end just before index `i` of `s`; | 
|  | // size_t.max stands for "use s.length". `line` defaults to the caller's | 
|  | // line so the AssertError points at the failing test call. | 
|  | static void test(string s, dchar c, size_t i = size_t.max, size_t line = __LINE__) | 
|  | { | 
|  | enforce(strideBack(s, i == size_t.max ? s.length : i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure string: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(strideBack(RandomCU!char(s), i == size_t.max ? s.length : i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure range: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The length is compared before and after the call: strideBack must | 
|  | // not consume anything from the range it is given. | 
|  | auto refRandom = new RefRandomCU!char(s); | 
|  | immutable randLen = refRandom.length; | 
|  | enforce(strideBack(refRandom, i == size_t.max ? s.length : i) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure rand ref range: %s", s), __FILE__, line)); | 
|  | enforce(refRandom.length == randLen, | 
|  | new AssertError(format("Unit test failure rand ref range length: %s", s), __FILE__, line)); | 
|  |  | 
|  | // The index-free overloads can only be checked at the end of the string. | 
|  | if (i == size_t.max) | 
|  | { | 
|  | enforce(strideBack(s) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure string code length: %s", s), __FILE__, line)); | 
|  |  | 
|  | enforce(strideBack(BidirCU!char(s)) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure range code length: %s", s), __FILE__, line)); | 
|  |  | 
|  | auto refBidir = new RefBidirCU!char(s); | 
|  | immutable bidirLen = refBidir.length; | 
|  | enforce(strideBack(refBidir) == codeLength!char(c), | 
|  | new AssertError(format("Unit test failure bidir ref range code length: %s", s), __FILE__, line)); | 
|  | enforce(refBidir.length == bidirLen, | 
|  | new AssertError(format("Unit test failure bidir ref range length: %s", s), __FILE__, line)); | 
|  | } | 
|  | } | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | test("a", 'a'); | 
|  | test(" ", ' '); | 
|  | test("\u2029", '\u2029'); //paraSep | 
|  | test("\u0100", '\u0100'); | 
|  | test("\u0430", '\u0430'); | 
|  | test("\U00010143", '\U00010143'); | 
|  | test("abcdefcdef", 'f'); | 
|  | // Indices are one-past-the-end UTF-8 offsets: \U00010143 occupies | 
|  | // 0 .. 3, \u0100 4 .. 5, \U00010143 6 .. 9 and "hello" 10 .. 14. | 
|  | test("\U00010143\u0100\U00010143hello", 'o', 15); | 
|  | test("\U00010143\u0100\U00010143hello", 'l', 14); | 
|  | test("\U00010143\u0100\U00010143hello", 'l', 13); | 
|  | test("\U00010143\u0100\U00010143hello", 'e', 12); | 
|  | test("\U00010143\u0100\U00010143hello", 'h', 11); | 
|  | test("\U00010143\u0100\U00010143hello", '\U00010143', 10); | 
|  | test("\U00010143\u0100\U00010143hello", '\u0100', 6); | 
|  | test("\U00010143\u0100\U00010143hello", '\U00010143', 4); | 
|  |  | 
|  | // strideBack must be inferred @safe and pure for every mutability of char array. | 
|  | foreach (S; AliasSeq!(char[], const char[], string)) | 
|  | { | 
|  | enum str = to!S("hello world"); | 
|  | static assert(isSafe!({ strideBack(str, 0); })); | 
|  | static assert(isSafe!({ strideBack(str);    })); | 
|  | static assert((functionAttributes!({ strideBack(str, 0); }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ strideBack(str);    }) & FunctionAttribute.pure_) != 0); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | //UTF-16 is self synchronizing: The length of strideBack can be found from | 
|  | //the value of a single wchar | 
|  | /// Ditto | 
|  | uint strideBack(S)(auto ref S str, size_t index) | 
|  | if (is(S : const wchar[]) || | 
|  | (isRandomAccessRange!S && is(immutable ElementType!S == immutable wchar))) | 
|  | { | 
|  | static if (is(typeof(str.length) : ulong)) | 
|  | assert(index <= str.length, "Past the end of the UTF-16 sequence"); | 
|  | assert(index > 0, "Not the end of a UTF-16 sequence"); | 
|  |  | 
|  | // Only the unit just before `index` is inspected: a value in | 
|  | // 0xDC00 .. 0xDFFF ends a two-unit sequence, anything else counts as one. | 
|  | immutable last = str[index-1]; | 
|  | immutable bool endsPair = last >= 0xDC00 && last < 0xE000; | 
|  | return endsPair ? 2 : 1; | 
|  | } | 
|  |  | 
|  | /// Ditto | 
|  | uint strideBack(S)(auto ref S str) | 
|  | if (is(S : const wchar[]) || | 
|  | (isBidirectionalRange!S && is(immutable ElementType!S == immutable wchar))) | 
|  | { | 
|  | assert(!str.empty, "UTF-16 sequence is empty"); | 
|  |  | 
|  | // Arrays are indexed directly; other ranges are read through `back`. | 
|  | static if (is(S : const(wchar)[])) | 
|  | immutable c2 = str[$ - 1]; | 
|  | else | 
|  | immutable c2 = str.back; | 
|  |  | 
|  | // Only the last unit is inspected: a value in 0xDC00 .. 0xDFFF ends a | 
|  | // two-unit sequence. The upper bound is exclusive, matching the indexed | 
|  | // overload: 0xE000 is outside that range and is one unit long. | 
|  | return 1 + (0xDC00 <= c2 && c2 < 0xE000); | 
|  | } | 
|  |  | 
@system unittest
{
    import core.exception : AssertError;
    import std.conv : to;
    import std.exception;
    import std.string : format;
    import std.traits : FunctionAttribute, functionAttributes, isSafe;

    // Verifies that every UTF-16 flavour of strideBack (array, random-access
    // range, reference random-access range and, when no index is given, the
    // bidirectional variants) reports codeLength!wchar(c) code units, and that
    // reference ranges are left with their original length.
    static void test(wstring s, dchar c, size_t i = size_t.max, size_t line = __LINE__)
    {
        immutable atEnd = i == size_t.max;
        immutable pos = atEnd ? s.length : i;
        immutable expected = codeLength!wchar(c);

        // Built lazily by enforce, so the message is only formatted on failure.
        AssertError failure(string fmt)
        {
            return new AssertError(format(fmt, s), __FILE__, line);
        }

        enforce(strideBack(s, pos) == expected,
                failure("Unit test failure string: %s"));

        enforce(strideBack(RandomCU!wchar(s), pos) == expected,
                failure("Unit test failure range: %s"));

        auto refRandom = new RefRandomCU!wchar(s);
        immutable randLen = refRandom.length;
        enforce(strideBack(refRandom, pos) == expected,
                failure("Unit test failure rand ref range: %s"));
        enforce(refRandom.length == randLen,
                failure("Unit test failure rand ref range length: %s"));

        // The index-free overloads only make sense when measuring from the end.
        if (!atEnd)
            return;

        enforce(strideBack(s) == expected,
                failure("Unit test failure string code length: %s"));

        enforce(strideBack(BidirCU!wchar(s)) == expected,
                failure("Unit test failure range code length: %s"));

        auto refBidir = new RefBidirCU!wchar(s);
        immutable bidirLen = refBidir.length;
        enforce(strideBack(refBidir) == expected,
                failure("Unit test failure bidir ref range code length: %s"));
        enforce(refBidir.length == bidirLen,
                failure("Unit test failure bidir ref range length: %s"));
    }

    assertCTFEable!(
    {
        // Single code points, measured from the end of the string.
        test("a", 'a');
        test(" ", ' ');
        test("\u2029", '\u2029'); //paraSep
        test("\u0100", '\u0100');
        test("\u0430", '\u0430');
        test("\U00010143", '\U00010143');
        test("abcdefcdef", 'f');

        // Mixed one- and two-unit code points, measured from explicit indices.
        enum wstring mixed = "\U00010143\u0100\U00010143hello";
        test(mixed, 'o', 10);
        test(mixed, 'l', 9);
        test(mixed, 'l', 8);
        test(mixed, 'e', 7);
        test(mixed, 'h', 6);
        test(mixed, '\U00010143', 5);
        test(mixed, '\u0100', 3);
        test(mixed, '\U00010143', 2);

        // Both overloads must be inferred @safe and pure for built-in strings.
        foreach (S; AliasSeq!(wchar[], const wchar[], wstring))
        {
            enum sample = to!S("hello world");
            enum pureBit = FunctionAttribute.pure_;
            static assert(isSafe!(() => strideBack(sample, 0)));
            static assert(isSafe!(() => strideBack(sample)   ));
            static assert((functionAttributes!(() => strideBack(sample, 0)) & pureBit) != 0);
            static assert((functionAttributes!(() => strideBack(sample)   ) & pureBit) != 0);
        }
    });
}
|  |  | 
/// Ditto
uint strideBack(S)(auto ref S str, size_t index)
if (isRandomAccessRange!S && is(immutable ElementEncodingType!S == immutable dchar))
{
    static if (is(typeof(str.length) : ulong))
    {
        assert(index <= str.length, "Past the end of the UTF-32 sequence");
    }
    assert(index > 0, "Not the end of the UTF-32 sequence");

    // UTF-32 always uses exactly one code unit per code point.
    enum uint unitsPerCodePoint = 1;
    return unitsPerCodePoint;
}
|  |  | 
/// Ditto
uint strideBack(S)(auto ref S str)
if (isBidirectionalRange!S && is(immutable ElementEncodingType!S == immutable dchar))
{
    assert(!str.empty, "Empty UTF-32 sequence");

    // UTF-32 always uses exactly one code unit per code point.
    enum uint unitsPerCodePoint = 1;
    return unitsPerCodePoint;
}
|  |  | 
///
@safe unittest
{
    // UTF-8 needs one code unit for ASCII, two for 'λ' and four for '𐐷'.
    assert(strideBack("a") == 1);
    assert(strideBack("λ") == 2);
    assert(strideBack("𐐷") == 4);

    // Without an index the last code point is measured; with an index, the
    // code point that ends right before that index is measured.
    assert(strideBack("aλ") == 2);
    assert(strideBack("aλ", 1) == 1);
}
|  |  | 
@system unittest
{
    import core.exception : AssertError;
    import std.conv : to;
    import std.exception;
    import std.string : format;
    import std.traits : FunctionAttribute, functionAttributes, isSafe;

    // Verifies that every UTF-32 flavour of strideBack (array, random-access
    // range, reference random-access range and, when no index is given, the
    // bidirectional variants) reports codeLength!dchar(c) code units, and that
    // reference ranges are left with their original length.
    static void test(dstring s, dchar c, size_t i = size_t.max, size_t line = __LINE__)
    {
        immutable atEnd = i == size_t.max;
        immutable pos = atEnd ? s.length : i;
        immutable expected = codeLength!dchar(c);

        // Built lazily by enforce, so the message is only formatted on failure.
        AssertError failure(string fmt)
        {
            return new AssertError(format(fmt, s), __FILE__, line);
        }

        enforce(strideBack(s, pos) == expected,
                failure("Unit test failure string: %s"));

        enforce(strideBack(RandomCU!dchar(s), pos) == expected,
                failure("Unit test failure range: %s"));

        auto refRandom = new RefRandomCU!dchar(s);
        immutable randLen = refRandom.length;
        enforce(strideBack(refRandom, pos) == expected,
                failure("Unit test failure rand ref range: %s"));
        enforce(refRandom.length == randLen,
                failure("Unit test failure rand ref range length: %s"));

        // The index-free overloads only make sense when measuring from the end.
        if (!atEnd)
            return;

        enforce(strideBack(s) == expected,
                failure("Unit test failure string code length: %s"));

        enforce(strideBack(BidirCU!dchar(s)) == expected,
                failure("Unit test failure range code length: %s"));

        auto refBidir = new RefBidirCU!dchar(s);
        immutable bidirLen = refBidir.length;
        enforce(strideBack(refBidir) == expected,
                failure("Unit test failure bidir ref range code length: %s"));
        enforce(refBidir.length == bidirLen,
                failure("Unit test failure bidir ref range length: %s"));
    }

    assertCTFEable!(
    {
        // Single code points, measured from the end of the string.
        test("a", 'a');
        test(" ", ' ');
        test("\u2029", '\u2029'); //paraSep
        test("\u0100", '\u0100');
        test("\u0430", '\u0430');
        test("\U00010143", '\U00010143');
        test("abcdefcdef", 'f');

        // In UTF-32 every code point is one unit, so indices map 1:1.
        enum dstring mixed = "\U00010143\u0100\U00010143hello";
        test(mixed, 'o', 8);
        test(mixed, 'l', 7);
        test(mixed, 'l', 6);
        test(mixed, 'e', 5);
        test(mixed, 'h', 4);
        test(mixed, '\U00010143', 3);
        test(mixed, '\u0100', 2);
        test(mixed, '\U00010143', 1);

        // Both overloads must be inferred @safe and pure for built-in strings.
        foreach (S; AliasSeq!(dchar[], const dchar[], dstring))
        {
            enum sample = to!S("hello world");
            enum pureBit = FunctionAttribute.pure_;
            static assert(isSafe!(() => strideBack(sample, 0)));
            static assert(isSafe!(() => strideBack(sample)   ));
            static assert((functionAttributes!(() => strideBack(sample, 0)) & pureBit) != 0);
            static assert((functionAttributes!(() => strideBack(sample)   ) & pureBit) != 0);
        }
    });
}
|  |  | 
|  |  | 
/++
    Converts a code unit index into a code point index.

    `index` must refer to the first code unit of a code point in `str` (or be
    one past the last code unit). The result is the number of code points
    that precede that position, i.e. how many code points into the string
    that code point is.

    Throws:
        $(LREF UTFException) if walking the string code point by code point
        steps over `index`, meaning `index` is not at the start of a sequence.
  +/
size_t toUCSindex(C)(const(C)[] str, size_t index) @safe pure
if (isSomeChar!C)
{
    static if (is(immutable C == immutable dchar))
    {
        // One code unit per code point: the two indices coincide.
        return index;
    }
    else
    {
        size_t codePoints;
        size_t pos;

        // Hop from code point to code point until `index` is reached or passed.
        while (pos < index)
        {
            pos += stride(str, pos);
            ++codePoints;
        }

        if (pos > index)
        {
            enum string msg = is(immutable C == immutable char)
                ? "Invalid UTF-8 sequence"
                : "Invalid UTF-16 sequence";
            throw new UTFException(msg, index);
        }

        return codePoints;
    }
}
|  |  | 
///
@safe unittest
{
    // Pure ASCII: code unit and code point indices coincide in every encoding.
    assert(`hello world`.toUCSindex(7) == 7);
    assert(`hello world`w.toUCSindex(7) == 7);
    assert(`hello world`d.toUCSindex(7) == 7);

    // 'é' occupies two UTF-8 code units but a single UTF-16/UTF-32 unit.
    assert(`Ma Chérie`.toUCSindex(7) == 6);
    assert(`Ma Chérie`w.toUCSindex(7) == 7);
    assert(`Ma Chérie`d.toUCSindex(7) == 7);

    // Each of the leading kana occupies three UTF-8 code units.
    assert(`さいごの果実 / ミツバチと科学者`.toUCSindex(9) == 3);
    assert(`さいごの果実 / ミツバチと科学者`w.toUCSindex(9) == 9);
    assert(`さいごの果実 / ミツバチと科学者`d.toUCSindex(9) == 9);
}
|  |  | 
|  |  | 
/++
    Converts a code point index into a code unit index.

    `n` counts code points from the start of `str`; the result is the array
    index of the first code unit of that code point.
  +/
size_t toUTFindex(C)(const(C)[] str, size_t n) @safe pure
if (isSomeChar!C)
{
    static if (is(immutable C == immutable dchar))
    {
        // One code unit per code point: the two indices coincide.
        return n;
    }
    else
    {
        size_t offset = 0;

        // Step over exactly `n` code points, accumulating their lengths.
        for (size_t remaining = n; remaining != 0; --remaining)
            offset += stride(str, offset);

        return offset;
    }
}
|  |  | 
///
@safe unittest
{
    // Pure ASCII: code point and code unit indices coincide in every encoding.
    assert(`hello world`.toUTFindex(7) == 7);
    assert(`hello world`w.toUTFindex(7) == 7);
    assert(`hello world`d.toUTFindex(7) == 7);

    // 'é' occupies two UTF-8 code units but a single UTF-16/UTF-32 unit.
    assert(`Ma Chérie`.toUTFindex(6) == 7);
    assert(`Ma Chérie`w.toUTFindex(7) == 7);
    assert(`Ma Chérie`d.toUTFindex(7) == 7);

    // Each of the leading kana occupies three UTF-8 code units.
    assert(`さいごの果実 / ミツバチと科学者`.toUTFindex(3) == 9);
    assert(`さいごの果実 / ミツバチと科学者`w.toUTFindex(9) == 9);
    assert(`さいごの果実 / ミツバチと科学者`d.toUTFindex(9) == 9);
}
|  |  | 
|  |  | 
|  | /* =================== Decode ======================= */ | 
|  |  | 
/// Flag selecting whether invalid UTF is replaced with $(LREF replacementDchar)
alias UseReplacementDchar = Flag!("useReplacementDchar");
|  |  | 
/++
    Decodes the code point that begins at `str[index]` and returns it.
    On success `index` is moved to just past the decoded code point. If the
    code point is not well-formed, a `UTFException` is thrown and `index`
    is left untouched.

    decode only accepts strings and random access ranges of code units that
    provide length and slicing; $(LREF decodeFront) accepts any input range
    of code units.

    Params:
        useReplacementDchar = if invalid UTF, return replacementDchar rather than throwing
        str = input string or indexable Range
        index = starting index into s[]; incremented by number of code units processed

    Returns:
        decoded character

    Throws:
        $(LREF UTFException) if `str[index]` is not the start of a valid UTF
        sequence and useReplacementDchar is `No.useReplacementDchar`
  +/
dchar decode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(auto ref S str, ref size_t index)
if (!isSomeString!S &&
    isRandomAccessRange!S && hasSlicing!S && hasLength!S && isSomeChar!(ElementType!S))
in
{
    assert(index < str.length, "Attempted to decode past the end of a string");
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    // Anything at or above the limit may start a multi-unit sequence (or be
    // invalid) and needs the full decoder.
    if (str[index] >= codeUnitLimit!S)
        return decodeImpl!(true, useReplacementDchar)(str, index);

    // Fast path: the code unit is the code point.
    immutable dchar single = str[index];
    ++index;
    return single;
}
|  |  | 
/// ditto
dchar decode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(
auto ref scope S str, ref size_t index) @trusted pure
if (isSomeString!S)
in
{
    assert(index < str.length, "Attempted to decode past the end of a string");
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    immutable isSingleUnit = str[index] < codeUnitLimit!S;

    if (isSingleUnit)
    {
        // Fast path: the code unit is the code point.
        immutable dchar single = str[index];
        ++index;
        return single;
    }
    else static if (is(immutable S == immutable C[], C))
    {
        // Hand the full decoder a plain const slice of the element type.
        alias Units = const(C)[];
        return decodeImpl!(true, useReplacementDchar)(cast(Units) str, index);
    }
}
|  |  | 
///
@safe pure unittest
{
    size_t pos;

    // One UTF-8 code unit.
    assert(decode("a", pos) == 'a');
    assert(pos == 1);

    // Two UTF-8 code units.
    pos = 0;
    assert(decode("å", pos) == 'å');
    assert(pos == 2);

    // Decoding may start in the middle of the string.
    pos = 1;
    assert(decode("aå", pos) == 'å');
    assert(pos == 3);

    // The same character is a single UTF-16 code unit.
    pos = 0;
    assert(decode("å"w, pos) == 'å');
    assert(pos == 1);

    // ë as a multi-code point grapheme
    pos = 0;
    assert(decode("e\u0308", pos) == 'e');
    assert(pos == 1);

    // ë as a single code point grapheme
    pos = 0;
    assert(decode("ë", pos) == 'ë');
    assert(pos == 2);
    pos = 0;
    assert(decode("ë"w, pos) == 'ë');
    assert(pos == 1);
}
|  |  | 
@safe pure unittest // https://issues.dlang.org/show_bug.cgi?id=22867
{
    import std.conv : hexString;

    // If decoding these bytes throws, the index must be left where it was.
    string data = hexString!"f787a598";
    size_t offset = 0;
    try
    {
        decode(data, offset);
    }
    catch (UTFException ex)
    {
        assert(offset == 0);
    }
}
|  |  | 
/++
    `decodeFront` is the variant of $(LREF decode) that decodes the first
    code point of a range. In contrast to $(LREF decode), it works with any
    $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
    of code units, not only strings and random access ranges. The range is
    taken by `ref` and the decoded code units are popped off it. When
    `numCodeUnits` is supplied, it receives the number of code units that
    made up the decoded code point.

    Params:
        useReplacementDchar = if invalid UTF, return replacementDchar rather than throwing
        str = input string or indexable Range
        numCodeUnits = set to number of code units processed

    Returns:
        decoded character

    Throws:
        $(LREF UTFException) if `str.front` is not the start of a valid UTF
        sequence. If an exception is thrown, then there is no guarantee as to
        the number of code units which were popped off, as it depends on the
        type of range being used and how many code units had to be popped off
        before the code point was determined to be invalid.
  +/
dchar decodeFront(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(
ref S str, out size_t numCodeUnits)
if (!isSomeString!S && isInputRange!S && isSomeChar!(ElementType!S))
in
{
    assert(!str.empty);
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    immutable first = str.front;

    if (first >= codeUnitLimit!S)
    {
        enum sliceable = isRandomAccessRange!S && hasSlicing!S && hasLength!S;

        // canIndex is computed here rather than inside decodeImpl because of
        // https://issues.dlang.org/show_bug.cgi?id=14447. That is undesirable,
        // as not every decodeImpl overload needs it; it should move back into
        // decodeImpl once https://issues.dlang.org/show_bug.cgi?id=8521
        // has been fixed.
        enum canIndex = is(S : const char[]) || sliceable;
        immutable decoded = decodeImpl!(canIndex, useReplacementDchar)(str, numCodeUnits);

        // Sliceable ranges were only indexed by decodeImpl and still have to
        // be advanced; the other range types were already popped by it.
        static if (sliceable)
            str = str[numCodeUnits .. str.length];

        return decoded;
    }

    // Fast path: the code unit is the code point.
    str.popFront();
    numCodeUnits = 1;
    return first;
}
|  |  | 
/// ditto
dchar decodeFront(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(
ref scope S str, out size_t numCodeUnits) @trusted pure
if (isSomeString!S)
in
{
    assert(!str.empty);
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    immutable firstUnit = str[0];

    if (firstUnit < codeUnitLimit!S)
    {
        // Fast path: the code unit is the code point.
        numCodeUnits = 1;
        str = str[1 .. $];
        return firstUnit;
    }
    else static if (is(immutable S == immutable C[], C))
    {
        // numCodeUnits starts at 0 (it is an `out` parameter), so it doubles
        // as the index that decodeImpl advances.
        alias Units = const(C)[];
        immutable decoded = decodeImpl!(true, useReplacementDchar)(cast(Units) str, numCodeUnits);
        str = str[numCodeUnits .. $];
        return decoded;
    }
}
|  |  | 
/++ Ditto +/
dchar decodeFront(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(ref S str)
if (isInputRange!S && isSomeChar!(ElementType!S))
{
    // The caller is not interested in the count; forward with a throwaway.
    size_t unusedCount;
    return decodeFront!useReplacementDchar(str, unusedCount);
}
|  |  | 
///
@safe pure unittest
{
    import std.range.primitives;

    // The decoded code units are removed from the front of the string.
    string greeting = "Hello, World!";
    assert(decodeFront(greeting) == 'H');
    assert(greeting == "ello, World!");

    // A two-unit code point is consumed as a whole.
    string single = "å";
    assert(decodeFront(single) == 'å');
    assert(single.empty);

    // The optional second argument reports how many code units were used.
    string counted = "å";
    size_t units;
    assert(decodeFront(counted, units) == 'å');
    assert(units == 2);
    assert(counted.empty);
}
|  |  | 
/++
    `decodeBack` is the variant of $(LREF decode) that decodes the last code
    point of a range. In contrast to $(LREF decode), it works with any
    bidirectional range of code units, not only strings and random access
    ranges. The range is taken by `ref` and the decoded code units are popped
    off its end. When `numCodeUnits` is supplied, it receives the number of
    code units that made up the decoded code point.

    Params:
        useReplacementDchar = if invalid UTF, return `replacementDchar` rather than throwing
        str = input string or bidirectional Range
        numCodeUnits = gives the number of code units processed

    Returns:
        A decoded UTF character.

    Throws:
        $(LREF UTFException) if `str.back` is not the end of a valid UTF
        sequence. If an exception is thrown, the `str` itself remains unchanged,
        but there is no guarantee as to the value of `numCodeUnits` (when passed).
  +/
dchar decodeBack(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(
ref S str, out size_t numCodeUnits)
if (isSomeString!S)
in
{
    assert(!str.empty);
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    immutable lastUnit = str[$ - 1];

    if (lastUnit < codeUnitLimit!S)
    {
        // Fast path: the code unit is the code point.
        numCodeUnits = 1;
        str = str[0 .. $ - 1];
        return lastUnit;
    }
    else static if (is(immutable S == immutable C[], C))
    {
        // Find where the last code point starts, decode forwards from there
        // and only then shrink the string.
        numCodeUnits = strideBack(str);
        immutable keptLength = str.length - numCodeUnits;
        size_t cursor = keptLength;
        alias Units = const(C)[];
        immutable decoded = decodeImpl!(true, useReplacementDchar)(cast(Units) str, cursor);
        str = str[0 .. keptLength];
        return decoded;
    }
}
|  |  | 
/++ Ditto +/
dchar decodeBack(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(
ref S str, out size_t numCodeUnits)
if (!isSomeString!S && isSomeChar!(ElementType!S) && isBidirectionalRange!S
    && ((isRandomAccessRange!S && hasLength!S) || !isRandomAccessRange!S))
in
{
    assert(!str.empty);
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    if (str.back >= codeUnitLimit!S)
    {
        numCodeUnits = strideBack(str);

        static if (isRandomAccessRange!S)
        {
            // Decode in place, starting at the first unit of the last code point.
            size_t start = str.length - numCodeUnits;
            immutable decoded = decodeImpl!(true, useReplacementDchar)(str, start);
            str.popBackExactly(numCodeUnits);
            return decoded;
        }
        else
        {
            // No indexing available: copy the trailing code units into a
            // small buffer (in forward order) while popping a saved copy of
            // the range, then decode the buffer.
            alias Unit = Unqual!(ElementType!S);
            Unit[4] buffer;
            S rest = str.save;
            foreach_reverse (slot; 0 .. numCodeUnits)
            {
                buffer[slot] = rest.back;
                rest.popBack();
            }
            const Unit[] encoded = buffer[0 .. numCodeUnits];
            size_t start = 0;
            immutable decoded = decodeImpl!(true, useReplacementDchar)(encoded, start);

            // Commit the shortened range only after decoding succeeded.
            str = rest;
            return decoded;
        }
    }

    // Fast path: the code unit is the code point.
    numCodeUnits = 1;
    immutable single = str.back;
    str.popBack();
    return single;
}
|  |  | 
/++ Ditto +/
dchar decodeBack(UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)(ref S str)
if (isSomeString!S
    || (isRandomAccessRange!S && hasLength!S && isSomeChar!(ElementType!S))
    || (!isRandomAccessRange!S && isBidirectionalRange!S && isSomeChar!(ElementType!S)))
in
{
    assert(!str.empty);
}
out (result)
{
    assert(isValidDchar(result));
}
do
{
    // The caller is not interested in the count; forward with a throwaway.
    size_t unusedCount;
    return decodeBack!useReplacementDchar(str, unusedCount);
}
|  |  | 
///
@system pure unittest
{
    import std.range.primitives;

    // The decoded code units are removed from the end of the string.
    string greeting = "Hello, World!";
    assert(decodeBack(greeting) == '!');
    assert(greeting == "Hello, World");

    // A two-unit code point is consumed as a whole.
    string single = "å";
    assert(decodeBack(single) == 'å');
    assert(single.empty);

    // The optional second argument reports how many code units were used.
    string counted = "å";
    size_t units;
    assert(decodeBack(counted, units) == 'å');
    assert(units == 2);
    assert(counted.empty);
}
|  |  | 
// Exclusive upper bound for the fast path: every code unit of the given
// range type that is below this value is guaranteed to be a complete,
// valid encoding of a single code point.
package template codeUnitLimit(S)
if (isSomeChar!(ElementEncodingType!S))
{
    alias Unit = immutable ElementEncodingType!S;

    static if (is(Unit == immutable char))
        enum char codeUnitLimit = 0x80;
    else static if (is(Unit == immutable wchar))
        enum wchar codeUnitLimit = 0xD800;
    else
        enum dchar codeUnitLimit = 0xD800;
}
|  |  | 
|  | /* | 
|  | * For strings, this function does its own bounds checking to give a | 
|  | * more useful error message when attempting to decode past the end of a string. | 
|  | * Subsequently it uses a pointer instead of an array to avoid | 
|  | * redundant bounds checking. | 
|  | * | 
|  | * The three overloads of this operate on chars, wchars, and dchars. | 
|  | * | 
|  | * This overload handles UTF-8 (char) code units. It rejects any first code | 
|  | * unit whose two top bits are not both set, which includes plain ASCII, so | 
|  | * decode, decodeFront and decodeBack compare against codeUnitLimit before | 
|  | * calling it. | 
|  | * | 
|  | * A sequence is treated as invalid when it is cut short by the end of the | 
|  | * input, when a byte that has to be a continuation byte is not of the form | 
|  | * 10xxxxxx, when it is overlong, when it is a 3-byte sequence for which | 
|  | * isValidDchar fails, when it is a 4-byte sequence above dchar.max, or when | 
|  | * it starts with a 5 or 6 byte lead. | 
|  | * | 
|  | * Params: | 
|  | *      canIndex = if S is indexable | 
|  | *      useReplacementDchar = if invalid UTF, return replacementDchar rather than throwing | 
|  | *      str = input string or Range | 
|  | *      index = starting index into s[]; incremented by number of code units processed | 
|  | * | 
|  | * Returns: | 
|  | *      decoded character | 
|  | * | 
|  | * Throws: | 
|  | *      UTFException for an invalid sequence when useReplacementDchar is | 
|  | *      No.useReplacementDchar; index is not modified in that case. With | 
|  | *      Yes.useReplacementDchar, replacementDchar is returned instead and index | 
|  | *      is advanced past the code units that were examined. | 
|  | */ | 
|  | private dchar decodeImpl(bool canIndex, UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)( | 
|  | auto ref S str, ref size_t index) | 
|  | if ( | 
|  | is(S : const char[]) || (isInputRange!S && is(immutable ElementEncodingType!S == immutable char))) | 
|  | { | 
|  | /* The following encodings are valid, except for the 5 and 6 byte | 
|  | * combinations: | 
|  | *  0xxxxxxx | 
|  | *  110xxxxx 10xxxxxx | 
|  | *  1110xxxx 10xxxxxx 10xxxxxx | 
|  | *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | 
|  | *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | 
|  | *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | 
|  | */ | 
|  |  | 
|  | /* Dchar bitmask for different numbers of UTF-8 code units. | 
|  | * bitMask[k] keeps the low 7, 11, 16 or 21 bits, i.e. the payload of a | 
|  | * sequence made of k + 1 code units. | 
|  | */ | 
|  | alias bitMask = AliasSeq!((1 << 7) - 1, (1 << 11) - 1, (1 << 16) - 1, (1 << 21) - 1); | 
|  |  | 
|  | static if (is(S : const char[])) | 
|  | auto pstr = str.ptr + index;    // this is what makes decodeImpl() @system code | 
|  | else static if (isRandomAccessRange!S && hasSlicing!S && hasLength!S) | 
|  | auto pstr = str[index .. str.length]; | 
|  | else | 
|  | alias pstr = str; | 
|  |  | 
|  | // https://issues.dlang.org/show_bug.cgi?id=14447 forces this to be done | 
|  | // outside of decodeImpl | 
|  | //enum canIndex = is(S : const char[]) || (isRandomAccessRange!S && hasSlicing!S && hasLength!S); | 
|  |  | 
|  | static if (canIndex) | 
|  | { | 
|  | immutable length = str.length - index; | 
|  | ubyte fst = pstr[0]; | 
|  | } | 
|  | else | 
|  | { | 
|  | ubyte fst = pstr.front; | 
|  | pstr.popFront(); | 
|  | } | 
|  |  | 
|  | static if (!useReplacementDchar) | 
|  | { | 
|  | static if (canIndex) | 
|  | { | 
|  | static UTFException exception(S)(S str, string msg) | 
|  | { | 
|  | uint[4] sequence = void; | 
|  | size_t i; | 
|  |  | 
|  | do | 
|  | { | 
|  | sequence[i] = str[i]; | 
|  | } while (++i < str.length && i < 4 && (str[i] & 0xC0) == 0x80); | 
|  |  | 
|  | return new UTFException(msg, i).setSequence(sequence[0 .. i]); | 
|  | } | 
|  | } | 
|  |  | 
|  | UTFException invalidUTF() | 
|  | { | 
|  | static if (canIndex) | 
|  | return exception(pstr[0 .. length], "Invalid UTF-8 sequence"); | 
|  | else | 
|  | { | 
|  | //We can't include the invalid sequence with input strings without | 
|  | //saving each of the code units along the way, and we can't do it with | 
|  | //forward ranges without saving the entire range. Both would incur a | 
|  | //cost for the decoding of every character just to provide a better | 
|  | //error message for the (hopefully) rare case when an invalid UTF-8 | 
|  | //sequence is encountered, so we don't bother trying to include the | 
|  | //invalid sequence here, unlike with strings and sliceable ranges. | 
|  | return new UTFException("Invalid UTF-8 sequence"); | 
|  | } | 
|  | } | 
|  |  | 
|  | UTFException outOfBounds() | 
|  | { | 
|  | static if (canIndex) | 
|  | return exception(pstr[0 .. length], "Attempted to decode past the end of a string"); | 
|  | else | 
|  | return new UTFException("Attempted to decode past the end of a string"); | 
|  | } | 
|  | } | 
|  |  | 
|  | if ((fst & 0b1100_0000) != 0b1100_0000) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | ++index;            // always consume bad input to avoid infinite loops | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw invalidUTF(); // starter must have at least 2 first bits set | 
|  | } | 
|  | ubyte tmp = void; | 
|  | dchar d = fst; // upper control bits are masked out later | 
|  | fst <<= 1; | 
|  |  | 
|  | foreach (i; AliasSeq!(1, 2, 3)) | 
|  | { | 
|  |  | 
|  | static if (canIndex) | 
|  | { | 
|  | if (i == length) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += i; | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw outOfBounds(); | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | if (pstr.empty) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += i; | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw outOfBounds(); | 
|  | } | 
|  | } | 
|  |  | 
|  | static if (canIndex) | 
|  | tmp = pstr[i]; | 
|  | else | 
|  | { | 
|  | tmp = pstr.front; | 
|  | pstr.popFront(); | 
|  | } | 
|  |  | 
|  | if ((tmp & 0xC0) != 0x80) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += i + 1; | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw invalidUTF(); | 
|  | } | 
|  |  | 
|  | d = (d << 6) | (tmp & 0x3F); | 
|  | fst <<= 1; | 
|  |  | 
|  | if (!(fst & 0x80)) // no more bytes | 
|  | { | 
|  | d &= bitMask[i]; // mask out control bits | 
|  |  | 
|  | // overlong, could have been encoded with i bytes | 
|  | if ((d & ~bitMask[i - 1]) == 0) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += i + 1; | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw invalidUTF(); | 
|  | } | 
|  |  | 
|  | // check for surrogates only needed for 3 bytes | 
|  | static if (i == 2) | 
|  | { | 
|  | if (!isValidDchar(d)) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += i + 1; | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw invalidUTF(); | 
|  | } | 
|  | } | 
|  |  | 
|  | static if (i == 3) | 
|  | { | 
|  | if (d > dchar.max) | 
|  | { | 
|  | static if (useReplacementDchar) | 
|  | d = replacementDchar; | 
|  | else | 
|  | throw invalidUTF(); | 
|  | } | 
|  | } | 
|  |  | 
|  | index += i + 1; | 
|  | return d; | 
|  | } | 
|  | } | 
|  |  | 
|  | static if (useReplacementDchar) | 
|  | { | 
|  | index += 4;             // read 4 chars by now | 
|  | return replacementDchar; | 
|  | } | 
|  | else | 
|  | throw invalidUTF(); | 
|  | } | 
|  |  | 
@safe pure @nogc nothrow
unittest
{
    // Exercises the Yes.useReplacementDchar path of the UTF-8 decodeImpl
    // through a minimal, non-indexable input range.

    static struct CharRange
    {
    @safe pure @nogc nothrow:
        this(string s) { this.s = s; }

        @property bool empty() { return pos == s.length; }
        @property char front() { return s[pos]; }
        void popFront() { ++pos; }

        size_t pos;
        string s;
    }

    foreach (bad; invalidUTFstrings!char())
    {
        auto input = CharRange(bad);
        size_t consumed;
        immutable dchar decoded = decodeImpl!(false, Yes.useReplacementDchar)(input, consumed);

        // Invalid input yields the replacement character and always consumes
        // at least one code unit, never more than the input holds.
        assert(decoded == replacementDchar);
        assert(consumed >= 1 && consumed <= bad.length);
    }
}
|  |  | 
// UTF-16 overload of decodeImpl.
//
// Decodes the code point starting at `str[index]` (indexable input) or at the
// front of `str` (other input ranges) and advances `index` by the number of
// code units used. Callers must already have handled code units below 0xD800.
// With No.useReplacementDchar an invalid sequence throws a UTFException and
// leaves `index` untouched; with Yes.useReplacementDchar it yields
// replacementDchar instead.
private dchar decodeImpl(bool canIndex, UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)
(auto ref S str, ref size_t index)
if (is(S : const wchar[]) || (isInputRange!S && is(immutable ElementEncodingType!S == immutable wchar)))
{
    // Choose the view of the input: a raw pointer for arrays, a slice for
    // sliceable random access ranges, the range itself otherwise.
    static if (is(S : const wchar[]))
        auto units = str.ptr + index;
    else static if (isRandomAccessRange!S && hasSlicing!S && hasLength!S)
        auto units = str[index .. str.length];
    else
        alias units = str;

    // canIndex is a template parameter rather than being computed here because
    // of https://issues.dlang.org/show_bug.cgi?id=14447.

    static if (canIndex)
    {
        immutable remaining = str.length - index;
        uint lead = units[0];
    }
    else
    {
        uint lead = units.front;
        units.popFront();
    }

    static if (!useReplacementDchar)
    {
        // Builds the exception; the offending code unit is attached when the
        // input can be indexed.
        UTFException failure(string msg)
        {
            static if (canIndex)
                return new UTFException(msg).setSequence(units[0]);
            else
                return new UTFException(msg);
        }
    }

    // The < case must be taken care of before decodeImpl is called.
    assert(lead >= 0xD800);

    // Number of code units the decoded code point occupies.
    size_t consumed = 1;

    if (lead <= 0xDBFF)
    {
        // High surrogate: a low surrogate has to follow.
        static if (canIndex)
            immutable atLastUnit = remaining == 1;
        else
            immutable atLastUnit = units.empty;

        if (atLastUnit)
        {
            static if (useReplacementDchar)
            {
                ++index;
                return replacementDchar;
            }
            else
                throw failure("surrogate UTF-16 high value past end of string");
        }

        static if (canIndex)
            immutable uint trail = units[1];
        else
        {
            immutable uint trail = units.front;
            units.popFront();
        }

        immutable trailIsLowSurrogate = trail >= 0xDC00 && trail <= 0xDFFF;
        if (trailIsLowSurrogate)
        {
            // Combine the pair; subtracting 0xD7C0 instead of 0xD800 folds in
            // the 0x10000 offset of the supplementary planes.
            lead = ((lead - 0xD7C0) << 10) + (trail - 0xDC00);
        }
        else
        {
            static if (useReplacementDchar)
                lead = replacementDchar;
            else
                throw failure("surrogate UTF-16 low value out of range");
        }
        consumed = 2;
    }
    else if (0xDC00 <= lead && lead <= 0xDFFF)
    {
        // A low surrogate without a preceding high surrogate.
        static if (useReplacementDchar)
            lead = replacementDchar;
        else
            throw failure("unpaired surrogate UTF-16 value");
    }

    index += consumed;

    // Note: u+FFFE and u+FFFF are specifically permitted by the
    // Unicode standard for application internal use (see isValidDchar)

    return cast(dchar) lead;
}
|  |  | 
@safe pure @nogc nothrow
unittest
{
    // Exercises the Yes.useReplacementDchar path of the UTF-16 decodeImpl
    // through a minimal, non-indexable input range.

    static struct WcharRange
    {
    @safe pure @nogc nothrow:
        this(wstring s) { this.s = s; }

        @property bool empty() { return pos == s.length; }
        @property wchar front() { return s[pos]; }
        void popFront() { ++pos; }

        size_t pos;
        wstring s;
    }

    foreach (bad; invalidUTFstrings!wchar())
    {
        auto input = WcharRange(bad);
        size_t consumed;
        immutable dchar decoded = decodeImpl!(false, Yes.useReplacementDchar)(input, consumed);

        // Invalid input yields the replacement character and always consumes
        // at least one code unit, never more than the input holds.
        assert(decoded == replacementDchar);
        assert(consumed >= 1 && consumed <= bad.length);
    }
}
|  |  | 
|  | // UTF-32 overload of decodeImpl: reads a single code unit, validates it with | 
|  | // isValidDchar and advances `index` by one. An invalid value either throws a | 
|  | // UTFException or, with Yes.useReplacementDchar, is returned as replacementDchar. | 
|  | private dchar decodeImpl(bool canIndex, UseReplacementDchar useReplacementDchar = No.useReplacementDchar, S)( | 
|  |     auto ref S str, ref size_t index) | 
|  | if (is(S : const dchar[]) || (isInputRange!S && is(immutable ElementEncodingType!S == immutable dchar))) | 
|  | { | 
|  |     // Arrays and random-access ranges are read at `index`; any other input | 
|  |     // range is read from its front and popped once the value was accepted. | 
|  |     enum readByIndex = is(S : const dchar[]) || isRandomAccessRange!S; | 
|  |  | 
|  |     static if (is(S : const dchar[])) | 
|  |         auto src = str.ptr; | 
|  |     else | 
|  |         alias src = str; | 
|  |  | 
|  |     static if (readByIndex) | 
|  |         dchar codePoint = src[index]; | 
|  |     else | 
|  |         dchar codePoint = src.front; | 
|  |  | 
|  |     if (!isValidDchar(codePoint)) | 
|  |     { | 
|  |         static if (useReplacementDchar) | 
|  |             codePoint = replacementDchar; | 
|  |         else | 
|  |             throw new UTFException("Invalid UTF-32 value").setSequence(codePoint); | 
|  |     } | 
|  |  | 
|  |     ++index; | 
|  |     static if (!readByIndex) | 
|  |         src.popFront(); | 
|  |     return codePoint; | 
|  | } | 
|  |  | 
|  | @safe pure @nogc nothrow | 
|  | unittest | 
|  | { | 
|  |     // Exercises the useReplacementDchar == true path of the UTF-32 decodeImpl | 
|  |     // through a minimal, non-indexable input range of dchar code units. | 
|  |  | 
|  |     static struct DcharInput | 
|  |     { | 
|  |     @safe pure @nogc nothrow: | 
|  |         dstring data; | 
|  |         size_t pos; | 
|  |  | 
|  |         this(dstring data) { this.data = data; } | 
|  |         @property bool empty() { return pos == data.length; } | 
|  |         @property dchar front() { return data[pos]; } | 
|  |         void popFront() { ++pos; } | 
|  |     } | 
|  |  | 
|  |     foreach (bad; invalidUTFstrings!dchar()) | 
|  |     { | 
|  |         auto input = DcharInput(bad); | 
|  |         size_t index; | 
|  |         immutable dchar decoded = decodeImpl!(false, Yes.useReplacementDchar)(input, index); | 
|  |  | 
|  |         // Every invalid sequence must come back as replacementDchar ... | 
|  |         assert(decoded == replacementDchar); | 
|  |         // ... while advancing by at least one and at most all code units. | 
|  |         assert(index >= 1); | 
|  |         assert(index <= bad.length); | 
|  |     } | 
|  | } | 
|  |  | 
|  |  | 
|  | // Test helper: for random-access ranges (other than narrow strings), decodes | 
|  | // at `index` and verifies the decoded character, the updated index and that | 
|  | // the range length was left unchanged. Failures are reported as AssertError | 
|  | // attributed to the caller's `line`. | 
|  | version (StdUnittest) private void testDecode(R)(R range, | 
|  |     size_t index, | 
|  |     dchar expectedChar, | 
|  |     size_t expectedIndex, | 
|  |     size_t line = __LINE__) | 
|  | { | 
|  |     import core.exception : AssertError; | 
|  |     import std.exception : enforce; | 
|  |     import std.string : format; | 
|  |     import std.traits : isNarrowString; | 
|  |  | 
|  |     enum checkLength = hasLength!R; | 
|  |     static if (checkLength) | 
|  |         immutable originalLength = range.length; | 
|  |  | 
|  |     static if (isRandomAccessRange!R && !isNarrowString!R) | 
|  |     { | 
|  |         immutable decoded = decode(range, index); | 
|  |  | 
|  |         if (decoded != expectedChar) | 
|  |             throw new AssertError(format("decode: Wrong character: %s", decoded), __FILE__, line); | 
|  |         if (index != expectedIndex) | 
|  |             throw new AssertError(format("decode: Wrong index: %s", index), __FILE__, line); | 
|  |  | 
|  |         static if (checkLength) | 
|  |         { | 
|  |             // decode() works by index and must not consume the range. | 
|  |             if (range.length != originalLength) | 
|  |                 throw new AssertError(format("decode: length changed: %s", range.length), __FILE__, line); | 
|  |         } | 
|  |     } | 
|  | } | 
|  |  | 
|  | // Test helper: calls decodeFront on `range` and verifies the decoded | 
|  | // character, the reported number of code units and, when the range has a | 
|  | // length, that exactly that many code units were removed from it. | 
|  | version (StdUnittest) private void testDecodeFront(R)(ref R range, | 
|  |     dchar expectedChar, | 
|  |     size_t expectedNumCodeUnits, | 
|  |     size_t line = __LINE__) | 
|  | { | 
|  |     import core.exception : AssertError; | 
|  |     import std.exception : enforce; | 
|  |     import std.string : format; | 
|  |  | 
|  |     enum checkLength = hasLength!R; | 
|  |     static if (checkLength) | 
|  |         immutable originalLength = range.length; | 
|  |  | 
|  |     size_t consumed; | 
|  |     immutable decoded = decodeFront(range, consumed); | 
|  |  | 
|  |     if (decoded != expectedChar) | 
|  |         throw new AssertError(format("decodeFront: Wrong character: %s", decoded), __FILE__, line); | 
|  |     if (consumed != expectedNumCodeUnits) | 
|  |         throw new AssertError(format("decodeFront: Wrong numCodeUnits: %s", consumed), __FILE__, line); | 
|  |  | 
|  |     static if (checkLength) | 
|  |     { | 
|  |         if (range.length != originalLength - consumed) | 
|  |             throw new AssertError(format("decodeFront: wrong length: %s", range.length), __FILE__, line); | 
|  |     } | 
|  | } | 
|  |  | 
|  | // Test helper: calls decodeBack on `range` and verifies the decoded | 
|  | // character, the reported number of code units and, when the range has a | 
|  | // length, that exactly that many code units were removed from it. | 
|  | version (StdUnittest) private void testDecodeBack(R)(ref R range, | 
|  |     dchar expectedChar, | 
|  |     size_t expectedNumCodeUnits, | 
|  |     size_t line = __LINE__) | 
|  | { | 
|  |     // Ranges that are not bidirectional are silently accepted and do nothing, | 
|  |     // so callers can run every `decode` helper over the same list of types. | 
|  |     static if (isBidirectionalRange!R) | 
|  |     { | 
|  |         import core.exception : AssertError; | 
|  |         import std.exception : enforce; | 
|  |         import std.string : format; | 
|  |  | 
|  |         enum checkLength = hasLength!R; | 
|  |         static if (checkLength) | 
|  |             immutable originalLength = range.length; | 
|  |  | 
|  |         size_t consumed; | 
|  |         immutable decoded = decodeBack(range, consumed); | 
|  |  | 
|  |         if (decoded != expectedChar) | 
|  |             throw new AssertError(format("decodeBack: Wrong character: %s", decoded), __FILE__, line); | 
|  |         if (consumed != expectedNumCodeUnits) | 
|  |             throw new AssertError(format("decodeBack: Wrong numCodeUnits: %s", consumed), __FILE__, line); | 
|  |  | 
|  |         static if (checkLength) | 
|  |         { | 
|  |             if (range.length != originalLength - consumed) | 
|  |                 throw new AssertError(format("decodeBack: wrong length: %s", range.length), __FILE__, line); | 
|  |         } | 
|  |     } | 
|  | } | 
|  |  | 
|  | // Test helper: runs testDecode (at index 0), testDecodeBack and | 
|  | // testDecodeFront over a range that holds exactly one encoded code point, | 
|  | // so `expectedIndex` doubles as the expected number of code units. | 
|  | version (StdUnittest) private void testAllDecode(R)(R range, | 
|  |     dchar expectedChar, | 
|  |     size_t expectedIndex, | 
|  |     size_t line = __LINE__) | 
|  | { | 
|  |     enum size_t startIndex = 0; | 
|  |     testDecode(range, startIndex, expectedChar, expectedIndex, line); | 
|  |  | 
|  |     static if (isBidirectionalRange!R) | 
|  |     { | 
|  |         // decodeBack consumes its argument, so hand it a saved copy and keep | 
|  |         // `range` intact for the decodeFront check below. | 
|  |         auto forBack = range.save; | 
|  |         testDecodeBack(forBack, expectedChar, expectedIndex, line); | 
|  |     } | 
|  |  | 
|  |     testDecodeFront(range, expectedChar, expectedIndex, line); | 
|  | } | 
|  |  | 
|  | // Test helper: verifies that decoding the malformed `range` fails with a | 
|  | // UTFException. For random-access ranges decode() is tried at `index` and | 
|  | // must leave both the index and the range length untouched; decodeFront() | 
|  | // is additionally tried when the starting index is 0. | 
|  | // Failures are reported as AssertError attributed to the caller's `line`. | 
|  | version (StdUnittest) private void testBadDecode(R)(R range, size_t index, size_t line = __LINE__) | 
|  | { | 
|  |     import core.exception : AssertError; | 
|  |     import std.exception : assertThrown, enforce; | 
|  |     import std.string : format; | 
|  |  | 
|  |     immutable initialIndex = index; | 
|  |  | 
|  |     static if (hasLength!R) | 
|  |         immutable lenBefore = range.length; | 
|  |  | 
|  |     static if (isRandomAccessRange!R) | 
|  |     { | 
|  |         assertThrown!UTFException(decode(range, index), null, __FILE__, line); | 
|  |         enforce(index == initialIndex, | 
|  |             new AssertError(format("decode: Wrong index: %s", index), __FILE__, line)); | 
|  |         static if (hasLength!R) | 
|  |         { | 
|  |             // The format string needs a "%s" for range.length; without it | 
|  |             // format() itself throws on the unused argument and hides the | 
|  |             // intended AssertError. | 
|  |             enforce(range.length == lenBefore, | 
|  |                 new AssertError(format("decode: length changed: %s", range.length), __FILE__, line)); | 
|  |         } | 
|  |     } | 
|  |  | 
|  |     // decodeFront always starts at the front of the range, so it is only | 
|  |     // exercised when the requested start index was 0. | 
|  |     if (initialIndex == 0) | 
|  |         assertThrown!UTFException(decodeFront(range, index), null, __FILE__, line); | 
|  | } | 
|  |  | 
|  | // Test helper: verifies that decodeBack() on the malformed `range` fails | 
|  | // with a UTFException and, when the range has a length, leaves that length | 
|  | // untouched. The check only runs for random-access ranges. | 
|  | // Failures are reported as AssertError attributed to the caller's `line`. | 
|  | version (StdUnittest) private void testBadDecodeBack(R)(R range, size_t line = __LINE__) | 
|  | { | 
|  |     // Ranges that are not bidirectional are silently accepted and do nothing, | 
|  |     // so callers can run every `decode` helper over the same list of types. | 
|  |     static if (!isBidirectionalRange!R) | 
|  |         return; | 
|  |     else | 
|  |     { | 
|  |         import core.exception : AssertError; | 
|  |         import std.exception : assertThrown, enforce; | 
|  |         import std.string : format; | 
|  |  | 
|  |         static if (hasLength!R) | 
|  |             immutable lenBefore = range.length; | 
|  |  | 
|  |         static if (isRandomAccessRange!R) | 
|  |         { | 
|  |             assertThrown!UTFException(decodeBack(range), null, __FILE__, line); | 
|  |             static if (hasLength!R) | 
|  |             { | 
|  |                 // The format string needs a "%s" for range.length; without | 
|  |                 // it format() itself throws on the unused argument and hides | 
|  |                 // the intended AssertError. | 
|  |                 enforce(range.length == lenBefore, | 
|  |                     new AssertError(format("decodeBack: length changed: %s", range.length), __FILE__, line)); | 
|  |             } | 
|  |         } | 
|  |     } | 
|  | } | 
|  |  | 
|  | // UTF-8 decoding tests: decode, decodeFront and decodeBack over several range | 
|  | // wrappers around the same char data, passed through assertCTFEable. | 
|  | @system unittest | 
|  | { | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | // Each S turns a string literal into the range type under test. | 
|  | // NOTE(review): InputCU, RandomCU, RefBidirCU and RefRandomCU are defined | 
|  | // elsewhere in the file; presumably input/random-access code-unit ranges | 
|  | // in value and reference flavours - confirm there. | 
|  | foreach (S; AliasSeq!(to!string, InputCU!char, RandomCU!char, | 
|  | (string s) => new RefBidirCU!char(s), | 
|  | (string s) => new RefRandomCU!char(s))) | 
|  | { | 
|  | // NOTE(review): sHasLength is not referenced anywhere else in this block. | 
|  | enum sHasLength = hasLength!(typeof(S("abcd"))); | 
|  |  | 
|  | // ASCII: one code unit per character. | 
|  | { | 
|  | auto range = S("abcd"); | 
|  | testDecode(range, 0, 'a', 1); | 
|  | testDecode(range, 1, 'b', 2); | 
|  | testDecodeFront(range, 'a', 1); | 
|  | testDecodeFront(range, 'b', 1); | 
|  | assert(decodeFront(range) == 'c'); | 
|  | assert(decodeFront(range) == 'd'); | 
|  | } | 
|  |  | 
|  | // Katakana: three code units per character. | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecode(range, 0, 'ウ', 3); | 
|  | testDecode(range, 3, 'ェ', 6); | 
|  | testDecodeFront(range, 'ウ', 3); | 
|  | testDecodeFront(range, 'ェ', 3); | 
|  | assert(decodeFront(range) == 'ブ'); | 
|  | assert(decodeFront(range) == 'サ'); | 
|  | } | 
|  |  | 
|  | // Same data decoded from the back, last character first. | 
|  | { | 
|  | auto range = S("abcd"); | 
|  | testDecodeBack(range, 'd', 1); | 
|  | testDecodeBack(range, 'c', 1); | 
|  | testDecodeBack(range, 'b', 1); | 
|  | testDecodeBack(range, 'a', 1); | 
|  | } | 
|  |  | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecodeBack(range, 'ト', 3); | 
|  | testDecodeBack(range, 'イ', 3); | 
|  | testDecodeBack(range, 'サ', 3); | 
|  | testDecodeBack(range, 'ブ', 3); | 
|  | } | 
|  |  | 
|  | // Single two- and three-unit sequences through all three decoders. | 
|  | testAllDecode(S("\xC2\xA9"), '\u00A9', 2); | 
|  | testAllDecode(S("\xE2\x89\xA0"), '\u2260', 3); | 
|  |  | 
|  | // Malformed input: a truncated sequence, overlong encodings of U+000A in | 
|  | // two, three and four bytes, and the five- and six-byte forms. | 
|  | foreach (str; ["\xE2\x89", // too short | 
|  | "\xC0\x8A", | 
|  | "\xE0\x80\x8A", | 
|  | "\xF0\x80\x80\x8A", | 
|  | "\xF8\x80\x80\x80\x8A", | 
|  | "\xFC\x80\x80\x80\x80\x8A"]) | 
|  | { | 
|  | testBadDecode(S(str), 0); | 
|  | testBadDecode(S(str), 1); | 
|  | testBadDecodeBack(S(str)); | 
|  | } | 
|  |  | 
|  | // U+FFFE and U+FFFF are expected to decode successfully (these calls | 
|  | // assert a decoded value rather than a thrown exception). | 
|  | testAllDecode(S("\xEF\xBF\xBE"), cast(dchar) 0xFFFE, 3); | 
|  | testAllDecode(S("\xEF\xBF\xBF"), cast(dchar) 0xFFFF, 3); | 
|  |  | 
|  | // Three-byte encodings of surrogate code points (U+D800 .. U+DFFF) | 
|  | // must be rejected. | 
|  | foreach (str; ["\xED\xA0\x80", | 
|  | "\xED\xAD\xBF", | 
|  | "\xED\xAE\x80", | 
|  | "\xED\xAF\xBF", | 
|  | "\xED\xB0\x80", | 
|  | "\xED\xBE\x80", | 
|  | "\xED\xBF\xBF"]) | 
|  | { | 
|  | testBadDecode(S(str), 0); | 
|  | testBadDecodeBack(S(str)); | 
|  | } | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | // UTF-16 decoding tests: decode, decodeFront and decodeBack over several | 
|  | // range wrappers around the same wchar data, passed through assertCTFEable. | 
|  | @system unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | // Each S turns a wstring into the range type under test. | 
|  | foreach (S; AliasSeq!((wstring s) => s, InputCU!wchar, RandomCU!wchar, | 
|  | (wstring s) => new RefBidirCU!wchar(s), | 
|  | (wstring s) => new RefRandomCU!wchar(s))) | 
|  | { | 
|  | // Single code units, the lowest and highest surrogate pairs, and the | 
|  | // values 0xFFFE / 0xFFFF. | 
|  | testAllDecode(S([cast(wchar) 0x1111]), cast(dchar) 0x1111, 1); | 
|  | testAllDecode(S([cast(wchar) 0xD800, cast(wchar) 0xDC00]), cast(dchar) 0x10000, 2); | 
|  | testAllDecode(S([cast(wchar) 0xDBFF, cast(wchar) 0xDFFF]), cast(dchar) 0x10FFFF, 2); | 
|  | testAllDecode(S([cast(wchar) 0xFFFE]), cast(dchar) 0xFFFE, 1); | 
|  | testAllDecode(S([cast(wchar) 0xFFFF]), cast(dchar) 0xFFFF, 1); | 
|  |  | 
|  | // A high surrogate at the end of input, and one followed by a | 
|  | // non-surrogate code unit. | 
|  | testBadDecode(S([ cast(wchar) 0xD801 ]), 0); | 
|  | testBadDecode(S([ cast(wchar) 0xD800, cast(wchar) 0x1200 ]), 0); | 
|  |  | 
|  | // Decoding from the back when the last code unit is a high surrogate. | 
|  | testBadDecodeBack(S([ cast(wchar) 0xD801 ])); | 
|  | testBadDecodeBack(S([ cast(wchar) 0x0010, cast(wchar) 0xD800 ])); | 
|  |  | 
|  | // Katakana: one code unit per character. | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecode(range, 0, 'ウ', 1); | 
|  | testDecode(range, 1, 'ェ', 2); | 
|  | testDecodeFront(range, 'ウ', 1); | 
|  | testDecodeFront(range, 'ェ', 1); | 
|  | assert(decodeFront(range) == 'ブ'); | 
|  | assert(decodeFront(range) == 'サ'); | 
|  | } | 
|  |  | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecodeBack(range, 'ト', 1); | 
|  | testDecodeBack(range, 'イ', 1); | 
|  | testDecodeBack(range, 'サ', 1); | 
|  | testDecodeBack(range, 'ブ', 1); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Mixed sequence: surrogate pair, single unit, surrogate pair. Decoded by | 
|  | // index first, then consumed from the back. | 
|  | foreach (S; AliasSeq!((wchar[] s) => s.idup, RandomCU!wchar, (wstring s) => new RefRandomCU!wchar(s))) | 
|  | { | 
|  | auto str = S([cast(wchar) 0xD800, cast(wchar) 0xDC00, | 
|  | cast(wchar) 0x1400, | 
|  | cast(wchar) 0xDAA7, cast(wchar) 0xDDDE]); | 
|  | testDecode(str, 0, cast(dchar) 0x10000, 2); | 
|  | testDecode(str, 2, cast(dchar) 0x1400, 3); | 
|  | testDecode(str, 3, cast(dchar) 0xB9DDE, 5); | 
|  | testDecodeBack(str, cast(dchar) 0xB9DDE, 2); | 
|  | testDecodeBack(str, cast(dchar) 0x1400, 1); | 
|  | testDecodeBack(str, cast(dchar) 0x10000, 2); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | // UTF-32 decoding tests: decode, decodeFront and decodeBack over several | 
|  | // range wrappers around the same dchar data, passed through assertCTFEable. | 
|  | @system unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | // Each S turns a dstring into the range type under test. | 
|  | foreach (S; AliasSeq!((dstring s) => s, RandomCU!dchar, InputCU!dchar, | 
|  | (dstring s) => new RefBidirCU!dchar(s), | 
|  | (dstring s) => new RefRandomCU!dchar(s))) | 
|  | { | 
|  | // Every code point is exactly one code unit. | 
|  | testAllDecode(S([cast(dchar) 0x1111]), cast(dchar) 0x1111, 1); | 
|  | testAllDecode(S([cast(dchar) 0x10000]), cast(dchar) 0x10000, 1); | 
|  | testAllDecode(S([cast(dchar) 0x10FFFF]), cast(dchar) 0x10FFFF, 1); | 
|  | testAllDecode(S([cast(dchar) 0xFFFE]), cast(dchar) 0xFFFE, 1); | 
|  | testAllDecode(S([cast(dchar) 0xFFFF]), cast(dchar) 0xFFFF, 1); | 
|  |  | 
|  | // Surrogate values and a value above 0x10FFFF must be rejected. | 
|  | testBadDecode(S([cast(dchar) 0xD800]), 0); | 
|  | testBadDecode(S([cast(dchar) 0xDFFE]), 0); | 
|  | testBadDecode(S([cast(dchar) 0x110000]), 0); | 
|  |  | 
|  | testBadDecodeBack(S([cast(dchar) 0xD800])); | 
|  | testBadDecodeBack(S([cast(dchar) 0xDFFE])); | 
|  | testBadDecodeBack(S([cast(dchar) 0x110000])); | 
|  |  | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecode(range, 0, 'ウ', 1); | 
|  | testDecode(range, 1, 'ェ', 2); | 
|  | testDecodeFront(range, 'ウ', 1); | 
|  | testDecodeFront(range, 'ェ', 1); | 
|  | assert(decodeFront(range) == 'ブ'); | 
|  | assert(decodeFront(range) == 'サ'); | 
|  | } | 
|  |  | 
|  | { | 
|  | auto range = S("ウェブサイト"); | 
|  | testDecodeBack(range, 'ト', 1); | 
|  | testDecodeBack(range, 'イ', 1); | 
|  | testDecodeBack(range, 'サ', 1); | 
|  | testDecodeBack(range, 'ブ', 1); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Three code points decoded by index first, then consumed from the back. | 
|  | foreach (S; AliasSeq!((dchar[] s) => s.idup, RandomCU!dchar, (dstring s) => new RefRandomCU!dchar(s))) | 
|  | { | 
|  | auto str = S([cast(dchar) 0x10000, cast(dchar) 0x1400, cast(dchar) 0xB9DDE]); | 
|  | testDecode(str, 0, 0x10000, 1); | 
|  | testDecode(str, 1, 0x1400, 2); | 
|  | testDecode(str, 2, 0xB9DDE, 3); | 
|  | testDecodeBack(str, cast(dchar) 0xB9DDE, 1); | 
|  | testDecodeBack(str, cast(dchar) 0x1400, 1); | 
|  | testDecodeBack(str, cast(dchar) 0x10000, 1); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | // Compile-time checks of the inferred attributes of the decode functions for | 
|  | // every mutable, const and immutable built-in string type. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | import std.traits : FunctionAttribute, functionAttributes, isSafe; | 
|  | assertCTFEable!( | 
|  | { | 
|  | foreach (S; AliasSeq!( char[], const( char)[],  string, | 
|  | wchar[], const(wchar)[], wstring, | 
|  | dchar[], const(dchar)[], dstring)) | 
|  | { | 
|  | // decode and both decodeFront forms must be callable from @safe code. | 
|  | static assert(isSafe!({ S str; size_t i = 0; decode(str, i);      })); | 
|  | static assert(isSafe!({ S str; size_t i = 0; decodeFront(str, i); })); | 
|  | static assert(isSafe!({ S str; decodeFront(str); })); | 
|  | // decode, decodeFront and decodeBack must all be inferred pure. | 
|  | static assert((functionAttributes!({ S str; size_t i = 0; decode(str, i); }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ | 
|  | S str; size_t i = 0; decodeFront(str, i); | 
|  | }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ S str; decodeFront(str); }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ | 
|  | S str; size_t i = 0; decodeBack(str, i); | 
|  | }) & FunctionAttribute.pure_) != 0); | 
|  | static assert((functionAttributes!({ S str; decodeBack(str); }) & FunctionAttribute.pure_) != 0); | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception; | 
|  |  | 
|  |     // A four-byte sequence (lead byte 0xF7 plus three 0xBF continuation | 
|  |     // bytes) whose payload bits are all set; decode must reject it. | 
|  |     char[4] val; | 
|  |     val[0] = 0xF7; | 
|  |     foreach (k; 1 .. val.length) | 
|  |         val[k] = 0xBF; | 
|  |  | 
|  |     size_t i = 0; | 
|  |     assertThrown!UTFException((){ dchar decoded = decode(val[], i); }()); | 
|  | } | 
|  | /* =================== Encode ======================= */ | 
|  |  | 
|  | // Shared failure path of the encode overloads: throws a UTFException that | 
|  | // carries `msg` and the offending code point `c`, unless the caller opted | 
|  | // into Yes.useReplacementDchar, in which case replacementDchar is returned. | 
|  | private dchar _utfException(UseReplacementDchar useReplacementDchar)(string msg, dchar c) | 
|  | { | 
|  |     static if (!useReplacementDchar) | 
|  |         throw new UTFException(msg).setSequence(c); | 
|  |     else | 
|  |         return replacementDchar; | 
|  | } | 
|  |  | 
|  | /++ | 
|  | Encodes the code point `c` into the fixed-size buffer `buf` and returns the | 
|  | number of code units written: between `1` and `4` for `char[4]` buffers, | 
|  | `1` or `2` for `wchar[2]` buffers and always `1` for `dchar[1]` buffers. | 
|  |  | 
|  | With `Yes.useReplacementDchar`, an invalid `c` is encoded as | 
|  | `replacementDchar` instead of raising an exception. | 
|  |  | 
|  | Throws: | 
|  | `UTFException` if `c` is not a valid UTF code point (unless | 
|  | `Yes.useReplacementDchar` was requested). | 
|  | +/ | 
|  | size_t encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     out char[4] buf, dchar c) @safe pure | 
|  | { | 
|  |     // Deal with unencodable input first: either throw, or continue with the | 
|  |     // replacement character, which then takes the three-byte path below. | 
|  |     if (c > 0x10FFFF) | 
|  |     { | 
|  |         assert(!isValidDchar(c)); | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-8", c); | 
|  |     } | 
|  |     else if (0xD800 <= c && c <= 0xDFFF) | 
|  |         c = _utfException!useReplacementDchar("Encoding a surrogate code point in UTF-8", c); | 
|  |  | 
|  |     assert(isValidDchar(c)); | 
|  |  | 
|  |     if (c <= 0x7F) | 
|  |     { | 
|  |         // 0xxxxxxx | 
|  |         buf[0] = cast(char) c; | 
|  |         return 1; | 
|  |     } | 
|  |     else if (c <= 0x7FF) | 
|  |     { | 
|  |         // 110xxxxx 10xxxxxx | 
|  |         buf[0] = cast(char)(0xC0 | (c >> 6)); | 
|  |         buf[1] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         return 2; | 
|  |     } | 
|  |     else if (c <= 0xFFFF) | 
|  |     { | 
|  |         // 1110xxxx 10xxxxxx 10xxxxxx | 
|  |         buf[0] = cast(char)(0xE0 | (c >> 12)); | 
|  |         buf[1] = cast(char)(0x80 | ((c >> 6) & 0x3F)); | 
|  |         buf[2] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         return 3; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | 
|  |         buf[0] = cast(char)(0xF0 | (c >> 18)); | 
|  |         buf[1] = cast(char)(0x80 | ((c >> 12) & 0x3F)); | 
|  |         buf[2] = cast(char)(0x80 | ((c >> 6) & 0x3F)); | 
|  |         buf[3] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         return 4; | 
|  |     } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception : assertThrown; | 
|  |     import std.typecons : Yes; | 
|  |  | 
|  |     char[4] buf; | 
|  |  | 
|  |     // The return value is the number of code units written to `buf`. | 
|  |     assert(encode(buf, '\u0000') == 1); | 
|  |     assert(buf[0 .. 1] == "\u0000"); | 
|  |  | 
|  |     assert(encode(buf, '\u007F') == 1); | 
|  |     assert(buf[0 .. 1] == "\u007F"); | 
|  |  | 
|  |     assert(encode(buf, '\u0080') == 2); | 
|  |     assert(buf[0 .. 2] == "\u0080"); | 
|  |  | 
|  |     assert(encode(buf, '\uE000') == 3); | 
|  |     assert(buf[0 .. 3] == "\uE000"); | 
|  |  | 
|  |     assert(encode(buf, 0xFFFE) == 3); | 
|  |     assert(buf[0 .. 3] == "\xEF\xBF\xBE"); | 
|  |  | 
|  |     // Values above 0x10FFFF cannot be encoded ... | 
|  |     assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  |     // ... unless the replacement character is requested instead. | 
|  |     encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  |     auto encoded = buf[]; | 
|  |     assert(encoded.decodeFront == replacementDchar); | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception : assertThrown; | 
|  |     import std.typecons : Yes; | 
|  |  | 
|  |     wchar[2] buf; | 
|  |  | 
|  |     // Code points up to 0xFFFF take one code unit ... | 
|  |     assert(encode(buf, '\u0000') == 1); | 
|  |     assert(buf[0 .. 1] == "\u0000"); | 
|  |  | 
|  |     assert(encode(buf, '\uD7FF') == 1); | 
|  |     assert(buf[0 .. 1] == "\uD7FF"); | 
|  |  | 
|  |     assert(encode(buf, '\uE000') == 1); | 
|  |     assert(buf[0 .. 1] == "\uE000"); | 
|  |  | 
|  |     // ... and everything above takes two. | 
|  |     assert(encode(buf, '\U00010000') == 2); | 
|  |     assert(buf[0 .. 2] == "\U00010000"); | 
|  |  | 
|  |     assert(encode(buf, '\U0010FFFF') == 2); | 
|  |     assert(buf[0 .. 2] == "\U0010FFFF"); | 
|  |  | 
|  |     // A surrogate code point cannot be encoded on its own. | 
|  |     assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  |  | 
|  |     encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  |     auto encoded = buf[]; | 
|  |     assert(encoded.decodeFront == replacementDchar); | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception : assertThrown; | 
|  |     import std.typecons : Yes; | 
|  |  | 
|  |     dchar[1] buf; | 
|  |  | 
|  |     // Every valid code point occupies exactly one UTF-32 code unit. | 
|  |     assert(encode(buf, '\u0000') == 1); | 
|  |     assert(buf[0] == '\u0000'); | 
|  |  | 
|  |     assert(encode(buf, '\uD7FF') == 1); | 
|  |     assert(buf[0] == '\uD7FF'); | 
|  |  | 
|  |     assert(encode(buf, '\uE000') == 1); | 
|  |     assert(buf[0] == '\uE000'); | 
|  |  | 
|  |     assert(encode(buf, '\U0010FFFF') == 1); | 
|  |     assert(buf[0] == '\U0010FFFF'); | 
|  |  | 
|  |     // A surrogate code point is rejected. | 
|  |     assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  |  | 
|  |     encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  |     assert(buf[0] == replacementDchar); | 
|  | } | 
|  |  | 
|  | // Boundary tests for encode into a char[4] buffer, passed through | 
|  | // assertCTFEable. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | char[4] buf; | 
|  |  | 
|  | // First and last code point of each encoded length (1, 2, 3 and 4 units), | 
|  | // including both sides of the surrogate gap. | 
|  | assert(encode(buf, '\u0000') == 1 && buf[0 .. 1] == "\u0000"); | 
|  | assert(encode(buf, '\u007F') == 1 && buf[0 .. 1] == "\u007F"); | 
|  | assert(encode(buf, '\u0080') == 2 && buf[0 .. 2] == "\u0080"); | 
|  | assert(encode(buf, '\u07FF') == 2 && buf[0 .. 2] == "\u07FF"); | 
|  | assert(encode(buf, '\u0800') == 3 && buf[0 .. 3] == "\u0800"); | 
|  | assert(encode(buf, '\uD7FF') == 3 && buf[0 .. 3] == "\uD7FF"); | 
|  | assert(encode(buf, '\uE000') == 3 && buf[0 .. 3] == "\uE000"); | 
|  | assert(encode(buf, 0xFFFE) == 3 && buf[0 .. 3] == "\xEF\xBF\xBE"); | 
|  | assert(encode(buf, 0xFFFF) == 3 && buf[0 .. 3] == "\xEF\xBF\xBF"); | 
|  | assert(encode(buf, '\U00010000') == 4 && buf[0 .. 4] == "\U00010000"); | 
|  | assert(encode(buf, '\U0010FFFF') == 4 && buf[0 .. 4] == "\U0010FFFF"); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // With Yes.useReplacementDchar the buffer receives "\uFFFD" instead. | 
|  | assert(encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000) == buf.stride); | 
|  | enum replacementDcharString = "\uFFFD"; | 
|  | assert(buf[0 .. replacementDcharString.length] == replacementDcharString); | 
|  | }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /// Ditto | 
|  | size_t encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     out wchar[2] buf, dchar c) @safe pure | 
|  | { | 
|  |     // Deal with unencodable input first: either throw, or continue with the | 
|  |     // replacement character, which then takes the single-unit path below. | 
|  |     if (c > 0x10FFFF) | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-16", c); | 
|  |     else if (0xD800 <= c && c <= 0xDFFF) | 
|  |         c = _utfException!useReplacementDchar("Encoding an isolated surrogate code point in UTF-16", c); | 
|  |  | 
|  |     assert(isValidDchar(c)); | 
|  |  | 
|  |     if (c <= 0xFFFF) | 
|  |     { | 
|  |         buf[0] = cast(wchar) c; | 
|  |         return 1; | 
|  |     } | 
|  |  | 
|  |     // Above 0xFFFF: split the 20-bit offset from 0x10000 into a high and a | 
|  |     // low surrogate carrying ten bits each. | 
|  |     immutable uint offset = c - 0x10000; | 
|  |     buf[0] = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800); | 
|  |     buf[1] = cast(wchar)((offset & 0x3FF) + 0xDC00); | 
|  |     return 2; | 
|  | } | 
|  |  | 
|  | // Boundary tests for encode into a wchar[2] buffer, passed through | 
|  | // assertCTFEable. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | wchar[2] buf; | 
|  |  | 
|  | // One code unit up to 0xFFFF (both sides of the surrogate gap), two above. | 
|  | assert(encode(buf, '\u0000') == 1 && buf[0 .. 1] == "\u0000"); | 
|  | assert(encode(buf, '\uD7FF') == 1 && buf[0 .. 1] == "\uD7FF"); | 
|  | assert(encode(buf, '\uE000') == 1 && buf[0 .. 1] == "\uE000"); | 
|  | assert(encode(buf, 0xFFFE) == 1 && buf[0] == 0xFFFE); | 
|  | assert(encode(buf, 0xFFFF) == 1 && buf[0] == 0xFFFF); | 
|  | assert(encode(buf, '\U00010000') == 2 && buf[0 .. 2] == "\U00010000"); | 
|  | assert(encode(buf, '\U0010FFFF') == 2 && buf[0 .. 2] == "\U0010FFFF"); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // With Yes.useReplacementDchar the buffer receives replacementDchar. | 
|  | assert(encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000) == buf.stride); | 
|  | assert(buf.front == replacementDchar); | 
|  | }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /// Ditto | 
|  | size_t encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     out dchar[1] buf, dchar c) @safe pure | 
|  | { | 
|  |     // UTF-32 stores the code point as is; only surrogates and values above | 
|  |     // 0x10FFFF have to be rejected or replaced. | 
|  |     immutable bool isSurrogate = 0xD800 <= c && c <= 0xDFFF; | 
|  |     immutable bool beyondUnicode = 0x10FFFF < c; | 
|  |  | 
|  |     if (isSurrogate || beyondUnicode) | 
|  |     { | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-32", c); | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         assert(isValidDchar(c)); | 
|  |     } | 
|  |  | 
|  |     buf[0] = c; | 
|  |     return 1; | 
|  | } | 
|  |  | 
|  | // Boundary tests for encode into a dchar[1] buffer, passed through | 
|  | // assertCTFEable. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | dchar[1] buf; | 
|  |  | 
|  | // Valid code points are stored unchanged. | 
|  | encode(buf, '\u0000'); assert(buf[0] == '\u0000'); | 
|  | encode(buf, '\uD7FF'); assert(buf[0] == '\uD7FF'); | 
|  | encode(buf, '\uE000'); assert(buf[0] == '\uE000'); | 
|  | encode(buf, 0xFFFE ); assert(buf[0] == 0xFFFE); | 
|  | encode(buf, 0xFFFF ); assert(buf[0] == 0xFFFF); | 
|  | encode(buf, '\U0010FFFF'); assert(buf[0] == '\U0010FFFF'); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // With Yes.useReplacementDchar the buffer receives replacementDchar. | 
|  | assert(encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000) == buf.stride); | 
|  | assert(buf.front == replacementDchar); | 
|  | }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /++ | 
|  | Appends the code point `c` to `str`, encoded in the encoding that matches | 
|  | `str`'s character type. | 
|  |  | 
|  | With `Yes.useReplacementDchar`, an invalid `c` is appended as | 
|  | `replacementDchar` instead of raising an exception. | 
|  |  | 
|  | Throws: | 
|  | `UTFException` if `c` is not a valid UTF code point (unless | 
|  | `Yes.useReplacementDchar` was requested). | 
|  | +/ | 
|  | void encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     ref scope char[] str, dchar c) @safe pure | 
|  | { | 
|  |     // Deal with unencodable input first: either throw, or continue with the | 
|  |     // replacement character, which then takes the three-byte path below. | 
|  |     if (c > 0x10FFFF) | 
|  |     { | 
|  |         assert(!isValidDchar(c)); | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-8", c); | 
|  |     } | 
|  |     else if (0xD800 <= c && c <= 0xDFFF) | 
|  |         c = _utfException!useReplacementDchar("Encoding a surrogate code point in UTF-8", c); | 
|  |  | 
|  |     assert(isValidDchar(c)); | 
|  |  | 
|  |     // ASCII is appended directly without going through a scratch buffer. | 
|  |     if (c <= 0x7F) | 
|  |     { | 
|  |         str ~= cast(char) c; | 
|  |         return; | 
|  |     } | 
|  |  | 
|  |     char[4] units; | 
|  |     uint count; | 
|  |  | 
|  |     if (c <= 0x7FF) | 
|  |     { | 
|  |         units[0] = cast(char)(0xC0 | (c >> 6)); | 
|  |         units[1] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         count = 2; | 
|  |     } | 
|  |     else if (c <= 0xFFFF) | 
|  |     { | 
|  |         units[0] = cast(char)(0xE0 | (c >> 12)); | 
|  |         units[1] = cast(char)(0x80 | ((c >> 6) & 0x3F)); | 
|  |         units[2] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         count = 3; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         units[0] = cast(char)(0xF0 | (c >> 18)); | 
|  |         units[1] = cast(char)(0x80 | ((c >> 12) & 0x3F)); | 
|  |         units[2] = cast(char)(0x80 | ((c >> 6) & 0x3F)); | 
|  |         units[3] = cast(char)(0x80 | (c & 0x3F)); | 
|  |         count = 4; | 
|  |     } | 
|  |  | 
|  |     str ~= units[0 .. count]; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  |     char[] text = "abcd".dup; | 
|  |     immutable dchar ascii = 'a'; | 
|  |     immutable dchar nonAscii = 'ø'; | 
|  |  | 
|  |     // An ASCII character adds a single code unit. | 
|  |     encode(text, ascii); | 
|  |     assert(text == "abcda"); | 
|  |     assert(text.length == 5); | 
|  |  | 
|  |     // 'ø' needs two UTF-8 code units. | 
|  |     encode(text, nonAscii); | 
|  |     assert(text == "abcdaø"); | 
|  |     assert(text.length == 7); | 
|  | } | 
|  |  | 
|  | // Appending one-, two- and three-unit characters to a char[], passed through | 
|  | // assertCTFEable. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | char[] s = "abcd".dup; | 
|  | // One code unit. | 
|  | encode(s, cast(dchar)'a'); | 
|  | assert(s.length == 5); | 
|  | assert(s == "abcda"); | 
|  |  | 
|  | // Two code units. | 
|  | encode(s, cast(dchar)'\u00A9'); | 
|  | assert(s.length == 7); | 
|  | assert(s == "abcda\xC2\xA9"); | 
|  | //assert(s == "abcda\u00A9");   // BUG: fix compiler | 
|  |  | 
|  | // Three code units. | 
|  | encode(s, cast(dchar)'\u2260'); | 
|  | assert(s.length == 10); | 
|  | assert(s == "abcda\xC2\xA9\xE2\x89\xA0"); | 
|  | }); | 
|  | } | 
|  |  | 
|  | // Boundary tests for appending to a char[], passed through assertCTFEable. | 
|  | // Each slice start equals the total length appended by the preceding calls. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | char[] buf; | 
|  |  | 
|  | encode(buf, '\u0000'); assert(buf[0 .. $] == "\u0000"); | 
|  | encode(buf, '\u007F'); assert(buf[1 .. $] == "\u007F"); | 
|  | encode(buf, '\u0080'); assert(buf[2 .. $] == "\u0080"); | 
|  | encode(buf, '\u07FF'); assert(buf[4 .. $] == "\u07FF"); | 
|  | encode(buf, '\u0800'); assert(buf[6 .. $] == "\u0800"); | 
|  | encode(buf, '\uD7FF'); assert(buf[9 .. $] == "\uD7FF"); | 
|  | encode(buf, '\uE000'); assert(buf[12 .. $] == "\uE000"); | 
|  | encode(buf, 0xFFFE); assert(buf[15 .. $] == "\xEF\xBF\xBE"); | 
|  | encode(buf, 0xFFFF); assert(buf[18 .. $] == "\xEF\xBF\xBF"); | 
|  | encode(buf, '\U00010000'); assert(buf[21 .. $] == "\U00010000"); | 
|  | encode(buf, '\U0010FFFF'); assert(buf[25 .. $] == "\U0010FFFF"); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // The tail of buf is not "\uFFFD" before the replacing call and is after it. | 
|  | enum replacementDcharString = "\uFFFD"; | 
|  | enum rdcslen = replacementDcharString.length; | 
|  | assert(buf[$ - rdcslen .. $] != replacementDcharString); | 
|  | encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  | assert(buf[$ - rdcslen .. $] == replacementDcharString); | 
|  | }); | 
|  | } | 
|  |  | 
|  | /// ditto | 
|  | void encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     ref scope wchar[] str, dchar c) @safe pure | 
|  | { | 
|  |     // Deal with unencodable input first: either throw, or continue with the | 
|  |     // replacement character, which then takes the single-unit path below. | 
|  |     if (c > 0x10FFFF) | 
|  |     { | 
|  |         assert(!isValidDchar(c)); | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-16", c); | 
|  |     } | 
|  |     else if (0xD800 <= c && c <= 0xDFFF) | 
|  |         c = _utfException!useReplacementDchar("Encoding an isolated surrogate code point in UTF-16", c); | 
|  |  | 
|  |     assert(isValidDchar(c)); | 
|  |  | 
|  |     if (c <= 0xFFFF) | 
|  |     { | 
|  |         str ~= cast(wchar) c; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         // Above 0xFFFF: split the 20-bit offset from 0x10000 into a high and | 
|  |         // a low surrogate carrying ten bits each. | 
|  |         immutable uint offset = c - 0x10000; | 
|  |         wchar[2] pair; | 
|  |         pair[0] = cast(wchar)(((offset >> 10) & 0x3FF) + 0xD800); | 
|  |         pair[1] = cast(wchar)((offset & 0x3FF) + 0xDC00); | 
|  |         str ~= pair; | 
|  |     } | 
|  | } | 
|  |  | 
|  | // Boundary tests for appending to a wchar[], passed through assertCTFEable. | 
|  | // Each index equals the number of code units appended by the preceding calls. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | wchar[] buf; | 
|  |  | 
|  | encode(buf, '\u0000'); assert(buf[0] == '\u0000'); | 
|  | encode(buf, '\uD7FF'); assert(buf[1] == '\uD7FF'); | 
|  | encode(buf, '\uE000'); assert(buf[2] == '\uE000'); | 
|  | encode(buf, 0xFFFE); assert(buf[3] == 0xFFFE); | 
|  | encode(buf, 0xFFFF); assert(buf[4] == 0xFFFF); | 
|  | encode(buf, '\U00010000'); assert(buf[5 .. $] == "\U00010000"); | 
|  | encode(buf, '\U0010FFFF'); assert(buf[7 .. $] == "\U0010FFFF"); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // buf.back is not replacementDchar before the replacing call and is after it. | 
|  | assert(buf.back != replacementDchar); | 
|  | encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  | assert(buf.back == replacementDchar); | 
|  | }); | 
|  | } | 
|  |  | 
|  | /// ditto | 
|  | void encode(UseReplacementDchar useReplacementDchar = No.useReplacementDchar)( | 
|  |     ref scope dchar[] str, dchar c) @safe pure | 
|  | { | 
|  |     // UTF-32 stores the code point as is; only surrogates and values above | 
|  |     // 0x10FFFF have to be rejected or replaced. | 
|  |     immutable bool isSurrogate = 0xD800 <= c && c <= 0xDFFF; | 
|  |     immutable bool beyondUnicode = 0x10FFFF < c; | 
|  |  | 
|  |     if (isSurrogate || beyondUnicode) | 
|  |     { | 
|  |         c = _utfException!useReplacementDchar("Encoding an invalid code point in UTF-32", c); | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         assert(isValidDchar(c)); | 
|  |     } | 
|  |  | 
|  |     str ~= c; | 
|  | } | 
|  |  | 
|  | // Boundary tests for appending to a dchar[], passed through assertCTFEable. | 
|  | // Every successful call appends exactly one element. | 
|  | @safe unittest | 
|  | { | 
|  | import std.exception; | 
|  | assertCTFEable!( | 
|  | { | 
|  | dchar[] buf; | 
|  |  | 
|  | encode(buf, '\u0000'); assert(buf[0] == '\u0000'); | 
|  | encode(buf, '\uD7FF'); assert(buf[1] == '\uD7FF'); | 
|  | encode(buf, '\uE000'); assert(buf[2] == '\uE000'); | 
|  | encode(buf, 0xFFFE ); assert(buf[3] == 0xFFFE); | 
|  | encode(buf, 0xFFFF ); assert(buf[4] == 0xFFFF); | 
|  | encode(buf, '\U0010FFFF'); assert(buf[5] == '\U0010FFFF'); | 
|  |  | 
|  | // Surrogates and values above 0x10FFFF throw by default. | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xD800)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDBFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDC00)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0xDFFF)); | 
|  | assertThrown!UTFException(encode(buf, cast(dchar) 0x110000)); | 
|  |  | 
|  | // buf.back is not replacementDchar before the replacing call and is after it. | 
|  | assert(buf.back != replacementDchar); | 
|  | encode!(Yes.useReplacementDchar)(buf, cast(dchar) 0x110000); | 
|  | assert(buf.back == replacementDchar); | 
|  | }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /++ | 
|  |     Computes how many code units of character type `C` are needed to encode | 
|  |     the single code point `c`. | 
|  |  | 
|  |     Params: | 
|  |         C = the character type of the target encoding | 
|  |         c = the code point to measure | 
|  |  | 
|  |     Returns: | 
|  |         1 to 4 when `C` is one byte wide, 1 or 2 when it is two bytes wide, | 
|  |         and always 1 when it is four bytes wide. | 
|  |   +/ | 
|  | ubyte codeLength(C)(dchar c) @safe pure nothrow @nogc | 
|  | if (isSomeChar!C) | 
|  | { | 
|  |     static if (C.sizeof == 4) | 
|  |     { | 
|  |         return 1; | 
|  |     } | 
|  |     else static if (C.sizeof == 2) | 
|  |     { | 
|  |         if (c > 0xFFFF) | 
|  |             return 2; | 
|  |         return 1; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         static assert(C.sizeof == 1); | 
|  |  | 
|  |         // Values above U+10FFFF have no one-byte-unit encoding length. | 
|  |         if (c > 0x10FFFF) | 
|  |             assert(false); | 
|  |         if (c > 0xFFFF) | 
|  |             return 4; | 
|  |         if (c > 0x7FF) | 
|  |             return 3; | 
|  |         if (c > 0x7F) | 
|  |             return 2; | 
|  |         return 1; | 
|  |     } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure nothrow @nogc unittest | 
|  | { | 
|  |     // UTF-8: one code unit for 'a', four for the highest code point. | 
|  |     assert(codeLength!char('a') == 1); | 
|  |     assert(codeLength!char('\U0010FFFF') == 4); | 
|  |  | 
|  |     // UTF-16: one code unit for 'a', two for the highest code point. | 
|  |     assert(codeLength!wchar('a') == 1); | 
|  |     assert(codeLength!wchar('\U0010FFFF') == 2); | 
|  |  | 
|  |     // UTF-32: always a single code unit. | 
|  |     assert(codeLength!dchar('a') == 1); | 
|  |     assert(codeLength!dchar('\U0010FFFF') == 1); | 
|  | } | 
|  |  | 
|  |  | 
|  | /++ | 
|  |     Returns the number of code units that are required to encode `input` | 
|  |     in a string whose character type is `C`. This is particularly useful | 
|  |     when slicing one string with the length of another and the two string | 
|  |     types use different character types. | 
|  |  | 
|  |     Params: | 
|  |         C = the character type to get the encoding length for | 
|  |         input = the $(REF_ALTTEXT input range, isInputRange, std,range,primitives) | 
|  |         to calculate the encoding length from | 
|  |     Returns: | 
|  |         The number of code units in `input` when encoded to `C` | 
|  |   +/ | 
|  | size_t codeLength(C, InputRange)(InputRange input) | 
|  | if (isSomeFiniteCharInputRange!InputRange) | 
|  | { | 
|  |     alias EncType = Unqual!(ElementEncodingType!InputRange); | 
|  |  | 
|  |     // Qualifiers on `C` do not change the encoding, so compare against the | 
|  |     // unqualified type. Otherwise `codeLength!(const char)(str)` would miss | 
|  |     // this fast path and decode the whole string. | 
|  |     static if (isSomeString!InputRange && is(EncType == Unqual!C) && is(typeof(input.length))) | 
|  |     { | 
|  |         // Source and target encodings match: the length is the answer. | 
|  |         return input.length; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         size_t total = 0; | 
|  |  | 
|  |         // Sum the per-code-point lengths in the target encoding. | 
|  |         foreach (c; input.byDchar) | 
|  |             total += codeLength!C(c); | 
|  |  | 
|  |         return total; | 
|  |     } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  | // ASCII input: the result equals the length of the same literal in the | 
|  | // target encoding. | 
|  | assert(codeLength!char("hello world") == | 
|  | "hello world".length); | 
|  | assert(codeLength!wchar("hello world") == | 
|  | "hello world"w.length); | 
|  | assert(codeLength!dchar("hello world") == | 
|  | "hello world"d.length); | 
|  |  | 
|  | // Non-ASCII input: same check against each encoding of the literal. | 
|  | assert(codeLength!char(`プログラミング`) == | 
|  | `プログラミング`.length); | 
|  | assert(codeLength!wchar(`プログラミング`) == | 
|  | `プログラミング`w.length); | 
|  | assert(codeLength!dchar(`プログラミング`) == | 
|  | `プログラミング`d.length); | 
|  |  | 
|  | // Slice a UTF-8 string using the UTF-8 length of a UTF-16 string. | 
|  | string haystack = `Être sans la verité, ça, ce ne serait pas bien.`; | 
|  | wstring needle = `Être sans la verité`; | 
|  | assert(haystack[codeLength!char(needle) .. $] == | 
|  | `, ça, ce ne serait pas bien.`); | 
|  | } | 
|  |  | 
|  | @safe unittest | 
|  | { | 
|  | import std.algorithm.iteration : filter; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  |  | 
|  | assertCTFEable!( | 
|  | { | 
|  | // S: every source string type (mutable, const and immutable elements). | 
|  | foreach (S; AliasSeq!( char[], const  char[],  string, | 
|  | wchar[], const wchar[], wstring, | 
|  | dchar[], const dchar[], dstring)) | 
|  | { | 
|  | // C: every target character type. | 
|  | foreach (C; AliasSeq!(char, wchar, dchar)) | 
|  | { | 
|  | // Expected value: length of the same text converted to C[]. | 
|  | assert(codeLength!C(to!S("Walter Bright")) == to!(C[])("Walter Bright").length); | 
|  | assert(codeLength!C(to!S(`言語`)) == to!(C[])(`言語`).length); | 
|  | assert(codeLength!C(to!S(`ウェブサイト@La_Verité.com`)) == | 
|  | to!(C[])(`ウェブサイト@La_Verité.com`).length); | 
|  | // Same text passed through `filter`, i.e. as a range that is not a string. | 
|  | assert(codeLength!C(to!S(`ウェブサイト@La_Verité.com`).filter!(x => true)()) == | 
|  | to!(C[])(`ウェブサイト@La_Verité.com`).length); | 
|  | } | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  | /+ | 
|  |     Internal helper function: | 
|  |  | 
|  |     Tells whether it is safe to search for the code point `c` directly in | 
|  |     code units of type `C`, without decoding. | 
|  |  | 
|  |     This is a runtime check that is used as an optimization in various | 
|  |     functions, particularly in `std.string`. | 
|  |   +/ | 
|  | package bool canSearchInCodeUnits(C)(dchar c) | 
|  | if (isSomeChar!C) | 
|  | { | 
|  |     static if (C.sizeof == 4) | 
|  |     { | 
|  |         return true; | 
|  |     } | 
|  |     else static if (C.sizeof == 2) | 
|  |     { | 
|  |         // Accept values up to 0xFFFF except the range 0xD800 .. 0xDFFF. | 
|  |         if (c > 0xFFFF) | 
|  |             return false; | 
|  |         return c < 0xD800 || c > 0xDFFF; | 
|  |     } | 
|  |     else static if (C.sizeof == 1) | 
|  |     { | 
|  |         return c < 0x80; | 
|  |     } | 
|  |     else | 
|  |         static assert(0); | 
|  | } | 
|  | @safe unittest | 
|  | { | 
|  |     // char: only values up to 0x7F qualify. | 
|  |     assert( canSearchInCodeUnits!char('a')); | 
|  |     assert(!canSearchInCodeUnits!char('ö')); //Important test: ö <= 0xFF | 
|  |     assert(!canSearchInCodeUnits!char(cast(char)'ö')); //Important test: ö <= 0xFF | 
|  |     assert(!canSearchInCodeUnits!char('日')); | 
|  |     assert(!canSearchInCodeUnits!char('\U00010001')); | 
|  |  | 
|  |     // wchar: surrogate values and values above 0xFFFF are excluded. | 
|  |     assert( canSearchInCodeUnits!wchar('a')); | 
|  |     assert( canSearchInCodeUnits!wchar('ö')); | 
|  |     assert( canSearchInCodeUnits!wchar('日')); | 
|  |     assert(!canSearchInCodeUnits!wchar(cast(wchar) 0xDA00)); | 
|  |     assert(!canSearchInCodeUnits!wchar('\U00010001')); | 
|  |  | 
|  |     // dchar: always searchable. | 
|  |     assert( canSearchInCodeUnits!dchar('a')); | 
|  |     assert( canSearchInCodeUnits!dchar('ö')); | 
|  |     assert( canSearchInCodeUnits!dchar('日')); | 
|  |     assert( canSearchInCodeUnits!dchar(cast(dchar) 0xDA00)); | 
|  |     assert( canSearchInCodeUnits!dchar('\U00010001')); | 
|  | } | 
|  |  | 
|  | /* =================== Validation ======================= */ | 
|  |  | 
|  | /++ | 
|  |     Verifies that `str` is well-formed Unicode by decoding it from the | 
|  |     first code unit to the last. | 
|  |  | 
|  |     Params: | 
|  |         str = the string to check | 
|  |  | 
|  |     Throws: | 
|  |         `UTFException` if `str` is not well-formed. | 
|  |   +/ | 
|  | void validate(S)(in S str) @safe pure | 
|  | if (isSomeString!S) | 
|  | { | 
|  |     size_t index = 0; | 
|  |  | 
|  |     // The loop has no increment of its own: it relies on `decode` | 
|  |     // advancing `index` past the code point it has just read. | 
|  |     while (index < str.length) | 
|  |         decode(str, index); | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception : assertThrown; | 
|  |  | 
|  |     // These three code units are not well-formed UTF-8. | 
|  |     char[] malformed = [167, 133, 175]; | 
|  |     assertThrown!UTFException(validate(malformed)); | 
|  | } | 
|  |  | 
|  | // https://issues.dlang.org/show_bug.cgi?id=12923 | 
|  | @safe unittest | 
|  | { | 
|  |     import std.exception; | 
|  |  | 
|  |     // Validating a slice of a static array must throw as well. | 
|  |     assertThrown( | 
|  |         () | 
|  |         { | 
|  |             char[3] malformed = [167, 133, 175]; | 
|  |             validate(malformed[]); | 
|  |         }()); | 
|  | } | 
|  |  | 
|  | /** | 
|  |  * Converts the elements of `s` to UTF-8 and returns them as a newly | 
|  |  * allocated `string`. | 
|  |  * | 
|  |  * Params: | 
|  |  *     s = the string or character range to encode | 
|  |  * Returns: | 
|  |  *     A UTF-8 string | 
|  |  * See_Also: | 
|  |  *     For a lazy, non-allocating version of these functions, see $(LREF byUTF). | 
|  |  */ | 
|  | string toUTF8(S)(S s) | 
|  | if (isSomeFiniteCharInputRange!S) | 
|  | { | 
|  |     // The conversion itself lives in the private helper, selected by result type. | 
|  |     string encoded = toUTFImpl!string(s); | 
|  |     return encoded; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure unittest | 
|  | { | 
|  |     import std.algorithm.comparison : equal; | 
|  |  | 
|  |     // The ø is represented by two UTF-8 code units | 
|  |     auto twoUnitTail = "Hellø"w.toUTF8; | 
|  |     assert(twoUnitTail.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8])); | 
|  |  | 
|  |     // 𐐷 is four code units in UTF-8 | 
|  |     auto fourUnits = "𐐷"d.toUTF8; | 
|  |     assert(fourUnits.equal([0xF0, 0x90, 0x90, 0xB7])); | 
|  | } | 
|  |  | 
|  | @system pure unittest | 
|  | { | 
|  |     import std.algorithm.comparison : equal; | 
|  |     import std.internal.test.dummyrange : ReferenceInputRange; | 
|  |  | 
|  |     // Same expectations as the string-based example, but fed through a | 
|  |     // class-based input range. | 
|  |     alias CharRange = ReferenceInputRange!(ElementType!(string)); | 
|  |  | 
|  |     auto twoUnitTail = new CharRange("Hellø"); | 
|  |     auto fourUnits = new CharRange("𐐷"); | 
|  |  | 
|  |     assert(twoUnitTail.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8])); | 
|  |     assert(fourUnits.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7])); | 
|  | } | 
|  |  | 
|  | /** | 
|  |  * Converts the elements of `s` to UTF-16 and returns them as a newly GC | 
|  |  * allocated `wstring`. | 
|  |  * | 
|  |  * Params: | 
|  |  *     s = the range to encode | 
|  |  * Returns: | 
|  |  *     A UTF-16 string | 
|  |  * See_Also: | 
|  |  *     For a lazy, non-allocating version of these functions, see $(LREF byUTF). | 
|  |  */ | 
|  | wstring toUTF16(S)(S s) | 
|  | if (isSomeFiniteCharInputRange!S) | 
|  | { | 
|  |     // The conversion itself lives in the private helper, selected by result type. | 
|  |     wstring encoded = toUTFImpl!wstring(s); | 
|  |     return encoded; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure unittest | 
|  | { | 
|  |     import std.algorithm.comparison : equal; | 
|  |  | 
|  |     // these graphemes are two code units in UTF-16 and one in UTF-32 | 
|  |     dstring first = "𤭢"; | 
|  |     dstring second = "𐐷"; | 
|  |  | 
|  |     assert(first.length == 1); | 
|  |     assert(second.length == 1); | 
|  |  | 
|  |     assert(first.toUTF16.equal([0xD852, 0xDF62])); | 
|  |     assert(second.toUTF16.equal([0xD801, 0xDC37])); | 
|  | } | 
|  |  | 
|  | @system pure unittest | 
|  | { | 
|  |     import std.algorithm.comparison : equal; | 
|  |     import std.internal.test.dummyrange : ReferenceInputRange; | 
|  |  | 
|  |     // Same expectations as the string-based example, but fed through a | 
|  |     // class-based input range. | 
|  |     alias CharRange = ReferenceInputRange!(ElementType!(string)); | 
|  |  | 
|  |     auto first = new CharRange("𤭢"); | 
|  |     auto second = new CharRange("𐐷"); | 
|  |  | 
|  |     assert(first.toUTF16.equal([0xD852, 0xDF62])); | 
|  |     assert(second.toUTF16.equal([0xD801, 0xDC37])); | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  |  * Converts the elements of `s` to UTF-32 and returns them as a newly GC | 
|  |  * allocated `dstring`. | 
|  |  * | 
|  |  * Params: | 
|  |  *     s = the range to encode | 
|  |  * Returns: | 
|  |  *     A UTF-32 string | 
|  |  * See_Also: | 
|  |  *     For a lazy, non-allocating version of these functions, see $(LREF byUTF). | 
|  |  */ | 
|  | dstring toUTF32(S)(scope S s) | 
|  | if (isSomeFiniteCharInputRange!S) | 
|  | { | 
|  |     // The conversion itself lives in the private helper, selected by result type. | 
|  |     dstring encoded = toUTFImpl!dstring(s); | 
|  |     return encoded; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure unittest | 
|  | { | 
|  |     import std.algorithm.comparison : equal; | 
|  |  | 
|  |     // these graphemes are two code units in UTF-16 and one in UTF-32 | 
|  |     wstring first = "𤭢"; | 
|  |     wstring second = "𐐷"; | 
|  |  | 
|  |     assert(first.length == 2); | 
|  |     assert(second.length == 2); | 
|  |  | 
|  |     assert(first.toUTF32.equal([0x00024B62])); | 
|  |     assert(second.toUTF32.equal([0x00010437])); | 
|  | } | 
|  |  | 
|  | // Shared implementation behind toUTF8, toUTF16 and toUTF32: converts `s` | 
|  | // to the string type `T`. | 
|  | private T toUTFImpl(T, S)(scope S s) | 
|  | { | 
|  |     static if (!is(S : T)) | 
|  |     { | 
|  |         import std.array : appender; | 
|  |  | 
|  |         alias TargetChar = Unqual!(ElementEncodingType!T); | 
|  |         auto sink = appender!T(); | 
|  |  | 
|  |         // When the source length is available, reserve that much up front. | 
|  |         static if (is(S == C[], C) || hasLength!S) | 
|  |             sink.reserve(s.length); | 
|  |  | 
|  |         // Transcode lazily and collect the resulting code units. | 
|  |         foreach (unit; s.byUTF!TargetChar) | 
|  |             sink.put(unit); | 
|  |  | 
|  |         return sink.data; | 
|  |     } | 
|  |     else | 
|  |     { | 
|  |         // `s` already converts to `T`: hand back an immutable copy. | 
|  |         return s.idup; | 
|  |     } | 
|  | } | 
|  |  | 
|  | /* =================== toUTFz ======================= */ | 
|  |  | 
|  | /++ | 
|  |     Returns a C-style zero-terminated string equivalent to `str`. `str` | 
|  |     must not contain embedded `'\0'`'s as any C function will treat the first | 
|  |     `'\0'` that it sees as the end of the string. If `str.empty` is | 
|  |     `true`, then a string containing only `'\0'` is returned. | 
|  |  | 
|  |     `toUTFz` accepts any type of string and is templated on the type of | 
|  |     character pointer that you wish to convert to. It avoids allocating a | 
|  |     new string when it can, but there's a decent chance that it will end up | 
|  |     having to allocate one - particularly when dealing with character types | 
|  |     other than `char`. | 
|  |  | 
|  |     $(RED Warning 1:) If the result of `toUTFz` equals `str.ptr`, then if | 
|  |     anything alters the character one past the end of `str` (which is the | 
|  |     `'\0'` character terminating the string), then the string won't be | 
|  |     zero-terminated anymore. The most likely scenarios for that are if you | 
|  |     append to `str` and no reallocation takes place or when `str` is a | 
|  |     slice of a larger array, and you alter the character in the larger array | 
|  |     which is one character past the end of `str`. Another case where it could | 
|  |     occur would be if you had a mutable character array immediately after | 
|  |     `str` in memory (for example, if they're member variables in a | 
|  |     user-defined type with one declared right after the other) and that | 
|  |     character array happened to start with `'\0'`. Such scenarios will never | 
|  |     occur if you immediately use the zero-terminated string after calling | 
|  |     `toUTFz` and the C function using it doesn't keep a reference to it. | 
|  |     Also, they are unlikely to occur even if you save the zero-terminated string | 
|  |     (the cases above would be among the few examples of where it could happen). | 
|  |     However, if you save the zero-terminated string and want to be absolutely | 
|  |     certain that the string stays zero-terminated, then simply append a | 
|  |     `'\0'` to the string and use its `ptr` property rather than calling | 
|  |     `toUTFz`. | 
|  |  | 
|  |     $(RED Warning 2:) When passing a character pointer to a C function, and the | 
|  |     C function keeps it around for any reason, make sure that you keep a | 
|  |     reference to it in your D code. Otherwise, it may go away during a garbage | 
|  |     collection cycle and cause a nasty bug when the C code tries to use it. | 
|  |  | 
|  |     Params: | 
|  |         P = the character pointer type to produce | 
|  |         str = the string to convert | 
|  |  | 
|  |     Returns: | 
|  |         A pointer of type `P` to the zero-terminated text. | 
|  |   +/ | 
|  | template toUTFz(P) | 
|  | if (is(P : C*, C) && isSomeChar!C) | 
|  | { | 
|  |     P toUTFz(S)(S str) @safe pure | 
|  |     if (isSomeString!S) | 
|  |     { | 
|  |         // Overload resolution among the private implementations picks the | 
|  |         // strategy for this combination of `P` and `S`. | 
|  |         P terminated = toUTFzImpl!(P, S)(str); | 
|  |         return terminated; | 
|  |     } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure unittest | 
|  | { | 
|  |     // Same character type, each pointer qualifier. | 
|  |     auto mutablePtr = toUTFz!(char*)("hello world"); | 
|  |     auto constPtr = toUTFz!(const(char)*)("hello world"); | 
|  |     auto immutablePtr = toUTFz!(immutable(char)*)("hello world"); | 
|  |  | 
|  |     // Source and target character types differ. | 
|  |     auto fromUtf32 = toUTFz!(char*)("hello world"d); | 
|  |     auto utf8ToUtf16 = toUTFz!(const(wchar)*)("hello world"); | 
|  |     auto utf16ToUtf32 = toUTFz!(immutable(dchar)*)("hello world"w); | 
|  | } | 
|  |  | 
|  | // Implementation of toUTFz for input whose element type is the immutable | 
|  | // version of the target pointer's character type. | 
|  | private P toUTFzImpl(P, S)(return scope S str) @safe pure | 
|  | if (is(immutable typeof(*P.init) == typeof(str[0]))) | 
|  | //immutable(C)[] -> C*, const(C)*, or immutable(C)* | 
|  | { | 
|  | // Empty input: return a pointer to a new one-element array holding '\0'. | 
|  | if (str.empty) | 
|  | { | 
|  | typeof(*P.init)[] retval = ['\0']; | 
|  |  | 
|  | auto trustedPtr() @trusted { return retval.ptr; } | 
|  | return trustedPtr(); | 
|  | } | 
|  |  | 
|  | alias C = Unqual!(ElementEncodingType!S); | 
|  |  | 
|  | //If the P is mutable, then we have to make a copy. | 
|  | static if (is(Unqual!(typeof(*P.init)) == typeof(*P.init))) | 
|  | { | 
|  | return toUTFzImpl!(P, const(C)[])(cast(const(C)[])str); | 
|  | } | 
|  | else | 
|  | { | 
|  | // The in-place check below is skipped during CTFE. | 
|  | if (!__ctfe) | 
|  | { | 
|  | // Pointer to the element one past the end of str. | 
|  | auto trustedPtrAdd(S s) @trusted { return s.ptr + s.length; } | 
|  | immutable p = trustedPtrAdd(str); | 
|  |  | 
|  | // Peek past end of str, if it's 0, no conversion necessary. | 
|  | // Note that the compiler will put a 0 past the end of static | 
|  | // strings, and the storage allocator will put a 0 past the end | 
|  | // of newly allocated char[]'s. | 
|  | // Is p dereferenceable? A simple test: if the p points to an | 
|  | // address multiple of 4, then conservatively assume the pointer | 
|  | // might be pointing to a new block of memory, which might be | 
|  | // unreadable. Otherwise, it's definitely pointing to valid | 
|  | // memory. | 
|  | if ((cast(size_t) p & 3) && *p == '\0') | 
|  | return &str[0]; | 
|  | } | 
|  |  | 
|  | // No usable terminator found (or running in CTFE): delegate to the | 
|  | // overload that takes const(C)[]. | 
|  | return toUTFzImpl!(P, const(C)[])(cast(const(C)[])str); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Implementation of toUTFz for mutable or const input whose character type | 
|  | // matches the target pointer's character type (ignoring qualifiers). | 
|  | private P toUTFzImpl(P, S)(return scope S str) @safe pure | 
|  | if (is(typeof(str[0]) C) && is(immutable typeof(*P.init) == immutable C) && !is(C == immutable)) | 
|  | //C[] or const(C)[] -> C*, const(C)*, or immutable(C)* | 
|  | { | 
|  | alias InChar  = typeof(str[0]); | 
|  | alias OutChar = typeof(*P.init); | 
|  |  | 
|  | //const(C)[] -> const(C)* or | 
|  | //C[] -> C* or const(C)* | 
|  | static if (( is(const(Unqual!InChar) == InChar) &&  is(const(Unqual!OutChar) == OutChar)) || | 
|  | (!is(const(Unqual!InChar) == InChar) && !is(immutable(Unqual!OutChar) == OutChar))) | 
|  | { | 
|  | // The in-place check below is skipped during CTFE. | 
|  | if (!__ctfe) | 
|  | { | 
|  | // Pointer to the element one past the end of str. | 
|  | auto trustedPtrAdd(S s) @trusted { return s.ptr + s.length; } | 
|  | auto p = trustedPtrAdd(str); | 
|  |  | 
|  | // Only dereference p when its address is not a multiple of 4; if a | 
|  | // '\0' is already there, str can be returned as is. | 
|  | if ((cast(size_t) p & 3) && *p == '\0') | 
|  | return &str[0]; | 
|  | } | 
|  |  | 
|  | // Otherwise append the terminator and return a pointer to the first element. | 
|  | str ~= '\0'; | 
|  | return &str[0]; | 
|  | } | 
|  | //const(C)[] -> C* or immutable(C)* or | 
|  | //C[] -> immutable(C)* | 
|  | else | 
|  | { | 
|  | // Build a copy one element longer than str and terminate it. | 
|  | import std.array : uninitializedArray; | 
|  | auto copy = uninitializedArray!(Unqual!OutChar[])(str.length + 1); | 
|  | copy[0 .. $ - 1] = str[]; | 
|  | copy[$ - 1] = '\0'; | 
|  |  | 
|  | // The copy is cast to the requested pointer type P. | 
|  | auto trustedCast(typeof(copy) c) @trusted { return cast(P) c.ptr; } | 
|  | return trustedCast(copy); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Implementation of toUTFz for the case where the source and target | 
|  | // character types differ. | 
|  | private P toUTFzImpl(P, S)(S str) @safe pure | 
|  | if (!is(immutable typeof(*P.init) == immutable typeof(str[0]))) | 
|  | //C1[], const(C1)[], or immutable(C1)[] -> C2*, const(C2)*, or immutable(C2)* | 
|  | { | 
|  |     import std.array : appender; | 
|  |  | 
|  |     alias TargetChar = typeof(*P.init); | 
|  |     auto sink = appender!(TargetChar[])(); | 
|  |  | 
|  |     // Iterate by code point and append each one in the target character | 
|  |     // type, then add the terminator. | 
|  |     foreach (dchar codePoint; str) | 
|  |         sink.put(codePoint); | 
|  |     sink.put('\0'); | 
|  |  | 
|  |     auto trustedPtr() @trusted { return cast(P) sink.data.ptr; } | 
|  |     return trustedPtr(); | 
|  | } | 
|  |  | 
|  | // Exercises toUTFz for every combination of source string type and target | 
|  | // pointer type. | 
|  | @safe pure unittest | 
|  | { | 
|  | import core.exception : AssertError; | 
|  | import std.algorithm; | 
|  | import std.conv : to; | 
|  | import std.exception; | 
|  | import std.string : format; | 
|  |  | 
|  | // Part 1: source and target share the same character type. | 
|  | assertCTFEable!( | 
|  | { | 
|  | foreach (S; AliasSeq!(string, wstring, dstring)) | 
|  | { | 
|  | alias C = Unqual!(ElementEncodingType!S); | 
|  |  | 
|  | auto s1 = to!S("hello\U00010143\u0100\U00010143"); | 
|  | // s2 has the same contents as s1, but is a slice of a buffer whose | 
|  | // next element is '\n' rather than '\0'. | 
|  | auto temp = new C[](s1.length + 1); | 
|  | temp[0 .. $ - 1] = s1[0 .. $]; | 
|  | temp[$ - 1] = '\n'; | 
|  | --temp.length; | 
|  | auto trustedAssumeUnique(T)(T t) @trusted { return assumeUnique(t); } | 
|  | auto s2 = trustedAssumeUnique(temp); | 
|  | assert(s1 == s2); | 
|  |  | 
|  | // Checks that the result matches s and is followed by '\0'. | 
|  | void trustedCStringAssert(P, S)(S s) @trusted | 
|  | { | 
|  | auto p = toUTFz!P(s); | 
|  | assert(p[0 .. s.length] == s); | 
|  | assert(p[s.length] == '\0'); | 
|  | } | 
|  |  | 
|  | foreach (P; AliasSeq!(C*, const(C)*, immutable(C)*)) | 
|  | { | 
|  | trustedCStringAssert!P(s1); | 
|  | trustedCStringAssert!P(s2); | 
|  | } | 
|  | } | 
|  | }); | 
|  |  | 
|  | // Helper for part 2: converts s, measures the result up to its '\0', and | 
|  | // compares it with s; throws an AssertError naming P and S on mismatch. | 
|  | static void test(P, S)(S s, size_t line = __LINE__) @trusted | 
|  | { | 
|  | static size_t zeroLen(C)(const(C)* ptr) @trusted | 
|  | { | 
|  | size_t len = 0; | 
|  | while (*ptr != '\0') { ++ptr; ++len; } | 
|  | return len; | 
|  | } | 
|  |  | 
|  | auto p = toUTFz!P(s); | 
|  | immutable len = zeroLen(p); | 
|  | enforce(cmp(s, p[0 .. len]) == 0, | 
|  | new AssertError(format("Unit test failed: %s %s", P.stringof, S.stringof), | 
|  | __FILE__, line)); | 
|  | } | 
|  |  | 
|  | // Part 2: conversions between different character types, then every | 
|  | // mutable/const source array type against every pointer type. | 
|  | assertCTFEable!( | 
|  | { | 
|  | foreach (P; AliasSeq!(wchar*, const(wchar)*, immutable(wchar)*, | 
|  | dchar*, const(dchar)*, immutable(dchar)*)) | 
|  | { | 
|  | test!P("hello\U00010143\u0100\U00010143"); | 
|  | } | 
|  | foreach (P; AliasSeq!( char*, const( char)*, immutable( char)*, | 
|  | dchar*, const(dchar)*, immutable(dchar)*)) | 
|  | { | 
|  | test!P("hello\U00010143\u0100\U00010143"w); | 
|  | } | 
|  | foreach (P; AliasSeq!( char*, const( char)*, immutable( char)*, | 
|  | wchar*, const(wchar)*, immutable(wchar)*)) | 
|  | { | 
|  | test!P("hello\U00010143\u0100\U00010143"d); | 
|  | } | 
|  | foreach (S; AliasSeq!( char[], const( char)[], | 
|  | wchar[], const(wchar)[], | 
|  | dchar[], const(dchar)[])) | 
|  | { | 
|  | auto s = to!S("hello\U00010143\u0100\U00010143"); | 
|  |  | 
|  | foreach (P; AliasSeq!( char*, const( char)*, immutable( char)*, | 
|  | wchar*, const(wchar)*, immutable(wchar)*, | 
|  | dchar*, const(dchar)*, immutable(dchar)*)) | 
|  | { | 
|  | test!P(s); | 
|  | } | 
|  | } | 
|  | }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /++ | 
|  |     Convenience wrapper around `toUTFz!(const(wchar)*)`. | 
|  |  | 
|  |     Encodes the string `str` into UTF-16 and returns the encoded, | 
|  |     zero-terminated result. `toUTF16z` is suitable for calling the 'W' | 
|  |     functions in the Win32 API that take an `LPCWSTR` argument. | 
|  |  | 
|  |     Params: | 
|  |         str = the string to encode | 
|  |  | 
|  |     Returns: | 
|  |         A `const(wchar)*` as produced by `toUTFz!(const(wchar)*)`. | 
|  |   +/ | 
|  | const(wchar)* toUTF16z(C)(const(C)[] str) @safe pure | 
|  | if (isSomeChar!C) | 
|  | { | 
|  |     const(wchar)* terminated = toUTFz!(const(wchar)*)(str); | 
|  |     return terminated; | 
|  | } | 
|  |  | 
|  | /// | 
|  | @system unittest | 
|  | { | 
|  |     string text = "Hello, World!"; | 
|  |     const(wchar)* wide = toUTF16z(text); | 
|  |  | 
|  |     // The text is pure ASCII, so its UTF-16 form has `text.length` code | 
|  |     // units and the terminator sits right after them. | 
|  |     assert(wide[text.length] == '\0'); | 
|  | } | 
|  |  | 
|  | @safe pure unittest | 
|  | { | 
|  |     import std.conv : to; | 
|  |  | 
|  |     //toUTFz is already thoroughly tested, so this will just verify that | 
|  |     //toUTF16z compiles properly for the various string types. | 
|  |     foreach (S; AliasSeq!(string, wstring, dstring)) | 
|  |     { | 
|  |         auto source = to!S("hello world"); | 
|  |         assert(toUTF16z(source) !is null); | 
|  |     } | 
|  | } | 
|  |  | 
|  |  | 
|  | /* ================================ tests ================================== */ | 
|  |  | 
|  | @safe pure unittest | 
|  | { | 
|  |     import std.exception; | 
|  |  | 
|  |     assertCTFEable!( | 
|  |     { | 
|  |         // To UTF-8, from UTF-16 and UTF-32 sources. | 
|  |         assert(toUTF8("hello"w) == "hello"); | 
|  |         assert(toUTF8("hello"d) == "hello"); | 
|  |         assert(toUTF8("hel\u1234o"w) == "hel\u1234o"); | 
|  |         assert(toUTF8("hel\u1234o"d) == "hel\u1234o"); | 
|  |         assert(toUTF8("he\U0010AAAAllo"w) == "he\U0010AAAAllo"); | 
|  |         assert(toUTF8("he\U0010AAAAllo"d) == "he\U0010AAAAllo"); | 
|  |  | 
|  |         // To UTF-16, from UTF-8 and UTF-32 sources. | 
|  |         assert(toUTF16("hello"c) == "hello"); | 
|  |         assert(toUTF16("hello"d) == "hello"); | 
|  |         assert(toUTF16("hel\u1234o"c) == "hel\u1234o"); | 
|  |         assert(toUTF16("hel\u1234o"d) == "hel\u1234o"); | 
|  |         assert(toUTF16("he\U0010AAAAllo"c) == "he\U0010AAAAllo"); | 
|  |         assert(toUTF16("he\U0010AAAAllo"d) == "he\U0010AAAAllo"); | 
|  |  | 
|  |         // To UTF-32, from UTF-8 and UTF-16 sources. | 
|  |         assert(toUTF32("hello"c) == "hello"); | 
|  |         assert(toUTF32("hello"w) == "hello"); | 
|  |         assert(toUTF32("hel\u1234o"c) == "hel\u1234o"); | 
|  |         assert(toUTF32("hel\u1234o"w) == "hel\u1234o"); | 
|  |         assert(toUTF32("he\U0010AAAAllo"c) == "he\U0010AAAAllo"); | 
|  |         assert(toUTF32("he\U0010AAAAllo"w) == "he\U0010AAAAllo"); | 
|  |     }); | 
|  | } | 
|  |  | 
|  |  | 
|  | /++ | 
|  |     Returns the total number of code points encoded in `str`. | 
|  |  | 
|  |     Supersedes: This function supersedes $(LREF toUCSindex). | 
|  |  | 
|  |     Standards: Unicode 5.0, ASCII, ISO-8859-1, WINDOWS-1252 | 
|  |  | 
|  |     Params: | 
|  |         str = the string whose code points are counted | 
|  |  | 
|  |     Returns: | 
|  |         The number of elements obtained by walking `str` with $(LREF byDchar). | 
|  |  | 
|  |     Note: | 
|  |         This function is `nothrow`, so it does not throw `UTFException`. | 
|  |         How ill-formed input is counted is determined by $(LREF byDchar). | 
|  |   +/ | 
|  | size_t count(C)(const(C)[] str) @safe pure nothrow @nogc | 
|  | if (isSomeChar!C) | 
|  | { | 
|  |     auto codePoints = str.byDchar; | 
|  |     return walkLength(codePoints); | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe pure nothrow @nogc unittest | 
|  | { | 
|  |     // ASCII text: one code point per code unit. | 
|  |     assert("".count == 0); | 
|  |     assert("a".count == 1); | 
|  |     assert("abc".count == 3); | 
|  |  | 
|  |     // '\u20AC' counts as a single code point. | 
|  |     assert("\u20AC100".count == 4); | 
|  | } | 
|  |  | 
|  | @safe pure nothrow @nogc unittest | 
|  | { | 
|  |     import std.exception; | 
|  |  | 
|  |     // Same checks as the documented example, wrapped in assertCTFEable. | 
|  |     assertCTFEable!( | 
|  |     { | 
|  |         assert(count("\u20AC100") == 4); | 
|  |         assert(count("abc") == 3); | 
|  |         assert(count("a") == 1); | 
|  |         assert(count("") == 0); | 
|  |     }); | 
|  | } | 
|  |  | 
|  |  | 
|  | // Ranges of code units for testing. | 
|  | // Each type copies the string it is constructed from into a C[] (via | 
|  | // std.conv.to) and exposes a different subset of the range primitives. | 
|  | version (StdUnittest) | 
|  | { | 
|  | private: | 
|  | // Struct exposing only empty, front and popFront. | 
|  | struct InputCU(C) | 
|  | { | 
|  | import std.conv : to; | 
|  | @property bool empty() { return _str.empty; } | 
|  | @property C front() { return _str[0]; } | 
|  | void popFront() { _str = _str[1 .. $]; } | 
|  |  | 
|  | this(inout(C)[] str) | 
|  | { | 
|  | _str = to!(C[])(str); | 
|  | } | 
|  |  | 
|  | C[] _str; | 
|  | } | 
|  |  | 
|  | // Struct that additionally exposes back, popBack, save and length. | 
|  | struct BidirCU(C) | 
|  | { | 
|  | import std.conv : to; | 
|  | @property bool empty() { return _str.empty; } | 
|  | @property C front() { return _str[0]; } | 
|  | void popFront() { _str = _str[1 .. $]; } | 
|  | @property C back() { return _str[$ - 1]; } | 
|  | void popBack() { _str = _str[0 .. $ - 1]; } | 
|  | @property auto save() { return BidirCU(_str); } | 
|  | @property size_t length() { return _str.length; } | 
|  |  | 
|  | this(inout(C)[] str) | 
|  | { | 
|  | _str = to!(C[])(str); | 
|  | } | 
|  |  | 
|  | C[] _str; | 
|  | } | 
|  |  | 
|  | // Struct that additionally exposes opIndex and opSlice. | 
|  | struct RandomCU(C) | 
|  | { | 
|  | import std.conv : to; | 
|  | @property bool empty() { return _str.empty; } | 
|  | @property C front() { return _str[0]; } | 
|  | void popFront() { _str = _str[1 .. $]; } | 
|  | @property C back() { return _str[$ - 1]; } | 
|  | void popBack() { _str = _str[0 .. $ - 1]; } | 
|  | @property auto save() { return RandomCU(_str); } | 
|  | @property size_t length() { return _str.length; } | 
|  | C opIndex(size_t i) { return _str[i]; } | 
|  | auto opSlice(size_t i, size_t j) { return RandomCU(_str[i .. j]); } | 
|  |  | 
|  | this(inout(C)[] str) | 
|  | { | 
|  | _str = to!(C[])(str); | 
|  | } | 
|  |  | 
|  | C[] _str; | 
|  | } | 
|  |  | 
|  | // Class counterpart of BidirCU; save allocates a new object. | 
|  | class RefBidirCU(C) | 
|  | { | 
|  | import std.conv : to; | 
|  | @property bool empty() { return _str.empty; } | 
|  | @property C front() { return _str[0]; } | 
|  | void popFront() { _str = _str[1 .. $]; } | 
|  | @property C back() { return _str[$ - 1]; } | 
|  | void popBack() { _str = _str[0 .. $ - 1]; } | 
|  | @property auto save() { return new RefBidirCU(_str); } | 
|  | @property size_t length() { return _str.length; } | 
|  |  | 
|  | this(inout(C)[] str) | 
|  | { | 
|  | _str = to!(C[])(str); | 
|  | } | 
|  |  | 
|  | C[] _str; | 
|  | } | 
|  |  | 
|  | // Class counterpart of RandomCU; save and opSlice allocate new objects. | 
|  | class RefRandomCU(C) | 
|  | { | 
|  | import std.conv : to; | 
|  | @property bool empty() { return _str.empty; } | 
|  | @property C front() { return _str[0]; } | 
|  | void popFront() { _str = _str[1 .. $]; } | 
|  | @property C back() { return _str[$ - 1]; } | 
|  | void popBack() { _str = _str[0 .. $ - 1]; } | 
|  | @property auto save() { return new RefRandomCU(_str); } | 
|  | @property size_t length() { return _str.length; } | 
|  | C opIndex(size_t i) { return _str[i]; } | 
|  | auto opSlice(size_t i, size_t j) { return new RefRandomCU(_str[i .. j]); } | 
|  |  | 
|  | this(inout(C)[] str) | 
|  | { | 
|  | _str = to!(C[])(str); | 
|  | } | 
|  |  | 
|  | C[] _str; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  |  * The code point U+FFFD, inserted in place of invalid UTF sequences. | 
|  |  * | 
|  |  * References: | 
|  |  *      $(LINK http://en.wikipedia.org/wiki/Replacement_character#Replacement_character) | 
|  |  */ | 
|  | enum dchar replacementDchar = '\uFFFD'; | 
|  |  | 
|  | /******************************************** | 
|  | * Iterate a range of char, wchar, or dchars by code unit. | 
|  | * | 
|  | * The purpose is to bypass the special case decoding that | 
|  | * $(REF front, std,range,primitives) does to character arrays. As a result, | 
|  | * using ranges with `byCodeUnit` can be `nothrow` while | 
|  | * $(REF front, std,range,primitives) throws when it encounters invalid Unicode | 
|  | * sequences. | 
|  | * | 
|  | * A code unit is a building block of the UTF encodings. Generally, an | 
|  | * individual code unit does not represent what's perceived as a full | 
|  | * character (a.k.a. a grapheme cluster in Unicode terminology). Many characters | 
|  | * are encoded with multiple code units. For example, the UTF-8 code units for | 
|  | * `ø` are `0xC3 0xB8`. That means, an individual element of `byCodeUnit` | 
|  | * often does not form a character on its own. Attempting to treat it as | 
|  | * one while iterating over the resulting range will give nonsensical results. | 
|  | * | 
|  | * Params: | 
|  | *      r = an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) | 
|  | *      of characters (including strings) or a type that implicitly converts to a string type. | 
|  | * Returns: | 
|  | *      If `r` is not an auto-decodable string (i.e. a narrow string or a | 
|  | *      user-defined type that implicitly converts to a string type), then `r` | 
|  | *      is returned. | 
|  | * | 
|  | *      Otherwise, `r` is converted to its corresponding string type (if it's | 
|  | *      not already a string) and wrapped in a random-access range where the | 
|  | *      element encoding type of the string (its code unit) is the element type | 
|  | *      of the range, and that range is returned. The range has slicing. | 
|  | * | 
|  | *      If `r` is quirky enough to be a struct or class which is an input range | 
|  | *      of characters on its own (i.e. it has the input range API as member | 
|  | *      functions), $(I and) it's implicitly convertible to a string type, then | 
|  | *      `r` is returned, and no implicit conversion takes place. | 
|  | * | 
|  | *      If `r` is wrapped in a new range, then that range has a `source` | 
|  | *      property for returning the string that's currently contained within that | 
|  | *      range. | 
|  | * | 
|  | * See_Also: | 
|  | *      Refer to the $(MREF std, uni) docs for a reference on Unicode | 
|  | *      terminology. | 
|  | * | 
|  | *      For a range that iterates by grapheme cluster (written character) see | 
|  | *      $(REF byGrapheme, std,uni). | 
|  | */ | 
|  | auto byCodeUnit(R)(R r) | 
|  | if ((isConvertibleToString!R && !isStaticArray!R) || | 
|  | (isInputRange!R && isSomeChar!(ElementEncodingType!R))) | 
|  | { | 
|  | import std.traits : StringTypeOf; | 
|  | // Case 1: an auto-decodable string type that does not itself declare | 
|  | // empty/front/popFront members; wrap it in ByCodeUnitImpl. | 
|  | static if (// This would be cleaner if we had a way to check whether a type | 
|  | // was a range without any implicit conversions. | 
|  | (isAutodecodableString!R && !__traits(hasMember, R, "empty") && | 
|  | !__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront"))) | 
|  | { | 
|  | // Wrapper whose primitives index and slice `source` directly. | 
|  | static struct ByCodeUnitImpl | 
|  | { | 
|  | @safe pure nothrow @nogc: | 
|  |  | 
|  | @property bool empty() const     { return source.length == 0; } | 
|  | @property auto ref front() inout { return source[0]; } | 
|  | void popFront()                  { source = source[1 .. $]; } | 
|  |  | 
|  | @property auto save() { return ByCodeUnitImpl(source.save); } | 
|  |  | 
|  | @property auto ref back() inout { return source[$ - 1]; } | 
|  | void popBack()                  { source = source[0 .. $-1]; } | 
|  |  | 
|  | auto ref opIndex(size_t index) inout     { return source[index]; } | 
|  | auto opSlice(size_t lower, size_t upper) { return ByCodeUnitImpl(source[lower .. upper]); } | 
|  |  | 
|  | @property size_t length() const { return source.length; } | 
|  | alias opDollar = length; | 
|  |  | 
|  | // The string currently contained within this range. | 
|  | StringTypeOf!R source; | 
|  | } | 
|  |  | 
|  | static assert(isRandomAccessRange!ByCodeUnitImpl); | 
|  |  | 
|  | return ByCodeUnitImpl(r); | 
|  | } | 
|  | // Case 2: R is not an input range, or it converts to const dchar[] without | 
|  | // declaring the range members itself; return it cast to its string type. | 
|  | else static if (!isInputRange!R || | 
|  | (is(R : const dchar[]) && !__traits(hasMember, R, "empty") && | 
|  | !__traits(hasMember, R, "front") && !__traits(hasMember, R, "popFront"))) | 
|  | { | 
|  | return cast(StringTypeOf!R) r; | 
|  | } | 
|  | else | 
|  | { | 
|  | // byCodeUnit for ranges and dchar[] is a no-op | 
|  | return r; | 
|  | } | 
|  | } | 
|  |  | 
|  | /// | 
|  | @safe unittest | 
|  | { | 
|  | import std.range.primitives; | 
|  | import std.traits : isAutodecodableString; | 
|  |  | 
|  | // The wrapped string is a random-access range of code units with length | 
|  | // and slicing. | 
|  | auto r = "Hello, World!".byCodeUnit(); | 
|  | static assert(hasLength!(typeof(r))); | 
|  | static assert(hasSlicing!(typeof(r))); | 
|  | static assert(isRandomAccessRange!(typeof(r))); | 
|  | static assert(is(ElementType!(typeof(r)) == immutable char)); | 
|  |  | 
|  | // contrast with the range capabilities of standard strings (with or | 
|  | // without autodecoding enabled). | 
|  | auto s = "Hello, World!"; | 
|  | // NOTE(review): this assertion checks `r`, not `s` — confirm that is intended. | 
|  | static assert(isBidirectionalRange!(typeof(r))); | 
|  | static if (isAutodecodableString!(typeof(s))) | 
|  | { | 
|  | // with autodecoding enabled, strings are non-random-access ranges of | 
|  | // dchar. | 
|  | static assert(is(ElementType!(typeof(s)) == dchar)); | 
|  | static assert(!isRandomAccessRange!(typeof(s))); | 
|  | static assert(!hasSlicing!(typeof(s))); | 
|  | static assert(!hasLength!(typeof(s))); | 
|  | } | 
|  | else | 
|  | { | 
|  | // without autodecoding, strings are normal arrays. | 
|  | static assert(is(ElementType!(typeof(s)) == immutable char)); | 
|  | static assert(isRandomAccessRange!(typeof(s))); | 
|  | static assert(hasSlicing!(typeof(s))); | 
|  | static assert(hasLength!(typeof(s))); | 
|  | } | 
|  | } | 
|  |  | 
/// `byCodeUnit` does no Unicode decoding
@safe unittest
{
    // noël using e + combining diaeresis
    string decomposed = "noe\u0308l";
    auto decomposedUnits = decomposed.byCodeUnit;
    assert(decomposedUnits[2] == 'e');
    assert(decomposedUnits[2] != 'ë');

    // noël using a precomposed ë character
    string precomposed = "no\u00EBl";
    auto precomposedUnits = precomposed.byCodeUnit;
    // Because string is UTF-8, the code unit at index 2 is just
    // the first of a sequence that encodes 'ë'
    assert(precomposedUnits[2] != 'ë');
}
|  |  | 
/// `byCodeUnit` exposes a `source` property when wrapping narrow strings.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.range : popFrontN;
    import std.traits : isAutodecodableString;

    // narrow string: the wrapper keeps the remaining slice in `source`
    {
        auto units = "hello world".byCodeUnit;
        units.popFrontN(3);
        assert(units.save.equal("lo world"));
        static if (isAutodecodableString!string) // only enabled with autodecoding
        {
            string remaining = units.source;
            assert(remaining == "lo world");
        }
    }
    // source only exists if the range was wrapped
    {
        auto units = "hello world"d.byCodeUnit;
        static assert(!__traits(compiles, units.source));
    }
}
|  |  | 
// Exhaustive check of what byCodeUnit returns for strings, string-like
// structs (alias this), user-defined ranges, enums and static arrays, and of
// the range primitives available on the result.
@safe pure nothrow @nogc unittest
{
    import std.range;
    // Round-trip: iterating byCodeUnit of byCodeUnit reproduces the code
    // units of the original char / wchar / dchar string.
    {
        enum testStr = "𐁄𐂌𐃯 hello ディラン";
        char[testStr.length] s;
        int i;
        foreach (c; testStr.byCodeUnit().byCodeUnit())
        {
            s[i++] = c;
        }
        assert(s == testStr);
    }
    {
        enum testStr = "𐁄𐂌𐃯 hello ディラン"w;
        wchar[testStr.length] s;
        int i;
        foreach (c; testStr.byCodeUnit().byCodeUnit())
        {
            s[i++] = c;
        }
        assert(s == testStr);
    }
    {
        enum testStr = "𐁄𐂌𐃯 hello ディラン"d;
        dchar[testStr.length] s;
        int i;
        foreach (c; testStr.byCodeUnit().byCodeUnit())
        {
            s[i++] = c;
        }
        assert(s == testStr);
    }
    // length, indexing and slicing
    {
        auto bcu = "hello".byCodeUnit();
        assert(bcu.length == 5);
        assert(bcu[3] == 'l');
        assert(bcu[2 .. 4][1] == 'l');
    }
    // front and opIndex are assignable when the underlying array is mutable
    {
        char[5] orig = "hello";
        auto bcu = orig[].byCodeUnit();
        bcu.front = 'H';
        assert(bcu.front == 'H');
        bcu[1] = 'E';
        assert(bcu[1] == 'E');
    }
    // save yields a copy that is unaffected by popFront on the original
    {
        auto bcu = "hello".byCodeUnit().byCodeUnit();
        static assert(isForwardRange!(typeof(bcu)));
        static assert(is(typeof(bcu) == struct) == isAutodecodableString!string);
        auto s = bcu.save;
        bcu.popFront();
        assert(s.front == 'h');
    }
    // bidirectional iteration; byCodeUnit applied to its own result keeps the type
    {
        auto bcu = "hello".byCodeUnit();
        static assert(hasSlicing!(typeof(bcu)));
        static assert(isBidirectionalRange!(typeof(bcu)));
        static assert(is(typeof(bcu) == struct) == isAutodecodableString!string);
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        auto ret = bcu.retro;
        assert(ret.front == 'o');
        ret.popFront();
        assert(ret.front == 'l');
    }
    {
        auto bcu = "κόσμε"w.byCodeUnit();
        static assert(hasSlicing!(typeof(bcu)));
        static assert(isBidirectionalRange!(typeof(bcu)));
        static assert(is(typeof(bcu) == struct) == isAutodecodableString!wstring);
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        auto ret = bcu.retro;
        assert(ret.front == 'ε');
        ret.popFront();
        assert(ret.front == 'μ');
    }
    // Structs that convert to a string through `alias this` (field)
    {
        static struct Stringish
        {
            string s;
            alias s this;
        }

        auto orig = Stringish("\U0010fff8 𐁊 foo 𐂓");
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == struct));
        static assert(!is(typeof(bcu) == Stringish) == isAutodecodableString!Stringish);
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == immutable char));
        // 244 (0xF4) is the first UTF-8 code unit of U+10FFF8
        assert(bcu.front == cast(char) 244);
    }
    {
        static struct WStringish
        {
            wstring s;
            alias s this;
        }

        auto orig = WStringish("\U0010fff8 𐁊 foo 𐂓"w);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == struct));
        static assert(!is(typeof(bcu) == WStringish) == isAutodecodableString!WStringish);
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == immutable wchar));
        // 56319 (0xDBFF) is the high surrogate of U+10FFF8
        assert(bcu.front == cast(wchar) 56319);
    }
    {
        static struct DStringish
        {
            dstring s;
            alias s this;
        }

        auto orig = DStringish("\U0010fff8 𐁊 foo 𐂓"d);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == dstring));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == immutable dchar));
        // 1114104 == 0x10FFF8
        assert(bcu.front == cast(dchar) 1114104);
    }
    // Struct that converts to a string through `alias this` (member function)
    {
        static struct FuncStringish
        {
            string str;
            string s() pure nothrow @nogc { return str; }
            alias s this;
        }

        auto orig = FuncStringish("\U0010fff8 𐁊 foo 𐂓");
        auto bcu = orig.byCodeUnit();
        static if (isAutodecodableString!FuncStringish)
            static assert(is(typeof(bcu) == struct));
        else
            static assert(is(typeof(bcu) == string));
        static assert(!is(typeof(bcu) == FuncStringish));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == immutable char));
        assert(bcu.front == cast(char) 244);
    }
    // User-defined input ranges of characters are returned unchanged
    {
        static struct Range
        {
            string data;
            bool empty() pure nothrow @nogc { return data.empty; }
            char front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }
        }

        auto orig = Range("\U0010fff8 𐁊 foo 𐂓");
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == Range));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == char));
        assert(bcu.front == cast(char) 244);
    }
    {
        static struct WRange
        {
            wstring data;
            bool empty() pure nothrow @nogc { return data.empty; }
            wchar front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }
        }

        auto orig = WRange("\U0010fff8 𐁊 foo 𐂓"w);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == WRange));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == wchar));
        assert(bcu.front == 56319);
    }
    {
        static struct DRange
        {
            dstring data;
            bool empty() pure nothrow @nogc { return data.empty; }
            dchar front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }
        }

        auto orig = DRange("\U0010fff8 𐁊 foo 𐂓"d);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == DRange));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == dchar));
        assert(bcu.front == 1114104);
    }
    // Types that are both a range and string-convertible: the range
    // primitives win, so `front` comes from `data` ("test.d"), not from the
    // `alias this` string ("other").
    {
        static struct RangeAndStringish
        {
            bool empty() pure nothrow @nogc { return data.empty; }
            char front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }

            string data;
            string s;
            alias s this;
        }

        auto orig = RangeAndStringish("test.d", "other");
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == RangeAndStringish));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == char));
        assert(bcu.front == 't');
    }
    {
        static struct WRangeAndStringish
        {
            bool empty() pure nothrow @nogc { return data.empty; }
            wchar front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }

            wstring data;
            wstring s;
            alias s this;
        }

        auto orig = WRangeAndStringish("test.d"w, "other"w);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == WRangeAndStringish));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == wchar));
        assert(bcu.front == 't');
    }
    {
        static struct DRangeAndStringish
        {
            bool empty() pure nothrow @nogc { return data.empty; }
            dchar front() pure nothrow @nogc { return data[0]; }
            void popFront() pure nothrow @nogc { data = data[1 .. $]; }

            dstring data;
            dstring s;
            alias s this;
        }

        auto orig = DRangeAndStringish("test.d"d, "other"d);
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == DRangeAndStringish));
        static assert(is(typeof(bcu) == typeof(bcu.byCodeUnit())));
        static assert(is(ElementType!(typeof(bcu)) == dchar));
        assert(bcu.front == 't');
    }
    // Enums with a string base type never come back as the enum type itself
    {
        enum Enum : string { a = "test.d" }

        auto orig = Enum.a;
        auto bcu = orig.byCodeUnit();
        static assert(!is(typeof(bcu) == Enum));
        static if (isAutodecodableString!Enum)
            static assert(is(typeof(bcu) == struct));
        else
            static assert(is(typeof(bcu) == string));
        static assert(is(ElementType!(typeof(bcu)) == immutable char));
        assert(bcu.front == 't');
    }
    {
        enum WEnum : wstring { a = "test.d"w }

        auto orig = WEnum.a;
        auto bcu = orig.byCodeUnit();
        static assert(!is(typeof(bcu) == WEnum));
        static if (isAutodecodableString!WEnum)
            static assert(is(typeof(bcu) == struct));
        else
            static assert(is(typeof(bcu) == wstring));
        static assert(is(ElementType!(typeof(bcu)) == immutable wchar));
        assert(bcu.front == 't');
    }
    {
        enum DEnum : dstring { a = "test.d"d }

        auto orig = DEnum.a;
        auto bcu = orig.byCodeUnit();
        static assert(is(typeof(bcu) == dstring));
        static assert(is(ElementType!(typeof(bcu)) == immutable dchar));
        assert(bcu.front == 't');
    }

    // Narrow strings are wrapped only when autodecoding is enabled;
    // dstring is returned as-is in both configurations.
    static if (autodecodeStrings)
    {
        static assert(!is(typeof(byCodeUnit("hello")) == string));
        static assert(!is(typeof(byCodeUnit("hello"w)) == wstring));
    }
    else
    {
        static assert(is(typeof(byCodeUnit("hello")) == string));
        static assert(is(typeof(byCodeUnit("hello"w)) == wstring));
    }
    static assert(is(typeof(byCodeUnit("hello"d)) == dstring));

    // Static arrays are rejected, whether passed directly ...
    static assert(!__traits(compiles, byCodeUnit((char[5]).init)));
    static assert(!__traits(compiles, byCodeUnit((wchar[5]).init)));
    static assert(!__traits(compiles, byCodeUnit((dchar[5]).init)));

    enum SEnum : char[5] { a = "hello" }
    enum WSEnum : wchar[5] { a = "hello"w }
    enum DSEnum : dchar[5] { a = "hello"d }

    // ... or as the base type of an enum.
    static assert(!__traits(compiles, byCodeUnit(SEnum.a)));
    static assert(!__traits(compiles, byCodeUnit(WSEnum.a)));
    static assert(!__traits(compiles, byCodeUnit(DSEnum.a)));
}
|  |  | 
/****************************
 * Iterate an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
 * of characters by char, wchar, or dchar.
 * These aliases simply forward to $(LREF byUTF) with the
 * corresponding C argument.
 *
 * Only the target character type is passed to $(LREF byUTF), so its
 * default `useReplacementDchar` setting applies.
 *
 * Params:
 *      r = input range of characters, or array of characters
 */
alias byChar = byUTF!char;

/// Ditto
alias byWchar = byUTF!wchar;

/// Ditto
alias byDchar = byUTF!dchar;
|  |  | 
// byChar over char, wchar and dchar input, including invalid code points.
@safe pure nothrow @nogc unittest
{
    {
        // char input: applying byChar twice must reproduce the code units
        char[5] s;
        int i;
        foreach (c; "hello".byChar.byChar())
        {
            s[i++] = c;
        }
        assert(s == "hello");
    }
    {
        // dchar input: 5 ASCII units, then 2-, 3- and 4-unit encodings,
        // then two invalid code points that each become U+FFFD (3 units)
        char[5+2+3+4+3+3] s;
        int i;
        dchar[10] a;
        a[0 .. 8] = "hello\u07FF\uD7FF\U0010FFFF"d;
        a[8] = 0xD800;   // invalid
        a[9] = cast(dchar) 0x110000; // invalid
        foreach (c; a[].byChar())
        {
            s[i++] = c;
        }
        assert(s == "hello\u07FF\uD7FF\U0010FFFF\uFFFD\uFFFD");
    }
    {
        auto r = "hello"w.byChar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }
    {
        auto r = "hello"d.byChar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }
    {
        auto r = "hello"d.byChar();
        // a range trait is a compile-time fact, so check it at compile time
        static assert(isForwardRange!(typeof(r)));
        auto s = r.save;
        r.popFront();
        assert(s.front == 'h');
    }
}
|  |  | 
// byWchar over char and dchar input, including invalid code points.
@safe pure nothrow @nogc unittest
{
    {
        // 5 ASCII units + 1 + 1 + a surrogate pair (2) + two U+FFFD
        // replacements for the invalid code points = 11 UTF-16 code units
        wchar[11] s;
        int i;
        dchar[10] a;
        a[0 .. 8] = "hello\u07FF\uD7FF\U0010FFFF"d;
        a[8] = 0xD800;   // invalid
        a[9] = cast(dchar) 0x110000; // invalid
        foreach (c; a[].byWchar())
        {
            s[i++] = c;
        }
        assert(s == "hello\u07FF\uD7FF\U0010FFFF\uFFFD\uFFFD"w);
    }

    {
        auto r = "hello".byWchar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }
    {
        auto r = "hello"d.byWchar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }
    {
        auto r = "hello"d.byWchar();
        // a range trait is a compile-time fact, so check it at compile time
        static assert(isForwardRange!(typeof(r)));
        auto s = r.save;
        r.popFront();
        assert(s.front == 'h');
    }
}
|  |  | 
// byDchar over char, wchar and dchar input, including invalid sequences.
@safe pure nothrow @nogc unittest
{
    // ---- UTF-8 input ----
    {
        dchar[9] s;
        int i;
        string a = "hello\u07FF\uD7FF\U00010000\U0010FFFF"; // 1,2,3,4 byte sequences
        foreach (c; a.byDchar())
        {
            s[i++] = c;
        }
        assert(s == "hello\u07FF\uD7FF\U00010000\U0010FFFF"d);
    }
    {
        foreach (s; invalidUTFstrings!char())
        {
            auto r = s.byDchar();
            assert(!r.empty);
            // front must give the same answer when called repeatedly
            assert(r.front == r.front);
            dchar c = r.front;
            assert(c == replacementDchar);
        }
    }
    {
        auto r = "hello".byDchar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }

    // ---- UTF-16 input ----
    {
        dchar[8] s;
        int i;
        wstring a = "hello\u07FF\uD7FF\U0010FFFF"w;
        foreach (c; a.byDchar())
        {
            s[i++] = c;
        }
        assert(s == "hello\u07FF\uD7FF\U0010FFFF"d);
    }
    {
        foreach (s; invalidUTFstrings!wchar())
        {
            auto r = s.byDchar();
            assert(!r.empty);
            // front must give the same answer when called repeatedly
            assert(r.front == r.front);
            dchar c = r.front;
            assert(c == replacementDchar);
        }
    }
    {
        wchar[2] ws;
        ws[0] = 0xD800;
        ws[1] = 0xDD00;             // correct surrogate pair
        auto r = ws[].byDchar();
        assert(!r.empty);
        assert(r.front == r.front);
        dchar c = r.front;
        assert(c == '\U00010100');
    }
    {
        auto r = "hello"w.byDchar();
        r.popFront();
        r.popFront();
        assert(r.front == 'l');
    }

    // ---- UTF-32 input ----
    {
        dchar[5] s;
        int i;
        dstring a = "hello"d;
        foreach (c; a.byDchar.byDchar())
        {
            s[i++] = c;
        }
        assert(s == "hello"d);
    }

    // ---- save ----
    {
        auto r = "hello".byDchar();
        // a range trait is a compile-time fact, so check it at compile time
        static assert(isForwardRange!(typeof(r)));
        auto s = r.save;
        r.popFront();
        assert(s.front == 'h');
    }
    {
        auto r = "hello"w.byDchar();
        static assert(isForwardRange!(typeof(r)));
        auto s = r.save;
        r.popFront();
        assert(s.front == 'h');
    }
}
|  |  | 
// Attribute inference check for byChar/byWchar/byDchar: with a plain dchar
// slice as input, iteration must compile inside a unittest that is
// pure, @safe, nothrow and @nogc.

pure @safe nothrow @nogc unittest
{
    dchar[5] storage = "hello"d;
    auto input = storage[];
    foreach (unit; input.byChar)  { }
    foreach (unit; input.byWchar) { }
    foreach (unit; input.byDchar) { }
}
|  |  | 
// Module-level mutable counter, declared only when the `StdUnittest` version
// is set. The unittest that follows increments it from a range's `popFront`
// so that the range is impure.
version (StdUnittest)
private int impureVariable;
|  |  | 
// byChar/byWchar/byDchar must also compile for a range whose popFront
// touches global state and throws.
@system unittest
{
    static struct ImpureThrowingSystemRange(Char)
    {
        // always empty, so the loops below never actually call popFront
        @property bool empty() const
        {
            return true;
        }

        @property Char front() const
        {
            return Char.init;
        }

        void popFront()
        {
            ++impureVariable;
            throw new Exception("only for testing nothrow");
        }
    }

    foreach (Unit; AliasSeq!(char, wchar, dchar))
    {
        ImpureThrowingSystemRange!Unit input;
        foreach (unit; input.byChar)  { }
        foreach (unit; input.byWchar) { }
        foreach (unit; input.byDchar) { }
    }
}
|  |  | 
/****************************
 * Iterate an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
 * of characters by char type `C` by encoding the elements of the range.
 *
 * UTF sequences that cannot be converted to the specified encoding are either
 * replaced by U+FFFD per "5.22 Best Practice for U+FFFD Substitution"
 * of the Unicode Standard 6.2 or result in a thrown UTFException.
 * Hence byUTF is not symmetric.
 * This algorithm is lazy, and does not allocate memory.
 * `@nogc`, `pure`-ity, `nothrow`, and `@safe`-ty are inferred from the
 * `r` parameter.
 *
 * A qualified `C` (for example `const(char)`) is treated like its
 * unqualified counterpart; `useReplacementDchar` is honoured in both cases.
 *
 * Params:
 *      C = `char`, `wchar`, or `dchar`
 *      useReplacementDchar = UseReplacementDchar.yes means replace invalid UTF with `replacementDchar`,
 *                            UseReplacementDchar.no means throw `UTFException` for invalid UTF
 *      r = input range of characters, or array of characters
 *
 * Throws:
 *      `UTFException` if invalid UTF sequence and `useReplacementDchar` is set to `UseReplacementDchar.no`
 *
 * GC:
 *      Does not use GC if `useReplacementDchar` is set to `UseReplacementDchar.yes`
 *
 * Returns:
 *      A bidirectional range if `R` is a bidirectional range and not auto-decodable,
 *      as defined by $(REF isAutodecodableString, std, traits).
 *
 *      A forward range if `R` is a forward range and not auto-decodable.
 *
 *      Or, if `R` is a range and it is auto-decodable and
 *      `is(ElementEncodingType!typeof(r) == C)`, then the range is passed
 *      to $(LREF byCodeUnit).
 *
 *      Otherwise, an input range of characters.
 */
template byUTF(C, UseReplacementDchar useReplacementDchar = Yes.useReplacementDchar)
if (isSomeChar!C)
{
    // A qualified `C` is served by the instantiation for the unqualified
    // type. The replacement policy must be forwarded explicitly; otherwise
    // the default would silently replace `UseReplacementDchar.no`.
    static if (is(immutable C == immutable UC, UC) && !is(C == UC))
        alias byUTF = byUTF!(UC, useReplacementDchar);
    else:

    // Auto-decodable strings are first turned into a code unit range.
    auto ref byUTF(R)(R r)
    if (isAutodecodableString!R && isInputRange!R && isSomeChar!(ElementEncodingType!R))
    {
        return byUTF(r.byCodeUnit());
    }

    auto ref byUTF(R)(R r)
    if (!isAutodecodableString!R && isInputRange!R && isSomeChar!(ElementEncodingType!R))
    {
        static if (is(immutable ElementEncodingType!R == immutable RC, RC) && is(RC == C))
        {
            // Source and target encodings match: nothing to transcode.
            return r.byCodeUnit();
        }
        else static if (is(C == dchar))
        {
            // Decoding to UTF-32: every source sequence yields exactly one
            // dchar, so a single buffered code point per end is enough.
            static struct Result
            {
                enum Empty = uint.max;  // range is empty or just constructed

                this(return scope R r)
                {
                    this.r = r;
                }

                this(return scope R r, uint buff)
                {
                    this.r = r;
                    this.buff = buff;
                }

                static if (isBidirectionalRange!R)
                {
                    this(return scope R r, uint frontBuff, uint backBuff)
                    {
                        this.r = r;
                        this.buff = frontBuff;
                        this.backBuff = backBuff;
                    }
                }

                @property bool empty()
                {
                    static if (isBidirectionalRange!R)
                        return buff == Empty && backBuff == Empty && r.empty;
                    else
                        return buff == Empty && r.empty;
                }

                @property dchar front() scope // 'scope' required by call to decodeFront() below
                {
                    if (buff == Empty)
                    {
                        auto c = r.front;

                        static if (is(RC == wchar))
                            enum firstMulti = 0xD800; // First high surrogate.
                        else
                            enum firstMulti = 0x80; // First non-ASCII.
                        if (c < firstMulti)
                        {
                            // Fast path: a single code unit is the code point.
                            r.popFront;
                            buff = cast(dchar) c;
                        }
                        else
                        {
                            buff = () @trusted { return decodeFront!(useReplacementDchar)(r); }();
                        }
                    }
                    return cast(dchar) buff;
                }

                void popFront()
                {
                    // Make sure the current code point has been consumed
                    // from `r` before discarding it.
                    if (buff == Empty)
                        front();
                    buff = Empty;
                }

                static if (isForwardRange!R)
                {
                    @property auto save()
                    {
                        static if (isBidirectionalRange!R)
                        {
                            return Result(r.save, buff, backBuff);
                        }
                        else
                        {
                            return Result(r.save, buff);
                        }
                    }
                }

                static if (isBidirectionalRange!R)
                {
                    // NOTE(review): `r.back` is read whenever `backBuff` is
                    // unset, including when `front` has already pulled the
                    // last code point out of `r` - confirm callers cannot
                    // reach that state.
                    @property dchar back() scope // 'scope' required by call to decodeBack() below
                    {
                        if (backBuff != Empty)
                            return cast(dchar) backBuff;

                        auto c = r.back;
                        static if (is(RC == wchar))
                            enum firstMulti = 0xD800; // First high surrogate.
                        else
                            enum firstMulti = 0x80; // First non-ASCII.
                        if (c < firstMulti)
                        {
                            r.popBack;
                            backBuff = cast(dchar) c;
                        }
                        else
                        {
                            backBuff = () @trusted { return decodeBack!useReplacementDchar(r); }();
                        }
                        return cast(dchar) backBuff;

                    }

                    void popBack()
                    {
                        if (backBuff == Empty)
                            back();
                        backBuff = Empty;
                    }
                }

            private:

                R r;
                uint buff = Empty;      // one character lookahead buffer
                static if (isBidirectionalRange!R)
                    uint backBuff = Empty;
            }

            return Result(r);
        }
        else
        {
            // Encoding to UTF-8 or UTF-16: one source code point may expand
            // to several target code units, which are buffered and handed
            // out one at a time.
            static struct Result
            {
                this(return scope R r)
                {
                    this.r = r;
                }

                this(return scope R r, ushort pos, ushort fill, C[4 / C.sizeof] buf)
                {
                    this.r = r;
                    this.pos = pos;
                    this.fill = fill;
                    this.buf = buf;
                }

                static if (isBidirectionalRange!R)
                {
                    this(return scope R r, ushort frontPos, ushort frontFill,
                         ushort backPos, ushort backFill, C[4 / C.sizeof] buf,
                         C[4 / C.sizeof] backBuf)
                    {
                        this.r = r;
                        this.pos = frontPos;
                        this.fill = frontFill;
                        this.backPos = backPos;
                        this.backFill = backFill;
                        this.buf = buf;
                        this.backBuf = backBuf;
                    }
                }

                @property bool empty()
                {
                    static if (isBidirectionalRange!R)
                        return pos == fill && backPos == backFill && r.empty;
                    else
                        return pos == fill && r.empty;
                }

                @property auto front() scope // 'scope' required by call to decodeFront() below
                {
                    if (pos == fill)
                    {
                        // Front buffer exhausted: transcode the next code point.
                        pos = 0;
                        auto c = r.front;

                        static if (C.sizeof >= 2 && RC.sizeof >= 2)
                            enum firstMulti = 0xD800; // First high surrogate.
                        else
                            enum firstMulti = 0x80; // First non-ASCII.
                        if (c < firstMulti)
                        {
                            // Fast path: the code unit is identical in both encodings.
                            fill = 1;
                            r.popFront;
                            buf[pos] = cast(C) c;
                        }
                        else
                        {
                            static if (is(RC == dchar))
                            {
                                r.popFront;
                                dchar dc = c;
                            }
                            else
                                dchar dc = () @trusted { return decodeFront!(useReplacementDchar)(r); }();
                            fill = cast(ushort) encode!(useReplacementDchar)(buf, dc);
                        }
                    }
                    return buf[pos];
                }

                void popFront()
                {
                    if (pos == fill)
                        front;
                    ++pos;
                }

                static if (isForwardRange!R)
                {
                    @property auto save()
                    {
                        static if (isBidirectionalRange!R)
                        {
                            return Result(r.save, pos, fill, backPos, backFill, buf, backBuf);
                        }
                        else
                        {
                            return Result(r.save, pos, fill, buf);
                        }
                    }
                }

                static if (isBidirectionalRange!R)
                {
                    // The back end has its own buffer so that code units
                    // still pending at the front are not overwritten when
                    // `front` and `back` are used alternately.
                    //
                    // NOTE(review): `r.back` is read whenever the back buffer
                    // is exhausted, including when `front` has already pulled
                    // the last code point out of `r` - confirm callers cannot
                    // reach that state.
                    @property auto back() scope // 'scope' required by call to decodeBack() below
                    {
                        if (backPos != backFill)
                            return backBuf[cast(ushort) (backFill - backPos - 1)];

                        backPos = 0;
                        auto c = r.back;
                        static if (C.sizeof >= 2 && RC.sizeof >= 2)
                            enum firstMulti = 0xD800; // First high surrogate.
                        else
                            enum firstMulti = 0x80; // First non-ASCII.
                        if (c < firstMulti)
                        {
                            backFill = 1;
                            r.popBack;
                            backBuf[cast(ushort) (backFill - backPos - 1)] = cast(C) c;
                        }
                        else
                        {
                            static if (is(RC == dchar))
                            {
                                r.popBack;
                                dchar dc = c;
                            }
                            else
                                dchar dc = () @trusted { return decodeBack!(useReplacementDchar)(r); }();
                            backFill = cast(ushort) encode!(useReplacementDchar)(backBuf, dc);
                        }
                        // Units are stored in forward order; the back of the
                        // range is the last unit not yet popped.
                        return backBuf[cast(ushort) (backFill - backPos - 1)];
                    }

                    void popBack()
                    {
                        if (backPos == backFill)
                            back;
                        ++backPos;
                    }
                }

            private:

                R r;
                ushort pos, fill;               // read position / size of `buf`
                static if (isBidirectionalRange!R)
                    ushort backPos, backFill;   // units popped / size of `backBuf`
                C[4 / C.sizeof] buf = void;     // encoded units of the front code point
                static if (isBidirectionalRange!R)
                    C[4 / C.sizeof] backBuf = void; // encoded units of the back code point
            }

            return Result(r);
        }
    }
}
|  |  | 
///
@safe pure nothrow unittest
{
    import std.algorithm.comparison : equal;

    // hellö as a range of `char`s, which are UTF-8
    auto asUtf8 = "hell\u00F6".byUTF!char;
    assert(equal(asUtf8, ['h', 'e', 'l', 'l', 0xC3, 0xB6]));

    // `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
    auto asUtf16 = "hell\u00F6".byUTF!wchar;
    assert(equal(asUtf16, ['h', 'e', 'l', 'l', 'ö']));

    // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
    enum deseret = "𐐷";
    assert(equal(deseret.byUTF!char, [0xF0, 0x90, 0x90, 0xB7]));
    assert(equal(deseret.byUTF!wchar, [0xD801, 0xDC37]));
    assert(equal(deseret.byUTF!dchar, [0x00010437]));
}
|  |  | 
///
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.exception : assertThrown;

    // `\xF0` followed by `b` is not a valid UTF-8 sequence

    // replacing policy: the bad sequence turns into U+FFFD
    auto lenient = "hello\xF0betty".byChar.byUTF!(dchar, UseReplacementDchar.yes);
    assert(lenient.equal("hello\uFFFDetty"));

    // throwing policy: iterating over the bad sequence raises UTFException
    auto strict = "hello\xF0betty".byChar.byUTF!(dchar, UseReplacementDchar.no);
    assertThrown!UTFException(strict.equal("hello betty"));
}
|  |  | 
// Backward iteration (back/popBack) and save for every byUTF target encoding.
@safe unittest
{
    import std.algorithm.comparison : equal;

    {
        // UTF-16 -> UTF-8: U+0219 encodes as 0xC8 0x99
        wchar[] s = ['a', 'b', 0x219];
        auto r = s.byUTF!char;
        static assert(isBidirectionalRange!(typeof(r)));
        assert(r.back == 0x99);
        r.popBack;
        assert(r.back == 0xc8);
        r.popBack;
        assert(r.back == 'b');

    }

    {
        // UTF-16 -> UTF-16
        wchar[] s = ['a', 'b', 0x219];
        auto r = s.byUTF!wchar;
        static assert(isBidirectionalRange!(typeof(r)));
        assert(r.back == 0x219);
        r.popBack;
        assert(r.back == 'b');
    }

    {
        // UTF-16 -> UTF-32
        wchar[] s = ['a', 'b', 0x219];
        auto r = s.byUTF!dchar;
        static assert(isBidirectionalRange!(typeof(r)));
        assert(r.back == 0x219);
        r.popBack;
        assert(r.back == 'b');
    }

    {
        // UTF-32 -> UTF-16: surrogate pairs come out low surrogate first
        dchar[] s = ['𐐷', '😁'];
        auto r = s.byUTF!wchar;
        assert(r.back == 0xde01);
        r.popBack;
        assert(r.back == 0xd83d);
        r.popBack;
        assert(r.back == 0xdc37);
        r.popBack;
        assert(r.back == 0xd801);
    }

    {
        // UTF-32 -> UTF-8, drained completely from the back
        dchar[] s = ['𐐷', '😁'];
        auto r = s.byUTF!char;
        char[] res;
        while (!r.empty)
        {
            res ~= r.back;
            r.popBack;
        }
        assert(res.equal([0x81, 0x98, 0x9f, 0xf0, 0xb7, 0x90, 0x90, 0xf0]));
    }

    {
        dchar[] res;
        auto r = ['a', 'b', 'c', 'd', 'e'].byUTF!dchar;
        while (!r.empty)
        {
            res ~= r.back;
            r.popBack;
        }
        assert(res.equal(['e', 'd', 'c', 'b', 'a']));
    }

    {
        //testing the save() function
        wchar[] s = ['Ă','ț'];

        auto rc = s.byUTF!char;
        rc.popBack;
        auto rcCopy = rc.save;
        assert(rc.back == rcCopy.back);
        assert(rcCopy.back == 0xc8);

        auto rd = s.byUTF!dchar;
        rd.popBack;
        auto rdCopy = rd.save;
        assert(rd.back == rdCopy.back);
        assert(rdCopy.back == 'Ă');
    }
}
|  |  | 
///
@safe pure nothrow unittest
{
    import std.range.primitives;
    wchar[] text = ['ă', 'î'];

    // as UTF-8 the code units appear in reverse: î is 0xC3 0xAE, ă is 0xC4 0x83
    auto utf8 = text.byUTF!char;
    static assert(isBidirectionalRange!(typeof(utf8)));
    assert(utf8.back == 0xae);
    utf8.popBack();
    assert(utf8.back == 0xc3);
    utf8.popBack();
    assert(utf8.back == 0x83);
    utf8.popBack();
    assert(utf8.back == 0xc4);

    // as UTF-16 each character is a single code unit
    auto utf16 = text.byUTF!wchar;
    static assert(isBidirectionalRange!(typeof(utf16)));
    assert(utf16.back == 'î');
    utf16.popBack();
    assert(utf16.back == 'ă');

    // as UTF-32 each character is a single code point
    auto utf32 = text.byUTF!dchar;
    static assert(isBidirectionalRange!(typeof(utf32)));
    assert(utf32.back == 'î');
    utf32.popBack();
    assert(utf32.back == 'ă');
}