λ Reversing a string

======= Algorithm =======
<syntax js>
// Matches a base symbol followed by one or more combining marks.
//   Group 1 - the base symbol: a BMP code unit that is neither a surrogate nor
//             in one of the combining-mark ranges listed in group 2, OR a full
//             surrogate pair, OR a high surrogate not followed by a low one,
//             OR a low surrogate together with the non-high-surrogate code
//             unit (or start of string) that precedes it.
//   Group 2 - the run of combining marks (U+0300-036F, U+1AB0-1AFF,
//             U+1DC0-1DFF, U+20D0-20FF, U+FE20-FE2F).
var regexSymbolWithCombiningMarks = /([\0-\u02FF\u0370-\u1AAF\u1B00-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uE000-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])([\u0300-\u036F\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]+)/g;
// Matches a high surrogate (group 1) immediately followed by a low surrogate
// (group 2), i.e. one astral symbol encoded as two UTF-16 code units.
var regexSurrogatePair = /([\uD800-\uDBFF])([\uDC00-\uDFFF])/g;

/**
 * Reverses a string while keeping combining marks attached to their base
 * symbol and keeping surrogate pairs in high-then-low order.
 *
 * It first pre-swaps every sequence that must survive the reversal, then
 * reverses the string one UTF-16 code unit at a time, which undoes the swaps.
 *
 * NOTE: the Visualisation section of this page reads the locals `string` and
 * `result` by name (via lookupInScope) and displays them, so the variable
 * names and the step-by-step structure below appear to be part of what the
 * page shows - keep them when editing.
 *
 * @param string  The text to reverse.
 * @returns       The reversed text.
 */
function reverseString(string) {
  // Step 1: deal with combining marks and astral symbols (surrogate pairs)
  string = string
    // Swap symbols with their combining marks so the combining marks go first
    .replace(regexSymbolWithCombiningMarks, function($0, $1, $2) {
      // Reverse the combining marks so they will end up in the same order
      // later on (after another round of reversing)
      return reverseString($2) + $1;
    })
    // Swap high and low surrogates so the low surrogates go first
    .replace(regexSurrogatePair, '$2$1');
  // Step 2: reverse the code units in the string
  var result = '';
  var index = string.length;
  // `index--` is tested before the decrement takes effect, so the body sees
  // length-1 down to 0 and the loop stops once index reaches 0.
  while (index--) {
    result += string.charAt(index);
  }
  return result;
}
</syntax>
======= Support =======
<syntax js>
// Runs the algorithm on `input`. `result` is assigned without a declaration,
// so it is a global; the Visualisation section's globals() lists `result`,
// presumably so the host can reset and display it.
function run(input) {
  result = reverseString(input);
}
</syntax>
======= Tests =======
<syntax js>
// Basic cases: each test asserts that reverseString returns the expected reversed literal.
function testTest() { assert(reverseString("test") === "tset"); } function testHello() { assert(reverseString("hello") === "olleh"); } function testLong() { assert(reverseString("By editing this page you agree to license") === "esnecil ot eerga uoy egap siht gnitide yB"); } function testNothing() { assert(reverseString("") === ""); } function testA() { assert(reverseString("A") === "A"); } // Tests from https://github.com/mathiasbynens/esrever function testMultipleCombiningMarks () { assert(reverseString('foo\u0303\u035C\u035D\u035Ebar') === 'rabo\u0303\u035C\u035D\u035Eof'); } function testAstralSymbol () { assert(reverseString('foo\uD834\uDF06bar') === 'rab\uD834\uDF06oof'); }</syntax> ======= Options ======= <syntax js> { "title":"Reversing a string", "height":"320px" } </syntax> ======= Visualisation ======= <syntax html> <html> <head> <style type="text/css"> input {width:100px;} .heading {font-family: Verdana, Helvetica; font-size: 12px; font-weight: bold;} #start {color:blue; text-decoration:underline} #result { padding-top: 10px } .char {width:40px; display:inline-block;font-family:monospace} </style> <script type="text/javascript"> function globals() { // clear out result return {result:undefined}; } function update(n, x) { var element = document.getElementById("result"); var input = x.stack[1]? 
x.stack[1].lookupInScope("input") : undefined; var string = x.lookupInScope("string"); var result = x.lookupInScope("result"); element.innerHTML = ""; if (input) { element.innerHTML += "<div class=heading>Input: \""+input+"\"</div>"+stringHexAndChars(input)+"<br><br>"; } if (string !== input) { element.innerHTML += "<div class=heading>Step 1:</div>" + stringHex(string||"")+"<br><br>"; } if (result) { if (input) element.innerHTML += "<div class=heading>Step 2:</div>"; else element.innerHTML += "<div class=heading>Output: \""+result+"\"</div>"; element.innerHTML += stringHexAndChars(result); } } function padString(str, len) { var c = len-str.length; for (var i=0; i<c; i++) { str = '0' + str; } return str; } function stringHexAndChars(string) { var result = ""; for (var i=0; i<string.length; i++) { result += "<div class=char>"; result += (getWholeChar(string, i) || "") + "<br>"; result += padString(string.charCodeAt(i).toString(16), 4); result += "</div>"; } return result; } function stringHex(string) { var result = ""; for (var i=0; i<string.length; i++) { result += "<div class=char>"; result += padString(string.charCodeAt(i).toString(16), 4); result += "</div>"; } return result; } function args() { var input = document.getElementById('input').value; return input; } function change() { reset(); } function select(sel) { switch (sel.value) { case "1": document.getElementById('input').value = "foo\uD834\uDF06bar"; break; case "2": document.getElementById('input').value = "man\u0303ana"; break; case "3": document.getElementById('input').value = "Test\ud83d\udd25\uD83D\uDCA9"; break; } reset(); } function getWholeChar(str, i) { var code = str.charCodeAt(i); if (isNaN(code)) { return ''; // Position not found } if (code < 0xD800 || code > 0xDFFF) { return str.charAt(i); } // High surrogate (could change last hex to 0xDB7F to treat high private // surrogates as single characters) if (0xD800 <= code && code <= 0xDBFF) { if (str.length <= (i + 1)) { return false;//throw 'High 
surrogate without following low surrogate'; } var next = str.charCodeAt(i + 1); if (0xDC00 > next || next > 0xDFFF) { return false;//throw 'High surrogate without following low surrogate'; } return str.charAt(i) + str.charAt(i + 1); } // Low surrogate (0xDC00 <= code && code <= 0xDFFF) if (i === 0) { throw 'Low surrogate without preceding high surrogate'; } var prev = str.charCodeAt(i - 1); // (could change last hex to 0xDB7F to treat high private // surrogates as single characters) if (0xD800 > prev || prev > 0xDBFF) { throw 'Low surrogate without preceding high surrogate'; } // We can pass over low surrogates now as the second component // in a pair which we have already processed return false; } </script> </head> <body> <input id="input" value='mañana' onchange="change()"> Tests: <select id="tests" onchange="select(this)"> <option value="-1">--</option> <option value="1">foo𝌆bar (astral character)</option> <option value="2">mañana (combining mark)</option> <option value="3">Test🔥 💩</option> </select> <div id="result"></div> </body> </html></syntax>