I'm trying to convert a unicode string to a hexadecimal representation in javascript.
This is what I have:
function convertFromHex(hex) {
    var hex = hex.toString(); // force conversion
    var str = '';
    for (var i = 0; i < hex.length; i += 2)
        str += String.fromCharCode(parseInt(hex.substr(i, 2), 16));
    return str;
}

function convertToHex(str) {
    var hex = '';
    for (var i = 0; i < str.length; i++) {
        hex += '' + str.charCodeAt(i).toString(16);
    }
    return hex;
}
But it fails on Unicode characters, like Chinese.
Input:
漢字
Output:
ªo"[W
Any ideas? Can this be done in javascript?
Remember that a JavaScript code unit is 16 bits wide. Therefore the hex string form will be 4 digits per code unit.
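For example, with the question's functions, the 16-bit code unit of 漢 is written out as the four digits 6f22, but convertFromHex then reads the hex back two digits at a time, producing the two separate characters 0x6f and 0x22. A quick illustration (assuming the question's functions above):
convertToHex("\u6f22\u5b57");  // "6f225b57"
convertFromHex("6f225b57");    // 'o"[W' – 0x6f, 0x22, 0x5b, 0x57 read as single bytes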
usage:
var str = "\u6f22\u5b57"; // "\u6f22\u5b57" === "漢字"
alert(str.hexEncode().hexDecode());
String to hex form:
String.prototype.hexEncode = function() {
    var hex, i;
    var result = "";
    for (i = 0; i < this.length; i++) {
        hex = this.charCodeAt(i).toString(16);
        result += ("000" + hex).slice(-4);
    }
    return result;
};
Back again:
String.prototype.hexDecode = function() {
    var j;
    var hexes = this.match(/.{1,4}/g) || [];
    var back = "";
    for (j = 0; j < hexes.length; j++) {
        back += String.fromCharCode(parseInt(hexes[j], 16));
    }
    return back;
};
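As a quick sanity check (assuming the two prototype methods above are installed), a character outside the BMP such as 👍 simply round-trips as its two surrogate code units:
"👍".hexEncode();              // "d83ddc4d" – two 4-digit groups
"👍".hexEncode().hexDecode();  // "👍"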
Here is a tweak of McDowell's algorithm that doesn't pad the result:
function toHex(str) {
    var result = '';
    for (var i = 0; i < str.length; i++) {
        result += str.charCodeAt(i).toString(16);
    }
    return result;
}
A more up-to-date solution, for encoding:
// This is the same for all of the below, and
// you probably won't need it except for debugging
// in most cases.
function bytesToHex(bytes) {
    return Array.from(
        bytes,
        byte => byte.toString(16).padStart(2, "0")
    ).join("");
}
// You almost certainly want UTF-8, which is
// now natively supported:
function stringToUTF8Bytes(string) {
    return new TextEncoder().encode(string);
}
// But you might want UTF-16 for some reason.
// .charCodeAt(index) will return the underlying
// UTF-16 code-units (not code-points!), so you
// just need to format them in whichever endian order you want.
function stringToUTF16Bytes(string, littleEndian) {
    const bytes = new Uint8Array(string.length * 2);
    // Using DataView is the only way to get a specific
    // endianness.
    const view = new DataView(bytes.buffer);
    for (let i = 0; i != string.length; i++) {
        // setUint16 takes a byte offset, so each 16-bit
        // code unit goes at offset i * 2.
        view.setUint16(i * 2, string.charCodeAt(i), littleEndian);
    }
    return bytes;
}
// And you might want UTF-32 in even weirder cases.
// Fortunately, iterating a string gives the code
// points, which are identical to the UTF-32 encoding,
// though you still have the endianness issue.
function stringToUTF32Bytes(string, littleEndian) {
    const codepoints = Array.from(string, c => c.codePointAt(0));
    const bytes = new Uint8Array(codepoints.length * 4);
    // Using DataView is the only way to get a specific
    // endianness.
    const view = new DataView(bytes.buffer);
    for (let i = 0; i != codepoints.length; i++) {
        // setUint32 takes a byte offset, so each code point
        // goes at offset i * 4.
        view.setUint32(i * 4, codepoints[i], littleEndian);
    }
    return bytes;
}
Examples:
bytesToHex(stringToUTF8Bytes("hello 漢字 👍"))
// "68656c6c6f20e6bca2e5ad9720f09f918d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", false))
// "00680065006c006c006f00206f225b570020d83ddc4d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", true))
// "680065006c006c006f002000226f575b20003dd84ddc"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", false))
// "00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", true))
// "68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"
For decoding, it's generally a lot simpler; you just need:
function hexToBytes(hex) {
    const bytes = new Uint8Array(hex.length / 2);
    for (let i = 0; i !== bytes.length; i++) {
        bytes[i] = parseInt(hex.substr(i * 2, 2), 16);
    }
    return bytes;
}
then use the encoding parameter of TextDecoder:
// UTF-8 is default
new TextDecoder().decode(hexToBytes("68656c6c6f20e6bca2e5ad9720f09f918d"));
// but you can also use:
new TextDecoder("UTF-16LE").decode(hexToBytes("680065006c006c006f002000226f575b20003dd84ddc"))
new TextDecoder("UTF-16BE").decode(hexToBytes("00680065006c006c006f00206f225b570020d83ddc4d"));
// "hello 漢字 👍"
Here's the list of allowed encoding names: https://www.w3.org/TR/encoding/#names-and-labels
You might notice UTF-32 is not on that list, which is a pain, so:
function bytesToStringUTF32(bytes, littleEndian) {
    const view = new DataView(bytes.buffer);
    const codepoints = new Uint32Array(view.byteLength / 4);
    for (let i = 0; i !== codepoints.length; i++) {
        codepoints[i] = view.getUint32(i * 4, littleEndian);
    }
    return String.fromCodePoint(...codepoints);
}
Then:
bytesToStringUTF32(hexToBytes("00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"), false)
bytesToStringUTF32(hexToBytes("68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"), true)
// "hello 漢字 👍"
It depends on what encoding you use. If you want to convert UTF-8 encoded hex to a string, use this:
function fromHex(hex, str) {
    try {
        str = decodeURIComponent(hex.replace(/(..)/g, '%$1'));
    } catch (e) {
        str = hex;
        console.log('invalid hex input: ' + hex);
    }
    return str;
}
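For instance (assuming the fromHex above, with UTF-8 encoded hex as input):
fromHex("e6bca2e5ad97"); // "漢字"
fromHex("68656c6c6f");   // "hello"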
For the other direction use this:
function toHex(str, hex) {
    try {
        hex = unescape(encodeURIComponent(str))
            .split('').map(function (v) {
                return v.charCodeAt(0).toString(16);
            }).join('');
    } catch (e) {
        hex = str;
        console.log('invalid text input: ' + str);
    }
    return hex;
}
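For instance (assuming the toHex above):
toHex("漢字");  // "e6bca2e5ad97" – the UTF-8 bytes in hex
toHex("hello"); // "68656c6c6f"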
How do you get "\u6f22\u5b57" from 漢字 in JavaScript?
These are JavaScript Unicode escape sequences e.g. \u12AB. To convert them, you could iterate over every code unit in the string, call .toString(16) on it, and go from there.
However, it is more efficient to also use hexadecimal escape sequences e.g. \xAA in the output wherever possible.
Also note that ASCII symbols such as A, b, and - probably don’t need to be escaped.
I’ve written a small JavaScript library that does all this for you, called jsesc. It has lots of options to control the output.
Here’s an online demo of the tool in action: http://mothereff.in/js-escapes#1%E6%BC%A2%E5%AD%97
Your question was tagged as utf-8. Reading the rest of your question, UTF-8 encoding/decoding didn’t seem to be what you wanted here, but in case you ever need it: use utf8.js (online demo).
Here you go. :D
"漢字".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")
"6f225b57"
For non-Unicode:
"hi".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")
"6869"
ASCII (utf-8) binary HEX string to string
"68656c6c6f20776f726c6421".match(/.{1,2}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")
String to ASCII (utf-8) binary HEX string
"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")
--- unicode ---
String to UNICODE (utf-16) binary HEX string
"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")
UNICODE (utf-16) binary HEX string to string
"00680065006c006c006f00200077006f0072006c00640021".match(/.{1,4}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")
Here is my take: these functions convert a UTF-8 string to proper hex without the extra zero padding. A real UTF-8 string has characters that are 1, 2, 3, or 4 bytes long.
While working on this I found a couple of key things that solved my problems:
str.split('') doesn't handle multi-byte characters like emojis correctly. The proper/modern way to handle this is with Array.from(str) (see the quick check after this list).
encodeURIComponent() and decodeURIComponent() are great tools to convert between string and hex. They are pretty standard and they handle UTF-8 correctly.
(Most) ASCII characters (codes 0 - 127) don't get URI encoded, so they need to be handled separately. But c.charCodeAt(0).toString(16) works perfectly for those.
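A quick illustration of the split vs. Array.from difference (hypothetical snippet):
"👍".split("");    // ["\ud83d", "\udc4d"] – two broken surrogate halves
Array.from("👍"); // ["👍"] – one whole character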
function utf8ToHex(str) {
    return Array.from(str).map(c =>
        c.charCodeAt(0) < 128 ? c.charCodeAt(0).toString(16) :
        encodeURIComponent(c).replace(/\%/g, '').toLowerCase()
    ).join('');
}

function hexToUtf8(hex) {
    return decodeURIComponent('%' + hex.match(/.{1,2}/g).join('%'));
}
Demo: https://jsfiddle.net/lyquix/k2tjbrvq/
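For example (assuming the two functions above):
utf8ToHex("漢字 hi");            // "e6bca2e5ad97206869"
hexToUtf8("e6bca2e5ad97206869"); // "漢字 hi"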
UTF-8 Supported Conversion
Encode
function utf8ToHex(str) {
    return Array.from(str).map(c =>
        c.charCodeAt(0) < 128 ? c.charCodeAt(0).toString(16) :
        encodeURIComponent(c).replace(/\%/g, '').toLowerCase()
    ).join('');
}
Decode
function hexToUtf8(hex) {
    return decodeURIComponent('%' + hex.match(/.{1,2}/g).join('%'));
}
Sometimes, from network transmissions or USB devices, you receive data as a hexadecimal string, e.g.:
"12ADFF1345"
I want strings of this type to be converted into their binary equivalent in a buffer, in order to perform some mathematical or bitwise operations on them.
Do you know how I can achieve that?
Use the built-in Buffer class:
let buf1 = Buffer.from('12ADFF1345', 'hex');
let value = buf1.readInt32LE(0);  // 0x13FFAD12 read little-endian
let value2 = buf1.readInt16LE(2); // 0x13FF read little-endian
console.log(value, value2);
// >> 335523090 5119
// With readInt32BE(0) / readInt16BE(2) instead (big-endian):
// >> 313392915 -237   (0x12ADFF13, 0xFF13)
https://nodejs.org/api/buffer.html#buffer_class_method_buffer_from_string_encoding
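If you need the hex string back later, the same Buffer supports the reverse direction (a small check using the buffer above):
buf1.toString('hex'); // '12adff1345'
Array.from(buf1);     // [ 18, 173, 255, 19, 69 ] – the raw byte values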
Yes, I know how to do that. The algorithm is simple (assuming that you have no escape characters):
Split the received string into individual characters.
Group the characters into pairs.
For each pair, generate the string 0x<pair>.
parseInt that string with base 16.
In other words, see the following code:
const _ = require('lodash'); // needed for _.each below

const hexStringToBinaryBuffer = (string) => {
    const subStrings = Array.from(string);
    let previous = null;
    const bytes = [];
    _.each(subStrings, (val) => {
        if (previous === null) { // converting every 2 chars as binary data
            previous = val;
        } else {
            const value = parseInt(`0x${previous}${val}`, 16);
            bytes.push(value);
            previous = null;
        }
    });
    return Buffer.from(bytes);
};
This is useful if you send the result of Buffer.toString('hex') (or an equivalent method) as a string over a network socket or a USB port and need to reconstruct the binary data on the receiving end.
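For instance (a quick round-trip check, assuming the function and the lodash dependency above):
hexStringToBinaryBuffer('12ADFF1345').toString('hex'); // '12adff1345'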
It seems AS3 has a toString() for the Number class. Is there an equivalent in Haxe? The only solution I could come up with for converting an Int to a String is a function like:
public function IntToString(i:Int):String {
    var strbuf:StringBuf = new StringBuf();
    strbuf.add(i);
    return strbuf.toString();
}
Is there a better method that I'm overlooking?
You don't usually need to manually convert an int to a string because the conversion is automatic.
var i = 1;
var s = "" + i; // s is now "1"
The "formal" way to convert any value to a string is to use Std.string():
var s = Std.string(i);
You could also use string interpolation:
var s = '$i';
The function you wrote works fine, but it's definitely overkill.
Using the Haxe programming language, is there any cross-platform way to read a PNG image, and get the pixel data from the image?
I have a file called stuff.png, and I want to obtain an array of RGB values from the image (as an integer array).
Here's an example usage of the Haxe format library to read a PNG file. You need -lib format in your compiler args / build.hxml:
function readPixels(file:String):{data:Bytes, width:Int, height:Int} {
    var handle = sys.io.File.read(file, true);
    var d = new format.png.Reader(handle).read();
    var hdr = format.png.Tools.getHeader(d);
    var ret = {
        data: format.png.Tools.extract32(d),
        width: hdr.width,
        height: hdr.height
    };
    handle.close();
    return ret;
}
Here's an example of how to get ARGB pixel data from the above:
public static function main() {
    if (Sys.args().length == 0) {
        trace('usage: PNGReader <filename>');
        Sys.exit(1);
    }
    var filename = Sys.args()[0];
    var pixels = readPixels(filename);
    for (y in 0...pixels.height) {
        for (x in 0...pixels.width) {
            var p = pixels.data.getInt32(4*(x+y*pixels.width));
            // ARGB, each 0-255
            var a:Int = p>>>24;
            var r:Int = (p>>>16)&0xff;
            var g:Int = (p>>>8)&0xff;
            var b:Int = (p)&0xff;
            // Or, AARRGGBB in hex:
            var hex:String = StringTools.hex(p,8);
            trace('${ x },${ y }: ${ a },${ r },${ g },${ b } - ${ StringTools.hex(p,8) }');
        }
    }
}
You can always access the pixel data with BitmapData.getPixels/BitmapData.setPixels.
If you are using haXe NME, you can use Assets.getBitmapData() to load an asset image file.
If you want to load images from the network, you can use the Loader class; it can load remote images asynchronously, but in Flash, please mind the cross-domain issue.
For more generic ByteArray -> BitmapData conversion, use the following code:
var ldr = new Loader();
ldr.loadBytes(cast(byteArray)); // byteArray contains raw image data
var dp:DisplayObject = ldr.content; // content should actually be of the Bitmap class
var bitmapData = new BitmapData(Std.int(dp.width), Std.int(dp.height), true, 0);
bitmapData.draw(dp);
Hi, I'm writing a method that needs to determine the size of a specified directory. I get a response from the server that contains flags, the file name, the size, and other info, but the format of the answer differs between FTP servers. How can I detect the format of the answer?
unsigned long long GetFtpDirSize(String^ ftpDir) {
    unsigned long long size = 0;
    int j = 0;
    StringBuilder^ result = gcnew StringBuilder();
    StreamReader^ reader;
    FtpWebRequest^ reqFTP;
    reqFTP = (FtpWebRequest^)FtpWebRequest::Create(gcnew Uri(ftpDir));
    reqFTP->UseBinary = true;
    reqFTP->Credentials = gcnew NetworkCredential("anonymous", "123");
    reqFTP->Method = WebRequestMethods::Ftp::ListDirectoryDetails;
    reqFTP->KeepAlive = false;
    reqFTP->UsePassive = false;
    try {
        WebResponse^ resp = reqFTP->GetResponse();
        Encoding^ code;
        code = Encoding::GetEncoding(1251);
        reader = gcnew StreamReader(resp->GetResponseStream(), code);
        String^ line = reader->ReadToEnd();
        array<Char>^ delimiters = gcnew array<Char>{
            '\r', '\n'
        };
        array<Char>^ delimiters2 = gcnew array<Char>{
            ' '
        };
        array<String^>^ words = line->Split(delimiters, StringSplitOptions::RemoveEmptyEntries);
        array<String^>^ DetPr;
        System::Collections::IEnumerator^ myEnum = words->GetEnumerator();
        while ( myEnum->MoveNext() ) {
            String^ word = safe_cast<String^>(myEnum->Current);
            DetPr = word->Split(delimiters2);
        }
    }
Basically, you can't. You are interpreting the raw result, and there is no defined format for this data (nor is there any requirement that this data be returned in the response at all), and the FTP protocol does not define any other way of getting it.
What that leaves you with is a collection of parsing patterns for the server types you know about, and working through them looking for valid data. Not entirely easy.