From d05aec9b66656f8996a04db3be09e91a7b5f3343 Mon Sep 17 00:00:00 2001 From: Kitteh Date: Fri, 4 Jun 2021 13:09:08 +0100 Subject: [PATCH] Use proper utf8 and utf16BE encoding/decoding. --- src/qtshit/read/readQVariantT.zig | 3 ++- src/qtshit/read/readString.zig | 33 +++++++++++++------------------ src/qtshit/utils/unicode.zig | 2 +- src/qtshit/write/writeString.zig | 7 +++++-- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/qtshit/read/readQVariantT.zig b/src/qtshit/read/readQVariantT.zig index 810b18f..cdfa932 100644 --- a/src/qtshit/read/readQVariantT.zig +++ b/src/qtshit/read/readQVariantT.zig @@ -6,6 +6,7 @@ const readShort = @import("./readShort.zig").readShort; const readByte = @import("./readByte.zig").readByte; const readSignedByte = @import("./readSignedByte.zig").readSignedByte; const readQByteArray = @import("./readQByteArray.zig").readQByteArray; +const ReadStringErrors = @import("./readString.zig").ReadStringErrors; const readString = @import("./readString.zig").readString; const readQVariantList = @import("./readQVariantList.zig").readQVariantList; const readQVariant = @import("./readQVariant.zig").readQVariant; @@ -17,7 +18,7 @@ const readUserType = @import("./usertypes/readUserType.zig").readUserType; const QVariant = @import("../types/QVariant.zig").QVariant; const QVariantTypes = @import("../types/QVariantTypes.zig").QVariantTypes; -pub fn readQVariantT(reader: anytype, type_id: u32, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || error{OutOfMemory} || error{DecodeError})!QVariant { +pub fn readQVariantT(reader: anytype, type_id: u32, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || error{OutOfMemory} || error{DecodeError} || ReadStringErrors ) !QVariant { switch (type_id) { @enumToInt(QVariantTypes.Byte) => { var byte = try readByte(reader); diff --git a/src/qtshit/read/readString.zig b/src/qtshit/read/readString.zig index c8137e0..83a2c50 100644 --- a/src/qtshit/read/readString.zig +++ b/src/qtshit/read/readString.zig @@ -1,30 +1,25 @@ const std = @import("std"); const readInt = @import("./readInt.zig").readInt; -const readShort = @import("./readShort.zig").readShort; +const range = @import("../utils/RangeIter.zig").range; +const unicode = @import("../utils/unicode.zig"); -pub fn readString(reader: anytype, allocator: *std.mem.Allocator) ![]u8 { - var data = std.ArrayList(u8).init(allocator); - defer data.deinit(); +pub const ReadStringErrors = (error{ExpectedSecondSurrogateHalf} || error{UnexpectedSecondSurrogateHalf} || error{DanglingSurrogateHalf} || error{OutOfMemory}); - var length = try readInt(reader); - var chars = @divTrunc(length, 2); - //std.debug.print("read: readString length={d} \n", .{length}); +pub fn readString(reader: anytype, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || ReadStringErrors)![]u8 { + var utf16Data = std.ArrayList(u16).init(allocator); + defer utf16Data.deinit(); - var index: usize = 0; - while (true) { - if (index == chars) break; + var num_bytes = @divTrunc(try readInt(reader), 2); - const byte = try readShort(reader); - try data.append(@truncate(u8, byte)); - index += 1; + var iter = range(i32, 0, num_bytes); + while (iter.next()) |i| { + if (i == num_bytes) break; + try utf16Data.append(try reader.readIntLittle(u16)); } - var ut8Str = try allocator.alloc(u8, @intCast(usize, chars)); - for (data.items) |char, i| { - ut8Str[i] = char; - } + var utf8 = try unicode.utf16BEToUtf8(allocator, utf16Data.items); - //std.debug.print("string: {s}\n", .{ut8Str}); + //std.debug.print("string: {s}\n", .{utf8}); - return ut8Str; + return utf8; } diff --git a/src/qtshit/utils/unicode.zig b/src/qtshit/utils/unicode.zig index acd11d1..bcaa5a2 100644 --- a/src/qtshit/utils/unicode.zig +++ b/src/qtshit/utils/unicode.zig @@ -14,7 +14,7 @@ pub fn utf8ToUtf16BE(allocator: *std.mem.Allocator, utf8: []const u8) ![]u16 { var utf16LE = try std.unicode.utf8ToUtf16LeWithNull(allocator, utf8); // Little Endian to Big Endian var utf16BE = byteSwapArray(u16, &utf16LE); - return utf16BE; + return utf16BE[0..utf16BE.len]; } pub fn utf16BEToUtf8(allocator: *std.mem.Allocator, utf16: []u16) ![]u8 { diff --git a/src/qtshit/write/writeString.zig b/src/qtshit/write/writeString.zig index 8a0fe68..f05139a 100644 --- a/src/qtshit/write/writeString.zig +++ b/src/qtshit/write/writeString.zig @@ -2,10 +2,13 @@ const std = @import("std"); const writeInt = @import("./writeInt.zig").writeInt; const unicode = @import("../utils/unicode.zig"); +pub const WriteStringErrors = (error{InvalidUtf8} || error{OutOfMemory}); -pub fn writeString(writer: anytype, allocator: *std.mem.Allocator, str: []const u8) (error{InvalidUtf8} || error{OutOfMemory})!void { +pub fn writeString(writer: anytype, allocator: *std.mem.Allocator, str: []const u8) WriteStringErrors!void { var str_utf16BE = try unicode.utf8ToUtf16BE(allocator, str); defer allocator.free(str_utf16BE); - try writeInt(writer, @intCast(i32, str_utf16BE.len)); + + // len * 2 because length of u8 not length of u16. + try writeInt(writer, @intCast(i32, str_utf16BE.len * 2)); try writer.writeAll(std.mem.sliceAsBytes(str_utf16BE)); } \ No newline at end of file