Use proper UTF-8 and UTF-16BE encoding/decoding.

This commit is contained in:
Kitteh 2021-06-04 13:09:08 +01:00
parent cb7afda2c8
commit d05aec9b66
4 changed files with 22 additions and 23 deletions

View file

@ -6,6 +6,7 @@ const readShort = @import("./readShort.zig").readShort;
const readByte = @import("./readByte.zig").readByte;
const readSignedByte = @import("./readSignedByte.zig").readSignedByte;
const readQByteArray = @import("./readQByteArray.zig").readQByteArray;
const ReadStringErrors = @import("./readString.zig").ReadStringErrors;
const readString = @import("./readString.zig").readString;
const readQVariantList = @import("./readQVariantList.zig").readQVariantList;
const readQVariant = @import("./readQVariant.zig").readQVariant;
@ -17,7 +18,7 @@ const readUserType = @import("./usertypes/readUserType.zig").readUserType;
const QVariant = @import("../types/QVariant.zig").QVariant;
const QVariantTypes = @import("../types/QVariantTypes.zig").QVariantTypes;
pub fn readQVariantT(reader: anytype, type_id: u32, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || error{OutOfMemory} || error{DecodeError})!QVariant {
pub fn readQVariantT(reader: anytype, type_id: u32, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || error{OutOfMemory} || error{DecodeError} || ReadStringErrors ) !QVariant {
switch (type_id) {
@enumToInt(QVariantTypes.Byte) => {
var byte = try readByte(reader);

View file

@ -1,30 +1,25 @@
const std = @import("std");
const readInt = @import("./readInt.zig").readInt;
const readShort = @import("./readShort.zig").readShort;
const range = @import("../utils/RangeIter.zig").range;
const unicode = @import("../utils/unicode.zig");
// NOTE(review): this span appears to be a diff hunk in which the removed and
// the added lines of readString are interleaved without +/- markers; it is not
// a single compilable function as shown.
//
// Removed version: reads a length with readInt, halves it, reads that many
// values with readShort and keeps only the low 8 bits of each one
// (@truncate to u8), then copies them into a freshly allocated []u8.
pub fn readString(reader: anytype, allocator: *std.mem.Allocator) ![]u8 {
var data = std.ArrayList(u8).init(allocator);
defer data.deinit();
// Error set used by the added readString signature: three surrogate-half
// errors plus OutOfMemory.
// NOTE(review): presumably these are the errors surfaced by
// unicode.utf16BEToUtf8 — confirm against utils/unicode.zig.
pub const ReadStringErrors = (error{ExpectedSecondSurrogateHalf} || error{UnexpectedSecondSurrogateHalf} || error{DanglingSurrogateHalf} || error{OutOfMemory});
var length = try readInt(reader);
var chars = @divTrunc(length, 2);
//std.debug.print("read: readString length={d} \n", .{length});
// Added version: reads a length with readInt, halves it, reads that many u16
// values from the reader into a temporary list, converts the list with
// unicode.utf16BEToUtf8 and returns the converted slice.
// NOTE(review): the returned slice is presumably owned by the caller and must
// be freed with the same allocator — confirm against utf16BEToUtf8.
pub fn readString(reader: anytype, allocator: *std.mem.Allocator) (@TypeOf(reader).Error || error{EndOfStream} || ReadStringErrors)![]u8 {
var utf16Data = std.ArrayList(u16).init(allocator);
// The temporary u16 list is released on every exit path.
defer utf16Data.deinit();
var index: usize = 0;
while (true) {
if (index == chars) break;
// Despite its name, num_bytes holds the readInt value divided by 2; it is
// used below as the number of u16 values to read.
var num_bytes = @divTrunc(try readInt(reader), 2);
const byte = try readShort(reader);
try data.append(@truncate(u8, byte));
index += 1;
var iter = range(i32, 0, num_bytes);
while (iter.next()) |i| {
// NOTE(review): this guard looks redundant if range() excludes its upper
// bound — confirm against utils/RangeIter.zig.
if (i == num_bytes) break;
// NOTE(review): values are read little-endian and later passed to a helper
// named utf16BEToUtf8 — confirm the byte-order handling is intended.
try utf16Data.append(try reader.readIntLittle(u16));
}
var ut8Str = try allocator.alloc(u8, @intCast(usize, chars));
for (data.items) |char, i| {
ut8Str[i] = char;
}
var utf8 = try unicode.utf16BEToUtf8(allocator, utf16Data.items);
//std.debug.print("string: {s}\n", .{ut8Str});
//std.debug.print("string: {s}\n", .{utf8});
return ut8Str;
return utf8;
}

View file

@ -14,7 +14,7 @@ pub fn utf8ToUtf16BE(allocator: *std.mem.Allocator, utf8: []const u8) ![]u16 {
var utf16LE = try std.unicode.utf8ToUtf16LeWithNull(allocator, utf8);
// Little Endian to Big Endian
var utf16BE = byteSwapArray(u16, &utf16LE);
return utf16BE;
return utf16BE[0..utf16BE.len];
}
pub fn utf16BEToUtf8(allocator: *std.mem.Allocator, utf16: []u16) ![]u8 {

View file

@ -2,10 +2,13 @@ const std = @import("std");
const writeInt = @import("./writeInt.zig").writeInt;
const unicode = @import("../utils/unicode.zig");
// Error set returned by writeString: InvalidUtf8 or OutOfMemory.
pub const WriteStringErrors = (error{InvalidUtf8} || error{OutOfMemory});
// NOTE(review): the two signatures and the two writeInt lines below appear to
// be the removed and added sides of a diff shown without +/- markers.
//
// Converts `str` with unicode.utf8ToUtf16BE, writes a length prefix with
// writeInt, then writes the raw bytes of the converted buffer to `writer`.
// The converted buffer is freed before returning.
// NOTE(review): the declared error set does not include the writer's own
// errors although writeInt/writeAll are called with `try` — confirm this is
// intended for the writers in use.
pub fn writeString(writer: anytype, allocator: *std.mem.Allocator, str: []const u8) (error{InvalidUtf8} || error{OutOfMemory})!void {
pub fn writeString(writer: anytype, allocator: *std.mem.Allocator, str: []const u8) WriteStringErrors!void {
var str_utf16BE = try unicode.utf8ToUtf16BE(allocator, str);
// NOTE(review): utf8ToUtf16BE obtains its buffer from
// std.unicode.utf8ToUtf16LeWithNull and returns a re-sliced view of it;
// confirm that freeing this slice releases the whole allocation, including
// the null terminator.
defer allocator.free(str_utf16BE);
try writeInt(writer, @intCast(i32, str_utf16BE.len));
// The length prefix counts bytes, not u16 values, so the u16 count is
// multiplied by 2 to match the number of bytes written below.
try writeInt(writer, @intCast(i32, str_utf16BE.len * 2));
try writer.writeAll(std.mem.sliceAsBytes(str_utf16BE));
}