Add lossy parameter to fromUtf8

This commit is contained in:
Simon Warta 2022-10-24 16:49:20 +02:00
parent 55ca044b60
commit a08c6bae92
3 changed files with 23 additions and 2 deletions

View File

@ -10,6 +10,8 @@ and this project adheres to
- @cosmjs/tendermint-rpc: Add `HttpBatchClient`, which implements `RpcClient`,
supporting batch RPC requests ([#1300]).
- @cosmjs/encoding: Add `lossy` parameter to `fromUtf8` allowing the use of a
replacement character instead of throwing.
## [0.29.2] - 2022-10-13

View File

@ -1,3 +1,4 @@
import { toAscii } from "./ascii";
import { fromUtf8, toUtf8 } from "./utf8";
describe("utf8", () => {
@ -59,4 +60,15 @@ describe("utf8", () => {
// Broken UTF8 example from https://github.com/nodejs/node/issues/16894
expect(() => fromUtf8(new Uint8Array([0xf0, 0x80, 0x80]))).toThrow();
});
describe("fromUtf8", () => {
  it("replaces characters in lossy mode", () => {
    // Valid input (including the empty input) must decode unchanged in lossy mode.
    expect(fromUtf8(new Uint8Array([]), true)).toEqual("");
    expect(fromUtf8(new Uint8Array([0x61, 0x62, 0x63]), true)).toEqual("abc");
    // Example from https://doc.rust-lang.org/stable/std/string/struct.String.html#method.from_utf8_lossy
    // The truncated sequence f0 90 80 is expected to become a single U+FFFD.
    // The escape is spelled out so the expectation survives encoding round-trips.
    expect(
      fromUtf8(new Uint8Array([...toAscii("Hello "), 0xf0, 0x90, 0x80, ...toAscii("World")]), true),
    ).toEqual("Hello \uFFFDWorld");
  });
});
});

View File

@ -12,6 +12,13 @@ export function toUtf8(str: string): Uint8Array {
return new TextEncoder().encode(str);
}
export function fromUtf8(data: Uint8Array): string {
return new TextDecoder("utf-8", { fatal: true }).decode(data);
/**
 * Decodes UTF-8 encoded bytes into a string.
 *
 * @param data the UTF-8 bytes to decode
 * @param lossy when true, invalid encodings are substituted with the
 * replacement character (U+FFFD) instead of aborting. Defaults to false,
 * in which case invalid data leads to an exception.
 * @returns the decoded string
 */
export function fromUtf8(data: Uint8Array, lossy = false): string {
  // The decoder's strict ("fatal") mode is exactly the opposite of lossy mode.
  const decoder = new TextDecoder("utf-8", { fatal: !lossy });
  return decoder.decode(data);
}