mblaze

Unix utilities to deal with Maildir - my mirror
git clone https://pi.duncano.de/git/mblaze.git
Log | Files | Refs | README | COPYING

commit d21ab392e99ef0a5a1b3382136fe94c784adf5b3
parent 53c6a3b37ee28dd11698ec414939aad18244db68
Author: Leah Neukirchen <leah@vuxu.org>
Date:   Mon, 13 Mar 2017 17:32:13 +0100

safe_u8putstr: actually transcode assumed latin1 into valid utf8

Diffstat:
safe_u8putstr.c | 18+++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/safe_u8putstr.c b/safe_u8putstr.c @@ -7,7 +7,7 @@ safe_u8putstr(char *s0, size_t l, FILE *stream) // tty-safe output of s, with relaxed utf-8 semantics: // - C0 and C1 are displayed as escape sequences // - valid utf8 is printed as is - // - rest is printed bytewise as is (probably latin1) + // - rest is assumed to be latin1, and translated into utf8 // - translate CRLF to CR unsigned char *s = (unsigned char* )s0; @@ -35,6 +35,9 @@ safe_u8putstr(char *s0, size_t l, FILE *stream) fputc(*s, stream); } } else if ((*s & 0xc0) == 0x80) { + if (*s >= 0xa0) + goto latin1; + // C1 fputc(0xe2, stream); fputc(0x90, stream); @@ -56,12 +59,17 @@ safe_u8putstr(char *s0, size_t l, FILE *stream) if ((f & 0xe0c00000) == 0xc0800000) goto u2; else if ((f & 0xf0c0c000) == 0xe0808000) goto u3; - else if ((f & 0xf8c0c0c0) == 0xf0808080) goto u4; - else /* invalid utf8 */ goto u1; -u4: fputc(*s++, stream); + else if ((f & 0xf8c0c0c0) == 0xf0808080) { + fputc(*s++, stream); u3: fputc(*s++, stream); u2: fputc(*s++, stream); -u1: fputc(*s, stream); + fputc(*s, stream); + } else { +latin1: + /* invalid utf8, assume it was latin1 */ + fputc(0xc0 | (*s >> 6), stream); + fputc(0x80 | (*s & 0x3f), stream); + } } s++; }