From ba03e83e24dba832002394d27f0527c2a7b0bcdc Mon Sep 17 00:00:00 2001
From: Felipe Gasper
Date: Fri, 28 May 2021 22:05:15 -0400
Subject: [PATCH] =?UTF-8?q?Don=E2=80=99t=20UTF8-flag=20PVs=20that=20aren?=
 =?UTF-8?q?=E2=80=99t=20valid=20UTF-8.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue #78
---
Reviewer note: this patch was re-flowed from a whitespace-collapsed copy.
Indentation of the C context lines is reconstructed and may not match the
tree byte-for-byte; apply with `git apply --ignore-whitespace`. The blob
hashes on the `index` lines were not regenerated after the review fixes.

 dbdimp.c                       | 10 +++----
 dbdimp.h                       |  6 +++++
 t/69_unicode_no_invalid_utf8.t | 49 ++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 t/69_unicode_no_invalid_utf8.t

diff --git a/dbdimp.c b/dbdimp.c
index 2452821..85d9a96 100644
--- a/dbdimp.c
+++ b/dbdimp.c
@@ -272,7 +272,7 @@ stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, int is_unicode)
             len = sqlite3_value_bytes(value);
             sv = newSVpvn((const char *)sqlite3_value_text(value), len);
             if (is_unicode) {
-                SvUTF8_on(sv);
+                DBD_SQLITE_UTF8_DECODE(sv);
             }
             return sv_2mortal(sv);
         case SQLITE_BLOB:
@@ -1229,7 +1229,7 @@ sqlite_st_fetch(SV *sth, imp_sth_t *imp_sth)
                 }
                 sv_setpvn(AvARRAY(av)[i], val, len);
                 if (imp_dbh->unicode) {
-                    SvUTF8_on(AvARRAY(av)[i]);
+                    DBD_SQLITE_UTF8_DECODE(AvARRAY(av)[i]);
                 } else {
                     SvUTF8_off(AvARRAY(av)[i]);
                 }
@@ -1405,7 +1405,7 @@ sqlite_st_FETCH_attrib(SV *sth, imp_sth_t *imp_sth, SV *keysv)
                     /* fieldname = ++dot; */
                     SV *sv_fieldname = newSVpv(fieldname, 0);
                     if (imp_dbh->unicode)
-                        SvUTF8_on(sv_fieldname);
+                        DBD_SQLITE_UTF8_DECODE(sv_fieldname);
                     av_store(av, n, sv_fieldname);
                 }
             }
@@ -2125,9 +2125,9 @@ sqlite_db_collation_dispatcher_utf8(void *func, int len1, const void *string1,
     SAVETMPS;
     PUSHMARK(SP);
     sv1 = newSVpvn(string1, len1);
-    SvUTF8_on(sv1);
+    DBD_SQLITE_UTF8_DECODE(sv1);
     sv2 = newSVpvn(string2, len2);
-    SvUTF8_on(sv2);
+    DBD_SQLITE_UTF8_DECODE(sv2);
     XPUSHs( sv_2mortal( sv1 ) );
     XPUSHs( sv_2mortal( sv2 ) );
     PUTBACK;
diff --git a/dbdimp.h b/dbdimp.h
index 04d2b00..8d5d096 100644
--- a/dbdimp.h
+++ b/dbdimp.h
@@ -22,6 +22,12 @@ typedef struct {
 #define sqlite3_int64 sqlite_int64
 #endif
 
+#define DBD_SQLITE_UTF8_DECODE(sv) ( \
+    is_utf8_string((U8*) SvPVX(sv), SvCUR(sv)) \
+    ? (void) SvUTF8_on(sv) \
+    : warn("Received invalid UTF-8 from SQLite; cannot decode!") \
+)
+
 /* A linked list of statements prepared by this module */
 typedef struct stmt_list_s stmt_list_s;
 
diff --git a/t/69_unicode_no_invalid_utf8.t b/t/69_unicode_no_invalid_utf8.t
new file mode 100644
index 0000000..9156b01
--- /dev/null
+++ b/t/69_unicode_no_invalid_utf8.t
@@ -0,0 +1,49 @@
+# With sqlite_unicode enabled, text fetched from SQLite that is not valid
+# UTF-8 must come back byte-for-byte (not UTF8-flagged) and emit a warning.
+
+use strict;
+use warnings;
+use lib "t/lib";
+use SQLiteTest;
+use Test::More;
+use if -d ".git", "Test::FailWarnings";
+
+{
+    my $dbh = connect_ok(
+        dbfile => 'foo',
+        RaiseError => 1,
+    );
+
+    my $tbl_name = "\xe9p\xe9e";
+    my $str = "CREATE TABLE $tbl_name ( col1 TEXT )";
+    $dbh->do($str);
+
+    $dbh->{'sqlite_unicode'} = 1;
+
+    my @warnings;
+    my $master_ar = do {
+        local $SIG{'__WARN__'} = sub { push @warnings, @_ };
+        $dbh->selectall_arrayref('SELECT * FROM sqlite_master', { Slice => {} });
+    };
+
+    for my $key ( sort keys %{ $master_ar->[0] } ) {
+        ok(
+            utf8::valid($master_ar->[0]{$key}),
+            "$key is utf8::valid",
+        );
+    }
+
+    is(
+        $master_ar->[0]{'name'},
+        $tbl_name,
+        '`name`',
+    );
+
+    like(
+        $warnings[0],
+        qr/invalid UTF-8/,
+        'warning about invalid UTF-8',
+    );
+}
+
+done_testing;