From 9dc75eaeadd5d7f3d5024c3f82630d951722c4c7 Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Sat, 29 May 2021 17:01:39 -0400 Subject: [PATCH] =?UTF-8?q?Replace=20=E2=80=9Cstring=5Funicode=E2=80=9D=20?= =?UTF-8?q?boolean=20with=20=E2=80=9Cstring=5Fmode=E2=80=9D=20enum.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #78 and issue #68: This introduces additional, more robust schemas for translating strings between SQLite and Perl. --- constants.inc | 7 + dbdimp.c | 316 +++++++++++++++--------- dbdimp.h | 55 ++++- dbdimp_tokenizer.inc | 38 ++- dbdimp_virtual_table.inc | 13 +- lib/DBD/SQLite.pm | 65 ++++- lib/DBD/SQLite/Constants.pm | 5 + t/02_logon.t | 6 +- t/12_unicode.t | 8 +- t/13_create_collation.t | 8 +- t/33_non_latin_path.t | 6 +- t/37_regexp.t | 6 +- t/43_fts3.t | 6 +- t/68_upgraded_no_unicode.t | 2 + t/rt_25371_asymmetric_unicode.t | 6 +- t/rt_25924_user_defined_func_unicode.t | 4 +- t/rt_71311_bind_col_and_unicode.t | 4 +- t/rt_78833_utf8_flag_for_column_names.t | 6 +- t/rt_96877_unicode_statements.t | 4 +- util/constants.pl | 17 ++ 20 files changed, 406 insertions(+), 176 deletions(-) diff --git a/constants.inc b/constants.inc index 4660ad5..e146852 100644 --- a/constants.inc +++ b/constants.inc @@ -5,6 +5,13 @@ MODULE = DBD::SQLite PACKAGE = DBD::SQLite::Constants PROTOTYPES: ENABLE +BOOT: + newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_PV", newSVuv(DBD_SQLITE_STRING_MODE_PV) ); + newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_BYTES", newSVuv(DBD_SQLITE_STRING_MODE_BYTES) ); + newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_NAIVE", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_NAIVE) ); + newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK) ); + newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_STRICT", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_STRICT) ); + #if SQLITE_VERSION_NUMBER >= 3034000 IV diff --git a/dbdimp.c b/dbdimp.c index 2452821..c81b6f7 100644 --- a/dbdimp.c +++ b/dbdimp.c @@ -239,11 +239,11 @@ void init_cxt() { dTHX; MY_CXT_INIT; - MY_CXT.last_dbh_is_unicode = 0; + MY_CXT.last_dbh_string_mode = DBD_SQLITE_STRING_MODE_PV; } SV * -stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, int is_unicode) +stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, dbd_sqlite_string_mode_t string_mode) { STRLEN len; sqlite_int64 iv; @@ -271,9 +271,7 @@ stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, int is_unicode) case SQLITE_TEXT: len = sqlite3_value_bytes(value); sv = newSVpvn((const char *)sqlite3_value_text(value), len); - if (is_unicode) { - SvUTF8_on(sv); - } + DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv, string_mode); return sv_2mortal(sv); case SQLITE_BLOB: len = sqlite3_value_bytes(value); @@ -431,6 +429,44 @@ sqlite_discon_all(SV *drh, imp_drh_t *imp_drh) return FALSE; /* no way to do this */ } +#define _croak_invalid_value(name, value) \ + croak("Invalid value (%s) given for %s", value, name); + +/* Like SvUV but croaks on anything other than an unsigned int. */ +static inline int +my_SvUV_strict(pTHX_ SV *input, const char* name) +{ + if (SvUOK(input)) { + return SvUV(input); + } + + const char* pv = SvPVbyte_nolen(input); + + UV uv; + int numtype = grok_number(pv, strlen(pv), &uv); + + /* Anything else is invalid: */ + if (numtype != IS_NUMBER_IN_UV) _croak_invalid_value(name, pv); + + return uv; +} + +static inline dbd_sqlite_string_mode_t +_extract_sqlite_string_mode_from_sv( pTHX_ SV* input ) +{ + if (SvOK(input)) { + UV val = my_SvUV_strict(aTHX_ input, "sqlite_string_mode"); + + if (val >= _DBD_SQLITE_STRING_MODE_COUNT) { + _croak_invalid_value("sqlite_string_mode", SvPVbyte_nolen(input)); + } + + return val; + } + + return DBD_SQLITE_STRING_MODE_PV; +} + int sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pass, SV *attr) { @@ -440,7 +476,7 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa SV **val; int extended = 0; int flag = 0; - int unicode = 0; + dbd_sqlite_string_mode_t string_mode = DBD_SQLITE_STRING_MODE_PV; sqlite_trace(dbh, imp_dbh, 3, form("login '%s' (version %s)", dbname, sqlite3_version)); @@ -463,13 +499,24 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa hv_stores(hv, "ReadOnly", newSViv(1)); } } - /* sqlite_unicode should be detected earlier, to register default functions correctly */ - if (hv_exists(hv, "sqlite_unicode", 14)) { + + /* sqlite_string_mode should be detected earlier, to register default functions correctly */ + + SV** string_mode_svp = hv_fetchs(hv, "sqlite_string_mode", 0); + if (string_mode_svp != NULL && SvOK(*string_mode_svp)) { + string_mode = _extract_sqlite_string_mode_from_sv(aTHX_ *string_mode_svp); + + /* Legacy alternatives to sqlite_string_mode: */ + } else if (hv_exists(hv, "sqlite_unicode", 14)) { val = hv_fetch(hv, "sqlite_unicode", 14, 0); - unicode = (val && SvOK(*val)) ? SvIV(*val) : 0; + if ( (val && SvOK(*val)) ? SvIV(*val) : 0 ) { + string_mode = DBD_SQLITE_STRING_MODE_UNICODE_NAIVE; + } } else if (hv_exists(hv, "unicode", 7)) { val = hv_fetch(hv, "unicode", 7, 0); - unicode = (val && SvOK(*val)) ? SvIV(*val) : 0; + if ( (val && SvOK(*val)) ? SvIV(*val) : 0 ) { + string_mode = DBD_SQLITE_STRING_MODE_UNICODE_NAIVE; + } } } rc = sqlite_open2(dbname, &(imp_dbh->db), flag, extended); @@ -478,7 +525,7 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa } DBIc_IMPSET_on(imp_dbh); - imp_dbh->unicode = unicode; + imp_dbh->string_mode = string_mode; imp_dbh->functions = newAV(); imp_dbh->aggregates = newAV(); imp_dbh->collation_needed_callback = newSVsv( &PL_sv_undef ); @@ -546,12 +593,7 @@ sqlite_db_do_sv(SV *dbh, imp_dbh_t *imp_dbh, SV *sv_statement) } /* sqlite3_prepare wants an utf8-encoded SQL statement */ - if (imp_dbh->unicode) { - sv_utf8_upgrade(sv_statement); - } - else { - sv_utf8_downgrade(sv_statement, 0); - } + DBD_SQLITE_PREP_SV_FOR_SQLITE(sv_statement, imp_dbh->string_mode); statement = SvPV_nolen(sv_statement); @@ -738,6 +780,10 @@ sqlite_db_destroy(SV *dbh, imp_dbh_t *imp_dbh) DBIc_IMPSET_off(imp_dbh); } +#define _warn_deprecated_if_possible(old, new) \ + if (DBIc_has(imp_dbh, DBIcf_WARN)) \ + warn("\"%s\" attribute will be deprecated. Use \"%s\" instead.", old, new); + int sqlite_db_STORE_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv, SV *valuesv) { @@ -790,26 +836,33 @@ sqlite_db_STORE_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv, SV *valuesv) imp_dbh->prefer_numeric_type = !(! SvTRUE(valuesv)); return TRUE; } - if (strEQ(key, "sqlite_unicode")) { + + if (strEQ(key, "sqlite_string_mode")) { + dbd_sqlite_string_mode_t string_mode = _extract_sqlite_string_mode_from_sv(aTHX_ valuesv); + +#if PERL_UNICODE_DOES_NOT_WORK_WELL + if (string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) { + sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl.")); + string_mode = DBD_SQLITE_STRING_MODE_PV; + } +#endif + + imp_dbh->string_mode = string_mode; + + return TRUE; + } + + if (strEQ(key, "sqlite_unicode") || strEQ(key, "unicode")) { + _warn_deprecated_if_possible(key, "sqlite_string_mode"); #if PERL_UNICODE_DOES_NOT_WORK_WELL sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl.")); - imp_dbh->unicode = 0; + imp_dbh->string_mode = DBD_SQLITE_STRING_MODE_PV; #else - imp_dbh->unicode = !(! SvTRUE(valuesv)); -#endif - return TRUE; - } - if (strEQ(key, "unicode")) { - if (DBIc_has(imp_dbh, DBIcf_WARN)) - warn("\"unicode\" attribute will be deprecated. Use \"sqlite_unicode\" instead."); -#if PERL_UNICODE_DOES_NOT_WORK_WELL - sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl.")); - imp_dbh->unicode = 0; -#else - imp_dbh->unicode = !(! SvTRUE(valuesv)); + imp_dbh->string_mode = SvTRUE(valuesv) ? DBD_SQLITE_STRING_MODE_UNICODE_NAIVE : DBD_SQLITE_STRING_MODE_PV; #endif return TRUE; } + return FALSE; } @@ -837,22 +890,18 @@ sqlite_db_FETCH_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv) if (strEQ(key, "sqlite_prefer_numeric_type")) { return sv_2mortal(newSViv(imp_dbh->prefer_numeric_type ? 1 : 0)); } - if (strEQ(key, "sqlite_unicode")) { -#if PERL_UNICODE_DOES_NOT_WORK_WELL - sqlite_trace(dbh, imp_dbh, 3, "Unicode support is disabled for this version of perl."); - return sv_2mortal(newSViv(0)); -#else - return sv_2mortal(newSViv(imp_dbh->unicode ? 1 : 0)); -#endif + + if (strEQ(key, "sqlite_string_mode")) { + return sv_2mortal(newSVuv(imp_dbh->string_mode)); } - if (strEQ(key, "unicode")) { - if (DBIc_has(imp_dbh, DBIcf_WARN)) - warn("\"unicode\" attribute will be deprecated. Use \"sqlite_unicode\" instead."); + + if (strEQ(key, "sqlite_unicode") || strEQ(key, "unicode")) { + _warn_deprecated_if_possible(key, "sqlite_string_mode"); #if PERL_UNICODE_DOES_NOT_WORK_WELL sqlite_trace(dbh, imp_dbh, 3, "Unicode support is disabled for this version of perl."); return sv_2mortal(newSViv(0)); #else - return sv_2mortal(newSViv(imp_dbh->unicode ? 1 : 0)); + return sv_2mortal(newSViv(imp_dbh->string_mode == DBD_SQLITE_STRING_MODE_UNICODE_NAIVE ? 1 : 0)); #endif } @@ -885,7 +934,7 @@ sqlite_st_prepare_sv(SV *sth, imp_sth_t *imp_sth, SV *sv_statement, SV *attribs) stmt_list_s * new_stmt; D_imp_dbh_from_sth; - MY_CXT.last_dbh_is_unicode = imp_dbh->unicode; + MY_CXT.last_dbh_string_mode = imp_dbh->string_mode; if (!DBIc_ACTIVE(imp_dbh)) { sqlite_error(sth, -2, "attempt to prepare on inactive database handle"); @@ -893,12 +942,7 @@ sqlite_st_prepare_sv(SV *sth, imp_sth_t *imp_sth, SV *sv_statement, SV *attribs) } /* sqlite3_prepare wants an utf8-encoded SQL statement */ - if (imp_dbh->unicode) { - sv_utf8_upgrade(sv_statement); - } - else { - sv_utf8_downgrade(sv_statement, 0); - } + DBD_SQLITE_PREP_SV_FOR_SQLITE(sv_statement, imp_dbh->string_mode); statement = SvPV_nolen(sv_statement); @@ -1012,12 +1056,15 @@ sqlite_st_execute(SV *sth, imp_sth_t *imp_sth) const char *data; int numtype = 0; - if (imp_dbh->unicode) { + if (imp_dbh->string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) { data = SvPVutf8(value, len); } - else { + else if (imp_dbh->string_mode == DBD_SQLITE_STRING_MODE_BYTES) { data = SvPVbyte(value, len); } + else { + data = SvPV(value, len); + } /* * XXX: For backward compatibility, it'd be better to @@ -1228,11 +1275,9 @@ sqlite_st_fetch(SV *sth, imp_sth_t *imp_sth) } } sv_setpvn(AvARRAY(av)[i], val, len); - if (imp_dbh->unicode) { - SvUTF8_on(AvARRAY(av)[i]); - } else { - SvUTF8_off(AvARRAY(av)[i]); - } + + DBD_SQLITE_UTF8_DECODE_IF_NEEDED(AvARRAY(av)[i], imp_dbh->string_mode); + break; case SQLITE_BLOB: sqlite_trace(sth, imp_sth, 5, form("fetch column %d as blob", i)); @@ -1404,8 +1449,9 @@ sqlite_st_FETCH_attrib(SV *sth, imp_sth_t *imp_sth, SV *keysv) /* if (dot) drop table name from field name */ /* fieldname = ++dot; */ SV *sv_fieldname = newSVpv(fieldname, 0); - if (imp_dbh->unicode) - SvUTF8_on(sv_fieldname); + + DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv_fieldname, imp_dbh->string_mode); + av_store(av, n, sv_fieldname); } } @@ -1699,7 +1745,7 @@ sqlite_db_busy_timeout(pTHX_ SV *dbh, SV *timeout ) } static void -sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sqlite3_value **value) +sqlite_db_func_dispatcher(dbd_sqlite_string_mode_t string_mode, sqlite3_context *context, int argc, sqlite3_value **value) { dTHX; dSP; @@ -1714,7 +1760,7 @@ sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sq PUSHMARK(SP); for ( i=0; i < argc; i++ ) { - XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], is_unicode)); + XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], string_mode)); } PUTBACK; @@ -1745,17 +1791,46 @@ sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sq } static void -sqlite_db_func_dispatcher_unicode(sqlite3_context *context, int argc, sqlite3_value **value) +sqlite_db_func_dispatcher_unicode_naive(sqlite3_context *context, int argc, sqlite3_value **value) { - sqlite_db_func_dispatcher(1, context, argc, value); + sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_NAIVE, context, argc, value); } static void -sqlite_db_func_dispatcher_no_unicode(sqlite3_context *context, int argc, sqlite3_value **value) +sqlite_db_func_dispatcher_unicode_fallback(sqlite3_context *context, int argc, sqlite3_value **value) { - sqlite_db_func_dispatcher(0, context, argc, value); + sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK, context, argc, value); } +static void +sqlite_db_func_dispatcher_unicode_strict(sqlite3_context *context, int argc, sqlite3_value **value) +{ + sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_STRICT, context, argc, value); +} + +static void +sqlite_db_func_dispatcher_bytes(sqlite3_context *context, int argc, sqlite3_value **value) +{ + sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_BYTES, context, argc, value); +} + +static void +sqlite_db_func_dispatcher_pv(sqlite3_context *context, int argc, sqlite3_value **value) +{ + sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_PV, context, argc, value); +} + +typedef void (*dispatch_func_t)(sqlite3_context*, int, sqlite3_value**); + +static dispatch_func_t _FUNC_DISPATCHER[_DBD_SQLITE_STRING_MODE_COUNT] = { + sqlite_db_func_dispatcher_pv, + sqlite_db_func_dispatcher_bytes, + NULL, NULL, + sqlite_db_func_dispatcher_unicode_naive, + sqlite_db_func_dispatcher_unicode_fallback, + sqlite_db_func_dispatcher_unicode_strict, +}; + int sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func, int flags) { @@ -1777,8 +1852,7 @@ sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func, i /* warn("create_function %s with %d args\n", name, argc); */ rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8|flags, func_sv, - imp_dbh->unicode ? sqlite_db_func_dispatcher_unicode - : sqlite_db_func_dispatcher_no_unicode, + _FUNC_DISPATCHER[imp_dbh->string_mode], NULL, NULL ); if ( rc != SQLITE_OK ) { sqlite_error(dbh, rc, form("sqlite_create_function failed with error %s", sqlite3_errmsg(imp_dbh->db))); @@ -1943,7 +2017,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context, { dTHX; dSP; - int i, is_unicode = 0; /* TODO : find out from db handle */ + int i, string_mode = DBD_SQLITE_STRING_MODE_PV; /* TODO : find out from db handle */ aggrInfo *aggr; aggr = sqlite3_aggregate_context(context, sizeof (aggrInfo)); @@ -1965,7 +2039,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context, PUSHMARK(SP); XPUSHs( sv_2mortal( newSVsv( aggr->aggr_inst ) )); for ( i=0; i < argc; i++ ) { - XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], is_unicode)); + XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], string_mode)); } PUTBACK; @@ -2081,71 +2155,78 @@ sqlite_db_create_aggregate(pTHX_ SV *dbh, const char *name, int argc, SV *aggr_p return TRUE; } +#define SQLITE_DB_COLLATION_BASE(func, sv1, sv2) STMT_START { \ + int cmp = 0; \ + int n_retval, i; \ + \ + ENTER; \ + SAVETMPS; \ + PUSHMARK(SP); \ + XPUSHs( sv_2mortal( sv1 ) ); \ + XPUSHs( sv_2mortal( sv2 ) ); \ + PUTBACK; \ + n_retval = call_sv(func, G_SCALAR); \ + SPAGAIN; \ + if (n_retval != 1) { \ + warn("collation function returned %d arguments", n_retval); \ + } \ + for(i = 0; i < n_retval; i++) { \ + cmp = POPi; \ + } \ + PUTBACK; \ + FREETMPS; \ + LEAVE; \ + \ + return cmp; \ +} STMT_END + int sqlite_db_collation_dispatcher(void *func, int len1, const void *string1, int len2, const void *string2) { dTHX; dSP; - int cmp = 0; - int n_retval, i; - ENTER; - SAVETMPS; - PUSHMARK(SP); - XPUSHs( sv_2mortal( newSVpvn( string1, len1) ) ); - XPUSHs( sv_2mortal( newSVpvn( string2, len2) ) ); - PUTBACK; - n_retval = call_sv(func, G_SCALAR); - SPAGAIN; - if (n_retval != 1) { - warn("collation function returned %d arguments", n_retval); - } - for(i = 0; i < n_retval; i++) { - cmp = POPi; - } - PUTBACK; - FREETMPS; - LEAVE; - - return cmp; + SQLITE_DB_COLLATION_BASE(func, newSVpvn( string1, len1), newSVpvn( string2, len2)); } int -sqlite_db_collation_dispatcher_utf8(void *func, int len1, const void *string1, +sqlite_db_collation_dispatcher_utf8_naive(void *func, int len1, const void *string1, int len2, const void *string2) { dTHX; dSP; - int cmp = 0; - int n_retval, i; - SV *sv1, *sv2; - ENTER; - SAVETMPS; - PUSHMARK(SP); - sv1 = newSVpvn(string1, len1); - SvUTF8_on(sv1); - sv2 = newSVpvn(string2, len2); - SvUTF8_on(sv2); - XPUSHs( sv_2mortal( sv1 ) ); - XPUSHs( sv_2mortal( sv2 ) ); - PUTBACK; - n_retval = call_sv(func, G_SCALAR); - SPAGAIN; - if (n_retval != 1) { - warn("collation function returned %d arguments", n_retval); - } - for(i = 0; i < n_retval; i++) { - cmp = POPi; - } - PUTBACK; - FREETMPS; - LEAVE; - - return cmp; + SQLITE_DB_COLLATION_BASE(func, newSVpvn_flags( string1, len1, SVf_UTF8), newSVpvn_flags( string2, len2, SVf_UTF8)); } +int +sqlite_db_collation_dispatcher_utf8_fallback(void *func, int len1, const void *string1, + int len2, const void *string2) +{ + dTHX; + dSP; + + SV* sv1 = newSVpvn( string1, len1); + SV* sv2 = newSVpvn( string2, len2); + + DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv1); + DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv2); + + SQLITE_DB_COLLATION_BASE(func, sv1, sv2); +} + +typedef int (*collation_dispatch_func_t)(void *, int, const void *, int, const void *); + +static collation_dispatch_func_t _COLLATION_DISPATCHER[_DBD_SQLITE_STRING_MODE_COUNT] = { + sqlite_db_collation_dispatcher, + sqlite_db_collation_dispatcher, + NULL, NULL, + sqlite_db_collation_dispatcher_utf8_naive, + sqlite_db_collation_dispatcher_utf8_fallback, + sqlite_db_collation_dispatcher_utf8_fallback, +}; + int sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func) { @@ -2181,8 +2262,7 @@ sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func) rv = sqlite3_create_collation( imp_dbh->db, name, SQLITE_UTF8, func_sv, - imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8 - : sqlite_db_collation_dispatcher + _COLLATION_DISPATCHER[imp_dbh->string_mode] ); if ( rv != SQLITE_OK ) { diff --git a/dbdimp.h b/dbdimp.h index 04d2b00..b357e1f 100644 --- a/dbdimp.h +++ b/dbdimp.h @@ -7,8 +7,24 @@ #define MY_CXT_KEY "DBD::SQLite::_guts" XS_VERSION +typedef enum { + DBD_SQLITE_STRING_MODE_PV, + DBD_SQLITE_STRING_MODE_BYTES, + + /* Leave space here so that we can use DBD_SQLITE_STRING_MODE_UNICODE_ANY + as a means of checking for any unicode mode. */ + + DBD_SQLITE_STRING_MODE_UNICODE_NAIVE = 4, + DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK, + DBD_SQLITE_STRING_MODE_UNICODE_STRICT, + + _DBD_SQLITE_STRING_MODE_COUNT, +} dbd_sqlite_string_mode_t; + +#define DBD_SQLITE_STRING_MODE_UNICODE_ANY DBD_SQLITE_STRING_MODE_UNICODE_NAIVE + typedef struct { - int last_dbh_is_unicode; + dbd_sqlite_string_mode_t last_dbh_string_mode; } my_cxt_t; #define PERL_UNICODE_DOES_NOT_WORK_WELL \ @@ -22,6 +38,41 @@ typedef struct { #define sqlite3_int64 sqlite_int64 #endif +#define DBD_SQLITE_UTF8_DECODE_NAIVE(sv) SvUTF8_on(sv) + +#define DBD_SQLITE_UTF8_DECODE_CHECKED(sv, onfail) ( \ + is_utf8_string((U8*) SvPVX(sv), SvCUR(sv)) \ + ? SvUTF8_on(sv) \ + : onfail("Received invalid UTF-8 from SQLite; cannot decode!") \ +) + +#define DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv) ( \ + DBD_SQLITE_UTF8_DECODE_CHECKED(sv, warn) \ +) + +#define DBD_SQLITE_UTF8_DECODE_STRICT(sv) ( \ + DBD_SQLITE_UTF8_DECODE_CHECKED(sv, croak) \ +) + +#define DBD_SQLITE_PREP_SV_FOR_SQLITE(sv, string_mode) STMT_START { \ + if (string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) { \ + sv_utf8_upgrade(sv); \ + } \ + else if (string_mode == DBD_SQLITE_STRING_MODE_BYTES) { \ + sv_utf8_downgrade(sv, 0); \ + } \ +} STMT_END + +#define DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv, string_mode) ( \ + string_mode == DBD_SQLITE_STRING_MODE_UNICODE_NAIVE \ + ? DBD_SQLITE_UTF8_DECODE_NAIVE(sv) \ + : string_mode == DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK \ + ? DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv) \ + : string_mode == DBD_SQLITE_STRING_MODE_UNICODE_STRICT \ + ? DBD_SQLITE_UTF8_DECODE_STRICT(sv) \ + : 0 \ +) + /* A linked list of statements prepared by this module */ typedef struct stmt_list_s stmt_list_s; @@ -41,7 +92,7 @@ struct imp_dbh_st { dbih_dbc_t com; /* sqlite specific bits */ sqlite3 *db; - bool unicode; + dbd_sqlite_string_mode_t string_mode; bool handle_binary_nulls; int timeout; AV *functions; diff --git a/dbdimp_tokenizer.inc b/dbdimp_tokenizer.inc index d48409a..1df1a20 100644 --- a/dbdimp_tokenizer.inc +++ b/dbdimp_tokenizer.inc @@ -94,6 +94,30 @@ static int perl_tokenizer_Open( SV *perl_string; int n_retval; + /* build a Perl copy of the input string */ + if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */ + nBytes = strlen(pInput); + } + + /* SVs_TEMP will call sv_2mortal */ + perl_string = newSVpvn_flags(pInput, nBytes, SVs_TEMP); + + switch (MY_CXT.last_dbh_string_mode) { + DBD_SQLITE_STRING_MODE_UNICODE_NAIVE: + DBD_SQLITE_UTF8_DECODE_NAIVE(perl_string); + break; + + DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK: + DBD_SQLITE_STRING_MODE_UNICODE_STRICT: + DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(perl_string); + break; + + default: + break; + } + + DBD_SQLITE_UTF8_DECODE_IF_NEEDED(perl_string, MY_CXT.last_dbh_string_mode); + perl_tokenizer *t = (perl_tokenizer *)pTokenizer; /* allocate and initialize the cursor struct */ @@ -102,29 +126,17 @@ static int perl_tokenizer_Open( memset(c, 0, sizeof(*c)); *ppCursor = &c->base; - /* flags for creating the Perl SV containing the input string */ - flags = SVs_TEMP; /* will call sv_2mortal */ - /* special handling if working with utf8 strings */ - if (MY_CXT.last_dbh_is_unicode) { + if (MY_CXT.last_dbh_string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) { /* data to keep track of byte offsets */ c->lastByteOffset = c->pInput = pInput; c->lastCharOffset = 0; - - /* string passed to Perl needs to be flagged as utf8 */ - flags |= SVf_UTF8; } ENTER; SAVETMPS; - /* build a Perl copy of the input string */ - if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */ - nBytes = strlen(pInput); - } - perl_string = newSVpvn_flags(pInput, nBytes, flags); - /* call the tokenizer coderef */ PUSHMARK(SP); XPUSHs(perl_string); diff --git a/dbdimp_virtual_table.inc b/dbdimp_virtual_table.inc index a3888be..3907848 100644 --- a/dbdimp_virtual_table.inc +++ b/dbdimp_virtual_table.inc @@ -432,7 +432,7 @@ static int perl_vt_Filter( sqlite3_vtab_cursor *pVtabCursor, dSP; dMY_CXT; int i, count; - int is_unicode = MY_CXT.last_dbh_is_unicode; + dbd_sqlite_string_mode_t string_mode = MY_CXT.last_dbh_string_mode; ENTER; SAVETMPS; @@ -443,7 +443,7 @@ static int perl_vt_Filter( sqlite3_vtab_cursor *pVtabCursor, XPUSHs(sv_2mortal(newSViv(idxNum))); XPUSHs(sv_2mortal(newSVpv(idxStr, 0))); for(i = 0; i < argc; i++) { - XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], is_unicode)); + XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], string_mode)); } PUTBACK; count = call_method("FILTER", G_VOID); @@ -588,7 +588,7 @@ static int perl_vt_Update( sqlite3_vtab *pVTab, dSP; dMY_CXT; int count, i; - int is_unicode = MY_CXT.last_dbh_is_unicode; + dbd_sqlite_string_mode_t string_mode = MY_CXT.last_dbh_string_mode; int rc = SQLITE_ERROR; SV *rowidsv; @@ -599,7 +599,7 @@ static int perl_vt_Update( sqlite3_vtab *pVTab, PUSHMARK(SP); XPUSHs(((perl_vtab *) pVTab)->perl_vtab_obj); for(i = 0; i < argc; i++) { - XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], is_unicode)); + XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], string_mode)); } PUTBACK; count = call_method ("_SQLITE_UPDATE", G_SCALAR); @@ -704,8 +704,9 @@ static int perl_vt_FindFunction(sqlite3_vtab *pVTab, /* return function information for sqlite3 within *pxFunc and *ppArg */ is_overloaded = coderef && SvTRUE(coderef); if (is_overloaded) { - *pxFunc = MY_CXT.last_dbh_is_unicode ? sqlite_db_func_dispatcher_unicode - : sqlite_db_func_dispatcher_no_unicode; + + *pxFunc = _FUNC_DISPATCHER[MY_CXT.last_dbh_string_mode]; + *ppArg = coderef; } diff --git a/lib/DBD/SQLite.pm b/lib/DBD/SQLite.pm index ab4aca8..a30359c 100644 --- a/lib/DBD/SQLite.pm +++ b/lib/DBD/SQLite.pm @@ -1622,32 +1622,71 @@ Your sweet spot probably lies somewhere in between. Returns the version of the SQLite library which B is using, e.g., "3.26.0". Can only be read. -=item sqlite_unicode +=item sqlite_string_mode -If set to a true value, B will turn the UTF-8 flag on for all -text strings coming out of the database (this feature is currently disabled -for perl < 5.8.5). For more details on the UTF-8 flag see -L. The default is for the UTF-8 flag to be turned off. +SQLite strings are simple arrays of bytes, but Perl strings can store any +arbitrary Unicode code point. Thus, DBD::SQLite has to adopt some method +of translating between those two models. This parameter defines that +translation. -Also note that due to some bizarreness in SQLite's type system (see +Accepted values are the following constants: + +=over + +=item * DBD_SQLITE_STRING_MODE_BYTES: All strings are assumed to +represent bytes. A Perl string that contains any code point above 255 +will trigger an exception. This is appropriate for Latin-1 strings, +binary data, pre-encoded UTF-8 strings, etc. + +=item * DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK: All Perl strings are encoded +to UTF-8 before being given to SQLite. Perl will B to decode SQLite +strings as UTF-8 when giving them to Perl. Should any such string not be +valid UTF-8, a warning is thrown, and the string is left undecoded. + +This is appropriate for strings that are decoded to characters via, +e.g., L. + +Also note that, due to some bizarreness in SQLite's type system (see L), if you want to retain -blob-style behavior for B columns under C<< $dbh->{sqlite_unicode} = 1 ->> (say, to store images in the database), you have to state so +blob-style behavior for B columns under DBD_SQLITE_STRING_MODE_UNICODE +(say, to store images in the database), you have to state so explicitly using the 3-argument form of L when doing updates: use DBI qw(:sql_types); - $dbh->{sqlite_unicode} = 1; + $dbh->{string_mode} = DBD_SQLITE_STRING_MODE_UNICODE; my $sth = $dbh->prepare("INSERT INTO mytable (blobcolumn) VALUES (?)"); - + # Binary_data will be stored as is. $sth->bind_param(1, $binary_data, SQL_BLOB); Defining the column type as C in the DDL is B sufficient. -This attribute was originally named as C, and renamed to -C for integrity since version 1.26_06. Old C -attribute is still accessible but will be deprecated in the near future. +=item * DBD_SQLITE_STRING_MODE_UNICODE_STRICT: Like +DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK but usually throws an exception +rather than a warning if SQLite sends invalid UTF-8. (In Perl callbacks +from SQLite we still warn instead.) + +=item * DBD_SQLITE_STRING_MODULE_UNICODE_NAIVE: Like +DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK but uses a "naïve" UTF-8 decoding +method that forgoes validation. This is marginally faster than a validated +decode, but it can also B B B + +=item * DBD_SQLITE_STRING_MODE_PV (default, but B B B): Like +DBD_SQLITE_STRING_MODE_BYTES, but when translating Perl strings to SQLite +the Perl string's internal byte buffer is given to SQLite. B B +B, but it's been the default for many years, and changing that would +break existing applications. + +=back + +=item C or C (deprecated) + +If truthy, equivalent to setting C to +DBD_SQLITE_STRING_MODE_UNICODE_NAIVE; if falsy, equivalent to +DBD_SQLITE_STRING_MODE_PV. + +Prefer C in all new code. =item sqlite_allow_multiple_statements diff --git a/lib/DBD/SQLite/Constants.pm b/lib/DBD/SQLite/Constants.pm index 1f2588f..fbf7763 100644 --- a/lib/DBD/SQLite/Constants.pm +++ b/lib/DBD/SQLite/Constants.pm @@ -8,6 +8,11 @@ use warnings; use base 'Exporter'; use DBD::SQLite; our @EXPORT_OK = ( + 'DBD_SQLITE_STRING_MODE_PV', + 'DBD_SQLITE_STRING_MODE_BYTES', + 'DBD_SQLITE_STRING_MODE_UNICODE_NAIVE', + 'DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK', + 'DBD_SQLITE_STRING_MODE_UNICODE_STRICT', # allowed_return_values_from_sqlite3_txn_state qw/ SQLITE_TXN_NONE diff --git a/t/02_logon.t b/t/02_logon.t index ef73c28..64d8dda 100644 --- a/t/02_logon.t +++ b/t/02_logon.t @@ -7,6 +7,8 @@ use SQLiteTest qw/connect_ok @CALL_FUNCS/; use Test::More; use if -d ".git", "Test::FailWarnings"; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + my $show_diag = 0; foreach my $call_func (@CALL_FUNCS) { @@ -30,9 +32,9 @@ foreach my $call_func (@CALL_FUNCS) { skip( 'Unicode is not supported before 5.8.5', 2 ); } my $file = 'foo'.$$; - my $dbh = DBI->connect( "dbi:SQLite:dbname=$file;sqlite_unicode=1", '', '' ); + my $dbh = DBI->connect( "dbi:SQLite:dbname=$file;sqlite_string_mode=$unicode_opt", '', '' ); isa_ok( $dbh, 'DBI::db' ); - is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' ); + is( $dbh->{sqlite_string_mode}, $unicode_opt, 'Unicode is on' ); $dbh->disconnect; unlink $file; } diff --git a/t/12_unicode.t b/t/12_unicode.t index 1776d53..95c5a02 100644 --- a/t/12_unicode.t +++ b/t/12_unicode.t @@ -8,6 +8,8 @@ use SQLiteTest; use Test::More; use if -d ".git", "Test::FailWarnings"; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + BEGIN { requires_unicode_support() } # @@ -51,7 +53,7 @@ ok( my ($textback, $bytesback); SCOPE: { my $dbh = connect_ok( dbfile => 'foo', RaiseError => 1 ); - is( $dbh->{sqlite_unicode}, 0, 'Unicode is off' ); + is( $dbh->{sqlite_string_mode}, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_PV, 'default string mode is pv' ); ok( $dbh->do("CREATE TABLE table1 (a TEXT, b BLOB)"), 'CREATE TABLE', @@ -73,8 +75,8 @@ SCOPE: { # Start over but now activate Unicode support. SCOPE: { - my $dbh = connect_ok( dbfile => 'foo', sqlite_unicode => 1 ); - is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' ); + my $dbh = connect_ok( dbfile => 'foo', sqlite_string_mode => $unicode_opt ); + is( $dbh->{sqlite_string_mode}, $unicode_opt, 'Unicode is on' ); ($textback, $bytesback) = database_roundtrip($dbh, $utfstring, $bytestring); diff --git a/t/13_create_collation.t b/t/13_create_collation.t index a2a9d17..3aa6985 100644 --- a/t/13_create_collation.t +++ b/t/13_create_collation.t @@ -8,6 +8,8 @@ use if -d ".git", "Test::FailWarnings"; use Encode qw/decode/; use DBD::SQLite; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + BEGIN { requires_unicode_support(); } BEGIN { @@ -71,10 +73,10 @@ is($DBD::SQLite::COLLATION{foo}, \&by_num_desc, "overridden collation"); foreach my $call_func (@CALL_FUNCS) { - for my $use_unicode (0, 1) { + for my $unicode_opt ( DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) { # connect - my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode ); + my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $unicode_opt ); # populate test data my @words = qw{ @@ -84,7 +86,7 @@ foreach my $call_func (@CALL_FUNCS) { HAT hâôer féôu fêôe fèöe ferme }; - if ($use_unicode) { + if ($unicode_opt != DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES) { utf8::upgrade($_) foreach @words; } diff --git a/t/33_non_latin_path.t b/t/33_non_latin_path.t index 49deb6c..a55600e 100644 --- a/t/33_non_latin_path.t +++ b/t/33_non_latin_path.t @@ -10,6 +10,8 @@ use if -d ".git", "Test::FailWarnings"; use File::Temp (); use File::Spec::Functions ':ALL'; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + BEGIN { requires_unicode_support() } my $dir = File::Temp::tempdir( CLEANUP => 1 ); @@ -55,7 +57,7 @@ foreach my $subdir ( 'longascii', 'adatb my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef, { RaiseError => 1, PrintError => 0, - sqlite_unicode => 1, + sqlite_string_mode => $unicode_opt, } ); isa_ok( $dbh, 'DBI::db' ); }; @@ -67,7 +69,7 @@ foreach my $subdir ( 'longascii', 'adatb my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef, { RaiseError => 1, PrintError => 0, - sqlite_unicode => 1, + sqlite_string_mode => $unicode_opt, } ); isa_ok( $dbh, 'DBI::db' ); }; diff --git a/t/37_regexp.t b/t/37_regexp.t index 1260578..b5e51ad 100644 --- a/t/37_regexp.t +++ b/t/37_regexp.t @@ -31,10 +31,10 @@ use DBD::SQLite; foreach my $call_func (@CALL_FUNCS) { - for my $use_unicode (0, 1) { + for my $string_mode (DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) { # connect - my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode ); + my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $string_mode ); # The following tests are about ordering, so don't reverse! if ($dbh->selectrow_array('PRAGMA reverse_unordered_selects')) { @@ -43,7 +43,7 @@ foreach my $call_func (@CALL_FUNCS) { # populate test data my @vals = @words; - if ($use_unicode) { + if ($string_mode == DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES) { utf8::upgrade($_) foreach @vals; } diff --git a/t/43_fts3.t b/t/43_fts3.t index 934c8de..158345e 100644 --- a/t/43_fts3.t +++ b/t/43_fts3.t @@ -67,10 +67,10 @@ sub locale_tokenizer { # see also: Search::Tokenizer use DBD::SQLite; -for my $use_unicode (0, 1) { +for my $string_mode (DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) { # connect - my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode ); + my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $string_mode ); for my $fts (qw/fts3 fts4/) { next if $fts eq 'fts4' && !has_sqlite('3.7.4'); @@ -102,7 +102,7 @@ for my $use_unicode (0, 1) { my ($query, @expected) = @$t; @expected = map {$doc_ids[$_]} @expected; my $results = $dbh->selectcol_arrayref($sql, undef, $query); - is_deeply($results, \@expected, "$query ($fts, unicode=$use_unicode)"); + is_deeply($results, \@expected, "$query ($fts, string_mode=$string_mode)"); } } } diff --git a/t/68_upgraded_no_unicode.t b/t/68_upgraded_no_unicode.t index 34d745f..503c259 100644 --- a/t/68_upgraded_no_unicode.t +++ b/t/68_upgraded_no_unicode.t @@ -5,11 +5,13 @@ use strict; use warnings; use lib "t/lib"; use SQLiteTest; +use DBD::SQLite::Constants; use Test::More; use if -d ".git", "Test::FailWarnings"; { my $dbh = connect_ok( dbfile => 'foo', RaiseError => 1 ); + $dbh->{sqlite_string_mode} = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES; my $tbl_name = "\xe9p\xe9e"; utf8::encode $tbl_name; diff --git a/t/rt_25371_asymmetric_unicode.t b/t/rt_25371_asymmetric_unicode.t index ef1e978..6442cc2 100644 --- a/t/rt_25371_asymmetric_unicode.t +++ b/t/rt_25371_asymmetric_unicode.t @@ -5,10 +5,12 @@ use SQLiteTest; use Test::More; use if -d ".git", "Test::FailWarnings"; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + BEGIN { requires_unicode_support(); } -my $dbh = connect_ok( sqlite_unicode => 1 ); -is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' ); +my $dbh = connect_ok( sqlite_string_mode => $unicode_opt ); +is( $dbh->{sqlite_string_mode}, $unicode_opt, 'string mode is unicode/strict' ); ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' ); CREATE TABLE foo ( diff --git a/t/rt_25924_user_defined_func_unicode.t b/t/rt_25924_user_defined_func_unicode.t index a2845ae..fbec113 100644 --- a/t/rt_25924_user_defined_func_unicode.t +++ b/t/rt_25924_user_defined_func_unicode.t @@ -5,10 +5,12 @@ use SQLiteTest; use Test::More; use if -d ".git", "Test::FailWarnings"; +my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT; + BEGIN { requires_unicode_support() } foreach my $call_func (@CALL_FUNCS) { - my $dbh = connect_ok( sqlite_unicode => 1 ); + my $dbh = connect_ok( sqlite_string_mode => $unicode_opt ); ok($dbh->$call_func( "perl_uc", 1, \&perl_uc, "create_function" )); ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' ); diff --git a/t/rt_71311_bind_col_and_unicode.t b/t/rt_71311_bind_col_and_unicode.t index 5a9482f..324e5ef 100644 --- a/t/rt_71311_bind_col_and_unicode.t +++ b/t/rt_71311_bind_col_and_unicode.t @@ -6,9 +6,11 @@ use Test::More; use if -d ".git", "Test::FailWarnings"; use DBI qw/:sql_types/; +use DBD::SQLite::Constants; + BEGIN{ requires_unicode_support(); } -my $dbh = connect_ok(sqlite_unicode => 1); +my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT); $dbh->do('create table test1 (id integer, b blob)'); my $blob = "\x{82}\x{A0}"; diff --git a/t/rt_78833_utf8_flag_for_column_names.t b/t/rt_78833_utf8_flag_for_column_names.t index a8c16b0..3fcacd7 100644 --- a/t/rt_78833_utf8_flag_for_column_names.t +++ b/t/rt_78833_utf8_flag_for_column_names.t @@ -6,6 +6,8 @@ use Test::More; use if -d ".git", "Test::FailWarnings"; use Encode; +use DBD::SQLite::Constants; + BEGIN { requires_unicode_support() } unicode_test("\x{263A}"); # (decoded) smiley character @@ -20,7 +22,7 @@ sub unicode_test { { # tests for an environment where everything is encoded - my $dbh = connect_ok(sqlite_unicode => 0); + my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES); $dbh->do("pragma foreign_keys = on"); my $unicode_quoted = $dbh->quote_identifier($unicode_encoded); $dbh->do("create table $unicode_quoted (id, $unicode_quoted primary key)"); @@ -87,7 +89,7 @@ sub unicode_test { } { # tests for an environment where everything is decoded - my $dbh = connect_ok(sqlite_unicode => 1); + my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT); $dbh->do("pragma foreign_keys = on"); my $unicode_quoted = $dbh->quote_identifier($unicode); $dbh->do("create table $unicode_quoted (id, $unicode_quoted primary key)"); diff --git a/t/rt_96877_unicode_statements.t b/t/rt_96877_unicode_statements.t index 2129186..437a753 100644 --- a/t/rt_96877_unicode_statements.t +++ b/t/rt_96877_unicode_statements.t @@ -11,8 +11,8 @@ use if -d ".git", "Test::FailWarnings"; BEGIN { requires_unicode_support() } -my $dbh = connect_ok( sqlite_unicode => 1 ); -is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' ); +my $dbh = connect_ok( sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_NAIVE ); +is( $dbh->{sqlite_string_mode}, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_NAIVE, 'Unicode is on' ); ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' ); CREATE TABLE foo ( diff --git a/util/constants.pl b/util/constants.pl index 4ed7165..0599e3b 100644 --- a/util/constants.pl +++ b/util/constants.pl @@ -17,6 +17,14 @@ my %shorter_tags = ( compile_time_library_version_numbers => 'version', ); +my @dbd_sqlite_constants = ( + 'DBD_SQLITE_STRING_MODE_PV', + 'DBD_SQLITE_STRING_MODE_BYTES', + 'DBD_SQLITE_STRING_MODE_UNICODE_NAIVE', + 'DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK', + 'DBD_SQLITE_STRING_MODE_UNICODE_STRICT', +); + my %constants = extract_constants(); write_inc(%constants); write_pm(%constants); @@ -34,8 +42,13 @@ MODULE = DBD::SQLite PACKAGE = DBD::SQLite::Constants PROTOTYPES: ENABLE +BOOT: END + for my $constsub (@dbd_sqlite_constants) { + print {$fh} qq< newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "$constsub", newSVuv($constsub) );\n> + } + for my $tag (sort grep !/^_/, keys %constants) { _write_tag($fh, $tag, $constants{$tag}); } @@ -144,6 +157,10 @@ use DBD::SQLite; our \@EXPORT_OK = ( END + for my $const (@dbd_sqlite_constants) { + print {$fh} " '$const',\n"; + } + for my $tag (sort keys %constants) { print $fh <<"END"; # $tag