1
0
Fork 0
mirror of https://github.com/DBD-SQLite/DBD-SQLite synced 2025-06-06 13:48:55 -04:00

Merge branch 'string_mode'

This commit is contained in:
Kenichi Ishigaki 2021-05-30 21:18:42 +09:00
commit 31483a245f
20 changed files with 406 additions and 176 deletions

View file

@ -5,6 +5,13 @@ MODULE = DBD::SQLite PACKAGE = DBD::SQLite::Constants
PROTOTYPES: ENABLE
BOOT:
newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_PV", newSVuv(DBD_SQLITE_STRING_MODE_PV) );
newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_BYTES", newSVuv(DBD_SQLITE_STRING_MODE_BYTES) );
newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_NAIVE", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_NAIVE) );
newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK) );
newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "DBD_SQLITE_STRING_MODE_UNICODE_STRICT", newSVuv(DBD_SQLITE_STRING_MODE_UNICODE_STRICT) );
#if SQLITE_VERSION_NUMBER >= 3034000
IV

316
dbdimp.c
View file

@ -239,11 +239,11 @@ void
init_cxt() {
dTHX;
MY_CXT_INIT;
MY_CXT.last_dbh_is_unicode = 0;
MY_CXT.last_dbh_string_mode = DBD_SQLITE_STRING_MODE_PV;
}
SV *
stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, int is_unicode)
stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, dbd_sqlite_string_mode_t string_mode)
{
STRLEN len;
sqlite_int64 iv;
@ -271,9 +271,7 @@ stacked_sv_from_sqlite3_value(pTHX_ sqlite3_value *value, int is_unicode)
case SQLITE_TEXT:
len = sqlite3_value_bytes(value);
sv = newSVpvn((const char *)sqlite3_value_text(value), len);
if (is_unicode) {
SvUTF8_on(sv);
}
DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv, string_mode);
return sv_2mortal(sv);
case SQLITE_BLOB:
len = sqlite3_value_bytes(value);
@ -431,6 +429,44 @@ sqlite_discon_all(SV *drh, imp_drh_t *imp_drh)
return FALSE; /* no way to do this */
}
#define _croak_invalid_value(name, value) \
croak("Invalid value (%s) given for %s", value, name);
/* Like SvUV but croaks on anything other than an unsigned int. */
static inline int
my_SvUV_strict(pTHX_ SV *input, const char* name)
{
if (SvUOK(input)) {
return SvUV(input);
}
const char* pv = SvPVbyte_nolen(input);
UV uv;
int numtype = grok_number(pv, strlen(pv), &uv);
/* Anything else is invalid: */
if (numtype != IS_NUMBER_IN_UV) _croak_invalid_value(name, pv);
return uv;
}
static inline dbd_sqlite_string_mode_t
_extract_sqlite_string_mode_from_sv( pTHX_ SV* input )
{
if (SvOK(input)) {
UV val = my_SvUV_strict(aTHX_ input, "sqlite_string_mode");
if (val >= _DBD_SQLITE_STRING_MODE_COUNT) {
_croak_invalid_value("sqlite_string_mode", SvPVbyte_nolen(input));
}
return val;
}
return DBD_SQLITE_STRING_MODE_PV;
}
int
sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pass, SV *attr)
{
@ -440,7 +476,7 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa
SV **val;
int extended = 0;
int flag = 0;
int unicode = 0;
dbd_sqlite_string_mode_t string_mode = DBD_SQLITE_STRING_MODE_PV;
sqlite_trace(dbh, imp_dbh, 3, form("login '%s' (version %s)", dbname, sqlite3_version));
@ -463,13 +499,24 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa
hv_stores(hv, "ReadOnly", newSViv(1));
}
}
/* sqlite_unicode should be detected earlier, to register default functions correctly */
if (hv_exists(hv, "sqlite_unicode", 14)) {
/* sqlite_string_mode should be detected earlier, to register default functions correctly */
SV** string_mode_svp = hv_fetchs(hv, "sqlite_string_mode", 0);
if (string_mode_svp != NULL && SvOK(*string_mode_svp)) {
string_mode = _extract_sqlite_string_mode_from_sv(aTHX_ *string_mode_svp);
/* Legacy alternatives to sqlite_string_mode: */
} else if (hv_exists(hv, "sqlite_unicode", 14)) {
val = hv_fetch(hv, "sqlite_unicode", 14, 0);
unicode = (val && SvOK(*val)) ? SvIV(*val) : 0;
if ( (val && SvOK(*val)) ? SvIV(*val) : 0 ) {
string_mode = DBD_SQLITE_STRING_MODE_UNICODE_NAIVE;
}
} else if (hv_exists(hv, "unicode", 7)) {
val = hv_fetch(hv, "unicode", 7, 0);
unicode = (val && SvOK(*val)) ? SvIV(*val) : 0;
if ( (val && SvOK(*val)) ? SvIV(*val) : 0 ) {
string_mode = DBD_SQLITE_STRING_MODE_UNICODE_NAIVE;
}
}
}
rc = sqlite_open2(dbname, &(imp_dbh->db), flag, extended);
@ -478,7 +525,7 @@ sqlite_db_login6(SV *dbh, imp_dbh_t *imp_dbh, char *dbname, char *user, char *pa
}
DBIc_IMPSET_on(imp_dbh);
imp_dbh->unicode = unicode;
imp_dbh->string_mode = string_mode;
imp_dbh->functions = newAV();
imp_dbh->aggregates = newAV();
imp_dbh->collation_needed_callback = newSVsv( &PL_sv_undef );
@ -546,12 +593,7 @@ sqlite_db_do_sv(SV *dbh, imp_dbh_t *imp_dbh, SV *sv_statement)
}
/* sqlite3_prepare wants an utf8-encoded SQL statement */
if (imp_dbh->unicode) {
sv_utf8_upgrade(sv_statement);
}
else {
sv_utf8_downgrade(sv_statement, 0);
}
DBD_SQLITE_PREP_SV_FOR_SQLITE(sv_statement, imp_dbh->string_mode);
statement = SvPV_nolen(sv_statement);
@ -738,6 +780,10 @@ sqlite_db_destroy(SV *dbh, imp_dbh_t *imp_dbh)
DBIc_IMPSET_off(imp_dbh);
}
#define _warn_deprecated_if_possible(old, new) \
if (DBIc_has(imp_dbh, DBIcf_WARN)) \
warn("\"%s\" attribute will be deprecated. Use \"%s\" instead.", old, new);
int
sqlite_db_STORE_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv, SV *valuesv)
{
@ -790,26 +836,33 @@ sqlite_db_STORE_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv, SV *valuesv)
imp_dbh->prefer_numeric_type = !(! SvTRUE(valuesv));
return TRUE;
}
if (strEQ(key, "sqlite_unicode")) {
if (strEQ(key, "sqlite_string_mode")) {
dbd_sqlite_string_mode_t string_mode = _extract_sqlite_string_mode_from_sv(aTHX_ valuesv);
#if PERL_UNICODE_DOES_NOT_WORK_WELL
if (string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) {
sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl."));
string_mode = DBD_SQLITE_STRING_MODE_PV;
}
#endif
imp_dbh->string_mode = string_mode;
return TRUE;
}
if (strEQ(key, "sqlite_unicode") || strEQ(key, "unicode")) {
_warn_deprecated_if_possible(key, "sqlite_string_mode");
#if PERL_UNICODE_DOES_NOT_WORK_WELL
sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl."));
imp_dbh->unicode = 0;
imp_dbh->string_mode = DBD_SQLITE_STRING_MODE_PV;
#else
imp_dbh->unicode = !(! SvTRUE(valuesv));
#endif
return TRUE;
}
if (strEQ(key, "unicode")) {
if (DBIc_has(imp_dbh, DBIcf_WARN))
warn("\"unicode\" attribute will be deprecated. Use \"sqlite_unicode\" instead.");
#if PERL_UNICODE_DOES_NOT_WORK_WELL
sqlite_trace(dbh, imp_dbh, 3, form("Unicode support is disabled for this version of perl."));
imp_dbh->unicode = 0;
#else
imp_dbh->unicode = !(! SvTRUE(valuesv));
imp_dbh->string_mode = SvTRUE(valuesv) ? DBD_SQLITE_STRING_MODE_UNICODE_NAIVE : DBD_SQLITE_STRING_MODE_PV;
#endif
return TRUE;
}
return FALSE;
}
@ -837,22 +890,18 @@ sqlite_db_FETCH_attrib(SV *dbh, imp_dbh_t *imp_dbh, SV *keysv)
if (strEQ(key, "sqlite_prefer_numeric_type")) {
return sv_2mortal(newSViv(imp_dbh->prefer_numeric_type ? 1 : 0));
}
if (strEQ(key, "sqlite_unicode")) {
#if PERL_UNICODE_DOES_NOT_WORK_WELL
sqlite_trace(dbh, imp_dbh, 3, "Unicode support is disabled for this version of perl.");
return sv_2mortal(newSViv(0));
#else
return sv_2mortal(newSViv(imp_dbh->unicode ? 1 : 0));
#endif
if (strEQ(key, "sqlite_string_mode")) {
return sv_2mortal(newSVuv(imp_dbh->string_mode));
}
if (strEQ(key, "unicode")) {
if (DBIc_has(imp_dbh, DBIcf_WARN))
warn("\"unicode\" attribute will be deprecated. Use \"sqlite_unicode\" instead.");
if (strEQ(key, "sqlite_unicode") || strEQ(key, "unicode")) {
_warn_deprecated_if_possible(key, "sqlite_string_mode");
#if PERL_UNICODE_DOES_NOT_WORK_WELL
sqlite_trace(dbh, imp_dbh, 3, "Unicode support is disabled for this version of perl.");
return sv_2mortal(newSViv(0));
#else
return sv_2mortal(newSViv(imp_dbh->unicode ? 1 : 0));
return sv_2mortal(newSViv(imp_dbh->string_mode == DBD_SQLITE_STRING_MODE_UNICODE_NAIVE ? 1 : 0));
#endif
}
@ -885,7 +934,7 @@ sqlite_st_prepare_sv(SV *sth, imp_sth_t *imp_sth, SV *sv_statement, SV *attribs)
stmt_list_s * new_stmt;
D_imp_dbh_from_sth;
MY_CXT.last_dbh_is_unicode = imp_dbh->unicode;
MY_CXT.last_dbh_string_mode = imp_dbh->string_mode;
if (!DBIc_ACTIVE(imp_dbh)) {
sqlite_error(sth, -2, "attempt to prepare on inactive database handle");
@ -893,12 +942,7 @@ sqlite_st_prepare_sv(SV *sth, imp_sth_t *imp_sth, SV *sv_statement, SV *attribs)
}
/* sqlite3_prepare wants an utf8-encoded SQL statement */
if (imp_dbh->unicode) {
sv_utf8_upgrade(sv_statement);
}
else {
sv_utf8_downgrade(sv_statement, 0);
}
DBD_SQLITE_PREP_SV_FOR_SQLITE(sv_statement, imp_dbh->string_mode);
statement = SvPV_nolen(sv_statement);
@ -1012,12 +1056,15 @@ sqlite_st_execute(SV *sth, imp_sth_t *imp_sth)
const char *data;
int numtype = 0;
if (imp_dbh->unicode) {
if (imp_dbh->string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) {
data = SvPVutf8(value, len);
}
else {
else if (imp_dbh->string_mode == DBD_SQLITE_STRING_MODE_BYTES) {
data = SvPVbyte(value, len);
}
else {
data = SvPV(value, len);
}
/*
* XXX: For backward compatibility, it'd be better to
@ -1228,11 +1275,9 @@ sqlite_st_fetch(SV *sth, imp_sth_t *imp_sth)
}
}
sv_setpvn(AvARRAY(av)[i], val, len);
if (imp_dbh->unicode) {
SvUTF8_on(AvARRAY(av)[i]);
} else {
SvUTF8_off(AvARRAY(av)[i]);
}
DBD_SQLITE_UTF8_DECODE_IF_NEEDED(AvARRAY(av)[i], imp_dbh->string_mode);
break;
case SQLITE_BLOB:
sqlite_trace(sth, imp_sth, 5, form("fetch column %d as blob", i));
@ -1404,8 +1449,9 @@ sqlite_st_FETCH_attrib(SV *sth, imp_sth_t *imp_sth, SV *keysv)
/* if (dot) drop table name from field name */
/* fieldname = ++dot; */
SV *sv_fieldname = newSVpv(fieldname, 0);
if (imp_dbh->unicode)
SvUTF8_on(sv_fieldname);
DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv_fieldname, imp_dbh->string_mode);
av_store(av, n, sv_fieldname);
}
}
@ -1699,7 +1745,7 @@ sqlite_db_busy_timeout(pTHX_ SV *dbh, SV *timeout )
}
static void
sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sqlite3_value **value)
sqlite_db_func_dispatcher(dbd_sqlite_string_mode_t string_mode, sqlite3_context *context, int argc, sqlite3_value **value)
{
dTHX;
dSP;
@ -1714,7 +1760,7 @@ sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sq
PUSHMARK(SP);
for ( i=0; i < argc; i++ ) {
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], is_unicode));
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], string_mode));
}
PUTBACK;
@ -1745,17 +1791,46 @@ sqlite_db_func_dispatcher(int is_unicode, sqlite3_context *context, int argc, sq
}
static void
sqlite_db_func_dispatcher_unicode(sqlite3_context *context, int argc, sqlite3_value **value)
sqlite_db_func_dispatcher_unicode_naive(sqlite3_context *context, int argc, sqlite3_value **value)
{
sqlite_db_func_dispatcher(1, context, argc, value);
sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_NAIVE, context, argc, value);
}
static void
sqlite_db_func_dispatcher_no_unicode(sqlite3_context *context, int argc, sqlite3_value **value)
sqlite_db_func_dispatcher_unicode_fallback(sqlite3_context *context, int argc, sqlite3_value **value)
{
sqlite_db_func_dispatcher(0, context, argc, value);
sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK, context, argc, value);
}
static void
sqlite_db_func_dispatcher_unicode_strict(sqlite3_context *context, int argc, sqlite3_value **value)
{
sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_UNICODE_STRICT, context, argc, value);
}
static void
sqlite_db_func_dispatcher_bytes(sqlite3_context *context, int argc, sqlite3_value **value)
{
sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_BYTES, context, argc, value);
}
static void
sqlite_db_func_dispatcher_pv(sqlite3_context *context, int argc, sqlite3_value **value)
{
sqlite_db_func_dispatcher(DBD_SQLITE_STRING_MODE_PV, context, argc, value);
}
typedef void (*dispatch_func_t)(sqlite3_context*, int, sqlite3_value**);
static dispatch_func_t _FUNC_DISPATCHER[_DBD_SQLITE_STRING_MODE_COUNT] = {
sqlite_db_func_dispatcher_pv,
sqlite_db_func_dispatcher_bytes,
NULL, NULL,
sqlite_db_func_dispatcher_unicode_naive,
sqlite_db_func_dispatcher_unicode_fallback,
sqlite_db_func_dispatcher_unicode_strict,
};
int
sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func, int flags)
{
@ -1777,8 +1852,7 @@ sqlite_db_create_function(pTHX_ SV *dbh, const char *name, int argc, SV *func, i
/* warn("create_function %s with %d args\n", name, argc); */
rc = sqlite3_create_function( imp_dbh->db, name, argc, SQLITE_UTF8|flags,
func_sv,
imp_dbh->unicode ? sqlite_db_func_dispatcher_unicode
: sqlite_db_func_dispatcher_no_unicode,
_FUNC_DISPATCHER[imp_dbh->string_mode],
NULL, NULL );
if ( rc != SQLITE_OK ) {
sqlite_error(dbh, rc, form("sqlite_create_function failed with error %s", sqlite3_errmsg(imp_dbh->db)));
@ -1943,7 +2017,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context,
{
dTHX;
dSP;
int i, is_unicode = 0; /* TODO : find out from db handle */
int i, string_mode = DBD_SQLITE_STRING_MODE_PV; /* TODO : find out from db handle */
aggrInfo *aggr;
aggr = sqlite3_aggregate_context(context, sizeof (aggrInfo));
@ -1965,7 +2039,7 @@ sqlite_db_aggr_step_dispatcher(sqlite3_context *context,
PUSHMARK(SP);
XPUSHs( sv_2mortal( newSVsv( aggr->aggr_inst ) ));
for ( i=0; i < argc; i++ ) {
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], is_unicode));
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ value[i], string_mode));
}
PUTBACK;
@ -2081,71 +2155,78 @@ sqlite_db_create_aggregate(pTHX_ SV *dbh, const char *name, int argc, SV *aggr_p
return TRUE;
}
#define SQLITE_DB_COLLATION_BASE(func, sv1, sv2) STMT_START { \
int cmp = 0; \
int n_retval, i; \
\
ENTER; \
SAVETMPS; \
PUSHMARK(SP); \
XPUSHs( sv_2mortal( sv1 ) ); \
XPUSHs( sv_2mortal( sv2 ) ); \
PUTBACK; \
n_retval = call_sv(func, G_SCALAR); \
SPAGAIN; \
if (n_retval != 1) { \
warn("collation function returned %d arguments", n_retval); \
} \
for(i = 0; i < n_retval; i++) { \
cmp = POPi; \
} \
PUTBACK; \
FREETMPS; \
LEAVE; \
\
return cmp; \
} STMT_END
int
sqlite_db_collation_dispatcher(void *func, int len1, const void *string1,
int len2, const void *string2)
{
dTHX;
dSP;
int cmp = 0;
int n_retval, i;
ENTER;
SAVETMPS;
PUSHMARK(SP);
XPUSHs( sv_2mortal( newSVpvn( string1, len1) ) );
XPUSHs( sv_2mortal( newSVpvn( string2, len2) ) );
PUTBACK;
n_retval = call_sv(func, G_SCALAR);
SPAGAIN;
if (n_retval != 1) {
warn("collation function returned %d arguments", n_retval);
}
for(i = 0; i < n_retval; i++) {
cmp = POPi;
}
PUTBACK;
FREETMPS;
LEAVE;
return cmp;
SQLITE_DB_COLLATION_BASE(func, newSVpvn( string1, len1), newSVpvn( string2, len2));
}
int
sqlite_db_collation_dispatcher_utf8(void *func, int len1, const void *string1,
sqlite_db_collation_dispatcher_utf8_naive(void *func, int len1, const void *string1,
int len2, const void *string2)
{
dTHX;
dSP;
int cmp = 0;
int n_retval, i;
SV *sv1, *sv2;
ENTER;
SAVETMPS;
PUSHMARK(SP);
sv1 = newSVpvn(string1, len1);
SvUTF8_on(sv1);
sv2 = newSVpvn(string2, len2);
SvUTF8_on(sv2);
XPUSHs( sv_2mortal( sv1 ) );
XPUSHs( sv_2mortal( sv2 ) );
PUTBACK;
n_retval = call_sv(func, G_SCALAR);
SPAGAIN;
if (n_retval != 1) {
warn("collation function returned %d arguments", n_retval);
}
for(i = 0; i < n_retval; i++) {
cmp = POPi;
}
PUTBACK;
FREETMPS;
LEAVE;
return cmp;
SQLITE_DB_COLLATION_BASE(func, newSVpvn_flags( string1, len1, SVf_UTF8), newSVpvn_flags( string2, len2, SVf_UTF8));
}
int
sqlite_db_collation_dispatcher_utf8_fallback(void *func, int len1, const void *string1,
int len2, const void *string2)
{
dTHX;
dSP;
SV* sv1 = newSVpvn( string1, len1);
SV* sv2 = newSVpvn( string2, len2);
DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv1);
DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv2);
SQLITE_DB_COLLATION_BASE(func, sv1, sv2);
}
typedef int (*collation_dispatch_func_t)(void *, int, const void *, int, const void *);
static collation_dispatch_func_t _COLLATION_DISPATCHER[_DBD_SQLITE_STRING_MODE_COUNT] = {
sqlite_db_collation_dispatcher,
sqlite_db_collation_dispatcher,
NULL, NULL,
sqlite_db_collation_dispatcher_utf8_naive,
sqlite_db_collation_dispatcher_utf8_fallback,
sqlite_db_collation_dispatcher_utf8_fallback,
};
int
sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func)
{
@ -2181,8 +2262,7 @@ sqlite_db_create_collation(pTHX_ SV *dbh, const char *name, SV *func)
rv = sqlite3_create_collation(
imp_dbh->db, name, SQLITE_UTF8,
func_sv,
imp_dbh->unicode ? sqlite_db_collation_dispatcher_utf8
: sqlite_db_collation_dispatcher
_COLLATION_DISPATCHER[imp_dbh->string_mode]
);
if ( rv != SQLITE_OK ) {

View file

@ -7,8 +7,24 @@
#define MY_CXT_KEY "DBD::SQLite::_guts" XS_VERSION
typedef enum {
DBD_SQLITE_STRING_MODE_PV,
DBD_SQLITE_STRING_MODE_BYTES,
/* Leave space here so that we can use DBD_SQLITE_STRING_MODE_UNICODE_ANY
as a means of checking for any unicode mode. */
DBD_SQLITE_STRING_MODE_UNICODE_NAIVE = 4,
DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK,
DBD_SQLITE_STRING_MODE_UNICODE_STRICT,
_DBD_SQLITE_STRING_MODE_COUNT,
} dbd_sqlite_string_mode_t;
#define DBD_SQLITE_STRING_MODE_UNICODE_ANY DBD_SQLITE_STRING_MODE_UNICODE_NAIVE
typedef struct {
int last_dbh_is_unicode;
dbd_sqlite_string_mode_t last_dbh_string_mode;
} my_cxt_t;
#define PERL_UNICODE_DOES_NOT_WORK_WELL \
@ -22,6 +38,41 @@ typedef struct {
#define sqlite3_int64 sqlite_int64
#endif
#define DBD_SQLITE_UTF8_DECODE_NAIVE(sv) SvUTF8_on(sv)
#define DBD_SQLITE_UTF8_DECODE_CHECKED(sv, onfail) ( \
is_utf8_string((U8*) SvPVX(sv), SvCUR(sv)) \
? SvUTF8_on(sv) \
: onfail("Received invalid UTF-8 from SQLite; cannot decode!") \
)
#define DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv) ( \
DBD_SQLITE_UTF8_DECODE_CHECKED(sv, warn) \
)
#define DBD_SQLITE_UTF8_DECODE_STRICT(sv) ( \
DBD_SQLITE_UTF8_DECODE_CHECKED(sv, croak) \
)
#define DBD_SQLITE_PREP_SV_FOR_SQLITE(sv, string_mode) STMT_START { \
if (string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) { \
sv_utf8_upgrade(sv); \
} \
else if (string_mode == DBD_SQLITE_STRING_MODE_BYTES) { \
sv_utf8_downgrade(sv, 0); \
} \
} STMT_END
#define DBD_SQLITE_UTF8_DECODE_IF_NEEDED(sv, string_mode) ( \
string_mode == DBD_SQLITE_STRING_MODE_UNICODE_NAIVE \
? DBD_SQLITE_UTF8_DECODE_NAIVE(sv) \
: string_mode == DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK \
? DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(sv) \
: string_mode == DBD_SQLITE_STRING_MODE_UNICODE_STRICT \
? DBD_SQLITE_UTF8_DECODE_STRICT(sv) \
: 0 \
)
/* A linked list of statements prepared by this module */
typedef struct stmt_list_s stmt_list_s;
@ -41,7 +92,7 @@ struct imp_dbh_st {
dbih_dbc_t com;
/* sqlite specific bits */
sqlite3 *db;
bool unicode;
dbd_sqlite_string_mode_t string_mode;
bool handle_binary_nulls;
int timeout;
AV *functions;

View file

@ -94,6 +94,30 @@ static int perl_tokenizer_Open(
SV *perl_string;
int n_retval;
/* build a Perl copy of the input string */
if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */
nBytes = strlen(pInput);
}
/* SVs_TEMP will call sv_2mortal */
perl_string = newSVpvn_flags(pInput, nBytes, SVs_TEMP);
switch (MY_CXT.last_dbh_string_mode) {
DBD_SQLITE_STRING_MODE_UNICODE_NAIVE:
DBD_SQLITE_UTF8_DECODE_NAIVE(perl_string);
break;
DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK:
DBD_SQLITE_STRING_MODE_UNICODE_STRICT:
DBD_SQLITE_UTF8_DECODE_WITH_FALLBACK(perl_string);
break;
default:
break;
}
DBD_SQLITE_UTF8_DECODE_IF_NEEDED(perl_string, MY_CXT.last_dbh_string_mode);
perl_tokenizer *t = (perl_tokenizer *)pTokenizer;
/* allocate and initialize the cursor struct */
@ -102,29 +126,17 @@ static int perl_tokenizer_Open(
memset(c, 0, sizeof(*c));
*ppCursor = &c->base;
/* flags for creating the Perl SV containing the input string */
flags = SVs_TEMP; /* will call sv_2mortal */
/* special handling if working with utf8 strings */
if (MY_CXT.last_dbh_is_unicode) {
if (MY_CXT.last_dbh_string_mode & DBD_SQLITE_STRING_MODE_UNICODE_ANY) {
/* data to keep track of byte positions */
c->currentByte = c->pInput = pInput;
c->currentChar = 0;
/* string passed to Perl needs to be flagged as utf8 */
flags |= SVf_UTF8;
}
ENTER;
SAVETMPS;
/* build a Perl copy of the input string */
if (nBytes < 0) { /* we get -1 from fts3. Don't know why ! */
nBytes = strlen(pInput);
}
perl_string = newSVpvn_flags(pInput, nBytes, flags);
/* call the tokenizer coderef */
PUSHMARK(SP);
XPUSHs(perl_string);

View file

@ -432,7 +432,7 @@ static int perl_vt_Filter( sqlite3_vtab_cursor *pVtabCursor,
dSP;
dMY_CXT;
int i, count;
int is_unicode = MY_CXT.last_dbh_is_unicode;
dbd_sqlite_string_mode_t string_mode = MY_CXT.last_dbh_string_mode;
ENTER;
SAVETMPS;
@ -443,7 +443,7 @@ static int perl_vt_Filter( sqlite3_vtab_cursor *pVtabCursor,
XPUSHs(sv_2mortal(newSViv(idxNum)));
XPUSHs(sv_2mortal(newSVpv(idxStr, 0)));
for(i = 0; i < argc; i++) {
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], is_unicode));
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], string_mode));
}
PUTBACK;
count = call_method("FILTER", G_VOID);
@ -588,7 +588,7 @@ static int perl_vt_Update( sqlite3_vtab *pVTab,
dSP;
dMY_CXT;
int count, i;
int is_unicode = MY_CXT.last_dbh_is_unicode;
dbd_sqlite_string_mode_t string_mode = MY_CXT.last_dbh_string_mode;
int rc = SQLITE_ERROR;
SV *rowidsv;
@ -599,7 +599,7 @@ static int perl_vt_Update( sqlite3_vtab *pVTab,
PUSHMARK(SP);
XPUSHs(((perl_vtab *) pVTab)->perl_vtab_obj);
for(i = 0; i < argc; i++) {
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], is_unicode));
XPUSHs(stacked_sv_from_sqlite3_value(aTHX_ argv[i], string_mode));
}
PUTBACK;
count = call_method ("_SQLITE_UPDATE", G_SCALAR);
@ -704,8 +704,9 @@ static int perl_vt_FindFunction(sqlite3_vtab *pVTab,
/* return function information for sqlite3 within *pxFunc and *ppArg */
is_overloaded = coderef && SvTRUE(coderef);
if (is_overloaded) {
*pxFunc = MY_CXT.last_dbh_is_unicode ? sqlite_db_func_dispatcher_unicode
: sqlite_db_func_dispatcher_no_unicode;
*pxFunc = _FUNC_DISPATCHER[MY_CXT.last_dbh_string_mode];
*ppArg = coderef;
}

View file

@ -1622,32 +1622,71 @@ Your sweet spot probably lies somewhere in between.
Returns the version of the SQLite library which B<DBD::SQLite> is using,
e.g., "3.26.0". Can only be read.
=item sqlite_unicode
=item sqlite_string_mode
If set to a true value, B<DBD::SQLite> will turn the UTF-8 flag on for all
text strings coming out of the database (this feature is currently disabled
for perl < 5.8.5). For more details on the UTF-8 flag see
L<perlunicode>. The default is for the UTF-8 flag to be turned off.
SQLite strings are simple arrays of bytes, but Perl strings can store any
arbitrary Unicode code point. Thus, DBD::SQLite has to adopt some method
of translating between those two models. This parameter defines that
translation.
Also note that due to some bizarreness in SQLite's type system (see
Accepted values are the following constants:
=over
=item * DBD_SQLITE_STRING_MODE_BYTES: All strings are assumed to
represent bytes. A Perl string that contains any code point above 255
will trigger an exception. This is appropriate for Latin-1 strings,
binary data, pre-encoded UTF-8 strings, etc.
=item * DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK: All Perl strings are encoded
to UTF-8 before being given to SQLite. Perl will B<try> to decode SQLite
strings as UTF-8 when giving them to Perl. Should any such string not be
valid UTF-8, a warning is thrown, and the string is left undecoded.
This is appropriate for strings that are decoded to characters via,
e.g., L<Encode/decode>.
Also note that, due to some bizarreness in SQLite's type system (see
L<https://www.sqlite.org/datatype3.html>), if you want to retain
blob-style behavior for B<some> columns under C<< $dbh->{sqlite_unicode} = 1
>> (say, to store images in the database), you have to state so
blob-style behavior for B<some> columns under DBD_SQLITE_STRING_MODE_UNICODE
(say, to store images in the database), you have to state so
explicitly using the 3-argument form of L<DBI/bind_param> when doing
updates:
use DBI qw(:sql_types);
$dbh->{sqlite_unicode} = 1;
$dbh->{string_mode} = DBD_SQLITE_STRING_MODE_UNICODE;
my $sth = $dbh->prepare("INSERT INTO mytable (blobcolumn) VALUES (?)");
# Binary_data will be stored as is.
$sth->bind_param(1, $binary_data, SQL_BLOB);
Defining the column type as C<BLOB> in the DDL is B<not> sufficient.
This attribute was originally named as C<unicode>, and renamed to
C<sqlite_unicode> for integrity since version 1.26_06. Old C<unicode>
attribute is still accessible but will be deprecated in the near future.
=item * DBD_SQLITE_STRING_MODE_UNICODE_STRICT: Like
DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK but usually throws an exception
rather than a warning if SQLite sends invalid UTF-8. (In Perl callbacks
from SQLite we still warn instead.)
=item * DBD_SQLITE_STRING_MODULE_UNICODE_NAIVE: Like
DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK but uses a "naïve" UTF-8 decoding
method that forgoes validation. This is marginally faster than a validated
decode, but it can also B<corrupt> B<Perl> B<itself!>
=item * DBD_SQLITE_STRING_MODE_PV (default, but B<DO> B<NOT> B<USE>): Like
DBD_SQLITE_STRING_MODE_BYTES, but when translating Perl strings to SQLite
the Perl string's internal byte buffer is given to SQLite. B<This> B<is>
B<bad>, but it's been the default for many years, and changing that would
break existing applications.
=back
=item C<sqlite_unicode> or C<unicode> (deprecated)
If truthy, equivalent to setting C<sqlite_string_mode> to
DBD_SQLITE_STRING_MODE_UNICODE_NAIVE; if falsy, equivalent to
DBD_SQLITE_STRING_MODE_PV.
Prefer C<sqlite_string_mode> in all new code.
=item sqlite_allow_multiple_statements

View file

@ -8,6 +8,11 @@ use warnings;
use base 'Exporter';
use DBD::SQLite;
our @EXPORT_OK = (
'DBD_SQLITE_STRING_MODE_PV',
'DBD_SQLITE_STRING_MODE_BYTES',
'DBD_SQLITE_STRING_MODE_UNICODE_NAIVE',
'DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK',
'DBD_SQLITE_STRING_MODE_UNICODE_STRICT',
# allowed_return_values_from_sqlite3_txn_state
qw/
SQLITE_TXN_NONE

View file

@ -7,6 +7,8 @@ use SQLiteTest qw/connect_ok @CALL_FUNCS/;
use Test::More;
use if -d ".git", "Test::FailWarnings";
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
my $show_diag = 0;
foreach my $call_func (@CALL_FUNCS) {
@ -30,9 +32,9 @@ foreach my $call_func (@CALL_FUNCS) {
skip( 'Unicode is not supported before 5.8.5', 2 );
}
my $file = 'foo'.$$;
my $dbh = DBI->connect( "dbi:SQLite:dbname=$file;sqlite_unicode=1", '', '' );
my $dbh = DBI->connect( "dbi:SQLite:dbname=$file;sqlite_string_mode=$unicode_opt", '', '' );
isa_ok( $dbh, 'DBI::db' );
is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' );
is( $dbh->{sqlite_string_mode}, $unicode_opt, 'Unicode is on' );
$dbh->disconnect;
unlink $file;
}

View file

@ -8,6 +8,8 @@ use SQLiteTest;
use Test::More;
use if -d ".git", "Test::FailWarnings";
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
BEGIN { requires_unicode_support() }
#
@ -51,7 +53,7 @@ ok(
my ($textback, $bytesback);
SCOPE: {
my $dbh = connect_ok( dbfile => 'foo', RaiseError => 1 );
is( $dbh->{sqlite_unicode}, 0, 'Unicode is off' );
is( $dbh->{sqlite_string_mode}, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_PV, 'default string mode is pv' );
ok(
$dbh->do("CREATE TABLE table1 (a TEXT, b BLOB)"),
'CREATE TABLE',
@ -73,8 +75,8 @@ SCOPE: {
# Start over but now activate Unicode support.
SCOPE: {
my $dbh = connect_ok( dbfile => 'foo', sqlite_unicode => 1 );
is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' );
my $dbh = connect_ok( dbfile => 'foo', sqlite_string_mode => $unicode_opt );
is( $dbh->{sqlite_string_mode}, $unicode_opt, 'Unicode is on' );
($textback, $bytesback) = database_roundtrip($dbh, $utfstring, $bytestring);

View file

@ -8,6 +8,8 @@ use if -d ".git", "Test::FailWarnings";
use Encode qw/decode/;
use DBD::SQLite;
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
BEGIN { requires_unicode_support(); }
BEGIN {
@ -71,10 +73,10 @@ is($DBD::SQLite::COLLATION{foo}, \&by_num_desc, "overridden collation");
foreach my $call_func (@CALL_FUNCS) {
for my $use_unicode (0, 1) {
for my $unicode_opt ( DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) {
# connect
my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );
my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $unicode_opt );
# populate test data
my @words = qw{
@ -84,7 +86,7 @@ foreach my $call_func (@CALL_FUNCS) {
HAT hâôer
féôu fêôe fèöe ferme
};
if ($use_unicode) {
if ($unicode_opt != DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES) {
utf8::upgrade($_) foreach @words;
}

View file

@ -10,6 +10,8 @@ use if -d ".git", "Test::FailWarnings";
use File::Temp ();
use File::Spec::Functions ':ALL';
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
BEGIN { requires_unicode_support() }
my $dir = File::Temp::tempdir( CLEANUP => 1 );
@ -55,7 +57,7 @@ foreach my $subdir ( 'longascii', 'adatb
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef, {
RaiseError => 1,
PrintError => 0,
sqlite_unicode => 1,
sqlite_string_mode => $unicode_opt,
} );
isa_ok( $dbh, 'DBI::db' );
};
@ -67,7 +69,7 @@ foreach my $subdir ( 'longascii', 'adatb
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef, {
RaiseError => 1,
PrintError => 0,
sqlite_unicode => 1,
sqlite_string_mode => $unicode_opt,
} );
isa_ok( $dbh, 'DBI::db' );
};

View file

@ -31,10 +31,10 @@ use DBD::SQLite;
foreach my $call_func (@CALL_FUNCS) {
for my $use_unicode (0, 1) {
for my $string_mode (DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) {
# connect
my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );
my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $string_mode );
# The following tests are about ordering, so don't reverse!
if ($dbh->selectrow_array('PRAGMA reverse_unordered_selects')) {
@ -43,7 +43,7 @@ foreach my $call_func (@CALL_FUNCS) {
# populate test data
my @vals = @words;
if ($use_unicode) {
if ($string_mode == DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES) {
utf8::upgrade($_) foreach @vals;
}

View file

@ -59,10 +59,10 @@ sub Unicode_Word_tokenizer { # see also: Search::Tokenizer
use DBD::SQLite;
for my $use_unicode (0, 1) {
for my $string_mode (DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT) {
# connect
my $dbh = connect_ok( RaiseError => 1, sqlite_unicode => $use_unicode );
my $dbh = connect_ok( RaiseError => 1, sqlite_string_mode => $string_mode );
for my $fts (qw/fts3 fts4/) {
next if $fts eq 'fts4' && !has_sqlite('3.7.4');
@ -94,7 +94,7 @@ for my $use_unicode (0, 1) {
my ($query, @expected) = @$t;
@expected = map {$doc_ids[$_]} @expected;
my $results = $dbh->selectcol_arrayref($sql, undef, $query);
is_deeply($results, \@expected, "$query ($fts, unicode=$use_unicode)");
is_deeply($results, \@expected, "$query ($fts, string_mode=$string_mode)");
}
}

View file

@ -5,11 +5,13 @@ use strict;
use warnings;
use lib "t/lib";
use SQLiteTest;
use DBD::SQLite::Constants;
use Test::More;
use if -d ".git", "Test::FailWarnings";
{
my $dbh = connect_ok( dbfile => 'foo', RaiseError => 1 );
$dbh->{sqlite_string_mode} = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES;
my $tbl_name = "\xe9p\xe9e";
utf8::encode $tbl_name;

View file

@ -5,10 +5,12 @@ use SQLiteTest;
use Test::More;
use if -d ".git", "Test::FailWarnings";
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
BEGIN { requires_unicode_support(); }
my $dbh = connect_ok( sqlite_unicode => 1 );
is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' );
my $dbh = connect_ok( sqlite_string_mode => $unicode_opt );
is( $dbh->{sqlite_string_mode}, $unicode_opt, 'string mode is unicode/strict' );
ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' );
CREATE TABLE foo (

View file

@ -5,10 +5,12 @@ use SQLiteTest;
use Test::More;
use if -d ".git", "Test::FailWarnings";
my $unicode_opt = DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT;
BEGIN { requires_unicode_support() }
foreach my $call_func (@CALL_FUNCS) {
my $dbh = connect_ok( sqlite_unicode => 1 );
my $dbh = connect_ok( sqlite_string_mode => $unicode_opt );
ok($dbh->$call_func( "perl_uc", 1, \&perl_uc, "create_function" ));
ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' );

View file

@ -6,9 +6,11 @@ use Test::More;
use if -d ".git", "Test::FailWarnings";
use DBI qw/:sql_types/;
use DBD::SQLite::Constants;
BEGIN{ requires_unicode_support(); }
my $dbh = connect_ok(sqlite_unicode => 1);
my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT);
$dbh->do('create table test1 (id integer, b blob)');
my $blob = "\x{82}\x{A0}";

View file

@ -6,6 +6,8 @@ use Test::More;
use if -d ".git", "Test::FailWarnings";
use Encode;
use DBD::SQLite::Constants;
BEGIN { requires_unicode_support() }
unicode_test("\x{263A}"); # (decoded) smiley character
@ -20,7 +22,7 @@ sub unicode_test {
{ # tests for an environment where everything is encoded
my $dbh = connect_ok(sqlite_unicode => 0);
my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_BYTES);
$dbh->do("pragma foreign_keys = on");
my $unicode_quoted = $dbh->quote_identifier($unicode_encoded);
$dbh->do("create table $unicode_quoted (id, $unicode_quoted primary key)");
@ -87,7 +89,7 @@ sub unicode_test {
}
{ # tests for an environment where everything is decoded
my $dbh = connect_ok(sqlite_unicode => 1);
my $dbh = connect_ok(sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_STRICT);
$dbh->do("pragma foreign_keys = on");
my $unicode_quoted = $dbh->quote_identifier($unicode);
$dbh->do("create table $unicode_quoted (id, $unicode_quoted primary key)");

View file

@ -11,8 +11,8 @@ use if -d ".git", "Test::FailWarnings";
BEGIN { requires_unicode_support() }
my $dbh = connect_ok( sqlite_unicode => 1 );
is( $dbh->{sqlite_unicode}, 1, 'Unicode is on' );
my $dbh = connect_ok( sqlite_string_mode => DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_NAIVE );
is( $dbh->{sqlite_string_mode}, DBD::SQLite::Constants::DBD_SQLITE_STRING_MODE_UNICODE_NAIVE, 'Unicode is on' );
ok( $dbh->do(<<'END_SQL'), 'CREATE TABLE' );
CREATE TABLE foo (

View file

@ -17,6 +17,14 @@ my %shorter_tags = (
compile_time_library_version_numbers => 'version',
);
my @dbd_sqlite_constants = (
'DBD_SQLITE_STRING_MODE_PV',
'DBD_SQLITE_STRING_MODE_BYTES',
'DBD_SQLITE_STRING_MODE_UNICODE_NAIVE',
'DBD_SQLITE_STRING_MODE_UNICODE_FALLBACK',
'DBD_SQLITE_STRING_MODE_UNICODE_STRICT',
);
my %constants = extract_constants();
write_inc(%constants);
write_pm(%constants);
@ -34,8 +42,13 @@ MODULE = DBD::SQLite PACKAGE = DBD::SQLite::Constants
PROTOTYPES: ENABLE
BOOT:
END
for my $constsub (@dbd_sqlite_constants) {
print {$fh} qq< newCONSTSUB( gv_stashpv("DBD::SQLite::Constants", FALSE), "$constsub", newSVuv($constsub) );\n>
}
for my $tag (sort grep !/^_/, keys %constants) {
_write_tag($fh, $tag, $constants{$tag});
}
@ -144,6 +157,10 @@ use DBD::SQLite;
our \@EXPORT_OK = (
END
for my $const (@dbd_sqlite_constants) {
print {$fh} " '$const',\n";
}
for my $tag (sort keys %constants) {
print $fh <<"END";
# $tag