Fix all type errors

2025-06-07 14:19:10 -04:00 · 2020-08-30 07:59:53 +02:00 · 2020-08-30 07:59:53 +02:00 · 1ad93cacc0
commit 1ad93cacc0
parent 8c0209e4f4
2 changed files with 89 additions and 87 deletions
--- a/dbdimp_tokenizer.inc
+++ b/dbdimp_tokenizer.inc
@ -16,23 +16,14 @@ typedef struct perl_fts3_tokenizer_cursor {
    int         currentChar;     /* char position corresponding to currentByte */
 } perl_fts3_tokenizer_cursor;

-typedef struct perl_fts5_tokenizer {
-    fts5_tokenizer base;
+typedef struct perl_Fts5Tokenizer {
+    Fts5Tokenizer base;
    SV *coderef;                 /* the perl tokenizer is a coderef that takes
-                                    a string and returns a cursor coderef */
-} perl_fts5_tokenizer;
-
-typedef struct perl_fts5_tokenizer_cursor {
-    // fts5_tokenizer_cursor base;
-    SV *coderef;                 /* ref to the closure that returns terms */
-    char *pToken;                /* storage for a copy of the last token */
-    int nTokenAllocated;         /* space allocated to pToken buffer */
-
-    /* members below are only used if the input string is in utf8 */
-    const char *pInput;          /* input we are tokenizing */
-    const char *lastByteOffset;  /* offset into pInput */
-    int lastCharOffset;          /* char offset corresponding to lastByteOffset */
-} perl_fts5_tokenizer_cursor;
+                                 ** a string and some parameters and
+                                 ** in turn calls the xToken() function
+                                 ** passed to it
+                                 */
+} perl_Fts5Tokenizer;

 /*
 ** Create a new tokenizer instance.
@ -340,20 +331,19 @@ int sqlite_db_register_fts3_perl_tokenizer(pTHX_ SV *dbh)
 ** where qualified::function::name is a fully qualified perl function
 */
 static int perl_fts5_tokenizer_Create(
-    int argc, const char * const *argv,
-    sqlite3_tokenizer **ppTokenizer
+    void* pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut
 ){
    dTHX;
    dSP;
    int n_retval;
    SV *retval;
-    perl_fts3_tokenizer *t;
+    perl_Fts5Tokenizer *t;

-    if (!argc) {
+    if (!nArg) {
        return SQLITE_ERROR;
    }

-    t = (perl_fts3_tokenizer *) sqlite3_malloc(sizeof(*t));
+    t = (perl_Fts5Tokenizer *) sqlite3_malloc(sizeof(*t));
    if( t==NULL ) return SQLITE_NOMEM;
    memset(t, 0, sizeof(*t));

@ -363,7 +353,7 @@ static int perl_fts5_tokenizer_Create(
    /* call the qualified::function::name */
    PUSHMARK(SP);
    PUTBACK;
-    n_retval = call_pv(argv[0], G_SCALAR);
+    n_retval = call_pv(azArg[0], G_SCALAR);
    SPAGAIN;

    /* store a copy of the returned coderef into the tokenizer structure */
@ -372,7 +362,7 @@ static int perl_fts5_tokenizer_Create(
    }
    retval = POPs;
    t->coderef   = newSVsv(retval);
-    *ppTokenizer = &t->base;
+    *ppOut = &t->base;

    PUTBACK;
    FREETMPS;
@ -384,27 +374,33 @@ static int perl_fts5_tokenizer_Create(
 /*
 ** Destroy a tokenizer
 */
-static int perl_fts5_tokenizer_Delete(Fts5Tokenizer *pTokenizer){
+static void perl_fts5_tokenizer_Delete(Fts5Tokenizer *pTokenizer){
    dTHX;
-    perl_fts5_tokenizer *t = (perl_fts5_tokenizer *) pTokenizer;
+    perl_Fts5Tokenizer *t = (perl_Fts5Tokenizer *) pTokenizer;
    sv_free(t->coderef);
    sqlite3_free(t);
-    return SQLITE_OK;
+    return;
 }

 /*
-** Extract the next token from a tokenization cursor.  The cursor must
-** have been opened by a prior call to perl_fts3_tokenizer_Open().
+** This does a tokenizing run over the string. Found tokens (and synonyms)
+** are stored by calling xToken()
 */
 static int perl_fts5_tokenizer_Tokenize(
-    sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by perl_fts3_tokenizer_Open */
-    const char **ppToken,               /* OUT: *ppToken is the token text */
-    int *pnBytes,                       /* OUT: Number of bytes in token */
-    int *piStartOffset,                 /* OUT: Starting offset of token */
-    int *piEndOffset,                   /* OUT: Ending offset of token */
-    int *piPosition                     /* OUT: Position integer of token */
+      Fts5Tokenizer* tokenizer,
+      void *pCtx,
+      int flags,            /* Mask of FTS5_TOKENIZE_* flags */
+      const char *pText, int nText,
+      int (*xToken)(
+        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
+        int tflags,         /* Mask of FTS5_TOKEN_* flags */
+        const char *pToken, /* Pointer to buffer containing token */
+        int nToken,         /* Size of token in bytes */
+        int iStart,         /* Byte offset of token within input text */
+        int iEnd            /* Byte offset of end of token within input text */
+      )
 ){
-    perl_fts3_tokenizer_cursor *c = (perl_fts3_tokenizer_cursor *) pCursor;
+    perl_Fts5Tokenizer *c = (perl_Fts5Tokenizer *) tokenizer;
    int result;
    int n_retval;
    char *token;
@ -418,61 +414,67 @@ static int perl_fts5_tokenizer_Tokenize(
    ENTER;
    SAVETMPS;

-    /* call the cursor */
+    /* call the Perl tokenizer, and pass it our token callback */
    PUSHMARK(SP);
    PUTBACK;
+
+    // XXX Wrap the "found token" callback, and pass it to the user
+    //     Then, restructure the data if it is UTF-8
+    //     First, do all of this in Perl so it is easier to debug
+
+    ///* if we get back an empty list, there is no more token */
+    //if (n_retval == 0) {
+    //    result = SQLITE_DONE;
+    //}
+    ///* otherwise, get token details from the return list */
+    //else {
+    //     if (n_retval != 5) {
+    //        warn("tokenizer cursor returned %d arguments", n_retval);
+    //    }
+    //    *piPosition    = POPi;
+    //    *piEndOffset   = POPi;
+    //    *piStartOffset = POPi;
+    //    *pnBytes       = POPi;
+    //    token          = POPpx;
+    //
+    //    if (c->pInput) { /* if working with utf8 data */
+    //
+    //        /* recompute *pnBytes in bytes, not in chars */
+    //        *pnBytes = strlen(token);
+    //
+    //        /* recompute start/end offsets in bytes, not in chars */
+    //        hop            = *piStartOffset - c->lastCharOffset;
+    //        byteOffset     = (char*)utf8_hop((U8*)c->lastByteOffset, hop);
+    //        hop            = *piEndOffset - *piStartOffset;
+    //        *piStartOffset = byteOffset - c->pInput;
+    //        byteOffset     = (char*)utf8_hop((U8*)byteOffset, hop);
+    //        *piEndOffset   = byteOffset - c->pInput;
+    //
+    //        /* remember where we are for next round */
+    //        c->lastCharOffset = *piEndOffset,
+    //        c->lastByteOffset = byteOffset;
+    //    }
+    //
+    //    /* make sure we have enough storage for copying the token */
+    //    if (*pnBytes > c->nTokenAllocated ){
+    //        char *pNew;
+    //        c->nTokenAllocated = *pnBytes + 20;
+    //        pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
+    //        if( !pNew ) return SQLITE_NOMEM;
+    //        c->pToken = pNew;
+    //    }
+    //
+    //    /* need to copy the token into the C cursor before perl frees that
+    //       memory */
+    //    memcpy(c->pToken, token, *pnBytes);
+    //    *ppToken  = c->pToken;
+    //
+    //    result = SQLITE_OK;
+    //
+
    n_retval = call_sv(c->coderef, G_ARRAY);
    SPAGAIN;

-    /* if we get back an empty list, there is no more token */
-    if (n_retval == 0) {
-        result = SQLITE_DONE;
-    }
-    /* otherwise, get token details from the return list */
-    else {
-        if (n_retval != 5) {
-            warn("tokenizer cursor returned %d arguments", n_retval);
-        }
-        *piPosition    = POPi;
-        *piEndOffset   = POPi;
-        *piStartOffset = POPi;
-        *pnBytes       = POPi;
-        token          = POPpx;
-
-        if (c->pInput) { /* if working with utf8 data */
-
-            /* recompute *pnBytes in bytes, not in chars */
-            *pnBytes = strlen(token);
-
-            /* recompute start/end offsets in bytes, not in chars */
-            hop            = *piStartOffset - c->lastCharOffset;
-            byteOffset     = (char*)utf8_hop((U8*)c->lastByteOffset, hop);
-            hop            = *piEndOffset - *piStartOffset;
-            *piStartOffset = byteOffset - c->pInput;
-            byteOffset     = (char*)utf8_hop((U8*)byteOffset, hop);
-            *piEndOffset   = byteOffset - c->pInput;
-
-            /* remember where we are for next round */
-            c->lastCharOffset = *piEndOffset,
-            c->lastByteOffset = byteOffset;
-        }
-
-        /* make sure we have enough storage for copying the token */
-        if (*pnBytes > c->nTokenAllocated ){
-            char *pNew;
-            c->nTokenAllocated = *pnBytes + 20;
-            pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
-            if( !pNew ) return SQLITE_NOMEM;
-            c->pToken = pNew;
-        }
-
-        /* need to copy the token into the C cursor before perl frees that
-           memory */
-        memcpy(c->pToken, token, *pnBytes);
-        *ppToken  = c->pToken;
-
-        result = SQLITE_OK;
-    }

    PUTBACK;
    FREETMPS;
@ -485,7 +487,6 @@ static int perl_fts5_tokenizer_Tokenize(
 ** The set of routines that implement the perl FTS5 tokenizer
 */
 fts5_tokenizer perl_fts5_tokenizer_Module = {
-    0,
    perl_fts5_tokenizer_Create,
    perl_fts5_tokenizer_Delete,
    perl_fts5_tokenizer_Tokenize
@ -530,7 +531,7 @@ int sqlite_db_register_fts5_perl_tokenizer(pTHX_ SV *dbh)

    int rc;
    fts5_api *pFts5Api = sqlite_fetch_fts5_api(aTHX_ dbh);
-    sqlite3_tokenizer_module *p = &perl_fts5_tokenizer_Module;
+    fts5_tokenizer *p = &perl_fts5_tokenizer_Module;

    // pFts5Api->xCreateTokenizer(pFts5Api,...);

--- a/lib/DBD/SQLite.pm
+++ b/lib/DBD/SQLite.pm
@ -51,6 +51,7 @@ sub driver {
        DBD::SQLite::db->install_method('sqlite_enable_load_extension');
        DBD::SQLite::db->install_method('sqlite_load_extension');
        DBD::SQLite::db->install_method('sqlite_register_fts3_perl_tokenizer');
+        DBD::SQLite::db->install_method('sqlite_register_fts5_perl_tokenizer');
        DBD::SQLite::db->install_method('sqlite_trace', { O => 0x0004 });
        DBD::SQLite::db->install_method('sqlite_profile', { O => 0x0004 });
        DBD::SQLite::db->install_method('sqlite_table_column_metadata', { O => 0x0004 });