From f68f79415f083d62398497885eb7ec955a884255 Mon Sep 17 00:00:00 2001
From: dami
Date: Tue, 11 May 2021 09:01:59 +0200
Subject: [PATCH] test to show that perl tokenizer with unicode is extremely
 slow

---
 t/43_fts3.t | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/t/43_fts3.t b/t/43_fts3.t
index 7082e6c..f5fb0b0 100644
--- a/t/43_fts3.t
+++ b/t/43_fts3.t
@@ -2,11 +2,19 @@ use strict;
 use warnings;
 no if $] >= 5.022, "warnings", "locale";
 use lib "t/lib";
+
+# TMP for running tests from Emacs
+use lib "lib";
+use lib "../blib/lib";
+use lib "../blib/arch";
+
+use Time::HiRes qw/time/;
 use SQLiteTest;
 use Test::More;
 use if -d ".git", "Test::FailWarnings";
 use DBD::SQLite;
+
 
 my @texts = ("il était une bergère",
              "qui gardait ses moutons",
              "elle fit un fromage",
@@ -111,12 +119,27 @@ for my $use_unicode (0, 1) {
     my $expected_offsets = "0 0 $offset_une 3";
     is_deeply($result, [$expected_offsets],
               "offsets ($fts, unicode=$use_unicode)");
 
-    # test a longer sentence
-    $dbh->do("INSERT INTO try_$fts(content) VALUES(?)", {}, join(" ", @texts));
+    # test snippet() on a longer sentence
+    $insert_sth->execute(join " ", @texts);
     $result = $dbh->selectcol_arrayref($sql_snip, undef, '"bergère qui gardait"');
     like($result->[0], qr[une bergère qui gardait ses],
          "longer snippet ($fts, unicode=$use_unicode)");
+
+    # simulated large document
+    open my $fh, "<", $INC{'DBD/SQLite.pm'} or die $!;
+    my $source_code = do {local $/; <$fh>};
+    my $long_doc = $source_code x 1;
+
+    my $t0 = time;
+    $insert_sth->execute($long_doc);
+    my $t1 = time;
+    $result = $dbh->selectcol_arrayref($sql_snip, undef, '"package DBD::SQLite"');
+    my $t2 = time;
+
+    note sprintf("long doc (%d chars): insert in %.4f secs, select in %.4f secs",
+                 length($long_doc), $t1-$t0, $t2-$t1);
+    like($result->[0], qr[^package DBD::SQLite;], 'snippet "package DBD::SQLite"');
 
   }
 }
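
Note for reviewers (not part of the patch): the "perl tokenizer" being timed
above is one registered through the tokenize=perl interface documented in the
DBD::SQLite POD, where a Perl function returns a per-string closure that
iterates over tokens. The sketch below follows the shape of the POD's example;
the function name main::simple_tokenizer, the table name, and the \w+ regex
are illustrative stand-ins, not the tokenizer used by t/43_fts3.t.

    use strict;
    use warnings;
    use DBI;

    # Shape of a perl tokenizer as documented in the DBD::SQLite POD:
    # an outer closure receives the string, an inner closure yields one
    # ($term, $len, $start, $end, $index) tuple per call, and returns an
    # empty list when no tokens remain.
    sub simple_tokenizer {
        return sub {
            my $string     = shift;
            my $term_index = 0;
            return sub {
                $string =~ /\w+/g or return;        # no more tokens
                my ($start, $end) = ($-[0], $+[0]); # match offsets
                my $term = substr($string, $start, $end - $start);
                return ($term, $end - $start, $start, $end, $term_index++);
            };
        };
    }

    my $dbh = DBI->connect("dbi:SQLite:dbname=:memory:", "", "",
                           { RaiseError => 1, sqlite_unicode => 1 });
    $dbh->do("CREATE VIRTUAL TABLE try_fts4 USING fts4("
           . "content, tokenize=perl 'main::simple_tokenizer')");
    $dbh->do("INSERT INTO try_fts4(content) VALUES(?)", {},
             "il était une bergère");

Every token thus costs a callback from C into Perl, and with sqlite_unicode
set there is additional character/byte offset bookkeeping per token; that is
the plausible hot spot the insert/select timing in this patch is meant to
expose on a large document.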