mirror of
https://github.com/DBD-SQLite/DBD-SQLite
synced 2025-06-07 14:19:10 -04:00
Cookbook patch by Yuriy Kaminskiy
This commit is contained in:
parent
dd05ef54fd
commit
d30f3ab459
1 changed files with 20 additions and 27 deletions
|
@ -42,7 +42,7 @@ adapted from an example implementation in pysqlite.
|
|||
|
||||
my $sigma = 0;
|
||||
foreach my $v ( @$self ) {
|
||||
$sigma += ($x - $mu)**2;
|
||||
$sigma += ($v - $mu)**2;
|
||||
}
|
||||
$sigma = $sigma / ($n - 1);
|
||||
|
||||
|
@ -66,41 +66,38 @@ expense of precision:
|
|||
|
||||
package variance2;
|
||||
|
||||
my $sum = 0;
|
||||
my $count = 0;
|
||||
my %hash;
|
||||
|
||||
sub new { bless [], shift; }
|
||||
sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
|
||||
|
||||
sub step {
|
||||
my ( $self, $value ) = @_;
|
||||
my $hash = $self->{hash};
|
||||
|
||||
# by truncating and hashing, we can consume many more data points
|
||||
$value = int($value); # change depending on need for precision
|
||||
# use sprintf for arbitrary fp precision
|
||||
if (defined $hash{$value}) {
|
||||
$hash{$value}++;
|
||||
if (exists $hash->{$value}) {
|
||||
$hash->{$value}++;
|
||||
} else {
|
||||
$hash{$value} = 1;
|
||||
$hash->{$value} = 1;
|
||||
}
|
||||
$sum += $value;
|
||||
$count++;
|
||||
$self->{sum} += $value;
|
||||
$self->{count}++;
|
||||
}
|
||||
|
||||
sub finalize {
|
||||
my $self = $_[0];
|
||||
|
||||
# Variance is NULL unless there is more than one row
|
||||
return undef unless $count > 1;
|
||||
return undef unless $self->{count} > 1;
|
||||
|
||||
# calculate avg
|
||||
my $mu = $sum / $count;
|
||||
my $mu = $self->{sum} / $self->{count};
|
||||
|
||||
my $sigma = 0;
|
||||
foreach my $h (keys %hash) {
|
||||
$sigma += (($h - $mu)**2) * $hash{$h};
|
||||
while (my ($h, $v) = each %{$self->{hash}}) {
|
||||
$sigma += (($h - $mu)**2) * $v;
|
||||
}
|
||||
$sigma = $sigma / ($count - 1);
|
||||
$sigma = $sigma / ($self->{count} - 1);
|
||||
|
||||
return $sigma;
|
||||
}
|
||||
|
@ -115,25 +112,21 @@ The function can then be used as:
|
|||
|
||||
A third variance implementation, designed for arbitrarily large data sets:
|
||||
|
||||
package variance;
|
||||
package variance3;
|
||||
|
||||
my $mu = 0;
|
||||
my $count = 0;
|
||||
my $S = 0
|
||||
|
||||
sub new { bless [], shift; }
|
||||
sub new { bless {mu=>0, count=>0, S=>0}, shift; }
|
||||
|
||||
sub step {
|
||||
my ( $self, $value ) = @_;
|
||||
$count++;
|
||||
$delta = $value - $mu;
|
||||
$mu = $mu + $delta/$count
|
||||
$S = $S + $delta*($value - $mu);
|
||||
$self->{count}++;
|
||||
my $delta = $value - $self->{mu};
|
||||
$self->{mu} += $delta/$self->{count};
|
||||
$self->{S} += $delta*($value - $self->{mu});
|
||||
}
|
||||
|
||||
sub finalize {
|
||||
my $self = $_[0];
|
||||
return $S / ($count - 1);
|
||||
return $self->{S} / ($self->{count} - 1);
|
||||
}
|
||||
|
||||
The function can then be used as:
|
||||
|
|
Loading…
Add table
Reference in a new issue