1
0
Fork 0
mirror of https://github.com/DBD-SQLite/DBD-SQLite synced 2025-06-07 22:28:47 -04:00

Cookbook patch by Yuriy Kaminskiy

This commit is contained in:
Kenichi Ishigaki 2011-09-06 15:21:20 +00:00
parent dd05ef54fd
commit d30f3ab459

View file

@ -42,7 +42,7 @@ adapted from an example implementation in pysqlite.
my $sigma = 0; my $sigma = 0;
foreach my $v ( @$self ) { foreach my $v ( @$self ) {
$sigma += ($x - $mu)**2; $sigma += ($v - $mu)**2;
} }
$sigma = $sigma / ($n - 1); $sigma = $sigma / ($n - 1);
@ -66,41 +66,38 @@ expense of precision:
package variance2; package variance2;
my $sum = 0; sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
my $count = 0;
my %hash;
sub new { bless [], shift; }
sub step { sub step {
my ( $self, $value ) = @_; my ( $self, $value ) = @_;
my $hash = $self->{hash};
# by truncating and hashing, we can consume many more data points # by truncating and hashing, we can consume many more data points
$value = int($value); # change depending on need for precision $value = int($value); # change depending on need for precision
# use sprintf for arbitrary fp precision # use sprintf for arbitrary fp precision
if (defined $hash{$value}) { if (exists $hash->{$value}) {
$hash{$value}++; $hash->{$value}++;
} else { } else {
$hash{$value} = 1; $hash->{$value} = 1;
} }
$sum += $value; $self->{sum} += $value;
$count++; $self->{count}++;
} }
sub finalize { sub finalize {
my $self = $_[0]; my $self = $_[0];
# Variance is NULL unless there is more than one row # Variance is NULL unless there is more than one row
return undef unless $count > 1; return undef unless $self->{count} > 1;
# calculate avg # calculate avg
my $mu = $sum / $count; my $mu = $self->{sum} / $self->{count};
my $sigma = 0; my $sigma = 0;
foreach my $h (keys %hash) { while (my ($h, $v) = each %{$self->{hash}}) {
$sigma += (($h - $mu)**2) * $hash{$h}; $sigma += (($h - $mu)**2) * $v;
} }
$sigma = $sigma / ($count - 1); $sigma = $sigma / ($self->{count} - 1);
return $sigma; return $sigma;
} }
@ -115,25 +112,21 @@ The function can then be used as:
A third variance implementation, designed for arbitrarily large data sets: A third variance implementation, designed for arbitrarily large data sets:
package variance; package variance3;
my $mu = 0; sub new { bless {mu=>0, count=>0, S=>0}, shift; }
my $count = 0;
my $S = 0
sub new { bless [], shift; }
sub step { sub step {
my ( $self, $value ) = @_; my ( $self, $value ) = @_;
$count++; $self->{count}++;
$delta = $value - $mu; my $delta = $value - $self->{mu};
$mu = $mu + $delta/$count $self->{mu} += $delta/$self->{count};
$S = $S + $delta*($value - $mu); $self->{S} += $delta*($value - $self->{mu});
} }
sub finalize { sub finalize {
my $self = $_[0]; my $self = $_[0];
return $S / ($count - 1); return $self->{S} / ($self->{count} - 1);
} }
The function can then be used as: The function can then be used as: