#!/usr/bin/perl
#
use strict;
use lib "$ENV{LJHOME}/cgi-bin";
use DBI::Role;
use DBI;
# need ljconfig to set up database connection
use LJ::Config;
LJ::Config->load;
$LJ::DBIRole = new DBI::Role {
'timeout' => sub {
my ($dsn, $user, $pass, $role) = @_;
return 0 if $role && $role eq "master";
return $LJ::DB_TIMEOUT;
},
'sources' => \%LJ::DBINFO,
'default_db' => "livejournal",
'time_check' => 60,
'time_report' => \&LJ::dbtime_callback,
};
package LJ::DB;
use Carp qw(croak);
#
# name: LJ::DB::time_range_to_ids
# des: Performs a binary search on a table's primary id key looking
# for time boundaries as specified. Returns the boundary ids
# that were found, effectively simulating a key on 'time' for
# the specified table.
# info: This function shouldn't normally be used, but there are
# rare instances where it's useful.
# args: opts
# des-opts: A hashref of keys. Keys are:
# 'table' - table name to query;
# 'roles' - arrayref of db roles to use, in order. Defaults to ['slow'];
# 'idcol' - name of 'id' primary key column.
# 'timecol' - name of unixtime column to use for constraint;
# 'starttime' - starting unixtime time of rows to match;
# 'endtime' - ending unixtime of rows to match.
# returns: startid, endid; id boundaries which should be used by
# the caller.
#
sub time_range_to_ids {
my %args = @_;
my $table = delete $args{table} or croak("no table arg");
my $idcol = delete $args{idcol} or croak("no idcol arg");
my $timecol = delete $args{timecol} or croak("no timecol arg");
my $starttime = delete $args{starttime} or croak("no starttime arg");
my $endtime = delete $args{endtime} or croak("no endtime arg");
my $roles = delete $args{roles};
unless (ref $roles eq 'ARRAY' && @$roles) {
$roles = [ 'slow' ];
}
croak("bogus args: " . join(",", keys %args))
if %args;
my $db = LJ::get_dbh(@$roles)
or die "unable to acquire db handle, roles=", join(",", @$roles);
my ($db_min_id, $db_max_id) = $db->selectrow_array
("SELECT MIN($idcol), MAX($idcol) FROM $table");
die $db->errstr if $db->err;
die "error finding min/max ids: $db_max_id < $db_min_id"
if $db_max_id < $db_min_id;
# final output
my ($startid, $endid);
my $ct_max = 100;
foreach my $curr_ref ([$starttime => \$startid], [$endtime => \$endid]) {
my ($want_time, $dest_ref) = @$curr_ref;
my ($min_id, $max_id) = ($db_min_id, $db_max_id);
my $curr_time = 0;
my $last_time = 0;
my $ct = 0;
while ($ct++ < $ct_max) {
die "unable to find row after $ct tries" if $ct > 100;
my $curr_id = $min_id + int(($max_id - $min_id) / 2)+0;
my $sql =
"SELECT $idcol, $timecol FROM $table " .
"WHERE $idcol>=$curr_id ORDER BY 1 LIMIT 1";
$last_time = $curr_time;
($curr_id, $curr_time) = $db->selectrow_array($sql);
die $db->errstr if $db->err;
# stop condition, two trigger cases:
# * we've found exactly the time we want
# * we're still narrowing but not finding rows in between, stop here with
# the current time being just short of what we were trying to find
if ($curr_time == $want_time || $curr_time == $last_time) {
# if we never modified the max id, then we
# have searched to the end without finding
# what we were looking for
if ($max_id == $db_max_id && $curr_time <= $want_time) {
$$dest_ref = $max_id;
# same for min id
} elsif ($min_id == $db_min_id && $curr_time >= $want_time) {
$$dest_ref = $min_id;
} else {
$$dest_ref = $curr_id;
}
last;
}
# need to traverse into the larger half
if ($curr_time < $want_time) {
$min_id = $curr_id;
next;
}
# need to traverse into the smaller half
if ($curr_time > $want_time) {
$max_id = $curr_id;
next;
}
}
}
return ($startid, $endid);
}
sub dbh_by_role {
return $LJ::DBIRole->get_dbh( @_ );
}
sub dbh_by_name {
my $name = shift;
my $dbh = dbh_by_role("master")
or die "Couldn't contact master to find name of '$name'\n";
my $fdsn = $dbh->selectrow_array("SELECT fdsn FROM dbinfo WHERE name=?", undef, $name);
die "No fdsn found for db name '$name'\n" unless $fdsn;
return $LJ::DBIRole->get_dbh_conn($fdsn);
}
sub dbh_by_fdsn {
my $fdsn = shift;
return $LJ::DBIRole->get_dbh_conn($fdsn);
}
sub root_dbh_by_name {
my $name = shift;
my $dbh = dbh_by_role("master")
or die "Couldn't contact master to find name of '$name'";
my $fdsn = $dbh->selectrow_array("SELECT rootfdsn FROM dbinfo WHERE name=?", undef, $name);
die "No rootfdsn found for db name '$name'\n" unless $fdsn;
return $LJ::DBIRole->get_dbh_conn($fdsn);
}
sub backup_in_progress {
my $name = shift;
my $dbh = dbh_by_role("master")
or die "Couldn't contact master to find name of '$name'";
# return 0 if this a/b is the active side, as wecan't ever have a backup of active side in progress
my ($cid, $is_a_or_b) = user_cluster_details($name);
if ($cid) {
my $active_ab = $LJ::CLUSTER_PAIR_ACTIVE{$cid} or
die "Neither 'a' nor 'b' is active for clusterid $cid?\n";
die "Bogus active side" unless $active_ab =~ /^[ab]$/;
# can't have a backup in progress for an active a/b side. short-circuit
# and don't even ask the database, as it might lie if the process
# was killed or something
return 0 if $active_ab eq $is_a_or_b;
}
my $fdsn = $dbh->selectrow_array("SELECT rootfdsn FROM dbinfo WHERE name=?", undef, $name);
die "No rootfdsn found for db name '$name'\n" unless $fdsn;
$fdsn =~ /\bhost=([\w\.\-]+)/ or die "Can't find host for database '$name'";
my $host = $1;
eval "use IO::Socket::INET; 1;" or die;
my $sock = IO::Socket::INET->new(PeerAddr => "$host:7602") or return 0;
print $sock "is_backup_in_progress\r\n";
my $answer = <$sock>;
chomp $answer;
return $answer eq "1";
}
sub user_cluster_details {
my $name = shift;
my $dbh = dbh_by_role("master") or die;
my $role = $dbh->selectrow_array("SELECT role FROM dbweights w, dbinfo i WHERE i.name=? AND i.dbid=w.dbid",
undef, $name);
return () unless $role && $role =~ /^cluster(\d+)([ab])$/;
return ($1, $2);
}
package LJ;
use Carp qw(croak);
# when calling a supported function (currently: LJ::load_user() or LJ::load_userid*), LJ::SMS::load_mapping()
# ignores in-process request cache, memcache, and selects directly
# from the global master
#
# called as: require_master(sub { block })
sub require_master {
my $callback = shift;
croak "invalid code ref passed to require_master"
unless ref $callback eq 'CODE';
# run code in the block with local var set
local $LJ::_PRAGMA_FORCE_MASTER = 1;
return $callback->();
}
sub no_cache {
my $sb = shift;
local $LJ::MemCache::GET_DISABLED = 1;
return $sb->();
}
sub cond_no_cache {
my ($cond, $sb) = @_;
return no_cache($sb) if $cond;
return $sb->();
}
sub no_ml_cache {
my $sb = shift;
local $LJ::NO_ML_CACHE = 1;
return $sb->();
}
#
# name: LJ::get_dbh
# class: db
# des: Given one or more roles, returns a database handle.
# info:
# args:
# des-:
# returns:
#
sub get_dbh {
my $opts = ref $_[0] eq "HASH" ? shift : {};
unless (exists $opts->{'max_repl_lag'}) {
# for slave or clusterslave roles, don't allow lag
if ($_[0] =~ /slave$/) {
$opts->{'max_repl_lag'} = $LJ::MAX_REPL_LAG || 100_000;
}
}
if ($LJ::DEBUG{'get_dbh'} && $_[0] ne "logs") {
my $errmsg = "get_dbh(@_) at \n";
my $i = 0;
while (my ($p, $f, $l) = caller($i++)) {
next if $i > 3;
$errmsg .= " $p, $f, $l\n";
}
warn $errmsg;
}
my $nodb = sub {
my $roles = shift;
my $err = LJ::errobj("Database::Unavailable",
roles => $roles);
return $err->cond_throw;
};
foreach my $role (@_) {
# let site admin turn off global master write access during
# maintenance
return $nodb->([@_]) if $LJ::DISABLE_MASTER && $role eq "master";
my $db = LJ::get_dbirole_dbh($opts, $role);
return $db if $db;
}
return $nodb->([@_]);
}
sub get_db_reader {
return LJ::get_dbh("master") if $LJ::_PRAGMA_FORCE_MASTER;
return LJ::get_dbh("slave", "master");
}
sub get_db_writer {
return LJ::get_dbh("master");
}
#
# name: LJ::get_cluster_reader
# class: db
# des: Returns a cluster slave for a user, or cluster master if no slaves exist.
# args: uarg
# des-uarg: Either a userid scalar or a user object.
# returns: DB handle. Or undef if all dbs are unavailable.
#
sub get_cluster_reader
{
my $arg = shift;
my $id = isu($arg) ? $arg->{'clusterid'} : $arg;
my @roles = ("cluster${id}slave", "cluster${id}");
if (my $ab = $LJ::CLUSTER_PAIR_ACTIVE{$id}) {
$ab = lc($ab);
# master-master cluster
@roles = ("cluster${id}${ab}") if $ab eq "a" || $ab eq "b";
}
return LJ::get_dbh(@roles);
}
#
# name: LJ::get_cluster_def_reader
# class: db
# des: Returns a definitive cluster reader for a given user, used
# when the caller wants the master handle, but will only
# use it to read.
# args: uarg
# des-uarg: Either a clusterid scalar or a user object.
# returns: DB handle. Or undef if definitive reader is unavailable.
#
sub get_cluster_def_reader
{
my @dbh_opts = scalar(@_) == 2 ? (shift @_) : ();
my $arg = shift;
my $id = isu($arg) ? $arg->{'clusterid'} : $arg;
return LJ::get_cluster_reader(@dbh_opts, $id) if
$LJ::DEF_READER_ACTUALLY_SLAVE{$id};
return LJ::get_dbh(@dbh_opts, LJ::master_role($id));
}
#
# name: LJ::get_cluster_master
# class: db
# des: Returns a cluster master for a given user, used when the caller
# might use it to do a write (insert/delete/update/etc...)
# args: uarg
# des-uarg: Either a clusterid scalar or a user object.
# returns: DB handle. Or undef if master is unavailable.
#
sub get_cluster_master
{
my @dbh_opts = scalar(@_) == 2 ? (shift @_) : ();
my $arg = shift;
my $id = isu($arg) ? $arg->{'clusterid'} : $arg;
return undef if $LJ::READONLY_CLUSTER{$id};
return LJ::get_dbh(@dbh_opts, LJ::master_role($id));
}
# returns the DBI::Role role name of a cluster master given a clusterid
sub master_role {
my $id = shift;
my $role = "cluster${id}";
if (my $ab = $LJ::CLUSTER_PAIR_ACTIVE{$id}) {
$ab = lc($ab);
# master-master cluster
$role = "cluster${id}${ab}" if $ab eq "a" || $ab eq "b";
}
return $role;
}
#
# name: LJ::get_dbirole_dbh
# class: db
# des: Internal function for get_dbh(). Uses the DBIRole to fetch a dbh, with
# hooks into db stats-generation if that's turned on.
# info:
# args: opts, role
# des-opts: A hashref of options.
# des-role: The database role.
# returns: A dbh.
#
sub get_dbirole_dbh {
my $dbh = $LJ::DBIRole->get_dbh( @_ ) or return undef;
if ( $LJ::DB_LOG_HOST && $LJ::HAVE_DBI_PROFILE ) {
$LJ::DB_REPORT_HANDLES{ $dbh->{Name} } = $dbh;
# :TODO: Explain magic number
$dbh->{Profile} ||= "2/DBI::Profile";
# And turn off useless (to us) on_destroy() reports, too.
undef $DBI::Profile::ON_DESTROY_DUMP;
}
return $dbh;
}
#
# name: LJ::get_lock
# des: get a MySQL lock on a given key/dbrole combination.
# returns: undef if called improperly, true on success, die() on failure
# args: db, dbrole, lockname, wait_time?
# des-dbrole: the role this lock should be gotten on, either 'global' or 'user'.
# des-lockname: the name to be used for this lock.
# des-wait_time: an optional timeout argument, defaults to 10 seconds.
#
sub get_lock
{
my ($db, $dbrole, $lockname, $wait_time) = @_;
return undef unless $db && $lockname;
return undef unless $dbrole eq 'global' || $dbrole eq 'user';
my $curr_sub = (caller 1)[3]; # caller of current sub
# die if somebody already has a lock
die "LOCK ERROR: $curr_sub; can't get lock from: $LJ::LOCK_OUT{$dbrole}\n"
if exists $LJ::LOCK_OUT{$dbrole};
# get a lock from mysql
$wait_time ||= 10;
$db->do("SELECT GET_LOCK(?,?)", undef, $lockname, $wait_time)
or return undef;
# successfully got a lock
$LJ::LOCK_OUT{$dbrole} = $curr_sub;
return 1;
}
#
# name: LJ::may_lock
# des: see if we could get a MySQL lock on
# a given key/dbrole combination, but don't actually get it.
# returns: undef if called improperly, true on success, die() on failure
# args: db, dbrole
# des-dbrole: the role this lock should be gotten on, either 'global' or 'user'.
#
sub may_lock
{
my ($db, $dbrole) = @_;
return undef unless $db && ($dbrole eq 'global' || $dbrole eq 'user');
# die if somebody already has a lock
if ($LJ::LOCK_OUT{$dbrole}) {
my $curr_sub = (caller 1)[3]; # caller of current sub
die "LOCK ERROR: $curr_sub; can't get lock from $LJ::LOCK_OUT{$dbrole}\n";
}
# see if a lock is already out
return undef if exists $LJ::LOCK_OUT{$dbrole};
return 1;
}
#
# name: LJ::release_lock
# des: release a MySQL lock on a given key/dbrole combination.
# returns: undef if called improperly, true on success, die() on failure
# args: db, dbrole, lockname
# des-dbrole: role on which to get this lock, either 'global' or 'user'.
# des-lockname: the name to be used for this lock
#
sub release_lock
{
my ($db, $dbrole, $lockname) = @_;
return undef unless $db && $lockname;
return undef unless $dbrole eq 'global' || $dbrole eq 'user';
# get a lock from mysql
$db->do("SELECT RELEASE_LOCK(?)", undef, $lockname);
delete $LJ::LOCK_OUT{$dbrole};
return 1;
}
#
# name: LJ::disconnect_dbs
# des: Clear cached DB handles
#
sub disconnect_dbs {
# clear cached handles
$LJ::DBIRole->disconnect_all( { except => [qw(logs)] });
}
#
# name: LJ::use_diff_db
# class:
# des: given two DB roles, returns true only if it is certain the two roles are
# served by different database servers.
# info: This is useful for, say, the moveusercluster script: You would not want
# to select something from one DB, copy it into another, and then delete it from the
# source if they were both the same machine.
# args:
# des-:
# returns:
#
sub use_diff_db {
$LJ::DBIRole->use_diff_db(@_);
}
# to be called as &nodb; (so this function sees caller's @_)
sub nodb {
shift @_ if
ref $_[0] eq "LJ::DBSet" || ref $_[0] eq "DBI::db" ||
ref $_[0] eq "Apache::DBI::db";
}
sub dbtime_callback {
my ($dsn, $dbtime, $time) = @_;
my $diff = abs($dbtime - $time);
if ($diff > 2) {
$dsn =~ /host=([^:\;\|]*)/;
my $db = $1;
print STDERR "Clock skew of $diff seconds between web($LJ::SERVER_NAME) and db($db)\n";
}
}
sub foreach_cluster {
my $coderef = shift;
my $opts = shift || {};
foreach my $cluster_id (@LJ::CLUSTERS) {
my $dbr = ($LJ::IS_DEV_SERVER) ?
LJ::get_cluster_reader($cluster_id) : LJ::DBUtil->get_inactive_db($cluster_id, $opts->{verbose});
$coderef->($cluster_id, $dbr);
}
}
sub isdb { return ref $_[0] && (ref $_[0] eq "DBI::db" ||
ref $_[0] eq "Apache::DBI::db"); }
sub bindstr { return join(', ', map { '?' } @_); }
package LJ::Error::Database::Unavailable;
sub fields { qw(roles) } # arrayref of roles requested
sub as_string {
my $self = shift;
my $ct = @{$self->field('roles')};
my $clist = join(", ", @{$self->field('roles')});
return $ct == 1 ?
"Database unavailable for role $clist" :
"Database unavailable for roles $clist";
}
package LJ::Error::Database::Failure;
sub fields { qw(db) }
sub user_caused { 0 }
sub as_string {
my $self = shift;
my $code = $self->err;
my $txt = $self->errstr;
return "Database error code $code: $txt";
}
sub err {
my $self = shift;
return $self->field('db')->err;
}
sub errstr {
my $self = shift;
return $self->field('db')->errstr;
}
1;