#!/usr/bin/perl package LJ::Schools; use strict; # # name: LJ::Schools::get_attended # class: schools # des: Gets a list of schools a user has attended. # args: uobj # des-uobj: User id or object of user to get schools attended. # returns: Hashref; schoolid as key, value is hashref containing basic information # about the record: year_start, year_end. Also: keys from get_school/get_school_multi. # sub get_attended { my $u = LJ::want_user(shift); return undef unless $u; # now load what schools they've been to from memcache my $res = LJ::MemCache::get([ $u->{userid}, "saui:$u->{userid}" ]); # if that failed, hit db unless ($res) { my $dbcr = LJ::get_cluster_def_reader($u); return undef unless $dbcr; my $rows = $dbcr->selectall_arrayref(qq{ SELECT schoolid, year_start, year_end FROM user_schools WHERE userid = ? }, undef, $u->{userid}); return undef if $dbcr->err || ! $rows; $res = {}; foreach my $row (@$rows) { $res->{$row->[0]} = { year_start => $row->[1], year_end => $row->[2], }; } LJ::MemCache::add([ $u->{userid}, "saui:$u->{userid}" ], $res); } # now populate with school information my @sids = keys %$res; my $schools = LJ::Schools::load_schools(@sids); foreach my $sid (@sids) { next unless $res->{$sid} && $schools->{$sid}; $schools->{$sid}->{year_start} = $res->{$sid}->{year_start}; $schools->{$sid}->{year_end} = $res->{$sid}->{year_end}; } return $schools; } # # name: LJ::Schools::load_schools # class: schools # des: Returns detailed information about schools. # args: schoolids # des-schoolids: List of school ids to return. # returns: Hashref; key being schoolid, value being a hashref with keys name, url, # citycode, countrycode, statecode. # sub load_schools { my @ids = grep { defined $_ && $_ > 0 } @_; return {} unless @ids; # check from memcache my $res; my %need = map { $_ => 1 } @ids; my @keys = map { [ $_, "sasi:$_" ] } @ids; my $mres = LJ::MemCache::get_multi(@keys); foreach my $key (keys %{$mres || {}}) { if ($key =~ /^sasi:(\d+)$/) { delete $need{$1}; $res->{$1} = $mres->{$key}; } } return $res unless %need; # now fallback to database my $in = join(',', keys %need); my $dbh = LJ::get_db_writer(); # writer to get data for memcache return undef unless $dbh; my $rows = $dbh->selectall_arrayref(qq{ SELECT schoolid, name, country, state, city, url FROM schools WHERE schoolid IN ($in) }); return undef if $dbh->err || ! $rows; foreach my $row (@$rows) { $res->{$row->[0]} = { name => $row->[1], country => $row->[2], state => $row->[3], city => $row->[4], url => $row->[5], }; LJ::MemCache::set([ $row->[0], "sasi:$row->[0]" ], $res->{$row->[0]}); } return $res; } # # name: LJ::Schools::get_attendees # class: schools # des: Gets a list of users that attended a school. # args: schoolid, year? # des-schoolid: School id to get attendees for. # des-year: Optional; if provided, returns people that attended in this year. # returns: List of userids that attended. # sub get_attendees { my $sid = shift() + 0; my $year = shift() + 0; return undef unless $sid; # see if it's in memcache first my $mkey = $year ? "saaly:$sid:$year" : "saal:$sid"; my $list = LJ::MemCache::get([ $sid, $mkey ]); return @$list if $list; # hit database for info my $dbr = LJ::get_db_reader(); return undef unless $dbr; # query changes based on what we're doing my $ids; if ($year) { # this works even if they're null! (the condition just returns null which evaluates # to false which means don't return the row) $ids = $dbr->selectcol_arrayref(qq{ SELECT userid FROM schools_attended WHERE schoolid = ? AND ? BETWEEN year_start AND year_end LIMIT 1000 }, undef, $sid, $year); } else { $ids = $dbr->selectcol_arrayref('SELECT userid FROM schools_attended WHERE schoolid = ? LIMIT 1000', undef, $sid); } return undef if $dbr->err || ! $ids; # set and return LJ::MemCache::set([ $sid, $mkey ], $ids, 300); return @$ids; } # # name: LJ::Schools::get_countries # class: schools # des: Get a list of countries that we have schools in. # returns: Hashref; countrycode as key, hashref of countryname, countrycode, # and number of schools as values. # sub get_countries { # see if we can get it from memcache my $data = LJ::MemCache::get('saccs'); return $data if $data; # if not, pull from db my $dbh = LJ::get_db_writer(); return undef unless $dbh; my $rows = $dbh->selectall_arrayref('SELECT DISTINCT country, COUNT(*) FROM schools GROUP BY country'); return undef if $dbh->err || ! $rows; # now we want to dig out the country codes my %countries; LJ::load_codes({ country => \%countries }); # and now combine them my $res = {}; foreach my $row (@$rows) { $res->{$row->[0]} = { 'code' => $row->[0], 'count' => $row->[1], 'name' => $countries{$row->[0]} || $row->[0], }; } # set to memcache and return LJ::MemCache::set('saccs', $res, 300); return $res; } # # name: LJ::Schools::get_states # class: schools # des: Gets information about what states have been populated with schools. States # and provinces are considered the same thing. # args: countrycode # des-countrycode: The country code provided from [func[LJ::Schools::get_countries]]. # returns: Hashref; statecode as key, statename as the values. # sub get_states { my $ctc = shift; return undef unless $ctc; # see if we can get it from memcache my $data = LJ::MemCache::get("sascs:$ctc"); return $data if $data; # if not, pull from db my $dbh = LJ::get_db_writer(); return undef unless $dbh; my $rows = $dbh->selectcol_arrayref('SELECT DISTINCT state FROM schools WHERE country = ?', undef, $ctc); return undef if $dbh->err || ! $rows; # now we want to dig out the states, if this is the US my %states; if ($ctc eq 'US') { LJ::load_codes({ state => \%states }); } # and now combine them my $res = {}; foreach my $cc (@$rows) { $res->{$cc} = $states{$cc} || $cc; } # set to memcache and return LJ::MemCache::set("sascs:$ctc", $res, 300); return $res; } # # name: LJ::Schools::get_cities # class: schools # des: Gets information about what cities have been populated with schools. # args: countrycode, statecode # des-countrycode: The country code provided from [func[LJ::Schools::get_countries]]. # des-statecode: The state code provided from[func[LJ::Schools::get_states]]. # returns: Hashref; citycode as key, cityname as the values. # sub get_cities { my ($ctc, $sc) = @_; return undef unless $ctc && defined $sc; # FIXME: memcache # just dredge it up from the database (READER) my $dbr = LJ::get_db_reader(); return undef unless $dbr; my $rows; if ($sc) { $rows = $dbr->selectcol_arrayref ('SELECT DISTINCT city FROM schools WHERE country = ? AND state = ?', undef, $ctc, $sc); } else { $rows = $dbr->selectcol_arrayref ('SELECT DISTINCT city FROM schools WHERE country = ? AND state IS NULL', undef, $ctc); } return undef if $dbr->err || ! $rows; # and now combine them my $res = {}; foreach my $cc (@$rows) { $res->{$cc} = $cc; } return $res; } # # name: LJ::Schools::get_schools # class: schools # des: Gets schools defined in a given area. # args: countrycode, statecode, citycode # des-countrycode: The country code provided from [func[LJ::Schools::get_countries]]. # des-statecode: The state code provided from[func[LJ::Schools::get_states]]. # des-citycode: The city code provided from [func[LJ::Schools::get_cities]]. # returns: Hashref; schoolid as key, hashref of schools row as value with # keys: name, city, state, country, url. # sub get_schools { my ($ctc, $sc, $cc) = @_; return undef unless $ctc && defined $sc && defined $cc; # just dredge it up from the database (READER) my $dbr = LJ::get_db_reader(); return undef unless $dbr; # might get some nulls my @args = grep { defined $_ && $_ } ($ctc, $sc, $cc); my $scs = $sc ? "state = ?" : "state IS NULL"; my $ccs = $cc ? "city = ?" : "city IS NULL"; # do the query my $rows = $dbr->selectall_arrayref ("SELECT schoolid, name FROM schools WHERE country = ? AND $scs AND $ccs", undef, @args); return undef if $dbr->err || ! $rows; # and now combine them my $res = {}; foreach my $row (@$rows) { $res->{$row->[0]} = $row->[1]; } return $res; } # # name: LJ::Schools::expand_codes # class: schools # des: Expands country, state, and city codes into actual names. # args: countrycode, statecode?, citycode? # des-countrycode: Code of the country. # des-statecode: Code of the state/province. # des-citycode: Code of the city. # returns: Array of country, state, city. # sub expand_codes { my ($ctc, $sc, $cc, $sid) = @_; return undef unless $ctc; my (%countries, %states); if ($ctc eq 'US') { LJ::load_codes({ country => \%countries, state => \%states }); } else { LJ::load_codes({ country => \%countries }); } # countries are pretty easy, from the list my ($ct, $s, $c, $sn); $ct = $countries{$ctc}; # state codes translate to US states, or are themselves if (defined $sc) { $s = $states{$sc} || $sc; } # for now, city codes = city names if (defined $cc) { $c = $cc; } # simple db query (FIXME: memcache) if (defined $sid && $sid > 0) { my $dbr = LJ::get_db_reader(); my $name = $dbr->selectrow_array('SELECT name FROM schools WHERE schoolid = ?', undef, $sid); $sn = $name; } # la la la return return ($ct, $s, $c, $sn); } # # name: LJ::Schools::determine_location_opts # class: schools # des: Internal; used to perform the logic to determine the location codes to use for # a record based on the inputs. # args: opts # des-opts: Hashref; should contain some combination of city, country, state, citycode, # countrycode, statecode. The codes trump the non-code arguments. # returns: list of: countrycode, statecode, citycode. empty list on error. # sub determine_location_opts { my $opts = shift; return undef unless $opts && ref $opts; my ($ctc, $sc, $cc); # get country code first $ctc = $opts->{countrycode}; unless ($ctc) { my %countries; LJ::load_codes({ country => \%countries }); if (exists $countries{$opts->{country}}) { # valid code, use it $ctc = $opts->{country}; } else { # must be a name, back-convert it %countries = reverse %countries; $ctc = $countries{$opts->{country}}; } } return () unless $ctc; # now get the state code $sc = $opts->{statecode}; unless ($sc) { if ($ctc eq 'US') { my %states; LJ::load_codes({ state => \%states }); if (exists $states{$opts->{state}}) { # valid code, use it $sc = $opts->{state}; } else { # must be a name, back-convert it %states = reverse %states; $sc = $states{$opts->{state}}; } } else { $sc = $opts->{state}; } } # and finally the city $cc = $opts->{citycode} || $opts->{city}; # and the list return ($ctc, $sc, $cc); } # # name: LJ::Schools::add_pending_school # class: schools # des: Adds a school from a user to the pending list of schools. # args: uobj, options # des-uobj: User id or object of user that's adding the row. # des-options: Hashref; Key=>value pairs that can include: name, city, state, country, # citycode, statecode, countrycode, url. # returns: 1 on success, undef on error. # sub add_pending_school { my ($u, $opts) = @_; $u = LJ::want_user($u); return undef unless $u && $opts && ref $opts eq 'HASH'; # verify we have location data my ($ctc, $sc, $cc) = LJ::Schools::determine_location_opts($opts); return undef unless $ctc && defined $sc && defined $cc; # verify we have minimum data (name) return undef unless $opts->{name}; # now undef things that need to be null if blank $sc ||= undef; $cc ||= undef; $opts->{url} ||= undef; # get db and insert my $dbh = LJ::get_db_writer(); $dbh->do("INSERT INTO schools_pending (userid, name, country, state, city, url) VALUES (?, ?, ?, ?, ?, ?)", undef, $u->{userid}, $opts->{name}, $ctc, $sc, $cc, $opts->{url}); return undef if $dbh->err; return 1; } # # name: LJ::Schools::set_attended # class: schools # des: Lists a school as being attended by a user or updates an existing edge. # args: uobj, schoolid, options? # des-uobj: User id or object of user doing the attending. # des-schoolid: School id of school being attended. # des-options: Hashref; Key=>value pairs year_start and year_end, if desired. # returns: 1 on success, undef on error. # sub set_attended { my ($u, $sid, $opts) = @_; $u = LJ::want_user($u); $sid = $sid + 0; $opts ||= {}; return undef unless $u && $sid && $opts; # now, make sure the school is valid my $school = LJ::Schools::load_schools( $sid ); return undef unless $school->{$sid}; # check they aren't adding too many schools my $attended = LJ::no_cache(sub { return LJ::Schools::get_attended($u) }); return undef if !defined $LJ::SCHOOLSMAX->{$u->{journaltype}} || scalar keys %$attended >= $LJ::SCHOOLSMAX->{$u->{journaltype}}; # validate our information my $ys = ($opts->{year_start} + 0) || undef; my $ye = ($opts->{year_end} + 0) || undef; # enforce convention that year end must be undef if year start is # undef; if it's not, it can be either $ye = undef unless $ys; # and now ensure they're in the right order ($ys, $ye) = ($ye, $ys) if defined $ys && defined $ye && $ye < $ys; # now do the insert, if that fails, do an update my $dbcm = LJ::get_cluster_master($u) or return undef; my $dbh = LJ::get_db_writer() or return undef; # see if we're adding a new row or updating my $ct = $dbh->do("INSERT IGNORE INTO schools_attended (schoolid, userid, year_start, year_end) VALUES (?, ?, ?, ?)", undef, $sid, $u->{userid}, $ys, $ye); return undef if $dbh->err; # delete a user's school attended info LJ::MemCache::delete([ $u->{userid}, "saui:$u->{userid}" ]); # now, if we have a count, do the cluster insert and call it good if ($ct > 0) { $dbcm->do("INSERT INTO user_schools (userid, schoolid, year_start, year_end) VALUES (?, ?, ?, ?)", undef, $u->{userid}, $sid, $ys, $ye); # if error there, attempt to roll back global change if ($dbcm->err) { $dbh->do("DELETE FROM schools_attended WHERE schoolid = ? AND userid = ?", undef, $sid, $u->{userid}); return undef; } # must have been successful! return 1; } # okay, so we're doing an update $dbh->do("UPDATE schools_attended SET year_start = ?, year_end = ? WHERE schoolid = ? AND userid = ?", undef, $ys, $ye, $sid, $u->{userid}); return undef if $dbh->err; $dbcm->do("UPDATE user_schools SET year_start = ?, year_end = ? WHERE userid = ? AND schoolid = ?", undef, $ys, $ye, $u->{userid}, $sid); return undef if $dbcm->err; return 1; } # # name: LJ::Schools::delete_attended # class: schools # des: Removes an attended edge from a user/school. # args: uobj, schoolid # des-uobj: User id or object of user doing the attending. # des-schoolid: School id of school being un-attended. # returns: 1 on success, undef on error. # sub delete_attended { my ($u, $sid) = @_; $u = LJ::want_user($u); $sid = $sid + 0; return undef unless $u && $sid; # get the dbs we need my $dbcm = LJ::get_cluster_master($u) or return undef; my $dbh = LJ::get_db_writer() or return undef; # now delete the data $dbh->do("DELETE FROM schools_attended WHERE schoolid = ? AND userid = ?", undef, $sid, $u->{userid}); return undef if $dbh->err; $dbcm->do("DELETE FROM user_schools WHERE userid = ? AND schoolid = ?", undef, $u->{userid}, $sid); return undef if $dbcm->err; # now clear the user's memcache... note that we do not delete the school's # memcache rows, because then we'd have to load more information to get what # years this user attended, and it doesn't help us much. we want the school # attendance lists to be loaded as little as possible. LJ::MemCache::delete([ $u->{userid}, "saui:$u->{userid}" ]); return 1; } # # name: LJ::Schools::approve_pending # class: schools # des: Takes a bunch of pending rows and approves them as a new target school. # args: pendids, options # des-pendids: Arrayref of pendids from the schools_pending table. # des-options: Hashref; Key=>value pairs that define the target school's information. Keys # are one of: name, city, state, country, citycode, statecode, countrycode, url. # returns: Allocated school id on success, Undef on error. # sub approve_pending { my ($pendids, $opts) = @_; return undef unless $pendids && ref $pendids eq 'ARRAY' && @$pendids && $opts && ref $opts eq 'HASH'; # now verify our pendids are valid @$pendids = grep { $_ } map { $_+0 } @$pendids; return undef unless @$pendids; # verify we have location data my ($ctc, $sc, $cc) = LJ::Schools::determine_location_opts($opts); return undef unless $ctc && defined $sc && defined $cc; # and verify other options return undef unless $opts->{name}; # get database handle my $dbh = LJ::get_db_writer(); return undef unless $dbh; # load these pending rows my $in = join(',', @$pendids); my $rows = $dbh->selectall_hashref(qq{ SELECT pendid, userid, name, country, state, city, url FROM schools_pending WHERE pendid IN ($in) }, 'pendid') || {}; return undef if $dbh->err; # setup to add the new school $sc ||= undef; $cc ||= undef; $opts->{url} ||= undef; # actually add the school my $sid = LJ::alloc_global_counter('O'); return undef unless $sid; $dbh->do("INSERT INTO schools (schoolid, name, country, state, city, url) VALUES (?, ?, ?, ?, ?, ?)", undef, $sid, $opts->{name}, $ctc, $sc, $cc, $opts->{url}); return undef if $dbh->err; # now insert the user attendance lists my %userids; foreach my $row (values %$rows) { next if $userids{$row->{userid}}++; LJ::Schools::set_attended($row->{userid}, $sid); } # and delete their pending rows, but ignore errors $dbh->do("DELETE FROM schools_pending WHERE pendid IN ($in)"); # and we're done return $sid; } # # name: LJ::Schools::get_pending # class: schools # des: Returns the next "potentially good" set of records to be processed. # args: uobj, country?, state?, city? # des-uobj: User id or object of user doing the admin work. # des-country: Optional. Country school is in. # des-state: Optional. State school is in, or nothing for undefined state. # des-city: Optional. City school is in. # returns: Hashref; keys being 'primary' with a value of a school hashref, # and 'secondary', 'tertiary' with values being a hashref of # { pendid => { ..school.. } }, where the school hashref contains # name, citycode, statecode, countrycode, url, userid. Undef on error. # sub get_pending { my ($u, $ctc, $sc, $cc) = @_; $u = LJ::want_user($u); return undef unless $u; return undef if (defined $sc || defined $cc) && !defined $ctc; # might get some nulls my @geoargs = grep { defined $_ } ($ctc, $sc, $cc); my $ccs = defined $ctc ? "country = ?" : "1"; my $scs = defined $sc ? "state = ?" : "1"; my $ics = defined $cc ? "city = ?" : "1"; # need db my $dbh = LJ::get_db_writer(); return undef unless $dbh; # step 1: select some rows, so we have a sample to choose from my $rows = $dbh->selectall_hashref(qq{ SELECT pendid, userid, name, country, state, city, url FROM schools_pending WHERE $ccs AND $scs AND $ics LIMIT 200 }, 'pendid', undef, @geoargs); return undef if $dbh->err; # step 2: now, we want to find one that isn't being dealt with; I think we will # not run into too many "at the same time" issues, so we're doing the memcache # queries one at a time instead of implementing the multi logic my $pend; # We want to select a random row out of the number returned, so if # they hit "give me a different one" it actually will since the rows # are returned from the db in the same order every time. my @ids = keys %$rows; my $nrows = scalar @ids; my $school; my $tries = 0; # so we won't loop forever while (1) { return undef if $tries == $nrows; my $rand = int(rand($nrows)); my $pendid = $ids[$rand]; my $userid = LJ::MemCache::get([ $pendid, "sapiu:$pendid" ]); if ($userid && $userid ne $u->{userid}) { $tries++; next; } # nobody's touching it, so mark it for us for 10 minutes $pend = $pendid; $school = $rows->{$pend}; last; } # step 3: find anything relating to this pending record, by name first my $sim_name = $dbh->selectall_hashref(qq{ SELECT pendid, userid, name, country, state, city, url FROM schools_pending WHERE name = ? AND country = ? AND pendid <> ? }, 'pendid', undef, $school->{name}, $school->{country}, $pend) || {}; return undef if $dbh->err; # step 4: now find anything in this location as 'possible' matches my @args = grep { $_ } ( $school->{state}, $school->{city} ); my $state = $school->{state} ? "= ?" : "IS NULL"; my $city = $school->{city} ? "= ?" : "IS NULL"; my $in = join(',', $pend, map { $_+0 } keys %$sim_name); my $sim_loc = $dbh->selectall_hashref(qq{ SELECT pendid, userid, name, country, state, city, url FROM schools_pending WHERE country = ? AND state $state AND city $city AND pendid NOT IN ($in) LIMIT 75 }, 'pendid', undef, $school->{country}, @args) || {}; return undef if $dbh->err; # step 5: note all of these as being 'used' my %set; foreach my $id ($pend, keys %$sim_name, keys %$sim_loc) { next if $set{$id}++; LJ::MemCache::set([ $id, "sapiu:$id" ], $u->{userid}, 600); } # step 6: break things down into secondary and tertiary matches my ($second, $third) = ({}, {}); foreach my $value (values %$sim_loc, values %$sim_name) { if (defined $value->{state} && defined $school->{state} && $value->{state} eq $school->{state} && defined $value->{city} && defined $school->{city} && $value->{city} eq $school->{city}) { # state+city present & matches, this is a good match $second->{$value->{pendid}} = $value; } else { # tertiary match $third->{$value->{pendid}} = $value; } } # step 6: return the results return { primary => $school, secondary => $second, tertiary => $third, }; } # # name: LJ::Schools::canonical_city_name # class: schools # des: Canonicalizes a cities name to a standard format. # args: city # des-city: Name of the city the school is located in. # returns: Canonicalized name of the city. # sub canonical_city_name { my $city = shift; # condense spaces and trim as our first act $city =~ s/^\s+//; $city =~ s/\s+$//; $city =~ s/\s+/ /g; # hash of do not capitalize these words my %nocaps = map { $_ => 1 } qw( de du la le of the and at for ); # canonicalize it to lowercase with each word capitalized $city = lc $city; $city = join(' ', map { $nocaps{$_} ? $_ : ucfirst(lc($_)) } split(/\s+/, $city)); # fix up "O'neill" to "O'Neill" $city =~ s/(O'\w)/uc $1/eg; # fix up "Mccarthy" to "McCarthy" $city =~ s/Mc(\w)/"Mc" . uc $1/eg; # fix up "H.c." into "H.C." $city =~ s/\b((?:\w\.)+ )/uc $1/eg; # fix up "A&m" to "A&M", effectively $city =~ s/\b(\w&\w)\b/uc $1/eg; # fix up "Foo-bar" into "Foo-Bar" $city =~ s/\b(\w)(\w+)-(\w)(\w+)\b/uc($1) . $2 . "-" . uc($3) . $4/eg; # fix "foo & bar" to "foo and bar" $city =~ s/ & / and /g; # now fix "A and M" ... mostly because "A & M" is expanded to such above $city =~ s/ A and M / A&M /; # now ensure the FIRST LETTER is capitalized # fixes case where city names "la Porte" aren't $city = ucfirst($city); return $city; } # # name: LJ::Schools::canonical_school_name # class: schools # des: Canonicalizes a school name to a standard format. # args: name, city # des-name: Name of the school to canonicalize. # des-city: Name of the city the school is located in. # returns: Canonicalized name of the school. # sub canonical_school_name { my ($name, $city) = @_; # condense spaces and trim as our first act $name =~ s/^\s+//; $name =~ s/\s+$//; $name =~ s/\s+/ /g; # remove initial The $name =~ s/^The //i; # hash of do not capitalize these words my %nocaps = map { $_ => 1 } qw( de du la le of the and at for ); # canonicalize it to lowercase with each word capitalized $name = lc $name; $name = join(' ', map { $nocaps{$_} ? $_ : ucfirst(lc($_)) } split(/\s+/, $name)); # fix up "O'neill" to "O'Neill" $name =~ s/(O'\w)/uc $1/eg; #' # fix up "Mccarthy" to "McCarthy" $name =~ s/Mc(\w)/"Mc" . uc $1/eg; # fix up "H.c." into "H.C." $name =~ s/\b((?:\w\.)+ )/uc $1/eg; # fix up "A&m" to "A&M", effectively $name =~ s/\b(\w&\w)\b/uc $1/eg; # fix up "Foo-bar" into "Foo-Bar" $name =~ s/\b(\w)(\w+)-(\w)(\w+)\b/uc($1) . $2 . "-" . uc($3) . $4/eg; # fix up Ft. $name =~ s/^Ft\.? /Fort /; # convert Saint to St. at BEGINNING of name $name =~ s/^Saint /St. /; $name =~ s/^St /St. /; # fix "foo & bar" to "foo and bar" $name =~ s/ & / and /g; # now fix "A and M" ... mostly because "A & M" is expanded to such above $name =~ s/ A and M / A&M /; # fix the fact that people cannot spell $name =~ s/ Elementry / Elementary /; $name =~ s/ Elemantary / Elementary /; $name =~ s/ Elementery / Elementary /; $name =~ s/ Prepatory / Preparatory /; $name =~ s/ Preperatory / Preparatory /; $name =~ s/ Prep / Preparatory /; $name =~ s/ Collage / College /; # fix things that are just "Foo High" $name =~ s/ Elementary$/ Elementary School/; $name =~ s/ Middle$/ Middle School/; $name =~ s/ High$/ High School/; # kill anybody putting ", State" or similar after the name? $name =~ s/\s*,\s*$//; # now ensure the FIRST LETTER is capitalized # fixes case where city names "la Porte" aren't $name = ucfirst($name); return $name; } # # name: LJ::Schools::edit_school # class: schools # des: Edits the information for a school. # args: sid, options # des-sid: School id to edit. # des-options: Hashref; Key=>value pairs that can include: name, city, state, country, # citycode, statecode, countrycode, url. # returns: 1 on success, undef on error. # sub edit_school { my ($sid, $opts) = @_; $sid += 0; return undef unless $sid && $opts && ref $opts eq 'HASH'; # verify we have location data my ($ctc, $sc, $cc) = LJ::Schools::determine_location_opts($opts); return undef unless $ctc && defined $sc && $cc; # verify we have minimum data (name) return undef unless $opts->{name}; # now undef things that need to be null if blank $sc ||= undef; $cc ||= undef; $opts->{url} ||= undef; # get db and update my $dbh = LJ::get_db_writer(); $dbh->do("UPDATE schools SET name = ?, city = ?, state = ?, country = ?, url = ? WHERE schoolid = ?", undef, $opts->{name}, $cc, $sc, $ctc, $opts->{url}, $sid); return undef if $dbh->err; # fix memcache LJ::MemCache::delete([ $sid, "sasi:$sid" ]); return 1; } # # name: LJ::Schools::delete_school # class: schools # des: Deletes an approved school # args: sid # des-sid: School id to delete # returns: 1 on success, undef on error. # sub delete_school { my $sid = shift; $sid += 0; return undef unless $sid; my $dbh = LJ::get_db_writer() or return undef; # Get everyone who attends this school and delete # that relationship. my @attendees = LJ::no_cache(sub { return LJ::Schools::get_attendees($sid) }); if (@attendees) { # Load users my $users = LJ::load_userids(@attendees); # Build them up by cluster and do memcache deletes my %clusters; foreach my $u (values %$users) { push @{$clusters{$u->{clusterid}}}, $u->{userid}; LJ::MemCache::delete([ $u->{userid}, "saui:$u->{userid}" ]); } # Do edge deletes on each cluster foreach my $c (keys %clusters) { # get_attendees is global, might contain info about # an expunged user. that's already gone, though, so we don't # need to worry here. next unless $c; my $dbcm = LJ::get_cluster_master($c); return undef unless $dbcm; my $in = join("','", @{$clusters{$c}}); $dbcm->do("DELETE FROM user_schools WHERE userid IN ('$in') AND schoolid = ?", undef, $sid); return undef if $dbcm->err; } # Delete attendence information for the school # # Doing this second as we could rebuild the previously deleted # information if we were pressed to do so, while this would # be more difficult to rebuild. $dbh->do("DELETE FROM schools_attended WHERE schoolid = ?", undef, $sid); return undef if $dbh->err; } # Delete the actual school $dbh->do("DELETE FROM schools WHERE schoolid = ?", undef, $sid); return undef if $dbh->err; LJ::MemCache::delete([ $sid, "sasi:$sid" ]); return 1; } # # name: LJ::Schools::reject_pending # class: schools # des: Deletes pending schools. # args: pendids # des-pendids: Arrayref of pendids to delete # returns: 1 on success, undef on error. # sub reject_pending { my ($pendids) = @_; return undef unless $pendids && ref $pendids eq 'ARRAY' && @$pendids; # now verify our pendids are valid @$pendids = grep { $_ } map { $_+0 } @$pendids; return undef unless @$pendids; # get database handle my $dbh = LJ::get_db_writer(); return undef unless $dbh; my $in = join(',', @$pendids); # and delete their pending rows, but ignore errors $dbh->do("DELETE FROM schools_pending WHERE pendid IN ($in)"); # and we're done return 1; } # # name: LJ::Schools::rename_state # class: schools # des: Renames a state within a country. # args: countrycode, fromstatecode, tostatecode # des-countrycode: The country the state to rename is in. # des-fromstatecode: Origin statecode. # des-tostatecode: Destination statecode. # returns: 1 on success, undef on error. # sub rename_state { my ($ctc, $from_sc, $to_sc) = @_; return undef unless $ctc && $to_sc; return undef unless $from_sc ne $to_sc; # get db my $dbh = LJ::get_db_writer(); return undef unless $dbh; my @args = grep { defined $_ && $_ } ($ctc, $from_sc, $to_sc); my $scs = $from_sc ? "state = ?" : "state IS NULL"; # rename the state, with an update ignore (merge dupes!) $dbh->do("UPDATE IGNORE schools SET state = ? WHERE country = ? AND $scs", undef, $to_sc, $ctc, $from_sc); return undef if $dbh->err; # now, find anything left, to merge it down... ahh, SQL. 'a' is the "FROM" # record, 'b' is the TO record, and we're merging schools FROM a TO b... get it? my $rows = $dbh->selectall_arrayref(qq{ SELECT a.schoolid, b.schoolid FROM schools a, schools b WHERE a.country = ? AND b.country = a.country AND a.$scs AND b.state = ? AND a.city = b.city AND a.name = b.name }, undef, @args); # now let's merge these down if ($rows && @$rows) { # merge a -> b, which is merge_schools(b, a) LJ::Schools::merge_schools($_->[1], $_->[0]) foreach @$rows; } # all done return 1; } # # name: LJ::Schools::rename_city # class: schools # des: Renames a city within a country and state. # args: countrycode, statecode, fromcitycode, tocitycode # des-countrycode: The country the city to rename is in. # des-statecode: The state the city to rename is in. # des-fromcitycode: Origin citycode. # des-tocitycode: Destination citycode. # returns: 1 on success, undef on error. # sub rename_city { my ($ctc, $sc, $from_cc, $to_cc) = @_; return undef unless $ctc && $sc && $from_cc && $to_cc; return undef unless $from_cc ne $to_cc; # get db my $dbh = LJ::get_db_writer(); return undef unless $dbh; # rename the state, with an update ignore (merge dupes!) $dbh->do("UPDATE IGNORE schools SET city = ? WHERE country = ? AND state = ? AND city = ?", undef, $to_cc, $ctc, $sc, $from_cc); return undef if $dbh->err; # now, find anything left, to merge it down... ahh, SQL. 'a' is the "FROM" # record, 'b' is the TO record, and we're merging schools FROM a TO b... get it? my $rows = $dbh->selectall_arrayref(qq{ SELECT a.schoolid, b.schoolid FROM schools a, schools b WHERE a.country = ? AND b.country = a.country AND a.state = ? AND b.state = a.state AND a.city = ? AND b.city = ? AND a.name = b.name }, undef, $ctc, $sc, $from_cc, $to_cc); # now let's merge these down if ($rows && @$rows) { # merge a -> b, which is merge_schools(b, a) LJ::Schools::merge_schools($_->[1], $_->[0]) foreach @$rows; } # all done return 1; } # # name: LJ::Schools::merge_schools # class: schools # des: Merges schools into one record. # args: parentsid, childsids # des-parentsid: The master/parent schoolid to merge the other schools into. # des-childsids: Arrayref of schoolids to merge into the parentsid. # returns: 1 on success, undef on error. # sub merge_schools { my ($psid, $csids) = @_; $psid += 0; $csids = [ $csids ] unless ref $csids; $csids = [ grep { defined $_ && $_ > 0 && $_ != $psid } @$csids ]; return undef unless $psid && @$csids; # validate the schools my $schools = LJ::Schools::load_schools($psid, @$csids); return undef unless $schools->{$psid}; # database handles my %dbs; my $dbh = LJ::get_db_writer(); # now iterate and combine the schools up foreach my $csid (@$csids) { next unless $schools->{$csid}; # basically find everybody who attended this school... we can't use # the API because it does a LIMIT 1000 and we need everybody my $uids = $dbh->selectcol_arrayref("SELECT userid FROM schools_attended WHERE schoolid = ?", undef, $csid); return undef if $dbh->err; next unless $uids; # now we have a list of users, load them my $us = LJ::load_userids(@$uids); next unless $us; # sort by cluster my %idsbyc; foreach my $u (values %$us) { push @{$idsbyc{$u->{clusterid}} ||= []}, $u; } # now iterate by cluster foreach my $cid (keys %idsbyc) { my $dbcm = ($dbs{$cid} ||= LJ::get_cluster_master($cid)); next unless $dbcm; # we're going to update the schoolid for all users on this cluster my $in = join(',', map { $_->{userid} } @{$idsbyc{$cid}}); $dbcm->do(qq{ UPDATE IGNORE user_schools SET schoolid = ? WHERE userid IN ($in) AND schoolid = ? }, undef, $psid, $csid); next if $dbcm->err; # now delete any that are still around with the old ID -- this is due to # the fact that people may have listed both. ignore errors here. $dbcm->do(qq{ DELETE FROM user_schools WHERE userid IN ($in) AND schoolid = ? }, undef, $csid); } # and now update it on the global, if we have users (empty schools need merging too!) if (@$uids) { my $in = join(',', map { $_+0 } keys %$us); $dbh->do(qq{ UPDATE IGNORE schools_attended SET schoolid = ? WHERE userid IN ($in) AND schoolid = ? }, undef, $psid, $csid); return undef if $dbh->err; } # and again, delete the ones that didn't rename foreach my $table (qw(schools_attended schools)) { $dbh->do("DELETE FROM $table WHERE schoolid = ?", undef, $csid); } # memcache clearing LJ::MemCache::delete([ $csid, "sasi:$csid" ]); LJ::MemCache::delete([ $_, "saui:$_" ]) foreach @$uids; } # done return 1; } # # name: LJ::Schools::find_existing # class: schools # des: Finds an existing school by given criteria. # args: country, name, state?, city?, url? # des-country: country school is in. # des-name: name of school. # des-state: state school is in, or nothing for undefined state. # des-city: Optional. City school is in. # des-url: Optional. URL of school. # returns: single scalar schoolid on exact match, # arrayref of school ids found if multiple, # undef on error; or no results. # sub find_existing { my ($country, $name, $state, $city, $url) = @_; return undef unless $country && $name; my $dbh = LJ::get_db_writer(); return undef unless $dbh; # Now try to find it by name and location my $scs = $state ? "state = ?" : "state IS NULL"; my @args = grep { defined $_ && $_ } ($country, $state); my $sids = $dbh->selectall_arrayref ("SELECT schoolid, name, url, city FROM schools WHERE country = ? AND $scs", undef, @args); return undef if $dbh->err; # Consider them matches if name or URL matches my @res; foreach my $sch (@$sids) { # Return one schoolid if city and (name or url) both match return $sch->[0] if $city && $sch->[3] =~ /^\Q$city\E$/i && $sch->[1] =~ /^\Q$name\E$/i; return $sch->[0] if $city && $url && $sch->[3] =~ /^\Q$city\E$/i && $sch->[2] =~ /^\Q$url\/?\E$/i; # Otherwise, add it as a possible match if name # sort of matches or if url fully matches push @res, $sch->[0] if $sch->[1] =~ /\Q$name\E/i; push @res, $sch->[0] if $url && $sch->[2] =~ /^\Q$url\/?\E$/i; } return \@res if @res; return undef; } 1;