| Current File : /home/jvzmxxx/wiki1/extensions/Wikibase/repo/includes/Store/Sql/SqlChangeDispatchCoordinator.php |
<?php
namespace Wikibase\Store\Sql;
use DatabaseBase;
use DBUnexpectedError;
use Exception;
use LoadBalancer;
use MWException;
use Wikibase\Lib\Reporting\MessageReporter;
use Wikibase\Lib\Reporting\NullMessageReporter;
use Wikibase\Store\ChangeDispatchCoordinator;
use Wikimedia\Assert\Assert;
/**
* SQL based implementation of ChangeDispatchCoordinator.
*
* @since 0.4
*
* @license GPL-2.0+
* @author Daniel Kinzler
*/
class SqlChangeDispatchCoordinator implements ChangeDispatchCoordinator {

	/**
	 * @var callable Override for the array_rand function
	 */
	private $array_rand = 'array_rand';

	/**
	 * @var callable Override for the time function
	 */
	private $time = 'time';

	/**
	 * @var callable|null Override for $db->lock
	 */
	private $engageClientLockOverride = null;

	/**
	 * @var callable|null Override for $db->unlock
	 */
	private $releaseClientLockOverride = null;

	/**
	 * @var callable|null Override for !$db->lockIsFree
	 */
	private $isClientLockUsedOverride = null;

	/**
	 * @var int The number of changes to pass to a client wiki at once.
	 */
	private $batchSize = 1000;

	/**
	 * @var int Number of seconds to wait before dispatching to the same wiki again.
	 *   This affects the effective batch size, and this influences how changes
	 *   can be coalesced.
	 */
	private $dispatchInterval = 60;

	/**
	 * @var int Number of seconds to wait before testing a lock. Any target with a lock
	 *   timestamp newer than this will not be considered for selection.
	 */
	private $lockGraceInterval = 60;

	/**
	 * @var int Number of target wikis to select as a base set for random selection.
	 *   Setting this to 1 causes strict "oldest first" behavior, with the possibility
	 *   of grind/starvation if dispatching to the oldest wiki fails.
	 *   Setting this equal to (or greater than) the number of target wikis
	 *   causes a completely random selection of the target, regardless of when it
	 *   was last selected for dispatch.
	 */
	private $randomness = 10;

	/**
	 * @var string The name of the database table used to record state.
	 */
	private $stateTable = 'wb_changes_dispatch';

	/**
	 * @todo This shouldn't be here.
	 * @var string Name of the changes table.
	 */
	private $changesTable = 'wb_changes';

	/**
	 * @var MessageReporter
	 */
	private $messageReporter;

	/**
	 * @var string|false The logical name of the repository's database
	 */
	private $repoDB;

	/**
	 * @var string The repo's global wiki ID
	 */
	private $repoSiteId;

	/**
	 * @param string|false $repoDB The logical name of the repository's database
	 * @param string $repoSiteId The repo's global wiki ID
	 */
	public function __construct( $repoDB, $repoSiteId ) {
		Assert::parameterType( 'string|boolean', $repoDB, '$repoDB' );

		$this->repoDB = $repoDB;
		$this->repoSiteId = $repoSiteId;
		// Messages are discarded until a reporter is injected via setMessageReporter().
		$this->messageReporter = new NullMessageReporter();
	}

	/**
	 * @return int
	 */
	public function getBatchSize() {
		return $this->batchSize;
	}

	/**
	 * Sets the number of changes we would prefer to process in one go.
	 * Clients that are lagged by fewer changes than this may be skipped by selectClient().
	 *
	 * @param int $batchSize
	 */
	public function setBatchSize( $batchSize ) {
		$this->batchSize = $batchSize;
	}

	/**
	 * @return MessageReporter
	 */
	public function getMessageReporter() {
		return $this->messageReporter;
	}

	/**
	 * @param MessageReporter $messageReporter
	 */
	public function setMessageReporter( MessageReporter $messageReporter ) {
		$this->messageReporter = $messageReporter;
	}

	/**
	 * @return int
	 */
	public function getRandomness() {
		return $this->randomness;
	}

	/**
	 * Sets the randomness level: selectClient() will randomly pick one of the $randomness
	 * most lagged eligible client wikis.
	 *
	 * @param int $randomness
	 */
	public function setRandomness( $randomness ) {
		$this->randomness = $randomness;
	}

	/**
	 * @return int
	 */
	public function getLockGraceInterval() {
		return $this->lockGraceInterval;
	}

	/**
	 * Sets the number of seconds after a lock should be challenged. This should be at least twice
	 * as long as we expect a dispatch pass for a single wiki to take. Challenging locks after a
	 * while safeguards against starving clients that were locked but never unlocked by a process
	 * that has since died.
	 *
	 * @param int $lockGraceInterval
	 */
	public function setLockGraceInterval( $lockGraceInterval ) {
		$this->lockGraceInterval = $lockGraceInterval;
	}

	/**
	 * @return int
	 */
	public function getDispatchInterval() {
		return $this->dispatchInterval;
	}

	/**
	 * Sets the number of seconds we would prefer to let a client "rest" before dispatching
	 * to it again. Clients that have received updates less than $dispatchInterval seconds ago
	 * may be skipped by selectClient().
	 *
	 * @param int $dispatchInterval
	 */
	public function setDispatchInterval( $dispatchInterval ) {
		$this->dispatchInterval = $dispatchInterval;
	}

	/**
	 * @return callable
	 */
	public function getArrayRandOverride() {
		return $this->array_rand;
	}

	/**
	 * Set override for array_rand(), for testing.
	 *
	 * @param callable $array_rand
	 */
	public function setArrayRandOverride( $array_rand ) {
		$this->array_rand = $array_rand;
	}

	/**
	 * @return callable
	 */
	public function getTimeOverride() {
		return $this->time;
	}

	/**
	 * Set override for time(), for testing.
	 *
	 * @param callable $time
	 */
	public function setTimeOverride( $time ) {
		$this->time = $time;
	}

	/**
	 * @return callable|null
	 */
	public function getEngageClientLockOverride() {
		return $this->engageClientLockOverride;
	}

	/**
	 * Set override for $db->lock, for testing.
	 *
	 * @param callable $engageClientLockOverride
	 */
	public function setEngageClientLockOverride( $engageClientLockOverride ) {
		$this->engageClientLockOverride = $engageClientLockOverride;
	}

	/**
	 * @return callable|null
	 */
	public function getIsClientLockUsedOverride() {
		return $this->isClientLockUsedOverride;
	}

	/**
	 * Set override for !$db->lockIsFree, for testing.
	 *
	 * @param callable $isClientLockUsedOverride
	 */
	public function setIsClientLockUsedOverride( $isClientLockUsedOverride ) {
		$this->isClientLockUsedOverride = $isClientLockUsedOverride;
	}

	/**
	 * @return callable|null
	 */
	public function getReleaseClientLockOverride() {
		return $this->releaseClientLockOverride;
	}

	/**
	 * Set override for $db->unlock, for testing.
	 *
	 * @param callable $releaseClientLockOverride
	 */
	public function setReleaseClientLockOverride( $releaseClientLockOverride ) {
		$this->releaseClientLockOverride = $releaseClientLockOverride;
	}

	/**
	 * @return LoadBalancer the repo's database load balancer.
	 */
	private function getRepoLB() {
		return wfGetLB( $this->repoDB );
	}

	/**
	 * @return DatabaseBase A connection to the repo's master database
	 */
	private function getRepoMaster() {
		return $this->getRepoLB()->getConnection( DB_MASTER, array(), $this->repoDB );
	}

	/**
	 * @param DatabaseBase $db The repo database connection to release for re-use.
	 */
	private function releaseRepoMaster( DatabaseBase $db ) {
		$this->getRepoLB()->reuseConnection( $db );
	}

	/**
	 * Selects a client wiki and locks it. If no suitable client wiki can be found,
	 * this method returns null.
	 *
	 * Note: this implementation will try a wiki from the list returned by getCandidateClients()
	 * at random. If all have been tried and failed, it returns null.
	 *
	 * @return array|null An associative array containing the state of the selected client wiki
	 *   (or null, if no target could be locked). Fields are:
	 *
	 * * chd_site: the client wiki's global site ID
	 * * chd_db: the client wiki's logical database name
	 * * chd_seen: the last change ID processed for that client wiki
	 * * chd_touched: timestamp giving the last time that client wiki was updated
	 * * chd_lock: the name of a global lock currently active for that client wiki
	 *
	 * @throws Exception Anything thrown by lockClient() propagates to the caller.
	 *
	 * @see releaseClient()
	 */
	public function selectClient() {
		$candidates = $this->getCandidateClients();

		while ( $candidates ) {
			// pick one; array_rand() returns a key, so sparse keys left by unset() are fine
			$k = call_user_func( $this->array_rand, $candidates );
			$wiki = $candidates[ $k ];
			unset( $candidates[$k] );

			// lock it
			$state = $this->lockClient( $wiki );

			if ( $state ) {
				// got one
				return $state;
			}

			wfDebugLog( __METHOD__, 'Failed to grab dispatch lock for ' . $wiki );
			// try again
		}

		// we ran out of candidates
		wfDebugLog( __METHOD__, 'Could not lock any of the candidate client wikis for dispatching' );
		return null;
	}

	/**
	 * @return int The current time as a timestamp, in seconds since Epoch.
	 */
	private function now() {
		return call_user_func( $this->time );
	}

	/**
	 * Returns a list of possible clients for the next pass.
	 * If no suitable clients are found, the resulting list will be empty.
	 *
	 * @return array List of chd_site values, as returned by selectFieldValues().
	 *
	 * @throws Exception Database errors are re-thrown after the connection was released.
	 *
	 * @see selectClient()
	 */
	private function getCandidateClients() {
		$db = $this->getRepoMaster();

		try {
			// XXX: subject to clock skew. Use DB based "now" time?
			// Read the clock once so both cut-off timestamps refer to the same instant.
			$now = $this->now();
			$freshDispatchTime = wfTimestamp( TS_MW, $now - $this->dispatchInterval );
			$staleLockTime = wfTimestamp( TS_MW, $now - $this->lockGraceInterval );

			// TODO: pass the max change ID as a parameter!
			$row = $db->selectRow(
				$this->changesTable,
				'max( change_id ) as maxid',
				array(),
				__METHOD__ );

			$maxId = $row ? $row->maxid : 0;

			// Select all clients that:
			//   have not been touched for $dispatchInterval seconds
			//      ( or are lagging by more changes than given by batchSize )
			//   and are not locked
			//      ( or the lock is older than $lockGraceInterval ).
			//   and have not seen all changes
			//   and are not disabled
			// Limit the list to $randomness items. Candidates will be picked
			// from the resulting list at random.
			$candidates = $db->selectFieldValues(
				$this->stateTable,
				'chd_site',
				array( '( chd_lock is NULL ' . // not locked or...
						' OR chd_touched < ' . $db->addQuotes( $staleLockTime ) . ' ) ', // ...the lock is old
					'( chd_touched < ' . $db->addQuotes( $freshDispatchTime ) . // and wasn't touched too recently or...
						' OR ( ' . (int)$maxId. ' - chd_seen ) > ' . (int)$this->batchSize . ') ' , // or it's lagging by more changes than batchSize
					'chd_seen < ' . (int)$maxId, // and not fully up to date.
					'chd_disabled = 0' // and not disabled
				),
				__METHOD__,
				array(
					'ORDER BY' => 'chd_seen ASC',
					'LIMIT' => (int)$this->randomness
				)
			);
		} catch ( Exception $ex ) {
			$this->releaseRepoMaster( $db );
			throw $ex;
		}

		// Hand the connection back to the load balancer, as all other methods
		// that call getRepoMaster() do.
		$this->releaseRepoMaster( $db );

		return $candidates;
	}

	/**
	 * Initializes the dispatch table by injecting dummy records for all target wikis
	 * that are in the configuration but not yet in the dispatch table.
	 *
	 * @param string[] $clientWikiDBs Associative array mapping client wiki IDs to
	 *   client wiki (logical) database names.
	 *
	 * @throws DBUnexpectedError
	 * @throws Exception Database errors are re-thrown after the connection was released.
	 */
	public function initState( array $clientWikiDBs ) {
		$db = $this->getRepoMaster();

		try {
			$trackedSiteIds = $db->selectFieldValues(
				$this->stateTable,
				'chd_site',
				array(),
				__METHOD__
			);

			// Keep only the configured sites that have no row in the state table yet.
			$untracked = array_diff_key( $clientWikiDBs, array_flip( $trackedSiteIds ) );

			foreach ( $untracked as $siteID => $wikiDB ) {
				$state = array(
					'chd_site' => $siteID,
					'chd_db' => $wikiDB,
					'chd_seen' => 0,
					'chd_touched' => '00000000000000',
					'chd_lock' => null,
					'chd_disabled' => 0,
				);

				// IGNORE: another process may have inserted the same row in the meantime.
				$db->insert(
					$this->stateTable,
					$state,
					__METHOD__,
					array( 'IGNORE' )
				);

				$this->log( "Initialized dispatch state for $siteID" );
			}
		} catch ( Exception $ex ) {
			$this->releaseRepoMaster( $db );
			throw $ex;
		}

		$this->releaseRepoMaster( $db );
	}

	/**
	 * Attempt to lock the given target wiki. If it can't be locked because
	 * another dispatch process is working on it, this method returns false.
	 * False is also returned if the site has no row in the dispatch table.
	 *
	 * On every path (success, failure, exception) the transaction started here is
	 * closed and the master connection is handed back to the load balancer.
	 *
	 * @param string $siteID The ID of the client wiki to lock.
	 *
	 * @throws Exception Any exception raised while the transaction is open is re-thrown
	 *   after rolling back.
	 * @return array|false An associative array containing the state of the selected client wiki
	 *   (see selectClient()) or false if the client wiki could not be locked.
	 *
	 * @see selectClient()
	 */
	public function lockClient( $siteID ) {
		$this->trace( "Trying $siteID" );

		// start transaction
		$db = $this->getRepoMaster();
		$db->begin( __METHOD__ );

		try {
			$this->trace( 'Loaded repo db master' );

			// get client state
			$state = $db->selectRow(
				$this->stateTable,
				array( 'chd_site', 'chd_db', 'chd_seen', 'chd_touched', 'chd_lock', 'chd_disabled' ),
				array( 'chd_site' => $siteID ),
				__METHOD__,
				array( 'FOR UPDATE' )
			);

			$this->trace( "Loaded dispatch changes row for $siteID" );

			if ( !$state ) {
				$this->warn( "ERROR: $siteID is not in the dispatch table." );

				// Do not leave the transaction open or the connection checked out
				// when bailing out early.
				$db->rollback( __METHOD__ );
				$this->releaseRepoMaster( $db );
				return false;
			}

			$this->trace( "Loading state for $siteID" );

			// turn the row object into an array
			$state = get_object_vars( $state );

			$lock = $this->getClientLockName( $siteID );

			if ( $state['chd_lock'] !== null ) {
				// bail out if another dispatcher instance is holding a lock for that wiki
				if ( $this->isClientLockUsed( $db, $lock ) ) {
					$this->trace( "$siteID is already being handled by another process."
						. " (lock: " . $state['chd_lock'] . ")" );

					$db->rollback( __METHOD__ );
					$this->releaseRepoMaster( $db );
					return false;
				}
			}

			$ok = $this->engageClientLock( $db, $lock );

			if ( !$ok ) {
				// This really shouldn't happen, since we already checked if another process has a lock.
				// The write lock we are holding on the wb_changes_dispatch table should be preventing
				// any race conditions.
				// However, another process may still hold the lock if it grabbed it without locking
				// wb_changes_dispatch, or if it didn't record the lock in wb_changes_dispatch.

				$this->trace( "Warning: Failed to acquire lock $lock for site $siteID!" );

				$db->rollback( __METHOD__ );
				$this->releaseRepoMaster( $db );
				return false;
			}

			$this->trace( "Locked client $siteID with $lock" );

			$state['chd_lock'] = $lock;
			$state['chd_touched'] = wfTimestamp( TS_MW, $this->now() ); // XXX: use DB time

			// update state record for already known client wiki
			$db->update(
				$this->stateTable,
				$state,
				array( 'chd_site' => $state['chd_site'] ),
				__METHOD__
			);
		} catch ( Exception $ex ) {
			$db->rollback( __METHOD__ );
			$this->releaseRepoMaster( $db );
			throw $ex;
		}

		$db->commit( __METHOD__ );
		$this->releaseRepoMaster( $db );

		$this->trace( "Locked site $siteID at {$state['chd_seen']}." );

		unset( $state['chd_disabled'] ); // don't mess with this.

		return $state;
	}

	/**
	 * Updates the given client wiki's entry in the dispatch table and
	 * releases the global lock on that wiki.
	 *
	 * @param array $state Associative array representing the client wiki's state before the
	 *   update pass, as returned by selectClient().
	 *
	 * @throws Exception Any exception raised while the transaction is open is re-thrown
	 *   after rolling back.
	 * @see selectClient()
	 */
	public function releaseClient( array $state ) {
		$siteID = $state['chd_site'];
		$wikiDB = $state['chd_db'];

		// start transaction
		$db = $this->getRepoMaster();
		$db->begin( __METHOD__ );

		try {
			$lock = $state['chd_lock'];

			if ( !$this->releaseClientLock( $db, $lock ) ) {
				// Best effort: the state record is still updated below, but leave a
				// trace so a lock that could not be released does not go unnoticed.
				$this->trace( "Warning: Failed to release lock $lock for site $siteID!" );
			}

			$state['chd_lock'] = null;
			$state['chd_touched'] = wfTimestamp( TS_MW, $this->now() );
			//XXX: use the DB's time to avoid clock skew?

			// update the state record with the new state.
			$db->update(
				$this->stateTable,
				$state,
				array( 'chd_site' => $state['chd_site'] ),
				__METHOD__
			);
		} catch ( Exception $ex ) {
			$db->rollback( __METHOD__ );
			$this->releaseRepoMaster( $db );
			throw $ex;
		}

		$db->commit( __METHOD__ );
		$this->releaseRepoMaster( $db );

		$this->trace( "Released $wikiDB for site $siteID at {$state['chd_seen']}." );
	}

	/**
	 * Determines the name of the global lock that should be used to lock the given client.
	 *
	 * @param string $siteID The site ID of the wiki to lock
	 *
	 * @return string the lock name to use.
	 */
	private function getClientLockName( $siteID ) {
		// NOTE: Lock names are global, not scoped per database. To avoid clashes,
		// we need to include both the ID of the repo and the ID of the client.
		$name = "Wikibase.{$this->repoSiteId}.dispatchChanges.$siteID";
		return str_replace( ' ', '_', $name );
	}

	/**
	 * Tries to acquire a global lock on the given client wiki.
	 *
	 * @param DatabaseBase $db The database connection to work on.
	 * @param string $lock The name of the lock to engage.
	 *
	 * @return bool whether the lock was engaged successfully.
	 */
	private function engageClientLock( DatabaseBase $db, $lock ) {
		if ( isset( $this->engageClientLockOverride ) ) {
			return call_user_func( $this->engageClientLockOverride, $db, $lock );
		}

		return $db->lock( $lock, __METHOD__ );
	}

	/**
	 * Releases the given global lock on the given client wiki.
	 *
	 * @param DatabaseBase $db The database connection to work on.
	 * @param string $lock The name of the lock to release.
	 *
	 * @return bool whether the lock was released successfully.
	 */
	private function releaseClientLock( DatabaseBase $db, $lock ) {
		if ( isset( $this->releaseClientLockOverride ) ) {
			return call_user_func( $this->releaseClientLockOverride, $db, $lock );
		}

		return $db->unlock( $lock, __METHOD__ );
	}

	/**
	 * Checks the given global lock on the given client wiki.
	 *
	 * @param DatabaseBase $db The database connection to work on.
	 * @param string $lock The name of the lock to check.
	 *
	 * @return bool true if the given lock is currently held by another process, false otherwise.
	 */
	private function isClientLockUsed( DatabaseBase $db, $lock ) {
		if ( isset( $this->isClientLockUsedOverride ) ) {
			return call_user_func( $this->isClientLockUsedOverride, $db, $lock );
		}

		return !$db->lockIsFree( $lock, __METHOD__ );
	}

	/**
	 * Emits a warning via wfLogWarning() and forwards it to the message reporter.
	 *
	 * @param string $message
	 */
	private function warn( $message ) {
		wfLogWarning( $message );
		$this->messageReporter->reportMessage( $message );
	}

	/**
	 * Writes the message to this class's debug log channel and forwards it to
	 * the message reporter.
	 *
	 * @param string $message
	 */
	private function log( $message ) {
		wfDebugLog( __CLASS__, $message );
		$this->messageReporter->reportMessage( $message );
	}

	/**
	 * Writes the message to this class's debug log channel only.
	 *
	 * @param string $message
	 */
	private function trace( $message ) {
		wfDebugLog( __CLASS__, $message );
	}

}