<?php

namespace Wikibase\Repo;

use MWException;
use Wikibase\Change;
use Wikibase\ChunkAccess;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\EntityChange;
use Wikibase\ItemChange;
use Wikibase\Lib\Reporting\ExceptionHandler;
use Wikibase\Lib\Reporting\LogWarningExceptionHandler;
use Wikibase\Lib\Reporting\MessageReporter;
use Wikibase\Lib\Reporting\NullMessageReporter;
use Wikibase\Repo\Notifications\ChangeNotificationSender;
use Wikibase\Store\ChangeDispatchCoordinator;
use Wikibase\Store\SubscriptionLookup;

/**
 * Interactor class for dispatching change notifications to client wikis via the job queue.
 *
 * @since 0.4
 *
 * @license GPL-2.0+
 * @author Daniel Kinzler
 */
class ChangeDispatcher {

	/**
	 * @var int The number of changes to pass to a client wiki at once.
	 */
	private $batchSize = 1000;

	/**
	 * @var int Factor used to compute the number of changes to load from the changes table at once
	 *           based on $this->batchSize.
	 */
	private $batchChunkFactor = 3;

	/**
	 * @var int Maximum number of chunks or passes per wiki when selecting pending changes.
	 */
	private $maxChunks = 15;

	/**
	 * @var bool Whether output should be verbose.
	 */
	private $verbose = false;

	/**
	 * @var ChangeNotificationSender
	 */
	private $notificationSender;

	/**
	 * @var ChangeDispatchCoordinator
	 */
	private $coordinator;

	/**
	 * @var ExceptionHandler
	 */
	private $exceptionHandler;

	/**
	 * @var MessageReporter
	 */
	private $messageReporter;

	/**
	 * @var ChunkAccess Access to the changes table.
	 */
	private $chunkedChangesAccess;

	/**
	 * @var SubscriptionLookup
	 */
	private $subscriptionLookup;

	/**
	 * Wires up the collaborators. The exception handler defaults to a
	 * LogWarningExceptionHandler and the message reporter to a NullMessageReporter;
	 * both can be replaced via their setters.
	 *
	 * @param ChangeDispatchCoordinator $coordinator
	 * @param ChangeNotificationSender $notificationSender
	 * @param ChunkAccess $chunkedChangesAccess Access to the changes table. Should only return
	 * Change objects from loadChunk.
	 * @param SubscriptionLookup $subscriptionLookup
	 */
	public function __construct(
		ChangeDispatchCoordinator $coordinator,
		ChangeNotificationSender $notificationSender,
		ChunkAccess $chunkedChangesAccess,
		SubscriptionLookup $subscriptionLookup
	) {
		$this->coordinator = $coordinator;
		$this->notificationSender = $notificationSender;
		$this->subscriptionLookup = $subscriptionLookup;

		$this->chunkedChangesAccess = $chunkedChangesAccess;

		$this->exceptionHandler = new LogWarningExceptionHandler();
		$this->messageReporter = new NullMessageReporter();
	}

	/**
	 * @return bool Whether trace messages are reported.
	 */
	public function isVerbose() {
		return $this->verbose;
	}

	/**
	 * @param bool $verbose Whether trace messages should be reported.
	 */
	public function setVerbose( $verbose ) {
		$this->verbose = $verbose;
	}

	/**
	 * @return MessageReporter
	 */
	public function getMessageReporter() {
		return $this->messageReporter;
	}

	/**
	 * Sets the reporter that receives all log and trace messages.
	 *
	 * @param MessageReporter $messageReporter
	 */
	public function setMessageReporter( MessageReporter $messageReporter ) {
		$this->messageReporter = $messageReporter;
	}

	/**
	 * @return ExceptionHandler
	 */
	public function getExceptionHandler() {
		return $this->exceptionHandler;
	}

	/**
	 * @param ExceptionHandler $exceptionHandler
	 */
	public function setExceptionHandler( ExceptionHandler $exceptionHandler ) {
		$this->exceptionHandler = $exceptionHandler;
	}

	/**
	 * @return int The number of changes to pass to a client wiki at once.
	 */
	public function getBatchSize() {
		return $this->batchSize;
	}

	/**
	 * @param int $batchSize The number of changes to pass to a client wiki at once.
	 */
	public function setBatchSize( $batchSize ) {
		$this->batchSize = $batchSize;
	}

	/**
	 * @return int
	 */
	public function getBatchChunkFactor() {
		return $this->batchChunkFactor;
	}

	/**
	 * @param int $maxChunks Maximum number of chunks or passes per wiki when selecting pending
	 * changes.
	 */
	public function setMaxChunks( $maxChunks ) {
		$this->maxChunks = $maxChunks;
	}

	/**
	 * @return int Maximum number of chunks or passes per wiki when selecting pending changes.
	 */
	public function getMaxChunks() {
		return $this->maxChunks;
	}

	/**
	 * Sets the chunk factor. This governs how many changes getPendingChanges will load
	 * in one go: the number loaded is the batch size multiplied by the batch chunk factor.
	 * A chunk factor > 1 reduces the need to load more changes in case not all end up in
	 * the batch of pending changes due to programmatic filtering (e.g. by whether the
	 * client site is subscribed to a given change).
	 *
	 * @example Consider loading a batch of 5 changes to dispatch to foowiki, but of the first
	 * 5 changes, only 3 are relevant to foowiki. A chunk factor of 1 means only 5 changes have
	 * been loaded for examination, meaning at least one more batch has to be loaded. A chunk
	 * factor of 2 means 10 changes have been loaded for examination, which makes it more likely
	 * to find the desired 5 changes for foowiki without loading more changes.
	 *
	 * @param int $batchChunkFactor
	 */
	public function setBatchChunkFactor( $batchChunkFactor ) {
		$this->batchChunkFactor = $batchChunkFactor;
	}

	/**
	 * Selects a client wiki and locks it. This simply delegates to the
	 * ChangeDispatchCoordinator passed to the constructor.
	 *
	 * @return array|null An associative array containing the state of the selected client wiki
	 *               (or null, if no target could be locked). Fields are:
	 *
	 * * chd_site:     the client wiki's global site ID
	 * * chd_db:       the client wiki's logical database name
	 * * chd_seen:     the last change ID processed for that client wiki
	 * * chd_touched:  timestamp giving the last time that client wiki was updated
	 * * chd_lock:     the name of a global lock currently active for that client wiki
	 *
	 * @throws MWException NOTE(review): documented as "no available client wiki could be
	 *         found", which overlaps with the null return above; the exact failure behavior
	 *         is defined by the coordinator implementation, confirm there.
	 *
	 * @see ChangeDispatchCoordinator::releaseClient()
	 */
	public function selectClient() {
		return $this->coordinator->selectClient();
	}

	/**
	 * Performs one update pass. This involves the following steps:
	 *
	 * 1) Get a batch of changes for the client wiki.
	 * 2) Post a notification job to the client wiki's job queue.
	 * 3) Update the dispatch log for the client wiki, and release it.
	 *
	 * @param array $wikiState the dispatch state of a client wiki, as returned by selectClient()
	 * @return int The number of changes dispatched
	 */
	public function dispatchTo( array $wikiState ) {
		$siteID = $wikiState['chd_site'];
		$after = (int)$wikiState['chd_seen'];

		// get relevant changes
		$this->trace( "Finding pending changes for $siteID" );
		list( $changes, $continueAfter ) = $this->getPendingChanges( $siteID, $after );

		$n = count( $changes );

		if ( $n > 0 ) {
			$this->trace( "Dispatching $n changes to $siteID, up to #$continueAfter" );

			// notify the client wiki about the changes
			$this->notificationSender->sendNotification( $siteID, $changes );
		}

		// Record how far we got, even if nothing was relevant, so the next pass
		// does not re-examine the same changes.
		$wikiState['chd_seen'] = $continueAfter;

		$this->coordinator->releaseClient( $wikiState );

		if ( $n === 0 ) {
			$this->trace( "Posted no changes to $siteID (nothing to do). "
						. "Next ID is $continueAfter." );
		} else {
			/* @var Change $last */
			$last = end( $changes );

			$this->log( "Posted $n changes to $siteID, "
				. "up to ID " . $last->getId() . ", timestamp " . $last->getTime() . ". "
				. "Lag is " . $last->getAge() . " seconds. "
				. "Next ID is $continueAfter." );
		}

		return $n;
	}

	/**
	 * Returns a batch of changes for the given client wiki, starting from the given position
	 * in the wb_changes table. The changes may be filtered to only include those changes that
	 * are relevant to the given client wiki. The number of changes returned by this method
	 * is limited by $this->batchSize. Changes are returned with IDs in ascending order.
	 *
	 * @note: due to programmatic filtering, this method may use multiple database queries to
	 * collect the changes for the next batch. The number of requests needed can be adjusted
	 * using $this->batchChunkFactor (via the 'dispatchBatchChunkFactor' setting). At most
	 * $this->maxChunks chunks are examined per call.
	 *
	 * @param string $siteID The client wiki's global site identifier, as used by sitelinks.
	 * @param int $after The last change ID processed by a previous run. All changes returned
	 *                     will have an ID greater than $after.
	 *
	 * @return array( $batch, $seen ), where $batch is a list of Change objects, and $seen
	 *         is the ID of the last change considered for the batch (even if that was filtered
	 *         out), for use as a continuation marker. If no changes were found at all, $seen
	 *         equals $after.
	 */
	public function getPendingChanges( $siteID, $after ) {
		// Loop until we have a full batch of size $this->batchSize,
		// or there are no more changes to process.

		//NOTE: we could try to filter the changes directly in the DB, but
		//      that will no longer work once we have a client side usage tracking table
		//      for free-form use.

		$batch = array();
		$batchSize = 0;
		$chunkSize = $this->batchSize * $this->batchChunkFactor;
		$chunksExamined = 0;

		// Track the change ID from which the next pass should start.
		// Note that this is non-trivial due to programmatic filtering.
		$lastIdSeen = $after;

		while ( $batchSize < $this->batchSize && $chunksExamined < $this->maxChunks ) {
			// get a chunk of changes
			$chunk = $this->chunkedChangesAccess->loadChunk( $after + 1, $chunkSize );

			if ( empty( $chunk ) ) {
				break; // no more changes
			}

			// start the next round here
			$last = end( $chunk );
			$after = $last->getId();
			reset( $chunk ); // don't leave the array pointer messy.

			// filter the changes in the chunk and add the result to the batch
			$remaining = $this->batchSize - $batchSize;
			list( $filtered, $lastIdSeen ) = $this->filterChanges( $siteID, $chunk, $remaining );

			$batch = array_merge( $batch, $filtered );
			$batchSize = count( $batch );
			$chunksExamined++;

			//XXX: We could try to adapt $chunkSize based on ratio of changes that get filtered out:
			//     $chunkSize = ( $this->batchSize - count( $batch ) ) * ( count_before / count_after );
		}

		$this->trace( "Got " . count( $batch ) . " pending changes. " );

		return array( $batch, $lastIdSeen );
	}

	/**
	 * Checks whether the given Change is somehow relevant to the given wiki site.
	 *
	 * In particular this checks whether the Change modifies any sitelink that refers to the
	 * given wiki site.
	 *
	 * @note: this does not check whether the entity that was changed is or is not at all
	 *        connected with (resp. used on) the target wiki.
	 *
	 * @param Change $change the change to examine.
	 * @param string $siteID the site to consider.
	 *
	 * @return bool
	 */
	private function isRelevantChange( Change $change, $siteID ) {
		if ( $change instanceof ItemChange && !$change->isEmpty() ) {
			$siteLinkDiff = $change->getSiteLinkDiff();

			if ( isset( $siteLinkDiff[ $siteID ] ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Filters a list of changes, removing changes not relevant to the given client wiki.
	 *
	 * Currently, we keep EntityChanges for entities the client wiki is subscribed to, or
	 * that modify a sitelink to the client wiki. Changes that are not EntityChanges are
	 * never included in the batch.
	 *
	 * @param string $siteID The client wiki's global site identifier, as used by sitelinks.
	 * @param Change[] $changes The list of changes to filter.
	 * @param int $limit The max number of changes to return
	 *
	 * @return array( $batch, $seen ), where $batch is the filtered list of Change objects,
	 *         and $seen is the ID of the last change considered for the batch
	 *         (even if that was filtered out), for use as a continuation marker.
	 *         $seen is 0 only if $changes is empty.
	 */
	private function filterChanges( $siteID, array $changes, $limit ) {
		// collect all item IDs mentioned in the changes
		$entitySet = array();
		foreach ( $changes as $change ) {
			if ( !( $change instanceof EntityChange ) ) {
				continue;
			}

			$id = $change->getEntityId();
			$idString = $id->getSerialization();
			$entitySet[$idString] = $id;
		}

		$this->trace( "Checking sitelinks to $siteID for " . count( $entitySet ) . " entities." );

		$subscribedEntities = $this->subscriptionLookup->getSubscriptions( $siteID, $entitySet );
		$subscribedEntities = $this->reIndexEntityIds( $subscribedEntities );

		$this->trace( "Retaining changes for " . count( $subscribedEntities ) . " relevant entities." );

		// find all changes that relate to an item that has a sitelink to $siteID.
		$filteredChanges = array();
		$numberOfChangesFound = 0;
		$lastIdSeen = 0;
		foreach ( $changes as $change ) {
			// Every change we look at advances the continuation marker, including
			// changes that are not EntityChanges. Otherwise a chunk without any
			// EntityChange would report 0 and reset the caller's dispatch position.
			$lastIdSeen = $change->getId();

			if ( !( $change instanceof EntityChange ) ) {
				continue;
			}

			$idString = $change->getEntityId()->getSerialization();

			// The change is relevant if it alters any sitelinks referring to $siteID,
			// or the item currently links to $siteID.
			if ( isset( $subscribedEntities[$idString] )
				|| $this->isRelevantChange( $change, $siteID )
			) {
				$filteredChanges[] = $change;
				$numberOfChangesFound++;
			}

			// Stop as soon as the batch is full; anything after this point has not
			// been considered and must be picked up by the next pass.
			if ( $numberOfChangesFound >= $limit ) {
				break;
			}
		}

		$this->trace( "Found " . count( $filteredChanges ) . " changes for items with relevant sitelinks." );

		return array( $filteredChanges, $lastIdSeen );
	}

	/**
	 * @param EntityId[] $entityIds
	 *
	 * @return EntityId[] $entityIds re-keyed by id string.
	 */
	private function reIndexEntityIds( array $entityIds ) {
		$reindexed = array();

		foreach ( $entityIds as $id ) {
			$key = $id->getSerialization();
			$reindexed[$key] = $id;
		}

		return $reindexed;
	}

	/**
	 * Log a message if verbose mode is enabled. Trace messages are indented by
	 * four spaces to set them apart from regular log messages.
	 *
	 * @param string $message
	 */
	private function trace( $message ) {
		if ( $this->verbose ) {
			$this->log( "    " . $message );
		}
	}

	/**
	 * Passes a message to the configured message reporter.
	 *
	 * @param string $message
	 */
	private function log( $message ) {
		$this->messageReporter->reportMessage( $message );
	}

	/**
	 * @return ChangeDispatchCoordinator
	 */
	public function getDispatchCoordinator() {
		return $this->coordinator;
	}

}