<?php

namespace Wikibase\Client\Store\Sql;

use InvalidArgumentException;
use ResultWrapper;
use Wikibase\Client\Usage\Sql\EntityUsageTable;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\Lib\Reporting\ExceptionHandler;
use Wikibase\Lib\Reporting\LogWarningExceptionHandler;
use Wikibase\Lib\Reporting\MessageReporter;
use Wikibase\Lib\Reporting\NullMessageReporter;

/**
 * Implements bulk updates for the repo's wb_changes_subscription table,
 * based on the client's local wbc_entity_usage table. The client wiki will be subscribed
 * to be informed about changes to any entity present in the local wbc_entity_usage table.
 *
 * Both bulk operations work in batches of at most $batchSize rows, call wfWaitForSlaves()
 * before each batch, and report their progress to the configured MessageReporter.
 *
 * @license GPL-2.0+
 * @author Daniel Kinzler
 */
class BulkSubscriptionUpdater {

	/**
	 * @var ConsistentReadConnectionManager Connection manager for the local (client) database.
	 */
	private $localConnectionManager;

	/**
	 * @var ConsistentReadConnectionManager Connection manager for the repo database.
	 */
	private $repoConnectionManager;

	/**
	 * @var string The local wiki's global ID, to be used as the subscriber ID in the repo's
	 *             subscription table.
	 */
	private $subscriberWikiId;

	/**
	 * @var string|false The repo wiki's id, as used by the LoadBalancer. Used for wait for slaves.
	 *                   False indicates to use the local wiki's database, and is the default
	 *                   for the repoWiki setting.
	 */
	private $repoWiki;

	/**
	 * @var int Maximum number of rows handled per batch; always >= 1.
	 */
	private $batchSize;

	/**
	 * @var ExceptionHandler
	 */
	private $exceptionHandler;

	/**
	 * @var MessageReporter
	 */
	private $progressReporter;

	/**
	 * @param ConsistentReadConnectionManager $localConnectionManager Connection manager for DB
	 * connections to the local wiki.
	 * @param ConsistentReadConnectionManager $repoConnectionManager Connection manager for DB
	 * connections to the repo.
	 * @param string $subscriberWikiId The local wiki's global ID, to be used as the subscriber ID
	 * in the repo's subscription table.
	 * @param string|false $repoWiki The repo wiki's id, as used by the LoadBalancer.
	 *                               False (default of the repoWiki setting) indicates to
	 *                               use local wiki database.
	 * @param int $batchSize Maximum number of rows to process per batch, must be >= 1.
	 *
	 * @throws InvalidArgumentException If any of the scalar arguments has the wrong type,
	 * or if $batchSize is smaller than 1.
	 */
	public function __construct(
		ConsistentReadConnectionManager $localConnectionManager,
		ConsistentReadConnectionManager $repoConnectionManager,
		$subscriberWikiId,
		$repoWiki,
		$batchSize = 1000
	) {
		if ( !is_string( $subscriberWikiId ) ) {
			throw new InvalidArgumentException( '$subscriberWikiId must be a string' );
		}

		if ( !is_string( $repoWiki ) && $repoWiki !== false ) {
			throw new InvalidArgumentException( '$repoWiki must be a string or false' );
		}

		if ( !is_int( $batchSize ) || $batchSize < 1 ) {
			throw new InvalidArgumentException( '$batchSize must be an integer >= 1' );
		}

		$this->localConnectionManager = $localConnectionManager;
		$this->repoConnectionManager = $repoConnectionManager;

		$this->subscriberWikiId = $subscriberWikiId;
		$this->repoWiki = $repoWiki;
		$this->batchSize = $batchSize;

		// Defaults; both can be replaced via the setters below.
		$this->exceptionHandler = new LogWarningExceptionHandler();
		$this->progressReporter = new NullMessageReporter();
	}

	/**
	 * @param MessageReporter $progressReporter Receives one message per processed batch.
	 */
	public function setProgressReporter( MessageReporter $progressReporter ) {
		$this->progressReporter = $progressReporter;
	}

	/**
	 * @return MessageReporter
	 */
	public function getProgressReporter() {
		return $this->progressReporter;
	}

	/**
	 * @param ExceptionHandler $exceptionHandler
	 */
	public function setExceptionHandler( ExceptionHandler $exceptionHandler ) {
		$this->exceptionHandler = $exceptionHandler;
	}

	/**
	 * @return ExceptionHandler
	 */
	public function getExceptionHandler() {
		return $this->exceptionHandler;
	}

	/**
	 * Insert subscriptions based on entries in wbc_entity_usage.
	 *
	 * All entity IDs found in wbc_entity_usage are processed, batch by batch. Batches that
	 * consist solely of subscriptions that already exist insert nothing, but do not end
	 * the update.
	 *
	 * @param EntityId|null $startEntity If given, only entities whose ID sorts after this
	 * entity's ID are processed (the given entity itself is skipped). This matches the
	 * entity ID shown in the progress messages, which is the last one already processed.
	 */
	public function updateSubscriptions( EntityId $startEntity = null ) {
		$this->repoConnectionManager->forceMaster();

		$continuation = $startEntity === null ? null : array( $startEntity->getSerialization() );

		while ( true ) {
			wfWaitForSlaves( null, $this->repoWiki );

			$count = $this->processUpdateBatch( $continuation );

			if ( $count === false ) {
				// No usage entries left beyond the continuation point.
				break;
			}

			// Report even when $count is 0: the batch may have consisted of pre-existing
			// subscriptions only (INSERT IGNORE), but the continuation point did advance.
			$this->progressReporter->reportMessage( 'Updating subscription table: '
				. "inserted $count subscriptions, continuing at entity #{$continuation[0]}." );
		}
	}

	/**
	 * Fetches the next batch of entity IDs from the usage table and subscribes to them.
	 *
	 * @param array &$continuation Advanced to the last entity ID of the processed batch.
	 *
	 * @return int|false The number of subscriptions inserted (may be 0 if all of them existed
	 * already), or false if there were no more entity IDs to process.
	 */
	private function processUpdateBatch( array &$continuation = null ) {
		$entityIds = $this->getUpdateBatch( $continuation );

		if ( empty( $entityIds ) ) {
			return false;
		}

		return $this->insertUpdateBatch( $entityIds );
	}

	/**
	 * Inserts subscriptions for the given entities into the repo's wb_changes_subscription
	 * table, inside an atomic section. Rows that already exist are skipped (IGNORE).
	 *
	 * @param string[] $entities Entity-IDs to subscribe to
	 *
	 * @return int The number of rows inserted, as reported by the database connection.
	 */
	private function insertUpdateBatch( array $entities ) {
		$dbw = $this->repoConnectionManager->beginAtomicSection( __METHOD__ );

		$rows = $this->makeSubscriptionRows( $entities );

		$dbw->insert(
			'wb_changes_subscription',
			$rows,
			__METHOD__,
			array(
				'IGNORE'
			)
		);

		// Must be read before committing, while it still refers to the insert above.
		$count = $dbw->affectedRows();
		$this->repoConnectionManager->commitAtomicSection( $dbw, __METHOD__ );

		return $count;
	}

	/**
	 * Reads up to $batchSize distinct entity IDs from the local usage table, in ascending
	 * order, starting after the continuation point (if any).
	 *
	 * @param array &$continuation If not empty, its first element is the entity ID after which
	 * to continue. Updated to the last entity ID of the returned batch, if any.
	 *
	 * @return string[] A list of entity id strings.
	 */
	private function getUpdateBatch( array &$continuation = null ) {
		$dbr = $this->localConnectionManager->getReadConnection();

		if ( empty( $continuation ) ) {
			// No continuation point: use a condition that matches every row.
			$continuationCondition = '1';
		} else {
			list( $fromEntityId ) = $continuation;
			$continuationCondition = 'eu_entity_id > ' . $dbr->addQuotes( $fromEntityId );
		}

		$res = $dbr->select(
			EntityUsageTable::DEFAULT_TABLE_NAME,
			array( 'DISTINCT eu_entity_id' ),
			$continuationCondition,
			__METHOD__,
			array(
				'ORDER BY' => 'eu_entity_id',
				'LIMIT' => $this->batchSize,
			)
		);

		$this->localConnectionManager->releaseConnection( $dbr );
		return $this->getEntityIdsFromRows( $res, 'eu_entity_id', $continuation );
	}

	/**
	 * Returns a list of rows for insertion, using DatabaseBase's multi-row insert mechanism.
	 * Each row is represented as an associative array with the keys cs_entity_id and
	 * cs_subscriber_id; the subscriber is always this updater's subscriber wiki ID.
	 *
	 * @param string[] $entities entity id strings
	 *
	 * @return array[] rows
	 */
	private function makeSubscriptionRows( array $entities ) {
		$rows = array();

		foreach ( $entities as $id ) {
			$rows[] = array(
				'cs_entity_id' => $id,
				'cs_subscriber_id' => $this->subscriberWikiId
			);
		}

		return $rows;
	}

	/**
	 * Extracts entity id strings from the rows in a query result, and updates $continuation
	 * to a position "after" the content of the given query result.
	 *
	 * @param ResultWrapper $res A result set with the field given by $entityIdField field set for each row.
	 *        The result is expected to be sorted by entity id, in ascending order.
	 * @param string $entityIdField The name of the field that contains the entity id.
	 * @param array &$continuation Updated to an array containing the last EntityId in the result.
	 *        Left untouched if the result is empty.
	 *
	 * @return string[] A list of entity ids strings.
	 */
	private function getEntityIdsFromRows( ResultWrapper $res, $entityIdField, array &$continuation = null ) {
		$entities = array();

		foreach ( $res as $row ) {
			$entities[] = $row->$entityIdField;
		}

		if ( !empty( $entities ) ) {
			// The result is sorted ascending, so the last ID is the new continuation point.
			$continuation = array( end( $entities ) );
		}

		return $entities;
	}

	/**
	 * Removes this wiki's subscriptions from the repo's wb_changes_subscription table,
	 * batch by batch.
	 *
	 * @note The rows to delete are selected by subscriber ID (and continuation point) only;
	 * wbc_entity_usage is not consulted. Subscriptions for entities that are still in use
	 * are therefore removed as well, and have to be re-created with updateSubscriptions().
	 *
	 * @param EntityId|null $startEntity If given, only subscriptions to entities whose ID sorts
	 * after this entity's ID are removed (the given entity itself is skipped).
	 */
	public function purgeSubscriptions( EntityId $startEntity = null ) {
		$continuation = $startEntity === null ? null : array( $startEntity->getSerialization() );

		$this->repoConnectionManager->forceMaster();

		while ( true ) {
			wfWaitForSlaves( null, $this->repoWiki );

			$count = $this->processDeletionBatch( $continuation );

			if ( $count > 0 ) {
				$this->progressReporter->reportMessage( 'Purging subscription table: '
					. "deleted $count subscriptions, continuing at entity #{$continuation[0]}." );
			} else {
				break;
			}
		}
	}

	/**
	 * Determines the next range of subscriptions to delete, and deletes it.
	 *
	 * @param array &$continuation Advanced to the last entity ID of the deleted range.
	 *
	 * @return int The number of subscriptions selected for deletion; 0 if none were left.
	 */
	private function processDeletionBatch( array &$continuation = null ) {
		$deletionRange = $this->getDeletionRange( $continuation );

		if ( $deletionRange === false ) {
			return 0;
		}

		list( $minId, $maxId, $count ) = $deletionRange;
		$this->deleteSubscriptionRange( $minId, $maxId );

		return $count;
	}

	/**
	 * Returns a range of entity IDs to delete, based on this updater's batch size.
	 *
	 * @param array &$continuation If not empty, its first element is the entity ID after which
	 * to continue. Updated to the last entity ID of the returned range, if any.
	 *
	 * @return array|false list( $minId, $maxId, $count ), or false if there is nothing to delete
	 */
	private function getDeletionRange( array &$continuation = null ) {
		$dbr = $this->repoConnectionManager->getReadConnection();

		$conditions = array(
			'cs_subscriber_id' => $this->subscriberWikiId,
		);

		if ( !empty( $continuation ) ) {
			list( $fromEntityId ) = $continuation;
			$conditions[] = 'cs_entity_id > ' . $dbr->addQuotes( $fromEntityId );
		}

		/**
		 * @note Below, we query and iterate all rows we want to delete in the current batch. That
		 * is rather ugly, but appears to be the best solution, because:
		 *
		 * - Deletions must be paged to avoid lock retention.
		 * - DELETE does not support LIMIT, so we need to know a range (min/max) of IDs.
		 * - GROUP BY does not support LIMIT, so we cannot use aggregate functions to get the
		 *   min/max IDs.
		 *
		 * Thus, using SELECT ... LIMIT seems to be the only reliable way to get the min/max range
		 * needed for batched deletion.
		 */

		$res = $dbr->select(
			'wb_changes_subscription',
			array( 'cs_entity_id' ),
			$conditions,
			__METHOD__,
			array(
				'ORDER BY' => 'cs_entity_id',
				'LIMIT' => $this->batchSize,
			)
		);

		$this->repoConnectionManager->releaseConnection( $dbr );
		$subscriptions = $this->getEntityIdsFromRows( $res, 'cs_entity_id', $continuation );

		if ( empty( $subscriptions ) ) {
			return false;
		}

		// The result is sorted ascending, so first and last element delimit the range.
		$minId = reset( $subscriptions );
		$maxId = end( $subscriptions );
		$count = count( $subscriptions );

		return array( $minId, $maxId, $count );
	}

	/**
	 * Deletes a range of this wiki's subscriptions, inside an atomic section.
	 * Both ends of the range are inclusive.
	 *
	 * @param string $minId Entity id string indicating the first element in the deletion range
	 * @param string $maxId Entity id string indicating the last element in the deletion range
	 */
	private function deleteSubscriptionRange( $minId, $maxId ) {
		$dbw = $this->repoConnectionManager->beginAtomicSection( __METHOD__ );

		$conditions = array(
			'cs_subscriber_id' => $this->subscriberWikiId,
			'cs_entity_id >= ' . $dbw->addQuotes( $minId ),
			'cs_entity_id <= ' . $dbw->addQuotes( $maxId ),
		);

		$dbw->delete(
			'wb_changes_subscription',
			$conditions,
			__METHOD__
		);

		$this->repoConnectionManager->commitAtomicSection( $dbw, __METHOD__ );
	}

}