| Current File : /home/jvzmxxx/wiki1/extensions/Wikibase/client/includes/Usage/Sql/EntityUsageTableBuilder.php |
<?php
namespace Wikibase\Client\Usage\Sql;
use DatabaseBase;
use Exception;
use InvalidArgumentException;
use LoadBalancer;
use ResultWrapper;
use Wikibase\Client\Usage\EntityUsage;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\EntityIdParser;
use Wikibase\Lib\Reporting\ExceptionHandler;
use Wikibase\Lib\Reporting\LogWarningExceptionHandler;
use Wikibase\Lib\Reporting\MessageReporter;
use Wikibase\Lib\Reporting\NullMessageReporter;
/**
* Implements initial population (priming) for the wbc_entity_usage table,
* based on "wikibase_item" entries in the page_props table.
*
* All usages will be marked as EntityUsage::ALL_USAGE ("X"), since we do not know
* which aspects are actually used beyond the sitelinks aspect. The "X" aspect
* will cause the page to be purged for any kind of change to the respective
* data item; once the page is re-parse, the "X" aspect would be removed with
* whatever aspect(s) are actually used on the page.
*
* @license GPL-2.0+
* @author Daniel Kinzler
*/
class EntityUsageTableBuilder {
/**
* @var EntityIdParser
*/
private $idParser;
/**
* @var LoadBalancer
*/
private $loadBalancer;
/**
* @var string
*/
private $usageTableName;
/**
* @var int
*/
private $batchSize;
/**
* @var ExceptionHandler
*/
private $exceptionHandler;
/**
* @var MessageReporter
*/
private $progressReporter;
/**
* @param EntityIdParser $idParser
* @param LoadBalancer $loadBalancer
* @param int $batchSize defaults to 1000
* @param string|null $usageTableName defaults to wbc_entity_usage
*
* @throws InvalidArgumentException
*/
public function __construct(
EntityIdParser $idParser,
LoadBalancer $loadBalancer,
$batchSize = 1000,
$usageTableName = null
) {
if ( !is_int( $batchSize ) || $batchSize < 1 ) {
throw new InvalidArgumentException( '$batchSize must be an integer >= 1' );
}
if ( !is_string( $usageTableName ) && $usageTableName !== null ) {
throw new InvalidArgumentException( '$usageTableName must be a string or null' );
}
$this->idParser = $idParser;
$this->loadBalancer = $loadBalancer;
$this->batchSize = $batchSize;
$this->usageTableName = $usageTableName ?: EntityUsageTable::DEFAULT_TABLE_NAME;
$this->exceptionHandler = new LogWarningExceptionHandler();
$this->progressReporter = new NullMessageReporter();
}
/**
* @param MessageReporter $progressReporter
*/
public function setProgressReporter( MessageReporter $progressReporter ) {
$this->progressReporter = $progressReporter;
}
/**
* @return MessageReporter
*/
public function getProgressReporter() {
return $this->progressReporter;
}
/**
* @param ExceptionHandler $exceptionHandler
*/
public function setExceptionHandler( ExceptionHandler $exceptionHandler ) {
$this->exceptionHandler = $exceptionHandler;
}
/**
* @return ExceptionHandler
*/
public function getExceptionHandler() {
return $this->exceptionHandler;
}
/**
* Fill the usage table with rows based on entries in page_props.
*
* @param int $fromPageId
*/
public function fillUsageTable( $fromPageId = 0 ) {
do {
$count = $this->processUsageBatch( $fromPageId );
$this->progressReporter->reportMessage( "Filling usage table: processed $count pages, starting with page #$fromPageId." );
} while ( $count > 0 );
}
/**
* @param int &$fromPageId Page ID to start from. Will be updated with the next unprocessed ID,
* to be used as the starting point of the next batch. Pages are processed in order
* of their ID.
*
* @return int The number of entity usages inserted.
*/
private function processUsageBatch( &$fromPageId = 0 ) {
wfWaitForSlaves();
$db = $this->loadBalancer->getConnection( DB_MASTER );
$entityPerPage = $this->getUsageBatch( $db, $fromPageId );
if ( empty( $entityPerPage ) ) {
return 0;
}
$count = $this->insertUsageBatch( $db, $entityPerPage );
// Update $fromPageId to become the first page ID of the next batch.
$fromPageId = max( array_keys( $entityPerPage ) ) + 1;
$this->loadBalancer->reuseConnection( $db );
return $count;
}
/**
* @param DatabaseBase $db
* @param EntityId[] $entityPerPage
*
* @return int The number of rows inserted.
*/
private function insertUsageBatch( DatabaseBase $db, array $entityPerPage ) {
$db->startAtomic( __METHOD__ );
$c = 0;
foreach ( $entityPerPage as $pageId => $entityId ) {
$db->insert(
$this->usageTableName,
array(
'eu_page_id' => (int)$pageId,
'eu_aspect' => EntityUsage::ALL_USAGE,
'eu_entity_id' => $entityId->getSerialization()
),
__METHOD__,
array(
'IGNORE'
)
);
$c++;
}
$db->endAtomic( __METHOD__ );
return $c;
}
/**
* @param DatabaseBase $db
* @param int $fromPageId
*
* @return EntityId[] An associative array mapping page IDs to Entity IDs.
*/
private function getUsageBatch( DatabaseBase $db, $fromPageId = 0 ) {
$res = $db->select(
'page_props',
array( 'pp_page', 'pp_value' ),
array(
'pp_propname' => 'wikibase_item',
'pp_page >= ' . (int)$fromPageId
),
__METHOD__,
array(
'LIMIT' => $this->batchSize,
'ORDER BY pp_page'
)
);
return $this->slurpEntityIds( $res );
}
/**
* @param ResultWrapper $res
*
* @return EntityId[] An associative array mapping page IDs to Entity IDs.
*/
private function slurpEntityIds( ResultWrapper $res ) {
$entityPerPage = array();
foreach ( $res as $row ) {
try {
$entityId = $this->idParser->parse( $row->pp_value );
$entityPerPage[$row->pp_page] = $entityId;
} catch ( Exception $ex ) {
$this->exceptionHandler->handleException(
$ex,
'badEntityId',
__METHOD__ . ': ' . 'Failed to parse entity ID: ' .
$row->pp_value . ' at page ' .
$row->pp_page
);
}
}
return $entityPerPage;
}
}