| Current File : /home/jvzmxxx/wiki1/extensions/Wikibase/repo/maintenance/dumpEntities.php |
<?php
namespace Wikibase;
use Maintenance;
use MWException;
use Wikibase\DataModel\Entity\BasicEntityIdParser;
use Wikibase\Dumpers\DumpGenerator;
use Wikibase\Lib\Reporting\ExceptionHandler;
use Wikibase\Lib\Reporting\ObservableMessageReporter;
use Wikibase\Lib\Reporting\ReportingExceptionHandler;
use Wikibase\Repo\Disposable;
use Wikibase\Repo\IO\EntityIdReader;
use Wikibase\Repo\IO\LineReader;
use Wikibase\Repo\Store\EntityIdPager;
use Wikibase\Repo\Store\EntityPerPage;
use Wikibase\Repo\Store\SQL\EntityPerPageIdPager;
$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../../..';
require_once $basePath . '/maintenance/Maintenance.php';
/**
* Maintenance script for generating a dump of entities in the repository.
*
* @since 0.5
*
* @license GPL-2.0+
* @author Daniel Kinzler
*/
abstract class DumpScript extends Maintenance {
/**
* @var EntityPerPage
*/
private $entityPerPage;
/**
* @var bool|resource
*/
private $logFileHandle = false;
public function __construct() {
parent::__construct();
$this->addDescription( 'Generate a JSON dump from entities in the repository.' );
$this->addOption( 'list-file', "A file containing one entity ID per line.", false, true );
$this->addOption( 'entity-type', "Only dump this kind of entity, e.g. `item` or `property`.", false, true );
$this->addOption( 'sharding-factor', "The number of shards (must be >= 1)", false, true );
$this->addOption( 'shard', "The shard to output (must be less than the sharding-factor)", false, true );
$this->addOption( 'batch-size', "The number of entities per processing batch", false, true );
$this->addOption( 'output', "Output file (default is stdout). Will be overwritten.", false, true );
$this->addOption( 'log', "Log file (default is stderr). Will be appended.", false, true );
$this->addOption( 'quiet', "Disable progress reporting", false, false );
$this->addOption( 'limit', "Limit how many entities are dumped.", false, true );
}
public function setDumpEntitiesServices( EntityPerPage $entityPerPage ) {
$this->entityPerPage = $entityPerPage;
}
/**
* Create concrete dumper instance
* @param resource $output
* @return DumpGenerator
*/
abstract protected function createDumper( $output );
/**
* Outputs a message vis the output() method.
*
* @see MessageReporter::logMessage()
*
* @param string $message
*/
public function logMessage( $message ) {
if ( $this->logFileHandle ) {
fwrite( $this->logFileHandle, "$message\n" );
fflush( $this->logFileHandle );
} else {
$this->output( "$message\n" );
}
}
/**
* Opens the given file for use by logMessage().
*
* @param string $file use "-" as a shortcut for "php://stdout"
*
* @throws MWException
*/
private function openLogFile( $file ) {
$this->closeLogFile();
if ( $file === '-' ) {
$file = 'php://stdout';
}
// wouldn't streams be nice...
$this->logFileHandle = fopen( $file, 'a' );
if ( !$this->logFileHandle ) {
throw new MWException( 'Failed to open log file: ' . $file );
}
}
/**
* Closes any currently open file opened with openLogFile().
*/
private function closeLogFile() {
if ( $this->logFileHandle
&& $this->logFileHandle !== STDERR
&& $this->logFileHandle !== STDOUT
) {
fclose( $this->logFileHandle );
}
$this->logFileHandle = false;
}
/**
* Do the actual work. All child classes will need to implement this
*/
public function execute() {
//TODO: more validation for options
$entityType = $this->getOption( 'entity-type' );
$shardingFactor = (int)$this->getOption( 'sharding-factor', 1 );
$shard = (int)$this->getOption( 'shard', 0 );
$batchSize = (int)$this->getOption( 'batch-size', 100 );
$limit = (int)$this->getOption( 'limit', 0 );
//TODO: Allow injection of an OutputStream for logging
$this->openLogFile( $this->getOption( 'log', 'php://stderr' ) );
$outFile = $this->getOption( 'output', 'php://stdout' );
if ( $outFile === '-' ) {
$outFile = 'php://stdout';
}
$output = fopen( $outFile, 'w' ); //TODO: Allow injection of an OutputStream
if ( !$output ) {
throw new MWException( 'Failed to open ' . $outFile . '!' );
}
if ( $this->hasOption( 'list-file' ) ) {
$this->logMessage( "Dumping entities listed in " . $this->getOption( 'list-file' ) );
}
if ( $entityType ) {
$this->logMessage( "Dumping entities of type $entityType" );
}
if ( $shardingFactor ) {
$this->logMessage( "Dumping shard $shard/$shardingFactor" );
}
$dumper = $this->createDumper( $output );
$dumper->setLimit( $limit );
$progressReporter = new ObservableMessageReporter();
$progressReporter->registerReporterCallback( array( $this, 'logMessage' ) );
$dumper->setProgressReporter( $progressReporter );
$exceptionReporter = new ReportingExceptionHandler( $progressReporter );
$dumper->setExceptionHandler( $exceptionReporter );
//NOTE: we filter for $entityType twice: filtering in the DB is efficient,
// but filtering in the dumper is needed when working from a list file.
$dumper->setShardingFilter( $shardingFactor, $shard );
$dumper->setEntityTypeFilter( $entityType );
$dumper->setBatchSize( $batchSize );
$idStream = $this->makeIdStream( $entityType, $exceptionReporter );
\MediaWiki\suppressWarnings();
$dumper->generateDump( $idStream );
\MediaWiki\restoreWarnings();
if ( $idStream instanceof Disposable ) {
// close stream / free resources
$idStream->dispose();
}
$this->closeLogFile();
}
/**
* @param null|string $entityType
* @param ExceptionHandler|null $exceptionReporter
*
* @return EntityIdPager a stream of EntityId objects
*/
private function makeIdStream( $entityType = null, ExceptionHandler $exceptionReporter = null ) {
$listFile = $this->getOption( 'list-file' );
if ( $listFile !== null ) {
$stream = $this->makeIdFileStream( $listFile, $exceptionReporter );
} else {
$stream = $this->makeIdQueryStream( $entityType );
}
return $stream;
}
/**
* Returns EntityPerPage::NO_REDIRECTS.
*
* @return mixed a EntityPerPage::XXX_REDIRECTS constant
*/
protected function getRedirectMode() {
return EntityPerPage::NO_REDIRECTS;
}
/**
* @param string|null $entityType
*
* @return EntityIdPager
*/
private function makeIdQueryStream( $entityType ) {
$stream = new EntityPerPageIdPager( $this->entityPerPage, $entityType, $this->getRedirectMode() );
return $stream;
}
/**
* @param string $listFile
* @param ExceptionHandler|null $exceptionReporter
*
* @throws MWException
* @return EntityIdPager
*/
private function makeIdFileStream( $listFile, ExceptionHandler $exceptionReporter = null ) {
$input = fopen( $listFile, 'r' );
if ( !$input ) {
throw new MWException( "Failed to open ID file: $input" );
}
$stream = new EntityIdReader( new LineReader( $input ), new BasicEntityIdParser() );
$stream->setExceptionHandler( $exceptionReporter );
return $stream;
}
}