Current File : /home/jvzmxxx/wiki1/extensions/Flow/maintenance/FlowRemoveOldTopics.php
<?php

use Flow\Container;
use Flow\Data\BufferedCache;
use Flow\Data\ManagerGroup;
use Flow\Data\Utils\RawSql;
use Flow\DbFactory;
use Flow\Model\AbstractRevision;
use Flow\Model\Header;
use Flow\Model\PostRevision;
use Flow\Model\UUID;
use Flow\Model\Workflow;
use Flow\Repository\TreeRepository;

// Pull in the Maintenance base class: from MW_INSTALL_PATH when that
// environment variable is set, otherwise from the path three directories
// above this script.
require_once ( getenv( 'MW_INSTALL_PATH' ) === false
	? __DIR__ . '/../../../maintenance/Maintenance.php'
	: getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php' );

/**
 * Maintenance script that deletes Flow content older than a cutoff date.
 *
 * Three passes are run, in order:
 *  - headers whose every revision predates the cutoff;
 *  - topic workflows whose last update predates the cutoff;
 *  - topic workflows updated after the cutoff, but only by the Flow talk page
 *    manager user.
 *
 * With --dryrun every batch is rolled back instead of committed.
 *
 * @ingroup Maintenance
 */
class FlowRemoveOldTopics extends Maintenance {
	/**
	 * Whether to roll back every batch instead of committing it.
	 *
	 * @var bool
	 */
	protected $dryRun = false;

	/**
	 * @var ManagerGroup
	 */
	protected $storage;

	/**
	 * @var TreeRepository
	 */
	protected $treeRepo;

	/**
	 * @var DbFactory
	 */
	protected $dbFactory;

	/**
	 * @var BufferedCache
	 */
	protected $cache;

	/**
	 * Declares the script description, its options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->mDescription = "Deletes old topics";

		$this->addOption( 'date', 'Date cutoff (in any format understood by wfTimestamp), topics older than this date will be deleted.', true, true );
		$this->addOption( 'dryrun', 'Simulate script run, without actually deleting anything' );

		$this->setBatchSize( 10 );
	}

	/**
	 * Script entry point: resolves dependencies from the Flow container,
	 * normalizes the --date option to TS_MW and runs the three removal passes.
	 */
	public function execute() {
		$this->dryRun = $this->getOption( 'dryrun', false );
		$this->storage = Container::get( 'storage' );
		$this->treeRepo = Container::get( 'repository.tree' );
		$this->dbFactory = Container::get( 'db.factory' );
		$this->cache = Container::get( 'memcache.local_buffered' );

		$timestamp = wfTimestamp( TS_MW, $this->getOption( 'date' ) );

		$this->removeHeader( $timestamp );
		// remove topics that are older than the given timestamp
		$this->removeTopics( $timestamp );
		// remove topics that have more recent updates, but only from Flow talk
		// page manager
		$this->removeTopicsWithFlowUpdates( $timestamp );
	}

	/**
	 * Removes all revisions (and their references) of headers that have no
	 * revision at or after the cutoff.
	 *
	 * Headers are scanned in batches of first revisions (rev_parent_id null)
	 * ordered by rev_id; a header is skipped as soon as any of its revisions
	 * is newer than the cutoff, so history is never partially removed.
	 *
	 * @param string $timestamp Timestamp in TS_MW format
	 */
	protected function removeHeader( $timestamp ) {
		$dbr = $this->dbFactory->getDB( DB_SLAVE );

		// we don't store a timestamp with revisions - the id also holds date
		// info, so that's what we should compare against
		$endId = UUID::getComparisonUUID( $timestamp );

		// start from around unix epoch - there can be no Flow data before that
		$startId = UUID::getComparisonUUID( '1' );

		// the loop ends only when a batch query comes back empty; each kind of
		// result has its own variable so none of them can affect termination
		while ( true ) {
			/** @var Header[] $firstRevisions */
			$firstRevisions = $this->storage->find(
				'Header',
				array(
					'rev_user_wiki' => wfWikiID(),
					'rev_type' => 'header',
					new RawSql( 'rev_id > ' . $dbr->addQuotes( $startId->getBinary() ) ),
					new RawSql( 'rev_id < ' . $dbr->addQuotes( $endId->getBinary() ) ),
					// only fetch original post at this point: we still need to
					// narrow down the results
					'rev_parent_id' => null,
				),
				array(
					'limit' => $this->mBatchSize,
					'sort' => 'rev_id',
					'order' => 'ASC',
				)
			);

			if ( empty( $firstRevisions ) ) {
				break;
			}

			// prepare for next batch, which will start at this
			/** @var UUID $startId */
			$startId = end( $firstRevisions )->getRevisionId();

			// we've now found all first revisions prior to a certain date, but we
			// don't want to remove those that have revisions after that date cutoff
			// (we don't want to break history)
			// let's see if any has revisions more recent than timestamp
			$conds = array();
			$uuids = array();
			foreach ( $firstRevisions as $revision ) {
				// keep track of UUIDs we may want to delete
				$uuids[$revision->getCollectionId()->getAlphadecimal()] = $revision->getCollectionId();

				$conds[] = array(
					'rev_user_wiki' => wfWikiID(),
					'rev_type' => 'header',
					new RawSql( 'rev_id >= ' . $dbr->addQuotes( $endId->getBinary() ) ),
					'rev_type_id' => $revision->getCollectionId()->getBinary(),
				);
			}

			/** @var Header[][] $recent */
			$recent = $this->storage->findMulti( 'Header', $conds, array( 'limit' => 1 ) );

			// now exclude collection ids where there's a revision that is more
			// recent than the timestamp cutoff
			foreach ( $recent as $recentRevisions ) {
				foreach ( $recentRevisions as $revision ) {
					unset( $uuids[$revision->getCollectionId()->getAlphadecimal()] );
				}
			}

			// by now, there may be nothing left to remove, so move on to the
			// next batch...
			if ( empty( $uuids ) ) {
				continue;
			}

			/** @var Header[] $toRemove */
			$toRemove = $this->storage->find(
				'Header',
				array(
					'rev_user_wiki' => wfWikiID(),
					'rev_type' => 'header',
					'rev_type_id' => UUID::convertUUIDs( $uuids ),
				)
			);

			// nothing could be loaded for these headers: skip the batch rather
			// than open a transaction for no work
			if ( empty( $toRemove ) ) {
				continue;
			}

			$this->output( 'Removing ' . count( $toRemove ) . ' header revisions from ' . count( $uuids ) . ' headers (up to ' . $startId->getTimestamp() . ")\n" );

			$this->dbFactory->getDB( DB_MASTER )->begin();
			$this->cache->begin();

			foreach ( $toRemove as $revision ) {
				$this->removeReferences( $revision );
			}

			$this->multiRemove( $toRemove );

			if ( $this->dryRun ) {
				$this->dbFactory->getDB( DB_MASTER )->rollback();
				$this->cache->rollback();
			} else {
				$this->dbFactory->getDB( DB_MASTER )->commit();
				$this->cache->commit();
				$this->dbFactory->waitForSlaves();
			}
		}
	}

	/**
	 * Removes topic workflows whose last update predates the cutoff, in
	 * batches ordered by workflow id.
	 *
	 * @param string $timestamp Timestamp in TS_MW format
	 * @throws \Flow\Exception\FlowException
	 */
	protected function removeTopics( $timestamp ) {
		$dbr = $this->dbFactory->getDB( DB_SLAVE );

		// start from around unix epoch - there can be no Flow data before that
		$startId = UUID::getComparisonUUID( '1' );

		while ( true ) {
			$workflows = $this->storage->find(
				'Workflow',
				array(
					new RawSql( 'workflow_id > ' . $dbr->addQuotes( $startId->getBinary() ) ),
					'workflow_wiki' => wfWikiID(),
					'workflow_type' => 'topic',
					new RawSql( 'workflow_last_update_timestamp < ' . $dbr->addQuotes( $timestamp ) ),
				),
				array(
					'limit' => $this->mBatchSize,
					'sort' => 'workflow_id',
					'order' => 'ASC',
				)
			);

			if ( empty( $workflows ) ) {
				break;
			}

			// prepare for next batch
			/** @var UUID $startId */
			$startId = end( $workflows )->getId();

			$this->output( 'Removing ' . count( $workflows ) . ' topic workflows (up to ' . $startId->getTimestamp() . ")\n" );
			$this->removeWorkflows( $workflows );
		}
	}

	/**
	 * Removes topic workflows that were updated at or after the cutoff, but
	 * whose revisions since the cutoff were all authored by the Flow talk page
	 * manager user.
	 *
	 * @param string $timestamp Timestamp in TS_MW format
	 * @throws DBUnexpectedError
	 * @throws \Flow\Exception\FlowException
	 */
	protected function removeTopicsWithFlowUpdates( $timestamp ) {
		$dbr = $this->dbFactory->getDB( DB_SLAVE );
		$talkpageManager = FlowHooks::getOccupationController()->getTalkpageManager();

		// start from around unix epoch - there can be no Flow data before that
		$batchStartId = UUID::getComparisonUUID( '1' );

		// we only care about revisions since cutoff here
		$cutoffStartId = UUID::getComparisonUUID( $timestamp );

		while ( true ) {
			$workflowIds = $dbr->selectFieldValues(
				array( 'flow_workflow', 'flow_tree_node', 'flow_revision' ),
				'workflow_id',
				array(
					// revisions more recent than cutoff time
					'rev_id > ' . $dbr->addQuotes( $cutoffStartId->getBinary() ),
					// workflow_id condition is only used to batch, the exact
					// $batchStartId otherwise doesn't matter (unlike rev_id)
					'workflow_id > ' . $dbr->addQuotes( $batchStartId->getBinary() ),
					'workflow_wiki' => wfWikiID(),
					'workflow_type' => 'topic',
					'workflow_last_update_timestamp >= ' . $dbr->addQuotes( $timestamp ),
				),
				__METHOD__,
				array(
					'LIMIT' => $this->mBatchSize,
					'ORDER BY' => 'workflow_id ASC',
					// we only want to find topics that were only altered by talk
					// page manager: as long as anyone else edited any post, we're
					// not interested in it
					'GROUP BY' => 'workflow_id',
					'HAVING' => array( 'GROUP_CONCAT(DISTINCT rev_user_id)' => $talkpageManager->getId() ),
				),
				array(
					'flow_tree_node' => array( 'INNER JOIN', array( 'tree_ancestor_id = workflow_id' ) ),
					'flow_revision' => array( 'INNER JOIN', array( 'rev_type_id = tree_descendant_id' ) ),
				)
			);

			if ( empty( $workflowIds ) ) {
				break;
			}

			// prepare for next batch: the cursor comes from the query result,
			// which is ordered by workflow_id, and not from the loaded objects,
			// so it always advances even if fewer (or differently ordered)
			// objects come back from storage
			/** @var UUID $batchStartId */
			$batchStartId = UUID::create( end( $workflowIds ) );

			$workflows = $this->storage->getMulti( 'Workflow', $workflowIds );
			if ( empty( $workflows ) ) {
				continue;
			}

			$this->output( 'Removing ' . count( $workflows ) . ' topic workflows with recent Flow updates (up to ' . $batchStartId->getTimestamp() . ")\n" );
			$this->removeWorkflows( $workflows );
		}
	}

	/**
	 * Removes the given workflows and everything hanging off them (summaries,
	 * posts, topic list entries) inside a single DB + cache transaction, which
	 * is rolled back in dry-run mode.
	 *
	 * @param Workflow[] $workflows
	 * @throws DBUnexpectedError
	 */
	protected function removeWorkflows( array $workflows ) {
		$this->dbFactory->getDB( DB_MASTER )->begin();
		$this->cache->begin();

		foreach ( $workflows as $workflow ) {
			$this->removeSummary( $workflow );
			$this->removePosts( $workflow );
			$this->removeTopicList( $workflow );
		}

		$this->multiRemove( $workflows );

		if ( $this->dryRun ) {
			$this->dbFactory->getDB( DB_MASTER )->rollback();
			$this->cache->rollback();
		} else {
			$this->dbFactory->getDB( DB_MASTER )->commit();
			$this->cache->commit();
			$this->dbFactory->waitForSlaves();
		}
	}

	/**
	 * Removes the topic list entries that point at the given topic workflow.
	 *
	 * @param Workflow $workflow
	 */
	protected function removeTopicList( Workflow $workflow ) {
		$entries = $this->storage->find( 'TopicListEntry', array( 'topic_id' => $workflow->getId() ) );
		if ( $entries ) {
			$this->output( 'Removing ' . count( $entries ) . " topiclist entries.\n" );
			$this->multiRemove( $entries );
		}
	}

	/**
	 * Removes all summary revisions of the given topic workflow, along with
	 * the references recorded for them.
	 *
	 * @param Workflow $workflow
	 */
	protected function removeSummary( Workflow $workflow ) {
		$revisions = $this->storage->find( 'PostSummary', array( 'rev_type_id' => $workflow->getId() ) );
		if ( $revisions ) {
			foreach ( $revisions as $revision ) {
				$this->removeReferences( $revision );
			}

			$this->output( 'Removing ' . count( $revisions ) . " summary revisions from 1 topic.\n" );
			$this->multiRemove( $revisions );
		}
	}

	/**
	 * Flattens a subtree into a list of UUIDs in which every node comes after
	 * all of its descendants.
	 *
	 * @param UUID $parentId Id of the root of $subtree
	 * @param array $subtree Node data; its 'children' entry maps child id to
	 *  that child's node data (same structure, recursively)
	 * @return UUID[]
	 */
	protected function sortSubtree( UUID $parentId, array $subtree ) {
		$flat = array();

		// first recursively process all children, so they come first in $flat
		foreach ( $subtree['children'] as $id => $data ) {
			$flat = array_merge(
				$flat,
				$this->sortSubtree( UUID::create( $id ), $data )
			);
		}

		// then add parent, which should come last in $flat
		$flat[] = $parentId;

		return $flat;
	}

	/**
	 * Removes every post revision of a topic (with references) and the tree
	 * nodes of those posts, deepest-nested posts first.
	 *
	 * @param Workflow $workflow
	 */
	protected function removePosts( Workflow $workflow ) {
		// fetch all children (posts) from a topic & reverse-sort all the posts:
		// deepest-nested children should come first, parents last
		$subtree = $this->treeRepo->fetchSubtree( $workflow->getId() );
		$uuids = $this->sortSubtree( $workflow->getId(), $subtree );

		$conds = array();
		foreach ( $uuids as $id ) {
			$conds[] = array( 'rev_type_id' => $id );
		}

		$posts = $this->storage->findMulti( 'PostRevision', $conds );
		$count = 0;
		foreach ( $posts as $revisions ) {
			/** @var PostRevision[] $revisions */
			// collect each distinct post id once, so the tree node is deleted
			// once per post instead of once per revision
			$collectionIds = array();
			foreach ( $revisions as $revision ) {
				$this->removeReferences( $revision );

				$collectionId = $revision->getCollectionId();
				$collectionIds[$collectionId->getAlphadecimal()] = $collectionId;
			}

			$count += count( $revisions );
			$this->multiRemove( $revisions );

			foreach ( $collectionIds as $collectionId ) {
				$this->treeRepo->delete( $collectionId );
			}
		}
		$this->output( 'Removing ' . $count . ' post revisions from ' . count( $posts ) . " posts.\n" );
	}

	/**
	 * Removes the wiki and URL references stored for the collection the given
	 * revision belongs to.
	 *
	 * @param AbstractRevision $revision
	 */
	protected function removeReferences( AbstractRevision $revision ) {
		$wikiReferences = $this->storage->find( 'WikiReference', array(
			'ref_src_wiki' => wfWikiID(),
			'ref_src_object_type' => $revision->getRevisionType(),
			'ref_src_object_id' => $revision->getCollectionId(),
		) );
		if ( $wikiReferences ) {
			$this->output( 'Removing ' . count( $wikiReferences ) . " wiki references from 1 revision.\n" );
			$this->multiRemove( $wikiReferences );
		}

		$urlReferences = $this->storage->find( 'URLReference', array(
			'ref_src_wiki' => wfWikiID(),
			'ref_src_object_type' => $revision->getRevisionType(),
			'ref_src_object_id' => $revision->getCollectionId(),
		) );
		if ( $urlReferences ) {
			$this->output( 'Removing ' . count( $urlReferences ) . " url references from 1 revision.\n" );
			$this->multiRemove( $urlReferences );
		}
	}

	/**
	 * Removes a list of objects through the storage manager group.
	 *
	 * @param array $objects
	 */
	protected function multiRemove( array $objects ) {
		$this->storage->multiRemove( $objects );
	}
}

// Name the maintenance class defined in this file, then include the runner
// file referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = 'FlowRemoveOldTopics';
require_once RUN_MAINTENANCE_IF_MAIN;