| Current File : /home/jvzmxxx/wiki1/extensions/Flow/includes/Data/Storage/RevisionStorage.php |
<?php
namespace Flow\Data\Storage;
use DatabaseBase;
use ExternalStore;
use Flow\Data\Utils\Merger;
use Flow\Data\Utils\ResultDuplicator;
use Flow\Data\ObjectManager;
use Flow\DbFactory;
use Flow\Exception\DataModelException;
use Flow\Model\UUID;
use MWException;
/**
* Abstract storage implementation for models extending from AbstractRevision
*/
abstract class RevisionStorage extends DbStorage {
/**
 * {@inheritDoc}
 *
 * Only the moderation related rev_mod_* columns are listed here. None of the
 * content columns checked by isUpdatingExistingRevisionContentAllowed() are
 * part of this list, so with this default that method returns false.
 */
protected $allowedUpdateColumns = array(
'rev_mod_state',
'rev_mod_user_id',
'rev_mod_user_ip',
'rev_mod_user_wiki',
'rev_mod_timestamp',
'rev_mod_reason',
);
/**
 * {@inheritDoc}
 *
 * @todo This may not be necessary anymore since we don't update historical
 * revisions ( flow_revision ) during moderation
 */
protected $obsoleteUpdateColumns = array (
'tree_orig_user_text',
'rev_user_text',
'rev_edit_user_text',
'rev_mod_user_text',
'rev_type_id',
);
/**
 * External store configuration as handed to the constructor: a list of
 * external store servers available for insert, or false to disable.
 * A falsy value makes processExternalStore() skip the external insert.
 *
 * @var array|false
 */
protected $externalStore;
/**
 * Get the table to join for the revision storage, empty string for none.
 *
 * This base implementation returns an empty string. findInternal() and
 * findRevIdReal() only add a JOIN to their query when the returned value
 * is truthy.
 *
 * @return string
 */
protected function joinTable() {
return '';
}
/**
 * Get the column to join with flow_revision.rev_id, empty string for none.
 *
 * Only read by findInternal() and findRevIdReal(), and only when joinTable()
 * returned a truthy value. This base implementation returns an empty string.
 *
 * @return string
 */
protected function joinField() {
return '';
}
/**
 * Insert to joinTable() upon revision insert.
 *
 * Called by insert() after the flow_revision insert succeeded. Despite the
 * parameter name, insert() passes the complete list of rows it was given
 * (already normalized to a list), and returns whatever this method returns.
 * This base implementation returns its argument unchanged.
 *
 * @param array $row
 * @return array
 */
protected function insertRelated( array $row ) {
return $row;
}
/**
 * Update to joinTable() upon revision update.
 *
 * Called by update() with the change set produced by calcUpdates() and the
 * old row; update() casts the return value to bool. This base implementation
 * returns the change set unchanged.
 *
 * @param array $changes
 * @param array $old
 * @return array
 */
protected function updateRelated( array $changes, array $old ) {
return $changes;
}
/**
 * Remove from joinTable() upon revision delete.
 *
 * Called by remove() after the row was deleted from flow_revision; its
 * return value becomes the return value of remove(). This base
 * implementation does nothing and reports success.
 *
 * @param array $row
 * @return bool
 */
protected function removeRelated( array $row ) {
return true;
}
/**
 * The revision type.
 *
 * Used as the value of the rev_type condition added by addRevTypeToQuery()
 * and by findMostRecent().
 *
 * @return string
 */
abstract protected function getRevType();
/**
 * Passes the database factory on to the parent constructor and stores the
 * external store configuration as-is for later use by processExternalStore().
 *
 * @param DbFactory $dbFactory
 * @param array|false $externalStore List of external store servers available for insert
 * or false to disable. See $wgFlowExternalStore.
 */
public function __construct( DbFactory $dbFactory, $externalStore ) {
parent::__construct( $dbFactory );
$this->externalStore = $externalStore;
}
/**
 * Find the rows matching one specific set of attributes.
 *
 * Wraps the attributes into a single-query batch for findMulti() and hands
 * back the result of that one query.
 *
 * @todo this method can probably be generalized in parent class?
 * @param array $attributes
 * @param array $options
 * @return array Result of the query, or an empty array when findMulti()
 *  returned nothing
 */
public function find( array $attributes, array $options = array() ) {
	$results = $this->findMulti( array( $attributes ), $options );
	if ( !$results ) {
		return array();
	}

	return reset( $results );
}
/**
 * Run one query against flow_revision on the slave database, joined with
 * joinTable() when a join table is configured.
 *
 * @param array $attributes Query conditions; a rev_type condition is added
 *  when rev_type_id is part of them
 * @param array $options Query options, checked through validateOptions()
 * @return array Rows keyed by their rev_id, each passed through
 *  UUID::convertUUIDs( ..., 'alphadecimal' )
 * @throws DataModelException When the select fails
 * @throws MWException When the options do not validate
 */
protected function findInternal( array $attributes, array $options = array() ) {
	$dbr = $this->dbFactory->getDB( DB_SLAVE );

	if ( !$this->validateOptions( $options ) ) {
		throw new MWException( "Validation error in database options" );
	}

	// Add rev_type if rev_type_id exists in query condition
	$attributes = $this->addRevTypeToQuery( $attributes );

	$joins = array();
	$tables = array( 'rev' => 'flow_revision' );
	if ( $this->joinTable() ) {
		$tables[] = $this->joinTable();
		$joins['rev'] = array( 'JOIN', $this->joinField() . ' = rev_id' );
	}

	$conditions = $this->preprocessSqlArray( $attributes );
	$res = $dbr->select( $tables, '*', $conditions, __METHOD__, $options, $joins );
	if ( $res === false ) {
		throw new DataModelException( __METHOD__ . ': Query failed: ' . $dbr->lastError(), 'process-data' );
	}

	$found = array();
	foreach ( $res as $dbRow ) {
		$converted = UUID::convertUUIDs( (array) $dbRow, 'alphadecimal' );
		$found[$converted['rev_id']] = $converted;
	}

	return $found;
}
/**
 * Restrict a query to this storage's revision type when it queries by
 * rev_type_id. Queries without a rev_type_id condition are returned as-is.
 *
 * @param array $query Query conditions
 * @return array Query conditions, with rev_type added where applicable
 */
protected function addRevTypeToQuery( $query ) {
	if ( !isset( $query['rev_type_id'] ) ) {
		return $query;
	}

	$query['rev_type'] = $this->getRevType();
	return $query;
}
/**
 * Answer a batch of queries and merge external content into the results.
 *
 * Batches of fewer than three queries are run one by one through
 * fallbackFindMulti(); larger batches go through findMultiInternal().
 *
 * @param array $queries List of query condition arrays
 * @param array $options
 * @return array Result of mergeExternalContent() on the query results
 */
public function findMulti( array $queries, array $options = array() ) {
	$found = count( $queries ) < 3
		? $this->fallbackFindMulti( $queries, $options )
		: $this->findMultiInternal( $queries, $options );

	return self::mergeExternalContent( $found );
}
/**
 * Answer every query of the batch with its own findInternal() call.
 *
 * @param array $queries List of query condition arrays
 * @param array $options
 * @return array Results of findInternal(), under the same keys as $queries
 */
protected function fallbackFindMulti( array $queries, array $options ) {
	$answers = array();
	foreach ( array_keys( $queries ) as $idx ) {
		$answers[$idx] = $this->findInternal( $queries[$idx], $options );
	}

	return $answers;
}
/**
 * Answer a batch of queries, collapsing them into a single SQL query in the
 * few cases where that is possible and falling back to one query per
 * request otherwise.
 *
 * The set of queried columns is taken from the first query of the batch.
 *
 * @param array $queries Non-empty list of query condition arrays
 * @param array $options
 * @return array
 */
protected function findMultiInternal( array $queries, array $options = array() ) {
	$queriedKeys = array_keys( reset( $queries ) );

	// The findMulti doesn't map well to SQL, basically we are asking to answer a bunch
	// of queries. We can optimize those into a single query in a few select instances:
	if ( isset( $options['LIMIT'] ) && $options['LIMIT'] == 1 ) {
		// Find by primary key
		if ( $options == array( 'LIMIT' => 1 ) &&
			$queriedKeys === array( 'rev_id' )
		) {
			return $this->findRevId( $queries );
		}

		// Find most recent revision of a number of posts
		if ( !isset( $options['OFFSET'] ) &&
			$queriedKeys == array( 'rev_type_id' ) &&
			isset( $options['ORDER BY'] ) &&
			$options['ORDER BY'] === array( 'rev_id DESC' )
		) {
			return $this->findMostRecent( $queries );
		}
	}

	// Fetch a list of revisions for each post
	// @todo this is slow and inefficient. Mildly better solution would be if
	// the index can ask directly for just the list of rev_id instead of whole rows,
	// but would still have the need to run a bunch of queries serially.
	$isRevisionListQuery = count( $options ) === 2 &&
		isset( $options['LIMIT'], $options['ORDER BY'] ) &&
		$options['ORDER BY'] === array( 'rev_id DESC' );

	if ( !$isRevisionListQuery ) {
		// unoptimizable query
		// $queriedKeys already is the list of column names; calling array_keys()
		// on it again would only log its numeric indexes.
		wfDebugLog( 'Flow', __METHOD__
			. ': Unoptimizable query for keys: '
			. implode( ',', $queriedKeys )
			. ' with options '
			. \FormatJson::encode( $options )
		);
	}

	return $this->fallbackFindMulti( $queries, $options );
}
/**
 * Answer a batch of primary key (rev_id) lookups with one query.
 *
 * Every query is registered with a ResultDuplicator under its original
 * index, and the binary form of each rev_id is collected for the lookup
 * done by findRevIdReal().
 *
 * @param array $queries List of query condition arrays, each holding rev_id
 * @return array
 */
protected function findRevId( array $queries ) {
	$binaryIds = array();
	$duplicator = new ResultDuplicator( array( 'rev_id' ), 1 );

	foreach ( $queries as $index => $conditions ) {
		$conditions = UUID::convertUUIDs( (array) $conditions, 'alphadecimal' );
		$duplicator->add( $conditions, $index );

		$revId = $conditions['rev_id'];
		$binaryIds[$revId] = UUID::create( $revId )->getBinary();
	}

	return $this->findRevIdReal( $duplicator, $binaryIds );
}
/**
 * Find the most recent revision for each of a number of rev_type_id values.
 *
 * First selects the highest rev_id per rev_type_id, then loads the full
 * rows for those ids through findRevIdReal().
 *
 * @param array $queries List of query condition arrays, each holding rev_type_id
 * @return array
 * @throws DataModelException When the query fails
 */
protected function findMostRecent( array $queries ) {
	// SELECT MAX( rev_id ) AS rev_id
	// FROM flow_revision
	// WHERE rev_type= 'post' AND rev_type_id IN (...)
	// GROUP BY rev_type_id
	$duplicator = new ResultDuplicator( array( 'rev_type_id' ), 1 );
	foreach ( $queries as $idx => $query ) {
		$query = UUID::convertUUIDs( (array) $query, 'alphadecimal' );
		$duplicator->add( $query, $idx );
	}

	$dbr = $this->dbFactory->getDB( DB_SLAVE );
	$conditions = array( 'rev_type' => $this->getRevType() ) + $this->preprocessSqlArray(
		$this->buildCompositeInCondition( $dbr, $duplicator->getUniqueQueries() )
	);
	$res = $dbr->select(
		array( 'flow_revision' ),
		// The column must not be quoted: MAX( 'rev_id' ) aggregates the string
		// literal 'rev_id' instead of the rev_id column.
		array( 'rev_id' => 'MAX( rev_id )' ),
		$conditions,
		__METHOD__,
		array( 'GROUP BY' => 'rev_type_id' )
	);
	if ( $res === false ) {
		throw new DataModelException( __METHOD__ . ': Query failed: ' . $dbr->lastError(), 'process-data' );
	}

	$revisionIds = array();
	foreach ( $res as $row ) {
		$revisionIds[] = $row->rev_id;
	}

	// Due to the grouping and max, we cant reliably get a full
	// columns info in the above query, forcing the join below
	// rather than just querying flow_revision.
	return $this->findRevIdReal( $duplicator, $revisionIds );
}
/**
 * Load the full rows for a list of revision ids and merge each of them into
 * the duplicator. Without any revision id no query is issued and the
 * duplicator result is returned as it stands.
 *
 * @param ResultDuplicator $duplicator
 * @param array $revisionIds Binary strings representing revision uuid's
 * @return array Result of $duplicator->getResult()
 * @throws DataModelException When the query fails
 */
protected function findRevIdReal( ResultDuplicator $duplicator, array $revisionIds ) {
	if ( !$revisionIds ) {
		return $duplicator->getResult();
	}

	// SELECT * from flow_revision
	// JOIN flow_tree_revision ON tree_rev_id = rev_id
	// WHERE rev_id IN (...)
	$dbr = $this->dbFactory->getDB( DB_SLAVE );

	$joins = array();
	$tables = array( 'flow_revision' );
	if ( $this->joinTable() ) {
		$tables['rev'] = $this->joinTable();
		$joins = array( 'rev' => array( 'JOIN', "rev_id = " . $this->joinField() ) );
	}

	$conditions = array( 'rev_id' => $revisionIds );
	$res = $dbr->select( $tables, '*', $conditions, __METHOD__, array(), $joins );
	if ( $res === false ) {
		throw new DataModelException( __METHOD__ . ': Query failed: ' . $dbr->lastError(), 'process-data' );
	}

	foreach ( $res as $dbRow ) {
		$converted = UUID::convertUUIDs( (array)$dbRow, 'alphadecimal' );
		$duplicator->merge( $converted, array( $converted ) );
	}

	return $duplicator->getResult();
}
/**
 * Handle the injection of externalstore data into a revision
 * row. All rows exiting this method will have rev_content_url
 * set to either null or the external url. The rev_content
 * field will be the final content (possibly compressed still)
 *
 * Rows whose rev_flags contain 'external' get their rev_content moved to
 * rev_content_url and rev_content blanked; Merger::mergeMulti() is then
 * asked to fill rev_content from those urls through
 * ExternalStore::batchFetchFromURLs.
 *
 * @param array $cacheResult 2d array of rows
 * @return array 2d array of rows with content merged and rev_content_url populated
 */
public static function mergeExternalContent( array $cacheResult ) {
	foreach ( $cacheResult as &$source ) {
		if ( $source === null ) {
			// unanswered queries return null
			continue;
		}
		foreach ( $source as &$row ) {
			$flags = explode( ',', $row['rev_flags'] );
			if ( in_array( 'external', $flags, true ) ) {
				$row['rev_content_url'] = $row['rev_content'];
				$row['rev_content'] = '';
			} else {
				$row['rev_content_url'] = null;
			}
		}
		// Break the reference so the last row is not left aliased by $row
		unset( $row );
	}
	// Same for the outer loop: do not hand on an array whose last element
	// still is a reference bound to $source
	unset( $source );

	return Merger::mergeMulti(
		$cacheResult,
		/* fromKey = */ 'rev_content_url',
		/* callable = */ array( 'ExternalStore', 'batchFetchFromURLs' ),
		/* name = */ 'rev_content',
		/* default = */ ''
	);
}
/**
 * Build one condition that matches any of the given queries.
 *
 * The queried columns are taken from the first query. With a single column
 * the result is an array mapping that column to the list of values (a
 * standard IN condition); with several columns it is the SQL string built
 * by $dbr->makeList().
 *
 * @param DatabaseBase $dbr
 * @param array $queries Non-empty list of query condition arrays
 * @return array|string
 */
protected function buildCompositeInCondition( DatabaseBase $dbr, array $queries ) {
	$columns = array_keys( reset( $queries ) );

	if ( count( $columns ) !== 1 ) {
		// composite in condition: ( foo = 1 AND bar = 2 ) OR ( foo = 1 AND bar = 3 )...
		// Could be more efficient if composed as a range scan, but seems more complex than
		// its benefit.
		$alternatives = array();
		foreach ( $queries as $query ) {
			$alternatives[] = $dbr->makeList( $query, LIST_AND );
		}
		return $dbr->makeList( $alternatives, LIST_OR );
	}

	// standard in condition: tree_rev_descendant_id IN (1,2...)
	$column = reset( $columns );
	$values = array();
	foreach ( $queries as $query ) {
		$values[] = reset( $query );
	}

	return array( $column => $values );
}
/**
 * Insert one or more revisions into flow_revision.
 *
 * A single row is accepted as well as a list of rows. Every row is run
 * through processExternalStore() and only its rev_ prefixed columns are
 * written to flow_revision. insertRelated() receives the rows as they were
 * passed in, not the processed ones.
 *
 * @param array $rows A row, or a list of rows
 * @return array|bool Result of insertRelated(), or false when the
 *  flow_revision insert failed
 */
public function insert( array $rows ) {
	$first = reset( $rows );
	if ( !is_array( $first ) ) {
		$rows = array( $rows );
	}

	// Holds the subset of the row to go into the revision table
	$revisions = array();
	foreach ( $rows as $key => $row ) {
		$processed = $this->processExternalStore( $row );
		$revisions[$key] = $this->splitUpdate( $processed, 'rev' );
	}

	$dbw = $this->dbFactory->getDB( DB_MASTER );
	$inserted = $dbw->insert(
		'flow_revision',
		$this->preprocessNestedSqlArray( $revisions ),
		__METHOD__
	);

	if ( $inserted ) {
		return $this->insertRelated( $rows );
	}

	// throw exception?
	return false;
}
/**
 * Checks whether updating content for an existing revision is allowed.
 * This is only needed for rare actions like fixing XSS. Normally a new revision
 * is made.
 *
 * Content may be updated when all content related columns are part of
 * allowedUpdateColumns, and may not when none of them are. Anything in
 * between is an inconsistent configuration and throws.
 *
 * @return bool True if and only if updating existing content is allowed
 * @throws DataModelException
 */
public function isUpdatingExistingRevisionContentAllowed() {
	// All of these are required to do a consistent mechanical update.
	$contentColumns = array(
		'rev_content',
		'rev_content_length',
		'rev_flags',
		'rev_previous_content_length',
	);

	// content columns that can not be updated
	$missing = array_diff( $contentColumns, $this->allowedUpdateColumns );

	if ( !$missing ) {
		// we're able to update all columns we need: go ahead!
		return true;
	}

	if ( $missing === $contentColumns ) {
		// content changes aren't allowed
		return false;
	}

	// we're only able to update part of the columns required to update content
	throw new DataModelException( "Allowed update column configuration is inconsistent", 'allowed-update-inconsistent' );
}
/**
 * If this is a new row (new rows should always have content) or part of an
 * update involving a content change, inserts into external store.
 *
 * On return rev_content holds the external url when one is available, and
 * the rev_content_url column is removed from the row.
 *
 * @param array $row
 * @return array
 */
protected function processExternalStore( array $row ) {
	// Check if we need to insert new content
	$hasNewContent = $this->externalStore && isset( $row['rev_content'] );
	if ( $hasNewContent ) {
		$row = $this->insertExternalStore( $row );
	}

	// If a content url is available store that in the db
	// instead of real content.
	if ( isset( $row['rev_content_url'] ) ) {
		$row['rev_content'] = $row['rev_content_url'];
	}

	unset( $row['rev_content_url'] );
	return $row;
}
/**
 * Write the content of a row to external storage.
 *
 * The returned row has rev_content_url set to the url handed back by
 * ExternalStore::insertWithFallback() and 'external' added to rev_flags.
 *
 * @param array $row Row holding rev_content
 * @return array
 * @throws DataModelException When there is no content, or storing it failed
 */
protected function insertExternalStore( array $row ) {
	$content = $row['rev_content'];
	if ( $content === null ) {
		throw new DataModelException( "Must have data to write to external storage", 'process-data' );
	}

	$url = ExternalStore::insertWithFallback( $this->externalStore, $content );
	if ( !$url ) {
		throw new DataModelException( "Unable to store text to external storage", 'process-data' );
	}
	$row['rev_content_url'] = $url;

	// Append to the existing flags, if there are any
	$row['rev_flags'] = empty( $row['rev_flags'] )
		? 'external'
		: $row['rev_flags'] . ',external';

	return $row;
}
/**
 * Gets the required updates. Any changes to External Store will be reflected in
 * the returned array.
 *
 * @param array $old Associative array mapping prior columns to old values
 * @param array $new Associative array mapping updated columns to new values
 *
 * @return array Validated change set as associative array, mapping columns to
 *  change to their new values
 */
public function calcUpdates( array $old, array $new ) {
	// Figure out what changed before touching External Store, so that
	// processExternalStore only sees content that actually was changed.
	$changes = ObjectManager::calcUpdatesWithoutValidation( $old, $new );

	// Only hit External Store when content updates are allowed. Otherwise we
	// would insert a wasted entry, only to have the parent calcUpdates throw.
	if ( $this->isUpdatingExistingRevisionContentAllowed() ) {
		$changes = $this->processExternalStore( $changes );
	}

	// The parent validates that no non-allowed field is changed, be it
	// explicitly passed in or added by processExternalStore.
	return parent::calcUpdates( array(), $changes );
}
/**
 * This is to *UPDATE* a revision. It should hardly ever be used.
 * For the most part should insert a new revision. This should only be called
 * by maintenance scripts and (future) suppression features.
 *
 * It supports updating content, which is only intended for required mechanical
 * transformations, such as XSS fixes. However, since this is only intended for
 * maintenance scripts, these columns must first be temporarily added to
 * allowedUpdateColumns.
 *
 * @param array $old Row as currently stored, must hold rev_id
 * @param array $new Row holding the new values
 * @return bool False when the flow_revision update failed or affected no
 *  rows, otherwise the result of updateRelated() cast to bool
 */
public function update( array $old, array $new ) {
	$changeSet = $this->calcUpdates( $old, $new );
	$revisionChanges = $this->splitUpdate( $changeSet, 'rev' );

	if ( $revisionChanges ) {
		$dbw = $this->dbFactory->getDB( DB_MASTER );
		$updated = $dbw->update(
			'flow_revision',
			$this->preprocessSqlArray( $revisionChanges ),
			$this->preprocessSqlArray( array( 'rev_id' => $old['rev_id'] ) ),
			__METHOD__
		);
		if ( !$updated || !$dbw->affectedRows() ) {
			return false;
		}
	}

	return (bool) $this->updateRelated( $changeSet, $old );
}
/**
 * Revisions can only be removed for LIMITED circumstances, in almost all cases
 * the offending revision should be updated with appropriate suppression.
 * Also note this doesnt delete the whole post, it just deletes the revision.
 * The post will *always* exist in the tree structure, it will just show up as
 * [deleted] or something
 *
 * @param array $row Row to remove, must hold rev_id
 * @return bool False when the delete failed, otherwise the result of
 *  removeRelated()
 */
public function remove( array $row ) {
	$dbw = $this->dbFactory->getDB( DB_MASTER );
	$conditions = $this->preprocessSqlArray( array( 'rev_id' => $row['rev_id'] ) );

	$deleted = $dbw->delete( 'flow_revision', $conditions, __METHOD__ );
	if ( $deleted ) {
		return $this->removeRelated( $row );
	}

	return false;
}
/**
 * Used to locate the index for a query by ObjectLocator::get()
 *
 * @return array List holding the single primary key column, rev_id
 */
public function getPrimaryKeyColumns() {
return array( 'rev_id' );
}
/**
 * When retrieving revisions from DB, self::mergeExternalContent will be
 * called to fetch the content. This could fail, resulting in the content
 * being a 'false' value.
 *
 * A row is valid unless its rev_content is exactly false; rows without
 * rev_content (or with a null one) are valid.
 *
 * {@inheritDoc}
 */
public function validate( array $row ) {
	if ( !isset( $row['rev_content'] ) ) {
		return true;
	}

	return $row['rev_content'] !== false;
}
/**
 * Gets all columns from $row that start with a given prefix and omits other
 * columns.
 *
 * The prefix of a column is everything before its first underscore; columns
 * without an underscore never match.
 *
 * @param array $row Row to take the columns from
 * @param string[optional] $prefix
 * @return array Columns of $row whose prefix equals $prefix
 */
protected function splitUpdate( array $row, $prefix = 'rev' ) {
	$matching = array();

	foreach ( $row as $column => $value ) {
		$underscore = strpos( $column, '_' );
		if ( $underscore === false ) {
			continue;
		}
		if ( substr( $column, 0, $underscore ) === $prefix ) {
			$matching[$column] = $value;
		}
	}

	return $matching;
}
}