| Current File : /home/jvzmxxx/wiki1/extensions/Flow/includes/Conversion/Utils.php |
<?php
namespace Flow\Conversion;
use DOMDocument;
use DOMNode;
use FauxResponse;
use Flow\Container;
use Flow\Exception\FlowException;
use Flow\Exception\NoParserException;
use Flow\Exception\WikitextException;
use Language;
use Linker;
use MultiHttpClient;
use OutputPage;
use RequestContext;
use Sanitizer;
use Title;
use User;
use VirtualRESTServiceClient;
abstract class Utils {
/**
 * Convert content between the formats Flow understands.
 *
 * Supported pairs:
 *  - wikitext <=> html, dispatched to Parsoid when it is configured and to
 *    core's Parser otherwise.
 *  - topic-title-wikitext => topic-title-html or topic-title-plaintext.
 *
 * topic-title-html => topic-title-wikitext is not supported.
 * 'wt' is accepted as an alias of 'wikitext' for $from.
 *
 * @param string $from Format of content to convert: html|wikitext|wt|topic-title-wikitext
 * @param string $to Format to convert to: html|wikitext|topic-title-html|topic-title-plaintext
 * @param string $content
 * @param Title $title
 * @return string
 * @throws WikitextException When the requested conversion is unsupported
 * @throws NoParserException When the conversion fails
 */
public static function convert( $from, $to, $content, Title $title ) {
	// Nothing to do for identical formats or empty content.
	if ( $from === $to || $content === '' ) {
		return $content;
	}

	if ( $from === 'wt' ) {
		$from = 'wikitext';
		// Re-check now that the alias is resolved: 'wt' => 'wikitext' is a
		// no-op and must not be sent through a wikitext => wikitext conversion.
		if ( $from === $to ) {
			return $content;
		}
	}

	// Anything that is not full wikitext/html is treated as a topic title.
	if ( $from !== 'wikitext' && $from !== 'html' ) {
		return self::commentParser( $from, $to, $content );
	}

	if ( $to !== 'wikitext' && $to !== 'html' ) {
		throw new WikitextException( "Conversion from '$from' to '$to' was requested, but this is not supported." );
	}

	if ( self::isParsoidConfigured() ) {
		return self::parsoid( $from, $to, $content, $title );
	}
	return self::parser( $from, $to, $content, $title );
}
/**
 * Basic conversion of html to plaintext for use in recent changes, history,
 * and other places where a roundtrip is undesired.
 *
 * Tags are stripped, HTML entities (including both quote styles) are decoded
 * and surrounding whitespace is trimmed.
 *
 * @param string $html
 * @param int|null $truncateLength Maximum length (including ellipses) or null for whole string.
 * @param Language|null $lang Language to use for truncation. Defaults to $wgLang
 * @return string plaintext
 */
public static function htmlToPlaintext( $html, $truncateLength = null, Language $lang = null ) {
	/** @var Language $wgLang */
	global $wgLang;

	// Name the charset explicitly: without it the result depends on the PHP
	// version and the default_charset ini setting, which can turn entities
	// into bytes that are not valid UTF-8.
	$plain = trim( html_entity_decode( strip_tags( $html ), ENT_QUOTES, 'UTF-8' ) );

	if ( $truncateLength === null ) {
		return $plain;
	}

	$lang = $lang ?: $wgLang;
	return $lang->truncate( $plain, $truncateLength );
}
/**
 * Convert between wikitext and html by POSTing the content to the
 * Parsoid/RESTBase virtual REST service mounted at /restbase/.
 *
 * This assumes Parsoid/RESTBase is installed and configured; fetching the
 * service client is the first thing done here.
 *
 * @param string $from Format of content to convert: html|wikitext
 * @param string $to Format to convert to: html|wikitext
 * @param string $content
 * @param Title $title Title the content belongs to; its prefixed DB key is part of the request URL
 * @return string
 * @throws NoParserException When Parsoid/RESTBase operation fails
 * @throws WikitextException When conversion is unsupported
 */
protected static function parsoid( $from, $to, $content, Title $title ) {
	global $wgVersion;

	$client = self::getServiceClient();

	$knownSource = ( $from === 'html' || $from === 'wikitext' );
	if ( !$knownSource ) {
		throw new WikitextException( 'Unknown source format: ' . $from, 'process-wikitext' );
	}

	$dbKey = $title->getPrefixedDBkey();

	// The content travels under a key named after its own format.
	$body = array(
		$from => $content,
		'body_only' => 'true',
	);
	if ( $from === 'html' ) {
		$body['scrub_wikitext'] = 'true';
	}

	$response = $client->run( array(
		'method' => 'POST',
		'url' => "/restbase/local/v1/transform/{$from}/to/{$to}/" . urlencode( $dbKey ),
		'body' => $body,
		'headers' => array(
			'Accept' => 'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/1.2.1"',
			'User-Agent' => "Flow-MediaWiki/$wgVersion",
		),
	) );

	if ( $response['code'] === 200 ) {
		$converted = $response['body'];
		// HACK remove trailing newline inserted by Parsoid (T106925)
		if ( $to === 'wikitext' ) {
			$converted = preg_replace( '/\\n$/', '', $converted );
		}
		return $converted;
	}

	// Anything but a 200 is a failure: log the full response, then throw.
	// Prefer the transport error text; fall back to the status code.
	$statusMsg = $response['error'] !== '' ? $response['error'] : $response['code'];

	$mountInfo = $client->getMountAndService( '/restbase/' );
	$serviceName = $mountInfo[1] ? $mountInfo[1]->getName() : 'VRS service';

	$failure = "Request to $serviceName for \"$from\" to \"$to\" conversion of content connected to title \"$dbKey\" failed: $statusMsg";

	Container::get( 'default_logger' )->error(
		'Request to {service} for "{sourceFormat}" to "{targetFormat}" conversion of content connected to title "{title}" failed. Code: {code}, Reason: "{reason}", Body: "{body}", Error: "{error}"',
		array(
			'service' => $serviceName,
			'sourceFormat' => $from,
			'targetFormat' => $to,
			'title' => $dbKey,
			'code' => $response['code'],
			'reason' => $response['reason'],
			'error' => $response['error'], // This is sometimes/always empty string
			'headers' => $response['headers'],
			'body' => $response['body'],
			'response' => $response,
		)
	);

	throw new NoParserException( $failure, 'process-wikitext' );
}
/**
 * Convert a topic title from wikitext to html or plaintext.
 *
 * The content is escaped (entities are allowed through) and its links are
 * formatted with Linker::formatLinksInComment. For the plaintext target
 * the resulting html is then reduced with htmlToPlaintext().
 *
 * @param string $from Format of content to convert: topic-title-wikitext
 * @param string $to Format of content to convert to: topic-title-html|topic-title-plaintext
 * @param string $content Content to convert, in topic-title-wikitext format.
 * @return string $content in HTML or plaintext, depending on $to
 * @throws WikitextException When the requested pair is not one of the above
 */
protected static function commentParser( $from, $to, $content ) {
	$supported = $from === 'topic-title-wikitext'
		&& in_array( $to, array( 'topic-title-html', 'topic-title-plaintext' ), true );
	if ( !$supported ) {
		throw new WikitextException( "Conversion from '$from' to '$to' was requested, but this is not supported." );
	}

	$escaped = Sanitizer::escapeHtmlAllowEntities( $content );
	$html = Linker::formatLinksInComment( $escaped );

	return $to === 'topic-title-plaintext'
		? self::htmlToPlaintext( $html )
		: $html;
}
/**
 * Convert wikitext to html with core's Parser ($wgParser).
 *
 * No other direction is available here; any other pair is rejected.
 * Parsing is done with tidy enabled and edit section links disabled.
 *
 * @param string $from Format of content to convert: wikitext
 * @param string $to Format to convert to: html
 * @param string $content
 * @param Title $title
 * @return string
 * @throws WikitextException When the conversion is unsupported
 */
protected static function parser( $from, $to, $content, Title $title ) {
	global $wgParser;

	$supported = ( $from === 'wikitext' && $to === 'html' );
	if ( !$supported ) {
		throw new WikitextException( "Conversion from '$from' to '$to' was requested, but core's Parser only supports 'wikitext' to 'html' conversion", 'process-wikitext' );
	}

	$parserOptions = new \ParserOptions;
	$parserOptions->setTidy( true );
	$parserOptions->setEditSection( false );

	return $wgParser->parse( $content, $title, $parserOptions )->getText();
}
/**
 * Tell whether a Parsoid or RESTBase service is configured.
 *
 * This is determined by trying to obtain the service client: a
 * NoParserException from that attempt means nothing is configured.
 *
 * @return bool
 */
public static function isParsoidConfigured() {
	$configured = true;
	try {
		self::getServiceClient();
	} catch ( NoParserException $e ) {
		$configured = false;
	}
	return $configured;
}
/**
 * Client created by getServiceClient() and kept for reuse by later calls;
 * null until one has been created successfully.
 *
 * @var VirtualRESTServiceClient|null
 */
protected static $serviceClient = null;

/**
 * Returns Flow's Virtual REST Service client for Parsoid/RESTBase.
 * The service is mounted at /restbase/ and will answer RESTBase v1
 * API requests. The client is built on first use and reused afterwards.
 *
 * @return VirtualRESTServiceClient
 * @throws NoParserException When Parsoid/RESTBase is unconfigured
 */
protected static function getServiceClient() {
	if ( self::$serviceClient !== null ) {
		return self::$serviceClient;
	}

	$client = new VirtualRESTServiceClient( new MultiHttpClient( array() ) );
	// Throws when no service is configured, in which case nothing is cached.
	$client->mount( '/restbase/', self::getVRSObject() );

	self::$serviceClient = $client;
	return $client;
}
/**
 * Creates the Virtual REST Service object to be used in Flow's
 * API calls.
 *
 * A ParsoidVirtualRESTService is always created at the moment. Its
 * parameters come from `$wgVirtualRestConfig['modules']['parsoid']`
 * when that is defined; otherwise, for backwards compatibility, the
 * $wgFlowParsoid* variables are used as a fall back.
 * `$wgVirtualRestConfig['modules']['restbase']` is currently ignored
 * (see T115236).
 *
 * Entries of `$wgVirtualRestConfig['global']` act as defaults for any
 * parameter the chosen configuration does not set itself.
 *
 * @return \VirtualRESTService the VirtualRESTService object to use
 * @throws NoParserException When Parsoid/RESTBase is not configured
 */
private static function getVRSObject() {
	global $wgVirtualRestConfig, $wgFlowParsoidURL, $wgFlowParsoidPrefix,
		$wgFlowParsoidTimeout, $wgFlowParsoidForwardCookies,
		$wgFlowParsoidHTTPProxy;

	// the VRS class to use; only Parsoid for now
	$class = 'ParsoidVirtualRESTService';
	// the global virtual rest service config object, if any
	$vrs = $wgVirtualRestConfig;

	// HACK: don't use RESTBase because it'll drop data-parsoid, see T115236.
	// Once that is resolved, $vrs['modules']['restbase'] should be checked
	// first and served by RestbaseVirtualRESTService with
	// 'parsoidCompat' => false.
	if ( isset( $vrs['modules']['parsoid'] ) ) {
		// there's a global parsoid config, use it
		$params = $vrs['modules']['parsoid'];
		$params['restbaseCompat'] = true;
	} else {
		// no global modules defined, fall back to old defaults
		if ( !$wgFlowParsoidURL ) {
			throw new NoParserException( 'Flow Parsoid configuration is unavailable', 'process-wikitext' );
		}
		$params = array(
			'URL' => $wgFlowParsoidURL,
			'prefix' => $wgFlowParsoidPrefix,
			'timeout' => $wgFlowParsoidTimeout,
			'HTTPProxy' => $wgFlowParsoidHTTPProxy,
			'forwardCookies' => $wgFlowParsoidForwardCookies
		);
	}

	// merge the global and service-specific params; service-specific wins
	if ( isset( $vrs['global'] ) ) {
		$params = array_merge( $vrs['global'], $params );
	}

	// set up cookie forwarding; the key may be missing altogether when
	// neither the module config nor the global config defines it
	if ( !empty( $params['forwardCookies'] ) && !User::isEveryoneAllowed( 'read' ) ) {
		if ( PHP_SAPI === 'cli' ) {
			// From the command line we need to generate a cookie
			$params['forwardCookies'] = self::generateForwardedCookieForCli();
		} else {
			$params['forwardCookies'] = RequestContext::getMain()->getRequest()->getHeader( 'Cookie' );
		}
	} else {
		$params['forwardCookies'] = false;
	}

	// create the VRS object and return it
	return new $class( $params );
}
/**
 * Turns given $content string into a DOMDocument object.
 *
 * Note that, by default, $content will be prefixed with <?xml encoding="utf-8"?> to force
 * libxml to interpret the content as UTF-8. If for some reason you don't want this to happen,
 * or you are certain that your input already has <?xml encoding="utf-8"?> or
 * <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> , then you can disable
 * this behavior by setting $utf8Fragment=false.
 *
 * Some libxml errors are forgivable, libxml errors that aren't
 * ignored will throw a WikitextException.
 *
 * The default error codes allowed are:
 *        9 - allow illegal characters (they are removed, but this option means it
 *            doesn't trigger an error).
 *       76 - allow unexpected end tag. This is typically old wikitext using deprecated tags.
 *      513 - allow multiple tags with same id
 *      801 - allow unrecognized tags like figcaption
 *
 * @param string $content
 * @param boolean[optional] $utf8Fragment If true, prefix $content with <?xml encoding="utf-8"?>
 * @param array[optional] $ignoreErrorCodes
 * @return DOMDocument
 * @throws WikitextException When libxml reports an error whose code is not in $ignoreErrorCodes
 * @see http://www.xmlsoft.org/html/libxml-xmlerror.html
 */
public static function createDOM( $content, $utf8Fragment = true, $ignoreErrorCodes = array( 9, 76, 513, 801 ) ) {
	$dom = new DOMDocument();

	// Otherwise the parser may attempt to load the dtd from an external source.
	// See: https://www.mediawiki.org/wiki/XML_External_Entity_Processing
	// Only libxml before 2.9.0 loads external entities by default; newer
	// versions don't, and libxml_disable_entity_loader() is deprecated as of
	// PHP 8.0, so only touch the loader where it is actually needed.
	$toggleEntityLoader = LIBXML_VERSION < 20900;
	$loadEntities = false;
	if ( $toggleEntityLoader ) {
		$loadEntities = libxml_disable_entity_loader( true );
	}

	// don't output warnings
	$useErrors = libxml_use_internal_errors( true );
	// Forget errors left over from earlier parses so that they can't be
	// blamed on $content (they would be cleared below in any case).
	libxml_clear_errors();

	// Work around DOMDocument's morbid insistence on using iso-8859-1
	// Even $dom = new DOMDocument( '1.0', 'utf-8' ); doesn't work, you have to specify
	// encoding ="utf-8" in the string fed to loadHTML()
	$html = ( $utf8Fragment ? '<?xml encoding="utf-8"?>' : '' ) . $content;
	$dom->loadHTML( $html, LIBXML_PARSEHUGE );

	if ( $toggleEntityLoader ) {
		libxml_disable_entity_loader( $loadEntities );
	}

	// check error codes; if not in the supplied list of ignorable errors,
	// throw an exception
	$errors = array_filter(
		libxml_get_errors(),
		function( $error ) use( $ignoreErrorCodes ) {
			return !in_array( $error->code, $ignoreErrorCodes );
		}
	);

	// restore libxml state before anything else
	libxml_clear_errors();
	libxml_use_internal_errors( $useErrors );

	if ( $errors ) {
		throw new WikitextException(
			implode( "\n", array_map( function( $error ) { return $error->message; }, $errors ) )
				. "\n\nFrom source content:\n" . $content,
			'process-wikitext'
		);
	}

	return $dom;
}
/**
 * Handler for FlowAddModules, avoids rest of Flow having to be aware if
 * Parsoid is in use. Adds the Parsoid content styles to the page when a
 * Parsoid/RESTBase service is configured and does nothing otherwise.
 *
 * @param OutputPage $out OutputPage object
 * @return bool Always true
 */
public static function onFlowAddModules( OutputPage $out ) {
	if ( !self::isParsoidConfigured() ) {
		// The modules are only necessary when we are using parsoid.
		return true;
	}

	// XXX We only need the Parsoid CSS if some content being
	// rendered has getContentFormat() === 'html'.
	$parsoidStyles = array(
		'mediawiki.skinning.content.parsoid',
		'ext.cite.style',
	);
	$out->addModuleStyles( $parsoidStyles );

	return true;
}
/**
 * Serializes the children of a node to html, leaving out the node itself.
 *
 * @param DOMNode|null $node Node whose children to serialize; null yields an empty string
 * @return string html of the node's children, concatenated in document order
 */
public static function getInnerHtml( DOMNode $node = null ) {
	if ( !$node ) {
		return '';
	}

	// A document serializes its own children; any other node goes through
	// the document that owns it.
	$document = $node instanceof DOMDocument ? $node : $node->ownerDocument;

	$innerHtml = '';
	foreach ( $node->childNodes as $childNode ) {
		$innerHtml .= $document->saveHTML( $childNode );
	}
	return $innerHtml;
}
/**
 * Subpage links from Parsoid don't contain any direct context, it is applied
 * via a <base href="..."> tag, so here we apply a similar rule resolving
 * against $title.
 *
 * Leading ./ and ../ segments are dropped first. What is left is appended to
 * $title's DB key (in $title's namespace) when it starts with / or #, and is
 * used as a title text of its own otherwise.
 *
 * @param string $text
 * @param Title $title Title to resolve relative links against
 * @return Title|null
 */
public static function createRelativeTitle( $text, Title $title ) {
	// currently parsoid always uses enough ../ or ./ to go
	// back to the root, a bit of a kludge but just assume we
	// can strip and will end up with a non-relative text.
	$stripped = preg_replace( '|^(\.\.?/)+|', '', $text );

	$isRelativeToTitle = $stripped
		&& in_array( $stripped[0], array( '/', '#' ), true );
	if ( !$isRelativeToTitle ) {
		return Title::newFromText( $stripped );
	}

	return Title::newFromText( $title->getDBkey() . $stripped, $title->getNamespace() );
}
/**
 * Builds a Cookie header value ("foo=bar; baz=bang") for use from the
 * command line.
 *
 * Cookies are set for the talk page manager obtained from the occupation
 * controller and then read back from the main request's FauxResponse.
 * Each cookie name is prefixed with $wgCookiePrefix.
 *
 * @todo move into FauxRequest
 * @return string
 * @throws FlowException When the main request's response is not a FauxResponse
 */
public static function generateForwardedCookieForCli() {
	global $wgCookiePrefix;

	$talkpageManager = Container::get( 'occupation_controller' )->getTalkpageManager();
	// This takes a request object, but doesnt set the cookies against it.
	// patch at https://gerrit.wikimedia.org/r/177403
	$talkpageManager->setCookies( null, null, /* rememberMe */ true );

	$response = RequestContext::getMain()->getRequest()->response();
	if ( !( $response instanceof FauxResponse ) ) {
		throw new FlowException( 'Expected a FauxResponse in CLI environment' );
	}

	// FauxResponse does not yet expose the full set of cookies, so read
	// its 'cookies' property through reflection.
	$cookiesProp = new \ReflectionProperty( $response, 'cookies' );
	$cookiesProp->setAccessible( true );

	// now we need to convert the array into the cookie format of
	// foo=bar; baz=bang
	$pairs = array();
	foreach ( $cookiesProp->getValue( $response ) as $name => $value ) {
		$pairs[] = $wgCookiePrefix . $name . '=' . $value;
	}
	return implode( '; ', $pairs );
}
}