<?php

namespace Wikibase\Rdf;

use SiteList;
use Wikibase\DataModel\Entity\EntityDocument;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\Property;
use Wikibase\DataModel\Entity\PropertyId;
use Wikibase\DataModel\Services\Lookup\EntityLookup;
use Wikibase\DataModel\Services\Lookup\PropertyDataTypeLookup;
use Wikibase\DataModel\Term\FingerprintProvider;
use Wikibase\Lib\Store\RevisionedUnresolvedRedirectException;
use Wikimedia\Purtle\RdfWriter;

/**
 * RDF mapping for wikibase data model.
 *
 * @since 0.4
 *
 * @license GPL-2.0+
 * @author Anja Jentzsch < anja.jentzsch@wikimedia.de >
 * @author Thomas Pellissier Tanon
 * @author Daniel Kinzler
 * @author Stas Malyshev
 */
class RdfBuilder implements EntityRdfBuilder, EntityMentionListener {

	/**
	 * A list of entities mentioned/touched to or by this builder.
	 * The serialized entity IDs are used as keys in the array. The value 'true'
	 * indicates that the entity has been resolved; an EntityId value indicates
	 * that the entity has been mentioned but not yet resolved (defined).
	 *
	 * @var array Map of serialized entity ID => true|EntityId
	 */
	private $entitiesResolved = array();

	/**
	 * Which aspects the builder should produce. Tested bitwise against
	 * RdfProducer::PRODUCE_* flags by shouldProduce().
	 *
	 * @var int
	 */
	private $produceWhat;

	/**
	 * The writer all RDF output of this builder goes to.
	 *
	 * @var RdfWriter
	 */
	private $writer;

	/**
	 * Bag handed to the value snak, full statement and site link builders.
	 *
	 * @var DedupeBag
	 */
	private $dedupeBag;

	/**
	 * Rdf builder for outputting labels for entity stubs.
	 * Also registered as the first entry of $builders.
	 *
	 * @var TermsRdfBuilder
	 */
	private $termsBuilder;

	/**
	 * RDF builders to apply when building RDF for an entity.
	 * @var EntityRdfBuilder[]
	 */
	private $builders = array();

	/**
	 * @var RdfVocabulary
	 */
	private $vocabulary;

	/**
	 * Passed to the SnakRdfBuilder instances created by newSnakBuilder().
	 *
	 * @var PropertyDataTypeLookup
	 */
	private $propertyLookup;

	/**
	 * Source of the simple/complex value snak builders used by newSnakBuilder().
	 *
	 * @var ValueSnakRdfBuilderFactory
	 */
	private $valueSnakRdfBuilderFactory;

	/**
	 * Sets up the builder and registers the sub-builders selected by $flavor.
	 *
	 * @param SiteList $sites
	 * @param RdfVocabulary $vocabulary
	 * @param ValueSnakRdfBuilderFactory $valueSnakRdfBuilderFactory
	 * @param PropertyDataTypeLookup $propertyLookup
	 * @param int $flavor Tested bitwise against RdfProducer::PRODUCE_* flags
	 * @param RdfWriter $writer
	 * @param DedupeBag|null $dedupeBag Optional; a new HashDedupeBag is used if omitted or null.
	 */
	public function __construct(
		SiteList $sites,
		RdfVocabulary $vocabulary,
		ValueSnakRdfBuilderFactory $valueSnakRdfBuilderFactory,
		PropertyDataTypeLookup $propertyLookup,
		$flavor,
		RdfWriter $writer,
		DedupeBag $dedupeBag = null
	) {
		$this->vocabulary = $vocabulary;
		$this->propertyLookup = $propertyLookup;
		$this->valueSnakRdfBuilderFactory = $valueSnakRdfBuilderFactory;
		$this->writer = $writer;
		$this->produceWhat = $flavor;

		// The fallback is only reachable because the parameter defaults to null;
		// a plain "DedupeBag $dedupeBag" type hint would reject null outright.
		if ( $dedupeBag === null ) {
			$dedupeBag = new HashDedupeBag();
		}
		$this->dedupeBag = $dedupeBag;

		// XXX: move construction of sub-builders to a factory class.
		$this->termsBuilder = new TermsRdfBuilder( $vocabulary, $writer );
		$this->builders[] = $this->termsBuilder;

		if ( $this->shouldProduce( RdfProducer::PRODUCE_TRUTHY_STATEMENTS ) ) {
			$this->builders[] = $this->newTruthyStatementRdfBuilder();
		}

		if ( $this->shouldProduce( RdfProducer::PRODUCE_ALL_STATEMENTS ) ) {
			$this->builders[] = $this->newFullStatementRdfBuilder();
		}

		// placing this last produces more readable output since all entity things are together
		if ( $this->shouldProduce( RdfProducer::PRODUCE_SITELINKS ) ) {
			$builder = new SiteLinksRdfBuilder( $vocabulary, $writer, $sites );
			// We can use the same bag since namespaces are different
			$builder->setDedupeBag( $this->dedupeBag );
			$this->builders[] = $builder;
		}
	}

	/**
	 * Creates a snak builder backed by either the complex or the simple value
	 * snak builder of the factory, and registers this object as its
	 * entity mention listener.
	 *
	 * @param string $full 'full' selects the complex value builder, anything else the simple one
	 *
	 * @return SnakRdfBuilder
	 */
	private function newSnakBuilder( $full ) {
		$factory = $this->valueSnakRdfBuilderFactory;

		$valueBuilder = ( $full === 'full' )
			? $factory->getComplexValueSnakRdfBuilder( $this->vocabulary, $this->writer, $this, $this->dedupeBag )
			: $factory->getSimpleValueSnakRdfBuilder( $this->vocabulary, $this->writer, $this, $this->dedupeBag );

		$result = new SnakRdfBuilder( $this->vocabulary, $valueBuilder, $this->propertyLookup );
		$result->setEntityMentionListener( $this );

		return $result;
	}

	/**
	 * Creates the builder for truthy statements.
	 *
	 * @return EntityRdfBuilder
	 */
	private function newTruthyStatementRdfBuilder() {
		// NOTE: currently, only simple values are supported in truthy mode!
		return new TruthyStatementRdfBuilder(
			$this->vocabulary,
			$this->writer,
			$this->newSnakBuilder( 'simple' )
		);
	}

	/**
	 * Creates the builder for full statements, configured from the
	 * PRODUCE_FULL_VALUES, PRODUCE_QUALIFIERS and PRODUCE_REFERENCES flags.
	 *
	 * @return EntityRdfBuilder
	 */
	private function newFullStatementRdfBuilder() {
		$valueMode = 'simple';
		if ( $this->shouldProduce( RdfProducer::PRODUCE_FULL_VALUES ) ) {
			$valueMode = 'full';
		}

		$statementBuilder = new FullStatementRdfBuilder(
			$this->vocabulary,
			$this->writer,
			$this->newSnakBuilder( $valueMode )
		);

		$statementBuilder->setDedupeBag( $this->dedupeBag );
		$statementBuilder->setProduceQualifiers(
			$this->shouldProduce( RdfProducer::PRODUCE_QUALIFIERS )
		);
		$statementBuilder->setProduceReferences(
			$this->shouldProduce( RdfProducer::PRODUCE_REFERENCES )
		);

		return $statementBuilder;
	}

	/**
	 * Begins the RDF document: registers every namespace prefix with the
	 * writer, then starts the writer.
	 * This builder is not required to finish the document itself; that may
	 * happen later.
	 */
	public function startDocument() {
		$namespaces = $this->getNamespaces();

		foreach ( $namespaces as $prefix => $namespaceUri ) {
			$this->writer->prefix( $prefix, $namespaceUri );
		}

		$this->writer->start();
	}

	/**
	 * Ends the document by finishing the writer.
	 * Nothing should be written into the document afterwards.
	 */
	public function finishDocument() {
		$writer = $this->writer;
		$writer->finish();
	}

	/**
	 * Drains the writer and hands back the RDF it produced.
	 *
	 * @return string RDF
	 */
	public function getRDF() {
		$rdf = $this->writer->drain();

		return $rdf;
	}

	/**
	 * Returns the vocabulary's map of namespace names to URIs.
	 *
	 * @return array
	 */
	public function getNamespaces() {
		$vocabulary = $this->vocabulary;

		return $vocabulary->getNamespaces();
	}

	/**
	 * Tells whether the given aspect was requested for this builder.
	 *
	 * @param int $what Flag(s) to test against the configured flavor
	 *
	 * @return bool True if at least one bit of $what is set in the flavor
	 */
	private function shouldProduce( $what ) {
		$matchingFlags = $this->produceWhat & $what;

		return $matchingFlags !== 0;
	}

	/**
	 * Records the referenced entity for later resolution, but only when
	 * PRODUCE_RESOLVED_ENTITIES is requested.
	 *
	 * @see EntityMentionListener::entityReferenceMentioned
	 *
	 * @param EntityId $id
	 */
	public function entityReferenceMentioned( EntityId $id ) {
		if ( !$this->shouldProduce( RdfProducer::PRODUCE_RESOLVED_ENTITIES ) ) {
			return;
		}

		$this->entityToResolve( $id );
	}

	/**
	 * Records the mentioned property for later resolution, but only when
	 * PRODUCE_PROPERTIES is requested.
	 *
	 * @see EntityMentionListener::propertyMentioned
	 *
	 * @param PropertyId $id
	 */
	public function propertyMentioned( PropertyId $id ) {
		if ( !$this->shouldProduce( RdfProducer::PRODUCE_PROPERTIES ) ) {
			return;
		}

		$this->entityToResolve( $id );
	}

	/**
	 * Notes that an entity was mentioned.
	 * If the entity has no entry yet, it is recorded as unresolved; an existing
	 * entry (resolved or not) is left untouched.
	 *
	 * @param EntityId $entityId
	 */
	private function entityToResolve( EntityId $entityId ) {
		$key = $entityId->getSerialization();

		if ( isset( $this->entitiesResolved[$key] ) ) {
			return;
		}

		$this->entitiesResolved[$key] = $entityId;
	}

	/**
	 * Marks an entity as resolved, replacing any pending entry for it.
	 *
	 * @param EntityId $entityId
	 */
	private function entityResolved( EntityId $entityId ) {
		$this->entitiesResolved[$entityId->getSerialization()] = true;
	}

	/**
	 * Adds revision information about an entity's revision to the RDF graph.
	 *
	 * Writes a schema.org Dataset node in the data namespace that is "about"
	 * the entity, carrying the revision number and modification date and,
	 * if PRODUCE_VERSION_INFO is requested, license and format version.
	 *
	 * @todo: extract into MetaDataRdfBuilder
	 *
	 * @param EntityId $entityId
	 * @param int $revision
	 * @param string $timestamp in TS_MW format
	 */
	public function addEntityRevisionInfo( EntityId $entityId, $revision, $timestamp ) {
		$timestamp = wfTimestamp( TS_ISO_8601, $timestamp );
		$entityLName = $this->vocabulary->getEntityLName( $entityId );

		// Use the entity's local name for the data node too, rather than handing the
		// EntityId object to the writer and relying on implicit string conversion.
		$this->writer->about( RdfVocabulary::NS_DATA, $entityLName )
			->a( RdfVocabulary::NS_SCHEMA_ORG, "Dataset" )
			->say( RdfVocabulary::NS_SCHEMA_ORG, 'about' )->is( RdfVocabulary::NS_ENTITY, $entityLName );

		if ( $this->shouldProduce( RdfProducer::PRODUCE_VERSION_INFO ) ) {
			// Dumps don't need version/license info for each entity, since it is included in the dump header
			$this->writer
				->say( RdfVocabulary::NS_CC, 'license' )->is( RdfVocabulary::LICENSE )
				->say( RdfVocabulary::NS_SCHEMA_ORG, 'softwareVersion' )->value( RdfVocabulary::FORMAT_VERSION );
		}

		$this->writer->say( RdfVocabulary::NS_SCHEMA_ORG, 'version' )->value( $revision, 'xsd', 'integer' )
			->say( RdfVocabulary::NS_SCHEMA_ORG, 'dateModified' )->value( $timestamp, 'xsd', 'dateTime' );
	}

	/**
	 * Write definition for wdno:P123 class to use as novalue.
	 *
	 * The class is declared as an owl:Class that is the complement of a blank
	 * owl:Restriction on the property's direct claim predicate with
	 * someValuesFrom owl:Thing.
	 *
	 * @param string $id
	 */
	private function writeNovalueClass( $id ) {
		$writer = $this->writer;

		$writer->about( RdfVocabulary::NSP_NOVALUE, $id )
			->say( 'a' )->is( 'owl', 'Class' );

		// The blank node is allocated while the novalue class is still the current subject.
		$restrictionNode = $writer->blank();
		$writer->say( 'owl', 'complementOf' )->is( '_', $restrictionNode );

		$writer->about( '_', $restrictionNode )
			->say( 'a' )->is( 'owl', 'Restriction' )
			->say( 'owl', 'onProperty' )->is( RdfVocabulary::NSP_DIRECT_CLAIM, $id )
			->say( 'owl', 'someValuesFrom' )->is( 'owl', 'Thing' );
	}

	/**
	 * Write predicates linking property entity to property predicates, then
	 * declare the OWL type of each of those predicates.
	 *
	 * The linking triples are attached to whatever subject the writer is
	 * currently about.
	 *
	 * @param string $id
	 * @param bool $isObjectProperty Is the property data or object property?
	 */
	private function writePropertyPredicates( $id, $isObjectProperty ) {
		$writer = $this->writer;

		// Ontology predicate => namespace of the property-specific predicate it points to.
		$predicateNamespaces = array(
			'directClaim' => RdfVocabulary::NSP_DIRECT_CLAIM,
			'claim' => RdfVocabulary::NSP_CLAIM,
			'statementProperty' => RdfVocabulary::NSP_CLAIM_STATEMENT,
			'statementValue' => RdfVocabulary::NSP_CLAIM_VALUE,
			'qualifier' => RdfVocabulary::NSP_QUALIFIER,
			'qualifierValue' => RdfVocabulary::NSP_QUALIFIER_VALUE,
			'reference' => RdfVocabulary::NSP_REFERENCE,
			'referenceValue' => RdfVocabulary::NSP_REFERENCE_VALUE,
			'novalue' => RdfVocabulary::NSP_NOVALUE,
		);

		foreach ( $predicateNamespaces as $predicate => $namespace ) {
			$writer->say( RdfVocabulary::NS_ONTOLOGY, $predicate )->is( $namespace, $id );
		}

		// Always object properties
		$alwaysObjectNamespaces = array(
			RdfVocabulary::NSP_CLAIM,
			RdfVocabulary::NSP_CLAIM_VALUE,
			RdfVocabulary::NSP_QUALIFIER_VALUE,
			RdfVocabulary::NSP_REFERENCE_VALUE,
		);

		foreach ( $alwaysObjectNamespaces as $namespace ) {
			$writer->about( $namespace, $id )->a( 'owl', 'ObjectProperty' );
		}

		// Depending on property type
		$owlType = $isObjectProperty ? 'ObjectProperty' : 'DatatypeProperty';
		$typeDependentNamespaces = array(
			RdfVocabulary::NSP_DIRECT_CLAIM,
			RdfVocabulary::NSP_CLAIM_STATEMENT,
			RdfVocabulary::NSP_QUALIFIER,
			RdfVocabulary::NSP_REFERENCE,
		);

		foreach ( $typeDependentNamespaces as $namespace ) {
			$writer->about( $namespace, $id )->a( 'owl', $owlType );
		}
	}

	/**
	 * Check if the property describes link between objects
	 * or just data item.
	 *
	 * A property counts as a link when its data type ID is one of
	 * 'wikibase-item', 'wikibase-property', 'url' or 'commonsMedia'.
	 *
	 * @param Property $property
	 * @return bool
	 */
	private function propertyIsLink( Property $property ) {
		// For now, it's very simple but can be more complex later
		$linkTypes = array( 'wikibase-item', 'wikibase-property', 'url', 'commonsMedia' );

		// Strict comparison: data type IDs must match exactly, without type juggling.
		return in_array( $property->getDataTypeId(), $linkTypes, true );
	}

	/**
	 * Adds meta-information about an entity (such as the ID and type) to the RDF graph.
	 * For properties, the property type, the property predicates and the
	 * novalue class are written as well.
	 *
	 * @todo: extract into MetaDataRdfBuilder
	 *
	 * @param EntityDocument $entity
	 */
	private function addEntityMetaData( EntityDocument $entity ) {
		$entityId = $entity->getId();
		$entityLName = $this->vocabulary->getEntityLName( $entityId );

		$this->writer->about( RdfVocabulary::NS_ENTITY, $entityLName )
			->a( RdfVocabulary::NS_ONTOLOGY, $this->vocabulary->getEntityTypeName( $entity->getType() ) );

		if ( !( $entity instanceof Property ) ) {
			return;
		}

		$this->writer->say( RdfVocabulary::NS_ONTOLOGY, 'propertyType' )
			->is( $this->vocabulary->getDataTypeURI( $entity ) );

		$serializedId = $entityId->getSerialization();
		$this->writePropertyPredicates( $serializedId, $this->propertyIsLink( $entity ) );
		$this->writeNovalueClass( $serializedId );
	}

	/**
	 * Add an entity to the RDF graph: first its meta data, then the output of
	 * every registered sub-builder. The entity is marked as resolved afterwards.
	 *
	 * @param EntityDocument $entity the entity to output.
	 */
	public function addEntity( EntityDocument $entity ) {
		$this->addEntityMetaData( $entity );

		foreach ( $this->builders as $subBuilder ) {
			$subBuilder->addEntity( $entity );
		}

		$entityId = $entity->getId();
		$this->entityResolved( $entityId );
	}

	/**
	 * Add stubs for any entities that were previously mentioned (e.g. as properties
	 * or data values).
	 *
	 * Entities for which a stub was written, and redirects that were written, are
	 * marked as resolved so they are emitted at most once. Entities the lookup
	 * cannot find stay unresolved.
	 *
	 * @param EntityLookup $entityLookup
	 */
	public function resolveMentionedEntities( EntityLookup $entityLookup ) {
		$hasRedirect = false;

		foreach ( $this->entitiesResolved as $id ) {
			// The value is true if the entity has already been resolved,
			// or an EntityId to resolve.
			if ( !( $id instanceof EntityId ) ) {
				continue;
			}

			try {
				$entity = $entityLookup->getEntity( $id );

				if ( !$entity ) {
					continue;
				}

				$this->addEntityStub( $entity );

				// Mark the entity as resolved. Otherwise the recursive pass below (and any
				// later call) would look it up and write the same stub again.
				$this->entityResolved( $id );
			} catch ( RevisionedUnresolvedRedirectException $ex ) {
				// NOTE: this may add more entries to the end of entitiesResolved
				$target = $ex->getRedirectTargetId();
				$this->addEntityRedirect( $id, $target );
				$hasRedirect = true;
			}
		}

		// If we encountered redirects, the redirect targets may now need resolving.
		// They actually got added to $this->entitiesResolved, but may not have been
		// processed by the loop above, because they got added while the loop was in progress.
		if ( $hasRedirect ) {
			// Call resolveMentionedEntities() recursively to resolve any yet unresolved
			// redirect targets. The regress will eventually terminate even for circular
			// redirect chains, because the second time an entity ID is encountered, it
			// will be marked as already resolved.
			$this->resolveMentionedEntities( $entityLookup );
		}
	}

	/**
	 * Adds stub information for the given Entity to the RDF graph.
	 * A stub consists of the entity's meta information plus, for entities
	 * that provide a fingerprint, their labels and descriptions.
	 *
	 * @todo: extract into EntityStubRdfBuilder?
	 *
	 * @param EntityDocument $entity
	 */
	private function addEntityStub( EntityDocument $entity ) {
		$this->addEntityMetaData( $entity );

		if ( !( $entity instanceof FingerprintProvider ) ) {
			return;
		}

		$terms = $entity->getFingerprint();

		/** @var EntityDocument $entity */
		$lName = $this->vocabulary->getEntityLName( $entity->getId() );

		$this->termsBuilder->addLabels( $lName, $terms->getLabels() );
		$this->termsBuilder->addDescriptions( $lName, $terms->getDescriptions() );
	}

	/**
	 * Declares $from to be an alias for $to, using the owl:sameAs relationship.
	 * $from is marked as resolved; $to is queued for resolution when
	 * PRODUCE_RESOLVED_ENTITIES is requested.
	 *
	 * @param EntityId $from
	 * @param EntityId $to
	 */
	public function addEntityRedirect( EntityId $from, EntityId $to ) {
		$sourceLName = $this->vocabulary->getEntityLName( $from );
		$targetLName = $this->vocabulary->getEntityLName( $to );

		$this->writer
			->about( RdfVocabulary::NS_ENTITY, $sourceLName )
			->say( 'owl', 'sameAs' )
			->is( RdfVocabulary::NS_ENTITY, $targetLName );

		$this->entityResolved( $from );

		if ( !$this->shouldProduce( RdfProducer::PRODUCE_RESOLVED_ENTITIES ) ) {
			return;
		}

		$this->entityToResolve( $to );
	}

	/**
	 * Create header structure for the dump: a Dataset/Ontology node carrying
	 * license, format version, modification date and the ontology import.
	 *
	 * @param int $timestamp Timestamp (for testing)
	 */
	public function addDumpHeader( $timestamp = 0 ) {
		$dateModified = wfTimestamp( TS_ISO_8601, $timestamp );
		$schemaOrg = RdfVocabulary::NS_SCHEMA_ORG;

		// TODO: this should point to "this document"
		$header = $this->writer->about( RdfVocabulary::NS_ONTOLOGY, 'Dump' );

		$header->a( $schemaOrg, 'Dataset' )
			->a( 'owl', 'Ontology' );

		$header->say( RdfVocabulary::NS_CC, 'license' )->is( RdfVocabulary::LICENSE )
			->say( $schemaOrg, 'softwareVersion' )->value( RdfVocabulary::FORMAT_VERSION )
			->say( $schemaOrg, 'dateModified' )->value( $dateModified, 'xsd', 'dateTime' )
			->say( 'owl', 'imports' )->is( RdfVocabulary::getOntologyURI() );
	}

}