<?php
namespace Wikibase\Test;
use PHPUnit_Framework_TestCase;
use Wikibase\StringNormalizer;
/**
* @covers Wikibase\StringNormalizer
*
* @group WikibaseLib
* @group Wikibase
*
* @license GPL-2.0+
* @author John Erling Blad < jeblad@gmail.com >
* @author Daniel Kinzler
*/
class StringNormalizerTest extends PHPUnit_Framework_TestCase {

	/**
	 * trimBadChars() must return exactly the expected bytes for each input.
	 *
	 * @dataProvider providerTrimBadChars
	 *
	 * @param string $string Raw input handed to trimBadChars().
	 * @param string $expected Exact bytes the call has to return.
	 */
	public function testTrimBadChars( $string, $expected ) {
		$normalizer = new StringNormalizer();

		// Strict comparison: a null/false result must not pass for the "" expectation.
		$this->assertSame( $expected, $normalizer->trimBadChars( $string ) );
	}

	/**
	 * Data sets for testTrimBadChars(), keyed by a description of the case.
	 *
	 * @return array[] Each entry is array( input, expected output ).
	 */
	public function providerTrimBadChars() {
		return array(
			'empty string' => array(
				"",
				""
			),
			// Blanks are expected to come back unchanged from trimBadChars().
			'just blanks' => array(
				" \n ",
				" \n "
			),
			'Private Use Area character U+F818' => array(
				"\xef\xa0\x98",
				"\xef\xa0\x98"
			),
			// Trailing lead byte without its continuation byte.
			'badly truncated cyrillic' => array(
				"\xd0\xb5\xd0",
				"\xd0\xb5",
			),
			// Trailing three-byte sequence that is missing its last byte.
			'badly truncated katakana' => array(
				"\xe3\x82\xa6\xe3\x83",
				"\xe3\x82\xa6"
			),
			// Leading continuation byte without a lead byte.
			'badly starting cyrillic' => array(
				"\xb5\xd0\xb5",
				"\xd0\xb5",
			),
			// Leading continuation bytes without a lead byte.
			'badly starting katakana' => array(
				"\x82\xa6\xe3\x83\xa6",
				"\xe3\x83\xa6"
			),
			// XXX: this should pass, and it does for some versions of PHP/PCRE
			//'Latin Extended-D character U+A7AA' => array(
			//	"\xea\x9e\xaa",
			//	"\xea\x9e\xaa",
			//),
		);
	}

	/**
	 * trimWhitespace() must return exactly the expected bytes for each input.
	 *
	 * @dataProvider providerTrimWhitespace
	 *
	 * @param string $string Raw input handed to trimWhitespace().
	 * @param string $expected Exact bytes the call has to return.
	 */
	public function testTrimWhitespace( $string, $expected ) {
		$normalizer = new StringNormalizer();

		$this->assertSame( $expected, $normalizer->trimWhitespace( $string ) );
	}

	/**
	 * Data sets for testTrimWhitespace(), keyed by a description of the case.
	 *
	 * @return array[] Each entry is array( input, expected output ).
	 */
	public function providerTrimWhitespace() {
		// U+200C ZERO WIDTH NON-JOINER is spelled as a numeric entity and decoded
		// here, so that no invisible character has to live in this source file.
		$singleZwnj = html_entity_decode( 'foo&#8204;bar', ENT_QUOTES, 'utf-8' );
		$doubleZwnj = html_entity_decode( 'foo&#8204;&#8204;bar', ENT_QUOTES, 'utf-8' );

		return array(
			'plain text is left alone' => array(
				'foo bar',
				'foo bar'
			),
			'leading and trailing blanks are stripped' => array(
				" foo bar \n",
				'foo bar'
			),
			'inner tab becomes a space' => array(
				"foo\tbar",
				'foo bar'
			),
			'inner line feed becomes a space' => array(
				"foo\nbar",
				'foo bar'
			),
			'inner carriage return becomes a space' => array(
				"foo\rbar",
				'foo bar'
			),
			'inner run of control characters becomes one space' => array(
				"\r \t\nfoo\r\t\t\tbar\n\n\n\r\r",
				'foo bar'
			),
			// NOTE(review): the inner control run is followed by a literal space here.
			// Confirm against StringNormalizer::trimWhitespace() that a single space is
			// really the intended result; whitespace in this data set may have been
			// collapsed when the file was copied.
			'inner run of control characters followed by a space' => array(
				"\r \t\nfoo\r\t\t\t bar\n\n\n\r\r",
				'foo bar'
			),
			'single ZWNJ inside the text is kept' => array(
				$singleZwnj,
				$singleZwnj
			),
			'double ZWNJ inside the text is kept' => array(
				$doubleZwnj,
				$doubleZwnj
			),
		);
	}

	/**
	 * cleanupToNFC() must return exactly the expected bytes for each input.
	 *
	 * @dataProvider providerCleanupToNFC
	 *
	 * @param string $string Raw input handed to cleanupToNFC().
	 * @param string $expected Exact bytes the call has to return.
	 */
	public function testCleanupToNFC( $string, $expected ) {
		$normalizer = new StringNormalizer();

		$this->assertSame( $expected, $normalizer->cleanupToNFC( $string ) );
	}

	/**
	 * Data sets for testCleanupToNFC(), keyed by a description of the case.
	 *
	 * Expected values spell the precomposed U+00C5 as "\xC3\x85" so that the
	 * comparison does not depend on how an editor saved a literal character.
	 *
	 * @return array[] Each entry is array( input, expected output ).
	 */
	public function providerCleanupToNFC() {
		return array(
			'precomposed U+00C5 is kept' => array(
				"\xC3\x85land",
				"\xC3\x85land"
			),
			'A followed by combining ring U+030A is composed' => array(
				"A\xCC\x8Aland",
				"\xC3\x85land"
			),
			'angstrom sign U+212B becomes U+00C5' => array(
				"\xE2\x84\xABngstrom (unit)",
				"\xC3\x85ngstrom (unit)"
			),
		);
	}

	/**
	 * trimToNFC() must return exactly the expected bytes for each input.
	 *
	 * @dataProvider providerTrimToNFC
	 *
	 * @param string $src Raw input handed to trimToNFC().
	 * @param string $dst Exact bytes the call has to return.
	 */
	public function testTrimToNFC( $src, $dst ) {
		$normalizer = new StringNormalizer();

		$this->assertSame(
			$dst,
			$normalizer->trimToNFC( $src ),
			"trimToNFC( '$src' ) should yield '$dst'"
		);
	}

	/**
	 * Data sets for testTrimToNFC(), keyed by a description of the case.
	 *
	 * Expected values spell the precomposed U+00C5 as "\xC3\x85" so that the
	 * comparison does not depend on how an editor saved a literal character.
	 *
	 * @return array[] Each entry is array( input, expected output ).
	 */
	public function providerTrimToNFC() {
		return array(
			'padded text with precomposed U+00C5' => array(
				" \xC3\x85land øyene ",
				"\xC3\x85land øyene"
			),
			'padded text with A and combining ring' => array(
				" A\xCC\x8Aland øyene ",
				"\xC3\x85land øyene"
			),
			// NOTE(review): the next two data sets are byte-identical to the two above
			// in this revision. They presumably differed in the amount of whitespace
			// once; restore the distinct inputs or drop them after checking history.
			'padded text with precomposed U+00C5 (second variant)' => array(
				" \xC3\x85land øyene ",
				"\xC3\x85land øyene"
			),
			'padded text with A and combining ring (second variant)' => array(
				" A\xCC\x8Aland øyene ",
				"\xC3\x85land øyene"
			),
			'Private Use Area character U+F818' => array(
				"\xef\xa0\x98",
				"\xef\xa0\x98"
			),
			// Trailing lead byte without its continuation byte.
			'badly truncated cyrillic' => array(
				"\xd0\xb5\xd0",
				"\xd0\xb5",
			),
			// Trailing three-byte sequence that is missing its last byte.
			'badly truncated katakana' => array(
				"\xe3\x82\xa6\xe3\x83",
				"\xe3\x82\xa6"
			),
			'empty string' => array(
				"",
				""
			),
			// Unlike trimBadChars(), trimToNFC() is expected to strip blanks entirely.
			'just blanks' => array(
				" \n ",
				""
			),
			// XXX: this should pass, and it does for some versions of PHP/PCRE
			//'Latin Extended-D character U+A7AA' => array(
			//	"\xea\x9e\xaa",
			//	"\xea\x9e\xaa",
			//),
		);
	}

}