forked from a64f7bb4-7358-4778-9fbe-3b882c34cc1d/v1
156 lines
5.3 KiB
PHP
156 lines
5.3 KiB
PHP
<?php
|
|
|
|
/**
|
|
* Generates the json files stored in resources/language.
|
|
*
|
|
* CLDR lists about 515 languages, many of them dead (like Latin or Old English).
|
|
* In order to decrease the list to a reasonable size, only the languages
|
|
* for which CLDR itself has translations are listed.
|
|
*/
|
|
|
|
require __DIR__ . '/generate_base.php';
|
|
|
|
// The English list is required: generate_languages() uses the English names
// as the fallback for languages that other locales leave untranslated.
$enLanguages = $localeDirectory . 'en/languages.json';
if (!file_exists($enLanguages)) {
    // exit()/die() with a string argument terminates with status 0, which
    // makes a failed run look successful to the calling shell. Print the
    // message and exit with an explicit failure status instead.
    echo "The $enLanguages file was not found\n";
    exit(1);
}

$languages = generate_languages();
$languages = filter_duplicate_localizations($languages);

// Make sure we're starting from a clean slate.
if (is_dir(__DIR__ . '/language')) {
    echo "The language/ directory must not exist.\n";
    exit(1);
}

// Prepare the filesystem. Stop here if the directory cannot be created,
// otherwise every write below would fail while the script still reports
// success at the end.
if (!mkdir(__DIR__ . '/language')) {
    echo "The language/ directory could not be created.\n";
    exit(1);
}

// Write out the localizations, one JSON file per locale, with the language
// names sorted according to that locale's collation rules. uasort() keeps the
// language codes attached to their names.
foreach ($languages as $locale => $localizedLanguages) {
    $collator = collator_create($locale);
    uasort($localizedLanguages, function ($a, $b) use ($collator) {
        return collator_compare($collator, $a, $b);
    });
    file_put_json(__DIR__ . '/language/' . $locale . '.json', $localizedLanguages);
}

$availableLocales = array_keys($languages);
sort($availableLocales);
// Available locales are stored in PHP, then manually
// transferred to LanguageRepository.
$data = "<?php\n\n";
$data .= export_locales($availableLocales);
if (file_put_contents(__DIR__ . '/language_data.php', $data) === false) {
    echo "The language_data.php file could not be written.\n";
    exit(1);
}

echo "Done.\n";
|
|
|
|
/**
 * Exports locales.
 *
 * Renders the given locale codes as a snippet of PHP source: a comment line
 * with the number of locales, followed by a `$locales` array assignment that
 * lists every code as a single-quoted string on one line.
 *
 * @param array $data The locale codes to export.
 *
 * @return string The generated PHP source (without an opening tag).
 */
function export_locales($data)
{
    // Wrap each locale code in single quotes so it reads as a PHP string.
    $quoted = [];
    foreach ($data as $locale) {
        $quoted[] = "'" . $locale . "'";
    }

    $header = '// ' . count($quoted) . " available locales. \n";
    $opening = '$locales = [' . "\n";
    $items = ' ' . implode(', ', $quoted) . "\n";
    $closing = "];\n";

    return $header . $opening . $items . $closing;
}
|
|
|
|
/**
 * Generates the language lists for each locale.
 *
 * Reads "<locale>/languages.json" below $localeDirectory for every locale
 * returned by discover_locales() (defined outside this file), keeps only the
 * allowed language codes, and substitutes the English name wherever a locale
 * has no translation of its own. Locales that are 80% or more untranslated
 * are dropped from the result.
 *
 * @return array Language names keyed by locale, then by language code.
 */
function generate_languages()
{
    global $localeDirectory;

    $locales = discover_locales();
    // Make sure 'en' is processed first so that it can be used as a fallback.
    // array_search() returns false when 'en' is absent; used as an array key
    // that would silently remove the locale at index 0, so only a real match
    // is unset.
    $index = array_search('en', $locales, true);
    if ($index !== false) {
        unset($locales[$index]);
    }
    array_unshift($locales, 'en');
    // Skip all languages that aren't an available locale at the same time.
    // This reduces the language list from about 515 to about 185 languages.
    $allowedLanguages = scandir($localeDirectory);
    // A few codes are explicitly added to / removed from the directory listing.
    $allowedLanguages = array_merge($allowedLanguages, ['iu', 'wa']);
    $allowedLanguages = array_diff($allowedLanguages, ['eo', 'ia', 'vo', 'cu', 'gv', 'prg', 'und']);
    // Languages that are untranslated in most locales (as of CLDR v34).
    $allowedLanguages = array_diff($allowedLanguages, ['ccp', 'fa-AF']);
    // Flip into a lookup table: the membership test below runs once per
    // language per locale, and isset() avoids a linear, loosely-compared scan.
    $allowedLanguages = array_flip($allowedLanguages);

    $untranslatedCounts = [];
    $languages = [];
    foreach ($locales as $locale) {
        $data = json_decode(file_get_contents($localeDirectory . $locale . '/languages.json'), true);
        $data = $data['main'][$locale]['localeDisplayNames']['languages'];
        foreach ($data as $languageCode => $languageName) {
            if (!isset($allowedLanguages[$languageCode])) {
                continue;
            }

            // A name that equals its own code (once "_" is replaced by "-")
            // is treated as untranslated; use the English version instead.
            if ((string) $languageCode === str_replace('_', '-', $languageName)) {
                // Keep the raw name when there is no English entry to fall
                // back to, rather than storing null.
                if (isset($languages['en'][$languageCode])) {
                    $languageName = $languages['en'][$languageCode];
                }
                // Maintain a count of untranslated languages per locale.
                $untranslatedCounts += [$locale => 0];
                $untranslatedCounts[$locale]++;
            }

            $languages[$locale][$languageCode] = $languageName;
        }
        // CLDR v34 has an uneven language list due to missing translations.
        // Append the English entries for every code this locale lacks. The
        // isset() guard skips locales that produced no entries at all, for
        // which array_diff_key() would otherwise receive an undefined value.
        if ($locale !== 'en' && isset($languages['en'], $languages[$locale])) {
            $languages[$locale] += array_diff_key($languages['en'], $languages[$locale]);
        }
    }

    // Ignore locales that are 80% or more untranslated. The comparison is
    // done on integers so that a ratio of exactly 80% is not misjudged
    // because of floating point rounding.
    foreach ($untranslatedCounts as $locale => $count) {
        $totalCount = count($languages[$locale]);
        if ($count * 100 >= 80 * $totalCount) {
            unset($languages[$locale]);
        }
    }

    return $languages;
}
|
|
|
|
/**
 * Filters out duplicate localizations (same as their parent locale).
 *
 * For example, "fr-FR" will be removed if "fr" has the same data.
 *
 * A locale counts as a duplicate when every one of its entries is present in
 * the parent locale under the same key with an identical value.
 *
 * @param array $localizations Localized data keyed by locale.
 *
 * @return array The localizations without the locales that only repeat
 *               their parent.
 */
function filter_duplicate_localizations(array $localizations)
{
    $duplicates = [];
    foreach ($localizations as $locale => $localizedLanguages) {
        $parentLocale = \CommerceGuys\Intl\Locale::getParent($locale);
        if (!$parentLocale) {
            // No parent locale, nothing to compare against.
            continue;
        }
        $parentLanguages = isset($localizations[$parentLocale]) ? $localizations[$parentLocale] : [];

        // Compare entry by entry, key included. This replaces an array_udiff()
        // call whose callback only returned 0 or 1: array_udiff() requires a
        // real ordering comparator (negative / zero / positive), and it looks
        // at values only, regardless of which language code they belong to.
        $isDuplicate = true;
        foreach ($localizedLanguages as $languageCode => $languageName) {
            $sameInParent = array_key_exists($languageCode, $parentLanguages)
                && $parentLanguages[$languageCode] === $languageName;
            if (!$sameInParent) {
                $isDuplicate = false;
                break;
            }
        }

        if ($isDuplicate) {
            // The duplicates are not removed right away because they might
            // still be needed for other duplicate checks (for example,
            // when there are locales like bs-Latn-BA, bs-Latn, bs).
            $duplicates[] = $locale;
        }
    }
    foreach ($duplicates as $locale) {
        unset($localizations[$locale]);
    }

    return $localizations;
}
|