156 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			PHP
		
	
	
			
		
		
	
	
			156 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			PHP
		
	
	
| <?php
 | |
| 
 | |
| /**
 | |
|  * Generates the json files stored in resources/language.
 | |
|  *
 | |
|  * CLDR lists about 515 languages, many of them dead (like Latin or Old English).
 | |
|  * In order to decrease the list to a reasonable size, only the languages
 | |
|  * for which CLDR itself has translations are listed.
 | |
|  */
 | |
| 
 | |
| require __DIR__ . '/generate_base.php';
 | |
| 
 | |
| $enLanguages = $localeDirectory . 'en/languages.json';
 | |
| if (!file_exists($enLanguages)) {
 | |
|     die("The $enLanguages file was not found");
 | |
| }
 | |
| 
 | |
| $languages = generate_languages();
 | |
| $languages = filter_duplicate_localizations($languages);
 | |
| 
 | |
| // Make sure we're starting from a clean slate.
 | |
| if (is_dir(__DIR__ . '/language')) {
 | |
|     die('The language/ directory must not exist.');
 | |
| }
 | |
| 
 | |
| // Prepare the filesystem.
 | |
| mkdir(__DIR__ . '/language');
 | |
| 
 | |
| // Write out the localizations.
 | |
| foreach ($languages as $locale => $localizedLanguages) {
 | |
|     $collator = collator_create($locale);
 | |
|     uasort($localizedLanguages, function ($a, $b) use ($collator) {
 | |
|         return collator_compare($collator, $a, $b);
 | |
|     });
 | |
|     file_put_json(__DIR__ . '/language/' . $locale . '.json', $localizedLanguages);
 | |
| }
 | |
| 
 | |
| $availableLocales = array_keys($languages);
 | |
| sort($availableLocales);
 | |
| // Available locales are stored in PHP, then manually
 | |
| // transferred to LanguageRepository.
 | |
| $data = "<?php\n\n";
 | |
| $data .= export_locales($availableLocales);
 | |
| file_put_contents(__DIR__ . '/language_data.php', $data);
 | |
| 
 | |
| echo "Done.\n";
 | |
| 
 | |
| /**
 | |
|  * Exports locales.
 | |
|  */
 | |
| function export_locales($data)
 | |
| {
 | |
|     // Wrap the values in single quotes.
 | |
|     $data = array_map(function ($value) {
 | |
|         return "'" . $value . "'";
 | |
|     }, $data);
 | |
| 
 | |
|     $export = '// ' . count($data) . " available locales. \n";
 | |
|     $export .= '$locales = [' . "\n";
 | |
|     $export .= '    ' . implode(', ', $data) . "\n";
 | |
|     $export .= "];\n";
 | |
| 
 | |
|     return $export;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Generates the language lists for each locale.
 | |
|  */
 | |
| function generate_languages()
 | |
| {
 | |
|     global $localeDirectory;
 | |
| 
 | |
|     $locales = discover_locales();
 | |
|     // Make sure 'en' is processed first so that it can be used as a fallback.
 | |
|     $index = array_search('en', $locales);
 | |
|     unset($locales[$index]);
 | |
|     array_unshift($locales, 'en');
 | |
|     // Skip all languages that aren't an available locale at the same time.
 | |
|     // This reduces the language list from about 515 to about 185 languages.
 | |
|     $allowedLanguages = scandir($localeDirectory);
 | |
|     $allowedLanguages = array_merge($allowedLanguages, ['iu', 'wa']);
 | |
|     $allowedLanguages = array_diff($allowedLanguages, ['eo', 'ia', 'vo', 'cu', 'gv', 'prg', 'und']);
 | |
|     // Languages that are untranslated in most locales (as of CLDR v34).
 | |
|     $allowedLanguages = array_diff($allowedLanguages, ['ccp', 'fa-AF']);
 | |
| 
 | |
|     $untranslatedCounts = [];
 | |
|     $languages = [];
 | |
|     foreach ($locales as $locale) {
 | |
|         $data = json_decode(file_get_contents($localeDirectory . $locale . '/languages.json'), true);
 | |
|         $data = $data['main'][$locale]['localeDisplayNames']['languages'];
 | |
|         foreach ($data as $languageCode => $languageName) {
 | |
|             if (!in_array($languageCode, $allowedLanguages)) {
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             // This language name is untranslated, use to the english version.
 | |
|             if ($languageCode == str_replace('_', '-', $languageName)) {
 | |
|                 $languageName = $languages['en'][$languageCode];
 | |
|                 // Maintain a count of untranslated languages per locale.
 | |
|                 $untranslatedCounts += [$locale => 0];
 | |
|                 $untranslatedCounts[$locale]++;
 | |
|             }
 | |
| 
 | |
|             $languages[$locale][$languageCode] = $languageName;
 | |
|         }
 | |
|         // CLDR v34 has an uneven language list due to missing translations.
 | |
|         if ($locale != 'en') {
 | |
|             $missingLanguages = array_diff_key($languages['en'], $languages[$locale]);
 | |
|             foreach ($missingLanguages as $languageCode => $languageName) {
 | |
|                 $languages[$locale][$languageCode] = $languages['en'][$languageCode];
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // Ignore locales that are more than 80% untranslated.
 | |
|     foreach ($untranslatedCounts as $locale => $count) {
 | |
|         $totalCount = count($languages[$locale]);
 | |
|         $untranslatedPercentage = $count * (100 / $totalCount);
 | |
|         if ($untranslatedPercentage >= 80) {
 | |
|             unset($languages[$locale]);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return $languages;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Filters out duplicate localizations (same as their parent locale).
 | |
|  *
 | |
|  * For example, "fr-FR" will be removed if "fr" has the same data.
 | |
|  */
 | |
| function filter_duplicate_localizations(array $localizations)
 | |
| {
 | |
|     $duplicates = [];
 | |
|     foreach ($localizations as $locale => $localizedLanguages) {
 | |
|         if ($parentLocale = \CommerceGuys\Intl\Locale::getParent($locale)) {
 | |
|             $parentLanguages = isset($localizations[$parentLocale]) ? $localizations[$parentLocale] : [];
 | |
|             $diff = array_udiff($localizedLanguages, $parentLanguages, function ($first, $second) {
 | |
|                 return ($first === $second) ? 0 : 1;
 | |
|             });
 | |
| 
 | |
|             if (empty($diff)) {
 | |
|                 // The duplicates are not removed right away because they might
 | |
|                 // still be needed for other duplicate checks (for example,
 | |
|                 // when there are locales like bs-Latn-BA, bs-Latn, bs).
 | |
|                 $duplicates[] = $locale;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     foreach ($duplicates as $locale) {
 | |
|         unset($localizations[$locale]);
 | |
|     }
 | |
| 
 | |
|     return $localizations;
 | |
| }
 |