subiquity/scripts/make-language-lists

119 lines
3.5 KiB
Python
Executable File

#!/usr/bin/python3
"""Generate the language list used by subiquity.

Subiquity has translations checked into git (in the po/ directory) and
we want to show these to the user. The subtleties are 1) we want to
show the native name for the language ("Latviski" rather than
"Latvian" or "lv") and 2) some languages cannot reasonably be
displayed on the Linux console.

debian-installer has both of these issues as well of course, and stores
information about languages in an idiosyncratically formatted file
'/usr/share/localechooser/languagelist.data.gz'. Not every language
we have translations for appears in there though, and for these
languages we use the files that are part of the iso-codes package to
find the native name (and assume they cannot be represented in the
console).

Even then we still have some special cases!

The information subiquity needs is recorded in its own idiosyncratic
format at $SNAP/languagelist.
"""
import gettext
import glob
import gzip
import json
import os
import sys
# Input files: debian-installer's localechooser data and the iso-codes
# language table.
LANGLIST_PATH = '/usr/share/localechooser/languagelist'
DATA_PATH = '/usr/share/localechooser/languagelist.data.gz'
JSON_CODES = '/usr/share/iso-codes/json/iso_639-3.json'

# Directory holding the .po files; an optional first command-line argument
# overrides the default.
lang_dir = 'po'
if len(sys.argv) > 1:
    lang_dir = sys.argv[1]

# Codes that still need a display name: one per <code>.po file found.
translations = {
    os.path.splitext(os.path.basename(fname))[0]
    for fname in glob.glob('{}/*.po'.format(lang_dir))
}

# Nothing to do without translations; exit with a non-zero status.
if not translations:
    print("found no languages", file=sys.stderr)
    sys.exit(1)
# Entries that are always emitted first, as (kind, locale, display name).
langs = [
    ('console', 'en_US.UTF-8', 'English'),
    ('console', 'en_GB.UTF-8', 'English (UK)'),
    ('ssh', 'kab_DZ.UTF-8', 'Taqbaylit'),
]

# These languages are already named above, so drop every spelling of their
# code (full locale, locale without the encoding, bare language) from the set
# of translations that still need a name.  discard() is a no-op when the
# spelling is absent or was already removed.
for _kind, code, _name in langs:
    for subcode in (code, code.split('.')[0], code.split('_')[0]):
        translations.discard(subcode)
def kind_for_level(level):
    """Return the output kind for a languagelist.data level.

    ``level`` may be an int or a string holding an integer; it is passed
    through ``int()``.  Levels up to and including 2 give ``"console"``,
    anything higher gives ``"ssh"``.

    Raises ValueError if ``level`` is a string that is not an integer.
    """
    return "console" if int(level) <= 2 else "ssh"
# Map each language code in the localechooser languagelist to the fallback
# locale recorded for it.
code_to_fallbacklocale = {}
with open(LANGLIST_PATH) as data:
    for line in data:
        # Records are ';'-separated.  Unpacking all six fields means a line
        # with a different field count raises ValueError instead of being
        # silently misread; only the code and fallback locale are used.
        (code, supported_environments, countrycode, fallbacklocale, langlist,
         console_setup) = line.strip().split(';')
        code_to_fallbacklocale[code] = fallbacklocale
def _claim_from_languagelist(normalize):
    """Move translations named in DATA_PATH from ``translations`` to ``langs``.

    Each line of the file is split on ':' into ``level``, ``code``, ``name``
    and ``desc``.  ``normalize`` maps the code read from the file to the key
    that is looked up in ``translations``.  On a match, ``(kind, key, desc)``
    is appended to ``langs`` and the key is removed from ``translations`` so
    that it is claimed only once.

    Raises ValueError if a line does not have exactly four fields.
    """
    with gzip.open(DATA_PATH, mode='rt') as data:
        for line in data:
            level, code, _name, desc = line.strip().split(':')
            key = normalize(code)
            if key not in translations:
                continue
            langs.append((kind_for_level(level), key, desc))
            translations.remove(key)


# First pass: translations whose code matches a languagelist entry exactly.
_claim_from_languagelist(lambda code: code)

# Second pass: for what is left, compare only the part of each entry's code
# before the first '_', so a translation named by bare language code can take
# its name from a language_COUNTRY entry.
_claim_from_languagelist(lambda code: code.split("_", 1)[0])
# Whatever is still in ``translations`` has no entry in the localechooser
# data; fall back to the iso-codes package for a name.
# JSON text is UTF-8, and the context manager closes the file promptly.
with open(JSON_CODES, encoding='utf-8') as fp:
    langcodes = json.load(fp)["639-3"]

# Index the English names by alpha_2 and alpha_3 code once instead of
# rescanning the whole table for every translation.  Later entries overwrite
# earlier ones, the same result as a scan that keeps the last hit.
english_names = {}
for lang in langcodes:
    for field in ('alpha_2', 'alpha_3'):
        if field in lang:
            english_names[lang[field]] = lang.get('name')

# sorted() keeps the generated list stable from run to run; the iteration
# order of a set of strings is not.
for code in sorted(translations):
    name = english_names.get(code)
    # Pick the first iso-codes gettext domain that has a catalogue for this
    # language.
    langdb = next(
        (db for db in ('iso_639_3', 'iso_639')
         if gettext.find(db, languages=[code])),
        None)
    if not (name and langdb):
        print('do not know native name for {}'.format(code), file=sys.stderr)
        sys.exit(1)
    # Translating the English language name into the language itself gives
    # the native name.
    native_lang = gettext.translation(langdb, languages=[code])
    native = native_lang.gettext(name).capitalize()
    # Languages named this way are assumed not to be representable on the
    # console, hence 'ssh'.
    langs.append(('ssh', code, native))
# Emit one "kind:locale:name" line per language.  The code is replaced by the
# fallback locale from the localechooser languagelist when one is recorded,
# and is written unchanged otherwise.
for level, code, name in langs:
    locale = code_to_fallbacklocale.get(code, code)
    sys.stdout.write("{}:{}:{}\n".format(level, locale, name))