#!/usr/bin/python3
"""Generate the language list used by subiquity.

Subiquity has translations checked into git (in the po/ directory) and
we want to show these to the user. The subtleties are:

1) we want to show the native name for the language ("Latviski" rather
   than "Latvian" or "lv"), and
2) some languages cannot reasonably be displayed on the linux console.

debian-installer has both these issues as well of course, and stores
information about languages in an idiosyncratically formatted file,
'/usr/share/localechooser/languagelist.data.gz'. Not every language we
have translations for appears in there though, and for these languages
we use the files that are part of the iso-codes package to find the
native name (and assume they cannot be represented in the console).

Even then we still have some special cases!

The information subiquity needs is recorded in its own idiosyncratic
format at $SNAP/languagelist.

Usage: pass the directory holding the *.po files as the first argument
(defaults to 'po'). The list is written to stdout, one
"kind:locale:native name" line per language.
"""

import gettext
import glob
import gzip
import json
import os
import sys

LANGLIST_PATH = '/usr/share/localechooser/languagelist'
DATA_PATH = '/usr/share/localechooser/languagelist.data.gz'
JSON_CODES = '/usr/share/iso-codes/json/iso_639-3.json'

# Entries that are always emitted first, whether or not a matching .po
# file exists. Each entry is (kind, locale, native name).
SPECIAL_CASES = [
    ('console', 'en_US.UTF-8', 'English'),
    ('console', 'en_GB.UTF-8', 'English (UK)'),
    ('ssh', 'kab_DZ.UTF-8', 'Taqbaylit'),
]


def kind_for_level(level):
    """Map a languagelist.data level to the kind written to the output.

    `level` may be an int or a numeric string. Levels up to and
    including 2 map to "console"; anything higher maps to "ssh".
    Raises ValueError if `level` cannot be converted to an int.
    """
    if int(level) <= 2:
        return "console"
    else:
        return "ssh"


def _find_translations(lang_dir):
    """Return the set of language codes that have a .po file in lang_dir."""
    return {
        os.path.splitext(os.path.basename(fname))[0]
        for fname in glob.glob('{}/*.po'.format(lang_dir))
    }


def _load_fallback_locales(path):
    """Return a {language code: fallback locale} dict parsed from path.

    Every line must consist of exactly six ';'-separated fields (code,
    supported environments, country code, fallback locale, language
    list, console setup); anything else raises ValueError.
    """
    code_to_fallbacklocale = {}
    with open(path, encoding='utf-8') as data:
        for line in data:
            # Unpack all six fields so a malformed line still fails loudly.
            (code, _supported_environments, _countrycode, fallbacklocale,
             _langlist, _console_setup) = line.strip().split(';')
            code_to_fallbacklocale[code] = fallbacklocale
    return code_to_fallbacklocale


def _read_language_data(path):
    """Return [(level, code, description), ...] from the gzipped data file.

    Every line must consist of exactly four ':'-separated fields;
    anything else raises ValueError. `level` is returned as a string.
    """
    entries = []
    with gzip.open(path, mode='rt', encoding='utf-8') as data:
        for line in data:
            level, code, _name, desc = line.strip().split(':')
            entries.append((level, code, desc))
    return entries


def _index_iso_names(langcodes):
    """Return a {alpha_2 or alpha_3 code: name} dict for ISO 639-3 entries.

    If a code occurs more than once, the last entry wins.
    """
    names = {}
    for lang in langcodes:
        for key in (lang.get('alpha_2'), lang.get('alpha_3')):
            if key is not None:
                names[key] = lang.get('name')
    return names


def _find_langdb(code):
    """Return the first iso-codes gettext domain translated into code.

    Returns None when neither domain has a catalog for that language.
    """
    for db in ['iso_639_3', 'iso_639']:
        if gettext.find(db, languages=[code]):
            return db
    return None


def main(argv=None):
    """Write the language list for the translations found to stdout.

    `argv` defaults to sys.argv; argv[1], when present, names the
    directory containing the *.po files. Exits with status 1 if no
    translations are found or if a native name cannot be determined.
    """
    if argv is None:
        argv = sys.argv
    lang_dir = argv[1] if len(argv) > 1 else 'po'

    translations = _find_translations(lang_dir)
    if not translations:
        print("found no languages", file=sys.stderr)
        sys.exit(1)

    langs = list(SPECIAL_CASES)

    # The special cases are already listed, so drop every spelling of
    # their code (full locale, without encoding, bare language) from the
    # set of translations that still need a name.
    for _kind, code, _name in langs:
        for subcode in (code, code.split('.')[0], code.split('_')[0]):
            translations.discard(subcode)

    code_to_fallbacklocale = _load_fallback_locales(LANGLIST_PATH)
    entries = _read_language_data(DATA_PATH)

    # First pass: translations whose code matches a data entry exactly.
    for level, code, desc in entries:
        if code not in translations:
            continue
        langs.append((kind_for_level(level), code, desc))
        translations.remove(code)

    # Second pass: for translations still unmatched, compare against only
    # the part of the data entry's code before the first underscore.
    for level, code, desc in entries:
        code = code.split("_", 1)[0]
        if code not in translations:
            continue
        langs.append((kind_for_level(level), code, desc))
        translations.remove(code)

    # Whatever is left is looked up in iso-codes and always marked 'ssh'.
    with open(JSON_CODES, encoding='utf-8') as fp:
        iso_names = _index_iso_names(json.load(fp)["639-3"])

    # Sorted so the output does not depend on set iteration order, which
    # varies between runs because of string hash randomization.
    for code in sorted(translations):
        name = iso_names.get(code)
        langdb = _find_langdb(code)
        if name and langdb:
            native_lang = gettext.translation(langdb, languages=[code])
            native = native_lang.gettext(name).capitalize()
            langs.append(('ssh', code, native))
        else:
            print('do not know native name for {}'.format(code),
                  file=sys.stderr)
            sys.exit(1)

    for level, code, name in langs:
        # Prefer the fallback locale when one is known for this code.
        locale = code_to_fallbacklocale.get(code, code)
        sys.stdout.write("{}:{}:{}\n".format(level, locale, name))


if __name__ == '__main__':
    main()