1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
#!/usr/bin/env python3
# Parses all the .po files and generates binary language strings to be loaded
# at runtime via embedded data.
import os, sys
# Language codes whose .po files are compiled into binary string tables.
BUILD_LANGS = [
    'en',  # base strings
    'cs', 'de', 'eo', 'es', 'es_MX',
    'fi', 'fr', 'gl', 'hu', 'ia',
    'ie', 'isv', 'pl', 'ru', 'sk',
    'sr', 'tok', 'uk', 'zh_Hans', 'zh_Hant',
]

# Maps the character that follows a backslash inside a quoted .po string to
# the character the escape sequence stands for.
ESCAPES = {
    '\\': '\\',
    '"': '"',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
}

# 'compile' is the default mode; passing --new on the command line selects
# 'new' instead.
MODE = 'new' if '--new' in sys.argv else 'compile'
def unquote(string):
    """Return the contents of a double-quoted string with escapes decoded.

    Leading and trailing whitespace around the quotes is ignored. Each
    backslash escape is replaced by the character ESCAPES maps it to.

    Raises:
        Exception: if the text is not enclosed in a pair of double quotes,
            uses an escape character that is not in ESCAPES, or ends with
            an unfinished escape (a trailing lone backslash).
    """
    txt = string.strip()
    # The length check keeps the empty string (which has no txt[0]) and a
    # lone '"' (whose first and last character are the same quote) from
    # slipping through the quote test.
    if len(txt) < 2 or txt[0] != '"' or txt[-1] != '"':
        raise Exception("invalid quoted string: " + string)
    parts = []
    is_escape = False
    for c in txt[1:-1]:
        if is_escape:
            try:
                parts.append(ESCAPES[c])
            except KeyError:
                # Report which escape is unsupported and where it came from
                # instead of surfacing a bare KeyError.
                raise Exception(
                    "invalid escape sequence '\\" + c +
                    "' in quoted string: " + string) from None
            is_escape = False
        elif c == '\\':
            is_escape = True
        else:
            parts.append(c)
    if is_escape:
        # A backslash with nothing after it would otherwise vanish silently.
        raise Exception("invalid quoted string: " + string)
    return ''.join(parts)
def parse_po(src):
    """Parse the .po file at path `src` into a list of (id, text) tuples.

    Recognises `msgid`, `msgid_plural`, `msgstr` and `msgstr[N]` lines, plus
    quoted continuation lines that follow a `msgstr` whose own text is `""`.
    Entries with an empty id are skipped. For `msgstr[N]` entries the last
    character of the id is replaced by N, so every plural form ends up with
    its own id.

    Quoted strings are decoded with `unquote`, so its errors propagate for
    malformed input.
    """
    messages = []
    is_multi = False  # msgstr text continues on the following quoted lines
    msg_id, msg_str, msg_index = None, None, None
    # The context manager closes the file as soon as parsing is done.
    with open(src, 'rt', encoding='utf-8') as po_file:
        for line in po_file:
            line = line.strip()
            if is_multi:
                if not line.startswith('"'):
                    # Anything other than a quoted line ends the multi-line
                    # string; the line itself is still examined below.
                    if msg_id:
                        messages.append((msg_id, msg_str, msg_index))
                    is_multi = False
                else:
                    msg_str += unquote(line)
            if line.startswith('msgid_plural'):
                msg_id = unquote(line[12:])
            elif line.startswith('msgid'):
                msg_id = unquote(line[6:])
            elif line.startswith('msgstr'):
                # Slicing (rather than indexing) tolerates a bare "msgstr".
                if line[6:7] == '[':
                    # Read up to the closing bracket so that indices with
                    # more than one digit are handled as well.
                    close = line.index(']', 7)
                    msg_index = int(line[7:close])
                    line = line[close + 1:]
                else:
                    msg_index = None
                    line = line[6:]
                if line.endswith(' ""'):
                    # Empty first line: the text (if any) follows on the
                    # next quoted lines.
                    is_multi = True
                    msg_str = ''
                else:
                    msg_str = unquote(line)
                    if msg_id:
                        messages.append((msg_id, msg_str, msg_index))
    # A multi-line string that runs to the end of the file is still pending.
    if is_multi and msg_id:
        messages.append((msg_id, msg_str, msg_index))
    # Apply plural indices to ids.
    pluralized = []
    for msg_id, msg_str, msg_index in messages:
        if msg_index is not None:
            msg_id = f'{msg_id[:-1]}{msg_index}'
        pluralized.append((msg_id, msg_str))
    return pluralized
def compile_string(msg_id, msg_str):
    """Return the UTF-8 bytes of the id and the text, each NUL-terminated."""
    terminator = b'\x00'
    encoded_id = msg_id.encode('utf-8')
    encoded_text = msg_str.encode('utf-8')
    return b''.join((encoded_id, terminator, encoded_text, terminator))
# Work from this script's own directory so the relative paths below resolve
# regardless of where the script is launched from. abspath() matters when
# __file__ is a bare relative filename: its dirname would be '' and chdir('')
# fails.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

if MODE == 'compile':
    # The English file supplies the fallback text for ids a language lacks.
    BASE_STRINGS = {}
    # Ids (minus the '.0' suffix) of base strings that have plural forms.
    PLURALS = set()
    for msg_id, msg_str in parse_po('en.po'):
        BASE_STRINGS[msg_id] = msg_str
        if msg_id.endswith('.0'):
            PLURALS.add(msg_id[:-2])
    for src in os.listdir('.'):
        if src.endswith('.po') and src.split('.')[0] in BUILD_LANGS:
            # Make a binary blob with strings sorted by ID.
            lang = parse_po(src)
            have_ids = {msg_id for msg_id, _ in lang}
            # Take missing strings from the base language. Ids belonging to
            # a plural group are never filled in this way.
            for msg_id in BASE_STRINGS:
                if msg_id not in have_ids and msg_id[:-2] not in PLURALS:
                    print('%10s' % src, 'missing:', msg_id)
                    lang.append((msg_id, BASE_STRINGS[msg_id]))
            # Join once instead of growing a bytes object entry by entry.
            compiled = b''.join(
                compile_string(msg_id, msg_str)
                for msg_id, msg_str in sorted(lang))
            # Close the output explicitly so the data is flushed to disk.
            with open(f'../res/lang/{src[:-3]}.bin', 'wb') as out:
                out.write(compiled)
elif MODE == 'new':
    # Write a template containing every base id with an empty translation.
    messages = parse_po('en.po')
    with open('new.po', 'wt', encoding='utf-8') as f:
        for msg_id, _ in messages:
            print(f'\nmsgid "{msg_id}"\nmsgstr ""\n', file=f)
|