Skip to content

Commit 200482f

Browse files
committed
update
Signed-off-by: shewer <[email protected]>
1 parent 9bfd12e commit 200482f

File tree

14 files changed

+127062
-736
lines changed

14 files changed

+127062
-736
lines changed

english_conv.py

Lines changed: 76 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -16,89 +16,91 @@
1616
import leveldb
1717
import luadata
1818
import re
19-
def load_csv(csvf):
    """Load dictionary rows from a CSV file.

    The file is read with ``csv.DictReader`` and must provide ``word`` and
    ``phonetic`` columns (a missing column raises ``KeyError``).

    * Rows whose ``word`` starts with ``#`` are treated as comments and
      skipped.
    * A non-empty ``phonetic`` value is wrapped in square brackets.

    :param csvf: path of the CSV file, read as UTF-8.
    :returns: list of row dicts, in file order.
    """
    rows = []
    with open(csvf, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # DictReader yields None for cells missing from a short row, and
            # an empty word has no first character to inspect; startswith on
            # a normalized string covers both without raising.
            word = row['word'] or ''
            if word.startswith('#'):
                continue
            if row['phonetic']:
                row['phonetic'] = '[' + row['phonetic'] + ']'
            rows.append(row)
    return rows
31-
32-
def load_text(textf):
    """Load dictionary rows from a TAB-separated text file.

    Each data line has the form ``word<TAB>[phonetic;]translation``.  Lines
    starting with ``#`` and blank lines are skipped.  The line terminator is
    removed so it does not end up inside ``translation``.

    :param textf: path of the text file, read as UTF-8.
    :returns: list of dicts with ``word`` and ``translation`` keys, plus
        ``phonetic`` when the data part contains a ``;``.
    :raises ValueError: if a non-blank, non-comment line has no TAB.
    """
    rows = []
    with open(textf, newline='', encoding='utf-8') as textfile:
        for lineno, raw in enumerate(textfile, 1):
            line = raw.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            word, tab, data = line.partition('\t')
            if not tab:
                # Keep the original exception type, but say where it failed.
                raise ValueError(
                    '{}:{}: missing TAB separator'.format(textf, lineno))
            row = {'word': word}
            phonetic, semicolon, translation = data.partition(';')
            if semicolon:
                row['phonetic'] = phonetic
                row['translation'] = translation
            else:
                row['translation'] = data
            rows.append(row)
    return rows
50-
51-
def write_chunk(filename, rows):
    """Write ``rows`` as a Lua source file that returns a table.

    The output is ``return {``, then one ``luadata.serialize(row)`` per line
    followed by a comma, then ``}``.  The serialized text is written as
    produced; no escape sequences are rewritten here.

    :param filename: path of the file to create or overwrite (UTF-8).
    :param rows: iterable of row dicts.
    """
    # Explicit encoding so the output does not depend on the platform locale.
    with open(filename, 'w', encoding='utf-8') as fn:
        fn.write('return {\n')
        fn.writelines(luadata.serialize(row) + ',\n' for row in rows)
        fn.write('}\n')
58-
59-
def write_leveldb(filename, rows):
    """Store every row in a LevelDB database, keyed by its word.

    Rows with an empty ``word`` are skipped.  A word containing an ASCII
    upper-case letter is stored under ``<lowercased word><TAB><word>``; any
    other word is stored under the word itself.  The value is the
    ``luadata`` serialization of the row, with the two-character sequences
    backslash-n and backslash-r replaced by real newline and carriage-return
    characters.  Keys and values are written as UTF-8 bytes.

    :param filename: path of the LevelDB database (created if missing).
    :param rows: iterable of row dicts with a ``word`` key.
    """
    store = leveldb.LevelDB(filename, create_if_missing=True)
    for entry in rows:
        word = entry['word']
        if not word:
            continue

        has_upper = re.search('[A-Z]', word) is not None
        db_key = word.lower() + '\t' + word if has_upper else word

        serialized = luadata.serialize(entry)
        serialized = serialized.replace('\\n', '\n').replace('\\r', '\r')
        store.Put(db_key.encode('utf-8'), serialized.encode('utf-8'))
71-
72-
73-
74-
75-
76-
77-
def main(fn,fmt):
19+
def __convert_dict_(rec):
    """Parse one text-format line into a record dict.

    The expected form is ``word<TAB>[phonetic;]translation``.  Surrounding
    whitespace (including the line terminator) is stripped before parsing.

    :param rec: one line of the text dictionary.
    :returns: dict with ``word``, ``phonetic`` (empty string when the data
        part has no ``;``) and ``translation`` (prefixed with one space), or
        ``None`` for a blank line or a line starting with ``#``.
    :raises ValueError: if a non-blank, non-comment line has no TAB.
    """
    line = rec.strip()
    # A blank line has nothing to parse, and an empty string has no rec[0].
    if not line or rec.startswith('#'):
        return None

    word, tab, data = line.partition('\t')
    if not tab:
        raise ValueError('missing TAB separator in record: {!r}'.format(rec))

    phonetic, semicolon, translation = data.partition(';')
    if not semicolon:
        phonetic, translation = '', data
    return {
        'word': word,
        'phonetic': phonetic,
        'translation': ' ' + translation,
    }
32+
33+
def get_key_str(dict):
    """Return the database key for a record, or ``None`` if it has none.

    :param dict: record mapping with a ``word`` entry, or ``None``.  The
        parameter name shadows the builtin; it is kept so keyword callers
        keep working.
    :returns: ``None`` when the record is empty, has no word, or the word
        starts with ``#``.  Otherwise the word itself when lower-casing
        leaves it unchanged, else ``<lowercased word><TAB><word>``.
    """
    key = dict.get('word') if dict else None
    if not key or key[0] == '#':
        return None
    # Compare against lower() rather than calling islower(): islower() is
    # False for words with no cased characters (e.g. "123"), which would give
    # such words a redundant "123<TAB>123" key.
    lowered = key.lower()
    return key if key == lowered else lowered + "\t" + key
38+
39+
# rec: type dict or text format
def convert_chunk(rec):
    """Turn one dictionary record into a ``(key, serialized)`` pair.

    :param rec: a record dict (e.g. a ``csv.DictReader`` row) or one
        text-format line, which is parsed with ``__convert_dict_`` first.
    :returns: ``(key, lua_text)`` for a usable record.  ``(None, None)`` when
        the record is skipped (blank or comment line, empty record, no usable
        key), so callers can always unpack two values.

    The input dict is not modified; a normalized copy is serialized:

    * ``translation`` and ``definition`` get one leading space when they
      start with an abbreviation such as ``n. ``;
    * runs of the two-character sequences backslash-r / backslash-n in any
      string field become a single newline character;
    * a non-empty ``phonetic`` is wrapped in square brackets.

    Fields that are absent, or whose value is not a string (``DictReader``
    yields ``None`` for cells missing from a short row), are left as they are.
    """
    if isinstance(rec, str):
        rec = __convert_dict_(rec)
    if not rec:
        return None, None

    key = get_key_str(rec)
    if not key:
        return None, None

    fields = {}
    for name, value in rec.items():
        if isinstance(value, str):
            if name in ('translation', 'definition'):
                value = re.sub(r'^(\w+\.\s)', ' \\1', value)
            value = re.sub(r'(\\[rn])+', '\n', value)
        fields[name] = value

    phonetic = fields.get('phonetic')
    if phonetic:
        fields['phonetic'] = '[{}]'.format(phonetic)

    # A backslash followed by a literal newline in the serialized text is
    # rewritten to the two-character escape backslash-n.
    return key, luadata.serialize(fields).replace('\\\n', '\\n')
56+
57+
class LuaChunk:
    """Writer for a Lua source file of the form ``return { <entry>, ... }``.

    It exposes ``Put`` so it can be used in place of a LevelDB handle: each
    ``Put`` appends one entry line, and ``close`` writes the closing brace.
    It can also be used as a context manager, which closes the file on exit.
    """

    def __init__(self, name):
        """Create or overwrite ``name`` (UTF-8) and write the table header."""
        self.__name = name
        # Set before open() so close()/__del__ are safe if open() raises.
        self.__status = False
        self.__fn = open(self.__name, 'w', encoding='utf-8')
        self.__status = True
        self.__fn.write("return {\n")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def status(self):
        """Return True while the file is open for writing."""
        return self.__status

    def name(self):
        """Return the path this chunk is written to."""
        return self.__name

    def __del__(self):
        # Best-effort fallback only; callers should call close() explicitly.
        if self.__status:
            self.close()

    def Flush(self):
        """Flush buffered output to disk; does nothing once closed."""
        if self.__status:
            self.__fn.flush()

    def close(self):
        """Write the closing brace and close the file; safe to call twice."""
        if self.__status:
            print('close file: ' + self.__name )
            self.__fn.write("}\n")
            self.__fn.close()
            self.__status = False

    def Put(self, key, value):
        """Append ``value`` as one table entry; does nothing once closed.

        :param key: ignored; present to match the LevelDB ``Put`` signature.
        :param value: serialized entry, as UTF-8 bytes or as ``str``.
        """
        if self.__status:
            if isinstance(value, (bytes, bytearray)):
                value = value.decode('utf-8')
            self.__fn.write('{},\n'.format(value))
82+
83+
84+
def __main(fname, fmt):
    """Convert a dictionary source file into a LevelDB store or a Lua chunk.

    Usage: ``python conv_file.py file.[txt|csv] [leveldb|chunk]``

    A ``.csv`` input is read with ``csv.DictReader``; any other extension is
    read line by line as text.  Every record goes through ``convert_chunk``
    and skipped records are ignored.

    :param fname: path of the input file, read as UTF-8.
    :param fmt: ``'leveldb'`` writes to ``<base>.userdb``; any other value
        writes a Lua chunk to ``<base>.txtl``.
    :returns: ``None`` on success, or the message of a ``ValueError`` raised
        during conversion.
    """
    base, ext = os.path.splitext(fname)
    try:
        if fmt == 'leveldb':
            db = leveldb.LevelDB(base + ".userdb")
        else:
            db = LuaChunk(base + '.txtl')
        try:
            # newline='' is what the csv module asks for; for text input the
            # line endings are left on each line handed to convert_chunk.
            with open(fname, newline='', encoding='utf-8') as fnode:
                inf = csv.DictReader(fnode) if ext == '.csv' else fnode
                for row in inf:
                    result = convert_chunk(row)
                    # A skipped record may come back as None or as a pair of
                    # Nones; neither must be unpacked blindly.
                    if not result:
                        continue
                    key_str, chunk_str = result
                    if key_str and chunk_str:
                        db.Put(key_str.encode('utf-8'),
                               chunk_str.encode('utf-8'))
        finally:
            # Close explicitly when the backend offers it instead of relying
            # on garbage collection; otherwise just drop the reference.
            if hasattr(db, 'close'):
                db.close()
            del db
    except ValueError as ve:
        return str(ve)
99102

100103

101-
102104
if __name__ == '__main__' :
103105

104106
print(sys.argv,len(sys.argv))
@@ -107,5 +109,5 @@ def main(fn,fmt):
107109
else:
108110
fn= sys.argv[1]
109111
fmt= len(sys.argv)>2 and sys.argv[2]
110-
sys.exit(main(fn,fmt))
112+
sys.exit(__main(fn,fmt))
111113

lua/component/stroke_count.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ end
6969

7070
function M.func(input,env)
7171
local context=env.engine.context
72+
require("croissant.debugger")()
7273
for cand in input:iter() do
7374
cand.comment = cand.comment .. strock_count_to_str(env.reversedb, cand.text,env.str_fmt)
7475
yield(cand)

0 commit comments

Comments
 (0)