1616import leveldb
1717import luadata
1818import re
def load_csv(csvf):
    """Load dictionary rows from a CSV file.

    The file is parsed with ``csv.DictReader``, so its first line provides the
    column names.  Rows whose ``word`` starts with ``#`` are treated as
    comments and skipped.  A non-empty ``phonetic`` value is wrapped in square
    brackets.

    :param csvf: path of the CSV file to read
    :returns: list of row dicts as produced by ``csv.DictReader``
    :raises KeyError: if the file has no ``word`` or ``phonetic`` column
    """
    entries = []
    with open(csvf, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # DictReader yields None for columns missing from a short row;
            # treat that like an empty string instead of crashing on it.
            word = row['word'] or ''
            if word.startswith('#'):
                continue
            phonetic = row['phonetic']
            if phonetic:
                row['phonetic'] = '[' + phonetic + ']'
            entries.append(row)

    return entries
31-
def load_text(textf):
    """Load dictionary rows from a tab-separated text file.

    Each data line has the form ``word<TAB>data`` where ``data`` is either
    ``phonetic;translation`` or just ``translation``.  Blank lines and lines
    starting with ``#`` are skipped.

    :param textf: path of the text file to read
    :returns: list of dicts with ``word``, ``translation`` and, when the data
        part contains a ``;``, ``phonetic``
    :raises ValueError: if a non-blank, non-comment line contains no tab
    """
    entries = []
    with open(textf, newline='') as textfile:
        # Iterate the file lazily instead of materialising it with readlines().
        for lineno, raw in enumerate(textfile, start=1):
            # newline='' keeps the original line ending; drop it so it does
            # not end up inside the translation text.
            line = raw.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            word, sep, data = line.partition('\t')
            if not sep:
                raise ValueError(
                    '{}:{}: expected "word<TAB>data", got {!r}'.format(
                        textf, lineno, line))
            row = {'word': word}
            phonetic, sep, translation = data.partition(';')
            if sep:
                row['phonetic'] = phonetic
                row['translation'] = translation
            else:
                row['translation'] = data
            entries.append(row)

    return entries
50-
def write_chunk(filename, rows):
    """Write rows to a file as a Lua chunk that returns a table.

    The output is ``return {``, then one ``luadata.serialize(row)`` entry per
    line followed by a comma, then a closing ``}``.

    :param filename: path of the file to create or overwrite
    :param rows: iterable of row dicts to serialize
    """
    with open(filename, 'w') as out:
        out.write('return {\n')
        # writelines consumes the generator lazily, one entry per row.
        out.writelines(luadata.serialize(row) + ',\n' for row in rows)
        out.write('}\n')
58-
def write_leveldb(filename, rows):
    """Store rows in a LevelDB database keyed by word.

    Rows with an empty ``word`` are skipped.  A word containing an ASCII
    upper-case letter is stored under ``<lowercased word><TAB><word>``; any
    other word is used as the key unchanged.  The value is the
    ``luadata.serialize`` text of the row with the two-character sequences
    ``\\n`` and ``\\r`` replaced by real newline / carriage-return characters.
    Keys and values are written as UTF-8 bytes.

    :param filename: path of the LevelDB directory (created if missing)
    :param rows: iterable of row dicts, each with a ``word`` entry
    """
    # NOTE(review): the escape literals below were reconstructed from
    # whitespace-damaged source -- confirm against the intended output.
    db = leveldb.LevelDB(filename, create_if_missing=True)
    for row in rows:
        word = row['word']
        if not word:
            continue

        # re.search stops at the first match; no need to collect all of them.
        if re.search('[A-Z]', word):
            key = word.lower() + '\t' + word
        else:
            key = word

        value = luadata.serialize(row).replace('\\n', '\n').replace('\\r', '\r')
        db.Put(key.encode('utf-8'), value.encode('utf-8'))
71-
72-
73-
74-
75-
76-
77- def main (fn ,fmt ):
def __convert_dict_(rec):
    """Parse one text line into a record dict.

    The line has the form ``word<TAB>data`` where ``data`` is either
    ``phonetic;translation`` or just ``translation``.  Surrounding whitespace
    (including the line ending) is stripped first.  The translation is
    stored with a single leading space.

    :param rec: one line of the text-format input
    :returns: dict with ``word``, ``phonetic`` (``''`` when absent) and
        ``translation``; ``None`` for blank lines and ``#`` comment lines
    :raises ValueError: if the line contains no tab separator
    """
    line = rec.strip()
    if not line or line[0] == '#':
        return None
    word, sep, data = line.partition('\t')
    if not sep:
        raise ValueError('missing tab separator in line: {!r}'.format(rec))
    phonetic, sep, translation = data.partition(';')
    if not sep:
        # No ';' in the data part: everything is the translation.
        phonetic, translation = '', data
    return {
        'word': word,
        'phonetic': phonetic,
        'translation': " " + translation,
    }
32+
def get_key_str(dict):
    """Build the storage key for a record.

    :param dict: record mapping with a ``word`` entry (may be ``None``/empty)
    :returns: ``None`` when there is no word or the word starts with ``#``;
        the word itself when lower-casing leaves it unchanged; otherwise
        ``<lowercased word><TAB><word>``
    """
    # The parameter name shadows the builtin; it is kept so keyword callers
    # keep working.
    key = dict and dict.get('word')
    if not key or key[0] == '#':
        return None
    # Compare against the lower-cased form instead of str.islower(), which is
    # False for text without any cased character (digits, CJK, ...) and would
    # turn such a word into "<word>\t<word>".
    lowered = key.lower()
    return key if lowered == key else lowered + "\t" + key
38+
# A leading "<word>." token followed by one whitespace character, e.g. "n. "
# (presumably a part-of-speech tag -- confirm against the dictionary data).
_LEADING_TAG_RE = re.compile(r'^(\w+\.\s)')
# One or more literal backslash-r / backslash-n two-character sequences.
_ESCAPED_EOL_RE = re.compile(r'(\\[rn])+')


# rec: type dict or text format
def convert_chunk(rec):
    """Convert one input record into a ``(key, serialized_value)`` pair.

    :param rec: either a text line (parsed with ``__convert_dict_``) or a
        mapping with ``word`` and optionally ``phonetic``, ``translation``
        and ``definition``
    :returns: ``(key, lua_text)`` where ``key`` comes from ``get_key_str`` and
        ``lua_text`` is the ``luadata.serialize`` output; ``None`` when the
        record is empty, a comment, or has no usable key
    """
    if isinstance(rec, str):
        rec = __convert_dict_(rec)
    if not rec:
        return None

    key = get_key_str(rec)
    if not key:
        return None

    # Work on a copy so the caller's record is left untouched.
    fields = dict(rec)

    # Text records carry no 'definition' and CSV rows may hold None for
    # missing columns, so only rewrite fields that are really strings.
    for name in ('translation', 'definition'):
        text = fields.get(name)
        if isinstance(text, str):
            fields[name] = _LEADING_TAG_RE.sub(' \\1', text)

    # Collapse runs of escaped line breaks into a single real newline.
    fields = {
        k: _ESCAPED_EOL_RE.sub('\n', v) if isinstance(v, str) else v
        for k, v in fields.items()
    }

    phonetic = fields.get('phonetic')
    if phonetic:
        fields['phonetic'] = '[{}]'.format(phonetic)

    # NOTE(review): literal reconstructed from whitespace-damaged source; it
    # rewrites a backslash followed by a real newline into the two-character
    # escape backslash-n -- confirm this matches luadata's output.
    return key, luadata.serialize(fields).replace('\\\n', '\\n')
56+
class LuaChunk:
    """Writer for a Lua chunk file of the form ``return { entry, ... }``.

    The file is opened and the ``return {`` header written on construction;
    ``close()`` writes the closing ``}``.  ``Put``/``Flush`` use the same
    method names as the LevelDB handle the script uses alternatively, so the
    two can be driven by the same loop.  The object can also be used as a
    context manager, which closes it on exit.
    """

    def __init__(self, name):
        """Open ``name`` for writing and emit the table header.

        :param name: path of the output file (created or overwritten)
        """
        # Set first so __del__ stays safe if open() below raises.
        self.__status = False
        self.__name = name
        self.__fn = open(self.__name, 'w')
        self.__status = True
        self.__fn.write("return {\n")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def status(self):
        """Return True while the file is open, False once it is closed."""
        return self.__status

    def name(self):
        """Return the output file path given to the constructor."""
        return self.__name

    def __del__(self):
        if self.__status:
            self.close()

    def Flush(self):
        """Flush buffered output to the file; no-op once closed."""
        if self.__status:
            self.__fn.flush()

    def close(self):
        """Write the closing brace and close the file; no-op once closed."""
        if self.__status:
            print('close file: ' + self.__name)
            self.__fn.write("}\n")
            self.__fn.close()
            self.__status = False

    def Put(self, key, value):
        """Append one table entry; no-op once closed.

        :param key: unused; accepted to match the LevelDB ``Put`` call shape
        :param value: serialized entry, either ``bytes`` (decoded with the
            default UTF-8 codec) or ``str``
        """
        if self.__status:
            if isinstance(value, (bytes, bytearray)):
                value = value.decode()
            self.__fn.write('{},\n'.format(value))
82+
83+
def __main(fname, fmt):
    """Convert a dictionary source file into a LevelDB database or Lua chunk.

    Usage: python conv_file.py file.[txt|csv] [leveldb|chunk]

    The input is read as CSV (via ``csv.DictReader``) when its extension is
    ``.csv`` and line by line otherwise.  Every record is passed through
    ``convert_chunk``; records it rejects are skipped.

    :param fname: path of the input file
    :param fmt: ``'leveldb'`` writes to ``<base>.userdb``; any other value
        writes a Lua chunk to ``<base>.txtl``
    :returns: ``None`` on success, or the error message when a ``ValueError``
        was raised during conversion (suitable for ``sys.exit``)
    """
    try:
        f, ext = os.path.splitext(fname)
        db = leveldb.LevelDB(f + ".userdb") if fmt == 'leveldb' else LuaChunk(f + '.txtl')
        try:
            # newline='' is what the csv module asks for, so quoted fields
            # with embedded line breaks are read correctly.
            with open(fname, newline='') as fnode:
                inf = csv.DictReader(fnode) if ext == '.csv' else fnode
                for row in inf:
                    converted = convert_chunk(row)
                    # convert_chunk returns None for comments, blank lines
                    # and records without a usable key.
                    if not converted:
                        continue
                    key_str, chunk_str = converted
                    if key_str and chunk_str:
                        db.Put(key_str.encode('utf-8'), chunk_str.encode('utf-8'))
        finally:
            # Finalise the output even when conversion fails part-way; a
            # handle without a close() method is released by dropping the
            # reference.
            closer = getattr(db, 'close', None)
            if closer is not None:
                closer()
            del db
    except ValueError as ve:
        return str(ve)
99102
100103
101-
102104if __name__ == '__main__' :
103105
104106 print (sys .argv ,len (sys .argv ))
@@ -107,5 +109,5 @@ def main(fn,fmt):
107109 else :
108110 fn = sys .argv [1 ]
109111 fmt = len (sys .argv )> 2 and sys .argv [2 ]
110- sys .exit (main (fn ,fmt ))
112+ sys .exit (__main (fn ,fmt ))
111113
0 commit comments