diff --git a/english_conv.py b/english_conv.py index ec914cd..0282687 100644 --- a/english_conv.py +++ b/english_conv.py @@ -16,89 +16,91 @@ import leveldb import luadata import re -def load_csv(csvf): - l = [] - with open(csvf, newline='') as csvfile: - rows= csv.DictReader(csvfile) - for row in rows: - if row['word'][0] == '#': - continue - if row['phonetic'] != "": - row['phonetic'] = '[' + row['phonetic'] + ']' - l.append(row) - - return l - -def load_text(textf): - l=[] - with open(textf,newline='') as textfile: - for line in textfile.readlines(): - if line[0] == '#': - continue - row = {} - word,data = line.split('\t',1) - row['word']=word - ll= data.split(';',1) - if len(ll)>1: - row['phonetic'] = ll[0] - row['translation']= ll[1] - else: - row['translation']= ll[0] - l.append(row ) - - return l - -def write_chunk(filename,rows): - with open(filename,'w') as fn: - fn.write('return {\n') - for row in rows: - value= luadata.serialize(row) #.replace('\\n','\n').replace('\\r','\r') - fn.write(value + ',\n') - fn.write('}\n') - -def write_leveldb(filename,rows): - db = leveldb.LevelDB(filename,create_if_missing=True) - for row in rows: - key = row['word'] - if not key: - continue - - if re.findall('[A-Z]',key): - key = key.lower() + '\t' + key - - value= luadata.serialize(row).replace('\\n','\n').replace('\\r','\r') - db.Put(key.encode('utf-8'),value.encode('utf-8')) - - - - - - -def main(fn,fmt): +def __convert_dict_(rec): + if rec[0] == '#': + return + res= {} + res['word'],data = line.strip().split('\t',1) + ll= data.split(';',1) + if len(ll)>1: + res['phonetic'] = ll[0] + res['translation']= " " + ll[1] + else: + res['phonetic'] = '' + res['translation']= " " + ll[0] + return res + +def get_key_str(dict): + key = dict and dict.get('word') + if not key or len(key) ==0 or key[0] == '#': + return + return key if key.islower() else key.lower() + "\t" + key + +# rec: type dict or text format +def convert_chunk(rec): + if isinstance(rec, str): + rec = __convert_dicti_(rce) + if not rec: + return + + rec['translation'] = re.sub(r'^(\w+\.\s)',' \\1', rec['translation']) + rec['definition'] = re.sub(r'^(\w+\.\s)',' \\1', rec['definition']) + + key = get_key_str(rec) + if not key: + return + tmp= { k: re.sub(r'(\\[rn])+','\n',v) for k,v in rec.items()} + if len(tmp['phonetic']) > 0: + tmp['phonetic'] = ('[{}]').format(tmp['phonetic']) + return key, luadata.serialize(tmp).replace('\\\n','\\n') + +class LuaChunk: + def __init__(self, name): + self.__name = name + self.__fn = open(self.__name,'w') + self.__status = True + self.__fn.write("return {\n") + def status(self): + return self.__status + def name(self): + return self.__name + def __del__(self): + if self.__status: + self.close() + def Flush(self): + if self.__status: + self.__fn.flush() + def close(self): + if self.__status: + print('close file: ' + self.__name ) + self.__fn.write("}\n") + self.__fn.close() + self.__status = False + def Put(self,key, value): + if self.__status: + self.__fn.write( ('{},\n').format(value.decode())) + + +def __main(fname,fmt): """TODO: Docstring for main. :returns: TODO python conv_file.py file.[txt|csv] [leveldb|chunk] -- default: luac compile to chunk_bin """ try: - - f,ext = os.path.splitext(fn) - rows = ext == ".csv" and load_csv(fn) or load_text(fn) - if fmt== 'leveldb': - write_leveldb(f,rows) - elif fmt == 'chunk': - write_chunk(f + '.txtl', rows) - else: - write_chunk(f + '.txtll',rows) - print("compile to bin ") - os.system( 'luac -o '+ f + ".txtl " + f + ".txtll && rm " + f +".txtll" ) - - + f,ext = os.path.splitext(fname) + db = leveldb.LevelDB(f + ".userdb") if fmt =='leveldb' else LuaChunk(f + '.txtl') + with open(fname) as fnode: + inf = csv.DictReader(fnode) if ext=='.csv' else fnode + for row in inf: + key_str, chunk_str=convert_chunk(row) + if key_str and chunk_str : + db.Put(key_str.encode('utf-8'),chunk_str.encode('utf-8')) + del db except ValueError as ve: return str(ve) - if __name__ == '__main__' : print(sys.argv,len(sys.argv)) @@ -107,5 +109,5 @@ def main(fn,fmt): else: fn= sys.argv[1] fmt= len(sys.argv)>2 and sys.argv[2] - sys.exit(main(fn,fmt)) + sys.exit(__main(fn,fmt)) diff --git a/lua/component/stroke_count.lua b/lua/component/stroke_count.lua index 7659b0d..91ce4c9 100644 --- a/lua/component/stroke_count.lua +++ b/lua/component/stroke_count.lua @@ -69,6 +69,7 @@ end function M.func(input,env) local context=env.engine.context + require("croissant.debugger")() for cand in input:iter() do cand.comment = cand.comment .. strock_count_to_str(env.reversedb, cand.text,env.str_fmt) yield(cand) diff --git a/lua/english/common.lua b/lua/english/common.lua new file mode 100644 index 0000000..5e20763 --- /dev/null +++ b/lua/english/common.lua @@ -0,0 +1,22 @@ +local is_unix = package.config:sub(1,1) == "/" + +return { + component_config= true, --自動設定 segment translato ... + -- proc setting + keys_binding= { + toggle="F10", + comment_mode="Control+F10", + completion="Tab", + completion_back="Shift+Tab", + completion_back1="Shift+ISO_Left_Tab", + }, + comment_mode_default = 0,--comment 顯示模式 + prefix_pattern = "^[%a-][%a_%-]*", + prefix= "!", -- 使用前綴碼觸發 + splite_char1 = "/", -- 字身碼 + splite_char2 = ":", -- 詞類碼 + tag = "english", + enable_njdict = true, -- 啓用 word_ninja + enable_ext_dict = true, -- 啓用 user 字典 + ext_dict = "ext_dict", -- +} diff --git a/lua/english/proc.lua b/lua/english/proc.lua index 9653679..23aa63c 100644 --- a/lua/english/proc.lua +++ b/lua/english/proc.lua @@ -5,177 +5,229 @@ -- -- Distributed under terms of the MIT license. -- - +local COM = require 'english/common' local Env= require 'tools/env_api' local List = require 'tools/list' +local function config_list_find(config, path, elm) + local cl = assert(config:get_list(path), path .. " of value error: expact (List)") + for i=0, cl.size-1 do + if cl:get_value_at(i).value == elm then + return i + end + end +end + + + +local function config_list_value(cl) + local tab = List() + for i = 0, cl.size-1 do + tab:push(cl:get_value_at(i)) + end + return tab +end +local function config_list_string(cl) + local tab = List() + for i = 0, cl.size-1 do + tae:push(cl:get_value_at(i).value) + end + return tab +end + +local function config_map(cm) + local tab = {} + for k in ipairs(cm:keys()) do + tab[k] = cm:get(k) + end + return tab +end + local English="english" local ASCII_PUNCT="ascii_punct" +--load keybinder +-- 1 schema: name_space/keys_binding/name: key ex: english/keybinding/toggle: "F10" +local function keybinder(env) + local config = env.engine.schema.config + local path = env.name_space .. "/keys_binding" + local keys={} + -- default + -- common.lua + for k, kv in next, COM.keys_binding do + local key = config:get_string(path .. "/" .. k) or kv + keys[k] = KeyEvent(key) + end + return keys +end +local function log_components(env) + local config= env.engine.schema.config + log.info("------------------------------------------------------------") + for _,v in next, {"processors", "segmentors", "translators", "filters"} do + local path = "engine/" .. v + local size = config:get_list_size(path) + for i=0, size-1 do + local comp=config:get_string(path.. "/@" .. i) + log.info( string.format("%s\t%d\t%s",v,i,comp) ) + end + end + log.info("------------------------------------------------------------") +end +-- insert component + local function component(env) - local config = env.engine.schema.config - -- definde prefix suffix from name_space or local prefix suffix - -- 加入 recognizer/patterns - --local path= "recognizer/patterns/" .. env.name_space - --local pattern = ("^%s[a-z]%s.*$"):format(prefix,suffix) - --config:set_string(path, pattern) - --puts("log",__FILE__(),__LINE__(),__FUNC__(), path, config:get_string(path), "prefix" , prefix,"suffix" , suffix) - - -- 加入 lua_sgement - local mod_name = 'english' - if rrequire(mod_name .. '.segm') then - local path = "engine/segmentors" - local comp_name = ("lua_%s@%s%s@%s"):format("segmentor",mod_name,".segm",env.name_space) - local slist=List(env:Config_get(path)) - if not slist:find_match(comp_name) then - local index = slist:find_index("matcher") or 2 - config:set_string(path .. '/@before ' .. index-1, comp_name) - end - end - -- 加入 lua_translator - if rrequire(mod_name .. '.tran') then - local path = "engine/translators" - local comp_name = ("lua_%s@%s%s@%s"):format("translator",mod_name,".tran",env.name_space) - if not List(env:Config_get(path)):find_match(comp_name) then - config:set_string(path .. '/@next', comp_name) - end - end - --recognizer/patterns/punct: '^/([0-9]0?|[A-Za-z]+)$' - - local punct = config:get_string("recognizer/patterns/punct") - if punct == nil or punct == "" then - config:set_string("recognizer/patterns/punct",[[^/([0-9]0?|[A-Za-z]*)$]]) - end - - -- 替換 uniquifier filter --> lua_filter@uniquifier 或者加入 - --[[ - local f_path= "engine/filters" - local org_filter= "uniquifier" - local u_ns = "uniquifier" - local r_filter = "lua_filter@uniquifier" - _G[u_ns] = _G[u_ns] or require("component/uniquifier") - local f_index= config:find_index(f_path, org_filter) - if f_index then - config:config_list_replace( f_path, org_filter, r_filter) - else - config:config_list_append( f_path, r_filter) - end - --增加 reject_tags - config:config_list_append( u_ns .. "/reject_tags", env.name_space ) - --]] + local segmentor = "lua_segmentor@*english.segm@" .. env.name_space + local affix = "affix_segmentor@" .. env.name_space + local translator = "lua_translator@*english.tran@" .. env.name_space + + local config = env.engine.schema.config + local tag = config:get_string(env.name_space .. "/tag") or COM.tag or env.name_space + -- insert segmentor before matcher + local path = "engine/segmentors" + if not config_list_find(config, path, mod_name ) then + local index = config_list_find(config, path, "matcher") or 2 + config:set_string(path .. "/@before " .. index , segmentor) + end + -- insert segmentor before punct + local prefix = config:get_string(env.name_space .. "/prefix") or COM.prefix + if prefix and #prefix >0 then + if not config_list_find(config, path, mod_name ) then + local index = config_list_find(config, path, "punct_segmentor") or -2 + config:set_string(path .. "/@before " .. index, affix) + end + + if not config:get_string("recognizer/patterns/".. tag) then + log.warning('recogniOAzer/patterns/' .. tag .. "does not setting") + config:set_strnig("recoginzer/patterns/ .. tag", COM.prefix_pattern) + end + end + -- append translator + local path = "engine/translators" + if not config_list_find(config, path, translator ) then + local index = config:get_list_size(path) + config:set_string(path .. "/@before " .. index , translator) + end end local P={} function P.init(env) - Env(env) - local context=env.engine.context - local config= env.engine.schema.config - --load_config(env) - component(env) - --recognizer/patterns/english: "^[a-zA-Z]+[*/:'._-].*" - env.keys= env:get_keybinds() - env.keys.completion= KeyEvent("Tab") - env.keys.completion_back= KeyEvent("Shift+Tab") - env.keys.completion_back1= KeyEvent("Shift+ISO_Left_Tab") - - --env.comp_key= KeyEvent("Tab") - --env.uncomp_key= KeyEvent("Shift+ISO_Left_Tab") - --env.enable_key= KeyEvent(config:get_string(env.name_space .."/keybinds/toggle") or "F10") - --context:set_option(English,false) - env.history=List() - - env.notifier= List( - context.commit_notifier:connect(function(ctx) - env.history:clear() - end), - context.option_update_notifier:connect(function(ctx,name) - if name == English then - if ctx:get_option(name) then - if not ctx:get_option(ASCII_PUNCT) then - ctx:set_option(ASCII_PUNCT,true) - env.save_ascii_punct= true - end - else - if env.save_ascii_punct then - ctx:set_option(ASCII_PUNCT,false) - end - end - end - end)) + Env(env) + local context=env.engine.context + local config= env.engine.schema.config + -- segmetor translator + if COM.component_config then + component(env) + log_components(env) + end + + + env.splite_char1 = config:get_string(env.name_space .. "/splite_char1") or COM.splite_char1 + env.splite_char2 = config:get_string(env.name_space .. "/splite_char2") or COM.splite_char2 + + env.prefix = config:get_string(env.name_space .. "/prefix") or COM.prefix + env.keys = keybinder(env) + env.tag = config:get_string(env.name_space .. "/tag") or COM.tag + env.history=List() + if GD then GD() end + env.comment_mode_name = config:get_string(env.name_space .. "/comment_mode_name") or env.name_space + local mode = config:get_int(env.name_space .. "/comment_mode_default") or COM.comment_mode_default or 0 + context:set_property(env.comment_mode_name, mode) + + + --rime_api.test_luaobj0(1) + local function commit_func(ctx) env.history:clear() end + env.notifiers= { + context.commit_notifier:connect( + function (ctx) env.history:clear() end), + } end function P.fini(env) - env.notifier:each(function(elm) elm:disconnect() end) + for i,elm in next, env.notifiers do elm:disconnect() end end function P.func(key,env) - local Rejected,Accepted,Noop=0,1,2 - -- 過濾 release ctrl alt key 避免 key_char 重複加入input - local context=env.engine.context - local status=env:get_status() - local key_char= key.keycode >= 0x20 and key.keycode < 128 and string.char( key.keycode) or "" - - -- enable English mode - if key:eq(env.keys.toggle) then - env:Toggle_option(English) - context:refresh_non_confirmed_composition() - return Accepted - end - if key:release() or key:ctrl() or key:alt() then return Noop end - if not context:get_option(English) then return Noop end - - -- reject - -- match env.pattern - -- english mode and key == a - if #key_char == 1 then - local active_inp= context.input .. key_char - if context:is_composing() and key_char:match("^[ ,]") then - context:commit() - return Rejected - elseif active_inp:match("^[%a][%a'.?*/:_%- ]*$") then --context:is_composing() and key_char:match("^[%a'.?*/:_%-]$") or key_char:match("^[%a]$") then + local Rejected,Accepted,Noop=0,1,2 + -- 過濾 release ctrl alt key 避免 key_char 重複加入input + local context = env.engine.context + local status = env:get_status() + local comp = context.composition + local segment = comp:back() + local key_char = key.keycode >= 0x20 and key.keycode < 0x80 + and string.char( key.keycode) or "" + local active_input = context.input .. key_char + + if key:release() or key:alt() then return Noop end + -- enable English mode e + + if key:eq(env.keys.toggle) then + context:set_option( English, not context:get_option(English)) + context:refresh_non_confirmed_composition() + return Accepted + end + + local enable_english = context:get_option(English) or + segment and ( segment:has_tag(English) or segment:has_tag(English.. "_prefix")) +-- start check key + if not enable_english then return Noop end + + local accept_pattern = string.format("^[%%a_%%-*?%s%s]$", env.splite_char1, env.splite_char2) + -- if not context:get_option(English) then return Noop end + if key_char:match(accept_pattern) then context:push_input(key_char) return Accepted - end - end - -- comment mode - if key:eq(env.keys.mode) then - env:Toggle_option("english_info_mode") - return Accepted - end - - -- 反回上一次 input text - if key:eq(env.keys.completion_back) or key:eq(env.keys.completion_back1) then - if #env.history >0 then - context.input = env.history:pop() + end + + if key_char:match("^[,. ]$") then + context:commit() + env.engine:commit_text( key_char==" " and key_char or key_char .. " ") return Accepted - else + -- 切換 comment 顯示模式 + elseif key:eq(env.keys.comment_mode) then + local mode = context:get_property(env.comment_mode_name) +1 + print(env.comment_mode_name, mode) + GD() + context:set_property(env.comment_mode_name, mode) + context:refresh_non_confirmed_composition() return Noop - end - end - -- 補齊input 以cand.type "ninja" 替換部分字段 "english" 替換全字母串 - if status.has_menu then - local cand=context:get_selected_candidate() - if key:eq(env.keys.completion) then + -- 反回上一次 input text + elseif key:eq(env.keys.completion_back) or key:eq(env.keys.completion_back1) then + if #env.history>0 and segment then + context:pop_input(segment._end - segment._start) + local htext= env.history:pop() + context:push_input(htext) + return Accepted + end + return Noop + -- 補齊input 以cand.type "ninja" 替換部分字段 "english" 替換全字母串 + elseif key:eq(env.keys.completion) then + if not status.has_menu then return Noop end + local cand=context:get_selected_candidate() + -- reject - if cand.text == context.input then return Noop end - - local history = context.input + if cand.text == context.input:sub(cand._start+1, cand._end) then return Noop end if cand.type == "english" then - context.input = cand.text + local htext = context.input:sub(cand._start+1,cand._end) + env.history:push(htext) + context:pop_input(cand._end - cand._start) + context:push_input(cand.text) elseif cand.type== "ninja" then - context:push_input( cand.text:sub( cand._end - cand.start ) ) + env.history:push(cand.text) + context:pop_input(cand._end - cand._start) + + elseif cand.type== "english_ext" then - local text = cand.text - cand.text = cand.comment:match("%[(.*)%]") - cand.comment= "[" .. text .. "]" + local text = cand.text + cand.text = cand.comment:match("%[(.*)%]") + cand.comment= "[" .. text .. "]" else - return Noop + return Noop end - env.history:push(history) return Accepted - end - end - return Noop + end + return Noop end + + return P diff --git a/lua/english/segm.lua b/lua/english/segm.lua index e5b9634..2dacda8 100644 --- a/lua/english/segm.lua +++ b/lua/english/segm.lua @@ -6,36 +6,40 @@ -- Distributed under terms of the MIT license. -- - +local COM = require 'english/common' local English="english" local S={} -function S.func(segs ,env) -- segmetation:Segmentation,env_ - local context=env.engine.context - local cartpos= segs:get_current_start_position() - - -- 在chk_english_mode() 為 input 打上 english tag - --if chk_english_mode(env) and context:is_composing() then - local str = segs.input:sub(cartpos) - if not str:match("^%a[%a'?*/:_,.%-]*$") then return true end - if context:get_option(English) and context:is_composing() then - --puts("log", __LINE__() ,"-----trace-----sgement" , str ,context.input ) - - local str= segs.input:sub(segs:get_current_start_position() ) - local seg=Segment(cartpos,segs.input:len()) - seg.tags= Set({English}) - seg.prompt="(english)" - segs:add_segment(seg) - - -- 終止 後面 segmentor 打tag - return false - end - -- 不是 chk_english_mode pass 此 segmentor 由後面處理 - return true -end - function S.init(env) + local config = env.engine.schema.config + env.tag = config:get_string(env.name_space .. "/tag") or "english" + --env.affix_seg= Composegment.Segmentor(env.engine,"", "affix_segmentor@english") end function S.fini(env) end +function S.func(segs ,env) -- segmetation:Segmentation,env_ + local context=env.engine.context + if not context:is_composing() then return true end + if T06 and GD then GD() end + if context:get_option(English) then + --puts("log", __LINE__() ,"-----trace-----sgement" , str ,context.input ) + if not segs.input:match("^%a[%a'?*/:_,.%-]*$") then return true end + + local sp, ep = segs:get_current_start_position(),segs:get_current_end_position() + ep = ep > context.caret_pos and ep or context.caret_pos + local seg=Segment(sp, ep) + seg.tags= Set({English}) + seg.prompt="(english)" + segs:add_segment(seg) + + return false + else + return true + end + + -- 不是 chk_english_mode pass 此 segmentor 由後面處理 + return true +end + + return S diff --git a/lua/english/tran.lua b/lua/english/tran.lua index dc5e72f..bc88887 100644 --- a/lua/english/tran.lua +++ b/lua/english/tran.lua @@ -1,154 +1,220 @@ -local Env= require 'tools/env_api' -local List= require 'tools/list' -local English_dict= require 'tools/english_dict' + +local COM = require 'english.common' +local Env= require 'tools.env_api' +local List= require 'tools.list' +local Word= require 'english.word' local slash = package.path:sub(1,1) -local function njload() - -- try to load wordninja-lua - -- https://github.com/BlindingDark/wordninja-rs-lua - -- rime.lua append - -- cp wordninja.so /lua/plugin - -- append cpath /lua/plugin - -- window lua 版本不符將造成暫時取消 window版本 載入 wordnanja-rs - local ok,res = pcall(require ,'wordnanja') - if ok then return res end - return require'tools/wordninja' -end +local English="english" +local Ninja="ninja" + +local T={} +-- ext_dict local function load_ext_dict(ext_dict) - --local path= string.gsub(debug.getinfo(1).source,"^@(.+/)[^/]+$", "%1") - local path= rime_api.get_user_data_dir() .. slash - filename = path .. ( ext_dict or "ext_dict" ) .. ".txt" - if not isFile(filename) then return end - - local tab = {} - for line in io.open(filename):lines() do - if not line:match("^#") then -- 第一字 # 不納入字典 - local t=line:split("\t") - if t then - tab[t[1]] = t[2] + --local path= string.gsub(debug.getinfo(1).source,"^@(.+/)[^/]+$", "%1") + if T._ext_dict then + return T._ext_dict + end + local slash = package.config:sub(1,1) + local path= ("%s/lua/english/%s.txt") + :format(rime_api.get_user_data_dir(),ext_dict) + :gsub("/", slash ) + if not isFile(filename) then return end + + local dict = {} + for line in io.open(filename):lines() do + if not line:match("^#") then -- 第一字 # 不納入字典 + local t=line:split("\t") + if t then + local key = t[1]:lower() + local word = EngLish_dict.Word{word=t[1], translation= t[2], definition = t[3] } + dict[key] = word + end end - end - end - return tab + end + return dict end -local English="english" -local Ninja="ninja" - -local T={} -T._nj= T._nj or njload() -T._ext_dict= T._ext_dict or load_ext_dict("ext_dict") +-- 流行語 簡語字典 +T._njdict = require 'english/wordninja' function T.init(env) - local t1 = os.clock() - Env(env) - local config= env.engine.schema.config - env.tag= config:get_string(env.name_space .. "/tag") or English - local dict_name= config:get_string(env.name_space .. "/dictionary") or "english_tw" - env.dict = assert( English_dict(dict_name), 'can not Create english dict of ' .. dict_name) - - env.notifiers=List( - env.engine.context.option_update_notifier:connect( - function(ctx,name) - if name=="english_info_mode" then - T._mode = T._mode and T._mode +1 or 0 - env.mode = T._mode - --env.mode= env.mode and (env.mode+1) or 0 - end - end) ) + local config = env.engine.schema.config + local context = env.engine.context + env.tag= config:get_string(env.name_space .. "/tag") or COM.tag + env.quality = config:get_double(env.name_space .. "/quality") or 1 + local mode = config:get_int(env.name_space .. "/comment_mode") + or COM.comment_mode_default or 0 + if config:get_bool(env.name_space .."/enable_njdict") or COM.enable_njdict then + T._njdict = T._njdict or require("english/wordninja") + end + local ext_dict_name = config:get_bool(env.name_space .. "/ext_dict") or COM.ext_dict + if ext_dict_name then + T._ext_dict = T._ext_dict or load_ext_dict(ext_dict_name) + end + local dict_name = config:get_string(env.name_space .. "/dictionary") or "ecdict" + env.db = assert( rime_api.UserDb.LevelDb(dict_name), "can not Create english dict of " .. dict_name) + env.db:open_read_only() + env.comment_mode_name = config:get_string(env.name_space .. "/comment_mode_name") or env.name_space + env.njdict = T._njdict + env.ext_dict = T._ext_dict + -- init pattern + + env.splite_char1 = config:get_string(env.name_space .. "/splite_char1") or COM.splite_char1 or "/" + env.splite_char2 = config:get_string(env.name_space .. "/splite_char2") or COM.splite_char2 or ":" + env.prefix_pattern = config:get_string(env.name_space .. "/prefix_pattern") or COM.prefix_pattern or "^[%a-][%a_%-]*" + env.pattern_pn1 = ("%s([%%a]?)"):format( env.splite_char1) + --env.pattern_pn2 = ('^([%%a-][%%a?*_%s%%-]*)%s(%%a*)$'):format("/", ":") -- ^([%a-][%a?*_/%-]*):(%a*)$ + env.pattern_pn2 = ('^(.*)%s(%%a+)$'):format(env.splite_char2) + print('------->trace pattern ',env.prefix_pattern, env.pattern_pn1, env.pattern_pn2) + + + + env.notifier= context.property_update_notifier:connect(function(ctx,name) + if name == env.comment_mode_name then + env.mode = ctx:get_property(name) + end + end) end + function T.fini(env) - env.notifiers:each(function(elm) elm:disconnect() end) + env.notifier:disconnect() end --- 大寫轉換 ex: Axxxxx Axxxxx , AAxxx AAXXX -local function sync_case(input, candidate_word) - if input:match("^%u%u") then - return candidate_word:upper() - elseif input:match("^%u") then - return candidate_word:gsub("^%a",string.upper) - else - return candidate_word - end +-- EnglishTranslation +local function english_match(word, w_pattern, p_pattern) + local p_match = not p_pattern or + word.translation:match(p_pattern) or + word.definition:match(p_pattern) + return p_match and word.word:match(w_pattern) end --- 處理 英文翻譯長度及格式化 (windows \n ->\r utf8_len 40) -local function system_format(comment,len) - len = len or 40 - local unix = package.config:sub(1,1) == "/" - if not unix then - comment = comment:utf8_sub(1,len):gsub("\n","\r") - end - return comment +local function split_inp(inp,env) + local prefix = inp:match(env.prefix_pattern) + local w , pn = inp:match(env.pattern_pn2) + print('-----pn2 --------->', prefix, w, inp, "pn:", pn) + pn = pn and #pn>0 and "%s".. pn .. "%.%s" or nil + w = w or inp + print('----pn2 recheck---------->', prefix, w, inp, "pn:", pn) + local pattern = w:gsub(env.pattern_pn1, Word.Eng_suffix):gsub("([?*])",".%1") + return prefix, pattern, pn end - --- return Translation -local function eng_tran(dict,mode,prefix_comment,cand) - - return Translation(function() - -- 使用 context.input 杳字典 type "english" - local inp = cand.text - for w in dict:iter(inp) do - -- system_format 處理 comment 字串長度 格式 - local comment = system_format( prefix_comment..w:get_info(mode) ) - local commit = sync_case(inp,w.word) - -- 如果 與 字典相同 替換 first_cand cand.comment - if cand.text:lower() == commit:lower() then - cand.comment= comment +local function english_tran(inp, seg, env) + local prefix, pattern, pn = Word.Split_text(inp:lower(), env.splite_char1, env.splite_cahr2) + print(('-----trace splite res %s-%s-%s'):format(prefix , pattern, pn)) + for k,v in env.db:query(prefix):iter() do + local kw = k:match("([^\t]*)") + local word = kw:match(pattern) and Word.Parse_chunk(v) or nil + if word and word:chk_part(pn) then + yield( word:to_cand(seg._start, seg._end, env.quality, env.mode )) + end + end + end + +-- replace candeidate text , comment +-- 大寫轉換 ex: Axxxxx Axxxxx , AAxxx AAXXX +local function sync_case(cand, raw_inp) + if raw_inp:match("^%u") then + local ccand = cand:get_genuine() + if raw_inp:match("^%u%u") then + ccand.text = ccand.text:upper() else - yield( ShadowCandidate(cand,cand.type,commit,comment) ) + ccand.text:gsub("^%l",string.upper) end - end - end) + end end -function T.func(inp,seg,env) - -- check inp format - if not ( seg:has_tag(env.tag) or env:Get_option(English) ) then return end - - local input = inp:sub(seg.start+1,seg._end) - input = input:match("^[%a][%a_.'/*:%- ]*$") and input or "" - if #input==0 then return end - - - -- first_cand - local first_comment = T.ext_dict and T._ext_dict[input:lower()] - first_comment = first_comment and ' abbr. ('.. first_comment..')' or '[English]' - local first_cand = assert( Candidate(English,seg.start,seg._end,input,first_comment )) - yield(first_cand) - - -- njcand - local nj_commit= input:match("^[%a%_%.%']+$") and T._nj and T._nj.split(input) - local njcand = nj_commit and Candidate(Ninja, seg.start,seg._end, nj_commit, "[ninja]") - if njcand then yield(njcand) end - +-- 處理 英文翻譯長度及格式化 (windows \n ->\r utf8_len 40) +local function win_comment_format(cand,len) + len = len or 40 + local ccand = cand:get_genuine() + ccand.comment = ccand.comment:utf8_sub(1,len) + ccand.comment= ccand.comment:gsb("\n", '\r') +end - -- 使用 context.input 杳字典 type "english" - for cand in eng_tran(env.dict,env.mode,"",first_cand):iter() do - yield(cand) - end +function T.func(inp, seg, env) + if not seg:has_tag(env.tag) then return end + local tran = Translation(T.order_func, inp, seg, env) + print('-------------------trate1',tran) + for cand in tran:iter() do + sync_case(cand, inp) --大小寫轉換 + if package.config:sub(1,1) == "\\" then -- window comment format + win_comment_format(cand) + end + yield(cand) + end + print('-------------------trate end',tran) + +end - -- ecdict 字典支援子句 - -- 使用ninja cand 展開字句查字典 - if not njcand then return end - for cand in eng_tran(env.dict,env.mode,"(Ninja) ",njcand):iter() do - yield(cand) - end +function T.order_func(inp,seg,env) + -- check inp format + local context = env.engine.context + local cands = List() + print('--------------------trac2') + local eng_tran = Translation(english_tran, inp, seg, env) + local translation_next = eng_tran:iter() + --njcand + local nj_commit= env.njdict and inp:match("^[%a%.%']+$") and env.njdict:split(inp) + local nj_cand = nj_commit and nj_commit ~= inp and + Candidate("ninja", seg._start, seg._end, nj_commit, "[ninja]") + + if nj_cand then + for k,v in env.db:query(nj_cand.text):iter() do + local w = Word.Parse_chunk(v) + if w.word:lower() == nj_cand.text:lower() then + nj_cand.comment = nj_cand.comment .. " " .. w:get_info(env.mode) + end + break + end + end + print('--------------------trace3-1', eng_tran) +-- ext_cand + local ext_cand = env.ext_dict and env.ext_dict[inp:lower()] + and env.ext_dict[inp:lower()]:to_cand(1, seg._start, seg._end) + cands:push(ext_cand) + cands:push(translation_next(eng_tran)) + if #cands == 0 then + cands[1] = Candidate('raw', seg._start, seg._end, inp, "RAW") + end + print('--------------------trace3-2', eng_tran) + if nj_cand then + cands:insert_at(2, nj_cand) + end + + -- 輸出三段 tranlsation + cands:each(yield) + for cand in translation_next,eng_tran do + yield(cand) + end + if nj_cand then + local njtext = nj_cand.text:split() + for cand in Translation(english_tran, njtext[#njtext], seg, env):iter() do + yield(cand) + end + end + + -- 使用 ninja 最後一佪字查字典 type "ninja" --local n_word= commit[#commit] + --[[ local n_word = njcand.text:match("%s(%a+)$") if not n_word then return end local snjcand= Candidate("sub_ninja",njcand._end - #n_word,njcand._end, n_word,"[sninja]") yield(snjcand) - for cand in eng_tran(env.dict,env.mode,"(SNinja) ",snjcand):iter() do - yield(cand) + for cand in env.dict:query(inp, seg._start, seg._end, comment_mode):iter() do + cand.type = "sub_ninja" + if quality then + cand.quality=quality + yield(cand) + end end +--]] end - return T diff --git a/lua/english/word.lua b/lua/english/word.lua new file mode 100644 index 0000000..0f23f3d --- /dev/null +++ b/lua/english/word.lua @@ -0,0 +1,194 @@ +#! /usr/bin/env lua +-- +-- english_dict.lua +-- Copyright (C) 2020 Shewer Lu +-- +-- Distributed under terms of the MIT license. +-- +--[[ + + ex : + Eng=require('tools/english_dict' ) + e = Eng( 'english') + for w in e:iter('th/i:ad') do -- * ? / : + print(w.word,w:get_info(1) ) + end + + list = e:match('th/i:ad') --List table + for i,v in ipairs(list) + print(w.word,w:get_info(1) + end + + pw=function(w) print(w.word,w:get_info()) end + list:each(pw) + + +-- +-- +--]] +require 'tools/string' +require 'tools/_file' +local List = List or require 'tools/list' +--local Word= require 'tools/english_word' +local Eng_suffix={ + [''] = '', + ['?'] = '.', + ['*'] = ".*", + ['f'] = "*ful", + ['y'] = "*ly", + ['t'] = "*tion", + ['s'] = "*sion", + ['a'] = "*able", + ['i'] = "*ing", + ['m'] = "*ment", + ['r'] = "*er", + ['g'] = "*ght", + ['n'] = "*ness", + ['l'] = "*less", + +} + +local function Split_text(str, pn1, pn2, pn0) + pn1 = pn1 or "/" + pn2 = pn2 or ":" + pn0 = pn0 or "[_-%a]+" + local w,p = str:match("^(.+)" .. pn2.. "(.+)$") + w = w and w or str + local prefix = w:match("^" .. pn0 ) + local pattern = "^" .. w:gsub(pn1 .. "(%a)", Eng_suffix):gsub("([?*])", Eng_suffix) + pattern = pattern:match("%.$") and pattern .. "$" or pattern + return prefix,pattern, p +end + +local function New(self,...) + local obj= setmetatable({} , self) + return not obj._initialize and obj or obj:_initialize(...) +end +--- Word +-- +local class= require 'tools/class' + + +-- Word +-- class method Phrase_chunk(row_chunk) Parse_text(row_tab) +-- instance method get_info(mode_num) to_s() prefix_match(prefix_str) match:(text) + +--local Word=Class("Word") +--Word.__name= "Word" +--[[ +function Word:__eq(obj) + return self.word == obj +end +--]] +-- +local is_unix = package.config:sub(1,1) == "/" +local NR = is_unix and "\n" or "\r" +local Word = class()--{} +Word.Eng_suffix = Eng_suffix +Word.Split_text = Split_text +Word.__name="Word" +function Word:_initialize(tab) + if type(tab)=="table" and tab.word and tab.word:len() >0 then + --and tab.translation and tab.phonetic then + for k,v in next, tab do + self[k] = v + end + self.phonetic = self.phonetic or "" + return self + end +end + +-- \t +function Word.Parse_text(line) + local tab={} + local word, translation= table.unpack(line:split("\t")) + if word:len() < 1 then return end + tab.word=word + translation= translation or "" + + local head,tail=translation:find('^%[.*%];') + if head and tail then + tab.phonetic=translation:sub(head,tail-1) + tab.translation=translation:sub(tail+1) + else + tab.phonetic="" + tab.translation=translation + end + return Word(tab) +end +function Word.Parse_chunk(str,replace) + if type(str) ~= "string" then return end + --str = replace and str:gsub("\\n","\\n"):gsub("\\r","\r") or str + local tab=load("return " .. str )() + return Word(tab) +end + +function Word:to_s() + local l = List() + for k,v in next,self do + l:push( string.format(" %s=%q",k,v) ) + end + return "{ " .. l:concat(',') .. "}" +end +-- 利用此func 設定comment 輸出格式 +-- +function Word:get_info(mode) + mode= tonumber(mode or Word._mode or 1) + local info= self + if not info then return "" end + mode= (mode) % 7 + + if mode == 1 then + return info.phonetic .. " " .. info.translation + elseif mode == 2 then + return (info.translation) + elseif mode == 3 then + return (info.phonetic) + elseif mode == 4 then + return (info.definition) + elseif mode == 5 then + return (info.word) + elseif mode== 6 then + return "" + else + return info.phonetic .. " " .. info.translation + end +end + +function Word:to_cand(s,e,q, mode) + local cand = Candidate("english",s,e,self.word, self:get_info(mode)) + if q then + cand.quality = q + end + return cand +end + +function Word:chk_prefix(prefix, case_match ) + if case_match then + return self.word:find( prefix) == 1 + else + return self.word:lower():find( prefix:lower()) == 1 + end + return false +end +function Word:chk_word(pattern, case_match) + return case_match and self.word:match(pattern) or self.word:lower():match(pattern) + +end +function Word:chk_part(part) + part= part and #part >0 and ("%%s%s%%.%%s"):format(part) or "" + return (self.translation:match(part) and true) or false +end +-- handle args (word, text, case_match) +function Word:match(text, case_match, handle) + if type(handle) == "function" then + return handle(self, text, case_match) + else + local pw,ww,pn = self.Split_text(text) + return self:chk_word(ww, case_match) and self:chk_part(pn) and true or false + end +end + +Word.is_match= Word.match + +return Word diff --git a/lua/tools/wordninja_words.txt b/lua/english/wordninja.lua old mode 100755 new mode 100644 similarity index 99% rename from lua/tools/wordninja_words.txt rename to lua/english/wordninja.lua index 8fd15e2..ad57a4a --- a/lua/tools/wordninja_words.txt +++ b/lua/english/wordninja.lua @@ -1,3 +1,173 @@ +#! /usr/bin/env lua +-- +-- wordninja1.lua +-- Copyright (C) 2021 Shewer Lu +-- +-- Distributed under terms of the MIT license. +-- +-- example: +-- wordninja = require('wordnija.lua') +-- wordninja:split('ilovelua') +-- +-- API +-- :test(bool) -- print result +-- :init() load_from_self --[[--start ... --]]--end +-- ( "wordninja_words.txt") +-- ( "wordninja_tab.lua" ) -- load table of chunk +-- :split( str ) + +local maxlen=0 +local dict={} + +local function _split(str,sp) + --- init cost list + local cost={[0]={c=0,k=0}} + local function best_match(s,index,minc,bestk) + index = index or #s + minc = minc or 9e999 + bestk = bestk or #s + if index<1 or index < #s - maxlen then + return {c=minc,k=bestk} + end + --[[ + assert(cost[index-1], ("index:%d -1:%s $s c: %s k: %s"):format( + index, index-1, cost[index-1],cost[index-1].k,cost[index-1].c)) + assert(dict[s:sub(index)] or 9e999, "error" ) + ]] + local c = cost[index-1].c + (dict[s:sub(index)] or 9e999 ) + -- update minc & token + if c < minc then + return best_match(s,index-1,c,index-1) + else + return best_match(s,index-1,minc,bestk) + end + end + local function rever_word(str, tab) + tab = tab or {} + if #str <=0 then return tab end + local h,t = str:sub(1,cost[#str].k), str:sub(cost[#str].k+1) + if t=="'s" then + cost[#str].k= cost[#str-2].k -- cost[#s].k move to next k + return rever_word(str, tab) + end + if t:match("^[%d]+$") then -- number + local h1,t1=h:sub(1,cost[#h].k), h:sub(cost[#h].k+1) + if t1:match("^%d$") then -- single number + cost[#str].k = cost[#str].k -1 -- cost[#s].k -1 + return rever_word(str,tab) + end + end + table.insert( tab,1, t ) -- unshift t + return rever_word(h, tab) + end + + ----- start ------ + local ss = str:lower() + for i=1,#ss do + cost[i] = best_match( ss:sub(1,i) ) + end + return table.concat(rever_word(str), sp) +end + +-- Module +local M={} +--M.maxlen=0 +--M.dict={} + + +function M:split(str,sp) + if maxlen <=0 then + print("dict is empty") + return + end + sp = sp or " " + local str_tab= setmetatable({},{__index=table}) + -- "abe,.p,.poeu-,a" -> {abe p poeu a} + for sub_str in str:gmatch("[%w']+") do + str_tab:insert( _split(sub_str, sp) ) + end + return str_tab:concat(sp) +end + + +function M:test(flag) + str="WethepeopleoftheunitedstatesinordertoformamoreperfectunionestablishjusticeinsuredomestictranquilityprovideforthecommondefencepromotethegeneralwelfareandsecuretheblessingsoflibertytoourselvesandourposteritydoordainandestablishthisconstitutionfortheunitedstatesofAmerica" + res="We the people of the united states in order to form a more perfect union establish justice in sure domestic tranquility provide for the common defence promote the general welfare and secure the blessings of liberty to ourselves and our posterity do ordain and establish this constitution for the united states of America" + + local t1= os.clock() + local r = self:split(str) + assert(r == res, 'test: not match\n\n' .. res .."\n\n" .. r) + local time = os.clock() - t1 + if flag then + print(str) + print(r) + end + print("times: ", time) +end + +-- init +local function __FILE__(n) n=n or 2 return debug.getinfo(n,'S').short_src end + +local function append_txt(tab, ffile) + if ffile:match("lua$") then + local ltab = loadfile(ffile)() + for _,w in ipairs(tab) do + table.insert(tab,w) + end + else + local fp= io.open(ffile) + if not fp then return end + for line in fp:lines() do + table.insert(tab, line:match("[%w']+")) + end + fp:close() + end +end + +local function load_from_self(tab, ffile) + local fp= io.open(ffile) + if not fp then return end + while not fp:read():match("^--%[%[%-%-start") do end + for line in fp:lines() do + if line:match("^--%]%]%-%-end") then break end + table.insert(tab, line:match("^[%w']+")) + end + fp:close() +end + + +function M:init(...) + local slash = package.config:sub(1,1) + local File= __FILE__() + local path ,sfile= __FILE__():match("^(.+".. slash ..")(.+)$") + local words={} + local files={...} + if #files == 0 then + load_from_self(words, File) + else + for _, file in next, files do + append_txt(words,path .. file) + end + end + + -- calculator + local tablog= math.log(#words) + dict = {} + maxlen= 0 + for i,w in next , words do + dict[w] = math.log( i * tablog ) + maxlen = maxlen < #w and #w or maxlen + end + dict[""]=0 + M.dict = dict + -- M.words=words + assert(maxlen > 0,"ERROR: init dict failed. maxlen: ".. maxlen ) +end + +M:init() +return M + +--[[--start the of in @@ -94954,7 +95124,7 @@ crozet zygophyllales shillito camucamu -iloveyou +#iloveyou tortoiseshell reenters kanem @@ -126134,3 +126304,4 @@ o'clock 7 8 9 +--]]--end diff --git a/lua/init_processor.lua b/lua/init_processor.lua index ca8dfc6..1257a81 100644 --- a/lua/init_processor.lua +++ b/lua/init_processor.lua @@ -84,7 +84,7 @@ local F={} function F.reload(env) -- reset package.loaded package.loaded={} - for k,v in next, __PKG_LOADED do package.loaded[k] = v end + --for k,v in next, __PKG_LOADED do package.loaded[k] = v end if rime_api.Version() < 139 then env.engine:process_key(env.keys.reload) @@ -93,6 +93,10 @@ function F.reload(env) end return 1 end +function F.reload(env) + env.engine:commit_text("暫時停用") + return 1 +end function F.pgsize(env,size) env.engine.context:clear() if size and size > 0 and size <10 then @@ -106,9 +110,7 @@ end local function init_modules(env) local modules = List( env:Config_get(env.name_space .. "/modules")) return modules:map( function(elm) - if elm.module then rrequire(elm.module,_ENV) end - local comp=Component.Require(env.engine,"",elm.prescription) - return comp and {name=elm.prescription,comp=comp} + return Component.Require(env.engine,"",elm) end) --return modules end @@ -165,7 +167,7 @@ function M.init(env) env.keys.reload = env.keys.reload and env.keys.reload or KeyEvent('F9') -- use key_binder reload - if rime_api.Version() < 139 then + if rime_api.VERSION < 139 then local ckeyb= env:Config_get('key_binder/bindings/@0') local reload_keyb = { ['when']='always', diff --git a/lua/tools/_component.lua b/lua/tools/_component.lua index 938d0f0..2566acc 100644 --- a/lua/tools/_component.lua +++ b/lua/tools/_component.lua @@ -10,50 +10,52 @@ -- local processors= Set{ - "lua_processor", -"ascii_composer", -"chord_composer", -"express_editor", -"fluid_editor", -"fluency_editor", -"key_binder", -"navigator", -"punctuator", -"recognizer", -"selector", -"speller", -"shape_processor" - } + "predictor", + "lua_processor", + "ascii_composer", + "chord_composer", + "express_editor", + "fluid_editor", + "fluency_editor", + "key_binder", + "navigator", + "punctuator", + "recognizer", + "selector", + "speller", + "shape_processor" +} local segmentors= Set{ -"lua_segmentor", -"abc_segmentor", -"affix_segmentor", -"ascii_segmentor", -"matcher", -"punct_segmentor", -"fallback_segmentor" + "lua_segmentor", + "abc_segmentor", + "affix_segmentor", + "ascii_segmentor", + "matcher", + "punct_segmentor", + "fallback_segmentor" } local translators= Set{ -"lua_translator", -"echo_translator", -"punct_translator", -"table_translator", -"script_translator", -"schema_list_translator", -"switch_translator", -"history_translator", -"codepoint_translator", -"trivial_translator" + "predict_translator", + "lua_translator", + "echo_translator", + "punct_translator", + "table_translator", + "script_translator", + "schema_list_translator", + "switch_translator", + "history_translator", + "codepoint_translator", + "trivial_translator" } local filters=Set{ -"lua_filter", -"simplifier", -"uniquifier", -"charset_filter", -"cjk_minifier", -"reverse_lookup_filter", -"single_char_filter", -"charset_filter" + "lua_filter", + "simplifier", + "uniquifier", + "charset_filter", + "cjk_minifier", + "reverse_lookup_filter", + "single_char_filter", + "charset_filter" } local group_components={ @@ -62,47 +64,15 @@ local group_components={ Translator = translators, Filter = filters, } --- return string : 'Processor' 'Segmentor' ... -local function get_comp_name(comp_name) - for group_name, group_mods in next, group_components do - if group_mods[comp_name] then - return group_name - end - end -end - --- ver > 176 Component add Require --- ver <= 176 Fake Component -if not LevelDb or not Component then - print(' fake component wrap- require') - Component = require 'tools._luacomponent' -end - -local function _delegate_func(comp_tab) - --clone tab - local m ={} - for k,v in next, comp_tab do - m[k] = v - end - for k,v in next,m do - -- add component to _component ... - comp_tab["_"..k]= m[k] - -- replace and delegate - comp_tab[k]= function(...) - local t = Ticket(...) - if t and t.klass:match("^lua_") then - rrequire( t.name_space:split("@")[1]) - end - return comp_tab["_"..k](...) - end - end -end -- add Component.Require -_delegate_func(Component) +require 'tools._ticket' function Component.Require(...) local t = Ticket(...) - local gmod_name = get_comp_name(Ticket(...).klass) - return Component[ gmod_name ](...) + for k,v in next, group_components do + if v[t.klass] then + return Component[k](...) + end + end end -return Compoment +return Component diff --git a/lua/tools/_global.lua b/lua/tools/_global.lua index 884647d..91a4f24 100644 --- a/lua/tools/_global.lua +++ b/lua/tools/_global.lua @@ -15,101 +15,95 @@ --- global init -require 'tools/ver_env' +require 'tools/compat' -- compat : warn _ENV +-- librime-lua version + -- append string.methods split utf8_len utf8_split require 'tools/string' -- 加入 utf8_len utf8_sub split function require 'tools/_log' -- _G 加入 Log(type,args.....) -require 'tools/_file' -- _G 加入 rpath() get_full_path(filename) isDir(path) isFile(path) -require 'tools/_req_api' -- _G 加入 rrequire(mod:string [gtab=_ENG [,mod_name=mod]]) +require 'tools/_file' -- _G 加入 rpath() isDir(path) isFile(path) +require 'tools/_req_api' -- 停用 _G 加入 rrequire(mod:string [gtab=_ENG [,mod_name=mod]]) -- bind split utf8_sub utf8_sub utf8_split -require 'tools/_ticket' -- _G 加入類 Ticket(eng,ns,prescription) +--require 'tools/_ticket' -- 停用 _G 加入類 Ticket(eng,ns,prescription) require 'tools/rime_api' -- 擴充 rime_api 及常數 (見 rime_api.lua) -- prerequire for save to __PKG_LOADED -require 'tools/english_dict' -if LevelDb then - require 'tools/leveldb' -end -if rime_api.Version() < 150 then - require 'tools/_shadowcandidate' +if Component then + require 'tools/_component' -- 擴充 Require(...) 可以自動選定 Processor , Segmentor , Translator , Filter end --- clone from rime_api metatable -get_full_path=rime_api.get_full_path -- --- append utf8.methods utf8.split + --append_path(...) paths + "/?.lua" paths + /?/init.lua function append_path(...) + local List = require "tools/list" local slash = package.config:sub(1,1) - local paths = package.path:split(";") - local res =false - for i,vs in next, {...} do - local path1 = ("%s/?.lua"):format(vs):gsub("/",slash) - local path2 = ("%s/?/init.lua"):format(vs):gsub("/",slash) - for i,v in next, {path1,path2} do - if not paths:find(v) then + local path = package.path:gsub(";$","") + local paths=List(path) + + local path_set = Set( path:split(";") ) + + local pattern = List("%s/?.lua", "%s/?/init.lua") + List(...):each(function(elm) + pattern:map(function(p) return p:format(elm):gsub("/",slash) end) + :each(function(v) + if not path_set[v] then + path_set[v]=true paths:push(v) - res = res or true end - end - end - - if res then - package.path= paths:concat(";") - return true - end - return false + end) + end) + package.path= paths:concat(";") + return #paths > 1 end --append_cpath(...) paths + /?.(dll|so|dylib) function append_cpath(...) + local List = require('tools/list') local slash = package.config:sub(1,1) - local df = package.cpath:match('?.so') - or package.cpath:match('?.dylib') - or package.cpath:match('?.dll') - local paths = package.cpath:split(";") - local res =false - - for i,v in next, {...} do - local path= ("%s/%s"):format(v,df):gsub("/",slash) - if not paths:find(path) then - paths:push(path) - res = true - end - end - if res then - package.cpath= paths:concat(";") - return true - end - return false -end + local cpath = package.cpath:gsub(";$","") + local df = cpath:match('?.so') + or cpath:match('?.dylib') + or cpath:match('?.dll') + + local paths=List(cpath) + local path_set = Set(cpath:split(";")) + List(...):map( + function(elm) return ("%s/%s"):format(elm,df):gsub("/",slash) end) + :each(function(v) + if not path_set[v] then + path_set[v]=true + paths:push(v) + end + end) + package.cpath= paths:concat(";") + return #paths >1 +end -- init_path do - __PKG_LOADED = {} - for k,v in next, package.loaded do __PKG_LOADED[k] = v end - append_path((rime_api.get_user_data_dir() or ".") .. "/lua/component") - append_cpath((rime_api.get_user_data_dir() or ".") .. "/lua/plugins") - -- _G 加入 Rescue - if ENABLE_RESCUE then rrequire('Rescue') end - - -- ENABLE trace - for opt in (os.getenv('RIME_OPT') or ""):gmatch("[^%s]+") do - _ENV[opt] = true - TRACE = TRACE or T00 or T01 or T02 or T03 or T04 - end - - if ENABLE_DEBUG or TRACE then - require 'tools/debugtool' - end - -- pretest 在無關 engine 時測試 library - if _TEST then - luatest_proc= require 'test' - end - - - if T00 and GD then GD() end - + -- 通用 component path + append_path((rime_api.USER_DIR or ".") .. "/lua/component") + -- 放置 動態程式 path + append_cpath((rime_api.SHARE_DIR or ".") .. "/lua/plugins") + -- _G 加入 Rescue + if ENABLE_RESCUE then rrequire('Rescue') end + + -- ENABLE trace + for opt in (os.getenv('RIME_OPT') or ""):gmatch("[^%s]+") do + _G[opt] = true + TRACE = TRACE or T00 or T01 or T02 or T03 or T04 + end + + if ENABLE_DEBUG or TRACE then + require 'tools/debugtool' + end + -- pretest 在無關 engine 時測試 library + if _TEST then + luatest_proc= require 'test' + end + if T01 and GD then GD() end end +return true diff --git a/lua/tools/_userdb.lua b/lua/tools/_userdb.lua new file mode 100644 index 0000000..f404cde --- /dev/null +++ b/lua/tools/_userdb.lua @@ -0,0 +1,88 @@ +#! /usr/bin/env lua +-- +-- userdb.lua +-- Copyright (C) 2024 Shewer Lu +-- +-- Distributed under terms of the MIT license. +-- +--[[ +example: +local userdb = require 'userdb' +local ldb=userdb.LevelDb('ecdict') +ldb:open() +for k,v in ldb:query('a'):iter() do print(k,v) end + +--]] +local db_pool_ = {} +local methods = { + update = true, + open_read_only = true, + query = true, + disable = true, + open = true, + enable = true, + close = true, + loaded = true, + erase = true, + fetch = true, +} +local vars_get= { + _loaded=true, + read_only=true, + disabled=true, + name=true, + file_name=true, + } +local vars_set= {} +local userdb_mt = {} +userdb_mt._db_pool = {} +function userdb_mt.__newindex(tab,key,value) + local db = userdb_mt._db_pool[tab._db_key] + if vars_set[key] and db then + db[key]= value + end +end + +function userdb_mt.__index(tab,key) + local db = userdb_mt._db_pool[tab._db_key] + if not db then return end + if vars_get[key] then + return db[key] + elseif methods[key] then + return function (tab, ...) + return db[key](db,...) + end + else + return userdb_mt[key] + end +end + +function userdb_mt:has_db() + return getmetatable(self)._db_pool[self._db_key] and true or false +end + + +local userdb= {} + +function userdb.UserDb(db_name, db_class) + local db_key = db_name .. "." .. db_class + print('-tract------------>', db_key, userdb_mt._db_pool, userdb_mt._db_pool[db_key]) + if not userdb_mt._db_pool[db_key] then + userdb_mt._db_pool[db_key] = UserDb(db_name, db_class) + print( 'trace -------->' ,userdb_mt._db_pool[db_key] ) + end + return setmetatable({ + _db_key = db_key, + _db_name= db_name, + _db_class = db_class, + }, userdb_mt) +end + +function userdb.LevelDb(db_name) + return userdb.UserDb(db_name, "userdb") +end +function userdb.TableDb(db_name) + return userdb.UserDb(db_name, "plain_userdb") +end + +return userdb diff --git a/lua/tools/ver_env.lua b/lua/tools/compat.lua similarity index 54% rename from lua/tools/ver_env.lua rename to lua/tools/compat.lua index 22e884d..fa428b3 100644 --- a/lua/tools/ver_env.lua +++ b/lua/tools/compat.lua @@ -5,7 +5,10 @@ -- -- Distributed under terms of the MIT license. -- - +-- wrap function warn() +-- compat lua 5.1 5.2 5.3 jit : wran +-- compat lua 5.1 jit : _G = _ENV +-- local _warn_enable=false local function warn(...) local msgs= {...} @@ -20,27 +23,8 @@ local function warn(...) end end -local Ver = _VERSION:match("%d.%d$") -Ver = Ver and Ver or "5.1" -Ver = Ver == "5.1" and jit and "jit" or Ver -print("****************", Ver) -local M={} -M['5.1'] =function() - _G['_ENV']= _G - _G['warn']=warn -end -M['5.2'] =function() - _G['warn']=warn -end -M['5.3'] =function() - _G['warn']=warn -end -M['jit'] =function() - _G['_ENV']= _G - _G['warn']=warn -end -M['5.4'] = function() end -M[Ver]() +_G['warn'] = _G['warn'] or warn +_G['_ENV'] = _G['_ENV'] or _G return true diff --git a/lua/tools/config_api.lua b/lua/tools/config_api.lua index f150b2f..b2fa4fd 100644 --- a/lua/tools/config_api.lua +++ b/lua/tools/config_api.lua @@ -6,218 +6,124 @@ -- Distributed under terms of the MIT license. -- --[[ -config_api.lua -提供 luadata ConfigValue ConfigList ConfigMap ConfigItem 轉換 function -包含遞迴轉換,相同型別不轉換 return 原obj - -luadata boolen number string to ConfigValue -array to ConfigList -map to ConfigMap -ConfigData to ConfigItem -ConfigItem to ConfigData - -to_obj -to_item -to_cdata + config_api.lua + luadata , ConfigItem , Config data( Value List Map) 轉換 + M.get_obj(obj, level) -- return lua data 可設定轉換層數 + M.get_item(obj) -- return ConfigItem + M.get_cdata(obj) -- return Config data + ConfigItem Config data 型別檢查邏輯 + obj.type and obj.get_value ==> ConfigItem + obj.type and obj.element ==> ConfigValue or ConfigList or ConfigMap --]] --- 0xff unknown -local Log = require 'tools/debugtool' -local _type={ - ['nil'] = 0, boolean = 1,number=2, string = 3, table = 4, array= 5, map=6, userdata=8, -configitem=0x80, configvalue=0x81, configlist=0x82, configmap=0x83, configdata=0x87, -undef = 0xff -} --- base_type : boolen number string -local function base_type(obj) - local ct = _type[type(obj)] - return ct >= _type.boolen and ct < _type.table -end - -local function utype(uobj) - if uobj.get_list then return _type.configitem - elseif uobj.element then - local _configtype={kScalar=_type.configvalue, kList=_type.configlist, kMap = _type.configmap } - return _configtype[uobj.type] - else - return _type.undef - end -end --- return type_name of number -local function ctype(cobj) - local luatype = _type[ type(cobj) ] - - if luatype <_type.table then return luatype - elseif luatype == _type.table then - return #cobj >0 and _type.array or _type.map - elseif luatype == _type.userdata then - return utype(cobj) - end -end - -local function is_basetype(ct) - return ct == _type.boolean - or ct == _type.number - or ct == _type.string -end -local function is_table(ct) - return ct == _type.array - or ct == _type.map -end -local function is_configdata(ct) - return ct == _type.configvalue - or ct == _type.configlist - or ct == _type.configmap -end -local function __conv_ltype(str) - local tp= tonumber(str) - if tp then return tp end - tp= str:lower() - if tp == "false" then - return false - elseif tp == "true" then - return true +-- ConfigValue to Lua value +local function _conv_ltype(cobj) + local num = cobj:get_double() + if num then + return num and math.tointeger(num) or num else - return str + return cobj:get_bool() or cobj:get_string() end end -local function _conv_ltype(cobj) - local tp = cobj:get_double() or cobj:get_int() or cobj:get_bool() - return tp == nil - and cobj:get_string() - or tp -end - -local function item_to_obj(config_item,level) - level = level or 99 - if level <1 then return config_item end - - local ct=ctype(config_item) - if ct > _type['configitem'] and ct<=_type['configmap'] then - config_item= config_item.element - elseif ct ~= _type['configitem'] then - return config_item - end - - if config_item.type == "kScalar" then - return _conv_ltype( config_item:get_value() ) - - elseif config_item.type == "kList" then - local cl= config_item:get_list() - local tab={} - for k=0,cl.size-1 do - table.insert(tab, item_to_obj(cl:get_at(k),level -1) ) - end - return tab - - elseif config_item.type == "kMap" then - local cm = config_item:get_map() - local tab={} - for i,k in next,cm:keys() do - tab[k] = item_to_obj( cm:get(k), level -1) - end - return tab - end -end - - -local function obj_to_item(obj) - local ct = ctype(obj) - if ct == _type.configitem then - return obj - elseif is_configdata(ct) then - return obj.element - elseif is_basetype(ct) then - return ConfigValue( tostring(obj) ).element - elseif ct == _type.array then - local cobj=ConfigList() - for i,v in ipairs(obj) do - local o = obj_to_item(v) - if o then cobj:append(o) end - end - return cobj.element - elseif ct == _type.map then - local cobj = ConfigMap() - for k,v in pairs(obj) do - if type(k) == "string" then - local o = obj_to_item(v) - if o then cobj:set(k, obj_to_item(v)) end +-- ConfigItem to lua data with level +local function _item_to_obj(obj, level) + level = level or 99 + if level < 1 then return obj end + local tab = {} + if obj.type == "kScalar" then + return _conv_ltype(obj:get_value()) + elseif obj.type == "kList" then + local cl = obj:get_list() + for i=1,cl.size do + tab[i]= _item_to_obj(cl:get_at(i-1), level -1) end - end - return cobj.element - end -end - -local function _cobjtype(cobj) - local ldata,citem,cdata = 0,1,2 - local ct = ctype(cobj) - if is_basetype(ct) or is_table(ct) then - return ldata - elseif is_configdata(ct) then - return cdata - elseif ct == _type.configitem then - return citem - end + return tab + elseif obj.type == "kMap" then + local cm = obj:get_map() + for i,k in ipairs(cm:keys()) do + tab[k] = _item_to_obj( cm:get(k), level -1) + end + return tab + else + error('type error: ' .. tostring(obj) .. ' ( expect ConfigItem)') + end end - - - -local function item_to_cdata(obj) - if obj.type == "kScalar" then return obj:get_value() - elseif obj.type == "kMap" then return obj:get_map() - elseif obj.type == "kList" then return obj:get_list() - end +-- ConfigItem ConfigValue ConfigList ConfigMap to lua data +local function get_obj(obj, level) + if type(obj) ~= 'userdata' then + return obj + elseif obj.type and obj.element then + return _item_to_obj(obj.element, level) + elseif obj.type and obj.get_value then + return _item_to_obj(obj, level) + else + return obj + end end - -local function to_obj(obj) - local ldata,citem,cdata,llist = 0,1,2,3 - if obj == nil then return nil end - - local ct = _cobjtype(obj) - if ct == ldata then - return obj - elseif ct == cdata then - return to_obj(obj.element) - elseif ct == citem then - return item_to_obj(obj) - end +-- obj to Configitem +local function get_item(obj) + local ct = type(obj) + if ct == "nil" then + return + elseif ct == 'table' then + if #obj > 0 then + local cobj = ConfigList() + for i,v in ipairs(obj) do + local o = get_item(v) + if o then cobj:append(o) end + end + return cobj.element + else + local cobj = ConfigMap() + for k,v in pairs(obj) do + if type(k) == 'string' then + local o = get_item(v) + if o then cobj:set(k, get_item(v)) end + end + end + return cobj.element + end + elseif ct == 'userdata' then + if obj.type and obj.element then + return obj.element + elseif obj.type and obj.get_value then + return obj + else + return get_item( tostring(obj)) + end + elseif ct ~= 'function' then + return ConfigValue(tostring(obj)).element + end end - -local function to_item(obj) - local ldata,citem,cdata,llist = 0,1,2,3 - if obj == nil then return nil end - - local ct = _cobjtype(obj) - - if ct == citem then - return obj - elseif ct == cdata then - return obj.element - elseif ct == ldata then - return obj_to_item( obj ) - end +-- obj to ConfigValue or ConfigList or ConfigMap +local function get_cdata(obj) + local item = get_item(obj) + if not item then return end + if item.type == "kScalar" then + return item:get_value() + elseif item.type == "kMap" then + return item:get_map() + elseif item.type == "kList" then + return item:get_list() + end end -local function to_cdata(obj) - local ldata,citem,cdata,llist = 0,1,2,3 - if obj == nil then return nil end - - local ct = _cobjtype(obj) - if ct == cdata then - return obj - elseif ct == citem then - return item_to_cdata(obj) - elseif ct == ldata then - return to_cdata( obj_to_item(obj)) - end +local M={ + get_obj = get_obj, + get_item = get_item, + get_cdata = get_cdata, +} + +function M.wrap_config(config) + local mt = getmetatable(config) + mt.methods.get_obj = function(self,path,level) + return get_obj( self:get_item(path) , level) + end + mt.methods.set_obj = function(self, path, obj) + self:set_item(path, get_item(obj)) + end end -local M={} -M._type = _type -M.ctype= ctype -M.get_obj = to_obj -M.get_item = to_item -M.get_cdata = to_cdata return M diff --git a/lua/tools/english_dict.lua b/lua/tools/english_dict.lua deleted file mode 100755 index 9d21720..0000000 --- a/lua/tools/english_dict.lua +++ /dev/null @@ -1,458 +0,0 @@ -#! /usr/bin/env lua --- --- english_dict.lua --- Copyright (C) 2020 Shewer Lu --- --- Distributed under terms of the MIT license. --- ---[[ - - ex : - Eng=require('tools/english_dict' ) - e = Eng( 'english') - for w in e:iter('th/i:ad') do -- * ? / : - print(w.word,w:get_info(1) ) - end - - list = e:match('th/i:ad') --List table - for i,v in ipairs(list) - print(w.word,w:get_info(1) - end - - pw=function(w) print(w.word,w:get_info()) end - list:each(pw) - - --- --- ---]] -require 'tools/string' -require 'tools/_file' -local List = List or require 'tools/list' ---USERDIR= ( USERDIR or os.getenv("APPDATA") or "" ) .. [[\Rime]] - --- 字典 字根 查碼 table --- ---local eng_suffixe1={ ["Control+f"] ="*ful" , ["Control+y"]= "*ly" , ["Control+n"]= "*tion" , ["Control+a"] = "*able" , ---["Control+i"] = "*ing" , ["Control+m"]= "*ment" , ["Control+r"]= "*er", } ---env.keyname2={ f ="*ful" , y= "*ly" , n= "*tion" , a = "*able" , ---i = "ing" , m= "*ment" , r= "*er", ---} --- f="ful" --> /f or Control+f ---require 'tools/object' --- 設定 存入bin格式 ---local chunk_bin= true -local NR = package.config:sub(1,1):match("/") and "\n" or "\r" - - -local eng_suffix={ f ="ful" , y= "ly" , t= "tion" ,s="sion", a = "able" , -i = "ing" , m= "ment" , r= "er", g="ght" , n="ness", l="less" , } -local eng_suffix_list={ } - --- 詞類 -local eng_parts={ "a", "abbr", "ad", "art", "aux", "phr", "pl", "pp", "prep", "pron", "conj", "int", "v", "vi", "vt" } -setmetatable(eng_parts,{__index=table } ) - --- 可接收輸入字串 %a[%a%-%.%:_[]%?%*]* 萬用字 ? 0-1 *0-N 萬用字 符合字元[%a%-%.:_] [] 限制字元範圍 --- / 快捷字串 : 詞類 --- 字頭[/快捷1[/快捷2][:詞類] --- --- 分割詞類 字頭[/快捷1[/快捷2] [:詞類] --> 字頭[/快捷1[/快捷2] 詞類 --- 分割快捷碼 字頭 [/快捷1 [/快捷2] --> 字頭 快捷 快捷 --- 截取 字頭字串 %a[%a%-%.%:_]* --- 如果字字串 1 字元 查全表 --- 如果字頭字串 大於2 查表至 不符合時 中斷查詢 因爲 字典是按照 字元排序 --- --- 如果input 長度 1 送出全表 不用查 --- --- --- --- 詞類 快捷 字串 查表 轉換 *ing *tion ... 查不到的 字串加上 * v* a* adv ad* ... --- --- --- --- input: string [%a][%a/: %*%.%?_%-] --- ?* -> "([%?%*])","([%a%.%-_]%%%1" --? * [$a%.%-_]? [%a%.%-]* --- --- 1 gen per_suffix input input:match("^%a[%a%-%.-]+") %a[%a-._]+ --- 2 pre_suffix input conver rex with escap gsub("([%.%-_])","%%%1") --- --- --- 3 pattern , wild_word --- 1 split(":") 英文單字:詞類 --- --- 2 split("/") 字根 萬用字 快捷碼分割 --- "ab*/i/n" ab* i n --- --- --- --- "i" -> *ing n -> tion not match: g -> *g - --- 取得 字頭 英文字串 a-z A-Z . - _ -local function pre_suffix_word(wild_word) - return wild_word:match("^[-.%a][%a%.%-_]*"),wild_word -end --- 轉換 reg pattern 隔離字元 - . %- %. 萬用字元 ? * .? .* -local function conver_rex(str) - return str:gsub("([%-%.])","%%%1"):gsub("([?*])",".%1") -end --- 切割 字串 轉換字串 再組合 return 字頭 全字 詞類 -local function split_str(str) - str= type(str)== "string" and str or "" - local w,p=table.unpack(str:split(":")) - local pw= w:match("^[-.%a][%a%.%-_ ]*") - local ws= List( w:split("/") ) - local ww= ws:shift() - ww= ww .. ws:map(function(elm) - return "*" .. (eng_suffix[elm] or elm) - end):concat() - return pw , ww , p or "" -end --- 轉換 reg 字串 字頭 全字 詞類 -local function conv_pattern(org_text) - local pw,ww,p = split_str(org_text) - pw= "^" .. conver_rex(pw:lower() or "") - ww="^" .. conver_rex(ww:lower()) - p= p:len() >0 - --and "%s" .. conver_rex(p:lower() ) .. "[%l%-%.]*%." - and conver_rex(p:lower() ) .. "[%l%-%.]*%." - or "" -- [ ] p [%a]*%." - return pw, ww, p -end -local function conv_pattern1(org_text,level) - level = level or 3 - local pw,ww,p = split_str(org_text) - pw=pw:lower():sub(1,level) - ww=ww:lower() - p=p:lower() - pw= "^" .. conver_rex(pw) - ww="^" .. conver_rex(ww) - p= p:len() >0 and "%s" .. conver_rex(p) .. "[%a-%.]*%." or "" -- [ ] p [%a]*%." - return pw, ww, p -end - ---local Word= require 'tools/english_word' - -local function New(self,...) - local obj= setmetatable({} , self) - return not obj._initialize and obj or obj:_initialize(...) -end ---- Word --- -local class= require 'tools/class' - - --- Word --- class method Phrase_chunk(row_chunk) Parse_text(row_tab) --- instance method get_info(mode_num) to_s() prefix_match(prefix_str) match:(text) - ---local Word=Class("Word") ---Word.__name= "Word" ---[[ -function Word:__eq(obj) - return self.word == obj -end ---]] --- - -local Word = class()--{} -Word.__name="Word" -function Word:_initialize(tab) - if type(tab)=="table" and tab.word and tab.word:len() >0 then - --and tab.translation and tab.phonetic then - for k,v in next, tab do - self[k] = v - end - self.phonetic = self.phonetic or "" - return self - end -end - --- \t -function Word.Parse_text(line) - local tab={} - local word, translation= table.unpack(line:split("\t")) - if word:len() < 1 then return end - tab.word=word - translation= translation or "" - - local head,tail=translation:find('^%[.*%];') - if head and tail then - tab.phonetic=translation:sub(head,tail-1) - tab.translation=translation:sub(tail+1) - else - tab.phonetic="" - tab.translation=translation - end - return Word(tab) -end -function Word.Parse_chunk(str,replace) - if type(str) == "string" then - str = replace and str:gsub("\\n","\\n"):gsub("\\r","\r") or str - local tab=load("return " .. str )() - return Word(tab) - end -end - -function Word:to_s() - local l = List() - for k,v in next,self do - l:push( string.format(" %s=%q",k,v) ) - end - return "{ " .. l:concat(',') .. "}" -end --- 利用此func 設定comment 輸出格式 --- -function Word:get_info(mode) - mode= tonumber(mode) - local info= self - if not info then return "" end - mode= mode and mode % 7 or 0 - - if mode == 1 then - return (info.phonetic .. " " .. info.translation):gsub("\\n",NR) - elseif mode == 2 then - return info.translation:gsub("\\n", " ") - elseif mode == 3 then - return info.translation:gsub("\\n", NR) - elseif mode == 4 then - return info.phonetic - elseif mode == 5 then - return info.word - elseif mode== 6 then - return "" - else - - return (info.phonetic .. " " .. info.translation):gsub("\\n"," ") - end -end -function Word:prefix_match(prefix, case_match ) - if case_match then - return self.word:find( prefix) == 1 - else - return self.word:lower():find( prefix:lower()) == 1 - end - return false -end - -function Word:chk_parts(parts) - return (self.translation:match(parts) and true) or false -end - -function Word:match(text,case_match) - local pw,ww,pn = conv_pattern(text) - --Log(DEBUG,'word match :', pw,ww,pn,self.word, self.word:match(ww) and true or false ,self:prefix_match(pw)) - local w_match = case_match and self.word:match(ww) or self.word:lower():match(ww) - local p_match = #pn<1 and true or self.translation:match(pn) - return w_match and p_match and true or false -end -Word.is_match= Word.match - ---local class = require 'tools/class' ---return class(Word) ---local MT={} ---MT.__index=MT ---MT.__call=New --- Dict instance method --- iter(text) return iter function for match text pattern --- get(word) return --- LuaDict - -local function init_tree(tree_tab, index, word,level) - local prefix=word:sub(1,level):lower() - for i= #prefix, 1,-1 do - local w = prefix:sub(1,i) - if tree_tab[w] then break end - tree_tab[w] = index - init_tree(tree_tab, index, word, level -1) - end -end - -local LuaDict=class() --setmetatable({},MT) ---LuaDict.__index = LuaDict -LuaDict.__name = 'LuaDict' -LuaDict._db={} -function LuaDict:_initialize(full_path,level) - level = level and level>1 and level or 3 - if T03 and GD then GD() end - if not isFile(full_path) then return end - --self._db = loadfile(full_path)() - self._db = dofile(full_path) - self._tree = {} - self._words = {} - for i,v in next,self._db do - local w = Word(v) - self._db[i] = w - self._words[v.word] = i - init_tree(self._tree,i,w.word,level) - end - return self -end -function LuaDict:_prefix_index(pw) - --local pw,ww,pn = conv_pattern(text) - local index = 1 - if #pw <1 then return index end - - -- 找最近的index - for i= #pw,1,-1 do - local tree_index= self._tree[ pw:sub(1,i) ] - if tree_index then - index = tree_index - break - end - end - -- 找 prefix word index - local count = 0 - repeat - local ww = self._db[index] - if self._db[index]:prefix_match(pw) then - -- 找到索引 如果count > 200 增加 pw 索引 - if count > 200 then - self._tree[pw] = index - end - break - end - count = count +1 - index = index+1 - until index > #self._db - return index -end - -function LuaDict:iter(text) - local pw,ww,pn = split_str(text) - --if not index then GD() end - return coroutine.wrap(function() - local index = self:_prefix_index(pw) - while self._db[index] and self._db[index]:prefix_match(pw) do - if self._db[index]:match(text) then - coroutine.yield(self._db[index]) - end - index = index +1 - end - end) -end - -function LuaDict:get(word) - local index = self._words[word] - return self._db[index] -end - --- Dict instance method --- iter(text) return iter function for match text pattern --- get(word) return --- LevelDict - -local LevelDict= LevelDb and class() --setmetatable({},MT) -if LevelDict then - - - --LevelDict.__index = LevelDict - LevelDict.__name = 'LewelDict' - - function LevelDict:_initialize(full_path) - if rime_api.LevelDb.open(full_path) then - self._dbname=full_path - return self - end - end - function LevelDict:_getdict() - return rime_api.LevelDb.get_db(self._dbname) - end - - function LevelDict:iter(text, case_match) - --local pw,ww,pn = conv_pattern(text) - local pw,ww,pn = split_str(text) - local dbacc = self:_getdict():query(pw:lower()) - return coroutine.wrap(function() - for k,v in dbacc:iter() do - local w = Word.Parse_chunk(v) - if w and w:match(text,case_match) then - coroutine.yield(w) - end - end - end) - end - function LevelDict:get(word) - -- have upcae word ex Abort abort\tAbort - word = word:match("%u") and string.format("%s\t%s",word:lower(),word) or word - local chk_str = self:_getdict():fetch(word) - return chk_str and Word.Parse_chunk(chk_str) - end -end --- English(dict_name) --- instance method --- iter(text) return Word for w in e:iter(text) do ... end --- match(text) return Word of List --- word(word) return Word --- -local English = class() -- setmetatable({},MT) ---English.__index=English -English.__name="English" -English._dicts={} -function English:_getdict() - return English._dicts[self._dict_name] -end ---[[ -function English:_setdict() - English._dicts[self._dict_name] = self._dict -end ---]] -function English:_initialize(dict_name,reload) - dict_name= dict_name or "english_tw" - self._dict_name = dict_name - --local dictdb = self:_getdb() - if reload or not self:_getdict() then - self:reload() - end - - return self:_getdict() and self or nil -end - -function English:reload(force) - local dict_name = self._dict_name - local full_path = get_full_path(dict_name) - -- 1 try LevelDict - if LevelDict and full_path and isDir(full_path) then - self._dicts[self._dict_name] = LevelDict(full_path) - return self._dicts[self._dict_name] and true or false - end - -- 2 try LuaDict - full_path= get_full_path(dict_name .. ".txtl") - if LuaDict and full_path and isFile(full_path) then - self._dicts[self._dict_name] = LuaDict(full_path) - return self._dicts[self._dict_name] and true or false - end - Log(ERROR, "English_dict " .. dict_name .. ' not find ') -end - - -function English:iter(org_text,case_match) - return self:_getdict():iter(org_text,case_match) -end - -function English:match(org_text) - local tab=List() - for w in self:iter(org_text) do - tab:push(w) - end - return tab -end - -function English:word(word) - return self:_getdict():get(word) -end - - --- for debug and check ---[[ debug function -English.Split=split_str --1 ('seteu/i/a:m') 字首seteu 單字 seteu*ing*able 詞類 m -English.Conver_rex=conver_rex --2 ('seteu*?ing:m') 展開/ : seteu.*.?ing:ment 2 -English.Conver_pattern= conv_pattern --3 ('seteu/i/a:m') 字首 ^seteu ^seteu.*ing.*able%sm[%a%-%.]*%. -English.Word=Word ---]] -English.LevelDict = LevelDict -English.LuaDict = LuaDict - -return English - - diff --git a/lua/tools/env_api.lua b/lua/tools/env_api.lua index d2a89c8..f24d931 100644 --- a/lua/tools/env_api.lua +++ b/lua/tools/env_api.lua @@ -28,7 +28,7 @@ function E:get_status() stat.has_menu= ctx:has_menu() -- old version check ( Projection userdata) local ok,empty - if rime_api.Version() < 100 then + if rime_api.VERSION < 100 then ok,empty = pcall(comp.empty) empty= ok and empty or comp:empty() -- empty= ( ok ) ? empty : comp:empty() else diff --git a/lua/tools/leveldb.lua b/lua/tools/leveldb.lua deleted file mode 100644 index 20b2ca0..0000000 --- a/lua/tools/leveldb.lua +++ /dev/null @@ -1,56 +0,0 @@ -#! /usr/bin/env lua --- --- leveldb.lua --- Copyright (C) 2022 Shewer Lu --- --- Distributed under terms of the MIT license. --- --- ---support leveldb pool --- ldb = require'tools/leveldb' --- leveldb = ldb.open(fn,dbname) -- return instance of LevelDb - -require 'tools/_file' - -local function find_path(fn) - local full_path = get_full_path(fn) - --return full_path and isDir(full_path) and full_path or rime_api.get_user_data_dir() .. "/" .. fn - return fn -end - -function opendb(fn,dbname) - local filename = find_path(fn) - local db=LevelDb(filename,dbname or "") or nil - if db and not db:loaded() then - db:open() - end - return db -end - -local M = {} -M._db_pool={} - -function M.open(fn,dbname) - if not M._db_pool[fn] then - M._db_pool[fn] = opendb(fn,dbname) or nil - end - return M._db_pool[fn] -end - -function M.pool_status() - local tab = {} - for k,v in next,M._db_pool do - table.insert(tab, ("%s:%s(%s)"):format(k,v,v:loaded() and "opening" or "closed" )) - end - return tab -end -function M.get_db(fn) - return M._db_pool[fn] -end --- M.open(fn,dbname) -- return db --- M.pool_status() -- return status of table --- M.get_db(fn) -- return db or nil -return M - - - diff --git a/lua/tools/list.lua b/lua/tools/list.lua index 8f07a78..b44851e 100755 --- a/lua/tools/list.lua +++ b/lua/tools/list.lua @@ -258,7 +258,16 @@ function M:sort(func,...) return self:clone():sort_self() end - +function M:uniq() + local set={} + return self:reduce(function(elm,res) + if not set[elm] then + res:push(elm) + set[elm] = true + end + return res + end, M()) +end local append_func=function(elm,org) return org:push(elm) end @@ -300,7 +309,6 @@ function M.Range(num1,num2,step) end step = (step and step or 1) * (num1 <= num2 and 1 or -1) local l= M() - print(num1,num2,step) for i=num1,num2 ,step do l:push(i) end diff --git a/lua/tools/rime_api.lua b/lua/tools/rime_api.lua index 874fa77..0b15251 100755 --- a/lua/tools/rime_api.lua +++ b/lua/tools/rime_api.lua @@ -67,9 +67,6 @@ local List = require'tools/list' require 'tools/string' -if not rime_api then - require 'test/fake/rime_api' -end --[[ rime_api.get_user_data_dir=function() return io.popen('pwd'):read() @@ -78,47 +75,55 @@ rime_api.get_shared_data_dir= function() return '/usr/shared/rime-data' end --]] -local function Version(env) - if type(env) == "table" and env.engine then - if env.engine.context.composition:toSegmentation().get_segments then - return 215 - end - end - local ver - if Opencc and Opencc('s2t.json').convert_word then - return 200 - elseif rime_api.regex_match then - return 197 - elseif rime_api.get_distribution_name then - return 185 - elseif LevelDb then - return 177 - elseif Opencc then - return 147 - elseif KeySequence and KeySequence().repr then - return 139 - elseif ConfigMap and ConfigMap().keys then - return 127 - elseif Projection then - return 102 - elseif KeyEvent then - return 100 - elseif Memory then - return 80 - elseif rime_api.get_user_data_dir then - return 9 - elseif log then - return 9 - else - return 0 - end -end - - +local function find_ver() + if Component and Component.TableTranslator then + return 287 + elseif UserDb and TableDb then + return 240 + elseif UserDb then + return 220 + elseif rime_api.regex_match then + return 197 + elseif rime_api.get_distribution_name then + return 185 + elseif LevelDb then + return 177 + elseif Opencc then + return 147 + elseif KeySequence and KeySequence().repr then + return 139 + elseif ConfigMap and ConfigMap().keys then + return 127 + elseif Projection then + return 102 + elseif KeyEvent then + return 100 + elseif Memory then + return 80 + elseif rime_api.get_user_data_dir then + return 9 + elseif log then + return 9 + else + return 0 + end +end +local function Ver_info() + local msg1 = rime_api.get_user_id + and string.format(" %s %s %s (id:%s) ", + rime_api.get_distribution_name(), + rime_api.get_distribution_code_name(), + rime_api.get_distribution_version(), + rime_api.get_user_id()) + or "" + local msg2 = string.format(" Ver: librime %s librime-lua %s lua %s", + rime_api.get_rime_version(), find_ver(), _VERSION ) + return msg1 .. msg2 +end -- 舊的版本 使用 lua_function 轉換 且 模擬 :apply(str) 接口 local function old_Init_projection(config,path) @@ -141,7 +146,7 @@ end local function Init_projection( config, path) -- old version - if Version() < 102 then + if rime_api.VERSION < 102 then return old_Init_projection(config,path) end local patterns= config:get_list( path ) @@ -164,24 +169,11 @@ end ----- rime_api tools -local function Ver_info(env) - local msg1 = rime_api.get_user_id and string.format(" %s %s %s (id:%s) ", - rime_api.get_distribution_name(), - rime_api.get_distribution_code_name(), - rime_api.get_distribution_version(), - rime_api.get_user_id()) or "" - - local msg2 = string.format(" Ver: librime %s librime-lua %s lua %s", - rime_api.get_rime_version() , Version(env) ,_VERSION ) - - return msg1 .. msg2 -end - -- librime-lua ver >=9 local udir=rime_api and rime_api.get_user_data_dir() or "." local sdir=rime_api and rime_api.get_shared_data_dir() or "." local function get_full_path(filename) - local fpath = udir .. "/" .. filename + local fpath = udir .. "/" .. filename or "" if file_exists(fpath) then return fpath,udir,filename end fpath = sdir .. "/" .. filename if file_exists(fpath) then return fpath,sdir,filename end @@ -196,22 +188,29 @@ local function load_reversedb(dict_name) log.warning( env.name_space .. ": can't load Reversedb : " .. reverse_filename ) end + + local M = {} M.__index=M -M.__newindex=function(tab,key,value) -end +M.__newindex = function (tab, key, value) log.warning( key .. 'is unwritable') end +setmetatable(rime_api,M) +M.VERSION = find_ver() +M.INFO = Ver_info() +M.UNIX = package.config:sub(1, 1) == '/' +M.WIN = not M.UNIX +M.NR = M.WIN and "\\" or "/" +M.RIME_VERSION = rime_api.get_rime_version() +M.USER_DIR = rime_api.get_user_data_dir() +M.SHARED_DIR = rime_api.get_shared_data_dir() +--warp LevelDb with db_pool,避免重覆開啓異常 +M.UserDb= UserDb and require 'tools._userdb' or nil + +M.Projection= Init_projection -- warp Projection() +M.ReverseDb= load_reversedb -- warp ReverseDb() +M.get_full_path= get_full_path +-- const var -M.Version=Version -M.Ver_info=Ver_info -M.Projection= Init_projection -M.ReverseDb= load_reversedb -M.VER_INFO= Ver_info() -M.LIBRIME_LUA_VER= Version() -M.LIBRIME_VER = rime_api.get_rime_version() -M.USER_DIR = rime_api.get_user_data_dir() or "." -M.SHARED_DIR = rime_api.get_shared_data_dir() or "." +-- 棄用 warp Comonent --Component = Version() >= 177 and Component or require('tools/_component') -M.LevelDb = LevelDb and require('tools/leveldb') -M.get_full_path= get_full_path -setmetatable(rime_api,M) + return rime_api diff --git a/lua/tools/userdb.lua b/lua/tools/userdb.lua new file mode 100644 index 0000000..9c113cd --- /dev/null +++ b/lua/tools/userdb.lua @@ -0,0 +1,59 @@ +#! /usr/bin/env lua +-- +-- leveldb.lua +-- Copyright (C) 2022 Shewer Lu +-- +-- Distributed under terms of the MIT license. +-- +-- +--support userdb pool +-- ldb = require'tools/leveldb' +-- leveldb = ldb.open(fn,dbname) -- return instance of LevelDb + +require 'tools/_file' + + +local M = {} +M._db_pool={} +function M:pool_status() + local tab = {} + for k,v in next, self._db_pool do + table.insert(tab, ("%s:%s(%s)"):format(k,v,v:loaded() and "opening" or "closed" )) + end + return tab +end +function M:get_db(fn) + return self._db_pool[fn] +end + +function M:Open(fn,dbname) + local name = fn .. "." .. dbname + if self._db_pool[name] then + return self._db_pool[name] + end + if _RL_VERSION >=240 and (dbname == "userdb" or dbname == "plain_userdb") then + self._db_pool[name] = UserDb(fn,dbname) + elseif _RL_VERSION >=177 and (dbname == "userdb") then + self._db_pool[name] = LevelDb(name, dbname) + end + + local db = self._db_pool[name] + if db then + db:open() + return db + else + log.error("failed to open file : " .. name) + end +end + +M.UserDb= M.Open +function M:LevelDb(fn) return self:Open(fn,"userdb") end +function M:TableDb(fn) return self:Open(fn, "plain_userdb") end + +-- M:Open(fn,dbname) -- return db ver>= 177 +-- M:UserDb(fn, dbname) -- same as Open +-- M:LevelDb(fn) -- >= 177 +-- M:TableDb(fn) -- >= 240 +-- M:pool_status() -- return status of table +-- M:get_db(fn) -- return db or nil +return M diff --git a/lua/tools/wordninja.lua b/lua/tools/wordninja.lua deleted file mode 100755 index 001ed60..0000000 --- a/lua/tools/wordninja.lua +++ /dev/null @@ -1,170 +0,0 @@ -#! /usr/bin/env lua --- --- wordninja1.lua --- Copyright (C) 2021 Shewer Lu --- --- Distributed under terms of the MIT license. --- --- wordninja = require('wordnija.lua') --- wordninja.init( "./wordninja_words.txt") --- --- wordninja.split( str ) -- return list table --- .test() test sample --- --- exp: --- wordninja.split("ilovelua"):concat(" ") --- --- --- -local function files_to_lines(...) - local tab=setmetatable({},{__index=table}) - local index=1 - for i,filename in next,{...} do - local fn = io.open(filename) - if fn then - --local fn=io.popen("zcat ".. filename) - for w in fn:lines() do - local ww= w:gsub("%s","") or "" - if not ww or #ww > 0 then - tab:insert( ww ) - end - end - fn:close() - end - end - return tab -end - -local function dictload(...) -- filename) - local tab=files_to_lines(...) - local dict={} - local max_len=0 - local tablog= math.log(#tab) - for i,w in next , tab do - dict[w] = math.log( i * tablog ) - max_len = ( max_len < #w and #w ) or max_len - end - dict[""]=0 - return dict,max_len -end - ---local dict,max_len = dictload("./wordninja/wordninja_words.txt.gz") -local bast_leftword={} - -local function substr(str,i,tp) -- str,i return h str , t str str,i,true return #h str , t.str - i= ( i<0 and 0) or i - return tp and #(str:sub(0,i)) or str:sub(0,i) , str:sub(i+1) -end - -local function _split(str,dict_tab) - --- init cost list - local cost={} - cost[0]={c=0,k=0} - - local function best_match(s,index,minc,bestk) - -- index= index or #s - -- minc=minc or 9e999 - -- k= k or #s - -- print(s,index,minc,bestk,s:sub(index),dict[s:sub(index)]) - -- - -- stop loop max_loop == dict_tab.maxlen - if index<1 or index < #s - dict_tab.maxlen then - -- print("----strlen:", s:len() ,"index" ,index ,"loop=", s:len() -index, "minc", minc ,"best_token" , bestk) - return {c=minc,k=bestk} - end - assert(cost[index-1], ("index:%d -1:%s $s c: %s k: %s"):format( - index, index-1, cost[index-1],cost[index-1].k,cost[index-1].c)) - assert(dict_tab.dict[s:sub(index)] or 9e999, "error" ) - -- print( ("cost[%s].c: %s , dict[ %s]:%s "):format( index-1, cost[index-1].c , s:sub(index), dict[s:sub(index)]or 9e999 )) - local c = cost[index-1].c + ( dict_tab.dict[s:sub(index)] or 9e999 ) - -- update minc & token - if c < minc then - bestk=index-1 - minc=c - end - return best_match(s,index-1,minc,bestk) - end - - local function rever_word( s,dict_tab) - local h,t = substr(s,cost[#s].k) - if #s <=0 then return dict_tab end - --print(s,#s ,cost[#s].k,"===" ,h.." | "..t ,"===" ,table.concat( dict_tab," ") ) - if t=="'s" then - cost[#s].k= cost[#s-2].k -- cost[#s].k move to next k - return rever_word(s, dict_tab) - end - if t:match("^[%d]+$") then - local h1,t1=substr(h,cost[#h].k) - if t1:match("^%d$") then - cost[#s].k = cost[#s].k -1 -- cost[#s].k -1 - --print("---callback ",s,#s ,cost[#s].k,"===" ,h.." | "..t ,"===" ,table.concat( dict_tab," ") ) - return rever_word(s, dict_tab) - end - end - table.insert( dict_tab,1, t ) -- unshift t - return rever_word(h, dict_tab) - end - ----- start ------ - - local ss=str:lower() - for i=1,#ss do - cost[i] = best_match( ss:sub(1,i) ,i, 9e999, i) - end - return rever_word(str,{} ) -end - --- Module -local M={} -function split(s,sp) - sp= sp or " " - local tab= setmetatable({},{__index=table}) - -- "abe,.p,.poeu-,a" -> {abe p poeu a} - for w in s:gmatch("[%a%d']+") do - for i,ww in next , _split(w,M) do - tab:insert(ww) - end - end - return table.concat(tab,sp) -end - - -function M.test() - str="WethepeopleoftheunitedstatesinordertoformamoreperfectunionestablishjusticeinsuredomestictranquilityprovideforthecommondefencepromotethegeneralwelfareandsecuretheblessingsoflibertytoourselvesandourposteritydoordainandestablishthisconstitutionfortheunitedstatesofAmerica" - res="We the people of the united states in order to form a more perfect union establish justice in sure domestic tranquility provide for the common defence promote the general welfare and secure the blessings of liberty to ourselves and our posterity do ordain and establish this constitution for the united states of America" - - local t1= os.clock() - local r = M.split(str) - local time = os.clock() - t1 - return r == res,time -end - -function M.init(filename,...) - local path= string.gsub(debug.getinfo(1).source,"^@(.+/)[^/]+$", "%1") - filename = filename or "wordninja_words.txt" - local files={...} - table.insert(files,filename) - - print("------------------------>>>>> load wordninja-lua") - for i,v in next, files do - files[i] = path .. v - end - M.dict,M.maxlen= dictload( table.unpack(files)) - M.split=split -end - - -local is_windows = package.cpath:match("?.dll") and true or false -if is_windows then - M.init() -else - local ok, res = pcall(require, 'wordninja') - if ok then - M.split = res.split - else - M.init() - end -end - - -return M - diff --git a/processor_plugin.yaml b/processor_plugin.yaml index 2b4cf9d..6b47963 100644 --- a/processor_plugin.yaml +++ b/processor_plugin.yaml @@ -9,7 +9,7 @@ # append after_modules to segmentors translators filters #-------------------------------- patch: - engine/processors/@after 0: lua_processor@init_processor@module + engine/processors/@after 0: lua_processor@*init_processor@module #engine/processors/@after 0: lua_processor@init_proc schema/dependencies/+: - essay @@ -17,12 +17,12 @@ patch: - stroke module: modules: - - { prescription: "lua_processor@command.proc@command" } # lua/command/proc.lua namespace: command - - { prescription: "lua_processor@calculater.proc@cal_cmd" } # lua/command/proc.lua namespace: command - - { prescription: "lua_processor@select_character.proc" } # lua/select_character/proc.lua namespace: translator(default) - - { prescription: "lua_processor@english.proc@english" } # lua/english/proc.lua namespace: english - - { prescription: "lua_processor@conjunctive.proc@conjunctive"} # conjunctive/prac.lua namespace: conjunctive - - { prescription: "lua_processor@multi_reverse.proc@multi_reverse" } # lua/multi_reverse/proc.lua namespace: multi_reverse + #- { prescription: "lua_processor@command.proc@command" } # lua/command/proc.lua namespace: command + #- { prescription: "lua_processor@calculater.proc@cal_cmd" } # lua/command/proc.lua namespace: command + #- { prescription: "lua_processor@select_character.proc" } # lua/select_character/proc.lua namespace: translator(default) + - "lua_processor@*english.proc@english" # lua/english/proc.lua namespace: english + #- { prescription: "lua_processor@conjunctive.proc@conjunctive"} # conjunctive/prac.lua namespace: conjunctive + - "lua_processor@*multi_reverse.proc@multi_reverse" # lua/multi_reverse/proc.lua namespace: multi_reverse keybinds: prtscr: "Control+F12" # commit_text of menu list @@ -33,22 +33,22 @@ patch: # init_processor append comopents before_modules: filters: - - lua_filter@stroke_count + #- lua_filter@*stroke_count #- lua_filter@debug_filter segments: translators: - - lua_translator@unicode_tran + - lua_translator@*unicode_tran #- lua_translator@ecdict after_modules: filters: - - lua_filter@stroke_count + - lua_filter@*stroke_count #- uniquifier #- lua_filter@multi_reverse.filter@S_luna_pinyin - - lua_filter@debug_filter + - lua_filter@*debug_filter segments: translators: - - lua_translator@memory_tran@luna_pinyin - - lua_translator@select_character.tran@select_character # select_character 配件 模擬矩陣符號表 []1-0 選字 /emj /nu /sma /smb /smc + #- lua_translator@*memory_tran@luna_pinyin + - lua_translator@*select_character.tran@select_character # select_character 配件 模擬矩陣符號表 []1-0 選字 /emj /nu /sma /smb /smc # select_character name_space translator 須要調用反查字典和 comment_format 設定 name_space :translator # name_space of select_character hot-key