只记录大致思路和我认为其中需要记录的地方。
正则匹配的模式很难记忆,即便记住了,也很难一次写出没有错误的匹配模式。不过,可以借助网上一些提供实时匹配预览的网站(如 regexr.com)来编写和调试匹配模式。
代码示意:
import os
import re

# Pattern for "get_imgpath" timing lines in the log.
# Compiled with re.VERBOSE, so whitespace and "#" comments inside the pattern
# are ignored; literal whitespace in the log is matched with \s.
# Group layout as returned by findall():
#   [0] whole match, [1] timestamp, [2] text before the keyword,
#   [3] elapsed time, [4] image directory argument, [5] resulting image path.
get_imgpath_regex = re.compile(r'''(
    (\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d+)  # time
    \s                                          # separate
    (.*)?get_imgpath\sused\stime\sis\s          # info
    (\d+\.\d+)                                  # cost time (literal decimal point)
    \sargs\sis\s\(u'                            # separate
    (.*?)',\)\sresult\sis\s                     # img dir
    (.*?\.jpg)                                  # img path (literal ".jpg" suffix)
    )''', re.VERBOSE)

# Cheap substring test applied before running the (more expensive) regex.
get_imgpath_flag = 'get_imgpath'


def _parse_get_imgpath_line(line):
    """Return one record dict per ``get_imgpath`` entry found in *line*.

    Each record has the keys ``date``, ``cost_time``, ``img_dir`` and
    ``img_path``, all as the raw strings captured by ``get_imgpath_regex``.
    """
    return [
        {
            'date': groups[1],
            'cost_time': groups[3],
            'img_dir': groups[4],
            'img_path': groups[5],
        }
        for groups in get_imgpath_regex.findall(line)
    ]


def main():
    """Scan every input log file and collect the ``get_imgpath`` records.

    NOTE(review): ``infile_list`` (paths to read) and ``get_imgpath_match``
    (list the records are appended to) are not defined in this snippet; they
    are expected to exist at module level -- confirm against the full script.
    """
    for infile in infile_list:
        # Parse each line using the regex pattern.
        with open(infile, "r") as file:
            for line in file:
                # First check for the keyword, then extract the fields.
                if get_imgpath_flag in line:
                    get_imgpath_match.extend(_parse_get_imgpath_line(line))
采取的方式是把参数存储在一个单独的文件中,如 config.json。
{ "FLAG" : { "SAVE_SPILT_LOG_FILE_FLAG" : false , "SAVE_MERGE_LOG_FILE_FLAG" : false , "USE_CURRENT_PATH" : false }, "PATH" : { "INPUT_LOG_FILE_PATH" : "E:\\zwk\\Code\\logger_read\\data\\pro_data" , "SAVE_SPILT_MERGE_LOG_PATH" : "E:\\zwk\\Code\\logger_read\\output\\spilt_merge_log" , "OUTPUT_RESULT_PATH" : "E:\\zwk\\Code\\logger_read\\output" }, "PARAMETERS" : { "windows_size" : 2 , "duplicate_times" : 1 } }
再对参数进行解析:
import json
import os


def load_config(config_file):
    """Load and return the JSON configuration stored at *config_file*.

    Raises:
        SystemExit: if *config_file* does not exist. The original code only
            printed the error and then crashed in ``open()``; exiting here
            makes the "Exit" in the message true.
    """
    if not os.path.isfile(config_file):
        raise SystemExit('Error: loss of config file, Exit !!!')
    with open(config_file, 'r') as f:
        return json.load(f)


def main():
    """Resolve the working paths from the module-level ``config`` dict.

    When ``config['FLAG']['USE_CURRENT_PATH']`` is true, the three entries of
    ``config['PATH']`` are overwritten in place with locations derived from
    the current working directory.
    """
    if config['FLAG']['USE_CURRENT_PATH']:
        pwd = os.getcwd()
        config['PATH']['INPUT_LOG_FILE_PATH'] = os.path.join(pwd, 'data')
        config['PATH']['SAVE_SPILT_MERGE_LOG_PATH'] = os.path.join(pwd, 'output')
        config['PATH']['OUTPUT_RESULT_PATH'] = pwd

    # Kept from the original sketch; presumably consumed by the processing
    # code that the article leaves out.
    output_file_path = config['PATH']
    flag = config['FLAG']


if __name__ == '__main__':
    # Locate config.json next to this script so the lookup does not depend on
    # the directory the script is launched from.
    this_folder = os.path.dirname(os.path.abspath(__file__))
    config_file = os.path.join(this_folder, 'config.json')
    config = load_config(config_file)
    # improve it, use as global variable
    parameters = config['PARAMETERS']
    main()
在这里,示例一种我认为还不错的做法,主要优点是不受操作系统影响:
# Build the config path from this script's own location with os.path.join,
# so the lookup does not depend on the launch directory or on the platform's
# path separator.
this_folder = os.path.dirname(os.path.abspath(__file__))
config_file = os.path.join(this_folder, 'config.json')
if not os.path.isfile(config_file):
    # Actually stop here: the original only printed the message and then
    # failed in open() with FileNotFoundError.
    raise SystemExit('Error: loss of config file, Exit !!!')
with open(config_file, 'r') as f:
    config = json.load(f)

# ... (middle part omitted) ...

# write output to files
# exist_ok=True makes this a no-op when the directory is already there.
# NOTE(review): `outfile` is not defined in this excerpt -- presumably the
# config's "PATH" section; confirm against the full script.
os.makedirs(outfile['SAVE_SPILT_MERGE_LOG_PATH'], exist_ok=True)
```