I wrote a bulk insert before; this time I dug through the docs again and found another approach: the bulk body holds a sequence of JSON records, laid out as <action><data><action><data><action><data>... It looked rather odd, so I wrote a short piece of Python code to test it, and it does work. There is probably a better solution, but this dirty code is good enough for an experiment.
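For reference, the raw body the Bulk API expects is newline-delimited JSON, alternating one action line with one source line; the ids and field values below are made up for illustration:

```json
{"index": {"_index": "testindex", "_type": "testindex_data1", "_id": "1"}}
{"id": "1", "data": "first row"}
{"index": {"_index": "testindex", "_type": "testindex_data1", "_id": "2"}}
{"id": "2", "data": "second row"}
```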
The test script:

```python
# -*- coding: utf-8 -*-
import codecs
from datetime import datetime
from elasticsearch import Elasticsearch

#ESIP = 'localhost'
ESIP = '192.168.0.2'
index_name = 'testindex'
doc_type_name = 'testindex_data1'

# init Elasticsearch; index() auto-generates an id for the dummy
# document (create() would require an explicit id)
es = Elasticsearch([{'host': ESIP}])
es.index(index=index_name, doc_type=doc_type_name,
         body={'any': 'data', 'timestamp': datetime.now()})

# read comma-separated spreadsheet data
data = []
with codecs.open('test.csv', 'r', 'utf-8') as fd:
    for line in fd:
        fields = line.strip().split(',')
        data.append({'id': fields[1], 'data': fields[1]})

# bulk insert: the body alternates <action> and <data> entries
cache = []
bulk_size = 500
for d in data:
    # flush a full cache before appending, so no document is skipped
    if len(cache) >= bulk_size:
        es.bulk(body=cache, index=index_name, doc_type=doc_type_name)
        cache = []
    action = {'index': {'_index': index_name,
                        '_type': doc_type_name,
                        '_id': d['id']}}
    cache.append(action)
    cache.append(d)

# flush whatever is left over
if len(cache) > 0:
    es.bulk(body=cache, index=index_name, doc_type=doc_type_name)
```
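One better solution does exist: elasticsearch-py ships a `helpers.bulk()` wrapper that builds the action/source pairs and chunks the request for you. A minimal sketch, reusing the `data` list and the index/type names from the script above:

```python
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch([{'host': '192.168.0.2'}])

# each action carries its routing metadata plus the document in _source;
# helpers.bulk() serializes and chunks the request internally
actions = [{'_index': 'testindex',
            '_type': 'testindex_data1',
            '_id': d['id'],
            '_source': d}
           for d in data]
helpers.bulk(es, actions, chunk_size=500)
```

`helpers.bulk()` returns a `(successes, errors)` tuple, so failed documents are easier to spot than with the manual loop.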