对一个 20667 行的 xlsx 文件进行遍历测试(Python)
from functools import wraps
from time import perf_counter

import pandas as pd


def print_execute_time(func):
    """Decorate *func* so that every call prints its execution time.

    Args:
        func: The function or method to time.

    Returns:
        A wrapper that calls *func* with the same arguments, prints
        ``<name>() execute time: <seconds>s`` and returns whatever
        *func* returned.
    """

    @wraps(func)  # keep the decorated function's __name__ / __doc__
    def wrapper(*args, **kwargs):
        # perf_counter() is meant for measuring short intervals; the
        # wall-clock time() can jump if the system clock is adjusted.
        start = perf_counter()
        func_return = func(*args, **kwargs)
        end = perf_counter()
        # Report the function name together with its execution time.
        print(f'{func.__name__}() execute time: {end - start}s')
        return func_return

    return wrapper


file_path = r"D:\git\xxxx\dev\pd-xxx1.2\合并.xlsx"

# Default DataFrame used by the benchmarks below when no ``frame`` argument
# is given.  It is filled in by the ``__main__`` block so that importing
# this module does not read the Excel file.
df = None


def load_frame(path=file_path, sheet_name="xxxx"):
    """Read one sheet of the Excel workbook and apply the null handling.

    Args:
        path: Path of the ``.xlsx`` file to read.
        sheet_name: Name of the sheet to load.

    Returns:
        The DataFrame produced by ``data.where(data.notnull(), None)``.
    """
    data = pd.read_excel(path, sheet_name=sheet_name, engine='openpyxl')
    # Null handling: ask pandas to put None wherever a cell is missing.
    return data.where(data.notnull(), None)


def _frame_or_default(frame):
    """Return *frame*, falling back to the module-level ``df``."""
    if frame is not None:
        return frame
    if df is None:
        raise RuntimeError(
            "no DataFrame available: pass `frame` or assign the "
            "module-level `df` first"
        )
    return df


@print_execute_time
def iterrows(frame=None):
    """Walk the frame row by row with ``DataFrame.iterrows()``.

    Args:
        frame: DataFrame to iterate; defaults to the module-level ``df``.
    """
    for label, row in _frame_or_default(frame).iterrows():
        # A cell would be read as row['机号'] here.
        pass


@print_execute_time
def itertuples(frame=None):
    """Walk the frame row by row with ``DataFrame.itertuples()``.

    Args:
        frame: DataFrame to iterate; defaults to the module-level ``df``.
    """
    for row in _frame_or_default(frame).itertuples():
        # Rows are namedtuples: a cell would be read as row.机号
        # (string indexing such as row['机号'] does not work on tuples).
        pass


@print_execute_time
def iteritems(frame=None):
    """Walk the frame with ``DataFrame.items()``.

    ``DataFrame.iteritems()`` was removed in pandas 2.0; ``items()`` is the
    equivalent call.  NOTE: this iterates over COLUMNS, yielding
    ``(column name, Series)`` pairs, so the loop body runs once per column
    rather than once per row.  Its timing is therefore not comparable with
    the row-wise benchmarks.

    Args:
        frame: DataFrame to iterate; defaults to the module-level ``df``.
    """
    for column_name, column in _frame_or_default(frame).items():
        pass


@print_execute_time
def index(frame=None):
    """Walk the row labels in ``DataFrame.index``.

    Only the labels are visited; no row data is read inside the loop.

    Args:
        frame: DataFrame to iterate; defaults to the module-level ``df``.
    """
    target = _frame_or_default(frame)
    for i in target.index:
        # A cell would be read as target['机号'].at[i] here.
        pass


if __name__ == '__main__':
    df = load_frame()
    print('begining ...')
    # Call the benchmarks one after another.  They return None, so wrapping
    # the calls in print() would only add a "None None None None" line.
    for benchmark in (iterrows, itertuples, iteritems, index):
        benchmark()
    print('Done !')
begining ... iterrows() execute time: 2.003657817840576s itertuples() execute time: 0.04618692398071289s iteritems() execute time: 0.0009987354278564453s index() execute time: 0.0029909610748291016s Done !
iterrows() execute time: 2.2464449405670166s itertuples() execute time: 0.08178043365478516s iteritems() execute time: 0.000997781753540039s index() execute time: 0.0059833526611328125s
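所以从效率上考虑,优先采用 iteritems 或 index 来进行遍历数据。
注意:iteritems 是按列遍历(每次返回列名和该列的 Series),而上面的 index 测试只遍历了索引标签、没有读取行数据,
因此这两项的耗时不能与 iterrows、itertuples 的逐行遍历直接比较;另外 iteritems 在 pandas 2.0 中已被移除,应改用 items。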