由于习惯了使用pandas的DataFrame数据结构,同时pandas作为一个方便计算和表操作的数据结构具有十分显著的优点,甚至很多时候DataFrame可以当作excel来使用,而用python操作gis的shp文件时很不顺畅,不太符合使用习惯,故写了一对DataFrame与arcgis地理文件相互转换的函数,这样处理起来可以节约大量的思考时间。
Shp转DataFrame:
import codecs
import glob
import re

import arcpy
import pandas as pd
def Shp2dataframe(path):
    """Read an arcpy table or feature class into a pandas DataFrame.

    Every field reported by ``arcpy.ListFields`` becomes one column, named
    after the field, and every row yielded by the search cursor becomes one
    DataFrame row.

    Args:
        path: Dataset path handed to ``arcpy.ListFields`` and
            ``arcpy.SearchCursor``.

    Returns:
        A pandas DataFrame with one column per field, in field order.
    """
    fields = arcpy.ListFields(path)
    fieldname = [f.name for f in fields]
    # A cursor instance can only be iterated once, so all rows are collected
    # in a single pass. Values of the Shape field are geometry objects.
    cursor = arcpy.SearchCursor(path)
    records = [[rec.getValue(name) for name in fieldname] for rec in cursor]
    return pd.DataFrame(records, columns=fieldname)
DataFrame转Shp:
DataFrame转Shp采用了模板形式,通过模板建立字段、坐标系等,可以更加快速地构建字段。
# Convert a pandas table read by ReadTable into shp format; `template` is the
# template dataset.
def Dataframe2ShpTemplate(df, outpath, geoType, template):
    """Write a pandas DataFrame out as an Esri feature class.

    A new feature class is created at `outpath` from `template`, then every
    row of `df` is inserted into it through a single insert cursor.

    Args:
        df: DataFrame to write. Its column names are used as the cursor's
            field names, and it must contain a 'Shape' column holding the
            geometry of each row.
        outpath: Output path, '/'-separated; the part after the last '/' is
            the feature class name and everything before it the workspace.
        geoType: Geometry type passed to CreateFeatureclass, e.g. 'POINT',
            'POLYLINE', 'POLYGON', 'MULTIPOINT'.
        template: Template dataset passed to CreateFeatureclass; most of the
            time this is the dataset the DataFrame was read from.

    Side effects:
        Adds (or overwrites) a 'SHAPE@' column on `df`, copied from 'Shape'.
        Prints a completion message.

    Raises:
        KeyError: if `df` has no 'Shape' column.
    """
    out_name = outpath.split('/')[-1]
    # Slice the name off the end instead of using str.replace(): replace()
    # would also remove the name from any parent folder that contains it.
    out_path = outpath[:len(outpath) - len(out_name)]
    # The template supplies the schema of the new feature class.
    arcpy.CreateFeatureclass_management(out_path, out_name, geoType, template)

    # Geometry has to be written through the 'SHAPE@' token, so mirror the
    # 'Shape' column under that name. Done once, not once per row.
    df['SHAPE@'] = df['Shape']
    fields = list(df.columns)

    # One cursor for all rows; the finally clause releases it even when an
    # insert fails, and an empty DataFrame no longer hits an unbound name.
    cursor = arcpy.da.InsertCursor(outpath, fields)
    try:
        for row in df.index:
            cursor.insertRow([df[field][row] for field in fields])
    finally:
        del cursor
    print('Pandas to shp finish!')
实例应用:
写一段根据gps公交站点txt文件构建shp数据的代码,代码如下:
def readDataFile(filetype, filename, savefile):
    """Turn one GPS bus text file into a feature class inside `savefile`.

    The output is named ``'bus' + <first run of digits in filename> +
    filetype``; ``'_1'`` is appended until the name is not already used by a
    child of `savefile`. The schema comes from the template dataset
    ``./dealing/temple.gdb/<filetype>``.

    Args:
        filetype: 'point' or 'line'.
            'point': every non-blank line is ``name x y`` (separated by
            single spaces) and becomes one point feature.
            'line': every non-blank line is ``x y``; together they form the
            vertices of a single polyline feature.
        filename: Path of the GBK-encoded text file. Its base name, minus the
            ``<filetype>.txt`` suffix, is stored as the route name.
        savefile: Workspace path. The output name is concatenated to it
            directly, so it must end with '/'.

    Returns:
        The pandas DataFrame that was written to the feature class.

    Raises:
        ValueError: if `filetype` is not supported or `filename` contains no
            digits to build the output name from.
    """
    if filetype not in ('point', 'line'):
        raise ValueError(
            "filetype must be 'point' or 'line', got %r" % (filetype,))

    # Read as 'gbk' so every line comes back as unicode text.
    with codecs.open(filename, encoding='gbk') as datafile:
        pointData = [line for line in datafile.readlines() if line.strip()]

    digits = re.findall('[0-9]*[0-9]', filename)
    if not digits:
        raise ValueError(
            'no route number found in file name %r' % (filename,))
    outputFileName = 'bus' + digits[0] + filetype

    # Keep appending '_1' until the name is free. A single pass over the
    # children could still collide with an existing '<name>_1'.
    existing = set(child.name for child in arcpy.Describe(savefile).children)
    while outputFileName in existing:
        outputFileName = outputFileName + '_1'
    print('output path is %s' % (savefile + outputFileName))

    # Template dataset that provides the schema.
    template = u'./dealing/temple.gdb/%s' % filetype

    # Route name = base name without the '<filetype>.txt' suffix.
    # str.strip() removes a *set of characters*, not a prefix or suffix, so
    # it would also eat leading/trailing letters of the route name itself.
    # Byte-string names (what glob returns on Python 2) are decoded first so
    # separators are matched on characters rather than raw GBK bytes.
    if isinstance(filename, bytes):
        filename = filename.decode('gbk')
    linename = re.split(r'[\\/]', filename)[-1]
    suffix = filetype + '.txt'
    if linename.endswith(suffix):
        linename = linename[:-len(suffix)]
    linename = linename.encode('utf-8')

    # .at replaces DataFrame.set_value, which pandas deprecated and later
    # removed; like set_value it enlarges the frame for a missing label.
    df = pd.DataFrame(columns=Shp2dataframe(template).columns)
    if filetype == 'point':
        geometry_type = 'POINT'
        for num, line in enumerate(pointData):
            row = line.strip('\r\n').split(' ')
            df.at[num, 'name'] = row[0]
            df.at[num, 'x'] = row[1]
            df.at[num, 'y'] = row[2]
            df.at[num, 'line'] = linename
            # Coordinates are converted to float, as in the 'line' branch.
            df.at[num, 'Shape'] = arcpy.PointGeometry(
                arcpy.Point(float(row[1]), float(row[2])))
    else:
        geometry_type = 'POLYLINE'
        # Collect every vertex, then build one polyline from all of them.
        pointList = []
        for eachPoint in pointData:
            coord = eachPoint.strip('\r\n').split(' ')
            pointList.append(arcpy.Point(float(coord[0]), float(coord[1])))
        df.at[0, 'name'] = linename
        df.at[0, 'Shape'] = arcpy.Polyline(arcpy.Array(pointList))

    # Pass the geometry type explicitly instead of '' so the feature class
    # matches the geometries being inserted.
    # NOTE(review): assumes the templates 'point'/'line' hold point/polyline
    # data - confirm against temple.gdb.
    Dataframe2ShpTemplate(df, savefile + outputFileName, geometry_type,
                          template)
    return df
-------sugar---------------------sugar--------------------sugar-------------------sugar----------------sugar----------
# Find every file ending in point.txt or line.txt in the working directory,
# convert each one into a feature class, then merge all line feature classes.
gdb = u'dealing/广州市道路网.gdb/'

pointfiles = glob.glob('./dealing/*point.txt')
polylinefiles = glob.glob('./dealing/*line.txt')

for pf in pointfiles:
    print(pf)
    readDataFile('point', pf, gdb)

for pl in polylinefiles:
    print(pl)
    df = readDataFile('line', pl, gdb)

# Every child of the geodatabase whose name contains 'line' goes into the
# merged output.
lineshp = arcpy.Describe(gdb)
linelist = [gdb + child.name
            for child in lineshp.children
            if 'line' in child.name]
arcpy.Merge_management(linelist, gdb + u'0allLine')
Kanonpy
http://my.oschina.net/Kanonpy/admin/edit-blog?blog=425633