DataFrame与shp文件相互转换

时间 2019-11-10

原文链接

由于习惯了使用pandas的DataFrame数据结构，同时pandas作为一个方便计算和表操作的数据结构具有十分显著的优点，甚至很多时候DataFrame可以当作Excel来使用，而用python操作GIS的shp文件时很不顺畅，不太符合使用习惯，故写了DataFrame与ArcGIS地理文件相互转换的函数，这样处理起来可以节约大量的思考时间。

Shp转DataFrame：

import codecs
import glob
import re

import arcpy
import pandas as pd

def Shp2dataframe(path):
    '''Load an arcpy table or feature class into a pandas DataFrame.

    Each field reported by ``arcpy.ListFields(path)`` becomes one column, in
    the same order, and each cursor row becomes one DataFrame row.  Values
    are stored as returned by ``row.getValue``; the Shape field holds a
    geometry object.
    '''
    fields = arcpy.ListFields(path)
    column_names = [f.name for f in fields]
    # A cursor instance can only be iterated once, so every row is read in
    # a single pass over it.
    records = [
        [record.getValue(name) for name in column_names]
        for record in arcpy.SearchCursor(path)
    ]
    return pd.DataFrame(records, columns=column_names)

DataFrame转Shp：

DataFrame转Shp采用了模板形式，通过模板创建字段、坐标系等，可以更加快速地构建字段。

# Write a pandas table (e.g. one shaped like the output of Shp2dataframe)
# to a new feature class; `template` supplies the schema.
def Dataframe2ShpTemplate(df,outpath,geoType,template):
    '''
    Function:
    Write a pandas DataFrame to a new Esri feature class.
    Input:
    df -- pandas DataFrame; must have a 'Shape' column holding arcpy
          geometry objects.  The frame is not modified.
    outpath -- output path, '<workspace>/<feature class name>'
    geoType -- geometry type, e.g. 'POINT','POLYLINE','POLYGON','MULTIPOINT'
    template -- feature class whose fields (and spatial reference) are
                copied to the output; usually the one df was read from
    Raises:
    KeyError -- if df has no 'Shape' column
    '''
    if 'Shape' not in df.columns:
        raise KeyError('Shape')

    # Split on the LAST separator only.  Replacing the name everywhere in
    # the path would also damage a directory that contains the same text.
    cut = max(outpath.rfind('/'), outpath.rfind('\\')) + 1
    out_path, out_name = outpath[:cut], outpath[cut:]

    # The template only provides the attribute schema; the coordinate
    # system has to be passed explicitly to be carried over.
    spatial_ref = arcpy.Describe(template).spatialReference if template else None
    arcpy.CreateFeatureclass_management(
        out_path, out_name, geoType, template,
        spatial_reference=spatial_ref)

    # Geometry objects are written through the 'SHAPE@' token, so the
    # 'Shape' column is mapped to it.  A pre-existing 'SHAPE@' column is
    # ignored because 'Shape' is the source of truth.
    source_columns = [name for name in df.columns if name != 'SHAPE@']
    field_names = ['SHAPE@' if name == 'Shape' else name
                   for name in source_columns]
    columns = [df[name] for name in source_columns]

    # One cursor for the whole table; the with-block releases it (and its
    # lock) even when an insert fails or the table is empty.
    with arcpy.da.InsertCursor(outpath, field_names) as cursor:
        for row in df.index:
            cursor.insertRow([column[row] for column in columns])
    print('Pandas to shp finish!')

实例应用：

写一个根据GPS公交点txt文件构建shp数据的代码，代码如下：

def readDataFile(filetype,filename,savefile):
    '''
    Build a feature class from a GBK-encoded, space-separated text file.

    Input:
    filetype -- 'point' (lines are "name x y", one feature per line) or
                'line' (lines are "x y", all vertices form one polyline)
    filename -- path of the text file; its base name must contain a number
                and normally ends with '<filetype>.txt'
    savefile -- output workspace path, ending with '/'
    Output:
    the pandas DataFrame that was written to the feature class
    Raises:
    ValueError -- if filetype is neither 'point' nor 'line'
    IndexError -- if the file's base name contains no digits
    '''
    geometry_types = {'point': 'POINT', 'line': 'POLYLINE'}
    if filetype not in geometry_types:
        raise ValueError(
            "filetype must be 'point' or 'line', got %r" % (filetype,))

    # Decode as GBK so every line is unicode text.
    with codecs.open(filename, encoding='gbk') as datafile:
        lines = datafile.readlines()
    # Blank lines (e.g. a trailing empty line) carry no data.
    records = [line.strip('\r\n').split(' ') for line in lines if line.strip()]

    # Work on decoded text: splitting raw GBK bytes on '\\' is unsafe.
    path_text = filename.decode('gbk') if isinstance(filename, bytes) else filename
    base_name = path_text.replace(u'\\', u'/').rsplit(u'/', 1)[-1]

    outputFileName = 'bus' + re.findall(r'[0-9]+', base_name)[0] + filetype
    # Keep appending '_1' until the name is unused in the workspace.
    existing = set(child.name for child in arcpy.Describe(savefile).children)
    while outputFileName in existing:
        outputFileName = outputFileName + '_1'
    print('output path is %s' % (savefile + outputFileName))

    # Remove the literal '<filetype>.txt' suffix.  str.strip() would remove
    # any of those characters from both ends and mangle some names.
    suffix = filetype + u'.txt'
    if base_name.endswith(suffix):
        base_name = base_name[:-len(suffix)]
    # Python 2 stores UTF-8 bytes; Python 3 keeps the text as is.
    linename = base_name.encode('utf-8') if str is bytes else base_name

    # Feature class used as schema template for this geometry kind.
    template = u'./dealing/temple.gdb/%s' % filetype
    columns = list(Shp2dataframe(template).columns)

    if filetype == 'point':
        rows = [{
            'name': row[0],
            'x': row[1],
            'y': row[2],
            'line': linename,
            'Shape': arcpy.PointGeometry(
                arcpy.Point(float(row[1]), float(row[2]))),
        } for row in records]
        used = ['name', 'x', 'y', 'line', 'Shape']
    else:
        vertices = [arcpy.Point(float(coord[0]), float(coord[1]))
                    for coord in records]
        rows = [{
            'name': linename,
            'Shape': arcpy.Polyline(arcpy.Array(vertices)),
        }]
        used = ['name', 'Shape']

    # Keep any written column the template lacks, then build the frame in
    # one step; dtype=object stores the values unconverted.
    columns = columns + [key for key in used if key not in columns]
    df = pd.DataFrame(rows, columns=columns, dtype=object)

    Dataframe2ShpTemplate(
        df, savefile + outputFileName, geometry_types[filetype], template)
    return df

-------sugar---------------------sugar--------------------sugar-------------------sugar----------------sugar----------

# Collect every "*point.txt" and "*line.txt" file in the ./dealing directory.
ROAD_GDB = u'dealing/广州市道路网.gdb/'

pointfiles = glob.glob('./dealing/*point.txt')
polylinefiles = glob.glob('./dealing/*line.txt')

# One point feature class per stop file.
for pf in pointfiles:
    print(pf)
    readDataFile('point', pf, ROAD_GDB)

# One polyline feature class per route file.
for pl in polylinefiles:
    print(pl)
    df = readDataFile('line', pl, ROAD_GDB)


# Merge every feature class whose name contains 'line' into one layer.
lineshp = arcpy.Describe(ROAD_GDB)
linelist = [ROAD_GDB + child.name
            for child in lineshp.children
            if 'line' in child.name]
arcpy.Merge_management(linelist, ROAD_GDB + u'0allLine')

Kanonpy

http://my.oschina.net/Kanonpy/admin/edit-blog?blog=425633