python实现CentOS下文件复制

原由是公司想对现有网站作SEO优化,在优化URL这一块,思路是简化路径,是链接更短!html

第一步: 分析规则python

原始表
SELECT id , uri,www_uri from `article` LIMIT 100mysql

分析一条链接:
/detail/0ztm/161011/1476154039.html
真实存储路径是:
/_/boss/0/zt/m/16/10/11/1476154039.htmlsql

咱们要作的就是把1476154039.html 替换成从2000开始计数的文件名,例如 2000.html数据库

第二步:分析表中全部链接,转化成真实路径,方便后边python直接调用fetch

考虑把不在原始表上作修改,新建表优化

create table article_pathzsq
(
    zsid BIGINT not null AUTO_INCREMENT PRIMARY KEY,
    id BIGINT ,
    uri VARCHAR(255),
    www_uri varchar(255),
    uri_path varchar(512),
    uri_htm varchar(255),
    www_uri_path varchar(512),
    www_uri_htm varchar(512),
    inum varchar(128),
    isflag int not null DEFAULT 0
)
insert into article_pathzsq (id,uri,www_uri,uri_path,uri_htm,www_uri_path,www_uri_htm)
SELECT a.id , a.uri , a.www_uri ,
CONCAT(
(case when substring_index(a.uri,'/',3) = '/detail/0nsm' then
            REPLACE(substring_index(a.uri,'/',3), '/detail/0nsm', '/_/boss/0/ns/m')
         when substring_index(a.uri,'/',3) = '/detail/0ztm' then
            REPLACE(substring_index(a.uri,'/',3), '/detail/0ztm', '/_/boss/0/zt/m')
         when substring_index(a.uri,'/',3) = '/detail/3nsm' then
            REPLACE(substring_index(a.uri,'/',3), '/detail/3nsm', '/_/boss/3/ns/m')
         when substring_index(a.uri,'/',3) = '/detail/3ztm' then
            REPLACE(substring_index(a.uri,'/',3), '/detail/3ztm', '/_/boss/3/zt/m')
END) ,'/',
SUBSTR(substring_index(a.uri,'/',-2),1,2) ,'/' ,SUBSTR(substring_index(a.uri,'/',-2),3,2),'/',SUBSTR(substring_index(a.uri,'/',-2),5,2),'/'
) as uri_path  , SUBSTR(substring_index(a.uri,'/',-2),8,1000) as uri_htm ,

CONCAT(
(case when substring_index(a.www_uri,'/',3) = '/detail/0nswww' then
            REPLACE(substring_index(a.www_uri,'/',3), '/detail/0nswww', '/_/boss/0/ns/www')
         when substring_index(a.www_uri,'/',3) = '/detail/0ztwww' then
            REPLACE(substring_index(a.www_uri,'/',3), '/detail/0ztwww', '/_/boss/0/zt/www')
         when substring_index(a.www_uri,'/',3) = '/detail/3nswww' then
            REPLACE(substring_index(a.www_uri,'/',3), '/detail/3nswww', '/_/boss/3/ns/www')
         when substring_index(a.www_uri,'/',3) = '/detail/3ztwww' then
            REPLACE(substring_index(a.www_uri,'/',3), '/detail/3ztwww', '/_/boss/3/zt/www')
END) ,'/',
SUBSTR(substring_index(a.www_uri,'/',-2),1,2) ,'/' ,SUBSTR(substring_index(a.www_uri,'/',-2),3,2),'/',SUBSTR(substring_index(a.www_uri,'/',-2),5,2),'/'
) as www_uri_path  , SUBSTR(substring_index(a.www_uri,'/',-2),8,1000) as www_uri_htm
from `article` a
where uri not REGEXP '/_/boss'
LIMIT 10

第三步:在article_pathzsq,规划新建文件名称,按照规则,从2000计数递增网站

UPDATE article_pathzsq SET inum = CONCAT( (zsid + 2000) , '.html')
where inum is null

 

而后用python(我这里使用的是python2.6,系统自带的),读取数据库,结果集;按照结果集,复制以前的文件,生成新的文件:code

#!python
# -*- coding: UTF-8 -*-
import MySQLdb
import os

dbip = "192.168.0.0"
dbuser = "mysql"
dbpwd = r"000000"
dbdata = "zt"

# 打开数据库链接
db = MySQLdb.connect(dbip,dbuser,dbpwd,dbdata )
# 使用cursor()方法获取操做游标
cursor = db.cursor()
try:
    # 执行sql语句
    sqls="SELECT a.zsid , a.uri_path , a.uri_htm , a.www_uri_path, a.www_uri_htm , a.inum  from article_pathzsq a where a.isflag=0 LIMIT 10;"
    cursor.execute(sqls)
    results = cursor.fetchall()
except:
    print("读取失败")
# 关闭数据库链接
db.close()

for row in results:
    zsid = row[0]
    uri_path = row[1]
    uri_htm = row[2]
    www_uri_path = row[3]
    www_uri_htm = row[4]
    inum = row[5]
    # 打印结果
    """
    print(" zsid= %d , uri_path= %s , uri_htm= %s ,www_uri_path= %s , www_uri_htm= %s ,inum= %s " % \
          (zsid, uri_path, uri_htm, www_uri_path, www_uri_htm, inum))
    """
    uri_ps = 'cp  ' + uri_path + uri_htm + '  ' + uri_path + inum
    print(uri_ps)
    #os.system(uri_ps)
    wwwuri_ps = 'cp  ' + www_uri_path + www_uri_htm + '  ' + www_uri_path + inum
	print(wwwuri_ps)
    #os.system(wwwuri_ps)
相关文章
相关标签/搜索