数据挖掘:Pandas时间模块管理!

datetime

import numpy as np
import pandas as pd
import datetime

# datetime.date

t = datetime.date.today()  # datetime 模块 date 类 today() 类方法
print(t, type(t))
print("")

t_str = str(t)
print(t, type(t_str))

2019-05-26 <class 'datetime.date'>

2019-05-26 <class 'str'>

# datetime.datetime

now = datetime.datetime.now()
print(now, type(now))

2019-05-26 16:18:17.612845 <class 'datetime.datetime'>

# datetime.timedelta 时间差

t1 = datetime.datetime(2017,10,1)
print(t1)
print("")

tx = datetime.timedelta(100)  # timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)
print(tx)
print("")

t2 = t1 + tx
print(t2)

2017-10-01 00:00:00

100 days, 0:00:00

2018-01-09 00:00:00

pd.Timestamp 跟 datetime 作用相同,只不过 Timestamp 是 pandas 模块里的

import numpy as np
import pandas as pd
import datetime

t = datetime.datetime.today()
print(t)
print("")

ts1 = pd.Timestamp(t)
print(ts1)
print("")

ts2 = pd.Timestamp("20171021")
print(ts2)

2020-06-01 14:28:08.656056

2020-06-01 14:28:08.656056

2017-10-21 00:00:00

pd.to_datetime 多个时间数据转换成时间戳索引

time_list = ["20171019", "20181020", "20191021"]

t = pd.to_datetime(time_list)
print(t, type(t))

DatetimeIndex(['2017-10-19', '2018-10-20', '2019-10-21'], dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'>

如果时间序列里包含非时间的数据:errors="ignore" 忽略异常,原样返回输入(不会转换成时间戳索引);errors="coerce" 把异常值改成 NaT

time_list1 = ["20171019", "20181020", "bbbb", "20191021"]

t1= pd.to_datetime(time_list1, errors="ignore")
print(t1, type(t1))
print("")

t2 = pd.to_datetime(time_list1, errors="coerce")
print(t2)

Index(['20171019', '20181020', 'bbbb', '20191021'], dtype='object') <class 'pandas.core.indexes.base.Index'>

DatetimeIndex(['2017-10-19', '2018-10-20', 'NaT', '2019-10-21'], dtype='datetime64[ns]', freq=None)

pd.DatetimeIndex() 直接生成时间戳序列

rng = pd.DatetimeIndex(["20160910", "11/06/2017", "20180821", "26/05/2019"])
print(rng)
print(type(rng))
print("")

print(rng[0], type(rng[0]))

DatetimeIndex(['2016-09-10', '2017-11-06', '2018-08-21', '2019-05-26'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

2016-09-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>

st = pd.Series(np.random.rand(4), index=rng) # 把时间戳索引当成index
print(st)

2016-09-10   0.835586
2017-11-06   0.223044
2018-08-21   0.950717
2019-05-26   0.013370
dtype: float64

pd.date_range() 生成日期范围

"""
pd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)

该函数主要用于生成一个固定频率的时间索引,在调用构造方法时,必须指定start、end、periods中的两个参数值,否则报错。

start: 开始日期

end: 结束日期

periods:固定时期,取值为整数或None

freq:日期偏移量,取值为string或DateOffset,默认为'D'

normalize:若参数为True表示将start、end参数值正则化到午夜时间戳 0:00:00 默认为False

name:生成时间索引对象的名称,取值为string或None

closed:可以理解成在closed=None情况下返回的结果(左右两端都包含)的基础上,若closed='left'表示再取左闭右开的结果(不包含end),若closed='right'表示再取左开右闭的结果(不包含start)

"""

"\npd.date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None ,closed=None, **kwargs)\n\n该函数主要用于生成一个固定频率的时间索引,在调用构造方法时,必须指定start、end、periods中的两个参数值,否则报错。\n\nstart: 开始日期\n\nend: 结束日期\n\nperiods:固定时期,取值为整数或None\n\nfreq:日期偏移量,取值为string或DateOffset,默认为'D'\n\nnormalize:若参数为True表示将start、end参数值正则化到午夜时间戳 0:00:00 默认为False\n\nname:生成时间索引对象的名称,取值为string或None\n\nclosed:可以理解成在closed=None情况下返回的结果(左右两端都包含)的基础上,若closed='left'表示再取左闭右开的结果(不包含end),若closed='right'表示再取左开右闭的结果(不包含start)\n\n"

start end

t_index1 = pd.date_range(start="20181018", end="20191021", name="t_index1")
print(t_index1)

DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
'2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
'2018-10-26', '2018-10-27',
...
'2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
'2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
'2019-10-20', '2019-10-21'],
dtype='datetime64[ns]', name='t_index1', length=369, freq='D')

periods

t_index2 = pd.date_range(start="20181018", periods=10, name="t_index2")
print(t_index2)

DatetimeIndex(['2018-10-18', '2018-10-19', '2018-10-20', '2018-10-21',
'2018-10-22', '2018-10-23', '2018-10-24', '2018-10-25',
'2018-10-26', '2018-10-27'],
dtype='datetime64[ns]', name='t_index2', freq='D')

t_index3 = pd.date_range(end="20181018", periods=10, name="t_index3")
print(t_index3)

DatetimeIndex(['2018-10-09', '2018-10-10', '2018-10-11', '2018-10-12',
'2018-10-13', '2018-10-14', '2018-10-15', '2018-10-16',
'2018-10-17', '2018-10-18'],
dtype='datetime64[ns]', name='t_index3', freq='D')

name normalize

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4")
print(t_index4)
print("\n")

t_index4 = pd.date_range(start="11/09/2019 16:30", periods=10, name="t_index4", normalize=True)
print(t_index4)

DatetimeIndex(['2019-11-09 16:30:00', '2019-11-10 16:30:00',
'2019-11-11 16:30:00', '2019-11-12 16:30:00',
'2019-11-13 16:30:00', '2019-11-14 16:30:00',
'2019-11-15 16:30:00', '2019-11-16 16:30:00',
'2019-11-17 16:30:00', '2019-11-18 16:30:00'],
dtype='datetime64[ns]', name='t_index4', freq='D')

DatetimeIndex(['2019-11-09', '2019-11-10', '2019-11-11', '2019-11-12',
'2019-11-13', '2019-11-14', '2019-11-15', '2019-11-16',
'2019-11-17', '2019-11-18'],
dtype='datetime64[ns]', name='t_index4', freq='D')

closed

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5")
print(t_index5)
print("\n")

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", closed="left") # 左闭右开
print(t_index5)
print("\n")

t_index5 = pd.date_range(start="20190910", end="20190918", name="t_index5", closed="right") # 左开右闭
print(t_index5)
print("\n")

DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
'2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
'2019-09-18'],
dtype='datetime64[ns]', name='t_index5', freq='D')

DatetimeIndex(['2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
'2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17'],
dtype='datetime64[ns]', name='t_index5', freq='D')

DatetimeIndex(['2019-09-11', '2019-09-12', '2019-09-13', '2019-09-14',
'2019-09-15', '2019-09-16', '2019-09-17', '2019-09-18'],
dtype='datetime64[ns]', name='t_index5', freq='D')

pd.bdate_range() 默认频率为工作日

t_index6 = pd.bdate_range(start="20191001", end="20191007", name="t_index6")
print(t_index6)

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-07'],
dtype='datetime64[ns]', name='t_index6', freq='B')

pd.date_range 转换成list 元素为时间戳Timestamp

t_index7_list= pd.date_range(start="20191001", end="20191007", name="t_index7_list")
print(t_index7_list)
print("\n")

t_index7_list= list(pd.date_range(start="20191001", end="20191007", name="t_index7_list"))
print(t_index7_list)

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-05', '2019-10-06', '2019-10-07'],
dtype='datetime64[ns]', name='t_index7_list', freq='D')

[Timestamp('2019-10-01 00:00:00', freq='D'), Timestamp('2019-10-02 00:00:00', freq='D'), Timestamp('2019-10-03 00:00:00', freq='D'), Timestamp('2019-10-04 00:00:00', freq='D'), Timestamp('2019-10-05 00:00:00', freq='D'), Timestamp('2019-10-06 00:00:00', freq='D'), Timestamp('2019-10-07 00:00:00', freq='D')]

freq 日期偏移量

# 默认freq = 'D' 每日

pd.date_range("10/1/2019", "2019/10/7")

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-05', '2019-10-06', '2019-10-07'],
dtype='datetime64[ns]', freq='D')

# 'B' 每工作日

pd.date_range("10/01/2019", "10/07/2019", freq = "B")

DatetimeIndex(['2019-10-01', '2019-10-02', '2019-10-03', '2019-10-04',
'2019-10-07'],
dtype='datetime64[ns]', freq='B')

# H 每小时

pd.date_range("10/01/2019 12:00:00", "10/02/2019 12:00:00", freq = "H")

DatetimeIndex(['2019-10-01 12:00:00', '2019-10-01 13:00:00',
'2019-10-01 14:00:00', '2019-10-01 15:00:00',
'2019-10-01 16:00:00', '2019-10-01 17:00:00',
'2019-10-01 18:00:00', '2019-10-01 19:00:00',
'2019-10-01 20:00:00', '2019-10-01 21:00:00',
'2019-10-01 22:00:00', '2019-10-01 23:00:00',
'2019-10-02 00:00:00', '2019-10-02 01:00:00',
'2019-10-02 02:00:00', '2019-10-02 03:00:00',
'2019-10-02 04:00:00', '2019-10-02 05:00:00',
'2019-10-02 06:00:00', '2019-10-02 07:00:00',
'2019-10-02 08:00:00', '2019-10-02 09:00:00',
'2019-10-02 10:00:00', '2019-10-02 11:00:00',
'2019-10-02 12:00:00'],
dtype='datetime64[ns]', freq='H')

# T/MIN 每分

pd.date_range("10/01/2019 12:10:00" , "10/01/2019 12:30:00", freq = "T")

DatetimeIndex(['2019-10-01 12:10:00', '2019-10-01 12:11:00',
'2019-10-01 12:12:00', '2019-10-01 12:13:00',
'2019-10-01 12:14:00', '2019-10-01 12:15:00',
'2019-10-01 12:16:00', '2019-10-01 12:17:00',
'2019-10-01 12:18:00', '2019-10-01 12:19:00',
'2019-10-01 12:20:00', '2019-10-01 12:21:00',
'2019-10-01 12:22:00', '2019-10-01 12:23:00',
'2019-10-01 12:24:00', '2019-10-01 12:25:00',
'2019-10-01 12:26:00', '2019-10-01 12:27:00',
'2019-10-01 12:28:00', '2019-10-01 12:29:00',
'2019-10-01 12:30:00'],
dtype='datetime64[ns]', freq='T')

# S 每秒

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "S")

DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 00:00:01',
'2019-10-01 00:00:02', '2019-10-01 00:00:03',
'2019-10-01 00:00:04', '2019-10-01 00:00:05',
'2019-10-01 00:00:06', '2019-10-01 00:00:07',
'2019-10-01 00:00:08', '2019-10-01 00:00:09',
'2019-10-01 00:00:10', '2019-10-01 00:00:11',
'2019-10-01 00:00:12', '2019-10-01 00:00:13',
'2019-10-01 00:00:14', '2019-10-01 00:00:15',
'2019-10-01 00:00:16', '2019-10-01 00:00:17',
'2019-10-01 00:00:18', '2019-10-01 00:00:19',
'2019-10-01 00:00:20', '2019-10-01 00:00:21',
'2019-10-01 00:00:22', '2019-10-01 00:00:23',
'2019-10-01 00:00:24', '2019-10-01 00:00:25',
'2019-10-01 00:00:26', '2019-10-01 00:00:27',
'2019-10-01 00:00:28', '2019-10-01 00:00:29',
'2019-10-01 00:00:30'],
dtype='datetime64[ns]', freq='S')

# L 每毫秒 (千分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "L")

DatetimeIndex([       '2019-10-01 00:00:00', '2019-10-01 00:00:00.001000',
'2019-10-01 00:00:00.002000', '2019-10-01 00:00:00.003000',
'2019-10-01 00:00:00.004000', '2019-10-01 00:00:00.005000',
'2019-10-01 00:00:00.006000', '2019-10-01 00:00:00.007000',
'2019-10-01 00:00:00.008000', '2019-10-01 00:00:00.009000',
...
'2019-10-01 00:00:29.991000', '2019-10-01 00:00:29.992000',
'2019-10-01 00:00:29.993000', '2019-10-01 00:00:29.994000',
'2019-10-01 00:00:29.995000', '2019-10-01 00:00:29.996000',
'2019-10-01 00:00:29.997000', '2019-10-01 00:00:29.998000',
'2019-10-01 00:00:29.999000',       '2019-10-01 00:00:30'],
dtype='datetime64[ns]', length=30001, freq='L')

# U 每微秒 (百万分之一秒)

pd.date_range("10/01/2019", "10/01/2019 00:00:30", freq = "U") # U 每微秒 (百万分之一秒)

DatetimeIndex([ '2019-10-01 00:00:00', '2019-10-01 00:00:00.000001',
'2019-10-01 00:00:00.000002', '2019-10-01 00:00:00.000003',
'2019-10-01 00:00:00.000004', '2019-10-01 00:00:00.000005',
'2019-10-01 00:00:00.000006', '2019-10-01 00:00:00.000007',
'2019-10-01 00:00:00.000008', '2019-10-01 00:00:00.000009',
...
'2019-10-01 00:00:29.999991', '2019-10-01 00:00:29.999992',
'2019-10-01 00:00:29.999993', '2019-10-01 00:00:29.999994',
'2019-10-01 00:00:29.999995', '2019-10-01 00:00:29.999996',
'2019-10-01 00:00:29.999997', '2019-10-01 00:00:29.999998',
'2019-10-01 00:00:29.999999',        '2019-10-01 00:00:30'],
dtype='datetime64[ns]', length=30000001, freq='U')

星期几缩写 -- MON/TUE/WED/THU/FRI/SAT/SUN

# "W-MON"指定从星期一开始算起 间隔是每周

pd.date_range("2019/10/1", "2019/11/1", freq = "W-MON")

DatetimeIndex(['2019-10-07', '2019-10-14', '2019-10-21', '2019-10-28'], dtype='datetime64[ns]', freq='W-MON')

# "WOM-2MON" 指定每个月的第2个星期一 间隔是月

pd.date_range("2019/10/1", "2020/10/1", freq = "WOM-2MON")

DatetimeIndex(['2019-10-14', '2019-11-11', '2019-12-09', '2020-01-13',
'2020-02-10', '2020-03-09', '2020-04-13', '2020-05-11',
'2020-06-08', '2020-07-13', '2020-08-10', '2020-09-14'],
dtype='datetime64[ns]', freq='WOM-2MON')

# M -- 每个月最后一个日历日

pd.date_range("2019", "2020", freq = "M")

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
'2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
'2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31'],
dtype='datetime64[ns]', freq='M')

月份

"""
一月     Jan.     January

二月     Feb.     February

三月     Mar.     March

四月     Apr.     April

五月     May.     May

六月     Jun.     June

七月     Jul.     July

八月     Aug.     August

九月     Sept.   September

十月     Oct.     October

十一月   Nov.     November

十二月   Dec.     December
"""

# Q 每一个季度末最后一月的最后一个日历日

print(pd.date_range("2019", "2020", freq="Q-JAN"))

print(pd.date_range("2019", "2020", freq="Q-FEB"))

print(pd.date_range("2019", "2020", freq="Q-MAR"))
print("")

# 所以Q-月只有三种情况 1-4-7-10, 2-5-8-11, 3-6-9-12
print(pd.date_range("2019", "2020", freq="Q-APR"))

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-JAN')
DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-31', '2019-11-30'], dtype='datetime64[ns]', freq='Q-FEB')
DatetimeIndex(['2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'], dtype='datetime64[ns]', freq='Q-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31'], dtype='datetime64[ns]', freq='Q-APR')

# A -- 每一年指定月份的最后一个日历日

print(pd.date_range("2019", "2021", freq="A-JAN"))
print(pd.date_range("2019", "2021", freq="A-FEB"))
print(pd.date_range("2019", "2021", freq="A-DEC"))

DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='A-JAN')
DatetimeIndex(['2019-02-28', '2020-02-29'], dtype='datetime64[ns]', freq='A-FEB')
DatetimeIndex(['2019-12-31', '2020-12-31'], dtype='datetime64[ns]', freq='A-DEC')

# BM - 每个月最后一个工作日

print(pd.date_range("2019", "2020", freq="BM"))

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29', '2019-04-30',
'2019-05-31', '2019-06-28', '2019-07-31', '2019-08-30',
'2019-09-30', '2019-10-31', '2019-11-29', '2019-12-31'],
dtype='datetime64[ns]', freq='BM')

# BQ - 每个季度最后一个月的最后一个工作日

print(pd.date_range("2019", "2021", freq="BQ-JAN"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-FEB"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-MAR"))
print("")
print(pd.date_range("2019", "2021", freq="BQ-APR"))

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
'2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
dtype='datetime64[ns]', freq='BQ-JAN')

DatetimeIndex(['2019-02-28', '2019-05-31', '2019-08-30', '2019-11-29',
'2020-02-28', '2020-05-29', '2020-08-31', '2020-11-30'],
dtype='datetime64[ns]', freq='BQ-FEB')

DatetimeIndex(['2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31',
'2020-03-31', '2020-06-30', '2020-09-30', '2020-12-31'],
dtype='datetime64[ns]', freq='BQ-MAR')

DatetimeIndex(['2019-01-31', '2019-04-30', '2019-07-31', '2019-10-31',
'2020-01-31', '2020-04-30', '2020-07-31', '2020-10-30'],
dtype='datetime64[ns]', freq='BQ-APR')

# BA -- 每年指定月份的最后一个工作日

print(pd.date_range("2019", "2021", freq="BA-JAN"))
print(pd.date_range("2019", "2023", freq="BA-FEB"))
print(pd.date_range("2019", "2021", freq="BA-MAR"))

DatetimeIndex(['2019-01-31', '2020-01-31'], dtype='datetime64[ns]', freq='BA-JAN')
DatetimeIndex(['2019-02-28', '2020-02-28', '2021-02-26', '2022-02-28'], dtype='datetime64[ns]', freq='BA-FEB')
DatetimeIndex(['2019-03-29', '2020-03-31'], dtype='datetime64[ns]', freq='BA-MAR')

# MS -- 每个月第一个日历日

pd.date_range("2019", "2020", freq="MS")

DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
'2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='MS')

# QS - 每个季度第一个月的第一个日历日

print(pd.date_range("2019", "2020", freq="QS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="QS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="QS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="QS-APR"))

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='QS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='QS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'], dtype='datetime64[ns]', freq='QS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='QS-APR')

# AS -- 每一年指定月份的第一个日历日

print(pd.date_range("2019", "2021", freq="AS-JAN"))
print(pd.date_range("2019", "2021", freq="AS-FEB"))
print(pd.date_range("2019", "2021", freq="AS-DEC"))

DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='AS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-01'], dtype='datetime64[ns]', freq='AS-FEB')
DatetimeIndex(['2019-12-01', '2020-12-01'], dtype='datetime64[ns]', freq='AS-DEC')

# BMS -- 每个月第一个工作日

print(pd.date_range("2019", "2021", freq="BMS"))

DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-03', '2019-07-01', '2019-08-01',
'2019-09-02', '2019-10-01', '2019-11-01', '2019-12-02',
'2020-01-01', '2020-02-03', '2020-03-02', '2020-04-01',
'2020-05-01', '2020-06-01', '2020-07-01', '2020-08-03',
'2020-09-01', '2020-10-01', '2020-11-02', '2020-12-01',
'2021-01-01'],
dtype='datetime64[ns]', freq='BMS')

# BQS - 每个季度第一个月的第一个工作日

print(pd.date_range("2019", "2020", freq="BQS-JAN"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-FEB"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-MAR"))
print("")
print(pd.date_range("2019", "2020", freq="BQS-APR"))

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='BQS-JAN')

DatetimeIndex(['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01'], dtype='datetime64[ns]', freq='BQS-FEB')

DatetimeIndex(['2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'], dtype='datetime64[ns]', freq='BQS-MAR')

DatetimeIndex(['2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01',
'2020-01-01'],
dtype='datetime64[ns]', freq='BQS-APR')

# BAS -- 每年指定月份的第一个工作日

print(pd.date_range("2019", "2021", freq="BAS-JAN"))
print(pd.date_range("2019", "2021", freq="BAS-FEB"))
print(pd.date_range("2019", "2021", freq="BAS-DEC"))

DatetimeIndex(['2019-01-01', '2020-01-01', '2021-01-01'], dtype='datetime64[ns]', freq='BAS-JAN')
DatetimeIndex(['2019-02-01', '2020-02-03'], dtype='datetime64[ns]', freq='BAS-FEB')
DatetimeIndex(['2019-12-02', '2020-12-01'], dtype='datetime64[ns]', freq='BAS-DEC')

复合频率

# 7D 间隔是7天

pd.date_range("2019/10/1", "2019/12/1", freq="7D")

DatetimeIndex(['2019-10-01', '2019-10-08', '2019-10-15', '2019-10-22',
'2019-10-29', '2019-11-05', '2019-11-12', '2019-11-19',
'2019-11-26'],
dtype='datetime64[ns]', freq='7D')

# 2h30min 间隔是2小时30分钟

pd.date_range("2019/10/1 00:00:00", "2019/10/1 12:00:00", freq="2h30min")

DatetimeIndex(['2019-10-01 00:00:00', '2019-10-01 02:30:00',
'2019-10-01 05:00:00', '2019-10-01 07:30:00',
'2019-10-01 10:00:00'],
dtype='datetime64[ns]', freq='150T')

# 2M 每间隔2个月的最后一个日历日

pd.date_range("2019", "2021", freq="2M")

DatetimeIndex(['2019-01-31', '2019-03-31', '2019-05-31', '2019-07-31',
'2019-09-30', '2019-11-30', '2020-01-31', '2020-03-31',
'2020-05-31', '2020-07-31', '2020-09-30', '2020-11-30'],
dtype='datetime64[ns]', freq='2M')

asfreq 时间频率转换

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

# 这里是把D改成4H
print(ts.asfreq("4H"))
print("\n")

# method 插值模式 ffill 用之前的值填充 bfill 用之后的值填充
print(ts.asfreq("4H", method="ffill"))
print("\n")

print(ts.asfreq("4H", method="bfill"))

2019-01-01   0.610403
2019-01-02   0.416557
2019-01-03   0.821631
2019-01-04   0.699457
Freq: D, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00         NaN
2019-01-01 08:00:00         NaN
2019-01-01 12:00:00         NaN
2019-01-01 16:00:00         NaN
2019-01-01 20:00:00         NaN
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00         NaN
2019-01-02 08:00:00         NaN
2019-01-02 12:00:00         NaN
2019-01-02 16:00:00         NaN
2019-01-02 20:00:00         NaN
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00         NaN
2019-01-03 08:00:00         NaN
2019-01-03 12:00:00         NaN
2019-01-03 16:00:00         NaN
2019-01-03 20:00:00         NaN
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00   0.610403
2019-01-01 08:00:00   0.610403
2019-01-01 12:00:00   0.610403
2019-01-01 16:00:00   0.610403
2019-01-01 20:00:00   0.610403
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00   0.416557
2019-01-02 08:00:00   0.416557
2019-01-02 12:00:00   0.416557
2019-01-02 16:00:00   0.416557
2019-01-02 20:00:00   0.416557
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00   0.821631
2019-01-03 08:00:00   0.821631
2019-01-03 12:00:00   0.821631
2019-01-03 16:00:00   0.821631
2019-01-03 20:00:00   0.821631
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

2019-01-01 00:00:00   0.610403
2019-01-01 04:00:00   0.416557
2019-01-01 08:00:00   0.416557
2019-01-01 12:00:00   0.416557
2019-01-01 16:00:00   0.416557
2019-01-01 20:00:00   0.416557
2019-01-02 00:00:00   0.416557
2019-01-02 04:00:00   0.821631
2019-01-02 08:00:00   0.821631
2019-01-02 12:00:00   0.821631
2019-01-02 16:00:00   0.821631
2019-01-02 20:00:00   0.821631
2019-01-03 00:00:00   0.821631
2019-01-03 04:00:00   0.699457
2019-01-03 08:00:00   0.699457
2019-01-03 12:00:00   0.699457
2019-01-03 16:00:00   0.699457
2019-01-03 20:00:00   0.699457
2019-01-04 00:00:00   0.699457
Freq: 4H, dtype: float64

超前/滞后数据 shift(正数): 数值后移--滞后 ,shift(负数): 数值前移--超前

ts = pd.Series(np.random.rand(4), index=pd.date_range("2019/1/1", "2019/1/4"))
print(ts)
print("\n")

print(ts.shift(1))
print("\n")

print(ts.shift(-2))
print("\n")

# 计算变化比率 该时间戳的值与上一个时间戳的值之比

per = ts/ts.shift(1)
print(per)

2019-01-01   0.197884
2019-01-02   0.403093
2019-01-03   0.208341
2019-01-04   0.330873
Freq: D, dtype: float64

2019-01-01         NaN
2019-01-02   0.197884
2019-01-03   0.403093
2019-01-04   0.208341
Freq: D, dtype: float64

2019-01-01   0.208341
2019-01-02   0.330873
2019-01-03         NaN
2019-01-04         NaN
Freq: D, dtype: float64

2019-01-01         NaN
2019-01-02   2.037017
2019-01-03   0.516855
2019-01-04   1.588134
Freq: D, dtype: float64

shift(freq) 加上freq参数 对时间戳进行位移 而不是对数值进行位移

print(ts)
print("\n")

print(ts.shift(2, freq="D")) # 按天
print("\n")

print(ts.shift(2, freq="T")) # 按分钟

2019-01-01   0.197884
2019-01-02   0.403093
2019-01-03   0.208341
2019-01-04   0.330873
Freq: D, dtype: float64

2019-01-03   0.197884
2019-01-04   0.403093
2019-01-05   0.208341
2019-01-06   0.330873
Freq: D, dtype: float64

2019-01-01 00:02:00   0.197884
2019-01-02 00:02:00   0.403093
2019-01-03 00:02:00   0.208341
2019-01-04 00:02:00   0.330873
Freq: D, dtype: float64

相关文章
相关标签/搜索