# Week 5 - Python data statistics (第五周-python数据统计)
import tushare as ts
import pandas as pd
import numpy as np
# Student solution: basic statistics on the daily bars of stock 600036
# for 2018-06-01 .. 2018-12-31, fetched through tushare.
sock = ts.get_hist_data('600036', start='2018-06-01', end='2018-12-31')

# Keep only the five columns used below and order the rows by 'date'.
# NOTE(review): 'date' is not one of the selected columns, so this relies on
# the frame's index level being named 'date' -- confirm against tushare.
sock1 = pd.DataFrame(sock, columns=['open', 'high', 'close', 'low', 'volume'])
sock1 = sock1.sort_values('date')
print(sock1)

print('输出这半年内成交量最低和最高那两天的日期和分别的成交量:')
# Boolean masks keep every row that ties for the extreme value; the index
# label printed next to each volume is that row's date.
volume = sock1['volume']
print(volume[volume == volume.max()])
print(volume[volume == volume.min()])

print('列出成交量在 1000000 以上的记录:')
print(sock1.loc[volume > 1000000])

print('计算这半年中收盘价(close)高于开盘价(open)的天数:')
# Select the matching rows once and reuse them for both the listing and the count.
rising = sock1.loc[sock1['close'] > sock1['open']]
print(rising)
print('天数:', rising['open'].count())

print('''计算前后两天开盘价的涨跌情况,用两种方式表示.
第一种输出每两天之间的差值(后一天减去前一天),
第二种输出一个开盘价涨跌列表,涨用 1 表示,跌用-1 表示。''')
openind = sock1['open'].to_numpy()
# np.diff yields element[i + 1] - element[i]; np.sign maps each difference
# to 1, -1 or 0 (0 when the open price did not change).
open_diff = np.diff(openind)
opendf1 = pd.DataFrame(open_diff)
opendf2 = pd.DataFrame(np.sign(open_diff))
print(opendf1)
print(opendf2)

print(' 计算每月收盘价的平均值: ')
print(sock1.index)
print(sock1.index[0])
# The month key is the second '-'-separated field of each index label
# (presumably 'YYYY-MM-DD' strings -- confirm against tushare's output).
months = [label.split('-')[1] for label in sock1.index]
print(len(months))
print(months)
print('----------------------------')
# The date index is replaced by the month labels so the frame can be grouped
# on its own index; the original dates are no longer available on sock1
# after this point.
sock1.index = months
# print(sock1)
print(sock1.groupby(sock1.index)['close'].mean())
#
# <Reference answer>
print('==============================================================================')
# Uses the tushare (ts) and numpy (np) imports from the top of the file.

# 1. Fetch the data, keep the first 5 columns and sort the rows by the index.
# NOTE(review): positional slicing assumes the first five columns are the
# price/volume columns used below -- confirm against tushare's column order.
df = ts.get_hist_data('600036', start = '2018-06-01', end = '2018-12-31')
df = df.iloc[:, :5].sort_index()
print(df)

# 2. Days with the lowest and highest volume. Sort by volume once; the first
# and last rows of the sorted frame are the extremes, and each row's name is
# its index label.
by_volume = df.sort_values('volume')
min_day = by_volume.iloc[0]
min_volume = min_day['volume']
min_volume_date = min_day.name
print("the min volume of {} is at {}".format(min_volume, min_volume_date))
max_day = by_volume.iloc[-1]
max_volume = max_day['volume']
max_volume_date = max_day.name
print("the max volume of {} is at {}".format(max_volume, max_volume_date))

# 3. Records whose volume is at least 1000000.
print(df[df['volume'] >= 1000000])

# 4. Number of days on which close is higher than open.
print(len(df[df['close'] > df['open']]))

# 5. Change in open price between consecutive rows: first the raw difference
# (the first entry is NaN because it has no predecessor), then its sign.
print(df['open'].diff())
print(np.sign(np.diff(df['open'])))

# 6. Mean close per month; the key is characters 5-6 of each index label
# (presumably the month of a 'YYYY-MM-DD' string -- confirm).
month = [item[5:7] for item in df.index]
print(df['close'].groupby(month).mean())