-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotdata.py
103 lines (78 loc) · 3.59 KB
/
plotdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import bs4 as bs
import pickle
import requests
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style
#from mplfinance import candlestick_ohlc
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mdates
import seaborn as sb
# Module-level price history for the target stock (AMZN), loaded once when the
# file is executed. The first CSV column becomes the index and is parsed as dates.
# plotdata() reads this frame; featuredata() also adds columns to it in place.
df = pd.read_csv('stock_details/AMZN.csv', index_col=0, parse_dates=True)
def plotdata():
    """Show a candlestick chart of AMZN's adjusted close above a volume panel.

    Works on the module-level ``df``. ``Adj Close`` is resampled into 10-day
    open/high/low/close bars and ``Volume`` is summed over the same 10-day
    bins. The bars are drawn on the upper axes (5 of 6 grid rows) and the
    summed volume is drawn as a filled area on the lower axes, which shares
    its x-axis with the upper one. Blocks in ``plt.show()``; returns nothing.
    """
    # 10-day OHLC bars; reset_index() turns the date index into a regular
    # column (named 'Date' here, which assumes the CSV's first column is
    # called 'Date' - verify against the data file).
    bars = df['Adj Close'].resample('10D').ohlc().reset_index()
    # candlestick_ohlc expects matplotlib date numbers, not timestamps.
    bars['Date'] = bars['Date'].map(mdates.date2num)

    volume_per_bin = df['Volume'].resample('10D').sum()

    price_axes = plt.subplot2grid((6, 1), (0, 0), rowspan=5, colspan=1)
    volume_axes = plt.subplot2grid(
        (6, 1), (5, 0), rowspan=1, colspan=1, sharex=price_axes
    )
    # Render the x-axis tick labels as dates.
    price_axes.xaxis_date()

    candlestick_ohlc(price_axes, bars.values, width=2, colorup='g')

    volume_dates = volume_per_bin.index.map(mdates.date2num)
    volume_axes.fill_between(volume_dates, volume_per_bin.values, 0)

    plt.show()
# Executed whenever this file is run or imported: draws the chart right away.
plotdata()
def featuredata():
    """Add engineered feature columns to the module-level ``df`` and save it.

    Three columns are added to ``df`` in place:

    * ``Moving_av`` - rolling mean of ``Adj Close`` over a 50-row window
      (``min_periods=0``, so the earliest rows average whatever is available).
    * ``Increase_in_vol`` - change in ``Volume`` from the previous row.
    * ``Increase_in_adj_close`` - change in ``Adj Close`` from the previous row.

    The first row has no predecessor, so its change is recorded as 0.
    ``Moving_av`` is also plotted via ``Series.plot()`` (no ``plt.show()`` is
    issued here), and the resulting frame is written to
    ``dataset_target_2.csv`` without its index. Returns nothing.
    """
    df['Moving_av'] = df['Adj Close'].rolling(window=50, min_periods=0).mean()
    df['Moving_av'].plot()

    # Series.diff() yields every row-to-row change in one vectorised pass,
    # rather than building two full row objects with .iloc on each iteration.
    for source_column, feature_column in (
        ('Volume', 'Increase_in_vol'),
        ('Adj Close', 'Increase_in_adj_close'),
    ):
        change = df[source_column].diff()
        if not change.empty:
            # diff() leaves NaN in the first slot; this feature uses 0 there.
            change.iloc[0] = 0
        df[feature_column] = change

    # index=False: the date index is deliberately not written to this file.
    df.to_csv("dataset_target_2.csv", index=False)
def mergedata():
    """Build ``Dataset_main.csv`` from the target-stock and other-stock features.

    Steps:

    1. Call ``featuredata()`` so ``dataset_target_2.csv`` is freshly written.
    2. Read that file, ``stock_details/AMZN.csv`` (used only for its ``Date``
       column) and ``Dataset_temp.csv``.
    3. Outer-join the target features with ``Dataset_temp.csv`` on the row
       index, replace missing values with 0.0, attach the dates as a ``Date``
       column and write the result without the index.

    Returns nothing.
    """
    featuredata()

    target_features = pd.read_csv('dataset_target_2.csv')
    dates = pd.read_csv('stock_details/AMZN.csv')['Date'].tolist()
    other_features = pd.read_csv('Dataset_temp.csv')

    merged = target_features.join(other_features, how='outer')
    # fillna() returns a new frame; the result has to be kept, otherwise the
    # gaps introduced by the outer join are written out unfilled.
    merged = merged.fillna(0.0)
    # NOTE(review): this assumes the joined frame has exactly as many rows as
    # AMZN.csv; pandas raises ValueError on a length mismatch - confirm that
    # Dataset_temp.csv never has more rows than the target file.
    merged['Date'] = dates
    merged.to_csv('Dataset_main.csv', index=False)
# Executed whenever this file is run or imported: mergedata() first calls
# featuredata() and then writes Dataset_main.csv.
mergedata()