Notes on everything: Deep learning: Prediction of stock price

Caution: This code does not work on a CUI-only (command-line) operating system because it opens a plot window, which means you cannot run it in VirtualBox + Vagrant unless a GUI environment is available.
Disclaimer: This code is published only to share code examples. We take no responsibility for any loss you may incur by using it.

Introduction

This code needs stock price data. Create a folder named "csv" in your working directory to hold the data.

Put the following data files in the csv folder. They contain price data from the Japanese stock market.
Download: https://github.com/shunakanishi/japanese_stockprice

Go back from the csv folder to the working directory and create a Python script named "stockprice.py".

And the code is:

#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt

from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path

class Prediction:
    """Single-layer LSTM model that predicts the next value of a series.

    The network is LSTM -> Dense -> linear activation, compiled with the
    "mape" loss and the "adam" optimizer.
    """

    def __init__(self):
        # Number of consecutive time steps fed to the network per sample.
        self.length_of_sequences = 10
        # Features per time step; also the width of the output layer.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut ``data`` into sliding windows and their next-step targets.

        Args:
            data: pandas DataFrame with one row per time step (the script
                below passes a single-column frame).
            n_prev: number of consecutive rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays. For a DataFrame with ``c``
            columns and ``m = len(data) - n_prev`` windows, ``X`` has shape
            ``(m, n_prev, c)`` and ``Y`` has shape ``(m, c)``; ``Y[i]`` is
            the row that immediately follows window ``X[i]``. Both arrays
            are empty when ``len(data) <= n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent accessor and exists in old and new pandas alike.
        # Converting once also avoids a DataFrame slice per window.
        values = data.values
        X, Y = [], []
        for i in range(len(values) - n_prev):
            X.append(values[i:i + n_prev])
            Y.append(values[i + n_prev])
        return numpy.array(X), numpy.array(Y)

    def _build_model(self):
        """Return the freshly initialised, compiled LSTM network."""
        model = Sequential()
        model.add(LSTM(self.hidden_neurons,
                       batch_input_shape=(None, self.length_of_sequences,
                                          self.in_out_neurons),
                       return_sequences=False))
        model.add(Dense(self.in_out_neurons))
        model.add(Activation("linear"))
        model.compile(loss="mape", optimizer="adam")
        return model

    def create_model(self, f_model, model_filename, weights_filename):
        """Build the network and restore saved weights when available.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the saved model description file; only
                its existence is checked, the architecture itself is always
                rebuilt in code.
            weights_filename: name of the saved weights file.

        Returns:
            The compiled model, with weights loaded from disk if both the
            model file and the weights file exist.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)

        # Require the weights file too: it is the file that is actually
        # loaded, so checking only the model file would crash when the
        # weights are missing.
        use_saved = os.path.isfile(model_path) and os.path.isfile(weights_path)
        if use_saved:
            print('Saved parameters found. I will use this file...')
        else:
            print('Saved parameters Not found. Creating new one...')

        model = self._build_model()
        if use_saved:
            model.load_weights(weights_path)
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Args:
            f_model, model_filename, weights_filename: forwarded to
                ``create_model``.
            X_train: input windows, as returned by ``load_data``.
            y_train: target values, as returned by ``load_data``.

        Returns:
            The model after fitting for 15 epochs with batch size 10.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model

if __name__ == "__main__":

    # Folder reserved for logs; nothing below writes to it.
    f_log = './log'
    # Folder where the model description and weights are saved.
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    prediction = Prediction()

    # Data: one CSV per year from 2007 through 2016, concatenated in a
    # single pass instead of re-copying the growing frame every iteration.
    data = pandas.concat(
        [pandas.read_csv('csv/indices_I101_1d_' + str(year) + '.csv')
         for year in range(2007, 2017)])
    data.columns = ['date', 'open', 'high', 'low', 'close']
    data['date'] = pandas.to_datetime(data['date'], format='%Y-%m-%d')
    # Data of closing price, standardised over the whole data set.
    data['close'] = preprocessing.scale(data['close'])
    data = data.sort_values(by='date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['date', 'close']]

    # 20% of the data is used as test data.
    split_pos = int(len(data) * 0.8)
    x_train, y_train = prediction.load_data(
        data[['close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['close']].iloc[split_pos:], prediction.length_of_sequences)

    old_session = KTF.get_session()

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)

    predicted = model.predict(x_test)

    # Make sure the output folder exists so the saves below cannot fail
    # just because it was not created by hand.
    os.makedirs(f_model, exist_ok=True)

    # Serialise first, then write inside "with" so that each file handle is
    # closed (and its content flushed) deterministically.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))
    KTF.set_session(old_session)

    # Plot the predictions against the actual values of the test period.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()

To save the trained model and parameters, create a "model" folder and a "log" folder.

Then, inside the "model" folder, create a "stockprice" folder.

And run the script:

$ sudo python3 stockprice.py

The result

And the trained model and parameters are saved in "model" -> "stockprice" folder.

Use data from Yahoo Finance

Now we will use data extracted from Yahoo Finance. Get some data from these links:

Data of Nikkei
https://finance.yahoo.com/quote/%5EN225/history?ltr=1

Data of NY Dow
https://finance.yahoo.com/quote/%5EDJI/history?ltr=1

Data of Nasdaq
https://finance.yahoo.com/quote/%5EIXIC/history?ltr=1

And save the data as "stock.csv" in the csv folder.

Now open the "stockprice.py" file and replace its contents with the following:

#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt

from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path

class Prediction:
    """Single-layer LSTM model that predicts the next value of a series.

    A new network is LSTM -> Dense -> linear activation, compiled with the
    "mape" loss and the "adam" optimizer; a previously saved network is
    restored from its JSON description and weights file instead.
    """

    def __init__(self):
        # Number of consecutive time steps fed to the network per sample.
        self.length_of_sequences = 10
        # Features per time step; also the width of the output layer.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut ``data`` into sliding windows and their next-step targets.

        Args:
            data: pandas DataFrame with one row per time step (the script
                below passes a single-column frame).
            n_prev: number of consecutive rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays. For a DataFrame with ``c``
            columns and ``m = len(data) - n_prev`` windows, ``X`` has shape
            ``(m, n_prev, c)`` and ``Y`` has shape ``(m, c)``; ``Y[i]`` is
            the row that immediately follows window ``X[i]``. Both arrays
            are empty when ``len(data) <= n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent accessor and exists in old and new pandas alike.
        # Converting once also avoids a DataFrame slice per window.
        values = data.values
        X, Y = [], []
        for i in range(len(values) - n_prev):
            X.append(values[i:i + n_prev])
            Y.append(values[i + n_prev])
        return numpy.array(X), numpy.array(Y)

    def create_model(self, f_model, model_filename, weights_filename):
        """Restore the saved model if present, otherwise build a new one.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the JSON model description file.
            weights_filename: name of the saved weights file.

        Returns:
            A compiled model. When both files exist, the architecture comes
            from the JSON file and the weights from the weights file;
            otherwise a freshly initialised network is returned.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)

        # Require the weights file as well, so a missing weights file falls
        # back to a new model instead of crashing in load_weights().
        if os.path.isfile(model_path) and os.path.isfile(weights_path):
            print('Saved parameters found. I will use this file...')
            # "with" closes the file handle as soon as the JSON is read.
            with open(model_path) as json_file:
                json_string = json_file.read()
            model = model_from_json(json_string)
            model.compile(loss="mape", optimizer="adam")
            model.load_weights(weights_path)
        else:
            print('Saved parameters Not found. Creating new one...')
            model = Sequential()
            model.add(LSTM(self.hidden_neurons,
                           batch_input_shape=(None, self.length_of_sequences,
                                              self.in_out_neurons),
                           return_sequences=False))
            model.add(Dense(self.in_out_neurons))
            model.add(Activation("linear"))
            model.compile(loss="mape", optimizer="adam")
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Args:
            f_model, model_filename, weights_filename: forwarded to
                ``create_model``.
            X_train: input windows, as returned by ``load_data``.
            y_train: target values, as returned by ``load_data``.

        Returns:
            The model after fitting for 15 epochs with batch size 10.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model

if __name__ == "__main__":

    # Folder reserved for logs; nothing below writes to it.
    f_log = './log'
    # Folder where the model description and weights are saved.
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    prediction = Prediction()

    # Data: a single CSV file.
    data = pandas.read_csv('csv/stock.csv')

    # Keep only the first five columns before renaming them: assigning five
    # names to a frame with more columns (for example trailing volume
    # columns, if the file has them) would raise a length-mismatch error.
    data = data.iloc[:, :5].copy()
    data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')
    # Drop rows with a missing closing price so that NaN values are not fed
    # into scaling and training.
    data = data.dropna(subset=['Close'])
    # Data of closing price, standardised over the whole data set.
    data['Close'] = preprocessing.scale(data['Close'])
    data = data.sort_values(by='Date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['Date', 'Close']]

    # 10% of the data is used as test data.
    split_pos = int(len(data) * 0.9)
    x_train, y_train = prediction.load_data(
        data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)

    predicted = model.predict(x_test)

    # Make sure the output folder exists so the saves below cannot fail
    # just because it was not created by hand.
    os.makedirs(f_model, exist_ok=True)

    # Serialise first, then write inside "with" so that each file handle is
    # closed (and its content flushed) deterministically.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))

    # Plot the predictions against the actual values of the test period.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()

And run the script:

$ sudo python3 stockprice.py

Now the machine learning will start based on the data extracted from Yahoo finance.
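Note: the Nikkei data contains rows whose cells are "null", so remove those rows before running the script, for example with the following spreadsheet macro
(if you are using LibreOffice, add "Option VBASupport 1" at the top of the macro module):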

Sub RowsDelete()
    ' Delete every row of "sheet1" whose column-B cell holds the text "null".
    Dim i As Long
    Dim myRow As Long
    ' Qualify every cell reference with the same worksheet, so the rows that
    ' are checked and deleted are on the sheet whose last row was measured,
    ' whichever sheet happens to be active.
    With Worksheets("sheet1")
        ' Last used row in column A, searched upward from the bottom of the
        ' sheet (no hard-coded 65536-row limit).
        myRow = .Cells(.Rows.Count, 1).End(xlUp).Row
        ' Walk bottom-up so deleting a row does not shift rows not yet checked.
        For i = myRow To 1 Step -1
            If .Cells(i, 2).Value = "null" Then
                .Cells(i, 2).EntireRow.Delete
            End If
        Next i
    End With
End Sub

Sunday, September 10, 2017

Deep learning: Prediction of stock price

Introduction

Use data from Yahoo Finance