Disclaimer: This code is shared publicly as an example only. We take no responsibility for any loss you incur by using it.
Introduction
This code needs stock price data. Create a folder named "csv" in your working directory to hold the data.
Put the following data files in the csv folder. They contain price data from the Japanese stock market.
Download: https://github.com/shunakanishi/japanese_stockprice
Go back from the csv folder to your working directory and create a Python script named "stockprice.py".
And the code is:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM next-step predictor configuration plus data and model helpers."""

    def __init__(self):
        # Number of consecutive rows fed to the network per sample.
        self.length_of_sequences = 10
        # Width of each input row and of the network output.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut *data* into sliding windows and the row that follows each one.

        Args:
            data: pandas DataFrame whose rows are consecutive observations.
            n_prev: number of rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays: ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` of *data* and ``Y[i]`` holds row
            ``i + n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
        # the same ndarray on old and new pandas. Converting once also avoids
        # re-materialising the frame for every window.
        values = data.values
        n_samples = len(data) - n_prev
        windows = [values[i:i + n_prev] for i in range(n_samples)]
        targets = [values[i + n_prev] for i in range(n_samples)]
        return numpy.array(windows), numpy.array(targets)

    def _build_model(self):
        """Return a compiled LSTM -> Dense -> linear-activation network."""
        model = Sequential()
        model.add(LSTM(
            self.hidden_neurons,
            batch_input_shape=(None, self.length_of_sequences,
                               self.in_out_neurons),
            return_sequences=False))
        model.add(Dense(self.in_out_neurons))
        model.add(Activation("linear"))
        model.compile(loss="mape", optimizer="adam")
        return model

    def create_model(self, f_model, model_filename, weights_filename):
        """Build the network and restore saved weights when they exist.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the saved architecture (JSON) file.
            weights_filename: name of the saved weights file.

        Returns:
            The compiled Keras model, with weights loaded if both saved
            files were found.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)
        # The weights file is what actually gets loaded, so require it too;
        # a JSON file left behind without weights must not crash the run.
        resume = os.path.isfile(model_path) and os.path.isfile(weights_path)
        if resume:
            print('Saved parameters found. I will use this file...')
        else:
            print('Saved parameters Not found. Creating new one...')
        model = self._build_model()
        if resume:
            model.load_weights(weights_path)
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Returns:
            The fitted Keras model.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model
if __name__ == "__main__":
    # Locations of the persisted model artifacts.
    f_log = './log'
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    # Create the output directory up front so that saving cannot fail after
    # the (slow) training step has already finished.
    os.makedirs(f_model, exist_ok=True)

    prediction = Prediction()

    # Data: one CSV file per year, 2007 through 2016, concatenated in order.
    yearly_frames = [
        pandas.read_csv('csv/indices_I101_1d_' + str(year) + '.csv')
        for year in range(2007, 2017)
    ]
    data = pandas.concat(yearly_frames)
    data.columns = ['date', 'open', 'high', 'low', 'close']
    data['date'] = pandas.to_datetime(data['date'], format='%Y-%m-%d')

    # Data of closing price.
    # NOTE(review): scaling happens before the train/test split below, so the
    # scaling statistics include the test range.
    data['close'] = preprocessing.scale(data['close'])
    data = data.sort_values(by='date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['date', 'close']]

    # 20% of the data is used as test data.
    split_pos = int(len(data) * 0.8)
    x_train, y_train = prediction.load_data(
        data[['close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['close']].iloc[split_pos:], prediction.length_of_sequences)

    old_session = KTF.get_session()

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)
    predicted = model.predict(x_test)

    # Persist the architecture (JSON and YAML) and the weights. Context
    # managers guarantee the files are flushed and closed.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))

    KTF.set_session(old_session)

    # Plot predictions against the actual (scaled) closing prices.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()
import numpy
import pandas
import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM next-step predictor configuration plus data and model helpers."""

    def __init__(self):
        # Number of consecutive rows fed to the network per sample.
        self.length_of_sequences = 10
        # Width of each input row and of the network output.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut *data* into sliding windows and the row that follows each one.

        Args:
            data: pandas DataFrame whose rows are consecutive observations.
            n_prev: number of rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays: ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` of *data* and ``Y[i]`` holds row
            ``i + n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
        # the same ndarray on old and new pandas. Converting once also avoids
        # re-materialising the frame for every window.
        values = data.values
        n_samples = len(data) - n_prev
        windows = [values[i:i + n_prev] for i in range(n_samples)]
        targets = [values[i + n_prev] for i in range(n_samples)]
        return numpy.array(windows), numpy.array(targets)

    def _build_model(self):
        """Return a compiled LSTM -> Dense -> linear-activation network."""
        model = Sequential()
        model.add(LSTM(
            self.hidden_neurons,
            batch_input_shape=(None, self.length_of_sequences,
                               self.in_out_neurons),
            return_sequences=False))
        model.add(Dense(self.in_out_neurons))
        model.add(Activation("linear"))
        model.compile(loss="mape", optimizer="adam")
        return model

    def create_model(self, f_model, model_filename, weights_filename):
        """Build the network and restore saved weights when they exist.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the saved architecture (JSON) file.
            weights_filename: name of the saved weights file.

        Returns:
            The compiled Keras model, with weights loaded if both saved
            files were found.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)
        # The weights file is what actually gets loaded, so require it too;
        # a JSON file left behind without weights must not crash the run.
        resume = os.path.isfile(model_path) and os.path.isfile(weights_path)
        if resume:
            print('Saved parameters found. I will use this file...')
        else:
            print('Saved parameters Not found. Creating new one...')
        model = self._build_model()
        if resume:
            model.load_weights(weights_path)
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Returns:
            The fitted Keras model.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model
if __name__ == "__main__":
    # Locations of the persisted model artifacts.
    f_log = './log'
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    # Create the output directory up front so that saving cannot fail after
    # the (slow) training step has already finished.
    os.makedirs(f_model, exist_ok=True)

    prediction = Prediction()

    # Data: one CSV file per year, 2007 through 2016, concatenated in order.
    yearly_frames = [
        pandas.read_csv('csv/indices_I101_1d_' + str(year) + '.csv')
        for year in range(2007, 2017)
    ]
    data = pandas.concat(yearly_frames)
    data.columns = ['date', 'open', 'high', 'low', 'close']
    data['date'] = pandas.to_datetime(data['date'], format='%Y-%m-%d')

    # Data of closing price.
    # NOTE(review): scaling happens before the train/test split below, so the
    # scaling statistics include the test range.
    data['close'] = preprocessing.scale(data['close'])
    data = data.sort_values(by='date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['date', 'close']]

    # 20% of the data is used as test data.
    split_pos = int(len(data) * 0.8)
    x_train, y_train = prediction.load_data(
        data[['close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['close']].iloc[split_pos:], prediction.length_of_sequences)

    old_session = KTF.get_session()

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)
    predicted = model.predict(x_test)

    # Persist the architecture (JSON and YAML) and the weights. Context
    # managers guarantee the files are flushed and closed.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))

    KTF.set_session(old_session)

    # Plot predictions against the actual (scaled) closing prices.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()
To save the trained model and parameters, create a "model" folder and a "log" folder in your working directory.
Then create a "stockprice" folder inside the "model" folder.
And run the script:
$ sudo python3 stockprice.py
The result
The trained model and parameters are saved in the "model" -> "stockprice" folder.
Use data from Yahoo Finance
Now we will use data extracted from Yahoo Finance. Get some data from these links:
Data of Nikkei
https://finance.yahoo.com/quote/%5EN225/history?ltr=1
Data of NY Dow
https://finance.yahoo.com/quote/%5EDJI/history?ltr=1
Data of Nasdaq
https://finance.yahoo.com/quote/%5EIXIC/history?ltr=1
Save the downloaded data as "stock.csv" in the csv folder.
Now open the "stockprice.py" file and replace its contents with the following:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM next-step predictor configuration plus data and model helpers."""

    def __init__(self):
        # Number of consecutive rows fed to the network per sample.
        self.length_of_sequences = 10
        # Width of each input row and of the network output.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut *data* into sliding windows and the row that follows each one.

        Args:
            data: pandas DataFrame whose rows are consecutive observations.
            n_prev: number of rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays: ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` of *data* and ``Y[i]`` holds row
            ``i + n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
        # the same ndarray on old and new pandas. Converting once also avoids
        # re-materialising the frame for every window.
        values = data.values
        n_samples = len(data) - n_prev
        windows = [values[i:i + n_prev] for i in range(n_samples)]
        targets = [values[i + n_prev] for i in range(n_samples)]
        return numpy.array(windows), numpy.array(targets)

    def create_model(self, f_model, model_filename, weights_filename):
        """Restore the saved model if present, otherwise build a new one.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the saved architecture (JSON) file.
            weights_filename: name of the saved weights file.

        Returns:
            A compiled Keras model. When both saved files exist, the
            architecture comes from the JSON file and the weights from the
            weights file; otherwise a fresh LSTM network is created.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)
        # Require the weights file as well: a JSON file left behind without
        # weights must not crash the run in load_weights().
        if os.path.isfile(model_path) and os.path.isfile(weights_path):
            print('Saved parameters found. I will use this file...')
            # Context manager closes the JSON file deterministically.
            with open(model_path) as json_file:
                model = model_from_json(json_file.read())
            model.compile(loss="mape", optimizer="adam")
            model.load_weights(weights_path)
        else:
            print('Saved parameters Not found. Creating new one...')
            model = Sequential()
            model.add(LSTM(
                self.hidden_neurons,
                batch_input_shape=(None, self.length_of_sequences,
                                   self.in_out_neurons),
                return_sequences=False))
            model.add(Dense(self.in_out_neurons))
            model.add(Activation("linear"))
            model.compile(loss="mape", optimizer="adam")
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Returns:
            The fitted Keras model.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model
if __name__ == "__main__":
    # Locations of the persisted model artifacts.
    f_log = './log'
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    # Create the output directory up front so that saving cannot fail after
    # the (slow) training step has already finished.
    os.makedirs(f_model, exist_ok=True)

    prediction = Prediction()

    # Data: a single CSV file.
    data = pandas.read_csv('csv/stock.csv')
    data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')

    # Data of closing price.
    # NOTE(review): scaling happens before the train/test split below, so the
    # scaling statistics include the test range.
    data['Close'] = preprocessing.scale(data['Close'])
    data = data.sort_values(by='Date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['Date', 'Close']]

    # 10% of the data is used as test data (the split point is at 90%).
    split_pos = int(len(data) * 0.9)
    x_train, y_train = prediction.load_data(
        data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)
    predicted = model.predict(x_test)

    # Persist the architecture (JSON and YAML) and the weights. Context
    # managers guarantee the files are flushed and closed.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))

    # Plot predictions against the actual (scaled) closing prices.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()
import numpy
import pandas
import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM next-step predictor configuration plus data and model helpers."""

    def __init__(self):
        # Number of consecutive rows fed to the network per sample.
        self.length_of_sequences = 10
        # Width of each input row and of the network output.
        self.in_out_neurons = 1
        # Number of units in the LSTM layer.
        self.hidden_neurons = 300

    def load_data(self, data, n_prev=10):
        """Cut *data* into sliding windows and the row that follows each one.

        Args:
            data: pandas DataFrame whose rows are consecutive observations.
            n_prev: number of rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays: ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` of *data* and ``Y[i]`` holds row
            ``i + n_prev``.
        """
        # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields
        # the same ndarray on old and new pandas. Converting once also avoids
        # re-materialising the frame for every window.
        values = data.values
        n_samples = len(data) - n_prev
        windows = [values[i:i + n_prev] for i in range(n_samples)]
        targets = [values[i + n_prev] for i in range(n_samples)]
        return numpy.array(windows), numpy.array(targets)

    def create_model(self, f_model, model_filename, weights_filename):
        """Restore the saved model if present, otherwise build a new one.

        Args:
            f_model: directory holding the saved model files.
            model_filename: name of the saved architecture (JSON) file.
            weights_filename: name of the saved weights file.

        Returns:
            A compiled Keras model. When both saved files exist, the
            architecture comes from the JSON file and the weights from the
            weights file; otherwise a fresh LSTM network is created.
        """
        model_path = os.path.join(f_model, model_filename)
        weights_path = os.path.join(f_model, weights_filename)
        print(model_path)
        # Require the weights file as well: a JSON file left behind without
        # weights must not crash the run in load_weights().
        if os.path.isfile(model_path) and os.path.isfile(weights_path):
            print('Saved parameters found. I will use this file...')
            # Context manager closes the JSON file deterministically.
            with open(model_path) as json_file:
                model = model_from_json(json_file.read())
            model.compile(loss="mape", optimizer="adam")
            model.load_weights(weights_path)
        else:
            print('Saved parameters Not found. Creating new one...')
            model = Sequential()
            model.add(LSTM(
                self.hidden_neurons,
                batch_input_shape=(None, self.length_of_sequences,
                                   self.in_out_neurons),
                return_sequences=False))
            model.add(Dense(self.in_out_neurons))
            model.add(Activation("linear"))
            model.compile(loss="mape", optimizer="adam")
        return model

    def train(self, f_model, model_filename, weights_filename, X_train, y_train):
        """Create (or restore) the model and fit it on the training data.

        Returns:
            The fitted Keras model.
        """
        model = self.create_model(f_model, model_filename, weights_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=15)
        return model
if __name__ == "__main__":
    # Locations of the persisted model artifacts.
    f_log = './log'
    f_model = './model/stockprice'
    model_filename = 'stockprice_model.json'
    yaml_filename = 'stockprice_model.yaml'
    weights_filename = 'stockprice_model_weights.hdf5'

    # Create the output directory up front so that saving cannot fail after
    # the (slow) training step has already finished.
    os.makedirs(f_model, exist_ok=True)

    prediction = Prediction()

    # Data: a single CSV file.
    data = pandas.read_csv('csv/stock.csv')
    data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')

    # Data of closing price.
    # NOTE(review): scaling happens before the train/test split below, so the
    # scaling statistics include the test range.
    data['Close'] = preprocessing.scale(data['Close'])
    data = data.sort_values(by='Date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['Date', 'Close']]

    # 10% of the data is used as test data (the split point is at 90%).
    split_pos = int(len(data) * 0.9)
    x_train, y_train = prediction.load_data(
        data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

    model = prediction.train(f_model, model_filename, weights_filename,
                             x_train, y_train)
    predicted = model.predict(x_test)

    # Persist the architecture (JSON and YAML) and the weights. Context
    # managers guarantee the files are flushed and closed.
    json_string = model.to_json()
    with open(os.path.join(f_model, model_filename), 'w') as json_file:
        json_file.write(json_string)
    yaml_string = model.to_yaml()
    with open(os.path.join(f_model, yaml_filename), 'w') as yaml_file:
        yaml_file.write(yaml_string)
    print('save weights')
    model.save_weights(os.path.join(f_model, weights_filename))

    # Plot predictions against the actual (scaled) closing prices.
    result = pandas.DataFrame(predicted)
    result.columns = ['predict']
    result['actual'] = y_test
    result.plot()
    plt.show()
And run the script:
$ sudo python3 stockprice.py
Training will now run on the data downloaded from Yahoo Finance.
(The Nikkei data contains rows with empty "null" cells, so remove those rows before training.)
You can do this with a spreadsheet macro like this:
(If you are using LibreOffice, add "Option VBASupport 1" at the top of the macro.)
' Delete every row of "sheet1" whose column-B cell contains the text "null".
Sub RowsDelete()
    Dim i As Long
    Dim myRow As Long
    ' Qualify every cell reference with the same sheet; unqualified Cells
    ' would act on whichever sheet happens to be active.
    With Worksheets("sheet1")
        ' Last used row in column A. .Rows.Count replaces the hard-coded
        ' 65536-row limit of old .xls sheets.
        myRow = .Cells(.Rows.Count, 1).End(xlUp).Row
        ' Walk bottom-up so that deleting a row does not shift the rows
        ' that are still to be visited.
        For i = myRow To 1 Step -1
            If .Cells(i, 2).Value = "null" Then
                .Cells(i, 2).EntireRow.Delete
            End If
        Next i
    End With
End Sub
' NOTE(review): these lines appear to repeat the body of RowsDelete without
' its "Sub RowsDelete()" header - confirm whether this copy is intentional.
Dim i As Long
Dim myRow As Long
' Last used row in column A of "sheet1", searching upward from row 65536.
myRow = Worksheets("sheet1").Range("A65536").End(xlUp).Row
' Iterate from the bottom row upward.
For i = myRow To 1 Step -1
' Delete the entire row when the cell in column 2 holds the text "null".
If Cells(i, 2).Value = "null" Then
Cells(i, 2).EntireRow.Delete
End If
Next i
End Sub