Sunday, September 10, 2017

Deep learning: Prediction of stock price

Caution: This code does not work on a CUI-only (command-line) operating system, because it opens a plot window. This means you cannot run it in VirtualBox + Vagrant unless a GUI environment is available.
Disclaimer: This code is published only as an example. We take no responsibility for any loss you incur by using it.

Introduction


This code needs stock price data. Create a folder named "csv" in your working directory to hold it.

Put the following data files in the csv folder. They contain price data from the Japanese stock market.
Download: https://github.com/shunakanishi/japanese_stockprice

Go back from the csv folder to the directory that contains it, and create a Python script named "stockprice.py" there.

And the code is:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt

from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path

class Prediction:
  """Next-value predictor for a price series, built on a single LSTM layer.

  Attributes:
    length_of_sequences: number of consecutive time steps fed to the LSTM.
    in_out_neurons: number of features per time step (and of outputs).
    hidden_neurons: number of units in the LSTM layer.
  """

  def __init__(self):
    self.length_of_sequences = 10
    self.in_out_neurons = 1
    self.hidden_neurons = 300

  def load_data(self, data, n_prev=10):
    """Cut a series into sliding windows and their following values.

    Args:
      data: pandas DataFrame (or Series) ordered in time.
      n_prev: window length, i.e. how many rows form one input sample.

    Returns:
      (X, Y) as numpy arrays. X[i] holds rows i .. i+n_prev-1 of `data`
      and Y[i] holds row i+n_prev. Both are empty when `data` has no more
      than `n_prev` rows.
    """
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    values = data.values
    n_samples = len(values) - n_prev
    retX = numpy.array([values[i:i + n_prev] for i in range(n_samples)])
    retY = numpy.array([values[i + n_prev] for i in range(n_samples)])
    return retX, retY

  def _build_model(self):
    """Build and compile the LSTM -> Dense -> linear network."""
    model = Sequential()
    model.add(LSTM(self.hidden_neurons,
                   batch_input_shape=(None, self.length_of_sequences, self.in_out_neurons),
                   return_sequences=False))
    model.add(Dense(self.in_out_neurons))
    model.add(Activation("linear"))
    # NOTE(review): "mape" divides by the target value; if the targets are
    # standardized around zero this loss can become very large - consider "mse".
    model.compile(loss="mape", optimizer="adam")
    return model

  def create_model(self, f_model, model_filename, weights_filename) :
    """Return a compiled model, restoring saved weights when available.

    Args:
      f_model: directory holding the saved model files.
      model_filename: name of the saved model description (JSON) file.
      weights_filename: name of the saved weights file.

    Returns:
      A compiled Keras model. Saved weights are loaded only when both the
      model file and the weights file exist; otherwise the model is fresh.
    """
    model_path = os.path.join(f_model, model_filename)
    weights_path = os.path.join(f_model, weights_filename)
    print(model_path)
    # The weights file is what actually gets loaded, so require it as well;
    # checking only the JSON file crashed when the weights were missing.
    if os.path.isfile(model_path) and os.path.isfile(weights_path):
      print('Saved parameters found. I will use this file...')
      model = self._build_model()
      model.load_weights(weights_path)
    else:
      print('Saved parameters Not found. Creating new one...')
      model = self._build_model()
    return model

  def train(self, f_model, model_filename, weights_filename, X_train, y_train,
            batch_size=10, epochs=15) :
    """Create (or restore) the model and fit it on the training data.

    Args:
      f_model, model_filename, weights_filename: passed to create_model().
      X_train: input windows, as returned by load_data().
      y_train: target values, as returned by load_data().
      batch_size: mini-batch size passed to model.fit().
      epochs: number of training epochs passed to model.fit().

    Returns:
      The fitted Keras model.
    """
    model = self.create_model(f_model, model_filename, weights_filename)
    # Learn
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    return model


if __name__ == "__main__":

  f_log = './log'  # not used by this script
  f_model = './model/stockprice'
  model_filename = 'stockprice_model.json'
  yaml_filename = 'stockprice_model.yaml'
  weights_filename = 'stockprice_model_weights.hdf5'

  # Make sure the output folder exists, so that saving the trained model
  # cannot fail after the (slow) training has already finished.
  os.makedirs(f_model, exist_ok=True)

  prediction = Prediction()

  # Data: one CSV file per year, 2007 through 2016.
  data = pandas.concat([
    pandas.read_csv('csv/indices_I101_1d_' + str(year) + '.csv')
    for year in range(2007, 2017)
  ])
  data.columns = ['date', 'open', 'high', 'low', 'close']
  data['date'] = pandas.to_datetime(data['date'], format='%Y-%m-%d')
  # Data of closing price, standardized to zero mean and unit variance
  data['close'] = preprocessing.scale(data['close'])
  data = data.sort_values(by='date')
  data = data.reset_index(drop=True)
  data = data.loc[:, ['date', 'close']]

  # 20% of the data is used as test data.
  split_pos = int(len(data) * 0.8)
  x_train, y_train = prediction.load_data(data[['close']].iloc[0:split_pos], prediction.length_of_sequences)
  x_test,  y_test  = prediction.load_data(data[['close']].iloc[split_pos:], prediction.length_of_sequences)

  old_session = KTF.get_session()

  model = prediction.train(f_model, model_filename, weights_filename, x_train, y_train)

  predicted = model.predict(x_test)

  # Save the model description; `with` closes each file even if writing fails.
  json_string = model.to_json()
  with open(os.path.join(f_model, model_filename), 'w') as f:
    f.write(json_string)
  yaml_string = model.to_yaml()
  with open(os.path.join(f_model, yaml_filename), 'w') as f:
    f.write(yaml_string)
  print('save weights')
  model.save_weights(os.path.join(f_model,weights_filename))
  KTF.set_session(old_session)

  # Plot prediction against the actual (scaled) closing price.
  result = pandas.DataFrame(predicted)
  result.columns = ['predict']
  # y_test has one value per sample; flatten it to a plain 1-D column.
  result['actual'] = y_test.ravel()
  result.plot()
  plt.show()

To save the trained model and parameters, create a "model" folder and a "log" folder.


And in the "model" folder, create "stockprice" folder.

And run the script:
$ sudo python3 stockprice.py


The result

And the trained model and parameters are saved in "model" -> "stockprice" folder. 


Use data from Yahoo Finance


Now we will use data extracted from Yahoo Finance. Get some data from these links:

Data of Nikkei
https://finance.yahoo.com/quote/%5EN225/history?ltr=1

Data of NY Dow
https://finance.yahoo.com/quote/%5EDJI/history?ltr=1

Data of Nasdaq
https://finance.yahoo.com/quote/%5EIXIC/history?ltr=1

Save the downloaded data as "stock.csv" in the csv folder. The script expects the columns Date, Open, High, Low, Close, in that order.

Now open the "stockprice.py" file and replace its contents with the following:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt

from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path

class Prediction:
  """Next-value predictor for a price series, built on a single LSTM layer.

  Attributes:
    length_of_sequences: number of consecutive time steps fed to the LSTM.
    in_out_neurons: number of features per time step (and of outputs).
    hidden_neurons: number of units in the LSTM layer.
  """

  def __init__(self):
    self.length_of_sequences = 10
    self.in_out_neurons = 1
    self.hidden_neurons = 300

  def load_data(self, data, n_prev=10):
    """Cut a series into sliding windows and their following values.

    Args:
      data: pandas DataFrame (or Series) ordered in time.
      n_prev: window length, i.e. how many rows form one input sample.

    Returns:
      (X, Y) as numpy arrays. X[i] holds rows i .. i+n_prev-1 of `data`
      and Y[i] holds row i+n_prev. Both are empty when `data` has no more
      than `n_prev` rows.
    """
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    values = data.values
    n_samples = len(values) - n_prev
    retX = numpy.array([values[i:i + n_prev] for i in range(n_samples)])
    retY = numpy.array([values[i + n_prev] for i in range(n_samples)])
    return retX, retY

  def create_model(self, f_model, model_filename, weights_filename) :
    """Return a compiled model, restored from disk when a saved one exists.

    Args:
      f_model: directory holding the saved model files.
      model_filename: name of the saved model description (JSON) file.
      weights_filename: name of the saved weights file.

    Returns:
      A compiled Keras model. It is rebuilt from the JSON description and
      the saved weights when both files exist; otherwise a new, untrained
      LSTM -> Dense -> linear network is created.
    """
    model_path = os.path.join(f_model, model_filename)
    weights_path = os.path.join(f_model, weights_filename)
    print(model_path)
    # Both files are read below, so require both; checking only the JSON
    # file crashed when the weights file was missing.
    if os.path.isfile(model_path) and os.path.isfile(weights_path):
      print('Saved parameters found. I will use this file...')
      with open(model_path) as f:
        json_string = f.read()
      model = model_from_json(json_string)
      model.compile(loss="mape", optimizer="adam")
      model.load_weights(weights_path)
    else:
      print('Saved parameters Not found. Creating new one...')
      model = Sequential()
      model.add(LSTM(self.hidden_neurons,
                     batch_input_shape=(None, self.length_of_sequences, self.in_out_neurons),
                     return_sequences=False))
      model.add(Dense(self.in_out_neurons))
      model.add(Activation("linear"))
      model.compile(loss="mape", optimizer="adam")
    return model

  def train(self, f_model, model_filename, weights_filename, X_train, y_train,
            batch_size=10, epochs=15) :
    """Create (or restore) the model and fit it on the training data.

    Args:
      f_model, model_filename, weights_filename: passed to create_model().
      X_train: input windows, as returned by load_data().
      y_train: target values, as returned by load_data().
      batch_size: mini-batch size passed to model.fit().
      epochs: number of training epochs passed to model.fit().

    Returns:
      The fitted Keras model.
    """
    model = self.create_model(f_model, model_filename, weights_filename)
    # Learn
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
    return model


if __name__ == "__main__":

  f_log = './log'  # not used by this script
  f_model = './model/stockprice'
  model_filename = 'stockprice_model.json'
  yaml_filename = 'stockprice_model.yaml'
  weights_filename = 'stockprice_model_weights.hdf5'

  # Make sure the output folder exists, so that saving the trained model
  # cannot fail after the (slow) training has already finished.
  os.makedirs(f_model, exist_ok=True)

  prediction = Prediction()

  # Data
  data = pandas.read_csv('csv/stock.csv')
  # Keep only the first five columns so that a file with extra trailing
  # columns still loads (presumably a raw Yahoo Finance export has more
  # than five - TODO confirm for your download).
  data = data.iloc[:, :5].copy()
  data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
  data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')
  # Rows without a usable closing price (e.g. "null" cells) would turn the
  # scaled series and the training loss into NaN, so drop them here.
  data['Close'] = pandas.to_numeric(data['Close'], errors='coerce')
  data = data.dropna(subset=['Close'])
  # Data of closing price, standardized to zero mean and unit variance
  data['Close'] = preprocessing.scale(data['Close'])
  data = data.sort_values(by='Date')
  data = data.reset_index(drop=True)
  data = data.loc[:, ['Date', 'Close']]

  # 10% of the data is used as test data.
  split_pos = int(len(data) * 0.9)
  x_train, y_train = prediction.load_data(data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
  x_test,  y_test  = prediction.load_data(data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

  model = prediction.train(f_model, model_filename, weights_filename, x_train, y_train)

  predicted = model.predict(x_test)

  # Save the model description; `with` closes each file even if writing fails.
  json_string = model.to_json()
  with open(os.path.join(f_model, model_filename), 'w') as f:
    f.write(json_string)
  yaml_string = model.to_yaml()
  with open(os.path.join(f_model, yaml_filename), 'w') as f:
    f.write(yaml_string)
  print('save weights')
  model.save_weights(os.path.join(f_model,weights_filename))

  # Plot prediction against the actual (scaled) closing price.
  result = pandas.DataFrame(predicted)
  result.columns = ['predict']
  # y_test has one value per sample; flatten it to a plain 1-D column.
  result['actual'] = y_test.ravel()
  result.plot()
  plt.show()

And run the script:
$ sudo python3 stockprice.py

Now the machine learning will start based on the data extracted from Yahoo finance.
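(The Nikkei data contains rows whose cells hold the text "null" instead of a price. Remove those rows from the spreadsheet before running the script, for example with the macro below.
If you are using LibreOffice, add "Option VBASupport 1" at the top of the macro module.)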
' Delete every row of worksheet "sheet1" whose second column holds the text "null".
Sub RowsDelete()
   Dim i As Long
   Dim myRow As Long
   ' Qualify every cell reference with the same sheet, so the macro does not
   ' depend on which sheet happens to be active when it runs.
   With Worksheets("sheet1")
       ' Last used row in column A; Rows.Count adapts to the sheet's row limit
       ' instead of assuming 65536 rows.
       myRow = .Cells(.Rows.Count, 1).End(xlUp).Row
       ' Walk bottom-up so that deleting a row does not shift the rows
       ' that are still to be checked.
       For i = myRow To 1 Step -1
           If .Cells(i, 2).Value = "null" Then
               .Cells(i, 2).EntireRow.Delete
           End If
       Next i
   End With
End Sub