1. Prediction of stockprice (Training a model)
2. Prediction of stockprice (Prediction by CSV)
3. Prediction of stockprice with trained weight with Google API
Save the code shown below as stockprice.py and use it this way:
$ python3.5 stockprice.py location/of/your_csv.csv
You can specify a file or a folder path. If it is a file, the model is trained on that single CSV file. If it is a folder, all the CSV files in the folder are combined and the model is trained on the combined data. You can download stock price CSV files from Yahoo Finance.
The code:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt
import sys
from pathlib import Path
from keras.models import load_model
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM-based sequence regressor: windowing, model creation and training.

    Attributes are set in ``__init__``:
      * ``length_of_sequences`` -- number of time steps in one input window.
      * ``in_out_neurons``      -- feature count per time step and output size.
      * ``hidden_neurons``      -- number of units in the LSTM layer.
    """

    def __init__(self):
        self.length_of_sequences = 10
        self.in_out_neurons = 1
        self.hidden_neurons = 600

    def load_data(self, data, n_prev=10):
        """Cut ``data`` into sliding windows and their next-step targets.

        Args:
            data: pandas object exposing ``.values`` (the script in this file
                passes a one-column DataFrame).
            n_prev: number of consecutive rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays.  ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` and ``Y[i]`` holds row ``i + n_prev``.
            When ``data`` has ``n_prev`` rows or fewer both arrays are empty.
        """
        # ``as_matrix()`` was removed in pandas 1.0; ``.values`` returns the
        # same ndarray on old and new pandas.  Reading it once also avoids
        # building a new DataFrame slice on every iteration.
        values = data.values
        window_count = len(values) - n_prev
        X = [values[i:i + n_prev] for i in range(window_count)]
        Y = [values[i + n_prev] for i in range(window_count)]
        return numpy.array(X), numpy.array(Y)

    def create_model(self, f_model, model_filename):
        """Load the saved model if present, otherwise build a new one.

        Args:
            f_model: directory in which the model file is looked up.
            model_filename: file name of the saved model inside ``f_model``.

        Returns:
            The model returned by ``load_model`` when the file exists,
            otherwise a newly compiled ``Sequential`` model
            (LSTM -> Dense -> linear activation).
        """
        model_path = os.path.join(f_model, model_filename)
        print(model_path)
        if os.path.isfile(model_path):
            print('Saved parameters found. I will use this file...')
            model = load_model(model_path)
        else:
            print('Saved parameters Not found. Creating new one...')
            model = Sequential()
            model.add(LSTM(self.hidden_neurons,
                           batch_input_shape=(None,
                                              self.length_of_sequences,
                                              self.in_out_neurons),
                           return_sequences=False))
            model.add(Dense(self.in_out_neurons))
            model.add(Activation("linear"))
            # NOTE(review): the script in this file feeds standardized
            # targets, which can be close to zero; confirm that a
            # percentage-based loss ("mape") is really intended here.
            model.compile(loss="mape", optimizer="adam")
        return model

    def train(self, f_model, model_filename, X_train, y_train):
        """Create (or load) the model and fit it on the training windows.

        Args:
            f_model: directory passed on to ``create_model``.
            model_filename: model file name passed on to ``create_model``.
            X_train: input windows, as produced by ``load_data``.
            y_train: targets, as produced by ``load_data``.

        Returns:
            The fitted model.
        """
        model = self.create_model(f_model, model_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=20)
        return model
def read_price_csvs(csv_loc):
    """Read price data from a single CSV file or from every CSV in a folder.

    Args:
        csv_loc: path to a CSV file, or to a folder that is searched
            recursively for ``*.csv`` files.

    Returns:
        A pandas DataFrame.  For a folder, the rows of all CSV files found
        are concatenated (original row indexes are kept).

    Raises:
        ValueError: ``csv_loc`` is a folder that contains no CSV file.
        FileNotFoundError: ``csv_loc`` is neither a file nor a folder.
    """
    if os.path.isfile(csv_loc):
        return pandas.read_csv(csv_loc)
    if os.path.isdir(csv_loc):
        # Sorted so that the concatenation order is reproducible.
        csv_paths = sorted(Path(csv_loc).glob('**/*.csv'))
        if not csv_paths:
            raise ValueError("No csv file found in folder: " + str(csv_loc))
        # Path objects are converted to str for older pandas versions.
        frames = [pandas.read_csv(str(path)) for path in csv_paths]
        # A single concat avoids re-copying the accumulated rows per file.
        return pandas.concat(frames)
    raise FileNotFoundError("This is not a file nor a folder.")


if __name__ == "__main__":
    f_model = './'
    model_filename = 'stockprice_model.hdf5'
    prediction = Prediction()

    # Data
    # A missing command-line argument raises IndexError (not NameError), so
    # the argument count is checked explicitly and the script stops early.
    if len(sys.argv) < 2 or sys.argv[1] == "":
        sys.exit("Please give a location of the csv file.")
    csv_loc = str(sys.argv[1])
    print(csv_loc)

    try:
        data = read_price_csvs(csv_loc)
    except (FileNotFoundError, ValueError) as err:
        # Stop here instead of continuing with missing data.
        sys.exit(str(err))

    data = data.drop('Volume', axis=1)
    data = data.drop('Adj Close', axis=1)
    data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')

    # Data of closing price
    # NOTE(review): the scaler is fitted on every row, including the rows
    # that are later held out as test data.
    scaler = StandardScaler()
    scaler.fit(data[['Close']])
    data['Close'] = scaler.transform(data[['Close']])
    data = data.sort_values(by='Date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['Date', 'Close']]

    # 10% of the data is used as test data.
    split_pos = int(len(data) * 0.9)
    x_train, y_train = prediction.load_data(
        data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

    # Each split needs more rows than one window to yield any sample.
    if len(x_train) == 0 or len(x_test) == 0:
        sys.exit("Not enough rows in the csv data to build training and "
                 "test sequences.")

    model = prediction.train(f_model, model_filename, x_train, y_train)
    predicted = model.predict(x_test)

    print('save weights')
    model.save(os.path.join(f_model, model_filename))

    # Convert predictions and targets back to the original price scale.
    result = pandas.DataFrame(scaler.inverse_transform(predicted))
    result.columns = ['predict']
    result['actual'] = scaler.inverse_transform(y_test)
    result.plot()
    plt.show()
import numpy
import pandas
import matplotlib.pyplot as plt
import sys
from pathlib import Path
from keras.models import load_model
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
class Prediction:
    """LSTM-based sequence regressor: windowing, model creation and training.

    Attributes are set in ``__init__``:
      * ``length_of_sequences`` -- number of time steps in one input window.
      * ``in_out_neurons``      -- feature count per time step and output size.
      * ``hidden_neurons``      -- number of units in the LSTM layer.
    """

    def __init__(self):
        self.length_of_sequences = 10
        self.in_out_neurons = 1
        self.hidden_neurons = 600

    def load_data(self, data, n_prev=10):
        """Cut ``data`` into sliding windows and their next-step targets.

        Args:
            data: pandas object exposing ``.values`` (the script in this file
                passes a one-column DataFrame).
            n_prev: number of consecutive rows in each input window.

        Returns:
            Tuple ``(X, Y)`` of numpy arrays.  ``X[i]`` holds rows
            ``i .. i + n_prev - 1`` and ``Y[i]`` holds row ``i + n_prev``.
            When ``data`` has ``n_prev`` rows or fewer both arrays are empty.
        """
        # ``as_matrix()`` was removed in pandas 1.0; ``.values`` returns the
        # same ndarray on old and new pandas.  Reading it once also avoids
        # building a new DataFrame slice on every iteration.
        values = data.values
        window_count = len(values) - n_prev
        X = [values[i:i + n_prev] for i in range(window_count)]
        Y = [values[i + n_prev] for i in range(window_count)]
        return numpy.array(X), numpy.array(Y)

    def create_model(self, f_model, model_filename):
        """Load the saved model if present, otherwise build a new one.

        Args:
            f_model: directory in which the model file is looked up.
            model_filename: file name of the saved model inside ``f_model``.

        Returns:
            The model returned by ``load_model`` when the file exists,
            otherwise a newly compiled ``Sequential`` model
            (LSTM -> Dense -> linear activation).
        """
        model_path = os.path.join(f_model, model_filename)
        print(model_path)
        if os.path.isfile(model_path):
            print('Saved parameters found. I will use this file...')
            model = load_model(model_path)
        else:
            print('Saved parameters Not found. Creating new one...')
            model = Sequential()
            model.add(LSTM(self.hidden_neurons,
                           batch_input_shape=(None,
                                              self.length_of_sequences,
                                              self.in_out_neurons),
                           return_sequences=False))
            model.add(Dense(self.in_out_neurons))
            model.add(Activation("linear"))
            # NOTE(review): the script in this file feeds standardized
            # targets, which can be close to zero; confirm that a
            # percentage-based loss ("mape") is really intended here.
            model.compile(loss="mape", optimizer="adam")
        return model

    def train(self, f_model, model_filename, X_train, y_train):
        """Create (or load) the model and fit it on the training windows.

        Args:
            f_model: directory passed on to ``create_model``.
            model_filename: model file name passed on to ``create_model``.
            X_train: input windows, as produced by ``load_data``.
            y_train: targets, as produced by ``load_data``.

        Returns:
            The fitted model.
        """
        model = self.create_model(f_model, model_filename)
        # Learn
        model.fit(X_train, y_train, batch_size=10, epochs=20)
        return model
def read_price_csvs(csv_loc):
    """Read price data from a single CSV file or from every CSV in a folder.

    Args:
        csv_loc: path to a CSV file, or to a folder that is searched
            recursively for ``*.csv`` files.

    Returns:
        A pandas DataFrame.  For a folder, the rows of all CSV files found
        are concatenated (original row indexes are kept).

    Raises:
        ValueError: ``csv_loc`` is a folder that contains no CSV file.
        FileNotFoundError: ``csv_loc`` is neither a file nor a folder.
    """
    if os.path.isfile(csv_loc):
        return pandas.read_csv(csv_loc)
    if os.path.isdir(csv_loc):
        # Sorted so that the concatenation order is reproducible.
        csv_paths = sorted(Path(csv_loc).glob('**/*.csv'))
        if not csv_paths:
            raise ValueError("No csv file found in folder: " + str(csv_loc))
        # Path objects are converted to str for older pandas versions.
        frames = [pandas.read_csv(str(path)) for path in csv_paths]
        # A single concat avoids re-copying the accumulated rows per file.
        return pandas.concat(frames)
    raise FileNotFoundError("This is not a file nor a folder.")


if __name__ == "__main__":
    f_model = './'
    model_filename = 'stockprice_model.hdf5'
    prediction = Prediction()

    # Data
    # A missing command-line argument raises IndexError (not NameError), so
    # the argument count is checked explicitly and the script stops early.
    if len(sys.argv) < 2 or sys.argv[1] == "":
        sys.exit("Please give a location of the csv file.")
    csv_loc = str(sys.argv[1])
    print(csv_loc)

    try:
        data = read_price_csvs(csv_loc)
    except (FileNotFoundError, ValueError) as err:
        # Stop here instead of continuing with missing data.
        sys.exit(str(err))

    data = data.drop('Volume', axis=1)
    data = data.drop('Adj Close', axis=1)
    data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')

    # Data of closing price
    # NOTE(review): the scaler is fitted on every row, including the rows
    # that are later held out as test data.
    scaler = StandardScaler()
    scaler.fit(data[['Close']])
    data['Close'] = scaler.transform(data[['Close']])
    data = data.sort_values(by='Date')
    data = data.reset_index(drop=True)
    data = data.loc[:, ['Date', 'Close']]

    # 10% of the data is used as test data.
    split_pos = int(len(data) * 0.9)
    x_train, y_train = prediction.load_data(
        data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
    x_test, y_test = prediction.load_data(
        data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

    # Each split needs more rows than one window to yield any sample.
    if len(x_train) == 0 or len(x_test) == 0:
        sys.exit("Not enough rows in the csv data to build training and "
                 "test sequences.")

    model = prediction.train(f_model, model_filename, x_train, y_train)
    predicted = model.predict(x_test)

    print('save weights')
    model.save(os.path.join(f_model, model_filename))

    # Convert predictions and targets back to the original price scale.
    result = pandas.DataFrame(scaler.inverse_transform(predicted))
    result.columns = ['predict']
    result['actual'] = scaler.inverse_transform(y_test)
    result.plot()
    plt.show()