Question 7: Implement K-Means clustering / hierarchical clustering on the sales_data_sample.csv dataset. Determine the number of clusters using the elbow method. (A short sketch for this part is given at the end of this post.)

Mini Project: Use a share market dataset to predict ups and downs in the market and to predict future stock price returns, based on Indian market data from 2000 to 2020.

Download the whole Program / Project code by clicking the following link:

Answer:

Programming Code: Write the following code in ML_P07.py

# ML Project Program 07
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
# read only the first 1000 rows of the dataset
nRowRead = 1000
data = pd.read_csv('./Indian_stock_market_dataset/Indian_stock_market.csv', delimiter=',', nrows=nRowRead)
data.dataframeName = 'Indian_stock_market.csv'
nRow, nCol = data.shape
data.info()
print(data.describe())
print(data.columns)
# check for null values
print(data.isnull().sum())
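# (Optional, not in the original) if the null check above reports missing values,
# one simple option is to drop those rows before modelling:
# data = data.dropna()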
# plot the target column
data['Adj Close'].plot()
plt.show()
# set Target variable
output_var = pd.DataFrame(data['Adj Close'])
# selecting the Features
features = ["Open", "High", "Low", "Volume"]
# Normalizing Dataset
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
feature_transform = scaler.fit_transform(data[features])
feature_transform = pd.DataFrame(columns = features, data = feature_transform, index = data.index)
feature_transform.info()
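# Note (not in the original): only the input features are scaled here;
# the target 'Adj Close' is left on its original price scale.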
from sklearn.model_selection import TimeSeriesSplit
# Use TimeSeriesSplit so the test data always comes after the training data;
# the loop overwrites the variables, so only the final (largest) split is kept
time_split = TimeSeriesSplit(n_splits=10)
for train_index, test_index in time_split.split(feature_transform):
    x_train, x_test = feature_transform[:len(train_index)], feature_transform[len(train_index): (len(train_index) + len(test_index))]
    y_train, y_test = output_var[:len(train_index)].values.ravel(), output_var[len(train_index): (len(train_index) + len(test_index))].values.ravel()
# Reshape the data into the (samples, timesteps, features) shape expected by the LSTM
trainX = np.array(x_train)
testX = np.array(x_test)
x_train = trainX.reshape(x_train.shape[0], 1, x_train.shape[1])
x_test = testX.reshape(x_test.shape[0], 1, x_test.shape[1])
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.utils import plot_model
# Building the LSTM Model:
# one LSTM layer with 32 units followed by a single-unit Dense output layer
lstm = Sequential()
lstm.add(LSTM(32, input_shape=(1, trainX.shape[1]), activation="relu", return_sequences=False))
lstm.add(Dense(1))
lstm.compile(loss="mean_squared_error", optimizer="adam")
# plot_model requires pydot and graphviz to be installed
plot_model(lstm, show_shapes=True, show_layer_names=True)
history = lstm.fit(x_train, y_train, epochs = 100, batch_size = 8, verbose = 1, shuffle = False)
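# (Optional, not in the original) plot the training loss recorded by fit()
# to check that the MSE loss decreases over the 100 epochs
plt.plot(history.history["loss"], label="Training Loss")
plt.title("LSTM Training Loss")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend()
plt.show()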
# LSTM Prediction
y_pred = lstm.predict(x_test)
# Prediction vs True Adj Close Value - LSTM
plt.plot(y_test, label="True Value")
plt.plot(y_pred, label="LSTM Prediction")
plt.title("Prediction by LSTM Model")
plt.xlabel("Time Scale")
plt.ylabel("Adj Close Price")
plt.legend()
plt.show()
# R2 Score (r2_score is already imported from sklearn.metrics at the top)
print("R2 score:", r2_score(y_test, y_pred))
# R2 score for this run is approximately 0.87
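# (Optional, not in the original) also report RMSE, using mean_squared_error
# already imported at the top of the script
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE:", rmse)
# (Optional, not in the original) a rough "ups and downs" check for the Mini
# Project statement: compare the sign of day-to-day changes in the true and
# predicted Adj Close series
true_direction = np.sign(np.diff(y_test))
pred_direction = np.sign(np.diff(y_pred.ravel()))
print("Directional accuracy:", np.mean(true_direction == pred_direction))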
# Thanks For Reading.
Output:
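For Question 7 (K-Means / hierarchical clustering on sales_data_sample.csv with the elbow method), which the code above does not cover, a minimal sketch follows. It assumes the CSV sits next to the script, is latin-1 encoded, and contains the numeric columns QUANTITYORDERED, PRICEEACH and SALES; adjust these names to match your copy of the dataset.

# Question 7 (sketch): K-Means with the elbow method
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

sales = pd.read_csv('sales_data_sample.csv', encoding='latin1')
# assumed numeric columns; change these to match your file
X = MinMaxScaler().fit_transform(sales[["QUANTITYORDERED", "PRICEEACH", "SALES"]])

# Elbow method: plot the within-cluster sum of squares (inertia) for k = 1..10
wcss = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(X)
    wcss.append(km.inertia_)

plt.plot(range(1, 11), wcss, marker='o')
plt.title("Elbow Method")
plt.xlabel("Number of clusters (k)")
plt.ylabel("WCSS (inertia)")
plt.show()

# Fit the final model with the k read off at the "elbow" (k = 3 is used here as an example)
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = kmeans.fit_predict(X)
print(pd.Series(labels).value_counts())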