Question 02: Classify the email using the binary classification method. Email Spam detection has two states:
a) Normal State – Not Spam.
b) Abnormal State – Spam.
Use K-Nearest Neighbors and Support Vector Machine for classification. Analyze their performance. Download the whole program / project code by clicking the following link: Question ? Answer Programming Code: The following code is written in: Email_Spam_Detection.py # ML Project Program 02
import numpy as np
import pandas as pd
# Load the e-mail dataset from the CSV file into a DataFrame.
data = pd.read_csv('./email_dataset/emails.csv')

# A bare expression such as `data` is only displayed by a notebook; print
# explicitly so the exploration output also appears when this file is run as
# a plain script.
print(data)

# Column names only.
print(data.columns)

# Summary of the dataset (info() prints by itself and returns None).
data.info()

# Element-wise null mask: False means a value is present, True means missing.
print(data.isnull())

# Number of missing values per column (0 means the column has none).
print(data.isnull().sum())

# First rows: head() returns 5 rows by default, head(10) returns 10.
print(data.head())
print(data.head(10))

# Same for the bottom rows.
print(data.tail())

# Drop the 'Email No.' column unconditionally so it is not used as a feature
# (presumably a row identifier -- confirm against the dataset).
data.drop(columns=['Email No.'], inplace=True)
print(data.tail())

# Count how many columns contain at least one null (True) versus none (False).
print(data.isnull().any().value_counts())
# Separating Features and Labels:
# every column except the last is a feature, the last column is the label.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
print(X.shape, y.shape)

# Splitting into Train and Test Dataset (15% of the rows are held out).
from sklearn.model_selection import train_test_split

# random_state makes the split identical on every run, so the accuracies
# printed later can be reproduced and compared; stratify=y keeps the class
# proportions the same in the train and test subsets.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)
# Machine Learning Models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier  # KNN
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Maps a display name to the estimator that is trained and scored later.
# Every estimator is wrapped in a pipeline that standardizes the features
# first: SVMs and the multi-layer perceptron are not scale invariant, and
# K-Nearest Neighbors compares raw distances, so unscaled features let the
# columns with the largest values dominate. The scaler is fitted on the
# training data only (inside fit), so nothing leaks from the test set.
# A pipeline exposes the same fit()/predict() interface as a bare estimator.
models = {
    "Logistic Regression": make_pipeline(
        StandardScaler(), LogisticRegression(solver='lbfgs', max_iter=2000)
    ),
    "Linear SVM": make_pipeline(StandardScaler(), LinearSVC(max_iter=3000)),
    "Polynomial SVM": make_pipeline(
        StandardScaler(), SVC(kernel="poly", degree=2)
    ),
    "RBF SVM": make_pipeline(StandardScaler(), SVC(kernel="rbf")),
    "Sigmoid SVM": make_pipeline(StandardScaler(), SVC(kernel="sigmoid")),
    # random_state fixes the random weight initialisation so repeated runs
    # give the same network.
    "Multi-layer Perceptron Classification": make_pipeline(
        StandardScaler(),
        MLPClassifier(hidden_layer_sizes=[20, 20], random_state=42),
    ),
    "K-Nearest Neighbors": make_pipeline(
        StandardScaler(), KNeighborsClassifier(n_neighbors=20)
    ),
}
# Predict Accuracy Score for Each Model
from sklearn.metrics import accuracy_score

# Train every model on the training split, predict the held-out test split
# and print the fraction of test e-mails that were classified correctly.
# The loop body has to be indented, otherwise Python raises IndentationError.
for model_name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(f"Accuracy for {model_name} model is : {accuracy_score(y_test, y_pred)}")

# Accuracy in percentage, as recorded from one run of the original script
# (the figures vary with the train/test split and the model settings):
# Logistic Regression model: 98%
# Linear SVM model: 96%
# Polynomial SVM model: 76%
# RBF SVM model: 82%
# Sigmoid SVM model: 61%
# Multi-layer Perceptron Classification model: 98%
# K-Nearest Neighbors: 89%
# Thanks For Watching
# Thanks For Reading.
Output:
a) Normal State – Not Spam.
b) Abnormal State – Spam.
Use K-Nearest Neighbors and Support Vector Machine for classification. Analyze their performance.
# ML Project Program 02
# Binary e-mail spam classification: loads the dataset, explores it, then
# trains several classifiers (including K-Nearest Neighbors and Support
# Vector Machines) and prints the test accuracy of each one.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier  # KNN
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

# here read the csv file data and store it in data variable
data = pd.read_csv('./email_dataset/emails.csv')

# Exploration output is printed explicitly so it also shows up when the file
# is run as a script rather than in a notebook.
print(data)                  # whole dataset
print(data.columns)          # column names only
data.info()                  # dataset information (prints by itself)
print(data.isnull())         # False = value present, True = null value
print(data.isnull().sum())   # number of null values per column
print(data.head())           # top rows, 5 by default
print(data.head(10))         # top 10 rows
print(data.tail())           # same for bottom rows

# Drop the 'Email No.' column so it is not used as a feature.
data.drop(columns=['Email No.'], inplace=True)
print(data.tail())

# check any null value in dataset
print(data.isnull().any().value_counts())

# Separating Features and Labels: the last column is the label.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
print(X.shape, y.shape)

# Splitting into Train and Test Dataset; seeded and stratified so the
# reported accuracies are reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# Machine Learning Models, each behind a StandardScaler because SVM, MLP and
# KNN are sensitive to the scale of the features.
models = {
    "Logistic Regression": make_pipeline(
        StandardScaler(), LogisticRegression(solver='lbfgs', max_iter=2000)
    ),
    "Linear SVM": make_pipeline(StandardScaler(), LinearSVC(max_iter=3000)),
    "Polynomial SVM": make_pipeline(
        StandardScaler(), SVC(kernel="poly", degree=2)
    ),
    "RBF SVM": make_pipeline(StandardScaler(), SVC(kernel="rbf")),
    "Sigmoid SVM": make_pipeline(StandardScaler(), SVC(kernel="sigmoid")),
    "Multi-layer Perceptron Classification": make_pipeline(
        StandardScaler(),
        MLPClassifier(hidden_layer_sizes=[20, 20], random_state=42),
    ),
    "K-Nearest Neighbors": make_pipeline(
        StandardScaler(), KNeighborsClassifier(n_neighbors=20)
    ),
}

# Predict Accuracy Score for Each Model
for model_name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    print(f"Accuracy for {model_name} model is : {accuracy_score(y_test, y_pred)}")

# Accuracy in percentage, as recorded from one run of the original script
# (the figures vary with the train/test split and the model settings):
# Logistic Regression model: 98%
# Linear SVM model: 96%
# Polynomial SVM model: 76%
# RBF SVM model: 82%
# Sigmoid SVM model: 61%
# Multi-layer Perceptron Classification model: 98%
# K-Nearest Neighbors: 89%
# Thanks For Watching
# Thanks For Reading.
Output:
0 Comments