There are 721 distinct Pokémon and approximately 800 rows of Pokémon stats in total, covering Normal, Legendary, and Mega Evolution forms.
# Load the Pokémon dataset and report its dimensions (rows, columns).
dataset = pd.read_csv("Pokemon(copy).csv")
print(f"Datasets: {dataset.shape}")
Datasets: (800, 9)
Let's drop the Name column.
# Drop the non-numeric Name column; it carries no predictive signal here.
dataset.drop("Name", axis=1, inplace=True)

# Class balance of the target (Legendary vs. non-Legendary).
# Pass the column by keyword: positional data arguments are deprecated
# in modern seaborn.
sns.countplot(x="Legendary", data=dataset, label="Count")

# Inspect the remaining columns.
dataset.columns
Index(['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed',
'Legendary'],
dtype='object')
dataset.describe()  # summary statistics (count/mean/std/min/quartiles/max) per numeric column
Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Legendary | |
---|---|---|---|---|---|---|---|---|
count | 800.000 | 800.0000 | 800.0000 | 800.0000 | 800.0000 | 800.0000 | 800.0000 | 800.000 |
mean | 435.1025 | 69.25875 | 79.00125 | 73.8425 | 72.8200 | 71.9025 | 68.2775 | 0.08125 |
std | 119.96304 | 25.534669 | 32.457366 | 31.183501 | 32.722294 | 27.828916 | 29.060474 | 0.27339 |
min | 180.00000 | 1.000000 | 5.000000 | 5.000000 | 10.000000 | 20.0000 | 5.000000 | 0.00000 |
25% | 330.000 | 50.0000 | 55.0000 | 50.0000 | 49.7500 | 50.0000 | 45.0000 | 0.00000 |
50% | 450.000 | 65.0000 | 75.0000 | 70.0000 | 65.0000 | 70.0000 | 65.0000 | 0.00000 |
75% | 515.000 | 80.0000 | 100.0000 | 90.0000 | 95.0000 | 90.0000 | 90.0000 | 0.00000 |
max | 780.000 | 255.0000 | 190.0000 | 230.0000 | 194.0000 | 230.0000 | 180.0000 | 1.000 |
# Feature columns used throughout the analysis.
# FIX: the original used typographic quotes, which are a SyntaxError in Python.
stats = ['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Pairwise (Pearson) correlation between the stat columns.
corr = dataset[stats].corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(14, 14))
sns.heatmap(corr, cbar=True, square=True, annot=True, fmt='.2f',
            annot_kws={'size': 15},
            xticklabels=stats, yticklabels=stats,
            cmap='coolwarm')

# Color each point by the target: blue = regular, red = Legendary.
color_function = {0: "blue", 1: "red"}
colors = dataset["Legendary"].map(lambda x: color_function.get(x))

# FIX: pd.scatter_matrix was removed from the pandas top level (pandas 1.0);
# the supported entry point is pandas.plotting.scatter_matrix.
pd.plotting.scatter_matrix(dataset[stats], c=colors, alpha=0.6, figsize=(12, 12));
Predicting Legendary Pokemon from Stats
# --- Train/test preparation -------------------------------------------------
Train = pd.read_csv("PokemonTrain.csv")
Test = pd.read_csv("PokemonTest.csv")

# First 600 rows of the train file; rows from index 600 onward of the test file.
pokemons1 = Train[:600]
pokemons2 = Test[600:]

X_train = pokemons1.drop("Legendary", axis=1)
Y_train = pokemons1["Legendary"]
# BUG FIX: the original assigned X_test = pokemons2 without dropping the label,
# so X_test had one more column than X_train and prediction would fail.
# errors="ignore" keeps this safe if the test file ships without the column.
X_test = pokemons2.drop("Legendary", axis=1, errors="ignore")
X_train.shape, Y_train.shape, X_test.shape

# --- Fit a suite of classifiers ---------------------------------------------
# NOTE: every "acc_*" below is TRAINING accuracy (score on the fitted data),
# not held-out accuracy, so the tree/forest/SVC numbers are optimistic.

logreg = LogisticRegression()
logreg.fit(X_train, Y_train)
Y_pred = logreg.predict(X_test)
acc_log = round(logreg.score(X_train, Y_train) * 100, 2)
acc_log

svc = SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
acc_svc = round(svc.score(X_train, Y_train) * 100, 2)
acc_svc

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)
acc_knn

gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian

perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron

linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc

sgd = SGDClassifier()
sgd.fit(X_train, Y_train)
Y_pred = sgd.predict(X_test)
acc_sgd = round(sgd.score(X_train, Y_train) * 100, 2)
acc_sgd

decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
Y_pred = decision_tree.predict(X_test)
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)
acc_decision_tree

random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(X_train, Y_train)
Y_pred = random_forest.predict(X_test)
acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)
acc_random_forest

# --- Rank the models by (training) score ------------------------------------
# "Decent" (sic) is kept byte-for-byte to match the results table below.
models = pd.DataFrame({
    'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression',
              'Random Forest', 'Naive Bayes', 'Perceptron',
              'Stochastic Gradient Decent', 'Linear SVC', 'Decision Tree'],
    'Score': [acc_svc, acc_knn, acc_log, acc_random_forest, acc_gaussian,
              acc_perceptron, acc_sgd, acc_linear_svc, acc_decision_tree],
})
models.sort_values(by='Score', ascending=False)
Model | Score | |
---|---|---|
0 | Support Vector Machines | 99.50 |
3 | Random Forest | 99.50 |
8 | Decision Tree | 99.50 |
1 | KNN | 98.17 |
2 | Logistic Regression | 93.67 |
4 | Naive Bayes | 92.83 |
7 | Linear SVC | 92.83 |
6 | Stochastic Gradient Decent | 92.67 |
5 | Perceptron | 87.17 |
K-Fold cross-validation:
def classification_model(model, data, prediction_input, output):
    """Fit *model* on *data* and report training accuracy plus 5-fold CV score.

    Parameters
    ----------
    model : estimator with the sklearn fit/predict/score interface
    data : pandas.DataFrame holding both the features and the target column
    prediction_input : list of feature column names
    output : name of the target column
    """
    # Training accuracy: fit and score on the same rows (optimistic estimate).
    model.fit(data[prediction_input], data[output])
    predictions = model.predict(data[prediction_input])
    accuracy = metrics.accuracy_score(predictions, data[output])
    print("Accuracy : %s" % "{0:.3%}".format(accuracy))

    # FIX: KFold(n, n_folds=5) is the pre-0.18 sklearn API. Modern sklearn
    # takes n_splits and yields (train, test) index arrays from .split().
    kf = KFold(n_splits=5)
    error = []
    for train, test in kf.split(data):
        train_X = data[prediction_input].iloc[train, :]
        train_y = data[output].iloc[train]
        model.fit(train_X, train_y)
        test_X = data[prediction_input].iloc[test, :]
        test_y = data[output].iloc[test]
        error.append(model.score(test_X, test_y))
        # Prints the running mean after each fold (matches the original's
        # five "Cross-Validation Score" lines of output).
        print("Cross-Validation Score : %s" % "{0:.3%}".format(np.mean(error)))
# Evaluate a decision tree, then logistic regression, with the same helper.
model = DecisionTreeClassifier()
data = dataset
prediction_var = stats
# FIX: the original wrapped Legendary in typographic quotes (SyntaxError).
outcome_var = "Legendary"
classification_model(model, data, prediction_var, outcome_var)

model = LogisticRegression()
classification_model(model, data, prediction_var, outcome_var)
Accuracy : 93.625% Cross-Validation Score : 96.250% Cross-Validation Score : 96.250% Cross-Validation Score : 95.000% Cross-Validation Score : 94.219% Cross-Validation Score : 92.875%
# Repeat the evaluation with a 100-tree random forest.
model = RandomForestClassifier(n_estimators=100)
classification_model(model,data,prediction_var,outcome_var)