Commit 9e255863 authored by Jonathan Poalses

Finished the code

parent c13764f4
......@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 263,
"execution_count": 309,
"outputs": [],
"source": [
"# Importing pyplot so we can visualize things\n",
......@@ -37,6 +37,9 @@
"# Importing sklearn metrics for the reports and confusion matrix\n",
"from sklearn import metrics\n",
"\n",
"# Importing statistics for model evaluation\n",
"import statistics\n",
"\n",
"# Importing the three classifying algorithms\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.svm import SVC\n",
......@@ -58,8 +61,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.138574Z",
"start_time": "2023-05-24T19:57:31.051741Z"
"end_time": "2023-05-25T14:28:54.613430Z",
"start_time": "2023-05-25T14:28:54.538858Z"
}
}
},
......@@ -75,13 +78,13 @@
},
{
"cell_type": "code",
"execution_count": 264,
"execution_count": 310,
"outputs": [
{
"data": {
"text/plain": "array([0, 1, 2, ..., 8, 9, 8])"
},
"execution_count": 264,
"execution_count": 310,
"metadata": {},
"output_type": "execute_result"
}
......@@ -96,8 +99,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.167262Z",
"start_time": "2023-05-24T19:57:31.058062Z"
"end_time": "2023-05-25T14:28:54.638244Z",
"start_time": "2023-05-25T14:28:54.545300Z"
}
}
},
......@@ -112,13 +115,13 @@
},
{
"cell_type": "code",
"execution_count": 265,
"execution_count": 311,
"outputs": [
{
"data": {
"text/plain": "array([[[ 0., 0., 5., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 15., 5., 0.],\n [ 0., 3., 15., ..., 11., 8., 0.],\n ...,\n [ 0., 4., 11., ..., 12., 7., 0.],\n [ 0., 2., 14., ..., 12., 0., 0.],\n [ 0., 0., 6., ..., 0., 0., 0.]],\n\n [[ 0., 0., 0., ..., 5., 0., 0.],\n [ 0., 0., 0., ..., 9., 0., 0.],\n [ 0., 0., 3., ..., 6., 0., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.]],\n\n [[ 0., 0., 0., ..., 12., 0., 0.],\n [ 0., 0., 3., ..., 14., 0., 0.],\n [ 0., 0., 8., ..., 16., 0., 0.],\n ...,\n [ 0., 9., 16., ..., 0., 0., 0.],\n [ 0., 3., 13., ..., 11., 5., 0.],\n [ 0., 0., 0., ..., 16., 9., 0.]],\n\n ...,\n\n [[ 0., 0., 1., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 2., 1., 0.],\n [ 0., 0., 16., ..., 16., 5., 0.],\n ...,\n [ 0., 0., 16., ..., 15., 0., 0.],\n [ 0., 0., 15., ..., 16., 0., 0.],\n [ 0., 0., 2., ..., 6., 0., 0.]],\n\n [[ 0., 0., 2., ..., 0., 0., 0.],\n [ 0., 0., 14., ..., 15., 1., 0.],\n [ 0., 4., 16., ..., 16., 7., 0.],\n ...,\n [ 0., 0., 0., ..., 16., 2., 0.],\n [ 0., 0., 4., ..., 16., 2., 0.],\n [ 0., 0., 5., ..., 12., 0., 0.]],\n\n [[ 0., 0., 10., ..., 1., 0., 0.],\n [ 0., 2., 16., ..., 1., 0., 0.],\n [ 0., 0., 15., ..., 15., 0., 0.],\n ...,\n [ 0., 4., 16., ..., 16., 6., 0.],\n [ 0., 8., 16., ..., 16., 8., 0.],\n [ 0., 1., 8., ..., 12., 1., 0.]]])"
},
"execution_count": 265,
"execution_count": 311,
"metadata": {},
"output_type": "execute_result"
}
......@@ -130,8 +133,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.167448Z",
"start_time": "2023-05-24T19:57:31.072164Z"
"end_time": "2023-05-25T14:28:54.638690Z",
"start_time": "2023-05-25T14:28:54.563741Z"
}
}
},
......@@ -156,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": 266,
"execution_count": 312,
"outputs": [
{
"data": {
......@@ -183,8 +186,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.255710Z",
"start_time": "2023-05-24T19:57:31.085709Z"
"end_time": "2023-05-25T14:28:54.815532Z",
"start_time": "2023-05-25T14:28:54.702672Z"
}
}
},
......@@ -199,13 +202,13 @@
},
{
"cell_type": "code",
"execution_count": 267,
"execution_count": 313,
"outputs": [
{
"data": {
"text/plain": "array([[ 0., 0., 5., ..., 0., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.],\n [ 0., 0., 0., ..., 16., 9., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 2., ..., 12., 0., 0.],\n [ 0., 0., 10., ..., 12., 1., 0.]])"
},
"execution_count": 267,
"execution_count": 313,
"metadata": {},
"output_type": "execute_result"
}
......@@ -218,8 +221,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.260813Z",
"start_time": "2023-05-24T19:57:31.258154Z"
"end_time": "2023-05-25T14:28:54.821812Z",
"start_time": "2023-05-25T14:28:54.816152Z"
}
}
},
......@@ -254,7 +257,7 @@
},
{
"cell_type": "code",
"execution_count": 268,
"execution_count": 314,
"outputs": [],
"source": [
"# We'll start by splitting the data into training and testing, going with a 75% train, 25% test split, a 50/50 split, and a 25% train 75% test split.\n",
......@@ -265,8 +268,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.266344Z",
"start_time": "2023-05-24T19:57:31.262708Z"
"end_time": "2023-05-25T14:28:54.827791Z",
"start_time": "2023-05-25T14:28:54.824178Z"
}
}
},
......@@ -281,7 +284,7 @@
},
{
"cell_type": "code",
"execution_count": 269,
"execution_count": 315,
"outputs": [],
"source": [
"# First the Gaussian Bayes\n",
......@@ -302,8 +305,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.331327Z",
"start_time": "2023-05-24T19:57:31.269108Z"
"end_time": "2023-05-25T14:28:54.895348Z",
"start_time": "2023-05-25T14:28:54.831677Z"
}
}
},
......@@ -318,7 +321,7 @@
},
{
"cell_type": "code",
"execution_count": 270,
"execution_count": 316,
"outputs": [
{
"name": "stdout",
......@@ -347,14 +350,14 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.340955Z",
"start_time": "2023-05-24T19:57:31.332522Z"
"end_time": "2023-05-25T14:28:54.906409Z",
"start_time": "2023-05-25T14:28:54.897176Z"
}
}
},
{
"cell_type": "code",
"execution_count": 271,
"execution_count": 317,
"outputs": [
{
"name": "stdout",
......@@ -383,14 +386,14 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.410490Z",
"start_time": "2023-05-24T19:57:31.343315Z"
"end_time": "2023-05-25T14:28:54.991884Z",
"start_time": "2023-05-25T14:28:54.906637Z"
}
}
},
{
"cell_type": "code",
"execution_count": 272,
"execution_count": 318,
"outputs": [
{
"name": "stdout",
......@@ -419,8 +422,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.621919Z",
"start_time": "2023-05-24T19:57:31.412912Z"
"end_time": "2023-05-25T14:28:55.205099Z",
"start_time": "2023-05-25T14:28:54.990989Z"
}
}
},
......@@ -436,14 +439,87 @@
},
{
"cell_type": "code",
"execution_count": 272,
"outputs": [],
"source": [],
"execution_count": 319,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best single model : K Nearest Neighbour 1st Model\n",
"Best overall algorithm : K Nearest Neighbour Algorithm\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 2 Axes>",
"image/png": "\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Collect one record per model: (name, score, true labels, predicted labels).\n",
"# Keeping everything about a model together, instead of using the float score as a\n",
"# dictionary key, means two models with the same score can never overwrite each other.\n",
"model_results = [\n",
"    (\"Gaussian Naive Bayes 1st Model\", gnb_score, y_test, gnb_predicted),\n",
"    (\"Gaussian Naive Bayes 2nd Model\", gnb2_score, y_test2, gnb2_predicted),\n",
"    (\"Gaussian Naive Bayes 3rd Model\", gnb3_score, y_test3, gnb3_predicted),\n",
"    (\"K Nearest Neighbour 1st Model\", knc_score, y_test, knc_predicted),\n",
"    (\"K Nearest Neighbour 2nd Model\", knc2_score, y_test2, knc2_predicted),\n",
"    (\"K Nearest Neighbour 3rd Model\", knc3_score, y_test3, knc3_predicted),\n",
"    (\"Support Vector Classification 1st Model\", svc_score, y_test, svc_predicted),\n",
"    (\"Support Vector Classification 2nd Model\", svc2_score, y_test2, svc2_predicted),\n",
"    (\"Support Vector Classification 3rd Model\", svc3_score, y_test3, svc3_predicted),\n",
"]\n",
"\n",
"# Average the three splits of each algorithm, keyed by the (unique) algorithm name\n",
"gnb_average = statistics.fmean((gnb_score, gnb2_score, gnb3_score))\n",
"knc_average = statistics.fmean((knc_score, knc2_score, knc3_score))\n",
"svc_average = statistics.fmean((svc_score, svc2_score, svc3_score))\n",
"algorithm_averages = {\n",
"    \"Gaussian Naive Bayes Algorithm\": gnb_average,\n",
"    \"K Nearest Neighbour Algorithm\": knc_average,\n",
"    \"Support Vector Classification Algorithm\": svc_average,\n",
"}\n",
"\n",
"# Pick the best single model by score (on a tie the first listed model wins)\n",
"highest, highest_score, best_y_test, best_predicted = max(model_results, key=lambda result: result[1])\n",
"# Pick the algorithm with the best average score\n",
"highest_average = max(algorithm_averages, key=algorithm_averages.get)\n",
"\n",
"# Print the best\n",
"print(\"Best single model :\", highest)\n",
"print(\"Best overall algorithm :\", highest_average)\n",
"\n",
"# Now let's generate and look at the confusion matrix from the best model\n",
"# (named confusion_display so it does not shadow IPython's built-in display())\n",
"confusion_display = metrics.ConfusionMatrixDisplay.from_predictions(best_y_test, best_predicted)\n",
"# Add a title; the trailing semicolon stops the notebook echoing the returned Text object\n",
"confusion_display.figure_.suptitle(f\"{highest} Confusion Matrix\");"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-24T19:57:31.624901Z",
"start_time": "2023-05-24T19:57:31.622175Z"
"end_time": "2023-05-25T14:28:55.437282Z",
"start_time": "2023-05-25T14:28:55.208964Z"
}
}
},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment