Commit 9e255863 authored by Jonathan Poalses's avatar Jonathan Poalses

Finished the code

parent c13764f4
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 263, "execution_count": 309,
"outputs": [], "outputs": [],
"source": [ "source": [
"# Importing pyplot so we can visualize things\n", "# Importing pyplot so we can visualize things\n",
...@@ -37,6 +37,9 @@ ...@@ -37,6 +37,9 @@
"# Importing sklearn metrics for the reports and confusion matrix\n", "# Importing sklearn metrics for the reports and confusion matrix\n",
"from sklearn import metrics\n", "from sklearn import metrics\n",
"\n", "\n",
"# Importing statistics for model evaluation\n",
"import statistics\n",
"\n",
"# Importing the three classifying algorithms\n", "# Importing the three classifying algorithms\n",
"from sklearn.naive_bayes import GaussianNB\n", "from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.svm import SVC\n", "from sklearn.svm import SVC\n",
...@@ -58,8 +61,8 @@ ...@@ -58,8 +61,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.138574Z", "end_time": "2023-05-25T14:28:54.613430Z",
"start_time": "2023-05-24T19:57:31.051741Z" "start_time": "2023-05-25T14:28:54.538858Z"
} }
} }
}, },
...@@ -75,13 +78,13 @@ ...@@ -75,13 +78,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 264, "execution_count": 310,
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "array([0, 1, 2, ..., 8, 9, 8])" "text/plain": "array([0, 1, 2, ..., 8, 9, 8])"
}, },
"execution_count": 264, "execution_count": 310,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -96,8 +99,8 @@ ...@@ -96,8 +99,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.167262Z", "end_time": "2023-05-25T14:28:54.638244Z",
"start_time": "2023-05-24T19:57:31.058062Z" "start_time": "2023-05-25T14:28:54.545300Z"
} }
} }
}, },
...@@ -112,13 +115,13 @@ ...@@ -112,13 +115,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 265, "execution_count": 311,
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "array([[[ 0., 0., 5., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 15., 5., 0.],\n [ 0., 3., 15., ..., 11., 8., 0.],\n ...,\n [ 0., 4., 11., ..., 12., 7., 0.],\n [ 0., 2., 14., ..., 12., 0., 0.],\n [ 0., 0., 6., ..., 0., 0., 0.]],\n\n [[ 0., 0., 0., ..., 5., 0., 0.],\n [ 0., 0., 0., ..., 9., 0., 0.],\n [ 0., 0., 3., ..., 6., 0., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.]],\n\n [[ 0., 0., 0., ..., 12., 0., 0.],\n [ 0., 0., 3., ..., 14., 0., 0.],\n [ 0., 0., 8., ..., 16., 0., 0.],\n ...,\n [ 0., 9., 16., ..., 0., 0., 0.],\n [ 0., 3., 13., ..., 11., 5., 0.],\n [ 0., 0., 0., ..., 16., 9., 0.]],\n\n ...,\n\n [[ 0., 0., 1., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 2., 1., 0.],\n [ 0., 0., 16., ..., 16., 5., 0.],\n ...,\n [ 0., 0., 16., ..., 15., 0., 0.],\n [ 0., 0., 15., ..., 16., 0., 0.],\n [ 0., 0., 2., ..., 6., 0., 0.]],\n\n [[ 0., 0., 2., ..., 0., 0., 0.],\n [ 0., 0., 14., ..., 15., 1., 0.],\n [ 0., 4., 16., ..., 16., 7., 0.],\n ...,\n [ 0., 0., 0., ..., 16., 2., 0.],\n [ 0., 0., 4., ..., 16., 2., 0.],\n [ 0., 0., 5., ..., 12., 0., 0.]],\n\n [[ 0., 0., 10., ..., 1., 0., 0.],\n [ 0., 2., 16., ..., 1., 0., 0.],\n [ 0., 0., 15., ..., 15., 0., 0.],\n ...,\n [ 0., 4., 16., ..., 16., 6., 0.],\n [ 0., 8., 16., ..., 16., 8., 0.],\n [ 0., 1., 8., ..., 12., 1., 0.]]])" "text/plain": "array([[[ 0., 0., 5., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 15., 5., 0.],\n [ 0., 3., 15., ..., 11., 8., 0.],\n ...,\n [ 0., 4., 11., ..., 12., 7., 0.],\n [ 0., 2., 14., ..., 12., 0., 0.],\n [ 0., 0., 6., ..., 0., 0., 0.]],\n\n [[ 0., 0., 0., ..., 5., 0., 0.],\n [ 0., 0., 0., ..., 9., 0., 0.],\n [ 0., 0., 3., ..., 6., 0., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.]],\n\n [[ 0., 0., 0., ..., 12., 0., 0.],\n [ 0., 0., 3., ..., 14., 0., 0.],\n [ 0., 0., 8., ..., 16., 0., 0.],\n ...,\n [ 0., 9., 16., ..., 0., 0., 0.],\n [ 0., 3., 13., ..., 11., 5., 
0.],\n [ 0., 0., 0., ..., 16., 9., 0.]],\n\n ...,\n\n [[ 0., 0., 1., ..., 1., 0., 0.],\n [ 0., 0., 13., ..., 2., 1., 0.],\n [ 0., 0., 16., ..., 16., 5., 0.],\n ...,\n [ 0., 0., 16., ..., 15., 0., 0.],\n [ 0., 0., 15., ..., 16., 0., 0.],\n [ 0., 0., 2., ..., 6., 0., 0.]],\n\n [[ 0., 0., 2., ..., 0., 0., 0.],\n [ 0., 0., 14., ..., 15., 1., 0.],\n [ 0., 4., 16., ..., 16., 7., 0.],\n ...,\n [ 0., 0., 0., ..., 16., 2., 0.],\n [ 0., 0., 4., ..., 16., 2., 0.],\n [ 0., 0., 5., ..., 12., 0., 0.]],\n\n [[ 0., 0., 10., ..., 1., 0., 0.],\n [ 0., 2., 16., ..., 1., 0., 0.],\n [ 0., 0., 15., ..., 15., 0., 0.],\n ...,\n [ 0., 4., 16., ..., 16., 6., 0.],\n [ 0., 8., 16., ..., 16., 8., 0.],\n [ 0., 1., 8., ..., 12., 1., 0.]]])"
}, },
"execution_count": 265, "execution_count": 311,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -130,8 +133,8 @@ ...@@ -130,8 +133,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.167448Z", "end_time": "2023-05-25T14:28:54.638690Z",
"start_time": "2023-05-24T19:57:31.072164Z" "start_time": "2023-05-25T14:28:54.563741Z"
} }
} }
}, },
...@@ -156,7 +159,7 @@ ...@@ -156,7 +159,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 266, "execution_count": 312,
"outputs": [ "outputs": [
{ {
"data": { "data": {
...@@ -183,8 +186,8 @@ ...@@ -183,8 +186,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.255710Z", "end_time": "2023-05-25T14:28:54.815532Z",
"start_time": "2023-05-24T19:57:31.085709Z" "start_time": "2023-05-25T14:28:54.702672Z"
} }
} }
}, },
...@@ -199,13 +202,13 @@ ...@@ -199,13 +202,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 267, "execution_count": 313,
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "array([[ 0., 0., 5., ..., 0., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.],\n [ 0., 0., 0., ..., 16., 9., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 2., ..., 12., 0., 0.],\n [ 0., 0., 10., ..., 12., 1., 0.]])" "text/plain": "array([[ 0., 0., 5., ..., 0., 0., 0.],\n [ 0., 0., 0., ..., 10., 0., 0.],\n [ 0., 0., 0., ..., 16., 9., 0.],\n ...,\n [ 0., 0., 1., ..., 6., 0., 0.],\n [ 0., 0., 2., ..., 12., 0., 0.],\n [ 0., 0., 10., ..., 12., 1., 0.]])"
}, },
"execution_count": 267, "execution_count": 313,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -218,8 +221,8 @@ ...@@ -218,8 +221,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.260813Z", "end_time": "2023-05-25T14:28:54.821812Z",
"start_time": "2023-05-24T19:57:31.258154Z" "start_time": "2023-05-25T14:28:54.816152Z"
} }
} }
}, },
...@@ -254,7 +257,7 @@ ...@@ -254,7 +257,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 268, "execution_count": 314,
"outputs": [], "outputs": [],
"source": [ "source": [
"# We'll start by splitting the data into training and testing, going with a 75% train, 25% test split, a 50/50 split, and a 25% train 75% test split.\n", "# We'll start by splitting the data into training and testing, going with a 75% train, 25% test split, a 50/50 split, and a 25% train 75% test split.\n",
...@@ -265,8 +268,8 @@ ...@@ -265,8 +268,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.266344Z", "end_time": "2023-05-25T14:28:54.827791Z",
"start_time": "2023-05-24T19:57:31.262708Z" "start_time": "2023-05-25T14:28:54.824178Z"
} }
} }
}, },
...@@ -281,7 +284,7 @@ ...@@ -281,7 +284,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 269, "execution_count": 315,
"outputs": [], "outputs": [],
"source": [ "source": [
"# First the Gaussian Bayes\n", "# First the Gaussian Bayes\n",
...@@ -302,8 +305,8 @@ ...@@ -302,8 +305,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.331327Z", "end_time": "2023-05-25T14:28:54.895348Z",
"start_time": "2023-05-24T19:57:31.269108Z" "start_time": "2023-05-25T14:28:54.831677Z"
} }
} }
}, },
...@@ -318,7 +321,7 @@ ...@@ -318,7 +321,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 270, "execution_count": 316,
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
...@@ -347,14 +350,14 @@ ...@@ -347,14 +350,14 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.340955Z", "end_time": "2023-05-25T14:28:54.906409Z",
"start_time": "2023-05-24T19:57:31.332522Z" "start_time": "2023-05-25T14:28:54.897176Z"
} }
} }
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 271, "execution_count": 317,
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
...@@ -383,14 +386,14 @@ ...@@ -383,14 +386,14 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.410490Z", "end_time": "2023-05-25T14:28:54.991884Z",
"start_time": "2023-05-24T19:57:31.343315Z" "start_time": "2023-05-25T14:28:54.906637Z"
} }
} }
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 272, "execution_count": 318,
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
...@@ -419,8 +422,8 @@ ...@@ -419,8 +422,8 @@
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.621919Z", "end_time": "2023-05-25T14:28:55.205099Z",
"start_time": "2023-05-24T19:57:31.412912Z" "start_time": "2023-05-25T14:28:54.990989Z"
} }
} }
}, },
...@@ -436,14 +439,87 @@ ...@@ -436,14 +439,87 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 272, "execution_count": 319,
"outputs": [], "outputs": [
"source": [], {
"name": "stdout",
"output_type": "stream",
"text": [
"Best single model : K Nearest Neighbour 1st Model\n",
"Best overall algorithm : K Nearest Neighbour Algorithm\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 2 Axes>",
"image/png": "\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Collect every trained model as one record: (label, accuracy score, true labels, predicted labels).\n",
"# A list of records is used rather than dictionaries keyed by the score, because two models that\n",
"# reach the same accuracy would share a key and silently overwrite each other.\n",
"model_results = [\n",
"    (\"Gaussian Naive Bayes 1st Model\", gnb_score, y_test, gnb_predicted),\n",
"    (\"Gaussian Naive Bayes 2nd Model\", gnb2_score, y_test2, gnb2_predicted),\n",
"    (\"Gaussian Naive Bayes 3rd Model\", gnb3_score, y_test3, gnb3_predicted),\n",
"    (\"K Nearest Neighbour 1st Model\", knc_score, y_test, knc_predicted),\n",
"    (\"K Nearest Neighbour 2nd Model\", knc2_score, y_test2, knc2_predicted),\n",
"    (\"K Nearest Neighbour 3rd Model\", knc3_score, y_test3, knc3_predicted),\n",
"    (\"Support Vector Classification 1st Model\", svc_score, y_test, svc_predicted),\n",
"    (\"Support Vector Classification 2nd Model\", svc2_score, y_test2, svc2_predicted),\n",
"    (\"Support Vector Classification 3rd Model\", svc3_score, y_test3, svc3_predicted),\n",
"]\n",
"\n",
"# Average the three scores of each algorithm so the algorithms can be compared as a whole\n",
"gnb_average = statistics.fmean((gnb_score, gnb2_score, gnb3_score))\n",
"knc_average = statistics.fmean((knc_score, knc2_score, knc3_score))\n",
"svc_average = statistics.fmean((svc_score, svc2_score, svc3_score))\n",
"algorithm_averages = [\n",
"    (\"Gaussian Naive Bayes Algorithm\", gnb_average),\n",
"    (\"K Nearest Neighbour Algorithm\", knc_average),\n",
"    (\"Support Vector Classification Algorithm\", svc_average),\n",
"]\n",
"\n",
"# Pick the entries with the highest score; max() returns the first listed entry when scores tie\n",
"highest, highest_score, best_y_test, best_predicted = max(model_results, key=lambda result: result[1])\n",
"highest_average, highest_average_score = max(algorithm_averages, key=lambda result: result[1])\n",
"\n",
"# Print the best\n",
"print(\"Best single model :\", highest)\n",
"print(\"Best overall algorithm :\", highest_average)\n",
"\n",
"# Now let's generate and look at the confusion matrix from the best model.\n",
"# The result is not named `display`, so IPython's built-in display() stays usable in later cells.\n",
"confusion_display = metrics.ConfusionMatrixDisplay.from_predictions(best_y_test, best_predicted)\n",
"# Add a title; the trailing semicolon stops the notebook from echoing the returned Text object\n",
"confusion_display.figure_.suptitle(f\"{highest} Confusion Matrix\");"
],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2023-05-24T19:57:31.624901Z", "end_time": "2023-05-25T14:28:55.437282Z",
"start_time": "2023-05-24T19:57:31.622175Z" "start_time": "2023-05-25T14:28:55.208964Z"
} }
} }
}, },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment