{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "
Заметка 4. Деревья решений и др.
\n", "
курса Введение в машинное обучение.
\n", "
Шокуров Антон В.
\n", "
shokurov.anton.v@yandex.ru
\n", "
http://машинноезрение.рф
\n", "
Версия 0.11
\n", "\n", "
Аннотация
\n", "

\n", "Деревья решений. Ближайшего соседа. Используем библиотеку scikit-learn\n", "

\n", "\n", "

Это предварительная версия! Любые замечания приветствуются.

" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from itertools import product\n", "\n", "import graphviz\n", "from sklearn import tree\n", "\n", "from sklearn import datasets\n", "from sklearn.tree import DecisionTreeClassifier\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "iris = datasets.load_iris()\n", "X = iris.data[:, [0, 2]]\n", "y = iris.target" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.keys()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
105.43.71.50.2
114.83.41.60.2
124.83.01.40.1
134.33.01.10.1
145.84.01.20.2
155.74.41.50.4
165.43.91.30.4
175.13.51.40.3
185.73.81.70.3
195.13.81.50.3
205.43.41.70.2
215.13.71.50.4
224.63.61.00.2
235.13.31.70.5
244.83.41.90.2
255.03.01.60.2
265.03.41.60.4
275.23.51.50.2
285.23.41.40.2
294.73.21.60.2
...............
1206.93.25.72.3
1215.62.84.92.0
1227.72.86.72.0
1236.32.74.91.8
1246.73.35.72.1
1257.23.26.01.8
1266.22.84.81.8
1276.13.04.91.8
1286.42.85.62.1
1297.23.05.81.6
1307.42.86.11.9
1317.93.86.42.0
1326.42.85.62.2
1336.32.85.11.5
1346.12.65.61.4
1357.73.06.12.3
1366.33.45.62.4
1376.43.15.51.8
1386.03.04.81.8
1396.93.15.42.1
1406.73.15.62.4
1416.93.15.12.3
1425.82.75.11.9
1436.83.25.92.3
1446.73.35.72.5
1456.73.05.22.3
1466.32.55.01.9
1476.53.05.22.0
1486.23.45.42.3
1495.93.05.11.8
\n", "

150 rows × 4 columns

\n", "
" ], "text/plain": [ " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n", "0 5.1 3.5 1.4 0.2\n", "1 4.9 3.0 1.4 0.2\n", "2 4.7 3.2 1.3 0.2\n", "3 4.6 3.1 1.5 0.2\n", "4 5.0 3.6 1.4 0.2\n", "5 5.4 3.9 1.7 0.4\n", "6 4.6 3.4 1.4 0.3\n", "7 5.0 3.4 1.5 0.2\n", "8 4.4 2.9 1.4 0.2\n", "9 4.9 3.1 1.5 0.1\n", "10 5.4 3.7 1.5 0.2\n", "11 4.8 3.4 1.6 0.2\n", "12 4.8 3.0 1.4 0.1\n", "13 4.3 3.0 1.1 0.1\n", "14 5.8 4.0 1.2 0.2\n", "15 5.7 4.4 1.5 0.4\n", "16 5.4 3.9 1.3 0.4\n", "17 5.1 3.5 1.4 0.3\n", "18 5.7 3.8 1.7 0.3\n", "19 5.1 3.8 1.5 0.3\n", "20 5.4 3.4 1.7 0.2\n", "21 5.1 3.7 1.5 0.4\n", "22 4.6 3.6 1.0 0.2\n", "23 5.1 3.3 1.7 0.5\n", "24 4.8 3.4 1.9 0.2\n", "25 5.0 3.0 1.6 0.2\n", "26 5.0 3.4 1.6 0.4\n", "27 5.2 3.5 1.5 0.2\n", "28 5.2 3.4 1.4 0.2\n", "29 4.7 3.2 1.6 0.2\n", ".. ... ... ... ...\n", "120 6.9 3.2 5.7 2.3\n", "121 5.6 2.8 4.9 2.0\n", "122 7.7 2.8 6.7 2.0\n", "123 6.3 2.7 4.9 1.8\n", "124 6.7 3.3 5.7 2.1\n", "125 7.2 3.2 6.0 1.8\n", "126 6.2 2.8 4.8 1.8\n", "127 6.1 3.0 4.9 1.8\n", "128 6.4 2.8 5.6 2.1\n", "129 7.2 3.0 5.8 1.6\n", "130 7.4 2.8 6.1 1.9\n", "131 7.9 3.8 6.4 2.0\n", "132 6.4 2.8 5.6 2.2\n", "133 6.3 2.8 5.1 1.5\n", "134 6.1 2.6 5.6 1.4\n", "135 7.7 3.0 6.1 2.3\n", "136 6.3 3.4 5.6 2.4\n", "137 6.4 3.1 5.5 1.8\n", "138 6.0 3.0 4.8 1.8\n", "139 6.9 3.1 5.4 2.1\n", "140 6.7 3.1 5.6 2.4\n", "141 6.9 3.1 5.1 2.3\n", "142 5.8 2.7 5.1 1.9\n", "143 6.8 3.2 5.9 2.3\n", "144 6.7 3.3 5.7 2.5\n", "145 6.7 3.0 5.2 2.3\n", "146 6.3 2.5 5.0 1.9\n", "147 6.5 3.0 5.2 2.0\n", "148 6.2 3.4 5.4 2.3\n", "149 5.9 3.0 5.1 1.8\n", "\n", "[150 rows x 4 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame( iris.data )\n", "df.columns = iris.feature_names\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "clf1 = DecisionTreeClassifier(max_depth=1)\n", "clf2 = DecisionTreeClassifier(max_depth=2)\n", "clf3 = 
DecisionTreeClassifier(max_depth=3)\n", "clf4 = DecisionTreeClassifier(max_depth=5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf1.fit(X, y)\n", "clf2.fit(X, y)\n", "clf3.fit(X, y)\n", "clf4.fit(X, y)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", "xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),\n", " np.arange(y_min, y_max, 0.1))\n", "\n", "f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))\n", "\n", "for idx, clf, tt in zip(product([0, 1], [0, 1]),\n", " [clf1, clf2, clf3, clf4],\n", " ['Decision Tree (depth=1)', 'Decision Tree (depth=2)',\n", " 'Decision Tree (depth=3)', 'Decision Tree (depth=5)']):\n", "\n", " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", " Z = Z.reshape(xx.shape)\n", "\n", " axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)\n", " axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y,\n", " s=20, edgecolor='k')\n", " axarr[idx[0], idx[1]].set_title(tt)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "dot_data = tree.export_graphviz(clf1, out_file=None) \n", "graph = graphviz.Source(dot_data) \n", "#graph.render(\"iris\") " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal length (cm) ≤ 2.45\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf1, out_file=None, \n", " feature_names=[iris.feature_names[0],iris.feature_names[2]],\n", " 
class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal length (cm) ≤ 2.45\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "petal length (cm) ≤ 4.75\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "gini = 0.043\n", "samples = 45\n", "value = [0, 44, 1]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "4\n", "\n", "gini = 0.194\n", "samples = 55\n", "value = [0, 6, 49]\n", "class = virginica\n", "\n", "\n", "2->4\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf2, out_file=None, \n", " feature_names=[iris.feature_names[0],iris.feature_names[2]],\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal length (cm) ≤ 2.45\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "petal length (cm) ≤ 4.75\n", "gini = 0.5\n", "samples = 100\n", 
"value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "sepal length (cm) ≤ 4.95\n", "gini = 0.043\n", "samples = 45\n", "value = [0, 44, 1]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "6\n", "\n", "petal length (cm) ≤ 5.05\n", "gini = 0.194\n", "samples = 55\n", "value = [0, 6, 49]\n", "class = virginica\n", "\n", "\n", "2->6\n", "\n", "\n", "\n", "\n", "4\n", "\n", "gini = 0.5\n", "samples = 2\n", "value = [0, 1, 1]\n", "class = versicolor\n", "\n", "\n", "3->4\n", "\n", "\n", "\n", "\n", "5\n", "\n", "gini = 0.0\n", "samples = 43\n", "value = [0, 43, 0]\n", "class = versicolor\n", "\n", "\n", "3->5\n", "\n", "\n", "\n", "\n", "7\n", "\n", "gini = 0.473\n", "samples = 13\n", "value = [0, 5, 8]\n", "class = virginica\n", "\n", "\n", "6->7\n", "\n", "\n", "\n", "\n", "8\n", "\n", "gini = 0.046\n", "samples = 42\n", "value = [0, 1, 41]\n", "class = virginica\n", "\n", "\n", "6->8\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf3, out_file=None, \n", " feature_names=[iris.feature_names[0],iris.feature_names[2]],\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal length (cm) ≤ 2.45\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "petal length (cm) ≤ 4.75\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", 
"\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "sepal length (cm) ≤ 4.95\n", "gini = 0.043\n", "samples = 45\n", "value = [0, 44, 1]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "8\n", "\n", "petal length (cm) ≤ 5.05\n", "gini = 0.194\n", "samples = 55\n", "value = [0, 6, 49]\n", "class = virginica\n", "\n", "\n", "2->8\n", "\n", "\n", "\n", "\n", "4\n", "\n", "petal length (cm) ≤ 3.9\n", "gini = 0.5\n", "samples = 2\n", "value = [0, 1, 1]\n", "class = versicolor\n", "\n", "\n", "3->4\n", "\n", "\n", "\n", "\n", "7\n", "\n", "gini = 0.0\n", "samples = 43\n", "value = [0, 43, 0]\n", "class = versicolor\n", "\n", "\n", "3->7\n", "\n", "\n", "\n", "\n", "5\n", "\n", "gini = 0.0\n", "samples = 1\n", "value = [0, 1, 0]\n", "class = versicolor\n", "\n", "\n", "4->5\n", "\n", "\n", "\n", "\n", "6\n", "\n", "gini = 0.0\n", "samples = 1\n", "value = [0, 0, 1]\n", "class = virginica\n", "\n", "\n", "4->6\n", "\n", "\n", "\n", "\n", "9\n", "\n", "sepal length (cm) ≤ 6.5\n", "gini = 0.473\n", "samples = 13\n", "value = [0, 5, 8]\n", "class = virginica\n", "\n", "\n", "8->9\n", "\n", "\n", "\n", "\n", "14\n", "\n", "sepal length (cm) ≤ 6.05\n", "gini = 0.046\n", "samples = 42\n", "value = [0, 1, 41]\n", "class = virginica\n", "\n", "\n", "8->14\n", "\n", "\n", "\n", "\n", "10\n", "\n", "petal length (cm) ≤ 4.95\n", "gini = 0.32\n", "samples = 10\n", "value = [0, 2, 8]\n", "class = virginica\n", "\n", "\n", "9->10\n", "\n", "\n", "\n", "\n", "13\n", "\n", "gini = 0.0\n", "samples = 3\n", "value = [0, 3, 0]\n", "class = versicolor\n", "\n", "\n", "9->13\n", "\n", "\n", "\n", "\n", "11\n", "\n", "gini = 0.408\n", "samples = 7\n", "value = [0, 2, 5]\n", "class = virginica\n", "\n", "\n", "10->11\n", "\n", "\n", "\n", "\n", "12\n", "\n", "gini = 0.0\n", "samples = 3\n", "value = [0, 0, 3]\n", "class = virginica\n", "\n", "\n", "10->12\n", "\n", "\n", "\n", "\n", "15\n", "\n", "sepal length (cm) ≤ 5.95\n", "gini = 0.32\n", "samples = 
5\n", "value = [0, 1, 4]\n", "class = virginica\n", "\n", "\n", "14->15\n", "\n", "\n", "\n", "\n", "18\n", "\n", "gini = 0.0\n", "samples = 37\n", "value = [0, 0, 37]\n", "class = virginica\n", "\n", "\n", "14->18\n", "\n", "\n", "\n", "\n", "16\n", "\n", "gini = 0.0\n", "samples = 4\n", "value = [0, 0, 4]\n", "class = virginica\n", "\n", "\n", "15->16\n", "\n", "\n", "\n", "\n", "17\n", "\n", "gini = 0.0\n", "samples = 1\n", "value = [0, 1, 0]\n", "class = versicolor\n", "\n", "\n", "15->17\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf4, out_file=None, \n", " feature_names=[iris.feature_names[0],iris.feature_names[2]],\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = DecisionTreeClassifier(max_depth=3)\n", "clf.fit(iris.data, iris.target)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal length (cm) ≤ 2.45\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "petal 
width (cm) ≤ 1.75\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "petal length (cm) ≤ 4.95\n", "gini = 0.168\n", "samples = 54\n", "value = [0, 49, 5]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "6\n", "\n", "petal length (cm) ≤ 4.85\n", "gini = 0.043\n", "samples = 46\n", "value = [0, 1, 45]\n", "class = virginica\n", "\n", "\n", "2->6\n", "\n", "\n", "\n", "\n", "4\n", "\n", "gini = 0.041\n", "samples = 48\n", "value = [0, 47, 1]\n", "class = versicolor\n", "\n", "\n", "3->4\n", "\n", "\n", "\n", "\n", "5\n", "\n", "gini = 0.444\n", "samples = 6\n", "value = [0, 2, 4]\n", "class = virginica\n", "\n", "\n", "3->5\n", "\n", "\n", "\n", "\n", "7\n", "\n", "gini = 0.444\n", "samples = 3\n", "value = [0, 1, 2]\n", "class = virginica\n", "\n", "\n", "6->7\n", "\n", "\n", "\n", "\n", "8\n", "\n", "gini = 0.0\n", "samples = 43\n", "value = [0, 0, 43]\n", "class = virginica\n", "\n", "\n", "6->8\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf, out_file=None, \n", " feature_names=iris.feature_names,\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = 
DecisionTreeClassifier(max_depth=4)\n", "clf.fit(iris.data, iris.target)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal width (cm) ≤ 0.8\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", "\n", "2\n", "\n", "petal width (cm) ≤ 1.75\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "petal length (cm) ≤ 4.95\n", "gini = 0.168\n", "samples = 54\n", "value = [0, 49, 5]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "10\n", "\n", "petal length (cm) ≤ 4.85\n", "gini = 0.043\n", "samples = 46\n", "value = [0, 1, 45]\n", "class = virginica\n", "\n", "\n", "2->10\n", "\n", "\n", "\n", "\n", "4\n", "\n", "petal width (cm) ≤ 1.65\n", "gini = 0.041\n", "samples = 48\n", "value = [0, 47, 1]\n", "class = versicolor\n", "\n", "\n", "3->4\n", "\n", "\n", "\n", "\n", "7\n", "\n", "petal width (cm) ≤ 1.55\n", "gini = 0.444\n", "samples = 6\n", "value = [0, 2, 4]\n", "class = virginica\n", "\n", "\n", "3->7\n", "\n", "\n", "\n", "\n", "5\n", "\n", "gini = 0.0\n", "samples = 47\n", "value = [0, 47, 0]\n", "class = versicolor\n", "\n", "\n", "4->5\n", "\n", "\n", "\n", "\n", "6\n", "\n", "gini = 0.0\n", "samples = 1\n", "value = [0, 0, 1]\n", "class = virginica\n", "\n", "\n", "4->6\n", "\n", "\n", "\n", "\n", "8\n", "\n", "gini = 0.0\n", "samples = 3\n", "value = [0, 0, 3]\n", "class = virginica\n", "\n", "\n", "7->8\n", "\n", "\n", "\n", "\n", "9\n", "\n", "gini = 0.444\n", "samples = 3\n", "value = [0, 2, 1]\n", "class = versicolor\n", "\n", "\n", "7->9\n", "\n", "\n", "\n", "\n", "11\n", "\n", "sepal length (cm) ≤ 5.95\n", 
"gini = 0.444\n", "samples = 3\n", "value = [0, 1, 2]\n", "class = virginica\n", "\n", "\n", "10->11\n", "\n", "\n", "\n", "\n", "14\n", "\n", "gini = 0.0\n", "samples = 43\n", "value = [0, 0, 43]\n", "class = virginica\n", "\n", "\n", "10->14\n", "\n", "\n", "\n", "\n", "12\n", "\n", "gini = 0.0\n", "samples = 1\n", "value = [0, 1, 0]\n", "class = versicolor\n", "\n", "\n", "11->12\n", "\n", "\n", "\n", "\n", "13\n", "\n", "gini = 0.0\n", "samples = 2\n", "value = [0, 0, 2]\n", "class = virginica\n", "\n", "\n", "11->13\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf, out_file=None, \n", " feature_names=iris.feature_names,\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.15, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.15)\n", "clf.fit(iris.data, iris.target)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Tree\n", "\n", "\n", "0\n", "\n", "petal width (cm) ≤ 0.8\n", "gini = 0.667\n", "samples = 150\n", "value = [50, 50, 50]\n", "class = setosa\n", "\n", "\n", "1\n", "\n", "gini = 0.0\n", "samples = 50\n", "value = [50, 0, 0]\n", "class = setosa\n", "\n", "\n", "0->1\n", "\n", "\n", "True\n", "\n", 
"\n", "2\n", "\n", "petal width (cm) ≤ 1.75\n", "gini = 0.5\n", "samples = 100\n", "value = [0, 50, 50]\n", "class = versicolor\n", "\n", "\n", "0->2\n", "\n", "\n", "False\n", "\n", "\n", "3\n", "\n", "gini = 0.168\n", "samples = 54\n", "value = [0, 49, 5]\n", "class = versicolor\n", "\n", "\n", "2->3\n", "\n", "\n", "\n", "\n", "4\n", "\n", "gini = 0.043\n", "samples = 46\n", "value = [0, 1, 45]\n", "class = virginica\n", "\n", "\n", "2->4\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dot_data = tree.export_graphviz(clf, out_file=None, \n", " feature_names=iris.feature_names,\n", " class_names=iris.target_names, \n", " filled=True, rounded=True, \n", " special_characters=True)\n", "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Стрктура бинарного дерева имеет 5 узлов и следущий вид:\n", "узел=0 проверка: перейти к узлу 1 если sepal length (cm) <= 0.800000011920929 иначе к узлу 2.\n", "\tузел=1 конечный.\n", "\tузел=2 проверка: перейти к узлу 3 если petal length (cm) <= 1.75 иначе к узлу 4.\n", "\t\tузел=3 конечный.\n", "\t\tузел=4 конечный.\n", "\n" ] } ], "source": [ "n_nodes = clf.tree_.node_count\n", "children_left = clf.tree_.children_left\n", "children_right = clf.tree_.children_right\n", "feature = clf.tree_.feature\n", "feature_names = iris.feature_names#clf.tree_.feature\n", "threshold = clf.tree_.threshold\n", "\n", "\n", "# The tree structure can be traversed to compute various properties such\n", "# as the depth of each node and whether or not it is a leaf.\n", "node_depth = np.zeros(shape=n_nodes, dtype=np.int64)\n", "is_leaves = np.zeros(shape=n_nodes, dtype=bool)\n", "stack = [(0, -1)] # seed is the root node id and its parent depth\n", "while len(stack) > 0:\n", " node_id, parent_depth = stack.pop()\n", " 
node_depth[node_id] = parent_depth + 1\n", "\n", " # If we have a test node: queue both children one level deeper; otherwise record the node as a leaf.\n", " if (children_left[node_id] != children_right[node_id]):\n", "     stack.append((children_left[node_id], parent_depth + 1))\n", "     stack.append((children_right[node_id], parent_depth + 1))\n", " else:\n", "     is_leaves[node_id] = True\n", "\n", "# Print one line per node, indented by its depth in the tree.\n", "print(\"Стрктура бинарного дерева имеет %s узлов и следущий вид:\"\n", "      % n_nodes)\n", "for i in range(n_nodes):\n", "    if is_leaves[i]:\n", "        print(\"%sузел=%s конечный.\" % (node_depth[i] * \"\\t\", i))\n", "    else:\n", "        # feature[i] is the index of the feature tested at node i, so the name must be\n", "        # looked up through it; indexing feature_names with the node id i prints the wrong feature.\n", "        print(\"%sузел=%s проверка: перейти к узлу %s если %s <= %s иначе к узлу %s.\"\n", "              % (node_depth[i] * \"\\t\",\n", "                 i,\n", "                 children_left[i],\n", "                 feature_names[feature[i]],\n", "                 threshold[i],\n", "                 children_right[i],\n", "                 ))\n", "print()\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "sample = 33" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<1x5 sparse matrix of type ''\n", "\twith 2 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "node_indicator = clf.decision_path( [iris.data[ sample ]] )\n", "node_indicator" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "leave_id = clf.apply( [iris.data[ sample ]] )\n", "leave_id[0]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([0]), array(['setosa'], dtype=' -2.0)\n" ] } ], "source": [ "# Now, it's possible to get the tests that were used to predict a sample or\n", "# a group of samples.
First, let's make it for the sample.\n", "\n", "sample_id = 0\n", "node_index = node_indicator.indices[node_indicator.indptr[0]:\n", " node_indicator.indptr[0 + 1]]\n", "\n", "print('Используемые правила для предсказания %s: ' % sample)\n", "for node_id in node_index:\n", "    # Skip the leaf the sample lands in: it applies no test, so only the\n", "    # decision nodes on the path are reported.\n", "    if leave_id[sample_id] == node_id:\n", "        continue\n", "\n", "    # Record which side of the node's threshold the sample's feature value falls on.\n", "    if (iris.data[sample, feature[node_id]] <= threshold[node_id]):\n", "        threshold_sign = \"<=\"\n", "    else:\n", "        threshold_sign = \">\"\n", "\n", "    print(\"узел дерева решений %s : ( фича %s, (= %s) %s %s)\"\n", "          % (node_id,\n", "             feature[node_id],\n", "             iris.data[sample, feature[node_id]],\n", "             threshold_sign,\n", "             threshold[node_id]))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.96" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.score(iris.data, iris.target)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split( iris.data, iris.target, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.8466666666666667, 0.15333333333333332)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape[0]/iris.data.shape[0], X_test.shape[0]/iris.data.shape[0]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.15, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", " splitter='best')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ],
"source": [ "clf = DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.15)\n", "clf.fit( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8695652173913043" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf.score( X_test, y_test )" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8695652173913043" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = DecisionTreeClassifier(max_depth=5, min_impurity_decrease=0.01 )\n", "clf.fit( X_train, y_train )\n", "clf.score( X_test, y_test )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Лассо регрессия" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(506, 13)\n" ] } ], "source": [ "from sklearn.datasets import load_boston\n", "boston = load_boston()\n", "print(boston.data.shape)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['data', 'target', 'feature_names', 'DESCR'])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "boston.keys()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
50.029850.02.180.00.4586.43058.76.06223.0222.018.7394.125.21
60.0882912.57.870.00.5246.01266.65.56055.0311.015.2395.6012.43
70.1445512.57.870.00.5246.17296.15.95055.0311.015.2396.9019.15
80.2112412.57.870.00.5245.631100.06.08215.0311.015.2386.6329.93
90.1700412.57.870.00.5246.00485.96.59215.0311.015.2386.7117.10
100.2248912.57.870.00.5246.37794.36.34675.0311.015.2392.5220.45
110.1174712.57.870.00.5246.00982.96.22675.0311.015.2396.9013.27
120.0937812.57.870.00.5245.88939.05.45095.0311.015.2390.5015.71
130.629760.08.140.00.5385.94961.84.70754.0307.021.0396.908.26
140.637960.08.140.00.5386.09684.54.46194.0307.021.0380.0210.26
150.627390.08.140.00.5385.83456.54.49864.0307.021.0395.628.47
161.053930.08.140.00.5385.93529.34.49864.0307.021.0386.856.58
170.784200.08.140.00.5385.99081.74.25794.0307.021.0386.7514.67
180.802710.08.140.00.5385.45636.63.79654.0307.021.0288.9911.69
190.725800.08.140.00.5385.72769.53.79654.0307.021.0390.9511.28
201.251790.08.140.00.5385.57098.13.79794.0307.021.0376.5721.02
210.852040.08.140.00.5385.96589.24.01234.0307.021.0392.5313.83
221.232470.08.140.00.5386.14291.73.97694.0307.021.0396.9018.72
230.988430.08.140.00.5385.813100.04.09524.0307.021.0394.5419.88
240.750260.08.140.00.5385.92494.14.39964.0307.021.0394.3316.30
250.840540.08.140.00.5385.59985.74.45464.0307.021.0303.4216.51
260.671910.08.140.00.5385.81390.34.68204.0307.021.0376.8814.81
270.955770.08.140.00.5386.04788.84.45344.0307.021.0306.3817.28
280.772990.08.140.00.5386.49594.44.45474.0307.021.0387.9412.80
291.002450.08.140.00.5386.67487.34.23904.0307.021.0380.2311.98
..........................................
4764.871410.018.100.00.6146.48493.62.305324.0666.020.2396.2118.68
47715.023400.018.100.00.6145.30497.32.100724.0666.020.2349.4824.91
47810.233000.018.100.00.6146.18596.72.170524.0666.020.2379.7018.03
47914.333700.018.100.00.6146.22988.01.951224.0666.020.2383.3213.11
4805.824010.018.100.00.5326.24264.73.424224.0666.020.2396.9010.74
4815.708180.018.100.00.5326.75074.93.331724.0666.020.2393.077.74
4825.731160.018.100.00.5327.06177.03.410624.0666.020.2395.287.01
4832.818380.018.100.00.5325.76240.34.098324.0666.020.2392.9210.42
4842.378570.018.100.00.5835.87141.93.724024.0666.020.2370.7313.34
4853.673670.018.100.00.5836.31251.93.991724.0666.020.2388.6210.58
4865.691750.018.100.00.5836.11479.83.545924.0666.020.2392.6814.98
4874.835670.018.100.00.5835.90553.23.152324.0666.020.2388.2211.45
4880.150860.027.740.00.6095.45492.71.82094.0711.020.1395.0918.06
4890.183370.027.740.00.6095.41498.31.75544.0711.020.1344.0523.97
4900.207460.027.740.00.6095.09398.01.82264.0711.020.1318.4329.68
4910.105740.027.740.00.6095.98398.81.86814.0711.020.1390.1118.07
4920.111320.027.740.00.6095.98383.52.10994.0711.020.1396.9013.35
4930.173310.09.690.00.5855.70754.02.38176.0391.019.2396.9012.01
4940.279570.09.690.00.5855.92642.62.38176.0391.019.2396.9013.59
4950.178990.09.690.00.5855.67028.82.79866.0391.019.2393.2917.60
4960.289600.09.690.00.5855.39072.92.79866.0391.019.2396.9021.14
4970.268380.09.690.00.5855.79470.62.89276.0391.019.2396.9014.10
4980.239120.09.690.00.5856.01965.32.40916.0391.019.2396.9012.92
4990.177830.09.690.00.5855.56973.52.39996.0391.019.2395.7715.10
5000.224380.09.690.00.5856.02779.72.49826.0391.019.2396.9014.33
5010.062630.011.930.00.5736.59369.12.47861.0273.021.0391.999.67
5020.045270.011.930.00.5736.12076.72.28751.0273.021.0396.909.08
5030.060760.011.930.00.5736.97691.02.16751.0273.021.0396.905.64
5040.109590.011.930.00.5736.79489.32.38891.0273.021.0393.456.48
5050.047410.011.930.00.5736.03080.82.50501.0273.021.0396.907.88
\n", "

506 rows × 13 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", "5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 \n", "6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 \n", "7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 \n", "8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 \n", "9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 \n", "10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 \n", "11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 \n", "12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 \n", "13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 \n", "14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 \n", "15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 \n", "16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 \n", "17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 \n", "18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 \n", "19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 \n", "20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 \n", "21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 \n", "22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 \n", "23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 \n", "24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 \n", "25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 \n", "26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 \n", "27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 \n", "28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 \n", "29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 \n", ".. ... ... ... ... ... ... 
... ... ... ... \n", "476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 \n", "477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 \n", "478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 \n", "479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 \n", "480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 \n", "481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 \n", "482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 \n", "483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 \n", "484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 \n", "485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 \n", "486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 \n", "487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 \n", "488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 \n", "489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 \n", "490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 \n", "491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 \n", "492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 \n", "493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 \n", "494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 \n", "495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 \n", "496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 \n", "497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 \n", "498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 \n", "499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 \n", "500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 \n", "501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 \n", "502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 \n", "503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 \n", "504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 \n", "505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 
273.0 \n", "\n", " 10 11 12 \n", "0 15.3 396.90 4.98 \n", "1 17.8 396.90 9.14 \n", "2 17.8 392.83 4.03 \n", "3 18.7 394.63 2.94 \n", "4 18.7 396.90 5.33 \n", "5 18.7 394.12 5.21 \n", "6 15.2 395.60 12.43 \n", "7 15.2 396.90 19.15 \n", "8 15.2 386.63 29.93 \n", "9 15.2 386.71 17.10 \n", "10 15.2 392.52 20.45 \n", "11 15.2 396.90 13.27 \n", "12 15.2 390.50 15.71 \n", "13 21.0 396.90 8.26 \n", "14 21.0 380.02 10.26 \n", "15 21.0 395.62 8.47 \n", "16 21.0 386.85 6.58 \n", "17 21.0 386.75 14.67 \n", "18 21.0 288.99 11.69 \n", "19 21.0 390.95 11.28 \n", "20 21.0 376.57 21.02 \n", "21 21.0 392.53 13.83 \n", "22 21.0 396.90 18.72 \n", "23 21.0 394.54 19.88 \n", "24 21.0 394.33 16.30 \n", "25 21.0 303.42 16.51 \n", "26 21.0 376.88 14.81 \n", "27 21.0 306.38 17.28 \n", "28 21.0 387.94 12.80 \n", "29 21.0 380.23 11.98 \n", ".. ... ... ... \n", "476 20.2 396.21 18.68 \n", "477 20.2 349.48 24.91 \n", "478 20.2 379.70 18.03 \n", "479 20.2 383.32 13.11 \n", "480 20.2 396.90 10.74 \n", "481 20.2 393.07 7.74 \n", "482 20.2 395.28 7.01 \n", "483 20.2 392.92 10.42 \n", "484 20.2 370.73 13.34 \n", "485 20.2 388.62 10.58 \n", "486 20.2 392.68 14.98 \n", "487 20.2 388.22 11.45 \n", "488 20.1 395.09 18.06 \n", "489 20.1 344.05 23.97 \n", "490 20.1 318.43 29.68 \n", "491 20.1 390.11 18.07 \n", "492 20.1 396.90 13.35 \n", "493 19.2 396.90 12.01 \n", "494 19.2 396.90 13.59 \n", "495 19.2 393.29 17.60 \n", "496 19.2 396.90 21.14 \n", "497 19.2 396.90 14.10 \n", "498 19.2 396.90 12.92 \n", "499 19.2 395.77 15.10 \n", "500 19.2 396.90 14.33 \n", "501 21.0 391.99 9.67 \n", "502 21.0 396.90 9.08 \n", "503 21.0 396.90 5.64 \n", "504 21.0 393.45 6.48 \n", "505 21.0 396.90 7.88 \n", "\n", "[506 rows x 13 columns]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.DataFrame(boston['data'])\n", "df" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
50.029850.02.180.00.4586.43058.76.06223.0222.018.7394.125.21
60.0882912.57.870.00.5246.01266.65.56055.0311.015.2395.6012.43
70.1445512.57.870.00.5246.17296.15.95055.0311.015.2396.9019.15
80.2112412.57.870.00.5245.631100.06.08215.0311.015.2386.6329.93
90.1700412.57.870.00.5246.00485.96.59215.0311.015.2386.7117.10
100.2248912.57.870.00.5246.37794.36.34675.0311.015.2392.5220.45
110.1174712.57.870.00.5246.00982.96.22675.0311.015.2396.9013.27
120.0937812.57.870.00.5245.88939.05.45095.0311.015.2390.5015.71
130.629760.08.140.00.5385.94961.84.70754.0307.021.0396.908.26
140.637960.08.140.00.5386.09684.54.46194.0307.021.0380.0210.26
150.627390.08.140.00.5385.83456.54.49864.0307.021.0395.628.47
161.053930.08.140.00.5385.93529.34.49864.0307.021.0386.856.58
170.784200.08.140.00.5385.99081.74.25794.0307.021.0386.7514.67
180.802710.08.140.00.5385.45636.63.79654.0307.021.0288.9911.69
190.725800.08.140.00.5385.72769.53.79654.0307.021.0390.9511.28
201.251790.08.140.00.5385.57098.13.79794.0307.021.0376.5721.02
210.852040.08.140.00.5385.96589.24.01234.0307.021.0392.5313.83
221.232470.08.140.00.5386.14291.73.97694.0307.021.0396.9018.72
230.988430.08.140.00.5385.813100.04.09524.0307.021.0394.5419.88
240.750260.08.140.00.5385.92494.14.39964.0307.021.0394.3316.30
250.840540.08.140.00.5385.59985.74.45464.0307.021.0303.4216.51
260.671910.08.140.00.5385.81390.34.68204.0307.021.0376.8814.81
270.955770.08.140.00.5386.04788.84.45344.0307.021.0306.3817.28
280.772990.08.140.00.5386.49594.44.45474.0307.021.0387.9412.80
291.002450.08.140.00.5386.67487.34.23904.0307.021.0380.2311.98
..........................................
4764.871410.018.100.00.6146.48493.62.305324.0666.020.2396.2118.68
47715.023400.018.100.00.6145.30497.32.100724.0666.020.2349.4824.91
47810.233000.018.100.00.6146.18596.72.170524.0666.020.2379.7018.03
47914.333700.018.100.00.6146.22988.01.951224.0666.020.2383.3213.11
4805.824010.018.100.00.5326.24264.73.424224.0666.020.2396.9010.74
4815.708180.018.100.00.5326.75074.93.331724.0666.020.2393.077.74
4825.731160.018.100.00.5327.06177.03.410624.0666.020.2395.287.01
4832.818380.018.100.00.5325.76240.34.098324.0666.020.2392.9210.42
4842.378570.018.100.00.5835.87141.93.724024.0666.020.2370.7313.34
4853.673670.018.100.00.5836.31251.93.991724.0666.020.2388.6210.58
4865.691750.018.100.00.5836.11479.83.545924.0666.020.2392.6814.98
4874.835670.018.100.00.5835.90553.23.152324.0666.020.2388.2211.45
4880.150860.027.740.00.6095.45492.71.82094.0711.020.1395.0918.06
4890.183370.027.740.00.6095.41498.31.75544.0711.020.1344.0523.97
4900.207460.027.740.00.6095.09398.01.82264.0711.020.1318.4329.68
4910.105740.027.740.00.6095.98398.81.86814.0711.020.1390.1118.07
4920.111320.027.740.00.6095.98383.52.10994.0711.020.1396.9013.35
4930.173310.09.690.00.5855.70754.02.38176.0391.019.2396.9012.01
4940.279570.09.690.00.5855.92642.62.38176.0391.019.2396.9013.59
4950.178990.09.690.00.5855.67028.82.79866.0391.019.2393.2917.60
4960.289600.09.690.00.5855.39072.92.79866.0391.019.2396.9021.14
4970.268380.09.690.00.5855.79470.62.89276.0391.019.2396.9014.10
4980.239120.09.690.00.5856.01965.32.40916.0391.019.2396.9012.92
4990.177830.09.690.00.5855.56973.52.39996.0391.019.2395.7715.10
5000.224380.09.690.00.5856.02779.72.49826.0391.019.2396.9014.33
5010.062630.011.930.00.5736.59369.12.47861.0273.021.0391.999.67
5020.045270.011.930.00.5736.12076.72.28751.0273.021.0396.909.08
5030.060760.011.930.00.5736.97691.02.16751.0273.021.0396.905.64
5040.109590.011.930.00.5736.79489.32.38891.0273.021.0393.456.48
5050.047410.011.930.00.5736.03080.82.50501.0273.021.0396.907.88
\n", "

506 rows × 13 columns

\n", "
" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", "5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 \n", "6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 \n", "7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 \n", "8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 \n", "9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 \n", "10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 \n", "11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 \n", "12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 \n", "13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 \n", "14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 \n", "15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 \n", "16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 \n", "17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 \n", "18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 \n", "19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 \n", "20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 \n", "21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 \n", "22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 \n", "23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 \n", "24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 \n", "25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 \n", "26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 \n", "27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 \n", "28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 \n", "29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 \n", ".. ... 
... ... ... ... ... ... ... ... ... \n", "476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 \n", "477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 \n", "478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 \n", "479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 \n", "480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 \n", "481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 \n", "482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 \n", "483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 \n", "484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 \n", "485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 \n", "486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 \n", "487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 \n", "488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 \n", "489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 \n", "490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 \n", "491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 \n", "492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 \n", "493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 \n", "494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 \n", "495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 \n", "496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 \n", "497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 \n", "498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 \n", "499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 \n", "500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 \n", "501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 \n", "502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 \n", "503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 \n", "504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 \n", "505 0.04741 0.0 11.93 0.0 0.573 
6.030 80.8 2.5050 1.0 273.0 \n", "\n", " PTRATIO B LSTAT \n", "0 15.3 396.90 4.98 \n", "1 17.8 396.90 9.14 \n", "2 17.8 392.83 4.03 \n", "3 18.7 394.63 2.94 \n", "4 18.7 396.90 5.33 \n", "5 18.7 394.12 5.21 \n", "6 15.2 395.60 12.43 \n", "7 15.2 396.90 19.15 \n", "8 15.2 386.63 29.93 \n", "9 15.2 386.71 17.10 \n", "10 15.2 392.52 20.45 \n", "11 15.2 396.90 13.27 \n", "12 15.2 390.50 15.71 \n", "13 21.0 396.90 8.26 \n", "14 21.0 380.02 10.26 \n", "15 21.0 395.62 8.47 \n", "16 21.0 386.85 6.58 \n", "17 21.0 386.75 14.67 \n", "18 21.0 288.99 11.69 \n", "19 21.0 390.95 11.28 \n", "20 21.0 376.57 21.02 \n", "21 21.0 392.53 13.83 \n", "22 21.0 396.90 18.72 \n", "23 21.0 394.54 19.88 \n", "24 21.0 394.33 16.30 \n", "25 21.0 303.42 16.51 \n", "26 21.0 376.88 14.81 \n", "27 21.0 306.38 17.28 \n", "28 21.0 387.94 12.80 \n", "29 21.0 380.23 11.98 \n", ".. ... ... ... \n", "476 20.2 396.21 18.68 \n", "477 20.2 349.48 24.91 \n", "478 20.2 379.70 18.03 \n", "479 20.2 383.32 13.11 \n", "480 20.2 396.90 10.74 \n", "481 20.2 393.07 7.74 \n", "482 20.2 395.28 7.01 \n", "483 20.2 392.92 10.42 \n", "484 20.2 370.73 13.34 \n", "485 20.2 388.62 10.58 \n", "486 20.2 392.68 14.98 \n", "487 20.2 388.22 11.45 \n", "488 20.1 395.09 18.06 \n", "489 20.1 344.05 23.97 \n", "490 20.1 318.43 29.68 \n", "491 20.1 390.11 18.07 \n", "492 20.1 396.90 13.35 \n", "493 19.2 396.90 12.01 \n", "494 19.2 396.90 13.59 \n", "495 19.2 393.29 17.60 \n", "496 19.2 396.90 21.14 \n", "497 19.2 396.90 14.10 \n", "498 19.2 396.90 12.92 \n", "499 19.2 395.77 15.10 \n", "500 19.2 396.90 14.33 \n", "501 21.0 391.99 9.67 \n", "502 21.0 396.90 9.08 \n", "503 21.0 396.90 5.64 \n", "504 21.0 393.45 6.48 \n", "505 21.0 396.90 7.88 \n", "\n", "[506 rows x 13 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns = boston['feature_names']\n", "df" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "scrolled": true }, "outputs": [ { "data": { 
"text/plain": [ "array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,\n", " 18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,\n", " 15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,\n", " 13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,\n", " 21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,\n", " 35.4, 24.7, 31.6, 23.3, 19.6, 18.7, 16. , 22.2, 25. , 33. , 23.5,\n", " 19.4, 22. , 17.4, 20.9, 24.2, 21.7, 22.8, 23.4, 24.1, 21.4, 20. ,\n", " 20.8, 21.2, 20.3, 28. , 23.9, 24.8, 22.9, 23.9, 26.6, 22.5, 22.2,\n", " 23.6, 28.7, 22.6, 22. , 22.9, 25. , 20.6, 28.4, 21.4, 38.7, 43.8,\n", " 33.2, 27.5, 26.5, 18.6, 19.3, 20.1, 19.5, 19.5, 20.4, 19.8, 19.4,\n", " 21.7, 22.8, 18.8, 18.7, 18.5, 18.3, 21.2, 19.2, 20.4, 19.3, 22. ,\n", " 20.3, 20.5, 17.3, 18.8, 21.4, 15.7, 16.2, 18. , 14.3, 19.2, 19.6,\n", " 23. , 18.4, 15.6, 18.1, 17.4, 17.1, 13.3, 17.8, 14. , 14.4, 13.4,\n", " 15.6, 11.8, 13.8, 15.6, 14.6, 17.8, 15.4, 21.5, 19.6, 15.3, 19.4,\n", " 17. , 15.6, 13.1, 41.3, 24.3, 23.3, 27. , 50. , 50. , 50. , 22.7,\n", " 25. , 50. , 23.8, 23.8, 22.3, 17.4, 19.1, 23.1, 23.6, 22.6, 29.4,\n", " 23.2, 24.6, 29.9, 37.2, 39.8, 36.2, 37.9, 32.5, 26.4, 29.6, 50. ,\n", " 32. , 29.8, 34.9, 37. , 30.5, 36.4, 31.1, 29.1, 50. , 33.3, 30.3,\n", " 34.6, 34.9, 32.9, 24.1, 42.3, 48.5, 50. , 22.6, 24.4, 22.5, 24.4,\n", " 20. , 21.7, 19.3, 22.4, 28.1, 23.7, 25. , 23.3, 28.7, 21.5, 23. ,\n", " 26.7, 21.7, 27.5, 30.1, 44.8, 50. , 37.6, 31.6, 46.7, 31.5, 24.3,\n", " 31.7, 41.7, 48.3, 29. , 24. , 25.1, 31.5, 23.7, 23.3, 22. , 20.1,\n", " 22.2, 23.7, 17.6, 18.5, 24.3, 20.5, 24.5, 26.2, 24.4, 24.8, 29.6,\n", " 42.8, 21.9, 20.9, 44. , 50. , 36. , 30.1, 33.8, 43.1, 48.8, 31. ,\n", " 36.5, 22.8, 30.7, 50. , 43.5, 20.7, 21.1, 25.2, 24.4, 35.2, 32.4,\n", " 32. , 33.2, 33.1, 29.1, 35.1, 45.4, 35.4, 46. , 50. , 32.2, 22. ,\n", " 20.1, 23.2, 22.3, 24.8, 28.5, 37.3, 27.9, 23.9, 21.7, 28.6, 27.1,\n", " 20.3, 22.5, 29. , 24.8, 22. 
, 26.4, 33.1, 36.1, 28.4, 33.4, 28.2,\n", " 22.8, 20.3, 16.1, 22.1, 19.4, 21.6, 23.8, 16.2, 17.8, 19.8, 23.1,\n", " 21. , 23.8, 23.1, 20.4, 18.5, 25. , 24.6, 23. , 22.2, 19.3, 22.6,\n", " 19.8, 17.1, 19.4, 22.2, 20.7, 21.1, 19.5, 18.5, 20.6, 19. , 18.7,\n", " 32.7, 16.5, 23.9, 31.2, 17.5, 17.2, 23.1, 24.5, 26.6, 22.9, 24.1,\n", " 18.6, 30.1, 18.2, 20.6, 17.8, 21.7, 22.7, 22.6, 25. , 19.9, 20.8,\n", " 16.8, 21.9, 27.5, 21.9, 23.1, 50. , 50. , 50. , 50. , 50. , 13.8,\n", " 13.8, 15. , 13.9, 13.3, 13.1, 10.2, 10.4, 10.9, 11.3, 12.3, 8.8,\n", " 7.2, 10.5, 7.4, 10.2, 11.5, 15.1, 23.2, 9.7, 13.8, 12.7, 13.1,\n", " 12.5, 8.5, 5. , 6.3, 5.6, 7.2, 12.1, 8.3, 8.5, 5. , 11.9,\n", " 27.9, 17.2, 27.5, 15. , 17.2, 17.9, 16.3, 7. , 7.2, 7.5, 10.4,\n", " 8.8, 8.4, 16.7, 14.2, 20.8, 13.4, 11.7, 8.3, 10.2, 10.9, 11. ,\n", " 9.5, 14.5, 14.1, 16.1, 14.3, 11.7, 13.4, 9.6, 8.7, 8.4, 12.8,\n", " 10.5, 17.1, 18.4, 15.4, 10.8, 11.8, 14.9, 12.6, 14.1, 13. , 13.4,\n", " 15.2, 16.1, 17.8, 14.9, 14.1, 12.7, 13.5, 14.9, 20. , 16.4, 17.7,\n", " 19.5, 20.2, 21.4, 19.9, 19. , 19.1, 19.1, 20.1, 19.9, 19.6, 23.2,\n", " 29.8, 13.8, 13.3, 16.7, 12. , 14.6, 21.4, 23. , 23.7, 25. , 21.8,\n", " 20.6, 21.2, 19.1, 20.6, 15.2, 7. , 8.1, 13.6, 20.1, 21.8, 24.5,\n", " 23.1, 19.7, 18.3, 21.2, 17.5, 16.8, 22.4, 20.6, 23.9, 22. , 11.9])" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "boston['target']" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Цена
024.0
121.6
234.7
333.4
436.2
528.7
622.9
727.1
816.5
918.9
1015.0
1118.9
1221.7
1320.4
1418.2
1519.9
1623.1
1717.5
1820.2
1918.2
2013.6
2119.6
2215.2
2314.5
2415.6
2513.9
2616.6
2714.8
2818.4
2921.0
......
47616.7
47712.0
47814.6
47921.4
48023.0
48123.7
48225.0
48321.8
48420.6
48521.2
48619.1
48720.6
48815.2
4897.0
4908.1
49113.6
49220.1
49321.8
49424.5
49523.1
49619.7
49718.3
49821.2
49917.5
50016.8
50122.4
50220.6
50323.9
50422.0
50511.9
\n", "

506 rows × 1 columns

\n", "
" ], "text/plain": [ " Цена\n", "0 24.0\n", "1 21.6\n", "2 34.7\n", "3 33.4\n", "4 36.2\n", "5 28.7\n", "6 22.9\n", "7 27.1\n", "8 16.5\n", "9 18.9\n", "10 15.0\n", "11 18.9\n", "12 21.7\n", "13 20.4\n", "14 18.2\n", "15 19.9\n", "16 23.1\n", "17 17.5\n", "18 20.2\n", "19 18.2\n", "20 13.6\n", "21 19.6\n", "22 15.2\n", "23 14.5\n", "24 15.6\n", "25 13.9\n", "26 16.6\n", "27 14.8\n", "28 18.4\n", "29 21.0\n", ".. ...\n", "476 16.7\n", "477 12.0\n", "478 14.6\n", "479 21.4\n", "480 23.0\n", "481 23.7\n", "482 25.0\n", "483 21.8\n", "484 20.6\n", "485 21.2\n", "486 19.1\n", "487 20.6\n", "488 15.2\n", "489 7.0\n", "490 8.1\n", "491 13.6\n", "492 20.1\n", "493 21.8\n", "494 24.5\n", "495 23.1\n", "496 19.7\n", "497 18.3\n", "498 21.2\n", "499 17.5\n", "500 16.8\n", "501 22.4\n", "502 20.6\n", "503 23.9\n", "504 22.0\n", "505 11.9\n", "\n", "[506 rows x 1 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "targ = pd.DataFrame( boston.target )\n", "targ.columns = [\"Цена\"]\n", "targ" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "21.6" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "targ[\"Цена\"][1]" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATЦена
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9824.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1421.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0334.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9433.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3336.2
50.029850.02.180.00.4586.43058.76.06223.0222.018.7394.125.2128.7
60.0882912.57.870.00.5246.01266.65.56055.0311.015.2395.6012.4322.9
70.1445512.57.870.00.5246.17296.15.95055.0311.015.2396.9019.1527.1
80.2112412.57.870.00.5245.631100.06.08215.0311.015.2386.6329.9316.5
90.1700412.57.870.00.5246.00485.96.59215.0311.015.2386.7117.1018.9
100.2248912.57.870.00.5246.37794.36.34675.0311.015.2392.5220.4515.0
110.1174712.57.870.00.5246.00982.96.22675.0311.015.2396.9013.2718.9
120.0937812.57.870.00.5245.88939.05.45095.0311.015.2390.5015.7121.7
130.629760.08.140.00.5385.94961.84.70754.0307.021.0396.908.2620.4
140.637960.08.140.00.5386.09684.54.46194.0307.021.0380.0210.2618.2
150.627390.08.140.00.5385.83456.54.49864.0307.021.0395.628.4719.9
161.053930.08.140.00.5385.93529.34.49864.0307.021.0386.856.5823.1
170.784200.08.140.00.5385.99081.74.25794.0307.021.0386.7514.6717.5
180.802710.08.140.00.5385.45636.63.79654.0307.021.0288.9911.6920.2
190.725800.08.140.00.5385.72769.53.79654.0307.021.0390.9511.2818.2
201.251790.08.140.00.5385.57098.13.79794.0307.021.0376.5721.0213.6
210.852040.08.140.00.5385.96589.24.01234.0307.021.0392.5313.8319.6
221.232470.08.140.00.5386.14291.73.97694.0307.021.0396.9018.7215.2
230.988430.08.140.00.5385.813100.04.09524.0307.021.0394.5419.8814.5
240.750260.08.140.00.5385.92494.14.39964.0307.021.0394.3316.3015.6
250.840540.08.140.00.5385.59985.74.45464.0307.021.0303.4216.5113.9
260.671910.08.140.00.5385.81390.34.68204.0307.021.0376.8814.8116.6
270.955770.08.140.00.5386.04788.84.45344.0307.021.0306.3817.2814.8
280.772990.08.140.00.5386.49594.44.45474.0307.021.0387.9412.8018.4
291.002450.08.140.00.5386.67487.34.23904.0307.021.0380.2311.9821.0
.............................................
4764.871410.018.100.00.6146.48493.62.305324.0666.020.2396.2118.6816.7
47715.023400.018.100.00.6145.30497.32.100724.0666.020.2349.4824.9112.0
47810.233000.018.100.00.6146.18596.72.170524.0666.020.2379.7018.0314.6
47914.333700.018.100.00.6146.22988.01.951224.0666.020.2383.3213.1121.4
4805.824010.018.100.00.5326.24264.73.424224.0666.020.2396.9010.7423.0
4815.708180.018.100.00.5326.75074.93.331724.0666.020.2393.077.7423.7
4825.731160.018.100.00.5327.06177.03.410624.0666.020.2395.287.0125.0
4832.818380.018.100.00.5325.76240.34.098324.0666.020.2392.9210.4221.8
4842.378570.018.100.00.5835.87141.93.724024.0666.020.2370.7313.3420.6
4853.673670.018.100.00.5836.31251.93.991724.0666.020.2388.6210.5821.2
4865.691750.018.100.00.5836.11479.83.545924.0666.020.2392.6814.9819.1
4874.835670.018.100.00.5835.90553.23.152324.0666.020.2388.2211.4520.6
4880.150860.027.740.00.6095.45492.71.82094.0711.020.1395.0918.0615.2
4890.183370.027.740.00.6095.41498.31.75544.0711.020.1344.0523.977.0
4900.207460.027.740.00.6095.09398.01.82264.0711.020.1318.4329.688.1
4910.105740.027.740.00.6095.98398.81.86814.0711.020.1390.1118.0713.6
4920.111320.027.740.00.6095.98383.52.10994.0711.020.1396.9013.3520.1
4930.173310.09.690.00.5855.70754.02.38176.0391.019.2396.9012.0121.8
4940.279570.09.690.00.5855.92642.62.38176.0391.019.2396.9013.5924.5
4950.178990.09.690.00.5855.67028.82.79866.0391.019.2393.2917.6023.1
4960.289600.09.690.00.5855.39072.92.79866.0391.019.2396.9021.1419.7
4970.268380.09.690.00.5855.79470.62.89276.0391.019.2396.9014.1018.3
4980.239120.09.690.00.5856.01965.32.40916.0391.019.2396.9012.9221.2
4990.177830.09.690.00.5855.56973.52.39996.0391.019.2395.7715.1017.5
5000.224380.09.690.00.5856.02779.72.49826.0391.019.2396.9014.3316.8
5010.062630.011.930.00.5736.59369.12.47861.0273.021.0391.999.6722.4
5020.045270.011.930.00.5736.12076.72.28751.0273.021.0396.909.0820.6
5030.060760.011.930.00.5736.97691.02.16751.0273.021.0396.905.6423.9
5040.109590.011.930.00.5736.79489.32.38891.0273.021.0393.456.4822.0
5050.047410.011.930.00.5736.03080.82.50501.0273.021.0396.907.8811.9
\n", "

506 rows × 14 columns

\n", "
" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", "5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 \n", "6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 \n", "7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 \n", "8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 \n", "9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 \n", "10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 \n", "11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 \n", "12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 \n", "13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 \n", "14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 \n", "15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 \n", "16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 \n", "17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 \n", "18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 \n", "19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 \n", "20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 \n", "21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 \n", "22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 \n", "23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 \n", "24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 \n", "25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 \n", "26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 \n", "27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 \n", "28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 \n", "29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 \n", ".. ... 
... ... ... ... ... ... ... ... ... \n", "476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 \n", "477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 \n", "478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 \n", "479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 \n", "480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 \n", "481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 \n", "482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 \n", "483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 \n", "484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 \n", "485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 \n", "486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 \n", "487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 \n", "488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 \n", "489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 \n", "490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 \n", "491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 \n", "492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 \n", "493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 \n", "494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 \n", "495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 \n", "496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 \n", "497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 \n", "498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 \n", "499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 \n", "500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 \n", "501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 \n", "502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 \n", "503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 \n", "504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 \n", "505 0.04741 0.0 11.93 0.0 0.573 
6.030 80.8 2.5050 1.0 273.0 \n", "\n", " PTRATIO B LSTAT Цена \n", "0 15.3 396.90 4.98 24.0 \n", "1 17.8 396.90 9.14 21.6 \n", "2 17.8 392.83 4.03 34.7 \n", "3 18.7 394.63 2.94 33.4 \n", "4 18.7 396.90 5.33 36.2 \n", "5 18.7 394.12 5.21 28.7 \n", "6 15.2 395.60 12.43 22.9 \n", "7 15.2 396.90 19.15 27.1 \n", "8 15.2 386.63 29.93 16.5 \n", "9 15.2 386.71 17.10 18.9 \n", "10 15.2 392.52 20.45 15.0 \n", "11 15.2 396.90 13.27 18.9 \n", "12 15.2 390.50 15.71 21.7 \n", "13 21.0 396.90 8.26 20.4 \n", "14 21.0 380.02 10.26 18.2 \n", "15 21.0 395.62 8.47 19.9 \n", "16 21.0 386.85 6.58 23.1 \n", "17 21.0 386.75 14.67 17.5 \n", "18 21.0 288.99 11.69 20.2 \n", "19 21.0 390.95 11.28 18.2 \n", "20 21.0 376.57 21.02 13.6 \n", "21 21.0 392.53 13.83 19.6 \n", "22 21.0 396.90 18.72 15.2 \n", "23 21.0 394.54 19.88 14.5 \n", "24 21.0 394.33 16.30 15.6 \n", "25 21.0 303.42 16.51 13.9 \n", "26 21.0 376.88 14.81 16.6 \n", "27 21.0 306.38 17.28 14.8 \n", "28 21.0 387.94 12.80 18.4 \n", "29 21.0 380.23 11.98 21.0 \n", ".. ... ... ... ... 
\n", "476 20.2 396.21 18.68 16.7 \n", "477 20.2 349.48 24.91 12.0 \n", "478 20.2 379.70 18.03 14.6 \n", "479 20.2 383.32 13.11 21.4 \n", "480 20.2 396.90 10.74 23.0 \n", "481 20.2 393.07 7.74 23.7 \n", "482 20.2 395.28 7.01 25.0 \n", "483 20.2 392.92 10.42 21.8 \n", "484 20.2 370.73 13.34 20.6 \n", "485 20.2 388.62 10.58 21.2 \n", "486 20.2 392.68 14.98 19.1 \n", "487 20.2 388.22 11.45 20.6 \n", "488 20.1 395.09 18.06 15.2 \n", "489 20.1 344.05 23.97 7.0 \n", "490 20.1 318.43 29.68 8.1 \n", "491 20.1 390.11 18.07 13.6 \n", "492 20.1 396.90 13.35 20.1 \n", "493 19.2 396.90 12.01 21.8 \n", "494 19.2 396.90 13.59 24.5 \n", "495 19.2 393.29 17.60 23.1 \n", "496 19.2 396.90 21.14 19.7 \n", "497 19.2 396.90 14.10 18.3 \n", "498 19.2 396.90 12.92 21.2 \n", "499 19.2 395.77 15.10 17.5 \n", "500 19.2 396.90 14.33 16.8 \n", "501 21.0 391.99 9.67 22.4 \n", "502 21.0 396.90 9.08 20.6 \n", "503 21.0 396.90 5.64 23.9 \n", "504 21.0 393.45 6.48 22.0 \n", "505 21.0 396.90 7.88 11.9 \n", "\n", "[506 rows x 14 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df, targ], axis=1) " ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import Lasso\n", "las = Lasso( 0.95)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Lasso(alpha=0.95, copy_X=True, fit_intercept=True, max_iter=1000,\n", " normalize=False, positive=False, precompute=False, random_state=None,\n", " selection='cyclic', tol=0.0001, warm_start=False)" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)\n", "las.fit( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7039796490622132" ] }, "execution_count": 42, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "las.score( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6800820288881564" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "las.score( X_test, y_test )" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-6.60302033e-02, 4.37445786e-02, -0.00000000e+00, 0.00000000e+00,\n", " -0.00000000e+00, 2.11143949e+00, 1.06164045e-03, -6.88378197e-01,\n", " 2.00499266e-01, -1.16324372e-02, -7.52971157e-01, 7.47979040e-03,\n", " -6.47627672e-01])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "las.coef_" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "regressor = DecisionTreeRegressor( ) #max_depth=5, min_impurity_decrease=0.01, random_state=0" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.fit( boston.data, boston.target )" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n", " max_leaf_nodes=None, 
min_impurity_decrease=0.0,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.fit( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.0" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8698912616229756" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_test, y_test )" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([23.7, 29.1, 33.4, 19.4, 18.3, 27.5, 21.8, 21.7, 19.2, 28.7, 48.5,\n", " 19.3, 12.5, 19.5, 23.1, 17.1, 17.4, 16.7, 23.7, 26.7, 34.9, 48.8,\n", " 50. , 18.3, 22.2, 18.9, 13.1, 14.8, 11.5, 22.6, 30.1, 19. , 32.4,\n", " 23.2, 13.4, 19.2, 15.6, 15.6, 36.2, 7.2, 26.4, 13.1, 28.7, 24. ,\n", " 21.4, 21.2, 14.9, 8.3, 26.5, 8.3, 25.1, 7.2, 27. , 13.4, 24. ,\n", " 22.2, 22.8, 38.7, 42.3, 23.1, 21. , 14.9, 7.2, 21.7, 27.1, 25. ,\n", " 22.9, 30.8, 33.4, 7.2, 19.4, 33.1, 23.7, 20.5, 21.4, 50. ])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pred = regressor.predict( X_test )\n", "pred" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([25.3, 24.1, 35.4, 23.4, 19.3, 30.1, 19.9, 21.2, 19.4, 23.6, 50. ,\n", " 19.7, 15.1, 16. , 22.9, 17.6, 18.4, 10.9, 24.2, 31.5, 34.9, 50. ,\n", " 50. , 23.1, 23.9, 16.6, 18.4, 15.2, 12.8, 29.1, 29. , 15.6, 32. ,\n", " 16.1, 13.4, 19.6, 13.9, 14.6, 25.2, 10.5, 18.6, 14.1, 23.9, 21.2,\n", " 25. , 18.9, 17.8, 8.8, 22. 
, 10.9, 23.3, 10.2, 25. , 14.9, 18.2,\n", " 23.9, 20.2, 43.8, 44. , 16.5, 18.4, 14.6, 12.1, 19.9, 24.4, 24.6,\n", " 21.4, 30.1, 33.3, 8.5, 18. , 29.4, 23.5, 21.4, 20.6, 43.1])" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 2., 4., 7., 14., 21., 15., 7., 1., 3., 2.]),\n", " array([0.59504132, 0.6887482 , 0.78245508, 0.87616195, 0.96986883,\n", " 1.06357571, 1.15728258, 1.25098946, 1.34469634, 1.43840321,\n", " 1.53211009]),\n", " )" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAEWxJREFUeJzt3XGsJWV9xvHvU8Caqi3oXhCBdW2KRDSC5Ba1pAak4LIQ0YZaNlZRsatWjTamkbaJGP2HplEbi5WssAGNomkVJGEVN2qCVlEvCLiICtK1XJewq1jQYGOX/vrHnW2ul3P2Hs+cew+77/eTnJyZd96Z93cG9tnZOWdmUlVIktrxW9MuQJK0ugx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMOnnYBg6xZs6bWrVs37TIkab9x8803/6SqZkbp+5gM/nXr1jE3NzftMiRpv5HkR6P29VSPJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ15jF55a60nHUXXT+VcXdccvZUxpUmySN+SWqMwS9JjTH4JakxBr8kNcbgl6TGLBv8SY5J8uUkdya5I8nbuvYnJ9mW5K7u/bAh61/Q9bkryQWT/gCSpN/MKEf8e4B3VNWzgBcAb05yPHAR8MWqOhb4Yjf/a5I8GbgYeD5wMnDxsL8gJEmrY9ngr6r7quqWbvrnwJ3AUcC5wFVdt6uAlw1Y/SXAtqp6oKp+BmwD1k+icEnSeH6jc/xJ1gHPA74BHFFV98HCXw7A4QNWOQq4d9H8fNcmSZqSkYM/yROBTwNvr6qHRl1tQFsN2f6mJHNJ5nbv3j1qWZKk39BIwZ/kEBZC/+NV9Zmu+f4kR3bLjwR2DVh1Hjhm0fzRwM5BY1TV5qqararZmZmRHhQvSRrDKL/qCXAFcGdVvX/RouuAvb/SuQD47IDVbwDOTHJY96XumV2bJGlKRjniPwV4FfDiJLd2rw3AJcAZSe4CzujmSTKb5HKAqnoAeC/wre71nq5NkjQly96ds6q+yuBz9QCnD+g/B7x+0fwWYMu4BUqSJssrdyWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjVn2QSxJtgDnALuq6jld26eA47ouhwL/VVUnDlh3B/Bz4BFgT1XNTqhuSdKYlg1+4ErgUuCjexuq6s/3Tid5H/DgP
tY/rap+Mm6BkqTJGuXRizcmWTdoWfcg9lcAL55sWZKkldL3HP8fA/dX1V1DlhfwhSQ3J9m0rw0l2ZRkLsnc7t27e5YlSRqmb/BvBK7ex/JTquok4CzgzUleNKxjVW2uqtmqmp2ZmelZliRpmLGDP8nBwJ8CnxrWp6p2du+7gGuAk8cdT5I0GX2O+P8E+F5VzQ9amOQJSZ60dxo4E9jeYzxJ0gQsG/xJrga+DhyXZD7Jhd2i81lymifJ05Js7WaPAL6a5Dbgm8D1VfX5yZUuSRrHKL/q2Tik/TUD2nYCG7rpe4ATetYnSZowr9yVpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMaM8iGVLkl1Jti9qe3eSHye5tXttGLLu+iTfT3J3kosmWbgkaTyjHPFfCawf0P6Bqjqxe21dujDJQcCHWHjQ+vHAxiTH9ylWktTfssFfVTcCD4yx7ZOBu6vqnqr6FfBJ4NwxtiNJmqA+5/jfkuT27lTQYQOWHwXcu2h+vmuTJE3Rss/cHeLDwHuB6t7fB7xuSZ8MWK+GbTDJJmATwNq1a8csS1pZ6y66firj7rjk7KmMqwPTWEf8VXV/VT1SVf8LfISF0zpLzQPHLJo/Gti5j21urqrZqpqdmZkZpyxJ0gjGCv4kRy6afTmwfUC3bwHHJnlGkscB5wPXjTOeJGlylj3Vk+Rq4FRgTZJ54GLg1CQnsnDqZgfwhq7v04DLq2pDVe1J8hbgBuAgYEtV3bEin0KSNLJlg7+qNg5ovmJI353AhkXzW4FH/dRTkjQ9XrkrSY0x+CWpMQa/JDXG4Jekxhj8ktSYca/claZ2Faukfjzil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYZYM/yZYku5JsX9T2j0m+l+T2JNckOXTIujuSfCfJrUnmJlm4JGk8oxzxXwmsX9K2DXhOVT0X+AHwt/tY/7SqOrGqZscrUZI0ScsGf1XdCDywpO0LVbWnm70JOHoFapMkrYBJnON/HfC5IcsK+EKSm5NsmsBYkqSeet2WOcnfA3uAjw/pckpV7UxyOLAtyfe6f0EM2tYmYBPA2rVr+5QlSdqHsY/4k1wAnAO8sqpqUJ+q2tm97wKuAU4etr2q2lxVs1U1OzMzM25ZkqRljBX8SdYD7wReWlUPD+nzhCRP2jsNnAlsH9RXkrR6Rvk559XA14HjkswnuRC4FHgSC6dvbk1yWdf3aUm2dqseAXw1yW3AN4Hrq+rzK/IpJEkjW/Ycf1VtHNB8xZC+O4EN3fQ9wAm9qpMkTZxX7kpSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGjNS8CfZkmRXku2L2p6cZFuSu7r3w4ase0HX567uOb2SpCka9Yj/SmD9kraLgC9W1bHAF7v5X5PkycDFwPNZeND6xcP+gpAkrY6Rgr+qbgQeWNJ8LnBVN30V8LIBq74E2FZVD1TVz4BtPPovEEnSKupzjv+IqroPoHs/fECfo4B7F83Pd22PkmRTkrkkc7t37+5RliRpX1b6y90MaKtBHatqc1XNVtXszMzMCpclSe3qE/z3JzkSoHvfNaDPPHDMovmjgZ09xpQk9dQn+K8D9v5K5wLgswP63ACcmeSw7kvdM7s2SdKUjPpzzquBrwPHJZlPciFwCXBGkruAM7p5kswmuRygqh4A3gt8q3u9p2uTJE3JwaN0qqqNQxadPqDvHPD6RfNbgC1jVSdJmjiv3JWkxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxYwd/kuOS3Lro9VCSty/pc2qSBxf1eVf/kiVJf
Yz0IJZBqur7wIkASQ4CfgxcM6DrV6rqnHHHkSRN1qRO9ZwO/LCqfjSh7UmSVsikgv984Oohy16Y5LYkn0vy7AmNJ0kaU+/gT/I44KXAvw5YfAvw9Ko6Afhn4Np9bGdTkrkkc7t37+5bliRpiEkc8Z8F3FJV9y9dUFUPVdUvuumtwCFJ1gzaSFVtrqrZqpqdmZmZQFmSpEEmEfwbGXKaJ8lTk6SbPrkb76cTGFOSNKaxf9UDkOR3gDOANyxqeyNAVV0GnAe8Kcke4JfA+VVVfcaUJPXTK/ir6mHgKUvaLls0fSlwaZ8xJEmT1Sv49diw7qLrp12CVtg0/xvvuOTsqY2tleEtGySpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrMJB62viPJd5LcmmRuwPIk+WCSu5PcnuSkvmNKksY3qQexnFZVPxmy7Czg2O71fODD3bskaQpW41TPucBHa8FNwKFJjlyFcSVJA0wi+Av4QpKbk2wasPwo4N5F8/Nd269JsinJXJK53bt3T6AsSdIgkwj+U6rqJBZO6bw5yYuWLM+AdepRDVWbq2q2qmZnZmYmUJYkaZDewV9VO7v3XcA1wMlLuswDxyyaPxrY2XdcSdJ4egV/kickedLeaeBMYPuSbtcBr+5+3fMC4MGquq/PuJKk8fX9Vc8RwDVJ9m7rE1X1+SRvBKiqy4CtwAbgbuBh4LU9x5Qk9dAr+KvqHuCEAe2XLZou4M19xpEkTY5X7kpSYwx+SWqMwS9JjTH4Jakxk7pXT/PWXXT9tEuQpJF4xC9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY3xyl1Jj0nTvBp+xyVnT23s1TD2EX+SY5J8OcmdSe5I8rYBfU5N8mCSW7vXu/qVK0nqq88R/x7gHVV1S/f4xZuTbKuq7y7p95WqOqfHOJKkCRr7iL+q7quqW7rpnwN3AkdNqjBJ0sqYyJe7SdYBzwO+MWDxC5PcluRzSZ49ifEkSePr/eVukicCnwbeXlUPLVl8C/D0qvpFkg3AtcCxQ7azCdgEsHbt2r5lSZKG6HXEn+QQFkL/41X1maXLq+qhqvpFN70VOCTJmkHbqqrNVTVbVbMzMzN9ypIk7UOfX/UEuAK4s6reP6TPU7t+JDm5G++n444pSeqvz6meU4BXAd9JcmvX9nfAWoCqugw4D3hTkj3AL4Hzq6p6jClJ6mns4K+qrwJZps+lwKXjjiFJmrwD7spdn30rqa9p5chqXTHsvXokqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxB9yVu5Imy6vhDzwe8UtSYwx+SWqMwS9JjTH4JakxBr8kNabvM3fXJ/l+kruTXDRg+W8n+VS3/BtJ1vUZT5LUX59n7h4EfAg4Czge2Jjk+CXdLgR+VlV/AHwA+Idxx5MkTUafI/6Tgbur6p6q+hXwSeDcJX3OBa7qpv8NOH3vw9clSdPRJ/iPAu5dND/ftQ3sU1V7gAeBp/QYU5LUU58rdwcdudcYfRY6JpuATd3sL5J8v0dta4Cf9Fj/QOA+cB+A+wD2o32QfifDnz5qxz7BPw8cs2j+aGDnkD7zSQ4Gfg94YNDGqmozsLlHPf8vyVxVzU5iW/sr94H7ANwH4D4YpM+pnm8BxyZ5RpLHAecD1y3pcx1wQTd9HvClqhp4xC9JWh1jH/FX1Z4kbwFuAA4CtlTVHUneA8xV1XXAFcDHktzNwpH++ZMoWpI0vl5356yqrcDWJW3vWjT938Cf9RljTBM5ZbSfcx+4D8B9AO6DR4lnXiSpLd6yQZIas18H/3K3jOj6vCLJd5PckeQTq13jShvhthlrk3w5ybeT3J5kwzTqXElJtiTZlWT7kOVJ8sFuH92e5KTVrnEljfD5X9l97tuTfC3JCatd40pbbh8s6veHS
R5Jct5q1faYVFX75YuFL5R/CPw+8DjgNuD4JX2OBb4NHNbNHz7tuqewDzYDb+qmjwd2TLvuFdgPLwJOArYPWb4B+BwL15W8APjGtGte5c//R4v+DJx1oH3+UfZB1+cg4EssfC953rRrnuZrfz7iH+WWEX8JfKiqfgZQVbtWucaVNso+KOB3u+nf49HXWuz3qupGhlwf0jkX+GgtuAk4NMmRq1Pdylvu81fV1/b+GQBuYuGamwPKCP8PALwV+DRwoOXAb2x/Dv5RbhnxTOCZSf49yU1J1q9adatjlH3wbuAvksyzcKTz1tUp7TFllP3UigtZ+NdPU5IcBbwcuGzatTwW7M/BP8rtIA5m4XTPqcBG4PIkh65wXatplH2wEbiyqo5m4ZTHx5Lsz//dxzHyrUMOZElOYyH43zntWqbgn4B3VtUj0y7ksaDX7/inbNRbRtxUVf8D/Ed3/59jWbjq+EAwyj64EFgPUFVfT/J4Fu5d0tI/d0fZTwe0JM8FLgfOqqqfTrueKZgFPtndHHgNsCHJnqq6drplTcf+fOQ3yi0jrgVOA0iyhoVTP/esapUra5R98J/A6QBJngU8Hti9qlVO33XAq7tf97wAeLCq7pt2UaslyVrgM8CrquoH065nGqrqGVW1rqrWsXCL+L9qNfRhPz7ir9FuGXEDcGaS7wKPAH9zIB3tjLgP3gF8JMlfs3B64zXV/cThQJHkahZO563pvsu4GDgEoKouY+G7jQ3A3cDDwGunU+nKGOHzv4uF26H/S3fEu6cOsJuWjbAPtIhX7kpSY/bnUz2SpDEY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNeb/AICD0SCRovKoAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist( pred/y_test )" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.17617657143740723" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.std(pred / y_test)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.331974907874248" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.std(pred - y_test)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0.54666729, 0.59044073, -1.45687285, 0.40456359, 0.65648541,\n", " 0.39547472, 0.22922286, 0.36025836, -2.13574704, 0.17980798])" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import cross_val_score\n", "cross_val_score(regressor, boston.data, boston.target, cv=10)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "regressor = DecisionTreeRegressor( max_depth=5, min_impurity_decrease=0.01 ) #random_state=0" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.01,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.fit( boston.data, boston.target )" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = 
train_test_split( boston.data, boston.target, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.01,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.fit( X_train, np.log(y_train) )" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6993837946164134" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_train, np.log(y_train) )" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6283914392130323" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_test, np.log(y_test) )" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.42322041, 2.81949328, 2.81949328, 2.42322041, 3.5790192 ,\n", " 3.5790192 , 2.81949328, 2.81949328, 3.11438922, 2.42322041,\n", " 3.11438922, 3.5790192 , 3.11438922, 3.11438922, 2.81949328,\n", " 3.11438922, 2.42322041, 3.11438922, 3.5790192 , 3.5790192 ,\n", " 3.5790192 , 3.5790192 , 3.11438922, 3.11438922, 3.11438922,\n", " 3.11438922, 3.11438922, 3.11438922, 3.5790192 , 3.11438922,\n", " 3.11438922, 3.11438922, 3.11438922, 3.11438922, 2.81949328,\n", " 2.81949328, 3.11438922, 3.5790192 , 3.5790192 , 3.11438922,\n", " 3.11438922, 3.11438922, 2.42322041, 2.81949328, 3.11438922,\n", " 2.42322041, 3.11438922, 2.81949328, 3.11438922, 3.11438922,\n", " 3.5790192 , 3.11438922, 3.11438922, 2.81949328, 3.11438922,\n", " 3.11438922, 
3.11438922, 3.11438922, 2.81949328, 2.81949328,\n", " 3.5790192 , 3.5790192 , 3.11438922, 3.11438922, 2.42322041,\n", " 3.5790192 , 3.11438922, 3.5790192 , 3.5790192 , 3.11438922,\n", " 2.42322041, 3.11438922, 3.11438922, 2.81949328, 2.81949328,\n", " 2.81949328])" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pred = regressor.predict( X_test )\n", "pred" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.67414865, 2.63905733, 2.81540872, 2.2512918 , 3.91202301,\n", " 3.28091122, 2.74727091, 2.97041447, 3.17387846, 2.14006616,\n", " 2.4765384 , 3.91202301, 3.21084365, 3.34638915, 2.63188884,\n", " 3.08190997, 2.54160199, 3.41772668, 3.59731226, 3.09104245,\n", " 3.35689712, 3.56671182, 3.02529108, 3.19047635, 3.21084365,\n", " 3.06805294, 3.91202301, 3.12676054, 3.91202301, 3.32862669,\n", " 2.96527307, 3.314186 , 3.10009229, 3.16547505, 2.99573227,\n", " 2.96527307, 2.80336038, 3.58351894, 3.21887582, 3.13113691,\n", " 3.0155349 , 3.36729583, 2.00148 , 2.73436751, 3.32862669,\n", " 2.6461748 , 3.13983262, 3.06805294, 3.11794991, 3.02042489,\n", " 3.49953328, 3.0301337 , 3.09557761, 2.90690106, 3.06339092,\n", " 3.0056826 , 3.14415228, 3.16547505, 2.61006979, 2.62466859,\n", " 3.61899333, 3.63495111, 2.90142159, 3.20680324, 3.00071982,\n", " 3.39785848, 2.9601051 , 3.28466357, 3.8815638 , 3.10009229,\n", " 2.87919846, 3.08648664, 2.99071973, 3.07731226, 2.94443898,\n", " 2.74727091])" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.log(y_test)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 1., 1., 1., 10., 13., 22., 17., 7., 3., 1.]),\n", " array([-0.79763378, -0.65408532, -0.51053686, -0.3669884 , -0.22343994,\n", " -0.07989148, 0.06365698, 0.20720544, 0.3507539 , 0.49430236,\n", " 0.63785082]),\n", " )" ] }, "execution_count": 65, 
"metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADVRJREFUeJzt3W2MXOdZxvH/3WxSBE3BwWvXlJil4KBGlXDEKipUJSlpUNpITSoVSESRK0UYESqBeJFWFAkEX9yipgK1ghoSxSBSQqFpLJzSuCaRoYpLNyTkzbROg0ndWLaj0NIIAU1782GOk9Vm13N25uyZnZv/T1rNmTPP7nPp7Pra42dmzkZmIkmafq+YdABJUjcsdEkqwkKXpCIsdEkqwkKXpCIsdEkqwkKXpCIsdEkqwkKXpCJm+pxs8+bNOTc31+eUkjT1HnzwwWczc3bYuF4LfW5ujsXFxT6nlKSpFxH/3macSy6SVISFLklFWOiSVISFLklFWOiSVISFLklFWOiSVISFLklFWOiSVESv7xSVNqq5hQMTm/v4nmsnNrdq8Qxdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpiKGFHhEXR8R9EXE0Ih6PiF9u9l8UEQcj4lhzu2n940qSVtPmDP0F4Ncy8/XAG4FfiohLgQXgUGbuAA419yVJEzK00DPzZGb+c7P9deAo8FrgOmBfM2wfcP16hZQkDbemNfSImAMuAz4HbM3MkzAofWBL1+EkSe21LvSIeBXwN8CvZOZ/ruHzdkfEYkQsnjlzZpSMkqQWWhV6RJzPoMz/IjM/0ew+FRHbmse3AadX+tzM3JuZ85k5Pzs720VmSdIK2rzKJYBbgaOZecuSh/YDu5rtXcDd3ceTJLU102LMm4CfAx6NiIebfb8J7AH+KiJuAp4Gfmp9IkqS2hha6Jn5j0Cs8vBV3caRJI3Kd4pKUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEzkw4g/X83t3BgIvMe33PtRObV+vEMXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqYihhR4Rt0XE6Yh4bMm+34mIr0TEw83H29c3piRpmDZn6LcD16yw/0OZubP5uKfbWJKktRpa6Jl5GHiuhyySpDGMs4b+3oh4pFmS2dRZIknSSEYt9D8CfgDYCZwEPrjawIjYHRGLEbF45syZEaeTJA0zUqFn5qnM/GZmfgv4E+Dyc4zdm5nzmTk/Ozs7ak5J0hAjFXpEbFty953AY6uNlST1Y2bYgIj4GHAlsDkiTgC/DVwZETuBBI4Dv7COGSVJLQwt9My8cYXdt65DFknSGHynqCQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVYaFLUhEWuiQVMfRqi1Kf5hYOTDqCNLU8Q5ekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSrCQpekIix0SSpiaKFHxG0RcToiHluy76KIOBgRx5rbTesbU5I0TJsz9NuBa5btWwAOZeYO4FBzX5I0QUMLPTMPA88t230dsK/Z3gdc33EuSdIajbqGvjUzTwI0t1u6iyRJGsXMek8QEbuB3QDbt29f7+n
UgbmFA5OOIGkEo56hn4qIbQDN7enVBmbm3sycz8z52dnZEaeTJA0zaqHvB3Y127uAu7uJI0kaVZuXLX4MeAD4oYg4ERE3AXuAqyPiGHB1c1+SNEFD19Az88ZVHrqq4yySpDH4TlFJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKsJCl6QiLHRJKmJm0gEkTcbcwoGJzX18z7UTm7syz9AlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKsNAlqQgLXZKKGOvyuRFxHPg68E3ghcyc7yKUJGnturge+lsy89kOvo4kaQwuuUhSEeMWegL3RsSDEbG7i0CSpNGMu+Typsx8JiK2AAcj4l8z8/DSAU3R7wbYvn37mNNJklYz1hl6Zj7T3J4G7gIuX2HM3sycz8z52dnZcaaTJJ3DyIUeEd8RERee3QZ+Enisq2CSpLUZZ8llK3BXRJz9Ondk5t91kkqStGYjF3pmPgX8cIdZJElj8GWLklSEhS5JRVjoklSEhS5JRVjoklSEhS5JRVjoklSEhS5JRVjoklSEhS5JRVjoklSEhS5JRVjoklSEhS5JRYz7J+gkac3mFg5MZN7je66dyLx98Qxdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqw0CWpCAtdkoqYmr9YNKm/cCKpjkn2SB9/LckzdEkqwkKXpCIsdEkqwkKXpCIsdEkqYqxCj4hrIuILEfFkRCx0FUqStHYjF3pEnAd8BHgbcClwY0Rc2lUwSdLajHOGfjnwZGY+lZn/C/wlcF03sSRJazVOob8W+PKS+yeafZKkCRjnnaKxwr582aCI3cDu5u7zEfGFEefbDDw74uf2yZzdm5as05ITpifrtOSEIVnj/WN97e9rM2icQj8BXLzk/vcCzywflJl7gb1jzANARCxm5vy4X2e9mbN705J1WnLC9GSdlpywMbKOs+TyeWBHRHx/RFwA3ADs7yaWJGmtRj5Dz8wXIuK9wKeB84DbMvPxzpJJktZkrKstZuY9wD0dZRlm7GWbnpize9OSdVpywvRknZacsAGyRubLnseUJE0h3/ovSUVs2EKPiIsi4mBEHGtuN60y7gMR8XhEHI2IP4yIlV5OuRFybo+Ie5ucT0TEXJ85mwytsjZjXx0RX4mID/eZsZl7aM6I2BkRDzTf+0ci4md6zHfOS15ExCsj4s7m8c9N4nvd5BiW81ebn8VHIuJQRLR6adx6aHsZkYh4V0RkREzk1SRtckbETzfH9fGIuKPXgJm5IT+ADwALzfYC8P4VxvwY8FkGT8qeBzwAXLnRcjaP3Q9c3Wy/Cvj2jXhMl4z9A+AO4MMbMSdwCbCj2f4e4CTwXT1kOw/4EvA64ALgX4BLl425GfjjZvsG4M4JHMM2Od9y9ucQ+MVJ5GybtRl3IXAYOALMb8ScwA7gIWBTc39Lnxk37Bk6g8sI7Gu29wHXrzAmgW9jcHBfCZwPnOol3UuG5myucTOTmQcBMvP5zPyv/iK+qM0xJSJ+BNgK3NtTruWG5szML2bmsWb7GeA0MNtDtjaXvFia/6+Bq/r+nyMtcmbmfUt+Do8weC/JJLS9jMjvMfhl/999hluiTc6fBz6Smf8BkJmn+wy4kQt9a2aeBGhutywfkJkPAPcxODs7CXw6M4/2mrJFTgZnk1+NiE9ExEMR8fvNxc36NjRrRLwC+CDwGz1nW6rNMX1RRFzO4Jf6l3rI1uaSFy+OycwXgK8B391DthUzNIZdmuMm4FPrmmh1Q7NGxGXAxZn5t30GW6bNMb0EuCQiPhsRRyLimt7SMeE/Eh0RnwFes8JD72v5+T8IvJ6
XziwORsSPZ+bhjiKenWesnAyO85uBy4CngTuB9wC3dpFvqQ6y3gzck5lfXs+Tyg5ynv0624A/B3Zl5re6yDZsyhX2LX+pWKvLYqyz1hki4t3APHDFuiZa3TmzNicZH2Lwb2aS2hzTGQbLLlcy6KV/iIg3ZOZX1znbi5NPTGa+dbXHIuJURGzLzJPNP9qV/uvyTuBIZj7ffM6ngDcyWGfbSDlPAA9l5lPN53yyydl5oXeQ9UeBN0fEzQzW+i+IiOczs9Pr3XeQk4h4NXAA+K3MPNJlvnNoc8mLs2NORMQM8J3Ac/3Ee1mGs1a8NEdEvJXBL9ErMvN/esq23LCsFwJvAO5vTjJeA+yPiHdk5mJvKdt/749k5jeAf4vBtat2MHhn/brbyEsu+4FdzfYu4O4VxjwNXBERMxFxPoMzjL6XXNrk/DywKSLOrvH+BPBED9mWG5o1M382M7dn5hzw68CfdV3mLQzN2Vxu4i4G+T7eY7Y2l7xYmv9dwN9n8wxZj4bmbJYxPgq8o++13mXOmTUzv5aZmzNzrvm5PMIgc59lPjRn45MMnmwmIjYzWIJ5qreEfT9T3PaDwZrjIeBYc3tRs38e+NN86VnnjzIo8SeAWzZizub+1cAjwKPA7cAFGzXrkvHvYTKvcmnzvX838A3g4SUfO3vK93bgiwzW7N/X7PtdBiUDgyfqPw48CfwT8Lq+j2HLnJ9h8CKCs8dv/yRytsm6bOz9TOBVLi2PaQC3NH30KHBDn/l8p6gkFbGRl1wkSWtgoUtSERa6JBVhoUtSERa6JBVhoUtSERa6JBVhoUtSEf8HvkMrPNGwf0gAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist(pred - np.log(y_test) )" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 2., 5., 14., 23., 16., 8., 5., 1., 1., 1.]),\n", " array([0.45039343, 0.59459503, 0.73879662, 0.88299822, 1.02719981,\n", " 1.17140141, 1.315603 , 1.4598046 , 1.60400619, 1.74820779,\n", " 1.89240938]),\n", " )" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADMxJREFUeJzt3X+MpIVdx/H3p702/ii21FsIoZxbG6iQxtJ6Ilo1IGmlkAhN0Ija0oZ4Rm1TTWN66R+20X+uJrXGWG2uLQGNpWksLShIJdhKtdC4WAoHiCA98YRwh2hbNUaBr3/MYLawezO7MzvP3pf3K7nszuxzNx+423eee3ZnLlWFJOnY97yhB0iS5sOgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqYsciH2znzp21vLy8yIeUpGPe7bff/lhVLU06bqFBX15eZmVlZZEPKUnHvCT/NM1xXnKRpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJhb6TFEdG5b3Xj/YYx/cd+Fgjy0d6zxDl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMTg57klCSfS3JvkruTvHN8/0uT3JTk/vHb47d+riRpPdOcoT8BvKuqTgfOBn45yRnAXuDmqjoVuHl8W5I0kIlBr6pHqurvxu9/A7gXOBm4CLhqfNhVwMVbNVKSNNmGrqEnWQZeA3wJOLGqHoFR9IET5j1OkjS9qYOe5EXAp4Bfqaqvb+Dn7UmykmTlyJEjm9koSZrCVEFP8gJGMf/jqrpmfPejSU4af/wk4PBaP7eq9lfV7qravbS0NI/NkqQ1TPNdLgE+BtxbVb+96kPXAZeN378MuHb+8yRJ09oxxTGvA94M3JXkjvF97wH2AZ9McjnwEPCTWzNRkjSNiUGvqr8Gss6Hz5vvHEnSZvlMUUlqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJHUMPkFZb3nv9II97cN+FgzyuNE+eoUtSEwZdkpow6JLUhEGXpCYmBj3JFUkOJzmw6r73JfmXJHeMf1ywtTMlSZNMc4Z+JXD+Gvd/sKrOHP+4Yb6zJEkbNTHoVXUL8PgCtkiSZjDLNfS3J7lzfEnm+LktkiRtymaD/gfAK4AzgUeAD6x3YJI9SVaSrBw5cmSTDyd
JmmRTQa+qR6vqyap6CvgIcNZRjt1fVburavfS0tJmd0qSJthU0JOctOrmm4AD6x0rSVqMia/lkuRq4BxgZ5JDwHuBc5KcCRRwEPiFLdwoSZrCxKBX1aVr3P2xLdgiSZqBzxSVpCYMuiQ1YdAlqQmDLklN+C8WbWND/es9ko5NnqFLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTUwMepIrkhxOcmDVfS9NclOS+8dvj9/amZKkSaY5Q78SOP8Z9+0Fbq6qU4Gbx7clSQOaGPSqugV4/Bl3XwRcNX7/KuDiOe+SJG3QZq+hn1hVjwCM354wv0mSpM3Y8i+KJtmTZCXJypEjR7b64STpOWuzQX80yUkA47eH1zuwqvZX1e6q2r20tLTJh5MkTbLZoF8HXDZ+/zLg2vnMkSRt1jTftng1cCvwyiSHklwO7ANen+R+4PXj25KkAe2YdEBVXbrOh86b8xZJ0gx8pqgkNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITE19tUXouWN57/WCPfXDfhYM9tnrxDF2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklN7JjlJyc5CHwDeBJ4oqp2z2OUJGnjZgr62LlV9dgcfh1J0gy85CJJTcwa9AL+IsntSfbMY5AkaXNmveTyuqp6OMkJwE1J/r6qbll9wDj0ewB27do148MNY3nv9UNPUGND/fk6uO/CQR5XW2emM/Sqenj89jDwaeCsNY7ZX1W7q2r30tLSLA8nSTqKTQc9ybcnOe7p94E3AAfmNUyStDGzXHI5Efh0kqd/nY9X1Y1zWSVJ2rBNB72qHgRePcctkqQZ+G2LktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJamLH0AOmtbz3+qEnSNK25hm6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmjpknFkmaL5+st1gH91245Y/hGbokNWHQJakJgy5JTRh0SWpipqAnOT/JfUkeSLJ3XqMkSRu36aAneT7wIeCNwBnApUnOmNcwSdLGzHKGfhbwQFU9WFX/A3wCuGg+syRJGzVL0E8G/nnV7UPj+yRJA5jliUVZ47561kHJHmDP+OZ/JLnvGYfsBB6bYceiuHO+3Dl/x8rW5+TOvH+mn/5d0xw0S9APAaesuv0y4OFnHlRV+4H96/0iSVaqavcMOxbCnfPlzvk7Vra6c+vMcsnlb4FTk7w8yQuBnwaum88sSdJGbfoMvaqeSPJ24LPA84ErquruuS2TJG3ITC/OVVU3ADfMuGHdyzHbjDvny53zd6xsdecWSdWzvo4pSToG+dR/SWpiIUGf9iUCklySpJIM8pXlaXYm+akk9yS5O8nHF71x1Y6jbk2yK8nnknw5yZ1JLhhg4xVJDic5sM7Hk+R3x/8NdyZ57aI3rtoyaevPjjfemeSLSV696I3jHUfdueq470/yZJJLFrXtGY8/cWeSc5LcMf5c+qtF7lu1YdLv+4uT/GmSr4x3vm3RGzekqrb0B6MvmP4j8N3AC4GvAGescdxxwC3AbcDurd61mZ3AqcCXgePHt09Y9M4NbN0P/OL4/TOAgwPs/FHgtcCBdT5+AfDnjJ7TcDbwpSH+f0659YdW/b6/caitk3a
u+vPxl4y+vnXJdtwJvAS4B9g1vj3U59Kkne8B3j9+fwl4HHjhEFun+bGIM/RpXyLgN4HfAv57AZvWMs3Onwc+VFX/BlBVhxe88WnTbC3gO8bvv5g1niOw1arqFkafAOu5CPjDGrkNeEmSkxaz7ptN2lpVX3z6953RScfLFjLs2Tsm/T8FeAfwKWCoP5/T7PwZ4Jqqemh8/CBbp9hZwHFJArxofOwTi9i2GYsI+sSXCEjyGuCUqvqzBexZzzQvZXAacFqSv0lyW5LzF7bum02z9X3AzyU5xOhM7R2LmbYhx+rLR1zO6G8W206Sk4E3AR8eessEpwHHJ/l8ktuTvGXoQev4PeB0RidEdwHvrKqnhp20vkX8m6JHfYmAJM8DPgi8dQFbjmaalzLYweiyyzmMztC+kORVVfXvW7ztmabZeilwZVV9IMkPAn803rqd/jBO9fIR20mScxkF/YeH3rKO3wHeXVVPjk4qt60dwPcB5wHfCtya5Laq+odhZz3LjwN3AD8GvAK4KckXqurrw85a2yLO0Ce9RMBxwKuAzyc5yOha6nUDfGF0mpcyOARcW1X/W1VfBe5jFPhFm2br5cAnAarqVuBbGL02xXYy1ctHbBdJvhf4KHBRVf3r0HvWsRv4xPhz6RLg95NcPOykNR0Cbqyq/6yqxxh9/WyQLzRP8DZGl4aqqh4Avgp8z8Cb1rWIoB/1JQKq6mtVtbOqlqtqmdH1yZ+oqpUFbJt659hngHMBkuxk9NfGBxe6cmSarQ8xOvshyemMgn5koSsnuw54y/i7Xc4GvlZVjww9ai1JdgHXAG/ehmeR/6+qXr7qc+lPgF+qqs8MPGst1wI/kmRHkm8DfgC4d+BNa1n9eXQi8EqG+ZyfypZfcql1XiIgyW8AK1W1LV7/ZcqdnwXekOQe4Eng14Y4U5ty67uAjyT5VUaXMd5a4y/VL0qSqxldnto5vpb/XuAF4/+GDzO6tn8B8ADwX4zOhgYxxdZfB76T0RkvwBM1wAs3TbFzW5i0s6ruTXIjcCfwFPDRqjrqt2IOsZPRN2tcmeQuRpcI3z3+G8W25DNFJakJnykqSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJ/wMJcthuu0Uc1QAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist(np.exp(pred - np.log(y_test)))" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "#from sklearn.datasets import fetch_california_housing\n", "#housing = fetch_california_housing()" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "neigh = KNeighborsClassifier(n_neighbors=3)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split( iris.data, iris.target, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=1, n_neighbors=3, p=2,\n", " weights='uniform')" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neigh.fit( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9763779527559056" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neigh.score( X_train, y_train )" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9565217391304348" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neigh.score( X_test, y_test )" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1. , 0.93333333, 1. , 0.93333333, 0.86666667,\n", " 1. , 0.93333333, 1. , 1. , 1. 
])" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import cross_val_score\n", "cross_val_score( neigh, iris.data, iris.target, cv=10)" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.96\n", "0.9533333333333334\n", "0.9666666666666666\n", "0.9666666666666666\n" ] } ], "source": [ "for n in range(1,5):\n", " neigh = KNeighborsClassifier(n_neighbors=n)\n", " print(np.mean(cross_val_score( neigh, iris.data, iris.target, cv=10)))" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1. , 0.93333333, 1. , 1. , 1. ,\n", " 0.86666667, 0.93333333, 0.93333333, 1. , 1. ])" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import RadiusNeighborsClassifier\n", "rad = RadiusNeighborsClassifier( radius = 0.75 )\n", "cross_val_score( rad, iris.data, iris.target, cv=10)" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.75 0.9666666666666668\n", "0.9 0.9533333333333334\n", "1.05 0.9533333333333334\n", "1.2000000000000002 0.9400000000000001\n", "1.35 0.9200000000000002\n", "1.5 0.8866666666666667\n", "1.6500000000000001 0.8933333333333333\n", "1.8000000000000003 0.8866666666666667\n", "1.9500000000000002 0.8933333333333333\n", "2.1 0.9\n", "2.25 0.8800000000000001\n", "2.4000000000000004 0.8666666666666668\n", "2.5500000000000003 0.8800000000000001\n", "2.7 0.8933333333333333\n", "2.8500000000000005 0.8733333333333333\n", "3.0000000000000004 0.82\n", "3.1500000000000004 0.7866666666666666\n", "3.3000000000000003 0.7466666666666666\n", "3.45 0.7\n", "3.6000000000000005 0.6333333333333332\n", "3.7500000000000004 0.5800000000000001\n", "3.9000000000000004 0.56\n", "4.050000000000001 0.52\n", 
"4.200000000000001 0.47999999999999987\n", "4.3500000000000005 0.43999999999999995\n", "4.5 0.4133333333333334\n", "4.65 0.38000000000000006\n", "4.800000000000001 0.36000000000000004\n", "4.950000000000001 0.33333333333333337\n" ] } ], "source": [ "for r in np.arange(0.75, 5, 0.15):\n", " rad = RadiusNeighborsClassifier( radius = r )\n", " print(r, np.mean(cross_val_score( rad, iris.data, iris.target, cv=10)))" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "X = iris.data[:, [0, 2]]\n", "y = iris.target\n", "\n", "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", "xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),\n", " np.arange(y_min, y_max, 0.1))\n", "\n", "f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))\n", "\n", "for idx, clf, tt in zip(product([0, 1], [0, 1]),\n", " [KNeighborsClassifier(n_neighbors=1), KNeighborsClassifier(n_neighbors=3),\n", " RadiusNeighborsClassifier( radius = 4. ), DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.1)],\n", " ['KNeighborsClassifier, 1', 'KNeighborsClassifier, 3',\n", " 'RadiusNeighborsClassifier( radius = 4. ) 4', 'Decision Tree']):\n", "\n", " clf.fit( X, y )\n", " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", " Z = Z.reshape(xx.shape)\n", "\n", " axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)\n", " axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y,\n", " s=20, edgecolor='k')\n", " axarr[idx[0], idx[1]].set_title(tt)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "regressor = DecisionTreeRegressor( max_depth=5, min_impurity_decrease=0.01 ) #random_state=0" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.01,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"regressor.fit( boston.data, boston.target )" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,\n", " max_leaf_nodes=None, min_impurity_decrease=0.01,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=None, splitter='best')" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.fit( X_train, np.log(y_train) )" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7038988391585246" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_train, np.log(y_train) )" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5879761258737521" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regressor.score( X_test, np.log(y_test) )" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3.53986065, 3.53986065, 3.08085185, 3.08085185, 3.08085185,\n", " 3.53986065, 3.08085185, 2.29529697, 2.75531767, 3.08085185,\n", " 3.08085185, 3.53986065, 3.08085185, 3.53986065, 3.08085185,\n", " 3.53986065, 3.08085185, 2.75531767, 3.53986065, 2.75531767,\n", " 2.75531767, 3.53986065, 3.08085185, 2.29529697, 3.08085185,\n", " 3.53986065, 2.29529697, 2.29529697, 3.08085185, 3.08085185,\n", " 3.53986065, 3.08085185, 3.08085185, 3.53986065, 3.08085185,\n", " 2.75531767, 3.08085185, 2.75531767, 3.08085185, 2.75531767,\n", " 3.08085185, 3.08085185, 3.08085185, 
2.29529697, 3.08085185,\n", " 3.08085185, 3.08085185, 3.53986065, 3.53986065, 3.53986065,\n", " 3.08085185, 3.08085185, 3.53986065, 3.08085185, 3.08085185,\n", " 3.08085185, 3.53986065, 2.29529697, 3.08085185, 2.29529697,\n", " 3.08085185, 3.08085185, 2.75531767, 3.08085185, 3.08085185,\n", " 3.08085185, 3.53986065, 3.53986065, 2.75531767, 2.75531767,\n", " 2.75531767, 3.08085185, 3.08085185, 2.29529697, 3.53986065,\n", " 2.29529697])" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pred = regressor.predict( X_test )\n", "pred" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3.5085559 , 3.77963382, 3.03974916, 2.77881927, 2.93916192,\n", " 3.55820113, 3.18221184, 3.314186 , 2.49320545, 3.21084365,\n", " 3.42751469, 3.33220451, 3.06339092, 3.5055574 , 3.15273602,\n", " 3.5085559 , 3.58905912, 2.9601051 , 3.49650756, 2.66722821,\n", " 2.87919846, 3.56671182, 3.02529108, 2.84490938, 3.09104245,\n", " 3.63495111, 2.59525471, 2.00148 , 3.01062089, 3.10009229,\n", " 3.41114771, 3.35689712, 3.05400118, 3.56104608, 2.83907846,\n", " 2.93385687, 3.11351531, 2.44234704, 2.96527307, 2.8507065 ,\n", " 2.77881927, 3.33576958, 3.32862669, 2.81540872, 3.16547505,\n", " 3.00071982, 3.13549422, 3.8286414 , 3.44041809, 3.54673969,\n", " 2.94443898, 2.97552957, 3.34990409, 2.93916192, 3.13983262,\n", " 3.39450839, 3.26575941, 2.73436751, 3.13983262, 2.57261223,\n", " 3.19458313, 3.12676054, 2.78501124, 3.44998755, 3.40452517,\n", " 2.8094027 , 3.48737508, 3.40452517, 2.87919846, 2.46809953,\n", " 2.72129543, 3.16968558, 3.28091122, 2.2617631 , 3.314186 ,\n", " 2.01490302])" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.log(y_test)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 1., 0., 0., 3., 2., 10., 11., 23., 13., 13.]),\n", " array([-1.01888904, 
-0.88570307, -0.7525171 , -0.61933114, -0.48614517,\n", " -0.3529592 , -0.21977323, -0.08658727, 0.0465987 , 0.17978467,\n", " 0.31297064]),\n", " )" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADJRJREFUeJzt3X+MpIVdx/H3pxzUxFqFsuBZe65aNKCpEFdSJZUqxWBJhBobJVavCcmZ1CYafyQXa6LRf7BNW2NqjKcQrkZrbS1CAhWuJ5GYQO0hhELP9pAgvXLhjuCPkiZVytc/9rl2PXZ3ZndmZ3a/fb+Szcw8++w839vsvPe5Z+aZTVUhSdr5XjbvASRJ02HQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1sWuWGzv//PNrcXFxlpuUpB3vwQcffLaqFkatN9OgLy4ucuTIkVluUpJ2vCT/Ps56HnKRpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJmZ6pqik7WNx/51z2/aTN107t2135h66JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmhgZ9CSvSXJvkqNJHkvyK8Py85IcSnJsuDx368eVJK1lnD30F4Bfr6qLgdcDv5zkEmA/cLiqLgIOD7clSXMyMuhVdaKq/mW4/kXgKPBq4Drg4LDaQeD6rRpSkjTaho6hJ1kELgM+CVxYVSdgOfrABdMeTpI0vrGDnuQVwN8Cv1pV/72Br9uX5EiSI6dOndrMjJKkMYwV9CRnsxzzv6yqjw2Ln0mye/j8buDkal9bVQeqaqmqlhYWFqYxsyRpFeO8yiXAzcDRqnrfik/dAewdru8Fbp/+eJKkce0aY50rgF8APp3k4WHZbwE3AX+T5EbgKeCtWzOiJGkcI4NeVf8EZI1PXzXdcSRJm+WZopLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpiZFBT3JLkpNJHl2x7HeTfCHJw8PHm7d2TEnSKOPsod8KXLPK8vdX1aXDx13THUuStFEjg15V9wHPzWAWSdIEJjmG/s4kjwyHZM6d2kSSpE3ZbND/BPhu4FLgBPDetVZMsi/JkSRHTp06tcnNSZJG2VTQq+qZqvpKVb0I/Blw+TrrHqiqpapaWlhY2OyckqQRNhX0JLtX3HwL8Oha60qSZmPXqBWSfAh4I3B+kuPA7wBvTHIpUMCTwC9t4YySpDGMDHpV3bDK4pu3YBZJ0gQ8U1SSmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJke+2KEnTtrj/znmPMHNP3nTtlm/DPXRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCv
1gkzdnX41/v0dZwD12SmjDoktSEQZekJgy6JDUxMuhJbklyMsmjK5adl+RQkmPD5blbO6YkaZRx9tBvBa45Y9l+4HBVXQQcHm5LkuZoZNCr6j7guTMWXwccHK4fBK6f8lySpA3a7DH0C6vqBMBwecH0RpIkbcaWn1iUZB+wD2DPnj1bvTlpUzy5Rx1sdg/9mSS7AYbLk2utWFUHqmqpqpYWFhY2uTlJ0iibDfodwN7h+l7g9umMI0narHFetvgh4H7ge5McT3IjcBNwdZJjwNXDbUnSHI08hl5VN6zxqaumPIskaQKeKSpJTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2Smtg1yRcneRL4IvAV4IWqWprGUJKkjZso6IMfq6pnp3A/kqQJeMhFkpqYNOgF3JPkwST7pjGQJGlzJj3kckVVPZ3kAuBQkn+tqvtWrjCEfh/Anj17JtycJGktE+2hV9XTw+VJ4Dbg8lXWOVBVS1W1tLCwMMnmJEnr2HTQk3xjkm86fR34CeDRaQ0mSdqYSQ65XAjcluT0/fxVVf39VKaSJG3YpoNeVU8APzDFWSRJE/Bli5LUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU1M449Eq5nF/XfObdtP3nTt3LYt7XTuoUtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasITi7StzPOkJmmncw9dkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITO+bEIv+KjiStzz10SWrCoEtSEwZdkpow6JLUxERBT3JNks8meTzJ/mkNJUnauE0HPclZwB8DPwlcAtyQ5JJpDSZJ2phJ9tAvBx6vqieq6n+Avwaum85YkqSNmiTorwY+v+L28WGZJGkOJjmxKKssq5eslOwD9g03n0/y2Qm2udL5wLNTuq915Q+menczm3vKnHt2duLM4NzrmrAj3zHOSpME/TjwmhW3vx14+syVquoAcGCC7awqyZGqWpr2/W41556tnTj3TpwZnHs7mOSQy6eAi5J8Z5JzgJ8D7pjOWJKkjdr0HnpVvZDkncDdwFnALVX12NQmkyRtyERvzlVVdwF3TWmWjZr6YZwZce7Z2olz78SZwbnnLlUveR5TkrQDeeq/JDWxY4Ke5K1JHkvyYpI1n5Hebm9HkOS8JIeSHBsuz11jvXcP/76jSf4oyWovC52ZDcy9J8k9w9yfSbI420lfMs9Ycw/rvjLJF5J8YJYzrjLHyJmTXJrk/uFn5JEkPzuPWYdZ1n2MJXl5kg8Pn//kvH8mhplGzfxrw8/vI0kOJxnrZYLbzY4JOvAo8NPAfWutsE3fjmA/cLiqLgIOD7f/nyQ/AlwBvA74fuCHgCtnOeQqRs49+CDwnqq6mOWzh0/OaL61jDs3wO8D/ziTqdY3zsxfAn6xqr4PuAb4wyTfMsMZgbEfYzcC/1FVrwXeD0z3TI4NGnPmh4Clqnod8FHg3bOdcjp2TNCr6mhVjTopaTu+HcF1wMHh+kHg+lXWKeAbgHOAlwNnA8/MZLq1jZx7eFDsqqpDAFX1fFV9aXYjrmqc7zdJfhC4ELhnRnOtZ+TMVfW5qjo2XH+a5V+cCzOb8GvGeYyt/Pd8FLhqzv/jHDlzVd274mf3AZbPq9lxdkzQx7Qd347gwqo6ATBcXnDmClV1P3AvcGL4uLuqjs50ypcaOTfwPcB/JvlYkoeSvGfYG5qnkXMneRnwXuA3ZzzbWsb5Xn9VkstZ/uX/bzOY7UzjPMa+uk5VvQD8F/CqmUy3uo124Ubg4
1s60RbZVn9TNMkngG9d5VPvqqrbx7mLVZZt+ct41pt7zK9/LXAxX9srOJTkR6tqzcNL0zDp3Cz//LwBuAx4Cvgw8Hbg5mnMt5YpzP0O4K6q+vysdhynMPPp+9kN/AWwt6penMZsGzTOY2wuj8N1jD1PkrcBS8z/kOembKugV9WbJryLsd6OYNrWmzvJM0l2V9WJ4cG42jHmtwAPVNXzw9d8HHg96zxfMA1TmPs48FBVPTF8zd+xPPeWBn0Kc/8w8IYk7wBeAZyT5Pmq2rIn0acwM0leCdwJ/HZVPbBFo44yzmPs9DrHk+wCvhl4bjbjrWqsLiR5E8u/YK+sqi/PaLap6nbIZTu+HcEdwN7h+l5gtf9pPAVcmWRXkrNZ3juY9yGXceb+FHBuktPHcn8c+MwMZlvPyLmr6uerak9VLQK/AXxwK2M+hpEzDz/Pt7E860dmONuZxnmMrfz3/AzwDzXfE15GzpzkMuBPgZ+qqnk/sb95VbUjPljeiz0OfJnlJwzvHpZ/G8v/fT693puBz7F8fPFd22DuV7H8yoVjw+V5w/Il4M+H62ex/MN0lOUgvm8nzD3cvhp4BPg0cCtwzk6Ye8X6bwc+sN1nBt4G/C/w8IqPS+c070seY8DvsRxDWH6C/yPA48A/A981z+/vmDN/YujK6e/tHfOeeTMfnikqSU10O+QiSV+3DLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUxP8BDmaWt2S9cfsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist(pred - np.log(y_test))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }