note_04
Note 4. Decision trees and more,
from the course Introduction to Machine Learning.
Шокуров Антон В.
shokurov.anton.v@yandex.ru
http://машинноезрение.рф
Version 0.11

Abstract

Decision trees and nearest-neighbor methods, using the scikit-learn library.

This is a preliminary version! Any comments are welcome.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from itertools import product

import graphviz
from sklearn import tree

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
%matplotlib inline
In [2]:
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
In [3]:
iris.keys()
Out[3]:
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
In [4]:
import pandas as pd
In [5]:
df = pd.DataFrame( iris.data )
df.columns = iris.feature_names
df
Out[5]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
5 5.4 3.9 1.7 0.4
6 4.6 3.4 1.4 0.3
7 5.0 3.4 1.5 0.2
8 4.4 2.9 1.4 0.2
9 4.9 3.1 1.5 0.1
10 5.4 3.7 1.5 0.2
11 4.8 3.4 1.6 0.2
12 4.8 3.0 1.4 0.1
13 4.3 3.0 1.1 0.1
14 5.8 4.0 1.2 0.2
15 5.7 4.4 1.5 0.4
16 5.4 3.9 1.3 0.4
17 5.1 3.5 1.4 0.3
18 5.7 3.8 1.7 0.3
19 5.1 3.8 1.5 0.3
20 5.4 3.4 1.7 0.2
21 5.1 3.7 1.5 0.4
22 4.6 3.6 1.0 0.2
23 5.1 3.3 1.7 0.5
24 4.8 3.4 1.9 0.2
25 5.0 3.0 1.6 0.2
26 5.0 3.4 1.6 0.4
27 5.2 3.5 1.5 0.2
28 5.2 3.4 1.4 0.2
29 4.7 3.2 1.6 0.2
... ... ... ... ...
120 6.9 3.2 5.7 2.3
121 5.6 2.8 4.9 2.0
122 7.7 2.8 6.7 2.0
123 6.3 2.7 4.9 1.8
124 6.7 3.3 5.7 2.1
125 7.2 3.2 6.0 1.8
126 6.2 2.8 4.8 1.8
127 6.1 3.0 4.9 1.8
128 6.4 2.8 5.6 2.1
129 7.2 3.0 5.8 1.6
130 7.4 2.8 6.1 1.9
131 7.9 3.8 6.4 2.0
132 6.4 2.8 5.6 2.2
133 6.3 2.8 5.1 1.5
134 6.1 2.6 5.6 1.4
135 7.7 3.0 6.1 2.3
136 6.3 3.4 5.6 2.4
137 6.4 3.1 5.5 1.8
138 6.0 3.0 4.8 1.8
139 6.9 3.1 5.4 2.1
140 6.7 3.1 5.6 2.4
141 6.9 3.1 5.1 2.3
142 5.8 2.7 5.1 1.9
143 6.8 3.2 5.9 2.3
144 6.7 3.3 5.7 2.5
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8

150 rows × 4 columns

In [6]:
clf1 = DecisionTreeClassifier(max_depth=1)
clf2 = DecisionTreeClassifier(max_depth=2)
clf3 = DecisionTreeClassifier(max_depth=3)
clf4 = DecisionTreeClassifier(max_depth=5)
In [7]:
clf1.fit(X, y)
clf2.fit(X, y)
clf3.fit(X, y)
clf4.fit(X, y)
Out[7]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [8]:
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))

for idx, clf, tt in zip(product([0, 1], [0, 1]),
                        [clf1, clf2, clf3, clf4],
                        ['Decision Tree (depth=1)', 'Decision Tree (depth=2)',
                         'Decision Tree (depth=3)', 'Decision Tree (depth=5)']):

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)
    axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y,
                                  s=20, edgecolor='k')
    axarr[idx[0], idx[1]].set_title(tt)

plt.show()
In [9]:
dot_data = tree.export_graphviz(clf1, out_file=None) 
graph = graphviz.Source(dot_data) 
#graph.render("iris") 
In [10]:
dot_data = tree.export_graphviz(clf1, out_file=None, 
                         feature_names=[iris.feature_names[0],iris.feature_names[2]],
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[10]:
node 0: petal length (cm) ≤ 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: leaf | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
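The gini printed at each node is the Gini impurity of its class counts, gini = 1 − Σ_k p_k², where p_k are the class proportions in value. A minimal check of the numbers above (a sketch, not a cell from the original notebook):

import numpy as np

def gini(counts):
    # Gini impurity from raw class counts: 1 minus the sum of squared proportions.
    p = np.asarray(counts, dtype=float) / np.sum(counts)
    return 1.0 - np.sum(p ** 2)

print(gini([50, 50, 50]))  # root: 0.666..., printed as 0.667
print(gini([0, 50, 50]))   # node 2: 0.5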
In [11]:
dot_data = tree.export_graphviz(clf2, out_file=None, 
                         feature_names=[iris.feature_names[0],iris.feature_names[2]],
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[11]:
node 0: petal length (cm) ≤ 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal length (cm) ≤ 4.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: leaf | gini = 0.043 | samples = 45 | value = [0, 44, 1] | class = versicolor
   └─ False → node 4: leaf | gini = 0.194 | samples = 55 | value = [0, 6, 49] | class = virginica
In [12]:
dot_data = tree.export_graphviz(clf3, out_file=None, 
                         feature_names=[iris.feature_names[0],iris.feature_names[2]],
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[12]:
node 0: petal length (cm) ≤ 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal length (cm) ≤ 4.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: sepal length (cm) ≤ 4.95 | gini = 0.043 | samples = 45 | value = [0, 44, 1] | class = versicolor
   │  ├─ True  → node 4: leaf | gini = 0.5 | samples = 2 | value = [0, 1, 1] | class = versicolor
   │  └─ False → node 5: leaf | gini = 0.0 | samples = 43 | value = [0, 43, 0] | class = versicolor
   └─ False → node 6: petal length (cm) ≤ 5.05 | gini = 0.194 | samples = 55 | value = [0, 6, 49] | class = virginica
      ├─ True  → node 7: leaf | gini = 0.473 | samples = 13 | value = [0, 5, 8] | class = virginica
      └─ False → node 8: leaf | gini = 0.046 | samples = 42 | value = [0, 1, 41] | class = virginica
In [13]:
dot_data = tree.export_graphviz(clf4, out_file=None, 
                         feature_names=[iris.feature_names[0],iris.feature_names[2]],
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[13]:
node 0: petal length (cm) ≤ 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal length (cm) ≤ 4.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: sepal length (cm) ≤ 4.95 | gini = 0.043 | samples = 45 | value = [0, 44, 1] | class = versicolor
   │  ├─ True  → node 4: petal length (cm) ≤ 3.9 | gini = 0.5 | samples = 2 | value = [0, 1, 1] | class = versicolor
   │  │  ├─ True  → node 5: leaf | gini = 0.0 | samples = 1 | value = [0, 1, 0] | class = versicolor
   │  │  └─ False → node 6: leaf | gini = 0.0 | samples = 1 | value = [0, 0, 1] | class = virginica
   │  └─ False → node 7: leaf | gini = 0.0 | samples = 43 | value = [0, 43, 0] | class = versicolor
   └─ False → node 8: petal length (cm) ≤ 5.05 | gini = 0.194 | samples = 55 | value = [0, 6, 49] | class = virginica
      ├─ True  → node 9: sepal length (cm) ≤ 6.5 | gini = 0.473 | samples = 13 | value = [0, 5, 8] | class = virginica
      │  ├─ True  → node 10: petal length (cm) ≤ 4.95 | gini = 0.32 | samples = 10 | value = [0, 2, 8] | class = virginica
      │  │  ├─ True  → node 11: leaf | gini = 0.408 | samples = 7 | value = [0, 2, 5] | class = virginica
      │  │  └─ False → node 12: leaf | gini = 0.0 | samples = 3 | value = [0, 0, 3] | class = virginica
      │  └─ False → node 13: leaf | gini = 0.0 | samples = 3 | value = [0, 3, 0] | class = versicolor
      └─ False → node 14: sepal length (cm) ≤ 6.05 | gini = 0.046 | samples = 42 | value = [0, 1, 41] | class = virginica
         ├─ True  → node 15: sepal length (cm) ≤ 5.95 | gini = 0.32 | samples = 5 | value = [0, 1, 4] | class = virginica
         │  ├─ True  → node 16: leaf | gini = 0.0 | samples = 4 | value = [0, 0, 4] | class = virginica
         │  └─ False → node 17: leaf | gini = 0.0 | samples = 1 | value = [0, 1, 0] | class = versicolor
         └─ False → node 18: leaf | gini = 0.0 | samples = 37 | value = [0, 0, 37] | class = virginica
In [14]:
clf = DecisionTreeClassifier(max_depth=3)
clf.fit(iris.data, iris.target)
Out[14]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [15]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=iris.feature_names,
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[15]:
node 0: petal length (cm) ≤ 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal width (cm) ≤ 1.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: petal length (cm) ≤ 4.95 | gini = 0.168 | samples = 54 | value = [0, 49, 5] | class = versicolor
   │  ├─ True  → node 4: leaf | gini = 0.041 | samples = 48 | value = [0, 47, 1] | class = versicolor
   │  └─ False → node 5: leaf | gini = 0.444 | samples = 6 | value = [0, 2, 4] | class = virginica
   └─ False → node 6: petal length (cm) ≤ 4.85 | gini = 0.043 | samples = 46 | value = [0, 1, 45] | class = virginica
      ├─ True  → node 7: leaf | gini = 0.444 | samples = 3 | value = [0, 1, 2] | class = virginica
      └─ False → node 8: leaf | gini = 0.0 | samples = 43 | value = [0, 0, 43] | class = virginica
In [16]:
clf = DecisionTreeClassifier(max_depth=4)
clf.fit(iris.data, iris.target)
Out[16]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [17]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=iris.feature_names,
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[17]:
node 0: petal width (cm) ≤ 0.8 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal width (cm) ≤ 1.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: petal length (cm) ≤ 4.95 | gini = 0.168 | samples = 54 | value = [0, 49, 5] | class = versicolor
   │  ├─ True  → node 4: petal width (cm) ≤ 1.65 | gini = 0.041 | samples = 48 | value = [0, 47, 1] | class = versicolor
   │  │  ├─ True  → node 5: leaf | gini = 0.0 | samples = 47 | value = [0, 47, 0] | class = versicolor
   │  │  └─ False → node 6: leaf | gini = 0.0 | samples = 1 | value = [0, 0, 1] | class = virginica
   │  └─ False → node 7: petal width (cm) ≤ 1.55 | gini = 0.444 | samples = 6 | value = [0, 2, 4] | class = virginica
   │     ├─ True  → node 8: leaf | gini = 0.0 | samples = 3 | value = [0, 0, 3] | class = virginica
   │     └─ False → node 9: leaf | gini = 0.444 | samples = 3 | value = [0, 2, 1] | class = versicolor
   └─ False → node 10: petal length (cm) ≤ 4.85 | gini = 0.043 | samples = 46 | value = [0, 1, 45] | class = virginica
      ├─ True  → node 11: sepal length (cm) ≤ 5.95 | gini = 0.444 | samples = 3 | value = [0, 1, 2] | class = virginica
      │  ├─ True  → node 12: leaf | gini = 0.0 | samples = 1 | value = [0, 1, 0] | class = versicolor
      │  └─ False → node 13: leaf | gini = 0.0 | samples = 2 | value = [0, 0, 2] | class = virginica
      └─ False → node 14: leaf | gini = 0.0 | samples = 43 | value = [0, 0, 43] | class = virginica
In [18]:
clf = DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.15)
clf.fit(iris.data, iris.target)
Out[18]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.15, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [19]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=iris.feature_names,
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)
graph = graphviz.Source(dot_data)
graph
Out[19]:
node 0: petal width (cm) ≤ 0.8 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
├─ True  → node 1: leaf | gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
└─ False → node 2: petal width (cm) ≤ 1.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
   ├─ True  → node 3: leaf | gini = 0.168 | samples = 54 | value = [0, 49, 5] | class = versicolor
   └─ False → node 4: leaf | gini = 0.043 | samples = 46 | value = [0, 1, 45] | class = virginica
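scikit-learn keeps a split only when its weighted impurity decrease, N_t/N · (impurity − N_L/N_t · impurity_L − N_R/N_t · impurity_R), reaches min_impurity_decrease. A sketch checking why the tree above stops at depth 2, reusing the node statistics printed in the deeper trees earlier (values as printed, rounded to three digits):

def impurity_decrease(N, n_t, imp, n_l, imp_l, n_r, imp_r):
    # Weighted impurity decrease of a candidate split of a node with n_t samples.
    return n_t / N * (imp - n_l / n_t * imp_l - n_r / n_t * imp_r)

print(impurity_decrease(150, 150, 0.667, 50, 0.0, 100, 0.5))   # root: ~0.33 >= 0.15, kept
print(impurity_decrease(150, 100, 0.5, 54, 0.168, 46, 0.043))  # node 2: ~0.26 >= 0.15, kept
print(impurity_decrease(150, 54, 0.168, 48, 0.041, 6, 0.444))  # deeper split of node 3: ~0.03 < 0.15, pruned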
In [20]:
n_nodes = clf.tree_.node_count
children_left = clf.tree_.children_left
children_right = clf.tree_.children_right
feature = clf.tree_.feature
feature_names = iris.feature_names  # readable names for the indices stored in clf.tree_.feature
threshold = clf.tree_.threshold


# The tree structure can be traversed to compute various properties such
# as the depth of each node and whether or not it is a leaf.
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)]  # seed is the root node id and its parent depth
while len(stack) > 0:
    node_id, parent_depth = stack.pop()
    node_depth[node_id] = parent_depth + 1

    # If we have a test node
    if (children_left[node_id] != children_right[node_id]):
        stack.append((children_left[node_id], parent_depth + 1))
        stack.append((children_right[node_id], parent_depth + 1))
    else:
        is_leaves[node_id] = True

print("Стрктура бинарного дерева имеет %s узлов и следущий вид:"
      % n_nodes)
for i in range(n_nodes):
    if is_leaves[i]:
        print("%sузел=%s конечный." % (node_depth[i] * "\t", i))
    else:
        print("%sузел=%s проверка: перейти к узлу %s если %s <= %s иначе к узлу %s."
              % (node_depth[i] * "\t",
                 i,
                 children_left[i],
                 feature_names[i],
                 threshold[i],
                 children_right[i],
                 ))
print()
The binary tree structure has 5 nodes and the following form:
node=0 test: go to node 1 if petal width (cm) <= 0.800000011920929 else to node 2.
	node=1 is a leaf.
	node=2 test: go to node 3 if petal width (cm) <= 1.75 else to node 4.
		node=3 is a leaf.
		node=4 is a leaf.

In [21]:
sample = 33
In [22]:
node_indicator = clf.decision_path( [iris.data[ sample ]] )
node_indicator
Out[22]:
<1x5 sparse matrix of type '<class 'numpy.int64'>'
	with 2 stored elements in Compressed Sparse Row format>
In [23]:
leave_id = clf.apply( [iris.data[ sample ]] )
leave_id[0]
Out[23]:
1
In [24]:
class_id = clf.predict( [iris.data[ sample ]] )
class_id, iris.target_names[class_id]
Out[24]:
(array([0]), array(['setosa'], dtype='<U10'))
In [25]:
# Now, it's possible to get the tests that were used to predict a sample or
# a group of samples. First, let's make it for the sample.

sample_id = 0
node_index = node_indicator.indices[node_indicator.indptr[sample_id]:
                                    node_indicator.indptr[sample_id + 1]]

print('Rules used to predict sample %s: ' % sample)
for node_id in node_index:
    if leave_id[sample_id] == node_id:  # skip the leaf itself: it holds no test
        continue

    if (iris.data[sample, feature[node_id]] <= threshold[node_id]):
        threshold_sign = "<="
    else:
        threshold_sign = ">"

    print("decision tree node %s : (feature %s, (= %s) %s %s)"
          % (node_id,
             feature[node_id],
             iris.data[sample, feature[node_id]],
             threshold_sign,
             threshold[node_id]))
Rules used to predict sample 33: 
decision tree node 0 : (feature 3, (= 0.2) <= 0.800000011920929)
In [26]:
clf.score(iris.data, iris.target)
Out[26]:
0.96
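For classifiers, score is plain accuracy: the fraction of samples whose predicted label matches the target. The 0.96 above is equivalent to this one-liner (reusing clf and iris from the cells above):

print(np.mean(clf.predict(iris.data) == iris.target))  # 0.96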
In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( iris.data, iris.target, test_size=0.15)
In [28]:
X_train.shape[0]/iris.data.shape[0], X_test.shape[0]/iris.data.shape[0]
Out[28]:
(0.8466666666666667, 0.15333333333333332)
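The test share is 23/150 ≈ 0.153 rather than exactly 0.15 because the 22.5-sample test set is rounded up to a whole sample. Note also that train_test_split shuffles at random, so the scores below change from run to run; for a reproducible, class-balanced split one can pass random_state and stratify (an optional variant, not used in this notebook):

X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.15,
    stratify=iris.target, random_state=0)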
In [29]:
clf = DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.15)
clf.fit( X_train, y_train )
Out[29]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.15, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')
In [30]:
clf.score( X_test, y_test )
Out[30]:
0.8695652173913043
In [31]:
clf = DecisionTreeClassifier(max_depth=5, min_impurity_decrease=0.01 )
clf.fit( X_train, y_train )
clf.score( X_test, y_test )
Out[31]:
0.8695652173913043

Lasso regression
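Lasso is linear least-squares regression with an L1 penalty on the coefficients; scikit-learn's Lasso minimizes

\min_w \; \frac{1}{2n}\,\lVert y - Xw\rVert_2^2 + \alpha\,\lVert w\rVert_1

where n is the number of samples. The larger alpha is, the more coefficients are driven exactly to zero, which acts as built-in feature selection.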

In [32]:
from sklearn.datasets import load_boston
boston = load_boston()
print(boston.data.shape)
(506, 13)
In [33]:
boston.keys()
Out[33]:
dict_keys(['data', 'target', 'feature_names', 'DESCR'])
In [34]:
import pandas as pd
df = pd.DataFrame(boston['data'])
df
Out[34]:
0 1 2 3 4 5 6 7 8 9 10 11 12
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33
5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 18.7 394.12 5.21
6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 15.2 395.60 12.43
7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 15.2 396.90 19.15
8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 15.2 386.63 29.93
9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 15.2 386.71 17.10
10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 15.2 392.52 20.45
11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 15.2 396.90 13.27
12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 15.2 390.50 15.71
13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 21.0 396.90 8.26
14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 21.0 380.02 10.26
15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 21.0 395.62 8.47
16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 21.0 386.85 6.58
17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 21.0 386.75 14.67
18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 21.0 288.99 11.69
19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 21.0 390.95 11.28
20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 21.0 376.57 21.02
21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 21.0 392.53 13.83
22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 21.0 396.90 18.72
23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 21.0 394.54 19.88
24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 21.0 394.33 16.30
25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 21.0 303.42 16.51
26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 21.0 376.88 14.81
27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 21.0 306.38 17.28
28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 21.0 387.94 12.80
29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 21.0 380.23 11.98
... ... ... ... ... ... ... ... ... ... ... ... ... ...
476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 20.2 396.21 18.68
477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 20.2 349.48 24.91
478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 20.2 379.70 18.03
479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 20.2 383.32 13.11
480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 20.2 396.90 10.74
481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 20.2 393.07 7.74
482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 20.2 395.28 7.01
483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 20.2 392.92 10.42
484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 20.2 370.73 13.34
485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 20.2 388.62 10.58
486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 20.2 392.68 14.98
487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 20.2 388.22 11.45
488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 20.1 395.09 18.06
489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 20.1 344.05 23.97
490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 20.1 318.43 29.68
491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 20.1 390.11 18.07
492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 20.1 396.90 13.35
493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 19.2 396.90 12.01
494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 19.2 396.90 13.59
495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 19.2 393.29 17.60
496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 19.2 396.90 21.14
497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 19.2 396.90 14.10
498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 19.2 396.90 12.92
499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 19.2 395.77 15.10
500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 19.2 396.90 14.33
501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 21.0 391.99 9.67
502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 21.0 396.90 9.08
503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 21.0 396.90 5.64
504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 21.0 393.45 6.48
505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0 21.0 396.90 7.88

506 rows × 13 columns

In [35]:
df.columns = boston['feature_names']
df
Out[35]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33
5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 18.7 394.12 5.21
6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 15.2 395.60 12.43
7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 15.2 396.90 19.15
8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 15.2 386.63 29.93
9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 15.2 386.71 17.10
10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 15.2 392.52 20.45
11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 15.2 396.90 13.27
12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 15.2 390.50 15.71
13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 21.0 396.90 8.26
14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 21.0 380.02 10.26
15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 21.0 395.62 8.47
16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 21.0 386.85 6.58
17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 21.0 386.75 14.67
18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 21.0 288.99 11.69
19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 21.0 390.95 11.28
20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 21.0 376.57 21.02
21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 21.0 392.53 13.83
22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 21.0 396.90 18.72
23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 21.0 394.54 19.88
24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 21.0 394.33 16.30
25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 21.0 303.42 16.51
26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 21.0 376.88 14.81
27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 21.0 306.38 17.28
28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 21.0 387.94 12.80
29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 21.0 380.23 11.98
... ... ... ... ... ... ... ... ... ... ... ... ... ...
476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 20.2 396.21 18.68
477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 20.2 349.48 24.91
478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 20.2 379.70 18.03
479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 20.2 383.32 13.11
480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 20.2 396.90 10.74
481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 20.2 393.07 7.74
482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 20.2 395.28 7.01
483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 20.2 392.92 10.42
484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 20.2 370.73 13.34
485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 20.2 388.62 10.58
486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 20.2 392.68 14.98
487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 20.2 388.22 11.45
488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 20.1 395.09 18.06
489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 20.1 344.05 23.97
490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 20.1 318.43 29.68
491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 20.1 390.11 18.07
492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 20.1 396.90 13.35
493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 19.2 396.90 12.01
494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 19.2 396.90 13.59
495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 19.2 393.29 17.60
496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 19.2 396.90 21.14
497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 19.2 396.90 14.10
498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 19.2 396.90 12.92
499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 19.2 395.77 15.10
500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 19.2 396.90 14.33
501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 21.0 391.99 9.67
502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 21.0 396.90 9.08
503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 21.0 396.90 5.64
504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 21.0 393.45 6.48
505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0 21.0 396.90 7.88

506 rows × 13 columns

In [36]:
boston['target']
Out[36]:
array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
       18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
       15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
       13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
       21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
       35.4, 24.7, 31.6, 23.3, 19.6, 18.7, 16. , 22.2, 25. , 33. , 23.5,
       19.4, 22. , 17.4, 20.9, 24.2, 21.7, 22.8, 23.4, 24.1, 21.4, 20. ,
       20.8, 21.2, 20.3, 28. , 23.9, 24.8, 22.9, 23.9, 26.6, 22.5, 22.2,
       23.6, 28.7, 22.6, 22. , 22.9, 25. , 20.6, 28.4, 21.4, 38.7, 43.8,
       33.2, 27.5, 26.5, 18.6, 19.3, 20.1, 19.5, 19.5, 20.4, 19.8, 19.4,
       21.7, 22.8, 18.8, 18.7, 18.5, 18.3, 21.2, 19.2, 20.4, 19.3, 22. ,
       20.3, 20.5, 17.3, 18.8, 21.4, 15.7, 16.2, 18. , 14.3, 19.2, 19.6,
       23. , 18.4, 15.6, 18.1, 17.4, 17.1, 13.3, 17.8, 14. , 14.4, 13.4,
       15.6, 11.8, 13.8, 15.6, 14.6, 17.8, 15.4, 21.5, 19.6, 15.3, 19.4,
       17. , 15.6, 13.1, 41.3, 24.3, 23.3, 27. , 50. , 50. , 50. , 22.7,
       25. , 50. , 23.8, 23.8, 22.3, 17.4, 19.1, 23.1, 23.6, 22.6, 29.4,
       23.2, 24.6, 29.9, 37.2, 39.8, 36.2, 37.9, 32.5, 26.4, 29.6, 50. ,
       32. , 29.8, 34.9, 37. , 30.5, 36.4, 31.1, 29.1, 50. , 33.3, 30.3,
       34.6, 34.9, 32.9, 24.1, 42.3, 48.5, 50. , 22.6, 24.4, 22.5, 24.4,
       20. , 21.7, 19.3, 22.4, 28.1, 23.7, 25. , 23.3, 28.7, 21.5, 23. ,
       26.7, 21.7, 27.5, 30.1, 44.8, 50. , 37.6, 31.6, 46.7, 31.5, 24.3,
       31.7, 41.7, 48.3, 29. , 24. , 25.1, 31.5, 23.7, 23.3, 22. , 20.1,
       22.2, 23.7, 17.6, 18.5, 24.3, 20.5, 24.5, 26.2, 24.4, 24.8, 29.6,
       42.8, 21.9, 20.9, 44. , 50. , 36. , 30.1, 33.8, 43.1, 48.8, 31. ,
       36.5, 22.8, 30.7, 50. , 43.5, 20.7, 21.1, 25.2, 24.4, 35.2, 32.4,
       32. , 33.2, 33.1, 29.1, 35.1, 45.4, 35.4, 46. , 50. , 32.2, 22. ,
       20.1, 23.2, 22.3, 24.8, 28.5, 37.3, 27.9, 23.9, 21.7, 28.6, 27.1,
       20.3, 22.5, 29. , 24.8, 22. , 26.4, 33.1, 36.1, 28.4, 33.4, 28.2,
       22.8, 20.3, 16.1, 22.1, 19.4, 21.6, 23.8, 16.2, 17.8, 19.8, 23.1,
       21. , 23.8, 23.1, 20.4, 18.5, 25. , 24.6, 23. , 22.2, 19.3, 22.6,
       19.8, 17.1, 19.4, 22.2, 20.7, 21.1, 19.5, 18.5, 20.6, 19. , 18.7,
       32.7, 16.5, 23.9, 31.2, 17.5, 17.2, 23.1, 24.5, 26.6, 22.9, 24.1,
       18.6, 30.1, 18.2, 20.6, 17.8, 21.7, 22.7, 22.6, 25. , 19.9, 20.8,
       16.8, 21.9, 27.5, 21.9, 23.1, 50. , 50. , 50. , 50. , 50. , 13.8,
       13.8, 15. , 13.9, 13.3, 13.1, 10.2, 10.4, 10.9, 11.3, 12.3,  8.8,
        7.2, 10.5,  7.4, 10.2, 11.5, 15.1, 23.2,  9.7, 13.8, 12.7, 13.1,
       12.5,  8.5,  5. ,  6.3,  5.6,  7.2, 12.1,  8.3,  8.5,  5. , 11.9,
       27.9, 17.2, 27.5, 15. , 17.2, 17.9, 16.3,  7. ,  7.2,  7.5, 10.4,
        8.8,  8.4, 16.7, 14.2, 20.8, 13.4, 11.7,  8.3, 10.2, 10.9, 11. ,
        9.5, 14.5, 14.1, 16.1, 14.3, 11.7, 13.4,  9.6,  8.7,  8.4, 12.8,
       10.5, 17.1, 18.4, 15.4, 10.8, 11.8, 14.9, 12.6, 14.1, 13. , 13.4,
       15.2, 16.1, 17.8, 14.9, 14.1, 12.7, 13.5, 14.9, 20. , 16.4, 17.7,
       19.5, 20.2, 21.4, 19.9, 19. , 19.1, 19.1, 20.1, 19.9, 19.6, 23.2,
       29.8, 13.8, 13.3, 16.7, 12. , 14.6, 21.4, 23. , 23.7, 25. , 21.8,
       20.6, 21.2, 19.1, 20.6, 15.2,  7. ,  8.1, 13.6, 20.1, 21.8, 24.5,
       23.1, 19.7, 18.3, 21.2, 17.5, 16.8, 22.4, 20.6, 23.9, 22. , 11.9])
In [37]:
targ = pd.DataFrame( boston.target )
targ.columns = ["Цена"]
targ
Out[37]:
Цена
0 24.0
1 21.6
2 34.7
3 33.4
4 36.2
5 28.7
6 22.9
7 27.1
8 16.5
9 18.9
10 15.0
11 18.9
12 21.7
13 20.4
14 18.2
15 19.9
16 23.1
17 17.5
18 20.2
19 18.2
20 13.6
21 19.6
22 15.2
23 14.5
24 15.6
25 13.9
26 16.6
27 14.8
28 18.4
29 21.0
... ...
476 16.7
477 12.0
478 14.6
479 21.4
480 23.0
481 23.7
482 25.0
483 21.8
484 20.6
485 21.2
486 19.1
487 20.6
488 15.2
489 7.0
490 8.1
491 13.6
492 20.1
493 21.8
494 24.5
495 23.1
496 19.7
497 18.3
498 21.2
499 17.5
500 16.8
501 22.4
502 20.6
503 23.9
504 22.0
505 11.9

506 rows × 1 columns

In [38]:
targ["Цена"][1]
Out[38]:
21.6
In [39]:
pd.concat([df, targ], axis=1) 
Out[39]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT Цена
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2
5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 18.7 394.12 5.21 28.7
6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 15.2 395.60 12.43 22.9
7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 15.2 396.90 19.15 27.1
8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 15.2 386.63 29.93 16.5
9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 15.2 386.71 17.10 18.9
10 0.22489 12.5 7.87 0.0 0.524 6.377 94.3 6.3467 5.0 311.0 15.2 392.52 20.45 15.0
11 0.11747 12.5 7.87 0.0 0.524 6.009 82.9 6.2267 5.0 311.0 15.2 396.90 13.27 18.9
12 0.09378 12.5 7.87 0.0 0.524 5.889 39.0 5.4509 5.0 311.0 15.2 390.50 15.71 21.7
13 0.62976 0.0 8.14 0.0 0.538 5.949 61.8 4.7075 4.0 307.0 21.0 396.90 8.26 20.4
14 0.63796 0.0 8.14 0.0 0.538 6.096 84.5 4.4619 4.0 307.0 21.0 380.02 10.26 18.2
15 0.62739 0.0 8.14 0.0 0.538 5.834 56.5 4.4986 4.0 307.0 21.0 395.62 8.47 19.9
16 1.05393 0.0 8.14 0.0 0.538 5.935 29.3 4.4986 4.0 307.0 21.0 386.85 6.58 23.1
17 0.78420 0.0 8.14 0.0 0.538 5.990 81.7 4.2579 4.0 307.0 21.0 386.75 14.67 17.5
18 0.80271 0.0 8.14 0.0 0.538 5.456 36.6 3.7965 4.0 307.0 21.0 288.99 11.69 20.2
19 0.72580 0.0 8.14 0.0 0.538 5.727 69.5 3.7965 4.0 307.0 21.0 390.95 11.28 18.2
20 1.25179 0.0 8.14 0.0 0.538 5.570 98.1 3.7979 4.0 307.0 21.0 376.57 21.02 13.6
21 0.85204 0.0 8.14 0.0 0.538 5.965 89.2 4.0123 4.0 307.0 21.0 392.53 13.83 19.6
22 1.23247 0.0 8.14 0.0 0.538 6.142 91.7 3.9769 4.0 307.0 21.0 396.90 18.72 15.2
23 0.98843 0.0 8.14 0.0 0.538 5.813 100.0 4.0952 4.0 307.0 21.0 394.54 19.88 14.5
24 0.75026 0.0 8.14 0.0 0.538 5.924 94.1 4.3996 4.0 307.0 21.0 394.33 16.30 15.6
25 0.84054 0.0 8.14 0.0 0.538 5.599 85.7 4.4546 4.0 307.0 21.0 303.42 16.51 13.9
26 0.67191 0.0 8.14 0.0 0.538 5.813 90.3 4.6820 4.0 307.0 21.0 376.88 14.81 16.6
27 0.95577 0.0 8.14 0.0 0.538 6.047 88.8 4.4534 4.0 307.0 21.0 306.38 17.28 14.8
28 0.77299 0.0 8.14 0.0 0.538 6.495 94.4 4.4547 4.0 307.0 21.0 387.94 12.80 18.4
29 1.00245 0.0 8.14 0.0 0.538 6.674 87.3 4.2390 4.0 307.0 21.0 380.23 11.98 21.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
476 4.87141 0.0 18.10 0.0 0.614 6.484 93.6 2.3053 24.0 666.0 20.2 396.21 18.68 16.7
477 15.02340 0.0 18.10 0.0 0.614 5.304 97.3 2.1007 24.0 666.0 20.2 349.48 24.91 12.0
478 10.23300 0.0 18.10 0.0 0.614 6.185 96.7 2.1705 24.0 666.0 20.2 379.70 18.03 14.6
479 14.33370 0.0 18.10 0.0 0.614 6.229 88.0 1.9512 24.0 666.0 20.2 383.32 13.11 21.4
480 5.82401 0.0 18.10 0.0 0.532 6.242 64.7 3.4242 24.0 666.0 20.2 396.90 10.74 23.0
481 5.70818 0.0 18.10 0.0 0.532 6.750 74.9 3.3317 24.0 666.0 20.2 393.07 7.74 23.7
482 5.73116 0.0 18.10 0.0 0.532 7.061 77.0 3.4106 24.0 666.0 20.2 395.28 7.01 25.0
483 2.81838 0.0 18.10 0.0 0.532 5.762 40.3 4.0983 24.0 666.0 20.2 392.92 10.42 21.8
484 2.37857 0.0 18.10 0.0 0.583 5.871 41.9 3.7240 24.0 666.0 20.2 370.73 13.34 20.6
485 3.67367 0.0 18.10 0.0 0.583 6.312 51.9 3.9917 24.0 666.0 20.2 388.62 10.58 21.2
486 5.69175 0.0 18.10 0.0 0.583 6.114 79.8 3.5459 24.0 666.0 20.2 392.68 14.98 19.1
487 4.83567 0.0 18.10 0.0 0.583 5.905 53.2 3.1523 24.0 666.0 20.2 388.22 11.45 20.6
488 0.15086 0.0 27.74 0.0 0.609 5.454 92.7 1.8209 4.0 711.0 20.1 395.09 18.06 15.2
489 0.18337 0.0 27.74 0.0 0.609 5.414 98.3 1.7554 4.0 711.0 20.1 344.05 23.97 7.0
490 0.20746 0.0 27.74 0.0 0.609 5.093 98.0 1.8226 4.0 711.0 20.1 318.43 29.68 8.1
491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 20.1 390.11 18.07 13.6
492 0.11132 0.0 27.74 0.0 0.609 5.983 83.5 2.1099 4.0 711.0 20.1 396.90 13.35 20.1
493 0.17331 0.0 9.69 0.0 0.585 5.707 54.0 2.3817 6.0 391.0 19.2 396.90 12.01 21.8
494 0.27957 0.0 9.69 0.0 0.585 5.926 42.6 2.3817 6.0 391.0 19.2 396.90 13.59 24.5
495 0.17899 0.0 9.69 0.0 0.585 5.670 28.8 2.7986 6.0 391.0 19.2 393.29 17.60 23.1
496 0.28960 0.0 9.69 0.0 0.585 5.390 72.9 2.7986 6.0 391.0 19.2 396.90 21.14 19.7
497 0.26838 0.0 9.69 0.0 0.585 5.794 70.6 2.8927 6.0 391.0 19.2 396.90 14.10 18.3
498 0.23912 0.0 9.69 0.0 0.585 6.019 65.3 2.4091 6.0 391.0 19.2 396.90 12.92 21.2
499 0.17783 0.0 9.69 0.0 0.585 5.569 73.5 2.3999 6.0 391.0 19.2 395.77 15.10 17.5
500 0.22438 0.0 9.69 0.0 0.585 6.027 79.7 2.4982 6.0 391.0 19.2 396.90 14.33 16.8
501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 21.0 391.99 9.67 22.4
502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 21.0 396.90 9.08 20.6
503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 21.0 396.90 5.64 23.9
504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 21.0 393.45 6.48 22.0
505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0 21.0 396.90 7.88 11.9

506 rows × 14 columns

In [40]:
from sklearn.linear_model import Lasso
las = Lasso( alpha=0.95 )
In [41]:
X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)
las.fit( X_train, y_train )
Out[41]:
Lasso(alpha=0.95, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
In [42]:
las.score( X_train, y_train )
Out[42]:
0.7039796490622132
In [43]:
las.score( X_test, y_test )
Out[43]:
0.6800820288881564
In [44]:
las.coef_
Out[44]:
array([-6.60302033e-02,  4.37445786e-02, -0.00000000e+00,  0.00000000e+00,
       -0.00000000e+00,  2.11143949e+00,  1.06164045e-03, -6.88378197e-01,
        2.00499266e-01, -1.16324372e-02, -7.52971157e-01,  7.47979040e-03,
       -6.47627672e-01])
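Several coefficients are exactly zero: the L1 penalty has dropped those features (here the 3rd-5th columns, INDUS, CHAS and NOX, for this particular split). A sketch pairing each coefficient with its feature name:

for name, w in zip(boston.feature_names, las.coef_):
    print('%8s  % .4f' % (name, w))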
In [45]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()  # default settings; a pruned variant (max_depth=5, min_impurity_decrease=0.01) is tried below
In [46]:
regressor.fit( boston.data, boston.target )
Out[46]:
DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [47]:
X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)
In [48]:
regressor.fit( X_train, y_train )
Out[48]:
DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [49]:
regressor.score( X_train, y_train )
Out[49]:
1.0
In [50]:
regressor.score( X_test, y_test )
Out[50]:
0.8698912616229756
In [51]:
pred = regressor.predict( X_test )
pred
Out[51]:
array([23.7, 29.1, 33.4, 19.4, 18.3, 27.5, 21.8, 21.7, 19.2, 28.7, 48.5,
       19.3, 12.5, 19.5, 23.1, 17.1, 17.4, 16.7, 23.7, 26.7, 34.9, 48.8,
       50. , 18.3, 22.2, 18.9, 13.1, 14.8, 11.5, 22.6, 30.1, 19. , 32.4,
       23.2, 13.4, 19.2, 15.6, 15.6, 36.2,  7.2, 26.4, 13.1, 28.7, 24. ,
       21.4, 21.2, 14.9,  8.3, 26.5,  8.3, 25.1,  7.2, 27. , 13.4, 24. ,
       22.2, 22.8, 38.7, 42.3, 23.1, 21. , 14.9,  7.2, 21.7, 27.1, 25. ,
       22.9, 30.8, 33.4,  7.2, 19.4, 33.1, 23.7, 20.5, 21.4, 50. ])
In [52]:
y_test
Out[52]:
array([25.3, 24.1, 35.4, 23.4, 19.3, 30.1, 19.9, 21.2, 19.4, 23.6, 50. ,
       19.7, 15.1, 16. , 22.9, 17.6, 18.4, 10.9, 24.2, 31.5, 34.9, 50. ,
       50. , 23.1, 23.9, 16.6, 18.4, 15.2, 12.8, 29.1, 29. , 15.6, 32. ,
       16.1, 13.4, 19.6, 13.9, 14.6, 25.2, 10.5, 18.6, 14.1, 23.9, 21.2,
       25. , 18.9, 17.8,  8.8, 22. , 10.9, 23.3, 10.2, 25. , 14.9, 18.2,
       23.9, 20.2, 43.8, 44. , 16.5, 18.4, 14.6, 12.1, 19.9, 24.4, 24.6,
       21.4, 30.1, 33.3,  8.5, 18. , 29.4, 23.5, 21.4, 20.6, 43.1])
In [53]:
plt.hist( pred/y_test )
Out[53]:
(array([ 2.,  4.,  7., 14., 21., 15.,  7.,  1.,  3.,  2.]),
 array([0.59504132, 0.6887482 , 0.78245508, 0.87616195, 0.96986883,
        1.06357571, 1.15728258, 1.25098946, 1.34469634, 1.43840321,
        1.53211009]),
 <a list of 10 Patch objects>)
In [54]:
np.std(pred / y_test)
Out[54]:
0.17617657143740723
In [55]:
np.std(pred - y_test)
Out[55]:
3.331974907874248
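For regressors, score is the coefficient of determination R² = 1 − SS_res/SS_tot. A quick check against the 0.87 from Out[50], reusing pred and y_test:

ss_res = np.sum((pred - y_test) ** 2)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
print(1.0 - ss_res / ss_tot)  # same value as regressor.score(X_test, y_test)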
In [56]:
from sklearn.model_selection import cross_val_score
cross_val_score(regressor, boston.data, boston.target, cv=10)
Out[56]:
array([ 0.54666729,  0.59044073, -1.45687285,  0.40456359,  0.65648541,
        0.39547472,  0.22922286,  0.36025836, -2.13574704,  0.17980798])
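The fold scores vary wildly and two are negative (R² < 0 means the tree does worse on that fold than simply predicting the mean): the fully grown tree memorizes the training set (train score 1.0 above) and generalizes erratically. A one-line summary of the folds:

scores = cross_val_score(regressor, boston.data, boston.target, cv=10)
print(scores.mean(), scores.std())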
In [57]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor( max_depth=5, min_impurity_decrease=0.01 )  # add random_state=0 for a deterministic tree
In [58]:
regressor.fit( boston.data, boston.target )
Out[58]:
DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.01,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [59]:
X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)
In [60]:
regressor.fit( X_train, np.log(y_train) )
Out[60]:
DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.01,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [61]:
regressor.score( X_train, np.log(y_train) )
Out[61]:
0.6993837946164134
In [62]:
regressor.score( X_test, np.log(y_test) )
Out[62]:
0.6283914392130323
In [63]:
pred = regressor.predict( X_test )
pred
Out[63]:
array([2.42322041, 2.81949328, 2.81949328, 2.42322041, 3.5790192 ,
       3.5790192 , 2.81949328, 2.81949328, 3.11438922, 2.42322041,
       3.11438922, 3.5790192 , 3.11438922, 3.11438922, 2.81949328,
       3.11438922, 2.42322041, 3.11438922, 3.5790192 , 3.5790192 ,
       3.5790192 , 3.5790192 , 3.11438922, 3.11438922, 3.11438922,
       3.11438922, 3.11438922, 3.11438922, 3.5790192 , 3.11438922,
       3.11438922, 3.11438922, 3.11438922, 3.11438922, 2.81949328,
       2.81949328, 3.11438922, 3.5790192 , 3.5790192 , 3.11438922,
       3.11438922, 3.11438922, 2.42322041, 2.81949328, 3.11438922,
       2.42322041, 3.11438922, 2.81949328, 3.11438922, 3.11438922,
       3.5790192 , 3.11438922, 3.11438922, 2.81949328, 3.11438922,
       3.11438922, 3.11438922, 3.11438922, 2.81949328, 2.81949328,
       3.5790192 , 3.5790192 , 3.11438922, 3.11438922, 2.42322041,
       3.5790192 , 3.11438922, 3.5790192 , 3.5790192 , 3.11438922,
       2.42322041, 3.11438922, 3.11438922, 2.81949328, 2.81949328,
       2.81949328])
In [64]:
np.log(y_test)
Out[64]:
array([2.67414865, 2.63905733, 2.81540872, 2.2512918 , 3.91202301,
       3.28091122, 2.74727091, 2.97041447, 3.17387846, 2.14006616,
       2.4765384 , 3.91202301, 3.21084365, 3.34638915, 2.63188884,
       3.08190997, 2.54160199, 3.41772668, 3.59731226, 3.09104245,
       3.35689712, 3.56671182, 3.02529108, 3.19047635, 3.21084365,
       3.06805294, 3.91202301, 3.12676054, 3.91202301, 3.32862669,
       2.96527307, 3.314186  , 3.10009229, 3.16547505, 2.99573227,
       2.96527307, 2.80336038, 3.58351894, 3.21887582, 3.13113691,
       3.0155349 , 3.36729583, 2.00148   , 2.73436751, 3.32862669,
       2.6461748 , 3.13983262, 3.06805294, 3.11794991, 3.02042489,
       3.49953328, 3.0301337 , 3.09557761, 2.90690106, 3.06339092,
       3.0056826 , 3.14415228, 3.16547505, 2.61006979, 2.62466859,
       3.61899333, 3.63495111, 2.90142159, 3.20680324, 3.00071982,
       3.39785848, 2.9601051 , 3.28466357, 3.8815638 , 3.10009229,
       2.87919846, 3.08648664, 2.99071973, 3.07731226, 2.94443898,
       2.74727091])
In [65]:
plt.hist(pred - np.log(y_test) )
Out[65]:
(array([ 1.,  1.,  1., 10., 13., 22., 17.,  7.,  3.,  1.]),
 array([-0.79763378, -0.65408532, -0.51053686, -0.3669884 , -0.22343994,
        -0.07989148,  0.06365698,  0.20720544,  0.3507539 ,  0.49430236,
         0.63785082]),
 <a list of 10 Patch objects>)
In [66]:
plt.hist(np.exp(pred - np.log(y_test)))
Out[66]:
(array([ 2.,  5., 14., 23., 16.,  8.,  5.,  1.,  1.,  1.]),
 array([0.45039343, 0.59459503, 0.73879662, 0.88299822, 1.02719981,
        1.17140141, 1.315603  , 1.4598046 , 1.60400619, 1.74820779,
        1.89240938]),
 <a list of 10 Patch objects>)
In [67]:
#from sklearn.datasets import fetch_california_housing
#housing = fetch_california_housing()
In [68]:
from sklearn.neighbors import KNeighborsClassifier
In [69]:
neigh = KNeighborsClassifier(n_neighbors=3)
In [70]:
X_train, X_test, y_train, y_test = train_test_split( iris.data, iris.target, test_size=0.15)
In [71]:
neigh.fit( X_train, y_train )
Out[71]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
In [72]:
neigh.score( X_train, y_train )
Out[72]:
0.9763779527559056
In [73]:
neigh.score( X_test, y_test )
Out[73]:
0.9565217391304348
In [74]:
from sklearn.model_selection import cross_val_score
cross_val_score( neigh, iris.data, iris.target, cv=10)
Out[74]:
array([1.        , 0.93333333, 1.        , 0.93333333, 0.86666667,
       1.        , 0.93333333, 1.        , 1.        , 1.        ])
In [75]:
for n in range(1,5):
    neigh = KNeighborsClassifier(n_neighbors=n)
    print(np.mean(cross_val_score( neigh, iris.data, iris.target, cv=10)))
0.96
0.9533333333333334
0.9666666666666666
0.9666666666666666
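The loop above only probes k = 1..4; a slightly wider sweep with a plot makes the accuracy-versus-k trade-off easier to see (a sketch in the same spirit):

ks = range(1, 31)
means = [np.mean(cross_val_score(KNeighborsClassifier(n_neighbors=k),
                                 iris.data, iris.target, cv=10))
         for k in ks]
plt.plot(list(ks), means)
plt.xlabel('n_neighbors')
plt.ylabel('mean 10-fold CV accuracy')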
In [76]:
from sklearn.neighbors import RadiusNeighborsClassifier
rad = RadiusNeighborsClassifier( radius = 0.75 )
cross_val_score( rad, iris.data, iris.target, cv=10)
Out[76]:
array([1.        , 0.93333333, 1.        , 1.        , 1.        ,
       0.86666667, 0.93333333, 0.93333333, 1.        , 1.        ])
In [77]:
for r in np.arange(0.75, 5, 0.15):
    rad = RadiusNeighborsClassifier( radius = r )
    print(r, np.mean(cross_val_score( rad, iris.data, iris.target, cv=10)))
0.75 0.9666666666666668
0.9 0.9533333333333334
1.05 0.9533333333333334
1.2000000000000002 0.9400000000000001
1.35 0.9200000000000002
1.5 0.8866666666666667
1.6500000000000001 0.8933333333333333
1.8000000000000003 0.8866666666666667
1.9500000000000002 0.8933333333333333
2.1 0.9
2.25 0.8800000000000001
2.4000000000000004 0.8666666666666668
2.5500000000000003 0.8800000000000001
2.7 0.8933333333333333
2.8500000000000005 0.8733333333333333
3.0000000000000004 0.82
3.1500000000000004 0.7866666666666666
3.3000000000000003 0.7466666666666666
3.45 0.7
3.6000000000000005 0.6333333333333332
3.7500000000000004 0.5800000000000001
3.9000000000000004 0.56
4.050000000000001 0.52
4.200000000000001 0.47999999999999987
4.3500000000000005 0.43999999999999995
4.5 0.4133333333333334
4.65 0.38000000000000006
4.800000000000001 0.36000000000000004
4.950000000000001 0.33333333333333337
In [78]:
X = iris.data[:, [0, 2]]
y = iris.target

x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))

for idx, clf, tt in zip(product([0, 1], [0, 1]),
                        [KNeighborsClassifier(n_neighbors=1), KNeighborsClassifier(n_neighbors=3),
                         RadiusNeighborsClassifier( radius = 4. ), DecisionTreeClassifier(max_depth=4, min_impurity_decrease=0.1)],
                        ['KNeighborsClassifier, k=1', 'KNeighborsClassifier, k=3',
                         'RadiusNeighborsClassifier, radius=4.0', 'Decision Tree (depth=4)']):

    clf.fit( X, y )
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)
    axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y,
                                  s=20, edgecolor='k')
    axarr[idx[0], idx[1]].set_title(tt)

plt.show()
In [79]:
from sklearn.tree import DecisionTreeRegressor
# The log-target experiment from In[57]-In[65], repeated on a fresh random split;
# note how the train/test scores shift from run to run.
regressor = DecisionTreeRegressor( max_depth=5, min_impurity_decrease=0.01 )
In [80]:
regressor.fit( boston.data, boston.target )
Out[80]:
DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.01,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [81]:
X_train, X_test, y_train, y_test = train_test_split( boston.data, boston.target, test_size=0.15)
In [82]:
regressor.fit( X_train, np.log(y_train) )
Out[82]:
DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.01,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
In [83]:
regressor.score( X_train, np.log(y_train) )
Out[83]:
0.7038988391585246
In [84]:
regressor.score( X_test, np.log(y_test) )
Out[84]:
0.5879761258737521
In [85]:
pred = regressor.predict( X_test )
pred
Out[85]:
array([3.53986065, 3.53986065, 3.08085185, 3.08085185, 3.08085185,
       3.53986065, 3.08085185, 2.29529697, 2.75531767, 3.08085185,
       3.08085185, 3.53986065, 3.08085185, 3.53986065, 3.08085185,
       3.53986065, 3.08085185, 2.75531767, 3.53986065, 2.75531767,
       2.75531767, 3.53986065, 3.08085185, 2.29529697, 3.08085185,
       3.53986065, 2.29529697, 2.29529697, 3.08085185, 3.08085185,
       3.53986065, 3.08085185, 3.08085185, 3.53986065, 3.08085185,
       2.75531767, 3.08085185, 2.75531767, 3.08085185, 2.75531767,
       3.08085185, 3.08085185, 3.08085185, 2.29529697, 3.08085185,
       3.08085185, 3.08085185, 3.53986065, 3.53986065, 3.53986065,
       3.08085185, 3.08085185, 3.53986065, 3.08085185, 3.08085185,
       3.08085185, 3.53986065, 2.29529697, 3.08085185, 2.29529697,
       3.08085185, 3.08085185, 2.75531767, 3.08085185, 3.08085185,
       3.08085185, 3.53986065, 3.53986065, 2.75531767, 2.75531767,
       2.75531767, 3.08085185, 3.08085185, 2.29529697, 3.53986065,
       2.29529697])
In [86]:
np.log(y_test)
Out[86]:
array([3.5085559 , 3.77963382, 3.03974916, 2.77881927, 2.93916192,
       3.55820113, 3.18221184, 3.314186  , 2.49320545, 3.21084365,
       3.42751469, 3.33220451, 3.06339092, 3.5055574 , 3.15273602,
       3.5085559 , 3.58905912, 2.9601051 , 3.49650756, 2.66722821,
       2.87919846, 3.56671182, 3.02529108, 2.84490938, 3.09104245,
       3.63495111, 2.59525471, 2.00148   , 3.01062089, 3.10009229,
       3.41114771, 3.35689712, 3.05400118, 3.56104608, 2.83907846,
       2.93385687, 3.11351531, 2.44234704, 2.96527307, 2.8507065 ,
       2.77881927, 3.33576958, 3.32862669, 2.81540872, 3.16547505,
       3.00071982, 3.13549422, 3.8286414 , 3.44041809, 3.54673969,
       2.94443898, 2.97552957, 3.34990409, 2.93916192, 3.13983262,
       3.39450839, 3.26575941, 2.73436751, 3.13983262, 2.57261223,
       3.19458313, 3.12676054, 2.78501124, 3.44998755, 3.40452517,
       2.8094027 , 3.48737508, 3.40452517, 2.87919846, 2.46809953,
       2.72129543, 3.16968558, 3.28091122, 2.2617631 , 3.314186  ,
       2.01490302])
In [87]:
plt.hist(pred - np.log(y_test))
Out[87]:
(array([ 1.,  0.,  0.,  3.,  2., 10., 11., 23., 13., 13.]),
 array([-1.01888904, -0.88570307, -0.7525171 , -0.61933114, -0.48614517,
        -0.3529592 , -0.21977323, -0.08658727,  0.0465987 ,  0.17978467,
         0.31297064]),
 <a list of 10 Patch objects>)