from sklearn.metrics.pairwise import euclidean_distances
euclidean_distances([[1,2,3], [100,200,300]])
# return:
# array([[  0.        , 370.42408129],
#        [370.42408129,   0.        ]])
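# Sanity check (plain numpy, not part of scikit-learn): the Euclidean distance
# is just the square root of the summed squared coordinate differences, so the
# off-diagonal value above can be reproduced directly:
import numpy as np
a = np.array([1, 2, 3])
b = np.array([100, 200, 300])
np.sqrt(np.sum((a - b) ** 2))
# return: ~370.42408129, matching the off-diagonal entries above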
from sklearn.metrics.pairwise import cosine_similarity
cosine_similarity([[1,2,3],[100,200,300]])
# return:
# array([[1., 1.],
#        [1., 1.]])
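# The similarity is 1 because [100, 200, 300] is exactly 100 * [1, 2, 3]:
# cosine similarity only looks at direction, not magnitude. A plain-numpy
# sketch of the formula a.b / (||a|| * ||b||) confirms it:
import numpy as np
a = np.array([1, 2, 3])
b = np.array([100, 200, 300])
np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
# return: 1.0 (up to floating-point rounding)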
from scipy.stats import pearsonr
pearsonr([1,2,3], [100,200,300])
# return: (1.0, 0.0)  # (Pearson's correlation coefficient, 2-tailed p-value)
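# Pearson's r is 1.0 here because the second vector is an exact linear function
# of the first (y = 100 * x). Equivalently, r is the cosine similarity of the
# mean-centered vectors, which a short numpy sketch makes explicit:
import numpy as np
a = np.array([1.0, 2.0, 3.0])
b = np.array([100.0, 200.0, 300.0])
ac, bc = a - a.mean(), b - b.mean()
np.dot(ac, bc) / (np.linalg.norm(ac) * np.linalg.norm(bc))
# return: 1.0 (up to floating-point rounding)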
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
from numpy import mat, ones, shape, exp, array, arange
import matplotlib.pyplot as plt

def createDataSet():
    features = []
    labels = []
    lines = urllib2.urlopen('https://raw.github.com/pbharrin/machinelearninginaction/master/Ch05/testSet.txt').readlines()
    for line in lines:
        line = line.strip().split()
        features.append([1.0, float(line[0]), float(line[1])])  # set x0 to 1.0
        labels.append(int(line[2]))
    return features, labels

def sigmoid(value):
    return 1.0 / (1 + exp(-value))

def gradAscent(features, labels, alpha=0.001, iterations=500):
    '''
    Gradient ascent:
    - batch algorithm: every update of the regression weights walks through the whole data set
    '''
    featureMatrix = mat(features)
    labelMatrix = mat(labels).transpose()
    m, n = shape(featureMatrix)
    weights = ones((n, 1))
    for k in range(iterations):
        h = sigmoid(featureMatrix * weights)   # predicted class probabilities for all samples
        error = (labelMatrix - h)              # prediction error for all samples
        weights = weights + alpha * featureMatrix.transpose() * error  # step along the gradient
    return weights
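# A minimal usage sketch of the two functions above (run under Python 2 because
# of urllib2); the result is a 3x1 weight matrix whose exact values depend on
# the downloaded testSet.txt:
features, labels = createDataSet()
weights = gradAscent(features, labels)
print(weights)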