Hexo


  • Startseite

  • Archiv

机器学习实战之KNN算法

Veröffentlicht am 2017-12-10

1.Python导入数据

knn.py

1
2
3
4
5
6
7
8
from numpy import * # import scientific computing package numpy
import operator # import operator modular

# createDateSet builds the toy data set and its labels.
def createDateSet():
    """Build the four-point toy training set used to try out kNN.

    The function keeps its original spelling ("Date") so existing callers
    keep working; ``createDataSet`` below is a correctly spelled alias.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 numpy array of
        sample points and ``labels`` is the list of their class labels.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


# Correctly spelled alias for the function above.
createDataSet = createDateSet

knnTest.py

1
2
3
# The module file is knn.py; alias it so the kNN.* calls below resolve.
import knn as kNN

# Load the dating samples into a feature matrix and a label list.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels)

结果

1
2
3
4
5
6
7
8
[[  4.09200000e+04   8.32697600e+00   9.53952000e-01]
[ 1.44880000e+04 7.15346900e+00 1.67390400e+00]
[ 2.60520000e+04 1.44187100e+00 8.05124000e-01]
...,
[ 2.65750000e+04 1.06501020e+01 8.66627000e-01]
[ 4.81110000e+04 9.13452800e+00 7.28045000e-01]
[ 4.37570000e+04 7.88260100e+00 1.33244600e+00]]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 2, 3, 2, 3, 2, 3, 2, 1, 3, 1, 3, 1, 2, 1, 1, 2, 3, 3, 1, 2, 3, 3, 3, 1, 1, 1, 1, 2, 2, 1, 3, 2, 2, 2, 2, 3, 1, 2, 1, 2, 2, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 1, 3, 1, 1, 3, 3, 1, 2, 3, 1, 3, 1, 2, 2, 1, 1, 3, 3, 1, 2, 1, 3, 3, 2, 1, 1, 3, 1, 2, 3, 3, 2, 3, 3, 1, 2, 3, 2, 1, 3, 1, 2, 1, 1, 2, 3, 2, 3, 2, 3, 2, 1, 3, 3, 3, 1, 3, 2, 2, 3, 1, 3, 3, 3, 1, 3, 1, 1, 3, 3, 2, 3, 3, 1, 2, 3, 2, 2, 3, 3, 3, 1, 2, 2, 1, 1, 3, 2, 3, 3, 1, 2, 1, 3, 1, 2, 3, 2, 3, 1, 1, 1, 3, 2, 3, 1, 3, 2, 1, 3, 2, 2, 3, 2, 3, 2, 1, 1, 3, 1, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 3, 1, 3, 3, 2, 1, 1, 1, 2, 1, 3, 3, 3, 3, 2, 1, 1, 1, 2, 3, 2, 1, 3, 1, 3, 2, 2, 3, 1, 3, 1, 1, 2, 1, 2, 2, 1, 3, 1, 3, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 2, 1, 1, 1, 3, 3, 2, 1, 1, 1, 2, 2, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 3, 2, 3, 3, 3, 3, 1, 2, 3, 1, 1, 1, 3, 1, 3, 2, 2, 1, 3, 1, 3, 2, 2, 1, 2, 2, 3, 1, 3, 2, 1, 1, 3, 3, 2, 3, 3, 2, 3, 1, 3, 1, 3, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 2, 1, 3, 1, 1, 3, 3, 2, 2, 3, 1, 2, 3, 3, 2, 2, 1, 1, 1, 1, 3, 2, 1, 1, 3, 2, 1, 1, 3, 3, 3, 2, 3, 2, 1, 1, 1, 1, 1, 3, 2, 2, 1, 2, 1, 3, 2, 1, 3, 2, 1, 3, 1, 1, 3, 3, 3, 3, 2, 1, 1, 2, 1, 3, 3, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 3, 1, 1, 2, 2, 1, 2, 2, 2, 3, 1, 1, 1, 3, 1, 3, 1, 3, 3, 1, 1, 1, 3, 2, 3, 3, 2, 2, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 1, 1, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 1, 1, 1, 1, 3, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 2, 2, 3, 2, 1, 3, 2, 3, 2, 3, 2, 1, 1, 2, 3, 1, 3, 3, 3, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 1, 3, 3, 2, 2, 2, 3, 1, 2, 1, 1, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 1, 3, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 3, 3, 2, 2, 1, 3, 1, 1, 3, 2, 3, 1, 1, 3, 1, 3, 3, 1, 2, 3, 1, 3, 1, 1, 2, 1, 3, 1, 1, 1, 1, 2, 1, 3, 1, 2, 1, 3, 1, 3, 1, 1, 2, 2, 2, 3, 2, 2, 1, 2, 3, 3, 2, 3, 3, 3, 2, 3, 3, 1, 3, 2, 3, 2, 1, 2, 1, 1, 1, 2, 3, 2, 2, 1, 2, 2, 1, 3, 1, 3, 3, 3, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 2, 1, 3, 1, 2, 
3, 1, 1, 1, 2, 2, 3, 1, 3, 1, 1, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 2, 2, 3, 1, 3, 1, 2, 3, 2, 2, 3, 1, 2, 3, 2, 3, 1, 2, 2, 3, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 3, 2, 1, 3, 3, 3, 1, 1, 3, 1, 2, 3, 3, 2, 2, 2, 1, 2, 3, 2, 2, 3, 2, 2, 2, 3, 3, 2, 1, 3, 2, 1, 3, 3, 1, 2, 3, 2, 1, 3, 3, 3, 1, 2, 2, 2, 3, 2, 3, 3, 1, 2, 1, 1, 2, 1, 3, 1, 2, 2, 1, 3, 2, 1, 3, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 3, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 3, 2, 2, 1, 3, 1, 2, 3, 1, 3, 1, 3, 1, 1, 3, 2, 3, 1, 1, 3, 3, 3, 3, 1, 3, 2, 2, 1, 1, 3, 3, 2, 2, 2, 1, 2, 1, 2, 1, 3, 2, 1, 2, 2, 3, 1, 2, 2, 2, 3, 2, 1, 2, 1, 2, 3, 3, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2, 2, 2, 1, 3, 3, 3, 3, 3, 1, 1, 3, 2, 1, 2, 1, 2, 2, 3, 2, 2, 2, 3, 1, 2, 1, 2, 2, 1, 1, 2, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, 3, 2, 3, 2, 3, 3, 2, 2, 1, 1, 1, 3, 3, 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 3, 1, 1, 2, 3, 2, 2, 1, 3, 1, 2, 3, 1, 2, 2, 2, 2, 3, 2, 3, 3, 1, 2, 1, 2, 3, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 3, 3, 3]

2.KNN算法实现

knn.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# kNN classification
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: Feature vector to classify, one value per column of ``dataSet``.
        dataSet: Training samples, one row per sample.
        labels: Class label of each row of ``dataSet``.
        k: Number of nearest neighbours that vote.

    Returns:
        The label that collected the most votes.
    """
    # Broadcasting subtracts every training row from inX, so no tile() copy
    # of inX is needed.
    diffMat = asarray(inX) - asarray(dataSet)
    # axis=1 sums across the columns of each row: one squared distance per sample.
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5  # Euclidean distance
    sortedDistIndices = distances.argsort()  # sample indices, nearest first
    classCount = {}  # label -> number of votes
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]  # label of the i-th nearest sample
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # itemgetter(1) sorts the (label, votes) pairs by vote count, largest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]  # label of the top-voted pair

knnTest.py

1
2
# The module file is knn.py; alias it so the kNN.* calls below resolve.
import knn as kNN

# classify0 needs the training samples and their labels, so build them first.
group, labels = kNN.createDateSet()
print(kNN.classify0([0, 0], group, labels, 3))

结果

1
B

3.Example:使用KNN改进约会网站的配对效果

3.1.从文本文件中读取数据并解析

knn.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# kNN read file
def file2matrix(filename):
    """Parse a tab-separated sample file into a feature matrix and labels.

    Every non-blank line must hold at least three numeric feature columns
    followed by an integer class label in its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        n x 3 numpy array of features and ``classLabelVector`` is the list
        of the n integer labels.
    """
    # The with-block closes the file even when parsing fails.
    with open(filename) as fr:
        # Strip surrounding whitespace and skip blank lines, such as a
        # trailing empty line that would otherwise crash int('').
        arrayOLines = [line.strip() for line in fr if line.strip()]
    returnMat = zeros((len(arrayOLines), 3))  # n rows, 3 feature columns
    classLabelVector = []
    for index, line in enumerate(arrayOLines):
        listFromLine = line.split('\t')
        # The first three fields are the features of sample `index`.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        # The last field is the class label.
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector

knnTest.py

1
2
3
# The module file is knn.py; alias it so the import also works on
# case-sensitive file systems and the kNN.* calls below resolve.
import knn as kNN

datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

结果:

1
2
3
4
5
6
7
8
[[  4.09200000e+04   8.32697600e+00   9.53952000e-01]
[ 1.44880000e+04 7.15346900e+00 1.67390400e+00]
[ 2.60520000e+04 1.44187100e+00 8.05124000e-01]
...,
[ 2.65750000e+04 1.06501020e+01 8.66627000e-01]
[ 4.81110000e+04 9.13452800e+00 7.28045000e-01]
[ 4.37570000e+04 7.88260100e+00 1.33244600e+00]]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3]

3.2.使用matplotlib创建散点图

knnTest.py

1
2
3
4
5
6
7
8
9
10
import matplotlib
import matplotlib.pyplot as plt
from numpy import array  # array() scales the marker size/colour below

# datingDataMat and datingLabels come from the file2matrix call shown earlier.
fig = plt.figure()

# Top plot (first cell of a 2-row, 1-column grid): column 1 against column 2.
ax = fig.add_subplot(211)
ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
           15.0 * array(datingLabels), 15.0 * array(datingLabels))

# Bottom plot (second cell): column 0 against column 1.
bx = fig.add_subplot(212)
bx.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
           15.0 * array(datingLabels), 15.0 * array(datingLabels))
plt.show()

散点图使用datingDataMat矩阵的第2和第3列数据,分别表示玩视频游戏所耗时间百分比和每周所消费的冰淇淋公升数

结果

1.png

3.3.归一化数值

knn.py

1
2
3
4
5
6
7
8
9
10
# kNN normalization
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Args:
        dataSet: 2-D numpy array, one sample per row.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` -- the rescaled data, the
        per-column ``max - min`` and the per-column minimum.
    """
    minVals = dataSet.min(0)  # per-column minimum
    maxVals = dataSet.max(0)  # per-column maximum
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so the column maps
    # to 0 instead of producing NaN. The returned ranges stay untouched.
    divisors = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column values to every row (no tile()).
    normDataSet = (dataSet - minVals) / divisors
    return normDataSet, ranges, minVals

knnTest.py

1
2
# Normalise the dating matrix, then show the scaled data, ranges and minima.
normMat, ranges, minVals = kNN.autoNorm(datingDataMat)
for item in (normMat, ranges, minVals):
    print(item)

结果

1
2
3
4
5
6
7
8
9
[[ 0.44832535  0.39805139  0.56233353]
[ 0.15873259 0.34195467 0.98724416]
[ 0.28542943 0.06892523 0.47449629]
...,
[ 0.29115949 0.50910294 0.51079493]
[ 0.52711097 0.43665451 0.4290048 ]
[ 0.47940793 0.3768091 0.78571804]]
[ 9.12730000e+04 2.09193490e+01 1.69436100e+00]
[ 0. 0. 0.001156]

3.4.预测分类器效果

knn.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# kNN verification
def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
    """Hold out the first part of the data and print the kNN error rate.

    Args:
        hoRatio: Fraction of the samples (taken from the start of the file)
            used as test data; the remainder is the training set.
        filename: Sample file passed to ``file2matrix``.
        k: Number of neighbours passed to ``classify0``.
    """
    datingDataMat, datingLabels = file2matrix(filename)  # load the samples
    normMat, ranges, minVals = autoNorm(datingDataMat)  # normalise features
    m = normMat.shape[0]  # number of samples
    numTestVecs = int(m * hoRatio)
    errorCount = 0
    for i in range(numTestVecs):
        # Rows [0, numTestVecs) are tested against rows [numTestVecs, m).
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], k)
        print("the classifier came back with:%d,the real answer is:%d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1
    # Guard against an empty hold-out set (tiny file or hoRatio of 0).
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the total error rate is: %f " % errorRate)

knnTest.py

1
# Run the hold-out evaluation; it prints every prediction and the error rate.
kNN.datingClassTest()

结果

1
2
3
4
5
6
7
8
9
10
the classifier came back with:3,the real answer is:3
the classifier came back with:2,the real answer is:2
the classifier came back with:1,the real answer is:1
the classifier came back with:1,the real answer is:1
the classifier came back with:1,the real answer is:1
...,
the classifier came back with:2,the real answer is:2
the classifier came back with:1,the real answer is:1
the classifier came back with:3,the real answer is:1
the total error rate is: 0.050000

3.5.约会网站结果预测

knn.py

1
2
3
4
5
6
7
8
9
10
11
# kNN prediction
def classifyPerson():
    """Ask for one person's three features and print the predicted label."""
    # Class labels 1..3 map onto these descriptions (index = label - 1).
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year:"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    # Load and normalise the training data.
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # Presumably the same column order as the training file -- verify.
    inArr = array([ffMiles, percentTats, iceCream])
    # Scale the query with the training set's minima and ranges.
    scaledInput = (inArr - minVals) / ranges
    classifierResult = classify0(scaledInput, normMat, datingLabels, 3)
    print('you will probably like this person:', resultList[classifierResult - 1])

knnTest.py

1
# Prompts for three feature values on stdin, then prints the prediction.
kNN.classifyPerson()

结果

1
2
3
4
percentage of time spent playing video games?3
frequent flier miles earned per year:10000
liters of ice cream consumed per year?5
you will probably like this person: in small doses

4.手写识别系统

4.1图形转为向量

knn.py

1
2
3
4
5
6
7
8
9
10
11
#convert img to vector
# 把一个32*32的图片转化为1*1024的向量
from os import listdir #list filename of file
def img2vector(filename):
    """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

    Args:
        filename: Path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A 1x1024 numpy array; entry ``32*i + j`` holds character ``j`` of
        line ``i`` converted to a number.
    """
    returnVect = zeros((1, 1024))
    # The with-block closes the file; callers convert many images, so
    # leaving handles open would leak one per image.
    with open(filename) as fr:
        for i in range(32):  # one image row per line
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect

knnTest.py

1
2
3
# Convert one digit image file into a 1x1024 vector.
testVector = kNN.img2vector('testDigits/0_13.txt')
# Each image row occupies 32 consecutive entries; these two slices show the
# first 31 entries of row 0 and of row 1.
print(testVector[0,0:31])
print(testVector[0,32:63])

结果

1
2
3
4
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1.
1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

4.2.k-近邻算法识别手写数字

knn.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# test digits using kNN classify0
def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits', k=3):
    """Classify every test digit file with kNN and print the error rate.

    File names are expected to look like ``<digit>_<n>.txt``; the part
    before the underscore is used as the true class.

    Args:
        trainingDir: Directory holding the training image files.
        testDir: Directory holding the test image files.
        k: Number of neighbours passed to ``classify0``.
    """
    def classNumOf(fileNameStr):
        # '0_13.txt' -> '0_13' -> 0
        return int(fileNameStr.split('.')[0].split('_')[0])

    # Load every training image as one row of trainingMat.
    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)  # number of training images
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(classNumOf(fileNameStr))
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))

    # Classify each test image and count the mistakes.
    testFileList = listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0
    for fileNameStr in testFileList:
        classNumStr = classNumOf(fileNameStr)  # true digit from the file name
        vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, k)
        print('the classifier came back with:%d,the real answer is:%d'
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1
    print('\nthe total number of errors is :%d' % errorCount)
    # Guard against an empty test directory.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print('\nthe total error rate is :%f' % errorRate)

knnTest.py

1
# Reads the trainingDigits/ and testDigits/ directories and prints the results.
kNN.handwritingClassTest()

结果

1
2
3
4
5
6
7
8
9
10
the classifier came back with:9,the real answer is:9
the classifier came back with:7,the real answer is:7
the classifier came back with:7,the real answer is:7
...,
the classifier came back with:4,the real answer is:4
the classifier came back with:5,the real answer is:5

the total number of errors is :11

the total error rate is :0.011628

这个算法并不高效:这里有约900个测试图片;
对于每一个测试图片,首先要经过一步复杂度为1024的运算转化为测试向量;
每个测试向量都要与训练集中的每一个样本计算一次距离;
每次距离计算都是1024个浮点数的计算。。。
有没有一种算法更加节省空间和时间呢?
跟着《机器学习实战》这本书的步伐,很快我们就知道,有一种叫做kd树(k-d tree)的大佬,
据说是k-近邻算法的优化版,可以大大地节省计算开销。

python函数式编程之返回函数

Veröffentlicht am 2017-12-09

函数作为返回值

1.返回函数

返回函数时相关参数和变量保存在返回函数中

1
2
3
4
5
6
7
8
9
10
11
def lazy_sum(*args):
    """Return a function that adds up ``args`` only when it is called."""
    # The inner name shadows the built-in sum() inside lazy_sum only; it is
    # kept because it appears in the printed function repr.
    def sum():
        ans = 0
        for i in args:  # args is captured from the enclosing call
            ans = ans + i
        return ans
    return sum


a = lazy_sum(1, 2, 3, 4, 5)
print(a)  # the returned function object; nothing has been added yet
b = a()
print(b)  # the sum is computed by this call

1
2
<function lazy_sum.<locals>.sum at 0x7f47807908c8>
15

每次调用lazy_sum时,都会返回一个新的函数,即使传入相同的参数

1
2
3
# Each call to lazy_sum builds a new inner function object.
f1 = lazy_sum()
f2 = lazy_sum()
print(f1 == f2)

1
False

2.闭包

闭包:
1.内部函数可以引用外部函数的参数或变量(不一定就是上一层函数中的参数和变量)
2.外部函数把内部函数作为返回值,相关参数和变量都保存在内部函数中

1
2
3
4
5
6
7
8
9
10
11
def count():
    """Return three closures that all read the same loop variable ``i``.

    This version deliberately shows the late-binding pitfall: ``i`` is
    looked up when a closure is called, not when it is created.
    """
    fn = []
    for i in range(1, 4):
        def get():
            def f():
                return i * i  # reads i at call time
            return f
        fn.append(get())
    return fn


f1, f2, f3 = count()
print(f1(), f2(), f3())

1
9 9 9

直到调用f()才执行函数,此时i已经变成3,所以最终结果是9 9 9,而不是1,4,9
所以函数最好不要引用循环变量,以及后续会发生变化的量
如果一定要引用循环变量,再创建一个函数,用函数绑定当前循环变量的值

1
2
3
4
5
6
7
8
9
10
11
def count():
    """Return three closures, each bound to its own value of the loop index."""
    def f(j):
        # j is a parameter, so every call to f captures its own value.
        def g():
            return j * j
        return g
    fn = []
    for i in range(1, 4):
        fn.append(f(i))  # the current i is passed in as j
    return fn


f1, f2, f3 = count()
print(f1(), f2(), f3())

1
1 4 9

3.练习

闭包返回计数器函数,每次返回递增整数

1
2
3
4
5
6
7
8
9
def createCounter():
    """Return a counter function that yields 1, 2, 3, ... on successive calls."""
    i = 0
    def counter():
        # nonlocal lets the inner function rebind i in the enclosing scope.
        nonlocal i
        i = i + 1
        return i
    return counter


countA = createCounter()
print(countA(), countA(), countA())

1
1 2 3

4.三种不同写法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#----------------------------------------------------
def count():
    """Variant 1: get() returns the function f, and that function is stored."""
    fn = []
    def get():
        def f():
            return 3*3
        return f  # returns the function f itself, without calling it
    fn.append(get())  # get() is called here, so the function f is stored
    return fn
#----------------------------------------------------
def count():
    """Variant 2: get itself is stored; calling it runs f and returns 9."""
    fn = []
    def get():
        def f():
            return 3*3
        return f()  # calls f and returns its result
    fn.append(get)  # stores the function get, without calling it
    return fn
#----------------------------------------------------
def count():
    """Variant 3: the function f is stored directly, with no wrapper."""
    fn = []
    def f():
        return 3*3
    fn.append(f)  # stores the function f, without calling it
    return fn

最终结果都是一样的哈哈,现在的程序员,真的是……没毛病!!!

python函数式编程sorted

Veröffentlicht am 2017-12-09

#python函数式编程之sorted用法

sorted(iterable,key,reverse) 对所有可迭代的对象进行排序,返回一个list.
sorted主要用在list和dict中
iterable是一个可迭代对象
key指定排序规则,它是一个作用于每个元素的函数,比如abs;也可以用lambda或itemgetter取出元素中的某一个字段作为排序依据
reverse表示排序规则,reverse = True降序,reverse = False升序(默认)

  1. example 1:对数字列表进行排序
    1
    2
    3
    a = [2, 6, -4, 1, -7, 9, 3]
    # Natural ascending order.
    b = sorted(a)
    print(b)
    # Order by absolute value instead.
    c = sorted(a, key=abs)
    print(c)
1
2
[-7, -4, 1, 2, 3, 6, 9]
[1, 2, 3, -4, 6, -7, 9]
  1. example 2:利用key排序set
    1
    2
    3
    l = {('q', 1), ('e', 6), ('g', 7), ('b', 3)}
    # Order the pairs of the set by their second field.
    nl = sorted(l, key=lambda pair: pair[1])
    print(nl)
1
[('q', 1), ('b', 3), ('e', 6), ('g', 7)]
  1. example 3:利用key排序字符串
    1
    2
    3
    str0 = 'Hello world Python what are you Doing'.split()
    # Pass the method object itself (no parentheses) as the key; the words
    # are compared case-insensitively, largest first.
    a = list(str0)
    a.sort(key=str.lower, reverse=True)
    print(a)
1
['you', 'world', 'what', 'Python', 'Hello', 'Doing', 'are']
  1. example 4:test按照不同元素排序
    1
    2
    3
    4
    from operator import itemgetter
    L = [('john', 'A', 1), ('jane', 'B', 2), ('bob', 'C', 1)]
    # A lambda key: order by the field at index 1.
    ans = sorted(L, key=lambda record: record[1])
    print(ans)
    # itemgetter(2, 1): order by index 2 first, then by index 1.
    ans = sorted(L, key=itemgetter(2, 1))
    print(ans)
1
2
[('john', 'A', 1), ('jane', 'B', 2), ('bob', 'C', 1)]
[('john', 'A', 1), ('bob', 'C', 1), ('jane', 'B', 2)]

python函数式编程reduce

Veröffentlicht am 2017-12-09

#python函数式编程之reduce用法

reduce(f,序列)
reduce用f在序列上的相邻元素上作用,然后把结果继续和下一个元素迭代计算
reduce返回什么结果关键看f函数

  1. example 1: reduce计算和
    1
    2
    3
    4
    5
    from functools import reduce
    def add(x, y):
        """Return the sum of the two arguments."""
        return x + y
    # reduce folds the list from the left: add(add(add(1, 2), 3), 4) ...
    ans = reduce(add, [1, 2, 3, 4, 5, 6, 7, 8, 9])
    print(ans)
1
45
  1. example 2: reduce计算积的和
    1
    2
    3
    4
    5
    from functools import reduce
    def fn(x, y):
        """Append the digit y to the decimal number x."""
        return x * 10 + y
    # Folding the digits from the left builds the integer 123456789.
    ans = reduce(fn, [1, 2, 3, 4, 5, 6, 7, 8, 9])
    print(ans)
1
123456789
  1. example 3: reduce把字符串转为数字
    1
    2
    3
    4
    5
    6
    7
    def char2num(a):
        """Map one digit character to its integer value."""
        digit = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
        return digit[a]
    # reduce and fn come from the earlier examples on this page.
    ans = reduce(fn,map(char2num,['1','2','3']))
    # A str is iterable, so map visits it character by character.
    ans1 = reduce(fn,map(char2num,'123456'))
    print(ans,ans1,end=' ')
    print()
1
123 123456
  1. example 4: 提炼为str2num函数
    1
    2
    3
    4
    5
    6
    7
    8
    9
    digit = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
    def str2num(s):
        """Convert a string of decimal digits to an int with map and reduce."""
        def fn(x,y):
            # Append the digit y to the number built so far.
            return x*10+y
        def char2num(a):
            # Look up the integer value of one digit character.
            return digit[a]
        return reduce(fn,map(char2num,s))
    s = '12345678'
    print(str2num(s))
1
12345678
  1. example 5: str2num函数用lambda表达式简化
    1
    2
    3
    def str2num(s):
        """Convert a string of decimal digits to an int (lambda version)."""
        # digit is the character-to-int table defined in the previous example.
        return reduce(lambda x,y:x*10+y,map(lambda x:digit[x],s))
    print(str2num('321'))
1
321
  1. test1:将字符串转为首字母大写,其余小写
    1
    2
    3
    4
    5
    6
    def normalize(name):
        """Return ``name`` with its first letter upper-cased and the rest lower-cased."""
        return name.lower().capitalize()
    L1 = ['adam', 'LISA', 'barT']
    # map is lazy, so list() is needed to collect the results.
    L2 = list(map(normalize,L1))
    print(L2)
1
['Adam', 'Lisa', 'Bart']
  1. test2:求积
    1
    2
    3
    def prod(L):
        """Return the product of the numbers in ``L`` (1 for an empty sequence)."""
        # The initial value 1 keeps reduce from raising TypeError on empty input.
        return reduce(lambda x,y:x*y,L,1)
    print('3*5*7*9 = ',prod([3,5,7,9]))
1
3*5*7*9 = 945
  1. test3:字符串转为浮点数
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    digit = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
    def str2float(s):
        """Convert a decimal string such as '12.23' to a float with map/reduce.

        Any character other than a digit or one decimal point raises
        KeyError from the ``digit`` lookup.
        """
        from functools import reduce  # local import keeps the snippet self-contained
        whole, _, frac = s.partition('.')
        # Fold every digit into one integer, then shift the decimal point
        # once. A single division avoids the rounding error that builds up
        # when 0.1**j terms are added one by one.
        digits = reduce(lambda x, y: x * 10 + y,
                        map(lambda c: digit[c], whole + frac), 0)
        return digits / 10 ** len(frac)
    print(str2float('12.23'))
1
12.23

python函数式编程map

Veröffentlicht am 2017-12-09

#python函数式编程之map用法

map(f,iterable)把f依次作用到可迭代对象的每个元素上,并把结果作为新的Iterator返回
无论传入的是哪种可迭代类型,Python 3中map返回的都是惰性的map对象,需要用list()等把结果取出来

  1. example 1

    1
    2
    3
    4
    5
    def f(x):
        """Return the square of x."""
        return x*x
    # f is applied lazily to each element; the result is a map iterator, not a list.
    ans = map(f,[1,2,3,4,5,6,7,8,9])
    print(ans)  # prints the map object itself, not the computed values
    print(list(ans))  # list() drives the lazy iterator and collects every result
  2. example 2

    1
    2
    # Convert every number to its string form.
    ans = [str(number) for number in [1,2,3,4,5,6,7,8,9]]
    print(ans)

python高级特性-生成器

Veröffentlicht am 2017-12-09

#生成器generator

python中,一边循环一边计算的机制,称为生成器
利用生成器,可以减少内存使用

  1. 生成方法一生成器表达式

    1
    2
    3
    a = (x for x in range(10))
    # Values are produced one at a time as the loop asks for them.
    for b in a:
        print(b, end=' ')
    print()
  2. 生成方法二生成器函数

    1
    2
    3
    4
    5
    6
    7
    def fib(maxn):
        """Yield the first ``maxn`` Fibonacci numbers, starting 1, 1, 2, ..."""
        n,a,b = 0,0,1
        while n<maxn:
            yield b  # execution pauses here until the next value is requested
            a,b = b,a+b
            n = n+1
    a = fib(8)
    print(a)  # a is a generator object; nothing has been computed yet
  3. 输出方法一for循环迭代输出

    1
    2
    # A for loop keeps requesting values until the generator is exhausted.
    for x in a:
        print(x, end=' ')
    print()
  4. 输出方法二next()迭代输出

    1
    2
    3
    4
    5
    6
    7
    8
    while True :
        try:
            x = next(a)
            print(x,end=' ')
        except StopIteration as e:
            # next() raises StopIteration once the generator has no more
            # values; e.value carries the generator's return value.
            print('\nGenertor return value:',e.value)
            break
    print(a)

每次调用next()的时候,遇到yield返回,下一次从返回处继续执行
循环不断调用yield,就会不断中断,从而边计算边取值,达到节省内存效果
可以用next()函数逐个取值,当没有更多元素时,抛出StopIteration错误

  1. 注意:生成器只能遍历一次

    1
    2
    3
    4
    5
    # Only the first loop prints values: it exhausts the generator, so the
    # second loop has nothing left to iterate over.
    for x in a:
        print(x, end=' ')
    print()
    for x in a:
        print(x, end=' ')
    print()
  2. 利用生成器打印杨辉三角

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    def test(maxn):
        """Yield the first ``maxn`` rows of Yang Hui's (Pascal's) triangle.

        Exactly ``maxn`` rows are produced, including for ``maxn`` of 0 or 1.
        """
        row = [1]
        for _ in range(maxn):
            yield row
            # Each inner entry is the sum of the two entries above it; a new
            # list is built every time, so rows already yielded never change.
            row = [1] + [row[i] + row[i + 1] for i in range(len(row) - 1)] + [1]
    a = test(10)
    for x in a:
        print(x)

python高级特性-迭代器

Veröffentlicht am 2017-12-09
  1. 迭代

    可以直接作用于for循环的对象,称为可迭代对象Iterable:
    1.集合数据类型:list,tuple,set,dict,str
    2.generator:包括生成器和带yield的generator function

    1. list迭代

      1
      2
      3
         list = [1,2,3,4,5]
      # NOTE: the name `list` shadows the built-in type from here on; the
      # isinstance(list, Iterable) check further down this page uses this variable.
      for a in list:print(a,end=' ')
      print()
    2. dict迭代

      1
      2
      3
      4
      5
      d = {'a': 1, 'b': 2, 'c': 3}
      # Iterating a dict directly yields its keys.
      for key in d:
          print(key, end=' ')
      print()
      # .values() iterates over the stored values instead.
      for value in d.values():
          print(value, end=' ')
      print()
    3. 字符串迭代

      1
      2
         # A str is iterated one character at a time.
         for a in 'afsf':
             print(a, end=' ')
         print()
    4. 判断是否可以迭代

      1
      2
         # The ABCs live in collections.abc; the old collections.Iterable
         # alias was removed in Python 3.10.
         from collections.abc import Iterable
         # Test a list value directly; the bare name `list` would normally
         # refer to the built-in type, which is not itself iterable.
         print(isinstance([1, 2, 3, 4, 5], Iterable))
    5. list变成索引-元素对的例子

      1
      2
      # enumerate pairs every element with its index.
      for i,value in enumerate(['a','b','c']):
          print(i,value)
  2. 迭代器

    可以被next()不断调用并返回下一个值的对象称为Iterator
    集合数据类型list,tuple,set,dict,str都是Iterable,但都不是Iterator
    for循环内部实际上就是先调用iter()把Iterable转为Iterator再进行循环迭代的

    1. 迭代器判断

      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      # The ABCs live in collections.abc; the old collections.Iterator
      # alias was removed in Python 3.10.
      from collections.abc import Iterator
      # Built-in containers are iterable but are not iterators themselves.
      print(isinstance((),Iterator))
      print(isinstance({},Iterator))
      print(isinstance([],Iterator))
      print(isinstance('fsaf',Iterator))
      # iter() turns each of them into an Iterator.
      print(isinstance(iter(()),Iterator))
      print(isinstance(iter({}),Iterator))
      print(isinstance(iter([]),Iterator))
      print(isinstance(iter('fsaf'),Iterator))
    2. 为什么list不是Iterator

      Iterator表示一个数据流,我们无法提前知道它的长度,只能通过next()不断计算下一个数据,它甚至可以是无限长的
      Iterator是一个惰性序列,它的计算是惰性的,需要返回下一个数据时才会去计算
      而集合数据类型是无法达到这样的功能的

python函数式编程-filter

Veröffentlicht am 2017-12-07
  • filter用法

    1. filter(f,iterable),返回由符合条件的元素组成的Iterator(惰性序列),需要用list()才能得到列表
    2. filter和map类似,根据每个值是True还是False决定是否保留元素
    3. filter返回的也是惰性序列,由于使用惰性计算,只有取filter结果的时候,才会每次真正筛选并返回筛选的元素.
  • example 1: 判断奇偶

    1
    2
    3
    4
    def is_odd(x):
        """Return True when x is odd."""
        return x % 2 == 1
    def is_even(x):
        """Return True when x is even."""
        return x % 2 == 0
    # filter keeps the elements for which the predicate is true; the even
    # numbers are kept here.
    ans = list(filter(is_even,[1,2,3,4,5,6,7,8,9]))
    print(ans)
    1
    [2, 4, 6, 8]
  • example 2: 删除空字符

    1
    2
    3
    4
    def not_empty(x):
        """Return a truthy value only when x has non-whitespace content."""
        # `x and ...` short-circuits on an empty string, so strip() is only
        # called on non-empty input.
        return x and x.strip()
    ans = list(filter(not_empty,['a',' ','b',' ']))
    print(ans)
    1
    ['a', 'b']
  • example 3: 埃拉托色尼筛法(埃氏筛法)求素数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
     def _not_divisible(n):
         # Build a predicate that keeps the numbers n does not divide, e.g.
         # _not_divisible(3) returns lambda x: x % 3 > 0.
         return lambda x: x % n > 0
     def _odd_filter():
         # Endless stream of odd numbers: 3, 5, 7, ...
         n = 1
         while True:
             n = n + 2
             yield n
     def primes():
         """Yield the prime numbers 2, 3, 5, ... without end (a lazy sieve)."""
         yield 2
         it = _odd_filter()
         while True:
             n = next(it)  # the first number that survived every filter so far
             yield n
             # Wrap the stream in one more filter that drops multiples of n.
             it = filter(_not_divisible(n), it)
     # primes() never ends, so the loop has to break out by itself.
     for n in primes():
         if n < 100:
             print(n, end=' ')
         else:
             break
     print()  # finish the line
    1
    2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97
  • test : 判断200以内回文数
    思路:转为字符串,判断回文串

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    def is_palidrome(n):
        """Return True when the decimal digits of n read the same both ways."""
        a = str(n)
        # A palindrome equals its own reverse.
        return a == a[::-1]
    ans = list(filter(is_palidrome,range(1,200)))
    print(ans)
    1
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 22, 33, 44, 55, 66, 77, 88, 99, 101, 111, 121, 131, 141, 151, 161, 171, 181, 191]

参考:
Python filter函数
高阶函数filter

hello,github

Veröffentlicht am 2017-12-01

hello,github!

John Doe

9 Artikel
2 Tags
© 2017 John Doe
Erstellt mit Hexo
|
Theme — NexT.Muse v5.1.3