机器学习实战之KNN算法

Veröffentlicht am 2017-12-10

1.Python导入数据

knn.py

from numpy import * # import scientific computing package numpy
import operator # import operator modular

# createDateSet builds a tiny sample data set together with its labels
def createDateSet():
    """Return a 4x2 sample feature array and the class label of each row."""
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    return array(samples), ['A', 'A', 'B', 'B']

knnTest.py

1
2
3

import kNN  # the name must match the kNN.* references below
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels)

结果

[[  4.09200000e+04   8.32697600e+00   9.53952000e-01]
 [  1.44880000e+04   7.15346900e+00   1.67390400e+00]
 [  2.60520000e+04   1.44187100e+00   8.05124000e-01]
 ..., 
 [  2.65750000e+04   1.06501020e+01   8.66627000e-01]
 [  4.81110000e+04   9.13452800e+00   7.28045000e-01]
 [  4.37570000e+04   7.88260100e+00   1.33244600e+00]]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3, 2, 1, 2, 3, 2, 3, 2, 3, 2, 1, 3, 1, 3, 1, 2, 1, 1, 2, 3, 3, 1, 2, 3, 3, 3, 1, 1, 1, 1, 2, 2, 1, 3, 2, 2, 2, 2, 3, 1, 2, 1, 2, 2, 2, 2, 2, 3, 2, 3, 1, 2, 3, 2, 2, 1, 3, 1, 1, 3, 3, 1, 2, 3, 1, 3, 1, 2, 2, 1, 1, 3, 3, 1, 2, 1, 3, 3, 2, 1, 1, 3, 1, 2, 3, 3, 2, 3, 3, 1, 2, 3, 2, 1, 3, 1, 2, 1, 1, 2, 3, 2, 3, 2, 3, 2, 1, 3, 3, 3, 1, 3, 2, 2, 3, 1, 3, 3, 3, 1, 3, 1, 1, 3, 3, 2, 3, 3, 1, 2, 3, 2, 2, 3, 3, 3, 1, 2, 2, 1, 1, 3, 2, 3, 3, 1, 2, 1, 3, 1, 2, 3, 2, 3, 1, 1, 1, 3, 2, 3, 1, 3, 2, 1, 3, 2, 2, 3, 2, 3, 2, 1, 1, 3, 1, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 3, 1, 3, 3, 2, 1, 1, 1, 2, 1, 3, 3, 3, 3, 2, 1, 1, 1, 2, 3, 2, 1, 3, 1, 3, 2, 2, 3, 1, 3, 1, 1, 2, 1, 2, 2, 1, 3, 1, 3, 2, 3, 1, 2, 3, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 2, 1, 1, 1, 3, 3, 2, 1, 1, 1, 2, 2, 3, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 3, 2, 3, 3, 3, 3, 1, 2, 3, 1, 1, 1, 3, 1, 3, 2, 2, 1, 3, 1, 3, 2, 2, 1, 2, 2, 3, 1, 3, 2, 1, 1, 3, 3, 2, 3, 3, 2, 3, 1, 3, 1, 3, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 2, 1, 3, 1, 1, 3, 3, 2, 2, 3, 1, 2, 3, 3, 2, 2, 1, 1, 1, 1, 3, 2, 1, 1, 3, 2, 1, 1, 3, 3, 3, 2, 3, 2, 1, 1, 1, 1, 1, 3, 2, 2, 1, 2, 1, 3, 2, 1, 3, 2, 1, 3, 1, 1, 3, 3, 3, 3, 2, 1, 1, 2, 1, 3, 3, 2, 1, 2, 3, 2, 1, 2, 2, 2, 1, 1, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 3, 1, 1, 2, 2, 1, 2, 2, 2, 3, 1, 1, 1, 3, 1, 3, 1, 3, 3, 1, 1, 1, 3, 2, 3, 3, 2, 2, 1, 1, 1, 2, 1, 2, 2, 3, 3, 3, 1, 1, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 1, 1, 1, 1, 3, 3, 3, 3, 2, 1, 1, 1, 1, 3, 1, 1, 2, 1, 1, 2, 3, 2, 1, 2, 2, 2, 3, 2, 1, 3, 2, 3, 2, 3, 2, 1, 1, 2, 3, 1, 3, 3, 3, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 1, 3, 3, 2, 2, 2, 3, 1, 2, 1, 1, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 1, 3, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 3, 3, 2, 2, 1, 3, 1, 1, 3, 2, 3, 1, 1, 3, 1, 3, 3, 1, 2, 3, 1, 3, 1, 1, 2, 1, 3, 1, 1, 1, 1, 2, 1, 3, 1, 2, 1, 3, 1, 3, 1, 1, 2, 2, 2, 3, 2, 2, 1, 2, 3, 3, 2, 3, 3, 3, 2, 3, 3, 1, 3, 2, 3, 2, 1, 2, 1, 1, 1, 2, 3, 2, 2, 1, 2, 2, 1, 3, 1, 3, 3, 3, 2, 2, 3, 3, 1, 2, 2, 2, 3, 1, 2, 1, 3, 1, 2, 
3, 1, 1, 1, 2, 2, 3, 1, 3, 1, 1, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 2, 2, 3, 1, 3, 1, 2, 3, 2, 2, 3, 1, 2, 3, 2, 3, 1, 2, 2, 3, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 3, 2, 1, 3, 3, 3, 1, 1, 3, 1, 2, 3, 3, 2, 2, 2, 1, 2, 3, 2, 2, 3, 2, 2, 2, 3, 3, 2, 1, 3, 2, 1, 3, 3, 1, 2, 3, 2, 1, 3, 3, 3, 1, 2, 2, 2, 3, 2, 3, 3, 1, 2, 1, 1, 2, 1, 3, 1, 2, 2, 1, 3, 2, 1, 3, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 3, 1, 3, 3, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 3, 2, 2, 1, 3, 1, 2, 3, 1, 3, 1, 3, 1, 1, 3, 2, 3, 1, 1, 3, 3, 3, 3, 1, 3, 2, 2, 1, 1, 3, 3, 2, 2, 2, 1, 2, 1, 2, 1, 3, 2, 1, 2, 2, 3, 1, 2, 2, 2, 3, 2, 1, 2, 1, 2, 3, 3, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2, 2, 2, 1, 3, 3, 3, 3, 3, 1, 1, 3, 2, 1, 2, 1, 2, 2, 3, 2, 2, 2, 3, 1, 2, 1, 2, 2, 1, 1, 2, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, 3, 2, 3, 2, 3, 3, 2, 2, 1, 1, 1, 3, 3, 1, 1, 1, 3, 3, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 3, 1, 1, 2, 3, 2, 2, 1, 3, 1, 2, 3, 1, 2, 2, 2, 2, 3, 2, 3, 3, 1, 2, 1, 2, 3, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 3, 3, 3]

2.KNN算法实现

knn.py

#knn classification
def classify0(inX,dataSet,labels,k):
    """Return the majority label among the k rows of dataSet closest to inX.

    inX is the query vector, dataSet a 2-D array with one sample per row,
    labels the label of each row, and k the number of neighbours that vote.
    Distance is Euclidean.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per row so it can be subtracted element-wise.
    deltas = tile(inX, (rowCount, 1)) - dataSet
    dists = ((deltas ** 2).sum(axis=1)) ** 0.5  # axis=1 sums across each row
    nearestFirst = dists.argsort()  # row indices ordered by increasing distance
    votes = {}
    for rank in range(k):
        label = labels[nearestFirst[rank]]
        votes[label] = votes.get(label, 0) + 1
    # Stable sort by vote count, highest first; the winner is the first entry.
    ranked = sorted(votes, key=votes.get, reverse=True)
    return ranked[0]

knnTest.py

import kNN
group,labels = kNN.createDateSet()  # sample data used as the training set
print(kNN.classify0([0,0],group,labels,3))

结果

3.Example:使用KNN改进约会网站的配对效果

3.1.从文本文件中读取数据并解析

knn.py

#kNN readfile
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and a label list.

    Each non-blank line supplies three numeric feature columns followed by an
    integer class label in its last column.

    Returns (returnMat, classLabelVector): an n x 3 float array and a list of
    n ints, where n is the number of non-blank lines.
    """
    # The with-block closes the file even if parsing fails part-way.
    with open(filename) as fr:
        # Skip blank lines (e.g. a trailing newline) instead of crashing on them.
        rows = [line.strip().split('\t') for line in fr if line.strip()]
    returnMat = zeros((len(rows), 3))
    classLabelVector = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector

knnTest.py

1
2
3

import kNN
# Load the dating data, then show the matrix and the first 20 labels.
datingDataMat, datingLabels = kNN.file2matrix('datingTestSet2.txt')
print(datingDataMat)
print(datingLabels[0:20])

结果：

[[  4.09200000e+04   8.32697600e+00   9.53952000e-01]
 [  1.44880000e+04   7.15346900e+00   1.67390400e+00]
 [  2.60520000e+04   1.44187100e+00   8.05124000e-01]
 ..., 
 [  2.65750000e+04   1.06501020e+01   8.66627000e-01]
 [  4.81110000e+04   9.13452800e+00   7.28045000e-01]
 [  4.37570000e+04   7.88260100e+00   1.33244600e+00]]
[3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3]

3.2.使用matplotlib创建散点图

knnTest.py

import matplotlib
import matplotlib.pyplot as plt
from numpy import array  # array() is used below to scale the labels
fig = plt.figure()

# Marker size and colour both follow the class label (third and fourth arguments).
ax = fig.add_subplot(211)   # first cell of a 2-row, 1-column grid
ax.scatter(datingDataMat[:,1],datingDataMat[:,2],15.0*array(datingLabels),15.0*array(datingLabels))

bx = fig.add_subplot(212)   # second cell of a 2-row, 1-column grid
bx.scatter(datingDataMat[:,0],datingDataMat[:,1],15.0*array(datingLabels),15.0*array(datingLabels))
plt.show()

散点图使用datingDataMat矩阵的第2和第3列数据，分别表示玩视频游戏所耗时间百分比和每周所消费的冰淇淋公升数

结果

3.3.归一化数值

knn.py

#kNN normalization
def autoNorm(dataSet):
    """Min-max scale every column of dataSet.

    Returns (normDataSet, ranges, minVals), where ranges and minVals are the
    per-column max-min and minimum, and normDataSet is
    (dataSet - minVals) / ranges computed column by column.
    A column whose values are all equal (range 0) maps to 0 rather than NaN.
    """
    minVals = dataSet.min(0)    # per-column minimum
    maxVals = dataSet.max(0)    # per-column maximum
    ranges = maxVals - minVals
    # Divide by 1 where the range is 0 so constant columns do not produce 0/0.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the 1-D vectors to every row; no tile() needed.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals

knnTest.py

normMat,ranges,minVals = kNN.autoNorm(datingDataMat)
print(normMat)
print(ranges)
print(minVals)

结果

[[ 0.44832535  0.39805139  0.56233353]
 [ 0.15873259  0.34195467  0.98724416]
 [ 0.28542943  0.06892523  0.47449629]
 ..., 
 [ 0.29115949  0.50910294  0.51079493]
 [ 0.52711097  0.43665451  0.4290048 ]
 [ 0.47940793  0.3768091   0.78571804]]
[  9.12730000e+04   2.09193490e+01   1.69436100e+00]
[ 0.        0.        0.001156]

3.4.预测分类器效果

knn.py

#kNN verification
def datingClassTest():
    """Measure the classifier's error rate on the dating data set.

    The first 10% of the normalised rows are classified against the remaining
    rows with k=3; each prediction and the final error rate are printed.
    """
    hoRatio = 0.10  # fraction of rows held out for testing
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    # Everything after the held-out rows serves as the training set.
    trainRows = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]
    errorCount = 0
    for i in range(numTestVecs):
        predicted = classify0(normMat[i, :], trainRows, trainLabels, 3)
        actual = datingLabels[i]
        print("the classifier came back with:%d,the real answer is:%d"
              % (predicted, actual))
        if predicted != actual:
            errorCount += 1.0
    print("the total error rate is: %f " % (errorCount / float(numTestVecs)))

knnTest.py

kNN.datingClassTest()

结果

the classifier came back with:3,the real answer is:3
the classifier came back with:2,the real answer is:2
the classifier came back with:1,the real answer is:1
the classifier came back with:1,the real answer is:1
the classifier came back with:1,the real answer is:1
...,
the classifier came back with:2,the real answer is:2
the classifier came back with:1,the real answer is:1
the classifier came back with:3,the real answer is:1
the total error rate is: 0.050000

3.5.约会网站结果预测

knn.py

#kNN prediction
def classifyPerson():
    """Ask for three feature values on stdin and print the predicted category."""
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year:"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # Scale the query with the same minimum and range as the training data.
    query = (array([ffMiles, percentTats, iceCream]) - minVals) / ranges
    classifierResult = classify0(query, normMat, datingLabels, 3)
    # classifierResult is 1-based here, hence the -1 when indexing.
    verdicts = ('not at all', 'in small doses', 'in large doses')
    print('you will probably like this person:', verdicts[classifierResult - 1])

knnTest.py

kNN.classifyPerson()

结果

percentage of time spent playing video games?3
frequent flier miles earned per year:10000
liters of ice cream consumed per year?5
you will probably like this person: in small doses

4.手写识别系统

4.1图形转为向量

knn.py

#convert img to vector
# Turn a 32x32 text image into a 1x1024 vector
from os import listdir  #list filename of file
def img2vector(filename):
    """Read a 32x32 grid of digit characters and flatten it to a 1x1024 array.

    The first 32 characters of each of the first 32 lines are converted with
    int(); row i of the image lands at positions 32*i .. 32*i+31.
    """
    returnVect = zeros((1,1024))
    # The with-block closes the file even if a line is malformed.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0,32*i+j] = int(lineStr[j])
    return returnVect

knnTest.py

1
2
3

testVector = kNN.img2vector('testDigits/0_13.txt')
# Each slice below selects 31 values (0:31 and 32:63), i.e. positions 31 and 63 are not printed.
print(testVector[0,0:31])
print(testVector[0,32:63])

结果

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

4.2.k-近邻算法识别手写数字

knn.py

#test numbers using kNN classify0
def handwritingClassTest():
    """Classify every file in testDigits against trainingDigits and print the error rate.

    Assumes file names of the form <digit>_<n>.txt: the text before the
    underscore is parsed as the integer class label.
    """
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)   # number of training images
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        # Drop the extension, then take the part before "_" as the label.
        hwLabels.append(int(fileNameStr.split('.')[0].split('_')[0]))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = int(fileNameStr.split('.')[0].split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print('the classifier came back with:%d,the real answer is:%d'
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print('\nthe total number of errors is :%d' % errorCount)
    print('\nthe total error rate is :%f' % (errorCount / float(mTest)))

knnTest.py

kNN.handwritingClassTest()

结果

the classifier came back with:9,the real answer is:9
the classifier came back with:7,the real answer is:7
the classifier came back with:7,the real answer is:7
...,
the classifier came back with:4,the real answer is:4
the classifier came back with:5,the real answer is:5

the total number of errors is :11

the total error rate is :0.011628

这个算法并不高效，有900个测试图片；
对于每一个测试图片，首先要进行一次复杂度为1024的运算，将其转化为测试向量；
每个测试向量都要运行900次距离计算；
每次距离计算都是1024个浮点数的计算。。。
有没有一种算法更加节省空间和时间呢？
跟着《机器学习实战》这本书的步伐，很快我们就知道，有一种叫做kd树（k-d tree）的大佬，
据说是k-近邻算法的优化版，可以大大地节省计算开销。

python函数式编程之返回函数

Veröffentlicht am 2017-12-09

函数作为返回值

1.返回函数

返回函数时相关参数和变量保存在返回函数中

def lazy_sum(*args):
    """Return a zero-argument function that adds up args only when it is called."""
    def sum():
        # args comes from the enclosing call and is read when sum() runs.
        total = 0
        for value in args:
            total = total + value
        return total
    return sum
a = lazy_sum(1,2,3,4,5)
print(a)  # the returned function object
b = a()
print(b)  # the computed total

<function lazy_sum.<locals>.sum at 0x7f47807908c8>
15

每次调用lazy_sum时，都会返回一个新的函数，即使传入相同的参数

1
2
3

f1 = lazy_sum()
f2 = lazy_sum()
# Each call defines a fresh inner function, so the two results are different objects.
print(f1 == f2)

False

2.闭包

闭包：
1.内部函数可以引用外部函数的参数或变量（不一定就是上一层函数中的参数和变量）
2.外部函数把内部函数作为返回值，相关参数和变量都保存在内部函数中

def count():
    # Late-binding demo: every f reads the loop variable i when it is called,
    # not when it is created.
    fn = []
    for i in range(1, 4):
        def get():
            def f():
                return i * i  # i is looked up in count()'s scope at call time
            return f
        fn.append(get())
    return fn
f1,f2,f3 = count()
# The loop has already finished here, so all three closures see the final i.
print(f1(),f2(),f3())

9 9 9

直到调用f()才执行函数，此时i已经变成３,所以最终结果是9 9 9，而不是1,4,9
所以函数最好不要引用循环变量，以及后续会发生变化的量
如果一定要引用循环变量，再创建一个函数，用函数绑定当前循环变量的值

def count():
    """Return three closures that compute 1*1, 2*2 and 3*3."""
    def bind(value):
        # value is a parameter, so each call captures the current loop value.
        def square():
            return value * value
        return square
    return [bind(i) for i in range(1, 4)]
f1,f2,f3 = count()
print(f1(),f2(),f3())

1 4 9

3.练习

闭包返回计数器函数，每次返回递增整数

def createCounter():
    """Return a function that yields 1, 2, 3, ... on successive calls."""
    total = 0
    def counter():
        # nonlocal rebinds the enclosing variable instead of creating a local one
        nonlocal total
        total += 1
        return total
    return counter
countA = createCounter()
print(countA(),countA(),countA())

1 2 3

4.三种不同写法

#----------------------------------------------------
def count():
    """Variant 1: get() returns the inner function f, and the list stores f."""
    fn = []
    def get():
        def f():
            return 3*3
        return f        # return the function f itself
    fn.append(get())    # get() is called here, so f is what gets stored
    return fn
#----------------------------------------------------
def count():
    """Variant 2: get() returns f's result, and the list stores get itself."""
    fn = []
    def get():
        def f():
            return 3*3
        return f()      # call f and return its result
    fn.append(get)      # store get uncalled
    return fn
#----------------------------------------------------
def count():
    """Variant 3: store the function f directly, without a wrapper."""
    fn = []
    def f():
        return 3*3
    fn.append(f)        # store f uncalled
    return fn

最终结果都是一样的哈哈，现在的程序员，真的是……没毛病！！！

python函数式编程sorted

Veröffentlicht am 2017-12-09

#python函数式编程之sorted用法

sorted(iterable,key,reverse) 对所有可迭代的对象进行排序，返回一个list.
sorted主要用在list和dict中
iterable是一个可迭代对象
key制定排序规则，可以是一个函数，比如abs；也可以是指定的参数的某一个元素，参数来自可迭代对象中
reverse表示排序规则，reverse = True降序，reverse = False升序（默认）

example 1:对数字列表进行排序

1
2
3

a = [2, 6, -4, 1, -7, 9, 3]
# Natural ascending order.
b = sorted(a)
print(b)
# Ascending by absolute value.
c = sorted(a, key=abs)
print(c)

[-7, -4, 1, 2, 3, 6, 9]
[1, 2, 3, -4, 6, -7, 9]

example 2:利用key排序set

1
2
3

l = {('q', 1), ('e', 6), ('g', 7), ('b', 3)}
# Order the pairs by their second field.
nl = sorted(l, key=lambda pair: pair[1])
print(nl)

[('q', 1), ('b', 3), ('e', 6), ('g', 7)]

example 3:利用key排序字符串

1
2
3

str0 = 'Hello world Python what are you Doing'.split()
# Pass the method itself (no call parentheses) as the key: case-insensitive, descending.
a = sorted(
    str0,
    key=str.lower,
    reverse=True,
)
print(a)

['you', 'world', 'what', 'Python', 'Hello', 'Doing', 'are']

example 4:test按照不同元素排序

from operator import itemgetter
L = [('john', 'A', 1), ('jane', 'B', 2),('bob','C',1)]
# Order by the element at index 1.
ans = sorted(L, key=lambda record: record[1])
print(ans)
# Order by index 2 first, then by index 1 to break ties.
ans = sorted(L, key=itemgetter(2, 1))
print(ans)

[('john', 'A', 1), ('jane', 'B', 2), ('bob', 'C', 1)]
[('john', 'A', 1), ('bob', 'C', 1), ('jane', 'B', 2)]

python函数式编程reduce

Veröffentlicht am 2017-12-09

#python函数式编程之reduce用法

reduce(f,序列)
reduce用f在序列上的相邻元素上作用，然后把结果继续和下一个元素迭代计算
reduce返回什么结果关键看f函数

example 1: reduce计算和

from functools import reduce
def add(x,y):
    """Return x plus y."""
    total = x + y
    return total
# Fold add over the integers 1..9.
ans = reduce(add, range(1, 10))
print(ans)

example 2: reduce计算积的和

from functools import reduce
def fn(x,y):
    """Append digit y to the decimal number x."""
    shifted = x * 10
    return shifted + y
ans = reduce(fn, range(1, 10))
print(ans)

123456789

example 3: reduce把字符串转为数字

def char2num(a):
    """Map one digit character '0'..'9' to its integer value."""
    table = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
    value = table[a]
    return value
# map() converts each character, reduce() folds the digits into one number.
ans = reduce(fn, map(char2num, ['1', '2', '3']))
ans1 = reduce(fn, map(char2num, '123456'))
print(ans, ans1, end=' ')
print()

123 123456

example 4: 提炼为str2num函数

digit = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
def str2num(s):
    """Convert a string of decimal digits to an int using map and reduce."""
    def to_digit(ch):
        return digit[ch]
    def append_digit(acc, d):
        return acc * 10 + d
    digits = map(to_digit, s)
    return reduce(append_digit, digits)
s = '12345678'
print(str2num(s))

12345678

example 5: str2num函数用lambda表达式简化

1
2
3

def str2num(s):
    """Convert a string of decimal digits to an int (lambda-based version)."""
    digits = map(lambda ch: digit[ch], s)
    return reduce(lambda acc, d: acc * 10 + d, digits)
print(str2num('321'))

test1：将字符串转为首字母大写，其余小写

def normalize(name):
    """Return name with its first letter upper-cased and the rest lower-cased."""
    return name.lower().capitalize()
L1 = ['adam', 'LISA', 'barT']
L2 = [normalize(item) for item in L1]
print(L2)

['Adam', 'Lisa', 'Bart']

test2:求积

1
2
3

def prod(L):
    """Return the product of all numbers in L; an empty L gives 1."""
    # The initial value 1 keeps reduce() from raising TypeError on an empty sequence.
    return reduce(lambda x,y:x*y,L,1)
print('3*5*7*9 = ',prod([3,5,7,9]))

3*5*7*9 =  945

test3:字符串转为浮点数

digit = {'0':0,'1':1,'2':2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9}
def str2float(s):
    """Convert a decimal string such as '12.23' to a float.

    The digits are accumulated as one integer and divided once by the right
    power of ten, which avoids the rounding error of summing 0.1**j terms.
    Raises KeyError for any character other than a digit or a single '.'.
    """
    whole, _, frac = s.partition('.')
    mantissa = 0
    for ch in whole + frac:
        mantissa = mantissa * 10 + digit[ch]
    # One correctly rounded division instead of many inexact additions.
    return mantissa / 10 ** len(frac)
print(str2float('12.23'))

12.23

python函数式编程map

Veröffentlicht am 2017-12-09

#python函数式编程之map用法

map(f,iterator)把结果作为新的iterator返回
map返回什么结果看iterator是什么类型

example 1

def f(x):
    """Return the square of x."""
    squared = x * x
    return squared
ans = map(f, [1,2,3,4,5,6,7,8,9])  # lazily applies f to every element
print(ans)  # prints the map object itself, not the values
print(list(ans))  # list() drives the lazy iterator and collects the results

example 2

ans = list(map(str,[1,2,3,4,5,6,7,8,9]))
print(ans)

python高级特性-生成器

Veröffentlicht am 2017-12-09

#生成器generator

python中，一边循环一边计算的机制，称为生成器
利用生成器，可以减少内存使用

生成方法一生成器表达式

1
2
3

a = (x for x in range(10))
# Iterating the generator expression produces its values one at a time.
for b in a:
    print(b, end=' ')
print()

生成方法二生成器函数

def fib(maxn):
    """Yield the first maxn Fibonacci numbers, starting 1, 1, 2, ..."""
    produced = 0
    previous, current = 0, 1
    while produced < maxn:
        yield current
        previous, current = current, previous + current
        produced = produced + 1
a = fib(8)
print(a)  # a is a generator object

输出方法一for循环迭代输出
1
2
# A for loop calls next() on the generator until it is exhausted.
for x in a:
    print(x, end=' ')
print()

输出方法二next()迭代输出

while True:
    try:
        x = next(a)
    except StopIteration as e:
        # StopIteration carries the generator's return value in e.value.
        print('\nGenertor return value:', e.value)
        break
    else:
        print(x, end=' ')
print(a)

每次调用next()的时候，遇到yield返回，下一次从返回处继续执行
循环不断调用yield，就会不断中断，从而边计算边取值，达到节省内存效果
可以用next()方法调用，当没有更多元素时，抛出StopIteration错误

注意：生成器只能遍历一次

# A generator can be consumed only once, so at most one of the two loops below prints values.
for x in a: print(x,end=' ')
print()
for x in a: print(x,end=' ')
print()

利用生成器打印杨辉三角

def test(maxn):
    """Yield the first maxn rows of Pascal's (Yang Hui's) triangle as lists.

    Unlike the earlier version, maxn values below 2 are respected: maxn=0
    yields nothing and maxn=1 yields only [1].
    """
    row = [1]
    produced = 0
    while produced < maxn:
        yield row
        produced += 1
        # Each inner entry is the sum of the two entries above it.
        row = [1] + [row[i] + row[i + 1] for i in range(len(row) - 1)] + [1]
a = test(10)
for x in a:
    print(x)

python高级特性-迭代器

Veröffentlicht am 2017-12-09

迭代

可以直接作用于for循环的对象,称为可迭代对象Iterable:
1.集合数据类型：list,tuple,set,dict,str
2.generator:包括生成器和带yield的generator function

list迭代

1
2
3

   list = [1,2,3,4,5]
for a in list:print(a,end=' ')
print()

dict迭代

d = {'a': 1, 'b': 2, 'c': 3}
# Iterating a dict directly walks its keys.
for key in d:
    print(key, end=' ')
print()
# values() walks the stored values instead.
for value in d.values():
    print(value, end=' ')
print()

字符串迭代

for a in 'afsf':
    print(a,end=' ')
print()

判断是否可以迭代

# Iterable lives in collections.abc; the old collections alias was removed in Python 3.10.
from collections.abc import Iterable
# Test a list instance; the bare name list would be the builtin type unless it was rebound earlier.
print(isinstance([1,2,3,4,5],Iterable))

list变成索引-元素对的例子

for i,value in enumerate(['a','b','c']):
    print(i,value)

迭代器

可以被next()不断调用并返回下一个值的对象称为Iterator
集合数据类型list,tuple,set,dict,str都是Iterable，但都不是Iterator
for循环内部实际上就是先调用iter()把Iterable转为Iterator再进行循环迭代的

迭代器判断

# Iterator lives in collections.abc; the old collections alias was removed in Python 3.10.
from collections.abc import Iterator
print(isinstance((),Iterator))
print(isinstance({},Iterator))
print(isinstance([],Iterator))
print(isinstance('fsaf',Iterator))
# iter() turns each of these Iterables into an Iterator
from collections.abc import Iterator
print(isinstance(iter(()),Iterator))
print(isinstance(iter({}),Iterator))
print(isinstance(iter([]),Iterator))
print(isinstance(iter('fsaf'),Iterator))

为什么list不是Iterator

Iterator表示一个数据流，我们不能提前知道它的长度，只能通过next()按需计算下一个数据，它甚至可以是无限长的
Iterator是一个惰性序列，它的计算是惰性的，需要返回下一个数据时才会去计算
而集合数据类型是无法达到这样的功能的

python函数式编程-filter

Veröffentlicht am 2017-12-07

filter用法
1. filter(f,iterator)，返回由符合条件元素组成的新序列（Python 3中是惰性的迭代器，而不是列表）
2. filter和map类似，根据每个值是True还是False决定是否保留元素
3. filter返回的也是惰性序列，由于使用惰性计算，只有取filter结果的时候，才会每次真正筛选并返回筛选的元素．

example 1: 判断奇偶

def is_even(x):
    """Return True when x is divisible by 2."""
    return x%2 == 0
# The helper used to be called is_odd although it keeps the even numbers;
# the old name stays as an alias so existing references still resolve.
is_odd = is_even
ans = list(filter(is_even,[1,2,3,4,5,6,7,8,9]))
print(ans)

[2, 4, 6, 8]

example 2: 删除空字符

def not_empty(x):
    """Return a truthy value only when x has non-whitespace content."""
    if not x:
        return x  # None or '' is passed back unchanged (falsy)
    return x.strip()
ans = list(filter(not_empty, ['a', ' ', 'b', ' ']))
print(ans)

['a', 'b']

example 3: 埃拉托色尼筛法（埃氏筛法）求素数

 def _not_visible(n):
    return lambda x:x%n>0
    #返回的是一个匿名函数
    #比如_not_visible(3)返回函数lambda x:x%3>0，调用lambda x:x%3>0这个函数时传入x
def _odd_filter():
    n = 1
    while True:
        n = n+2
        yield n#yield节省内存空间
def primes():
    yield 2
    it = _odd_filter()
    while True:
        n = next(it)
        yield n#获取序列的第一个数
        it = filter(_not_visible(n),it)#每个元素对n进行％判断，留下质数作为新序列
for n in primes():
    if n < 100:
        print(n,end=' ')
    else:
        break
print()#换行

2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97

test : 判断200以内回文数
思路：转为字符串，判断回文串

def is_palidrome(n):
    """Return True when the decimal text of n reads the same in both directions."""
    text = str(n)
    # A string is a palindrome exactly when it equals its own reverse.
    return text == text[::-1]
ans = list(filter(is_palidrome, range(1, 200)))
print(ans)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 22, 33, 44, 55, 66, 77, 88, 99, 101, 111, 121, 131, 141, 151, 161, 171, 181, 191]

参考：
Python filter函数
 高阶函数filter

hello,github

Veröffentlicht am 2017-12-01

hello,github!