欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

Python3《机器学习实战》代码笔记(十五)--- MapReduce

程序员文章站 2022-03-24 13:46:35
...

参考资料:

机器学习实战

Mapper

'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 22:53:08
@LastEditTime: 2019-11-12 23:08:24
@FilePath: \\机器学习实战\\15-MapReduce\\mrMeanMapper.py
@Description: Mapper for the distributed computation of mean and variance
'''

import sys
import numpy as np


def read_input(file):
    """Lazily yield each line of *file* with trailing whitespace removed.

    *file* is any iterable of strings (for example ``sys.stdin``). Nothing
    is read until the returned generator is iterated.
    """
    yield from (raw_line.rstrip() for raw_line in file)


# Parse one float per stdin line. read_input is a generator, so the lines are
# consumed lazily here. Blank lines are skipped so that a stray empty line
# does not abort the whole split with a ValueError from float('').
values = [float(line) for line in read_input(sys.stdin) if line]
numInputs = len(values)

if numInputs:
    # np.asarray replaces np.mat, which was removed from the main NumPy
    # namespace in NumPy 2.0; the means computed below are unaffected.
    samples = np.asarray(values, dtype=float)
    sqInput = np.power(samples, 2)
    mean = float(np.mean(samples))
    meanSq = float(np.mean(sqInput))
else:
    # The mean of an empty array is NaN, which would poison any downstream
    # weighted sum. Emit zeros so a record with count 0 contributes nothing.
    mean = 0.0
    meanSq = 0.0

# output size, mean, mean(square values)
print("%d\t%f\t%f" % (numInputs, mean, meanSq))
# NOTE: any liveness message must go to stderr, e.g.
# print("report: still alive", file=sys.stderr), so that it does not mix
# into the records written to stdout.

Reducer

'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 23:02:44
@LastEditTime: 2019-11-12 23:25:23
@FilePath: \\机器学习实战\\15-MapReduce\\mrMeanReducer.py
@Description: Reducer for the distributed computation of mean and variance
'''

import sys


def read_input(file):
    """Generate the lines of *file*, each stripped of trailing whitespace.

    Accepts any iterable of strings and produces the stripped lines one at
    a time as the generator is consumed.
    """
    for raw_line in file:
        stripped = raw_line.rstrip()
        yield stripped


# Lazily read the mapper output from stdin (read_input is a generator).
mapperLines = read_input(sys.stdin)

# Split each record into its tab-separated fields:
#   count <TAB> mean <TAB> mean of squared values
# Blank lines are skipped so they do not crash the float() parsing below.
mapperOut = [line.split('\t') for line in mapperLines if line]

# Weighted sum of the per-mapper means (i.e. the sum of all values)
cumVal = 0.0
# Weighted sum of the per-mapper means of squares (i.e. the sum of squares)
cumSumSq = 0.0
# Total number of samples seen by all mappers
cumN = 0.0
for instance in mapperOut:
    nj = float(instance[0])
    if nj == 0:
        # An empty split carries no information; its mean fields may be NaN,
        # and 0 * NaN would turn the running totals into NaN.
        continue
    cumN += nj
    cumVal += nj * float(instance[1])
    cumSumSq += nj * float(instance[2])

# calculate means; guard against a total count of zero (no input at all)
if cumN > 0:
    mean = cumVal / cumN
    meanSq = cumSumSq / cumN
else:
    mean = 0.0
    meanSq = 0.0

# output size, mean, mean(square values)
print("%d\t%f\t%f" % (cumN, mean, meanSq))
# The status message goes to stderr; print(sys.stderr, ...) would instead
# write the stream object's repr and the message to stdout, corrupting the
# reducer's output record stream.
print("report: still alive", file=sys.stderr)