Python3《机器学习实战》代码笔记(十五)--- MapReduce
程序员文章站
2022-03-24 13:46:35
...
参考资料:
Mapper
'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 22:53:08
@LastEditTime: 2019-11-12 23:08:24
@FilePath: \\机器学习实战\\15-MapReduce\\mrMeanMapper.py
@Description: 分布式计算均值和方差的mapper
'''
import sys
import numpy as np
# 读取数据
def read_input(file):
    """Lazily yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of strings, e.g. ``sys.stdin`` or an open text
            file.

    Yields:
        Each line with trailing whitespace (including the newline) stripped.
        Blank lines are yielded as empty strings, not dropped.
    """
    for line in file:
        yield line.rstrip()
def summarize_values(values):
    """Summarise one partition of numbers for the mean/variance job.

    Args:
        values: Iterable of numbers belonging to this mapper's input split.

    Returns:
        A ``(count, mean, mean_of_squares)`` tuple. For an empty partition
        the result is ``(0, 0.0, 0.0)`` instead of NaN means, so that a
        consumer computing ``count * mean`` is not contaminated by NaN.
    """
    # A plain ndarray replaces np.mat; the means are computed over all
    # elements either way.
    data = np.asarray(list(values), dtype=float)
    count = int(data.size)
    if count == 0:
        return 0, 0.0, 0.0
    return count, float(np.mean(data)), float(np.mean(np.square(data)))


def run_mapper():
    """Read numbers from stdin and print ``size<TAB>mean<TAB>mean(squares)``."""
    # Skip blank lines (e.g. a trailing newline) rather than crash in float().
    values = [float(line) for line in read_input(sys.stdin) if line.strip()]
    num_inputs, mean, mean_sq = summarize_values(values)
    # output size, mean, mean(square values)
    print("%d\t%f\t%f" % (num_inputs, mean, mean_sq))


if __name__ == "__main__":
    run_mapper()
# print(sys.stderr, "report: still alive")
Reducer
'''
@version: 0.0.1
@Author: tqrs
@dev: python3 vscode
@Date: 2019-11-12 23:02:44
@LastEditTime: 2019-11-12 23:25:23
@FilePath: \\机器学习实战\\15-MapReduce\\mrMeanReducer.py
@Description: 分布式计算均值和方差的reducer
'''
import sys
def read_input(file):
    """Lazily yield each line of *file* with trailing whitespace removed.

    Args:
        file: Any iterable of strings, e.g. ``sys.stdin`` or an open text
            file.

    Yields:
        Each line with trailing whitespace (including the newline) stripped.
        Blank lines are yielded as empty strings, not dropped.
    """
    for line in file:
        yield line.rstrip()
def combine_partitions(records):
    """Merge per-partition summaries into one global summary.

    Args:
        records: Iterable of sequences whose first three items are the
            partition size, its mean, and its mean of squared values
            (strings or numbers; each is converted with ``float``).

    Returns:
        A ``(total_count, mean, mean_of_squares)`` tuple of floats. When no
        partition has a positive size the result is ``(0.0, 0.0, 0.0)``
        rather than a ZeroDivisionError.
    """
    total_n = 0.0          # overall number of values
    weighted_sum = 0.0     # sum of size * mean  -> sum of values
    weighted_sum_sq = 0.0  # sum of size * mean_sq -> sum of squared values
    for fields in records:
        size = float(fields[0])
        if size <= 0:
            # An empty partition may carry NaN means; 0 * nan would turn
            # the whole aggregate into NaN, so skip it entirely.
            continue
        total_n += size
        weighted_sum += size * float(fields[1])
        weighted_sum_sq += size * float(fields[2])
    if total_n == 0:
        return 0.0, 0.0, 0.0
    return total_n, weighted_sum / total_n, weighted_sum_sq / total_n


def run_reducer():
    """Read mapper output from stdin and print the combined summary."""
    # Split each non-blank line into its tab-separated fields.
    mapper_out = [
        line.split('\t') for line in read_input(sys.stdin) if line.strip()
    ]
    total_n, mean, mean_sq = combine_partitions(mapper_out)
    # output size, mean, mean(square values)
    print("%d\t%f\t%f" % (total_n, mean, mean_sq))
    # Status message goes to stderr so it cannot pollute the result on
    # stdout (the original printed the stderr object itself to stdout).
    print("report: still alive", file=sys.stderr)


if __name__ == "__main__":
    run_reducer()
上一篇: 排序算法(一)冒泡排序
下一篇: Mapreduce提交任务