NumPy相关知识点汇总
导入库
import numpy as np
ndarray的属性
ndarray就是NumPy最具特色的一点,被称为N维数组对象,其实就是一个N维数组,下面先随便定义一个array,说说它的相关属性吧。
import numpy as np
score=np.array([[7, 37, 81, 90, 45],
[43, 26, 29, 27, 77],
[49, 64, 29, 82, 60],
[38, 0, 26, 20, 44],
[89, 0, 33, 83, 94],
[88, 11, 61, 4, 56],
[18, 69, 25, 98, 56],
[91, 59, 91, 76, 32]])
ndarray的形状
score.shape# (8,5)
ndarray的维度
score.ndim
ndarray的元素数量
score.size
ndarray的类型
score.dtype
生成数组的方法
生成0和1
np.zeros()
# 生成0和1的数组
np.zeros(shape=(3, 4), dtype="float32")
输出:
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]], dtype=float32)
np.ones()
np.ones(shape=(2, 3), dtype=np.int32)
输出:
array([[1, 1, 1],
[1, 1, 1]])
从现有数组中生成
np.array
data1=np.array(score)
np.copy
data2=np.copy(score)
np.asarray(浅拷贝:当输入本身已是ndarray时不复制数据,与原数组共享内存)
改变原数组的值后,生成的数组也会随之改变;而上面的np.array和np.copy是深拷贝,不受原数组修改的影响
data3=np.asarray(score)
生成固定范围的数组
np.linspace(0,10,100)
[0, 10]等距离的100个数
np.arange()
np.arange(0, 11, 5)
输出:
array([ 0, 5, 10])
np.arange(a, b, c)
取值范围为左闭右开区间[a, b),c是步长
生成随机数组
均匀分布
data1=np.random.uniform(low=-1, high=1, size=1000000)
数据图像:
显示数据图像的代码:
import matplotlib.pyplot as plt
# 创建画布
plt.figure(figsize=(20, 8), dpi=80)
# 绘制直方图
plt.hist(data1, 1000)
#显示图像
plt.show()
正态分布
data2=np.random.normal(loc=1.75, scale=0.1, size=1000000)#loc均值,scale标准差
数据图像:
绘图代码:
import matplotlib.pyplot as plt
# 创建画布
plt.figure(figsize=(20, 8), dpi=80)
# 绘制直方图
plt.hist(data2, 1000)
#显示图像
plt.show()
切片
1.先生成一组数据;
stock_change=np.random.normal(loc=0, scale=1, size=(8, 10))
数据展示:
array([[-1.23069929, -0.09689858, 0.34533148, -1.21129873, -0.49555038,
-1.15395071, -0.35045563, -0.19777101, 1.58952968, 0.33511842],
[-0.11232624, -1.51017588, 0.45292765, -1.60198671, -0.17582026,
-0.87866601, 0.83254436, -0.17215236, 0.35663974, 1.0173457 ],
[ 0.76208493, 2.01926225, 0.26000738, -0.2412584 , 0.10541199,
0.05698847, 0.65372375, 1.20697029, -0.41501513, 0.63331921],
[ 1.39089225, -1.3000879 , 0.37726128, -1.35670391, 1.26396375,
0.37078607, 0.54823076, 1.27194705, 0.03629491, 1.13593746],
[-0.02291549, 0.95949581, -0.38881878, -1.11513863, 0.82017542,
-0.34096471, -1.00410163, -0.93113913, -1.3155623 , 0.02619724],
[ 0.18065202, -0.61370958, 0.7928718 , -1.01097122, -0.77427605,
0.91698914, 0.67400046, -0.09777629, -0.33256196, 0.57330873],
[-0.39856787, 0.75992096, -0.68918329, 0.89445347, 0.0327467 ,
0.0996788 , -0.49912995, -0.89001777, -1.25522405, 0.27481911],
[-0.05903677, -1.37252017, -0.78561022, -1.19413853, 0.62745735,
0.12389948, -0.25296184, -1.74770253, -0.5348701 , -0.84352973]])
2.切片;
stock_change[0, 0:3]#第一行(索引为0的行)的前三个数据
结果展示:
array([-1.23069929, -0.09689858, 0.34533148])
形状修改
注:下面所用到的stock_change为自定义的一个数组。
- ndarray.reshape()返回新的ndarray,原始数据没有改
stock_change.reshape((10, 8))#修改形状
输出展示:
array([[-1.23069929, -0.09689858, 0.34533148, -1.21129873, -0.49555038,
-1.15395071, -0.35045563, -0.19777101],
[ 1.58952968, 0.33511842, -0.11232624, -1.51017588, 0.45292765,
-1.60198671, -0.17582026, -0.87866601],
[ 0.83254436, -0.17215236, 0.35663974, 1.0173457 , 0.76208493,
2.01926225, 0.26000738, -0.2412584 ],
[ 0.10541199, 0.05698847, 0.65372375, 1.20697029, -0.41501513,
0.63331921, 1.39089225, -1.3000879 ],
[ 0.37726128, -1.35670391, 1.26396375, 0.37078607, 0.54823076,
1.27194705, 0.03629491, 1.13593746],
[-0.02291549, 0.95949581, -0.38881878, -1.11513863, 0.82017542,
-0.34096471, -1.00410163, -0.93113913],
[-1.3155623 , 0.02619724, 0.18065202, -0.61370958, 0.7928718 ,
-1.01097122, -0.77427605, 0.91698914],
[ 0.67400046, -0.09777629, -0.33256196, 0.57330873, -0.39856787,
0.75992096, -0.68918329, 0.89445347],
[ 0.0327467 , 0.0996788 , -0.49912995, -0.89001777, -1.25522405,
0.27481911, -0.05903677, -1.37252017],
[-0.78561022, -1.19413853, 0.62745735, 0.12389948, -0.25296184,
-1.74770253, -0.5348701 , -0.84352973]])
- ndarray.resize()无返回值,原始数据已改
注:结果与reshape()相同
stock_change.resize((10, 8))#无返回值
- ndarray.T矩阵转置
stock_change.T#转置
结果展示:
array([[-1.23069929, -0.11232624, 0.76208493, 1.39089225, -0.02291549,
0.18065202, -0.39856787, -0.05903677],
[-0.09689858, -1.51017588, 2.01926225, -1.3000879 , 0.95949581,
-0.61370958, 0.75992096, -1.37252017],
[ 0.34533148, 0.45292765, 0.26000738, 0.37726128, -0.38881878,
0.7928718 , -0.68918329, -0.78561022],
[-1.21129873, -1.60198671, -0.2412584 , -1.35670391, -1.11513863,
-1.01097122, 0.89445347, -1.19413853],
[-0.49555038, -0.17582026, 0.10541199, 1.26396375, 0.82017542,
-0.77427605, 0.0327467 , 0.62745735],
[-1.15395071, -0.87866601, 0.05698847, 0.37078607, -0.34096471,
0.91698914, 0.0996788 , 0.12389948],
[-0.35045563, 0.83254436, 0.65372375, 0.54823076, -1.00410163,
0.67400046, -0.49912995, -0.25296184],
[-0.19777101, -0.17215236, 1.20697029, 1.27194705, -0.93113913,
-0.09777629, -0.89001777, -1.74770253],
[ 1.58952968, 0.35663974, -0.41501513, 0.03629491, -1.3155623 ,
-0.33256196, -1.25522405, -0.5348701 ],
[ 0.33511842, 1.0173457 , 0.63331921, 1.13593746, 0.02619724,
0.57330873, 0.27481911, -0.84352973]])
类型修改
ndarray.astype(type)
stock_change.astype("int32")
ndarray序列化到本地(ndarray.tostring(),返回数组原始数据的bytes;注意:该方法自NumPy 1.19起已弃用,新代码请改用功能相同的ndarray.tobytes())
stock_change.tostring()
数组去重
temp=np.array([[1,2,3,4], [3,4,5,6]])
np.unique(temp)
输出展示:
array([1, 2, 3, 4, 5, 6])
set(temp.flatten())
注:flatten()函数的作用是将多维数组展平为一维数组(返回的是一份拷贝)。
输出展示:
{1, 2, 3, 4, 5, 6}
ndarray运算
逻辑运算
#逻辑判断, 大于0.5为True, 否则为False
stock_change>0.5
结果展示:
array([[False, False, False, False, False, False, False, True, False,
True],
[False, True, False, False, False, False, False, False, False,
False],
[False, False, False, False, True, False, True, False, False,
False],
[False, False, False, True, False, True, False, False, True,
False],
[False, False, False, False, False, True, True, False, False,
False],
[False, True, True, True, False, True, False, False, True,
False],
[False, False, False, False, True, False, False, True, True,
False],
[False, False, True, False, False, False, False, True, False,
False]])
bool索引
#所有大于0.5的改为1.1(bool索引)
stock_change[stock_change>0.5]=1.1
通用判断函数
np.all()(只要有一个false就返回false,只有全是true才返回true)
# 判断stock_change[0:2, 0:5]是否全是上涨的
np.all(stock_change[0:2,0:5]>0)
np.any()(只要有一个true就返回true,只有全是false才返回false)
# 判断stock_change[0:2, 0:5]是否有上涨的
np.any(stock_change[0:2, 0:5])
np.where(bool数组, 元素为true时取的值, 元素为false时取的值)
np.where(temp>0, 1, 0)
np.logical_and(条件1, 条件2)(两个条件同时满足才为true)
# 复杂运算
np.logical_and(temp>0.5, temp<1)
np.logical_or(条件1, 条件2)(满足任意一个条件即为true)
np.logical_or(temp>0.5, temp<-0.5)
统计运算
- 统计指标函数
- min, max, mean, median, var, std
- np.函数名
- ndarray.方法名
temp.max()
np.max(temp)
temp.max(axis=0)# 按列求最大值
np.max(temp, axis=1)# 按行求最大值
- 返回最大值、最小值所在位置
- np.argmax(temp, axis=)
- np.argmin(temp, axis=)
np.argmax(temp, axis=1)#最大值所在的位置
数组间运算
统一加/减/乘/除一个数字;
score1+10
广播机制
- 维度相等
- shape(其中相对应的一个地方为1)
数据:
score
array([[1],
[5]], dtype=int64)
score1
array([[5, 0, 3, 2, 0, 2],
[1, 5, 4, 4, 3, 4]], dtype=int64)
score1+score
#array([[ 6, 1, 4, 3, 1, 3],
# [ 6, 10, 9, 9, 8, 9]], dtype=int64)
score1*score
#array([[ 5, 0, 3, 2, 0, 2],
# [ 5, 25, 20, 20, 15, 20]], dtype=int64)
矩阵运算
- 矩阵和二维数组的区别
- 矩阵可以用二维数组存储, 但是二维数组不一定是矩阵
- 矩阵存储
- ndarray 二维数组
- 矩阵乘法
- np.matmul(a, b)
- np.dot(a, b)
- a @ b
- 矩阵乘法
- matrix 数据结构
- 矩阵乘法
- a * b
- 矩阵乘法
- ndarray 二维数组
matrix存储矩阵
np_mat=np.mat([[85, 91],
[85, 84],
[79, 90],
[95, 82],
[75, 85],
[84, 87],
[79, 93],
[82, 79]])
ndarray存储矩阵
b=np.array([[0.3], [0.7]])
矩阵乘积
np.matmul(a, b)
np.dot(a, b)
a @ b
合并与分割
- 拼接
- 水平拼接
- np.hstack((a, b))
- np.concatenate((a, b), axis=1)
- 竖直拼接
- np.vstack((a, b))
- np.concatenate((a, b), axis=0)
- 水平拼接
- 分割
- np.split(a, b)(b为整数时,把a等分成b份;b为索引列表时,在这些索引位置处把a切开)
上一篇: 大数据如何变革商业 一张机票成就了Farecast
下一篇: YY使用截图功能的图文教程