数据分析01
程序员文章站
2022-04-03 21:15:29
...
1.ndarray数组
import numpy as np
# Build a one-dimensional ndarray from a plain Python list and show its type.
ary = np.array([1, 2, 3, 4, 5, 6])
print(ary, type(ary))

# Iterating over a 1-D array yields its elements one by one.
for i in ary:
    print(i)

# Vectorised operations: every operator below is applied element-wise.
print(ary * 10)
print(ary + 10)
print(ary > 2)
print(ary + ary)
print(ary * ary)

# Indexing and slicing use the same syntax as Python sequences.
print(ary[0])
print(ary[::-1])
# tolist() converts the elements to native Python ints, so this prints
# [6, 5, 4, 3, 2, 1]. list(ary[::-1]) keeps NumPy scalar objects instead,
# which NumPy 2 displays as np.int64(6), np.int64(5), ...
print(ary[::-1].tolist())
2.ndarray数组的创建
"""
ndarray对象的创建
"""
import numpy as np
# np.array(): build arrays from (nested) Python lists.
ary01 = np.array([1, 2, 3, 4, 5, 6])
print(ary01, ary01.shape)

ary02 = np.array([[1, 2, 3],
                  [4, 5, 6]])
print(ary02, ary02.shape)

# np.arange(): evenly spaced values, here 0 up to (not including) 10, step 2.
ary03 = np.arange(0, 10, step=2)
print(ary03)

# np.zeros(): an array filled with zeros.
ary04 = np.zeros(shape=10)
print(ary04)
print(ary04 + 1)

# The element type can be given as a Python type or as a dtype name.
ary05 = np.zeros(10, int)
print(ary05)

ary06 = np.zeros(10, dtype=np.int32)
print(ary06)

# np.ones(): same idea, filled with ones; the shape may be a tuple.
ary07 = np.ones(shape=(2, 4), dtype=np.float32)
print(ary07)

ary08 = np.ones(shape=(2, 4), dtype=bool)
print(ary08)

# np.zeros_like() / np.ones_like(): take the layout from an existing array.
ary09 = np.zeros_like(ary08)
print(ary09)

ary10 = np.ones_like(ary08)
print(ary10)

# Five copies of 0.2: start from zeros and add the constant element-wise.
a = np.zeros(shape=5)
print(a + 0.2)
3.ndarray对象的属性
import numpy as np
# ---- shape: the dimensions of an array -------------------------------------
ary = np.arange(1, 19)
print(ary, ary.shape)
# Output:
# [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18] (18,)

# Assigning to .shape re-dimensions the array in place.
ary.shape = (3, 6)
print(ary, ary.shape)
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)

ary.shape = (2, 3, 3)
print(ary, ary.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)

ary.shape = (3, 2, 3)
print(ary, ary.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]]
#
#  [[ 7  8  9]
#   [10 11 12]]
#
#  [[13 14 15]
#   [16 17 18]]] (3, 2, 3)

# ---- dtype: the element type -----------------------------------------------
ary = np.arange(1, 7)
print(ary, ary.dtype)
# Output (int64 on the author's platform; the default integer type may differ):
# [1 2 3 4 5 6] int64

# Do not convert by assigning to ary.dtype (the original author notes that
# this cannot be used that way); astype() returns a converted array instead.
ary = ary.astype('int32')
print(ary, ary.dtype)
# Output:
# [1 2 3 4 5 6] int32

# ---- size: total element count; len() is the length of the first axis ------
ary = np.arange(1, 9).reshape(2, 4)
print(ary, ary.shape, ary.size, len(ary))
# Output:
# [[1 2 3 4]
#  [5 6 7 8]] (2, 4) 8 2

# ---- element access by index -----------------------------------------------
ary.shape = (2, 2, 2)
print(ary, ary.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)

print('--' * 100)
print(ary[0])
print(ary[0][0])
print(ary[0][0][0])
print(ary[0, 0, 0])  # same element as ary[0][0][0], using one tuple index
# Output:
# [[1 2]
#  [3 4]]
# [1 2]
# 1
# 1

# Visit every element with three nested loops, one loop per axis.
print('==================')
for i in range(ary.shape[0]):
    for j in range(ary.shape[1]):
        for k in range(ary.shape[2]):
            print(ary[i, j, k])
# Output: the numbers 1 through 8, one per line.
4.自定义复合类型
import numpy as n
# ---- Converting one list to different scalar dtypes -------------------------
data = [0, 1, 2, 3, 4]

ary = n.array(data, dtype='int32')
print(ary, ary.dtype)
# Output:
# [0 1 2 3 4] int32

ary = n.array(data, dtype='float64')
print(ary, ary.dtype)
# Output:
# [0. 1. 2. 3. 4.] float64

ary = n.array(data, dtype='bool')
print(ary, ary.dtype)
# Output (0 becomes False, every other value True):
# [False  True  True  True  True] bool

ary = n.array(data, dtype='str')
print(ary, ary.dtype)
# Output:
# ['0' '1' '2' '3' '4'] <U1

data = ['aaaaa', 1]
ary = n.array(data, dtype='str')
print(ary, ary.dtype)
# Output (the string width follows the longest element):
# ['aaaaa' '1'] <U5

print('----------------------')

# ---- Compound (structured) dtypes -------------------------------------------
# Each record is (name, three scores, age).
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15),
]

# Form 1: a comma-separated format string; fields are reached by position.
ary = n.array(data, dtype='U3, 3int32, int32')
print(ary, ary.shape)
print(ary[1][2])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)] (3,)
# 16

print('*' * 30)

# Form 2: a list of (field name, type, size) tuples; fields are reachable by
# name. The scalar 'age' field is written without a trailing 1: NumPy
# deprecates (type, 1) as a synonym for a scalar field and plans to read it
# as a shape-(1,) sub-array instead.
ary = n.array(data, dtype=[('name', 'str', 2),
                           ('scores', 'int32', 3),
                           ('age', 'int32'),
                           ])
print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['scores'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

print('*' * 30)

# Form 3: a dict with parallel 'names' and 'formats' lists.
ary = n.array(data, dtype={'names': ['name', 'score', 'age'],
                           'formats': ['U3', '3int32', 'int32'],
                           })
print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['score'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

print('*' * 30)

# ---- Date strings to datetime64 ---------------------------------------------
# The unit in brackets (h = hour, D = day) sets the stored precision.
datestrs = n.array(['2011', '2011-02', '2011-03-01', '2011-04-01 10:10:10'])
dates = datestrs.astype('datetime64[h]')
print(dates)
# Output:
# ['2011-01-01T00' '2011-02-01T00' '2011-03-01T00' '2011-04-01T10']

dates = datestrs.astype('datetime64[D]')
print(dates)
print(dates[1] - dates[0])  # subtracting two dates gives a day count
# Output:
# ['2011-01-01' '2011-02-01' '2011-03-01' '2011-04-01']
# 31 days
5.维度操作
"""
维度操作
"""
import numpy as np
# ---- Reshaping as a view: reshape() and ravel() -----------------------------
a = np.arange(1, 9)
print(a, a.shape)
# Output:
# [1 2 3 4 5 6 7 8] (8,)

b = a.reshape((2, 4))
print(b)
# Output:
# [[1 2 3 4]
#  [5 6 7 8]]

# Only a[0] is written, yet b shows the new value as well.
a[0] = 999
print(b, b.shape)
# Output:
# [[999   2   3   4]
#  [  5   6   7   8]] (2, 4)

c = b.ravel()  # collapse to one dimension
print(c, c.shape)
print(b, "this is b")
# Output:
# [999   2   3   4   5   6   7   8] (8,)
# [[999   2   3   4]
#  [  5   6   7   8]] this is b

# ---- Reshaping as a copy: flatten() -----------------------------------------
print('*' * 30)
d = c.flatten()  # collapse to one dimension
print(d, d.shape)
# Output:
# [999   2   3   4   5   6   7   8] (8,)

# Writing to c afterwards leaves d unchanged.
c[0] = 1
print(d, d.shape)
# Output:
# [999   2   3   4   5   6   7   8] (8,)

# ---- Reshaping in place: resize() and assignment to .shape ------------------
print('*' * 50)
c.resize((2, 2, 2))
print(c, c.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)

c.shape = (8,)
print(c, c.shape)
# Output:
# [1 2 3 4 5 6 7 8] (8,)
6.高维数组的切片
# 高维数组切片
# 多个切片之间用逗号","隔开
import numpy as np
# A 3 x 6 grid holding the numbers 1..18.
a = np.arange(1, 19).reshape((3, 6))
print(a, a.shape)
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)

# First two rows, all columns.
print(a[0:2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]]

# Every second row, all columns.
print(a[0::2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [13 14 15 16 17 18]]

# First two rows, every second column.
print(a[0:2, 0::2])
# Output:
# [[ 1  3  5]
#  [ 7  9 11]]

# Re-dimension in place to 2 pages x 3 rows x 3 columns.
a.resize((2, 3, 3))
print(a, a.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)

print('*' * 30)
# From every page, take the first two rows and the first two columns.
print(a[:, 0:2, 0:2])
# Output:
# [[[ 1  2]
#   [ 4  5]]
#
#  [[10 11]
#   [13 14]]]
7.数组的掩码
"""
掩码 - 如何从一个数组当中获取一个数组的子集
"""
import numpy as np
# ---- Boolean masks ----------------------------------------------------------
a = np.arange(1, 10)
mask = (a % 2) == 1  # True wherever the element is odd
print(a)
print(mask)
print(a[mask])
# Output of the three prints above:
# [1 2 3 4 5 6 7 8 9]
# [ True False  True False  True False  True False  True]
# [1 3 5 7 9]

# A mask can also select the positions to assign to.
a[mask] = 999
print(a)
# Output:
# [999   2 999   4 999   6 999   8 999]

a = np.arange(1, 100)
print(a[(a % 7) == 0])
print(a[((a % 7) == 0) & ((a % 3) == 0)])  # bitwise AND combines the two masks
# Output:
# [ 7 14 21 28 35 42 49 56 63 70 77 84 91 98]
# [21 42 63 84]

print('*' * 30)

# ---- Index masks: a list of positions selects and reorders ------------------
products = np.array(['Xiaomi', 'Oppo', 'Vivo', 'Apple', 'Huawei'])
sort_indices = [0, 2, 1, 4, 3]
print(products[sort_indices])
# Output:
# ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple']

# Positions may repeat, so the result can be longer than the source.
sort_indices = [0, 2, 1, 4, 3, 0, 0, 0, 0]
print(products[sort_indices])
# Output:
# ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple' 'Xiaomi' 'Xiaomi' 'Xiaomi'
#  'Xiaomi']

# ---- Masking records of a structured array ----------------------------------
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15),
]
ary = np.array(
    data,
    dtype={
        'names': ['name', 'score', 'age'],
        'formats': ['U3', '3int32', 'int32'],
    },
)
# ary['age'] --> [15, 16, 15] --> [True, False, True]
print(ary[ary['age'] == 15])
# Output:
# [('zs', [90, 80, 85], 15) ('ww', [95, 85, 95], 15)]
8.数组的组合
"""
demo08_stack.py 组合
"""
import numpy as np
# Two 2 x 3 arrays used as the inputs for every stacking example below.
a = np.arange(1, 7).reshape(2, 3)
b = np.arange(7, 13).reshape(2, 3)
print(a)
print(b)
# Output:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# ---- Horizontal: join side by side, then split back into two halves ---------
c = np.hstack((a, b))
print(c)
# Output:
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]

a, b = np.hsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# ---- Vertical: join one on top of the other, then split back ----------------
c = np.vstack((a, b))
print(c, c.shape)
# Output:
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]] (4, 3)

a, b = np.vsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# ---- Depth: pair up matching elements along a new third axis ----------------
c = np.dstack((a, b))
# Print the shape as well, so the output matches the documented result and the
# vertical example above.
print(c, c.shape)
# Output:
# [[[ 1  7]
#   [ 2  8]
#   [ 3  9]]
#
#  [[ 4 10]
#   [ 5 11]
#   [ 6 12]]] (2, 3, 2)

a, b = np.dsplit(c, 2)
print(a)
# Output:
# [[[1]
#   [2]
#   [3]]
#
#  [[4]
#   [5]
#   [6]]]
print(b)
# Output:
# [[[ 7]
#   [ 8]
#   [ 9]]
#
#  [[10]
#   [11]
#   [12]]]

# ---- Padding ----------------------------------------------------------------
ary = np.arange(1, 5)
print(ary)
# Output:
# [1 2 3 4]

# Arrays of different lengths are awkward to combine; np.pad() can extend one.
# Here two -1 values are added at each end.
ary = np.pad(ary, pad_width=(2, 2), mode='constant', constant_values=-1)
print(ary)
# Output:
# [-1 -1  1  2  3  4 -1 -1]
上一篇: python实现实时肺炎患者数据分布图