欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员、计算机编程相关知识。
您现在的位置是: 首页

数据分析01

程序员文章站 2022-04-03 21:15:29
...

1.ndarray数组

import numpy as np

# Build a one-dimensional ndarray from a plain Python list.
ary = np.array([1, 2, 3, 4, 5, 6])

print(ary, type(ary))
# Iterating over a 1-D ndarray yields its elements one at a time.
for i in ary:
    print(i)

# Vectorized operations: the operator is applied to every element.
print(ary * 10)
print(ary + 10)
print(ary > 2)
print(ary + ary)
print(ary * ary)

# Indexing and slicing use the same syntax as Python lists.
print(ary[0])
print(ary[::-1])
# tolist() converts the elements to native Python ints.  list(ary[::-1])
# would keep NumPy scalars, which NumPy >= 2 prints as np.int64(6) rather
# than 6.
reversed_list = ary[::-1].tolist()
print(reversed_list)

数据分析01

2.ndarray数组的创建

"""
ndarray对象的创建
"""
import numpy as np

# np.array(): build arrays from (nested) Python sequences.
ary01 = np.array((1, 2, 3, 4, 5, 6))
print(ary01, ary01.shape)

ary02 = np.array([(1, 2, 3), (4, 5, 6)])
print(ary02, ary02.shape)

# np.arange(): values from 0 up to (not including) 10 in steps of 2.
ary03 = np.arange(0, 10, step=2)
print(ary03)

# np.zeros(): an array filled with zeros.
ary04 = np.zeros(10)
print(ary04)
# Adding a scalar applies to every element.
print(ary04 + 1)

# The element type can be given as a Python type, a NumPy type or a name.
ary05 = np.zeros(10, dtype=int)
print(ary05)

ary06 = np.zeros(10, dtype=np.int32)
print(ary06)

# np.ones(): an array filled with ones, here with shape (2, 4).
ary07 = np.ones(shape=(2, 4), dtype=np.float32)
print(ary07)

ary08 = np.ones(shape=(2, 4), dtype=bool)
print(ary08)

# np.zeros_like() / np.ones_like(): take shape and dtype from another array.
ary09 = np.zeros_like(ary08)
print(ary09)

ary10 = np.ones_like(ary08)
print(ary10)

# Create five copies of 0.2 by offsetting an all-zero array.
a = np.zeros(5)
print(a + 0.2)

数据分析01

3.ndarray对象的属性

import numpy as np

# Start from the integers 1..18 in a flat, one-dimensional array.
ary = np.arange(1, 19)
print(ary, ary.shape)
# Output: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18] (18,)

# Assigning to .shape re-arranges the same 18 elements in place.
for new_shape in ((3, 6), (2, 3, 3), (3, 2, 3)):
    ary.shape = new_shape
    print(ary, ary.shape)
# Output for (3, 6):
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)
#
# Output for (2, 3, 3):
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)
#
# Output for (3, 2, 3):
# [[[ 1  2  3]
#   [ 4  5  6]]
#
#  [[ 7  8  9]
#   [10 11 12]]
#
#  [[13 14 15]
#   [16 17 18]]] (3, 2, 3)

# dtype: the element type of the array.
ary = np.arange(1, 7)
print(ary, ary.dtype)
# Output recorded by the author: [1 2 3 4 5 6] int64

# The author notes that assigning ary.dtype = 'int32' directly must not be
# used for conversion; astype() returns a converted array instead.
ary = ary.astype(np.int32)
print(ary, ary.dtype)
# Output: [1 2 3 4 5 6] int32


# size: total element count; len() is the length of the first axis.
ary = np.arange(1, 9).reshape((2, 4))
print(ary, ary.shape, ary.size, len(ary))
# Output:
# [[1 2 3 4]
#  [5 6 7 8]] (2, 4) 8 2

# Index access
ary.shape = (2, 2, 2)
print(ary, ary.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)
print('--' * 100)
# Chained indexing and a single comma-separated index reach the same element.
first_page = ary[0]
print(first_page)
print(first_page[0])
print(first_page[0][0])
print(ary[0, 0, 0])
# Output:
# [[1 2]
#  [3 4]]
# [1 2]
# 1
# 1

# Visit every element; np.ndindex yields the (i, j, k) index triples in the
# same order as three nested range() loops over the axes.
print('==================')
for i, j, k in np.ndindex(*ary.shape):
    print(ary[i, j, k])
# Output: the numbers 1 through 8, one per line.

4.自定义复合类型

import numpy as n

data = [0, 1, 2, 3, 4]
# Build the same values under several element types and show each result.
for type_name in ('int32', 'float64', 'bool', 'str'):
    ary = n.array(data, dtype=type_name)
    print(ary, ary.dtype)
# Output:
# [0 1 2 3 4] int32
# [0. 1. 2. 3. 4.] float64
# [False  True  True  True  True] bool
# ['0' '1' '2' '3' '4'] <U1

# Mixed input: every element is converted to a string, and the string width
# is taken from the longest element.
data = ['aaaaa', 1]
ary = n.array(data, dtype='str')
print(ary, ary.dtype)
# Output: ['aaaaa' '1'] <U5
print('----------------------')
# Each record holds (name, three scores, age).
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15),
]

# Way 1: a comma-separated dtype string.  No field names are given, so the
# fields are accessed by position below.
ary = n.array(data, dtype='U3, 3int32, int32')
print(ary, ary.shape)
print(ary[1][2])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)] (3,)
# 16

print('*' * 30)
# Way 2: a list of (field name, type[, shape]) tuples.
# The age field is a scalar, so no shape is given: the (type, 1) spelling is
# deprecated (FutureWarning) and is slated to mean a sub-array of shape (1,).
# The name field is spelled 'U2' so the 2 cannot be mistaken for a shape.
list_dtype = [
    ('name', 'U2'),
    ('scores', 'int32', 3),
    ('age', 'int32'),
]
ary = n.array(data, dtype=list_dtype)
print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['scores'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

print('*' * 30)
# Way 3: a dict with parallel 'names' and 'formats' lists.
dict_dtype = {
    'names': ['name', 'score', 'age'],
    'formats': ['U3', '3int32', 'int32'],
}
ary = n.array(data, dtype=dict_dtype)

print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['score'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

print('*' * 30)
# Date strings given at year, month, day and second precision.
datestrs = n.array(['2011', '2011-02', '2011-03-01', '2011-04-01 10:10:10'])

# Convert the strings to datetime64, first at hour and then at day precision.
for dtype_name in ('datetime64[h]', 'datetime64[D]'):
    dates = datestrs.astype(dtype_name)
    print(dates)
# Output:
# ['2011-01-01T00' '2011-02-01T00' '2011-03-01T00' '2011-04-01T10']
# ['2011-01-01' '2011-02-01' '2011-03-01' '2011-04-01']

# Subtracting two dates gives the elapsed time between them.
print(dates[1] - dates[0])
# Output: 31 days

5.维度操作

"""
维度操作
"""
import numpy as np

# Reshaping as a view: the result shares its data with the source array.
a = np.arange(1, 9)
print(a, a.shape)
# Output: [1 2 3 4 5 6 7 8] (8,)
b = a.reshape((2, 4))
print(b)
# Output:
# [[1 2 3 4]
#  [5 6 7 8]]

a[0] = 999
print(b, b.shape)
# Output -- only a[0] was written, yet b shows the new value as well:
# [[999   2   3   4]
#  [  5   6   7   8]] (2, 4)

c = b.ravel()  # collapse to one dimension
print(c, c.shape)
print(b, "this is b")
# Output:
# [999   2   3   4   5   6   7   8] (8,)
# [[999   2   3   4]
#  [  5   6   7   8]] this is b

# Reshaping as a copy: the result has its own data.
print('*' * 30)
d = c.flatten()  # collapse to one dimension
print(d, d.shape)
# Output: [999   2   3   4   5   6   7   8] (8,)
c[0] = 1
print(d, d.shape)
# Output -- d is unaffected by the write to c:
# [999   2   3   4   5   6   7   8] (8,)

# Reshaping in place: the array object itself changes shape.
print('*' * 50)
c.resize((2, 2, 2))
print(c, c.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)
c.shape = (8,)
print(c, c.shape)
# Output: [1 2 3 4 5 6 7 8] (8,)

6.高维数组的切片

# Slicing multi-dimensional arrays
# The slices for the individual axes are separated by commas ","
import numpy as np

a = np.arange(1, 19).reshape((3, 6))
print(a, a.shape)
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)

# First two rows, all columns.
print(a[0:2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]]

# Every second row, all columns.
print(a[0::2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [13 14 15 16 17 18]]

# First two rows, every second column.
print(a[0:2, 0::2])
# Output:
# [[ 1  3  5]
#  [ 7  9 11]]

# Switch to three dimensions: 2 pages of 3 rows by 3 columns.
a.resize((2, 3, 3))
print(a, a.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)
print('*' * 30)

# From every page take the first two rows and the first two columns.
print(a[:, 0:2, 0:2])
# Output:
# [[[ 1  2]
#   [ 4  5]]
#
#  [[10 11]
#   [13 14]]]

7.数组的掩码

"""
掩码 - 如何从一个数组当中获取一个数组的子集
"""
import numpy as np

a = np.arange(1, 10)

# A boolean mask marks which elements to keep: here, the odd values.
mask = (a % 2) == 1

print(a)
print(mask)
print(a[mask])
# Output of the three prints above:
# [1 2 3 4 5 6 7 8 9]
# [ True False  True False  True False  True False  True]
# [1 3 5 7 9]

# A mask can also select the elements to overwrite.
a[mask] = 999
print(a)
# Output: [999   2 999   4 999   6 999   8 999]

a = np.arange(1, 100)
multiple_of_7 = a % 7 == 0
print(a[multiple_of_7])
# Combine two conditions element-wise with bitwise AND.
print(a[multiple_of_7 & (a % 3 == 0)])
# Output:
# [ 7 14 21 28 35 42 49 56 63 70 77 84 91 98]
# [21 42 63 84]

print('*' * 30)
# Index mask: a list of positions picks (and orders) the elements.
products = np.array(['Xiaomi', 'Oppo', 'Vivo', 'Apple', 'Huawei'])
sort_indices = [0, 2, 1, 4, 3]
print(products[sort_indices])
# Output: ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple']

# Positions may repeat, so the result can be longer than the source array.
sort_indices = sort_indices + [0] * 4
print(products[sort_indices])
# Output:
# ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple' 'Xiaomi' 'Xiaomi' 'Xiaomi'
#  'Xiaomi']

# Each record holds (name, three scores, age).
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15),
]
record_dtype = {
    'names': ['name', 'score', 'age'],
    'formats': ['U3', '3int32', 'int32'],
}
ary = np.array(data, dtype=record_dtype)

# ary['age'] --> [15, 16, 15] --> [True, False, True]
is_fifteen = ary['age'] == 15
print(ary[is_fifteen])
# Output: [('zs', [90, 80, 85], 15) ('ww', [95, 85, 95], 15)]

8.数组的组合

"""
demo08_stack.py 组合
"""
import numpy as np

a = np.arange(1, 7).reshape(2, 3)
b = np.arange(7, 13).reshape(2, 3)
print(a)
print(b)
# Output:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# Horizontal: join side by side, then split back into two halves.
c = np.hstack((a, b))
print(c)
# Output:
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]
a, b = np.hsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# Vertical: join one on top of the other, then split back.
c = np.vstack((a, b))
print(c, c.shape)
# Output:
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]] (4, 3)
a, b = np.vsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# Depth: join along a new third axis, then split back.
c = np.dstack((a, b))
# Print the shape too, as for the vertical case; the recorded output below
# includes it.
print(c, c.shape)
# Output:
# [[[ 1  7]
#   [ 2  8]
#   [ 3  9]]
#
#  [[ 4 10]
#   [ 5 11]
#   [ 6 12]]] (2, 3, 2)
a, b = np.dsplit(c, 2)
print(a)
# Output:
# [[[1]
#   [2]
#   [3]]
#
#  [[4]
#   [5]
#   [6]]]
print(b)
# Output:
# [[[ 7]
#   [ 8]
#   [ 9]]
#
#  [[10]
#   [11]
#   [12]]]

ary = np.arange(1, 5)
print(ary)
# Output: [1 2 3 4]

# Arrays of different lengths are awkward to combine, so the shorter one can
# be padded first: two elements of -1 are added before and after the data.
ary = np.pad(ary, (2, 2), mode='constant', constant_values=(-1, -1))
print(ary)
# Output: [-1 -1  1  2  3  4 -1 -1]
相关标签: 数据分析