数据分析01

程序员文章站 2022-04-03 21:15:29

...

文章目录

1.ndarray数组
2.ndarray数组的创建
3.ndarray对象的属性
4.自定义复合类型
5.维度操作
6.高维数组的切片
7.数组的掩码
8.数组的组合

1.ndarray数组

import numpy as np

# Build a one-dimensional ndarray from a plain Python list.
ary = np.array([1, 2, 3, 4, 5, 6])

print(ary, type(ary))

# Iterating over a 1-D ndarray yields its elements one at a time.
for i in ary:
    print(i)

# Vectorised operations: arithmetic and comparisons are applied to every
# element at once, without an explicit Python loop.
for result in (ary * 10, ary + 10, ary > 2, ary + ary, ary * ary):
    print(result)

# Indexing and slicing follow the same rules as Python lists.
reversed_ary = ary[::-1]
print(ary[0])
print(reversed_ary)
print(list(reversed_ary))

数据分析01

2.ndarray数组的创建

"""
Creating ndarray objects.
"""
import numpy as np

# np.array(): build an array from a (nested) Python list.
ary01 = np.array([1, 2, 3, 4, 5, 6])
print(ary01, ary01.shape)

ary02 = np.array([[1, 2, 3], [4, 5, 6]])
print(ary02, ary02.shape)

# np.arange(start, stop, step): evenly spaced values, stop excluded.
ary03 = np.arange(0, 10, 2)
print(ary03)

# np.zeros(): an array filled with zeros.  No dtype is given here, so
# NumPy's default element type is used.
ary04 = np.zeros(shape=10)
print(ary04)
print(ary04 + 1)

# The element type can be given as a Python type ...
ary05 = np.zeros(shape=10, dtype=int)
print(ary05)

# ... or as an explicit NumPy type.
ary06 = np.zeros(shape=10, dtype=np.int32)
print(ary06)

# np.ones(): same idea, filled with ones; the shape may be a tuple.
ary07 = np.ones(shape=(2, 4), dtype=np.float32)
print(ary07)

ary08 = np.ones(shape=(2, 4), dtype=bool)
print(ary08)

# np.zeros_like() / np.ones_like(): take shape and dtype from an
# existing array.
ary09 = np.zeros_like(ary08)
print(ary09)

ary10 = np.ones_like(ary08)
print(ary10)

# Five copies of 0.2: start from zeros and add the value to every element.
a = np.zeros(shape=5)
print(a + 0.2)

数据分析01

3.ndarray对象的属性

import numpy as np

# ---------------------------------------------------------------- shape
ary = np.arange(1, 19)
print(ary, ary.shape)
# Output: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18] (18,)

# Assigning to .shape changes the dimensions of the array in place.
ary.shape = (3, 6)
print(ary, ary.shape)
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)

ary.shape = (2, 3, 3)
print(ary, ary.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)

ary.shape = (3, 2, 3)
print(ary, ary.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]]
#
#  [[ 7  8  9]
#   [10 11 12]]
#
#  [[13 14 15]
#   [16 17 18]]] (3, 2, 3)

# ---------------------------------------------------------------- dtype
ary = np.arange(1, 7)
print(ary, ary.dtype)
# Output: [1 2 3 4 5 6] int64
# (the default integer width may differ between platforms)

# Do not convert element types by assigning to the attribute
# (ary.dtype = 'int32'); use astype(), which returns a converted array.
ary = ary.astype(np.int32)
print(ary, ary.dtype)
# Output: [1 2 3 4 5 6] int32

# ----------------------------------------------------------------- size
# .size is the total number of elements; len() is the length of the
# first dimension only.
ary = np.arange(1, 9).reshape((2, 4))
print(ary, ary.shape, ary.size, len(ary))
# Output:
# [[1 2 3 4]
#  [5 6 7 8]] (2, 4) 8 2

# ------------------------------------------------------------- indexing
ary.shape = (2, 2, 2)
print(ary, ary.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)

print('-' * 200)
print(ary[0])
print(ary[0][0])
print(ary[0][0][0])
print(ary[0, 0, 0])  # one multi-dimensional index instead of chained ones
# Output:
# [[1 2]
#  [3 4]]
# [1 2]
# 1
# 1

# Visit every element with three nested for loops, one per dimension.
print('=' * 18)
pages, rows, cols = ary.shape
for i in range(pages):
    for j in range(rows):
        for k in range(cols):
            print(ary[i, j, k])
# Output:
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8

4.自定义复合类型

import numpy as np

# ------------------------------------------------- simple element types
# The same Python list converted to different element types.
data = [0, 1, 2, 3, 4]
ary = np.array(data, dtype='int32')
print(ary, ary.dtype)
# Output: [0 1 2 3 4] int32

ary = np.array(data, dtype='float64')
print(ary, ary.dtype)
# Output: [0. 1. 2. 3. 4.] float64

ary = np.array(data, dtype='bool')
print(ary, ary.dtype)
# Output: [False  True  True  True  True] bool

ary = np.array(data, dtype='str')
print(ary, ary.dtype)
# Output: ['0' '1' '2' '3' '4'] <U1

# With dtype='str' the string width is taken from the longest element.
data = ['aaaaa', 1]
ary = np.array(data, dtype='str')
print(ary, ary.dtype)
# Output: ['aaaaa' '1'] <U5

# --------------------------------------------- compound (structured) types
print('----------------------')
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15)
]

# Way 1: a comma-separated format string; fields are accessed by position.
ary = np.array(data, dtype='U3, 3int32, int32')
print(ary, ary.shape)
print(ary[1][2])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)] (3,)
# 16

# Way 2: a list of (field name, element type[, shape]) tuples.
# 'age' is a scalar field, so no shape is given: a trailing 1 is deprecated
# and is read as a shape-(1,) sub-array by newer NumPy releases, which
# would print [15] instead of 15.
print('*' * 30)
student_dtype = np.dtype([
    ('name', 'U2'),
    ('scores', 'int32', 3),
    ('age', 'int32'),
])
ary = np.array(data, dtype=student_dtype)
print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['scores'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

# Way 3: a dict with parallel 'names' and 'formats' lists.
print('*' * 30)
ary = np.array(data, dtype={'names': ['name', 'score', 'age'],
                            'formats': ['U3', '3int32', 'int32']
                            })

print(ary)
print(ary[0][0])
print(ary[0]['name'])
print(ary['score'])
# Output:
# [('zs', [90, 80, 85], 15) ('ls', [92, 81, 83], 16)
#  ('ww', [95, 85, 95], 15)]
# zs
# zs
# [[90 80 85]
#  [92 81 83]
#  [95 85 95]]

# ------------------------------------------------------------ date types
# Date strings of varying precision are converted to datetime64 values;
# the unit in brackets (h = hour, D = day) sets the stored precision.
print('*' * 30)
datestrs = np.array(['2011', '2011-02', '2011-03-01', '2011-04-01 10:10:10'])

dates = datestrs.astype('datetime64[h]')
print(dates)
# Output:
# ['2011-01-01T00' '2011-02-01T00' '2011-03-01T00' '2011-04-01T10']

dates = datestrs.astype('datetime64[D]')
print(dates)
print(dates[1] - dates[0])  # subtracting two dates gives a timedelta64
# Output:
# ['2011-01-01' '2011-02-01' '2011-03-01' '2011-04-01']
# 31 days

5.维度操作

"""
Dimension operations.
"""
import numpy as np

# ------------------------------------------------- view-based reshaping
a = np.arange(1, 9)
print(a, a.shape)
# Output: [1 2 3 4 5 6 7 8] (8,)

b = a.reshape((2, 4))
print(b)
# Output:
# [[1 2 3 4]
#  [5 6 7 8]]

# a[0] is changed, yet it is b that gets printed: b shows the new value
# too, because it shares its data with a.
a[0] = 999
print(b, b.shape)
# Output:
# [[999   2   3   4]
#  [  5   6   7   8]] (2, 4)

# ravel() flattens to one dimension; b itself keeps its shape.
c = b.ravel()
print(c, c.shape)
print(b, "this is b")
# Output:
# [999   2   3   4   5   6   7   8] (8,)
# [[999   2   3   4]
#  [  5   6   7   8]] this is b

# ------------------------------------------------- copy-based reshaping
print('*' * 30)
# flatten() also flattens to one dimension, but the result is independent.
d = c.flatten()
print(d, d.shape)
# Output: [999   2   3   4   5   6   7   8] (8,)

# Changing c afterwards leaves d untouched.
c[0] = 1
print(d, d.shape)
# Output: [999   2   3   4   5   6   7   8] (8,)

# --------------------------------------------------- in-place reshaping
print('*' * 50)
# resize() and assignment to .shape both modify the array itself.
c.resize((2, 2, 2))
print(c, c.shape)
# Output:
# [[[1 2]
#   [3 4]]
#
#  [[5 6]
#   [7 8]]] (2, 2, 2)

c.shape = (8,)
print(c, c.shape)
# Output: [1 2 3 4 5 6 7 8] (8,)

6.高维数组的切片

# Slicing multi-dimensional arrays:
# give one slice per dimension, separated by commas.
import numpy as np

a = np.arange(1, 19).reshape((3, 6))
print(a, a.shape)
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]
#  [13 14 15 16 17 18]] (3, 6)

# First two rows, all columns.
print(a[:2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [ 7  8  9 10 11 12]]

# Every second row, all columns.
print(a[::2, :])
# Output:
# [[ 1  2  3  4  5  6]
#  [13 14 15 16 17 18]]

# First two rows, every second column.
print(a[:2, ::2])
# Output:
# [[ 1  3  5]
#  [ 7  9 11]]

# Re-dimension in place to 2 pages x 3 rows x 3 columns.
a.resize((2, 3, 3))
print(a, a.shape)
# Output:
# [[[ 1  2  3]
#   [ 4  5  6]
#   [ 7  8  9]]
#
#  [[10 11 12]
#   [13 14 15]
#   [16 17 18]]] (2, 3, 3)
print('*' * 30)

# The first two rows and first two columns of every page.
print(a[:, :2, :2])
# Output:
# [[[ 1  2]
#   [ 4  5]]
#
#  [[10 11]
#   [13 14]]]

7.数组的掩码

"""
Masks - how to take a subset of an array out of an array.
"""
import numpy as np

# ------------------------------------------------------------ bool masks
a = np.arange(1, 10)

# A comparison on an array gives a bool array of the same length.
mask = (a % 2) == 1

print(a)
print(mask)
print(a[mask])
# Output of the three prints above:
# [1 2 3 4 5 6 7 8 9]
# [ True False  True False  True False  True False  True]
# [1 3 5 7 9]

# A mask can also select the elements to assign to.
a[mask] = 999
print(a)
# Output: [999   2 999   4 999   6 999   8 999]

a = np.arange(1, 100)
by_seven = a % 7 == 0
by_three = a % 3 == 0
print(a[by_seven])
print(a[by_seven & by_three])  # combine masks with bitwise AND
# Output:
# [ 7 14 21 28 35 42 49 56 63 70 77 84 91 98]
# [21 42 63 84]

print('*' * 30)
# ----------------------------------------------------------- index masks
# A list of indices picks (and reorders) elements.
products = np.array(['Xiaomi', 'Oppo', 'Vivo', 'Apple', 'Huawei'])
sort_indices = [0, 2, 1, 4, 3]
print(products[sort_indices])
# Output: ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple']

# An index may appear more than once.
sort_indices = [0, 2, 1, 4, 3, 0, 0, 0, 0]
print(products[sort_indices])
# Output:
# ['Xiaomi' 'Vivo' 'Oppo' 'Huawei' 'Apple' 'Xiaomi' 'Xiaomi' 'Xiaomi'
#  'Xiaomi']

# --------------------------------------------- masks on compound types
data = [
    ('zs', [90, 80, 85], 15),
    ('ls', [92, 81, 83], 16),
    ('ww', [95, 85, 95], 15)
]
ary = np.array(data, dtype={'names': ['name', 'score', 'age'],
                            'formats': ['U3', '3int32', 'int32']
                            })

# ary['age'] --> [15, 16, 15] --> [True, False, True]
is_fifteen = ary['age'] == 15
print(ary[is_fifteen])
# Output: [('zs', [90, 80, 85], 15) ('ww', [95, 85, 95], 15)]

8.数组的组合

"""
demo08_stack.py - combining and splitting arrays.
"""
import numpy as np

a = np.arange(1, 7).reshape(2, 3)
b = np.arange(7, 13).reshape(2, 3)
print(a)
print(b)
# Output:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# ------------------------------------------------ horizontal direction
c = np.hstack((a, b))
print(c)
# Output:
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]

# hsplit() undoes hstack(): cut c into 2 equal parts.
a, b = np.hsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# -------------------------------------------------- vertical direction
c = np.vstack((a, b))
print(c, c.shape)
# Output:
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]] (4, 3)

a, b = np.vsplit(c, 2)
print(a)
print(b)
# Output of the two prints above:
# [[1 2 3]
#  [4 5 6]]
# [[ 7  8  9]
#  [10 11 12]]

# ----------------------------------------------------- depth direction
c = np.dstack((a, b))
# The shape is printed as well, so the output matches the listing below.
print(c, c.shape)
# Output:
# [[[ 1  7]
#   [ 2  8]
#   [ 3  9]]
#
#  [[ 4 10]
#   [ 5 11]
#   [ 6 12]]] (2, 3, 2)

a, b = np.dsplit(c, 2)
print(a)
# Output:
# [[[1]
#   [2]
#   [3]]
#
#  [[4]
#   [5]
#   [6]]]
print(b)
# Output:
# [[[ 7]
#   [ 8]
#   [ 9]]
#
#  [[10]
#   [11]
#   [12]]]

# ------------------------------------------------------------- padding
ary = np.arange(1, 5)
print(ary)
# Output: [1 2 3 4]

# Arrays of different lengths are awkward to combine, so a shorter one
# can be padded first: 2 elements of -1 before and 2 after.
ary = np.pad(ary, pad_width=(2, 2), mode='constant', constant_values=-1)
print(ary)
# Output: [-1 -1  1  2  3  4 -1 -1]

数据分析01

文章目录

1.ndarray数组

2.ndarray数组的创建

3.ndarray对象的属性

4.自定义复合类型

5.维度操作

6.高维数组的切片

7.数组的掩码

8.数组的组合

一次SQL调优数据库性能问题后的过程(300W)

js字符串反转函数（react收集表单数据插件）

sqlserver数据库迁移后,孤立账号解决办法

小议sqlserver数据库主键选取策略

uniqueidentifier转换成varchar数据类型的sql语句

sql ntext数据类型字符替换实现代码

SQL对冗余数据的删除重复记录只保留单条的说明

大数据编程基础是什么（编程零基础学习大数据）

jQuery+ajax的资源回收处理机制分析

mysql的jdbc配置（mysql数据库备份讲解）