Pandas-数据结构-DataFrame(二):DF的属性【形状/shape、行索引/index、列索引/columns、查看值/values、转置/T、head()、tail()】
2. DataFrame:DataFrame是一个类似于二维数组或表格(如excel)的对象,既有行索引,又有列索引。行索引,表明不同行,横向索引,叫index,0轴,axis=0;列索引,表明不同列,纵向索引,叫columns,1轴,axis=1。2.1 DataFrame的创建:# 导入pandas;import pandas as pd;pd.DataFrame(data=None, index=None, ...
·
DataFrame是一个类似于二维数组或表格(如excel)的对象,既有行索引,又有列索引
- 行索引,表明不同行,横向索引,叫index,0轴,axis=0
- 列索引,表明不同列,纵向索引,叫columns,1轴,axis=1
DataFrame常用属性和方法
- shape – 形状
- index – 行索引
- columns – 列索引
- values – 查看值
- T – 转置
- head() – 查看头部内容
- tail() – 查看尾部内容
import pandas as pd
# DataFrame basics.
# A DataFrame is a tabular structure: a two-dimensional array with labels.
# It carries an index (row labels) and columns (column labels).
data = {
    'name': ['Jack', 'Tom', 'Mary'],
    'age': [18, 19, 20],
    'gender': ['m', 'm', 'w'],
}
frame = pd.DataFrame(data)

# Show the frame together with its Python type.
frame_report = "frame = \n{0} \ntype(frame) = {1}"
print(frame_report.format(frame, type(frame)))
print("-" * 100)

# .index   -> row labels
# .columns -> column labels
# .values  -> the cell values as a numpy ndarray
row_labels = frame.index
print("frame.index = {0}----type(frame.index) = {1}".format(row_labels, type(row_labels)))
print("-" * 50)

column_labels = frame.columns
print("frame.columns = {0}----type(frame.columns) = {1}".format(column_labels, type(column_labels)))
print("-" * 50)

cell_values = frame.values
print("frame.values = \n{0} \ntype(frame.values) = {1}".format(cell_values, type(cell_values)))
打印结果:
frame =
name age gender
0 Jack 18 m
1 Tom 19 m
2 Mary 20 w
type(frame) = <class 'pandas.core.frame.DataFrame'>
----------------------------------------------------------------------------------------------------
frame.index = RangeIndex(start=0, stop=3, step=1)----type(frame.index) = <class 'pandas.core.indexes.range.RangeIndex'>
--------------------------------------------------
frame.columns = Index(['name', 'age', 'gender'], dtype='object')----type(frame.columns) = <class 'pandas.core.indexes.base.Index'>
--------------------------------------------------
frame.values =
[['Jack' 18 'm']
['Tom' 19 'm']
['Mary' 20 'w']]
type(frame.values) = <class 'numpy.ndarray'>
Process finished with exit code 0
一、 形状/shape
import pandas as pd
# Column-oriented input: every key becomes a column, every list its values.
records = {
    'name': ['Jack', 'Tom', 'Mary'],
    'age': [18, 19, 20],
    'gender': ['m', 'm', 'w'],
}
data = pd.DataFrame(records)

# Show the table and its type before inspecting the attribute.
summary = "data = \n{0} \ntype(data) = {1}"
print(summary.format(data, type(data)))
print("-" * 50)

# .shape is a (row count, column count) tuple.
dimensions = data.shape
print("data.shape = ", dimensions)
打印结果:
data =
name age gender
0 Jack 18 m
1 Tom 19 m
2 Mary 20 w
type(data) = <class 'pandas.core.frame.DataFrame'>
--------------------------------------------------
data.shape = (3, 3)
二、 行索引/index
1、默认行索引
import pandas as pd
# Column-oriented input; no explicit index is passed to the constructor.
records = {
    'name': ['Jack', 'Tom', 'Mary'],
    'age': [18, 19, 20],
    'gender': ['m', 'm', 'w'],
}
data = pd.DataFrame(records)

# Show the table and its type before inspecting the attribute.
summary = "data = \n{0} \ntype(data) = {1}"
print(summary.format(data, type(data)))
print("-" * 50)

# With no index supplied, pandas falls back to a default RangeIndex 0..n-1.
default_rows = data.index
print("data.index = ", default_rows)
打印结果:
data =
name age gender
0 Jack 18 m
1 Tom 19 m
2 Mary 20 w
type(data) = <class 'pandas.core.frame.DataFrame'>
--------------------------------------------------
data.index = RangeIndex(start=0, stop=3, step=1)
2、自定义行索引
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# .index now returns the custom row labels instead of the default RangeIndex.
print("data.index = ", data.index)
打印结果:
score =
[[99 69 59 55 84]
[46 88 70 92 95]
[60 43 72 99 91]
[86 98 61 75 67]
[79 89 43 60 85]
[44 88 84 79 94]
[88 77 83 71 70]
[49 76 79 41 63]
[78 80 65 84 73]
[45 67 81 76 98]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 99 69 59 55 84
同学1 46 88 70 92 95
同学2 60 43 72 99 91
同学3 86 98 61 75 67
同学4 79 89 43 60 85
同学5 44 88 84 79 94
同学6 88 77 83 71 70
同学7 49 76 79 41 63
同学8 78 80 65 84 73
同学9 45 67 81 76 98
--------------------------------------------------
data.index = Index(['同学0', '同学1', '同学2', '同学3', '同学4', '同学5', '同学6', '同学7', '同学8', '同学9'], dtype='object')
Process finished with exit code 0
三、列索引/columns
DataFrame的列索引列表
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# .columns returns the column labels, i.e. the subject names.
print("data.columns = ", data.columns)
打印结果:
score =
[[68 83 77 81 80]
[72 60 49 73 77]
[93 52 50 53 59]
[50 59 91 89 84]
[77 41 62 80 88]
[44 93 48 70 58]
[92 55 79 60 71]
[82 76 92 69 89]
[68 75 67 57 73]
[95 44 48 87 90]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 68 83 77 81 80
同学1 72 60 49 73 77
同学2 93 52 50 53 59
同学3 50 59 91 89 84
同学4 77 41 62 80 88
同学5 44 93 48 70 58
同学6 92 55 79 60 71
同学7 82 76 92 69 89
同学8 68 75 67 57 73
同学9 95 44 48 87 90
--------------------------------------------------
data.columns = Index(['语文', '数学', '英语', '政治', '体育'], dtype='object')
四、查看值/values
直接获取其中array的值
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# .values returns the cell contents as a numpy array, without the labels.
print("data.values = \n", data.values)
打印结果:
score =
[[69 82 50 67 53]
[58 65 90 68 50]
[67 45 73 93 96]
[65 81 91 48 69]
[97 80 51 80 78]
[76 82 97 52 72]
[99 59 42 83 58]
[78 87 71 90 61]
[55 88 57 48 60]
[48 68 89 98 69]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 69 82 50 67 53
同学1 58 65 90 68 50
同学2 67 45 73 93 96
同学3 65 81 91 48 69
同学4 97 80 51 80 78
同学5 76 82 97 52 72
同学6 99 59 42 83 58
同学7 78 87 71 90 61
同学8 55 88 57 48 60
同学9 48 68 89 98 69
--------------------------------------------------
data.values =
[[69 82 50 67 53]
[58 65 90 68 50]
[67 45 73 93 96]
[65 81 91 48 69]
[97 80 51 80 78]
[76 82 97 52 72]
[99 59 42 83 58]
[78 87 71 90 61]
[55 88 57 48 60]
[48 68 89 98 69]]
Process finished with exit code 0
五、转置/T
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# .T transposes the table: subjects become the rows, students the columns.
print("data.T = \n", data.T)
打印结果:
score =
[[73 51 51 47 53]
[77 90 74 65 91]
[78 41 92 52 87]
[82 90 89 74 52]
[84 43 98 89 41]
[60 81 56 90 44]
[74 86 58 67 72]
[95 49 99 58 92]
[47 55 83 88 41]
[56 55 61 93 92]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 73 51 51 47 53
同学1 77 90 74 65 91
同学2 78 41 92 52 87
同学3 82 90 89 74 52
同学4 84 43 98 89 41
同学5 60 81 56 90 44
同学6 74 86 58 67 72
同学7 95 49 99 58 92
同学8 47 55 83 88 41
同学9 56 55 61 93 92
--------------------------------------------------
data.T =
同学0 同学1 同学2 同学3 同学4 同学5 同学6 同学7 同学8 同学9
语文 73 77 78 82 84 60 74 95 47 56
数学 51 90 41 90 43 81 86 49 55 55
英语 51 74 92 89 98 56 58 99 83 61
政治 47 65 52 74 89 90 67 58 88 93
体育 53 91 87 52 41 44 72 92 41 92
Process finished with exit code 0
六、head():显示前5行内容
如果不补充参数,默认5行。填入参数N则显示前N行
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# head() with no argument returns the first 5 rows; head(n) returns the first n.
print("data.head() = \n", data.head())
打印结果:
score =
[[72 57 55 99 64]
[94 46 62 92 74]
[54 88 84 86 65]
[92 72 82 53 71]
[72 43 58 69 87]
[97 61 92 90 40]
[86 45 90 44 85]
[40 46 42 94 67]
[92 79 95 72 78]
[42 66 88 52 58]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 72 57 55 99 64
同学1 94 46 62 92 74
同学2 54 88 84 86 65
同学3 92 72 82 53 71
同学4 72 43 58 69 87
同学5 97 61 92 90 40
同学6 86 45 90 44 85
同学7 40 46 42 94 67
同学8 92 79 95 72 78
同学9 42 66 88 52 58
--------------------------------------------------
data.head() =
语文 数学 英语 政治 体育
同学0 72 57 55 99 64
同学1 94 46 62 92 74
同学2 54 88 84 86 65
同学3 92 72 82 53 71
同学4 72 43 58 69 87
Process finished with exit code 0
七、tail():显示后5行内容
如果不补充参数,默认5行。填入参数N则显示后N行
import numpy as np
import pandas as pd
# Random integer scores for 10 students (rows) in 5 subjects (columns);
# randint's upper bound is exclusive, so every score lies in [40, 100).
score = np.random.randint(40, 100, (10, 5))
print("score = \n", score)
print("-" * 100)
# Column labels: one subject name per column of `score`.
subjects = ["语文", "数学", "英语", "政治", "体育"]
# Row labels: one student name per row of `score`.
stu = [f'同学{i}' for i in range(score.shape[0])]
# Attach both label sets: `index` names the rows, `columns` names the columns.
data = pd.DataFrame(score, columns=subjects, index=stu)
print("data = \n", data)
print("-" * 50)
# tail() with no argument returns the last 5 rows; tail(n) returns the last n.
print("data.tail() = \n", data.tail())
打印结果:
score =
[[93 50 72 51 46]
[42 60 59 47 69]
[67 60 87 48 60]
[77 74 81 67 55]
[97 70 40 49 66]
[88 46 99 68 70]
[94 96 80 61 65]
[90 67 77 57 80]
[54 42 52 93 55]
[54 68 58 97 99]]
----------------------------------------------------------------------------------------------------
data =
语文 数学 英语 政治 体育
同学0 93 50 72 51 46
同学1 42 60 59 47 69
同学2 67 60 87 48 60
同学3 77 74 81 67 55
同学4 97 70 40 49 66
同学5 88 46 99 68 70
同学6 94 96 80 61 65
同学7 90 67 77 57 80
同学8 54 42 52 93 55
同学9 54 68 58 97 99
--------------------------------------------------
data.tail() =
语文 数学 英语 政治 体育
同学5 88 46 99 68 70
同学6 94 96 80 61 65
同学7 90 67 77 57 80
同学8 54 42 52 93 55
同学9 54 68 58 97 99
Process finished with exit code 0
更多推荐
已为社区贡献24条内容
所有评论(0)