第三章 Pandas数据结构

2020-06-06
  • Series数据结构
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Series: a one-dimensional labelled array.
# Build one from a plain list; the labels default to 0, 1, 2, ...
s1 = pd.Series(list("abcd"))

# Supply explicit labels through the ``index`` argument.
s2 = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
s2

# Build from a dict: the keys become the labels, the values become the data.
s3 = pd.Series(dict(a=1, b=2, c=3, d=4))
s3

# The ``index`` attribute holds the labels; it can be read by position.
s1.index[2]

# The ``values`` attribute holds the data; it can be read by position too.
s1.values[3]
  • DataFrame表格型数据结构
import pandas as pd
# DataFrame: a two-dimensional, table-like structure.
# Build from a flat list: a single column with default integer labels.
df1 = pd.DataFrame(["a", "b", "c", "d"])
df1

# Build from a nested list: each inner list becomes one row.
df2 = pd.DataFrame([["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]])
df2

# Set the column labels with ``columns`` ...
df3 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    columns=["小写", "大写"],
)
df3

# ... or the row labels with ``index``.
df32 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    index=["一", "二", "三", "四"],
)
df32

# Build from a dict: the keys become the column labels and the rows are
# numbered from 0 by default.
data = {"小写": ["a", "b", "c", "d"], "大写": ["A", "B", "C", "D"]}
df41 = pd.DataFrame(data)
df41

# The row labels can still be set explicitly when a dict is passed.
df42 = pd.DataFrame(data, index=["一", "二", "三", "四"])
df42

# Read individual column / row labels by position.
df2.columns[1]
# df32 has only two columns (positions 0 and 1), so position 2 is out of
# range on ``columns``; read its third row label instead.
df32.index[2]
df2.index[1]

第四章:导入Excel

import pandas as pd
# A Windows path is normally written with the raw-string prefix r so the
# backslashes are kept literally; alternatively replace every \ with /.
df = pd.read_excel(io=r"E:\Python\t2.xlsx")
df

指定导入哪个Sheet

import pandas as pd
# Select the sheet by its name.
df = pd.read_excel(
    io=r"E:\Python\t2.xlsx",
    sheet_name="Sheet2",
)
df

# Select the sheet by its position; counting starts at 0.
df = pd.read_excel(
    io=r"E:\Python\t2.xlsx",
    sheet_name=0,
)
df

# Pick the column used as row labels with ``index_col`` (positions count
# from 0).
df = pd.read_excel(
    io=r"E:\Python\t2.xlsx",
    sheet_name=0,
    index_col=0,
)
df

# Pick the row used as column labels with ``header``.
df = pd.read_excel(
    io=r"E:\Python\t2.xlsx",
    sheet_name=0,
    header=0,
)
df

# Import only some of the columns with ``usecols``.
df = pd.read_excel(
    io=r"E:\Python\t2.xlsx",
    usecols=[0, 1],
)
df

导入.csv文件

# Plain import with the default settings.
import pandas as pd

# The whole path has to be one raw string literal with matching quotes.
df = pd.read_csv(r"E:\Python\x2.csv")
df

# Specify the field separator (a single space here).
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ")
df

# Limit how many rows are read with ``nrows``.
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", nrows=2)
df

# A file saved as "CSV UTF-8" needs no ``encoding`` argument; a file saved
# as plain CSV does (gbk in this example).
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", encoding="gbk")
df

# Select the parser with ``engine``. When the path or the file name contains
# Chinese characters the calls above can raise an error; switching to the
# Python parser is the workaround. The engine name is lowercase "python".
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", engine="python")
df

导入.txt文件

利用read_table()

import pandas as pd
# The whole path has to be one raw string literal with matching quotes.
# NOTE(review): this section is about .txt files but the path points at
# x2.csv — confirm the intended file name.
df = pd.read_table(r"E:\Python\x2.csv", sep=" ")
df

导入sql文件

import pymysql

# Open the database connection.
#   host:     database address (localhost for the local machine)
#   user:     user name
#   password: password
#   db:       database name
#   charset:  database encoding, usually UTF-8
eng = pymysql.connect(
    host='localhost',
    user='user',
    password='password',
    db='db',
    charset='utf8',
)

# With the connection open, run a query through read_sql(sql, con):
#   sql: the SQL statement to execute
#   con: the connection created above, i.e. eng
# Placeholder statement — replace it with the real query.
sql = "SELECT 1"
try:
    df = pd.read_sql(sql, con=eng)
finally:
    # Release the connection even if the query fails.
    eng.close()
df


标题:第三章 Pandas数据结构
作者:xiaob0
地址:https://xiaobo.net.cn/articles/2020/06/06/1591440787160.html