第三章 Pandas数据结构
2020-06-06
- Series数据结构
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Series: a one-dimensional labelled array.
# Create one from a plain list; the index defaults to 0..n-1.
s1 = pd.Series(["a", "b", "c", "d"])
# Supply explicit index labels.
s2 = pd.Series(
    [1, 2, 3, 4],
    index=["a", "b", "c", "d"],
)
s2
# Build from a dict: the keys become the index labels.
s3 = pd.Series(dict(a=1, b=2, c=3, d=4))
s3
# The .index attribute exposes the labels of the Series.
s1.index[2]
# The .values attribute exposes the underlying values.
s1.values[3]
- DataFrame表格型数据结构
import pandas as pd
# DataFrame: a two-dimensional, table-like structure.
# Create one from a flat list (this yields a single column).
df1 = pd.DataFrame(["a", "b", "c", "d"])
df1
# Pass a nested list: each inner list becomes one row.
df2 = pd.DataFrame([["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]])
df2
# Set the column labels (df3) or the row labels (df32) explicitly.
df3 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    columns=["小写", "大写"],
)
df3
df32 = pd.DataFrame(
    [["a", "A"], ["b", "B"], ["c", "C"], ["d", "D"]],
    index=["一", "二", "三", "四"],
)
df32
# Pass a dict.
data = {"小写": ["a", "b", "c", "d"], "大写": ["A", "B", "C", "D"]}
df41 = pd.DataFrame(data)
df41
# When a DataFrame is built from a dict, the keys act as the column labels.
# The row labels start from 0 by default, but can also be set explicitly.
data = {"小写": ["a", "b", "c", "d"], "大写": ["A", "B", "C", "D"]}
df42 = pd.DataFrame(data, index=["一", "二", "三", "四"])
df42
# Read back the column labels and row labels of a DataFrame.
df2.columns[1]
# df32 has only two columns (positions 0 and 1); asking for position 2
# raises IndexError, so the last valid position is used here.
df32.columns[1]
df2.index[1]
第四章:导入Excel
import pandas as pd
# Windows paths are normally written with the r prefix (raw string) so the
# backslashes are taken literally; alternatively replace every \ in the
# path with /.
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
)
df
指定导入哪个Sheet
import pandas as pd
# Pick the sheet by its name.
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
    sheet_name="Sheet2",
)
df
# Pick the sheet by its position; counting starts at 0.
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
    sheet_name=0,
)
df
# Choose the row index with index_col (0-based column position) instead of
# the default index that counts up from 0.
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
    sheet_name=0,
    index_col=0,
)
df
# Choose the column index with header (0-based row position).
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
    sheet_name=0,
    header=0,
)
df
# Restrict which columns are imported with the usecols parameter.
df = pd.read_excel(
    r"E:\Python\t2.xlsx",
    usecols=[0, 1],
)
df
导入.csv文件
# Import the file directly.
import pandas as pd
df = pd.read_csv(r"E:\Python\x2.csv")
df
# Specify the field separator.
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ")
df
# Limit how many rows are read with nrows.
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", nrows=2)
df
# A file saved as "CSV UTF-8" needs no encoding argument; a plain CSV saved
# in another encoding (GBK here) must be read with the encoding parameter.
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", encoding="gbk")
df
# Specify the parser engine. The value is case-sensitive and must be the
# lowercase "python".
# Original note: when the file path or file name contains Chinese characters,
# the calls above may raise an error; selecting the python engine avoids it.
df = pd.read_csv(r"E:\Python\x2.csv", sep=" ", engine="python")
df
导入.txt文件
利用read_table()
import pandas as pd
# read_table() reads a delimited text file; sep gives the field separator.
# NOTE(review): this section is about .txt files but the path still points
# at x2.csv -- confirm the intended file name.
df = pd.read_table(r"E:\Python\x2.csv", sep=" ")
df
导入sql文件
import pymysql
# Create the connection.
#   host:     database address; use localhost for the local machine
#   user:     user name
#   password: password
#   db:       database name
#   charset:  database encoding, usually UTF-8
eng = pymysql.connect(
    host='localhost',
    user='user',
    password='password',
    db='db',
    charset='utf8',
)
# Once connected, SQL queries are executed with read_sql(sql, con):
#   sql: the SQL statement to execute
#   con: the database connection created in the first step, i.e. eng
# Placeholder query -- replace it with the statement you actually need.
sql = "SHOW TABLES"
df = pd.read_sql(sql, eng)
df
# Call eng.close() once the connection is no longer needed.