python io文件数据的处理方法
这篇文章主要介绍"python io文件数据的处理方法",在日常操作中,相信很多人在python io文件数据的处理方法问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答"python io文件数据的处理方法"的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
具体代码如下:
格式有可能会乱,建议直接下载文件:
io读取写入
mac路径path
# Example base directories on macOS. The two assignments run back to back, so
# the second one overrides the first and only '/Users/Public' stays in effect.
path = '/Users/Public/odpscmd_public/odps_download/'
path = '/Users/Public'
csv 文档
csv 文档读取
import pandas as pd
from datetime import datetime
import time
def read_csv(from_path, encoding='gbk'):
    """Load a CSV file into a pandas DataFrame.

    Args:
        from_path: Path of the CSV file to read.
        encoding: Text encoding of the file. Defaults to ``'gbk'``, the value
            this function used to hard-code; pass e.g. ``'utf-8'`` for files
            saved in another encoding.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(from_path, encoding=encoding)
def write_csv(to_path, data, encoding='utf-8'):
    """Write a DataFrame to a CSV file, omitting the index column.

    Args:
        to_path: Destination path of the CSV file.
        data: The pandas DataFrame to save.
        encoding: Text encoding of the output. Defaults to ``'utf-8'``, which
            is what pandas used when no encoding was given; pass ``'gbk'`` to
            produce a file encoded the way ``read_csv`` reads it by default.
    """
    data.to_csv(to_path, index=False, encoding=encoding)
if __name__ == "__main__":
    # Demo: read a CSV file, report how long it took, then preview the data.
    import os  # local import: only this script entry point needs it

    start_time = time.time()  # start of the timed section
    # The original assigned '/Users/Public' first and immediately overwrote it
    # with this directory, so only this value was ever used.
    path = '/Users/Public/odpscmd_public/odps_download/'
    file_name = '活动三行为数据.csv'
    # os.path.join avoids the doubled "/" that plain concatenation produced
    # because the directory already ends with a separator.
    from_path = os.path.join(path, file_name)
    print(from_path)
    # df stays at module level because later examples reuse it.
    df = read_csv(from_path)
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
    print(df.head())
csv文档保存
import re
import pandas as pd
from datetime import datetime
import time
def write_csv(to_path, data, encoding='utf-8'):
    """Write a DataFrame to a CSV file, omitting the index column.

    Args:
        to_path: Destination path of the CSV file.
        data: The pandas DataFrame to save.
        encoding: Text encoding of the output. Defaults to ``'utf-8'``, which
            is what pandas used when no encoding was given; pass ``'gbk'``
            when the file must be readable with a GBK decoder.

    Returns:
        The string ``'successful write'`` once the file has been written.
    """
    data.to_csv(to_path, index=False, encoding=encoding)
    return 'successful write'
if __name__ == "__main__":
    # Demo: save the first rows of a DataFrame to CSV and time the operation.
    import os  # local import: only this script entry point needs it

    start_time = time.time()  # start of the timed section
    path = '/Users/Public'
    file_name = '新用户首次注册下单明细表.csv'
    to_path = os.path.join(path, file_name)
    print(to_path)
    # NOTE(review): df is not created in this snippet; presumably it is the
    # DataFrame loaded by the earlier CSV-reading example - confirm.
    status = write_csv(to_path, df.head())
    # Show the status returned by write_csv instead of discarding it.
    print(status)
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
excel文件IO
excel文件读取
import re
import pandas as pd
from datetime import datetime
import time
def read_xlsx(path, sheet_name):
    """Read one worksheet of an Excel workbook into a pandas DataFrame.

    Args:
        path: Path of the Excel workbook.
        sheet_name: Name of the worksheet to load.

    Returns:
        The DataFrame parsed from the requested worksheet.
    """
    # The context manager closes the workbook handle even when parsing fails;
    # the original left the ExcelFile open.
    with pd.ExcelFile(path) as xlsx_file:
        return xlsx_file.parse(sheet_name)
if __name__ == "__main__":
    # Demo: load one worksheet of an Excel workbook and time the operation.
    import os  # local import: only this script entry point needs it

    start_time = time.time()  # start of the timed section
    path = '/Users/Public'
    file_name = 'source双子座uvpv.xlsx'
    # Maps a data-source label to the worksheet that is read for it.
    sheet_name_list = {
        'hive': 'Sheet',
        'mysql': 'Sheet4',
    }
    # As in the original, path is rebound from the directory to the full
    # workbook path.
    path = os.path.join(path, file_name)
    # The original also carried a commented-out lookup of the key 'email',
    # which does not exist in sheet_name_list; it has been dropped.
    sheet_name = sheet_name_list['hive']
    df = read_xlsx(path, sheet_name)
    print(df.head())
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
excel文件保存
def write_xlsx(to_path, data, sheet_name):
    """Save a DataFrame to an Excel workbook, omitting the index column.

    Args:
        to_path: Destination path of the workbook.
        data: The pandas DataFrame to save.
        sheet_name: Name of the worksheet to write. The original ignored this
            argument and always wrote to 'Sheet1'.
    """
    # The context manager saves and closes the workbook on exit; the explicit
    # writer.save() call used before is no longer available in pandas 2.x.
    with pd.ExcelWriter(to_path, engine='xlsxwriter') as writer:
        data.to_excel(writer, sheet_name=sheet_name, index=False)
if __name__ == "__main__":
    # Demo: save a DataFrame to an Excel workbook and time the operation.
    import os  # local import: only this script entry point needs it

    start_time = time.time()  # start of the timed section
    path = '/Users/Public'
    file_name = '撤点点位.xlsx'
    sheet_name = 'Sheet1'
    # The original joined with "\\", which on this macOS-style path does not
    # act as a separator and would create a file literally named
    # 'Public\撤点点位.xlsx' inside /Users.
    to_path = os.path.join(path, file_name)
    # NOTE(review): df is not created in this snippet; presumably it is the
    # DataFrame loaded by an earlier reading example - confirm.
    data = df
    try:
        write_xlsx(to_path, data, sheet_name)
    except Exception as exc:
        # Keep the demo best-effort, but show why the save failed.
        print('Excel保存失败', exc)
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
mysql数据IO
mysql保存
import pymysql
from sqlalchemy import create_engine
#import mysql.connector as sql
def py_mysql(table, table_name,
             db_url="mysql+pymysql://root:password@127.0.0.1:3306/db_name?charset=utf8mb4"):
    """Write a DataFrame to a MySQL table, replacing the table if it exists.

    The write is best-effort: failures are reported on stdout instead of being
    raised, as in the original.

    Args:
        table: The pandas DataFrame to store.
        table_name: Name of the destination table.
        db_url: SQLAlchemy connection URL. Defaults to the URL this function
            used to hard-code (placeholder credentials); pass a real URL
            rather than editing the function.

    Returns:
        True when the write succeeded, False when it failed.
    """
    try:
        engine = create_engine(db_url)
        try:
            # Rows are sent in batches of 10000 to bound the size of each insert.
            table.to_sql(name=table_name, con=engine, if_exists='replace',
                         index=False, chunksize=10000)
        finally:
            # Release pooled connections whether or not the write succeeded.
            engine.dispose()
    except Exception as exc:
        # Previously a bare except hid the cause; report it alongside the message.
        print('数据库写入失败', exc)
        return False
    print('数据库写入成功')
    return True
if __name__ == "__main__":
    # Demo: store a DataFrame in MySQL and time the operation.
    start_time = time.time()  # start of the timed section
    try:
        # NOTE(review): df_trade_im is not defined anywhere in the visible
        # text; presumably it is a DataFrame prepared elsewhere - confirm.
        py_mysql(df_trade_im, 'tmp_news_im_v3')
    except Exception as exc:
        # This step writes to the database, so the original "query failed"
        # message ('sql查询失败') was misleading; the cause is shown as well.
        print('sql写入失败', exc)
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
mysql读取
import pymysql
import time
import pandas as pd
def read_mysql(sql):
    """Run a SQL query against the local MySQL database and return the rows.

    Args:
        sql: The SQL statement to execute.

    Returns:
        A pandas DataFrame holding the query result.
    """
    # Connection settings are placeholders taken from the original example.
    db_connection = pymysql.connect(host="127.0.0.1", port=3306, user="root",
                                    passwd="password", db="db_name")
    try:
        # The original called pd.read_sql twice and kept only the second
        # result, which executed the query twice; once is enough.
        return pd.read_sql(sql, con=db_connection)
    finally:
        # Close the connection even when the query raises.
        db_connection.close()
if __name__ == "__main__":
    # Demo: query MySQL into a DataFrame, preview it, and time the operation.
    start_time = time.time()  # start of the timed section
    sql ="""
select * from table_name
"""
    try:
        df = read_mysql(sql)
        print(df.head())
    except Exception as exc:
        # Keep the demo best-effort, but show why the query failed instead of
        # hiding the cause behind a bare except.
        print('sql查询失败', exc)
    end_time = time.time()  # end of the timed section
    print("程序耗时%f秒." % (end_time - start_time))
到此,关于"python io文件数据的处理方法"的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注网站,小编会继续努力为大家带来更多实用的文章!