在第一章从CSV中获取数据后,需要将提取的数据或经Pandas等处理后的数据进行存储。这里简单介绍下使用cx-Oracle操作数据库,和通过Oracle的数据载入工具sqlldr来匹配导入数据。
1.cx-Oracle
1.1.安装:
pip install cx_Oracle
1.2.安装oracle客户端,并添加到Path
oracle客户端下载地址
(安装过程,及Linux环境变量过程,略)
1.3.示例代码
操作Oracle工具 oracle_utils.py
class ConnectOracleDatabase(object):
def __init__(self):
self.user = DB_USER
self.password = DB_PASSWORD
self.ip = DB_IP
self.port = DB_PORT
self.service_name = DB_SERVICE
self.oci_path = ORACLE_OCI_DIR
# 这种设置环境变量,在Pycharm中运行无效
# 在菜单Run->Edit configurations 中,手动设置Environment variables,添加LD_LIBRARY_PATH的内容,即可解决问题。
# LD_LIBRARY_PATH=/home/oracle/instantclient_19_5
sys_version = platform.architecture()
if sys_version[1] is not 'WindowsPE':
os.environ['LD_LIBRARY_PATH'] = self.oci_path
else:
os.environ['PATH'] = os.environ['PATH'] + ';' + self.oci_path
# 解决中文乱码问题
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.ZHS16GBK'
self.db_conn = None
self.db_cursor = None
self.create_connect()
def create_connect(self):
try:
self.db_conn = cx_Oracle.connect(self.user, self.password,
'%s:%s/%s' % (self.ip,
self.port,
self.service_name))
self.db_cursor = self.db_conn.cursor()
except Exception as error:
print("Error creating database connection: %s" % str(error))
# 执行DDL语句
def exec_sql_command(self, sql_command):
if sql_command:
print('ddl sql is', sql_command)
query_result = self.db_cursor.execute(sql_command)
return query_result
# 返回SQL的所有值
def query_sql(self, sql):
self.db_cursor.execute(sql)
return self.db_cursor.fetchall()
def __enter__(self):
self.create_connect()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
try:
self.db_cursor.close()
self.db_conn.close()
except AttributeError as error:
print(str(error))
使用示例:
with ConnectOracleDatabase() as oracle:
oracle.exec_sql_command('create table person(id number(10),name varchar2(255))')
2.Python执行sqlldr入库
cx-Oracle可以处理少量数据,一旦数据量过大则效率就会降低。所以较大的结果数据入库一般使用数据库自身提供的工具,这里以sqlldr举例:
2.1.示例代码
主要分两部分,一是根据数据模型,生成sqlldr的控制文件。二是由Python直接调用sqlldr。
生成sqlldr控制文件ctl
# 在环境变量ORACLE_CTL_PATH指定的目录中生成ctl文件
ctl_path = os.path.join(os.environ[ORACLE_CTL_PATH], table_name+'.ctl')
if not os.path.exists(ctl_path):
with open(ctl_path, 'w', encoding='utf-8') as file:
file.write("load data"+os.linesep)
file.write("replace into table " + table_name + os.linesep)
file.write("fields terminated by '" + terminated + "'" + os.linesep)
file.write("trailing nullcols (" + os.linesep)
table_conf = zip(table_fields, table_fields_type)
colunms_tmps=[]
for fields, fields_type in table_conf:
colunms_tmp = fields
if 'NUMBER' in fields_type.upper():
colunms_tmp = colunms_tmp + ' DECIMAL EXTERNAL'
elif 'DATE' in fields_type.upper():
colunms_tmp = colunms_tmp + ' date "yyyy-mm-dd hh24:mi:ss"'
colunms_tmps.append(colunms_tmp)
file.write(','.join(colunms_tmps)+' TERMINATED BY WHITESPACE )' + os.linesep)
调用sqlldr数据入库
# 判断文件编码 ,直接结局sqlldr导入中文乱码问题
with open(data, 'rb') as f:
filedata = f.read()
print('file infos is', chardet.detect(filedata))
if 'GB' in chardet.detect(filedata)['encoding']:
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.ZHS16GBK'
else:
os.environ['NLS_LANG'] = 'AMERICAN_AMERICA.AL32UTF8'
# 导入数据
sqllod_command = os.environ['ORACLE_HOME']+'/sqlldr userid='+DB_USER+'/'+DB_PASSWORD+TNSNAME
sqllod_command += ' control=' + ctl_path
sqllod_command += ' data=' + data
sqllod_command += ' log=' + os.path.join(os.environ[ORACLE_LOG_PATH], table_name+'.log')
sqllod_command += ' skip_index_maintenance=true readsize=15000000 streamsize=15000000 ' \
'columnarrayrows=5000 rows=1000 errors=1000000 direct=true skip=1'
print('sqllod_command=', sqllod_command)
print(os.environ['NLS_LANG'])
os.system(sqllod_command)
更多文章,请关注:
来源:oschina
链接:https://my.oschina.net/u/3275937/blog/3196169