python 爬虫，将数据转存入sqlite3数据库并写入文件

python 爬虫，将数据转存入sqlite3数据库并写入文件
51job.com

import re
import sqlite3  # SQLite database driver
from urllib import parse
from urllib import request

# Scrape one page of 51job.com search results for a keyword, print the
# parsed rows, write them to a text report and store them in a SQLite table.

# Directory (with trailing separator) that receives "<keyword>.txt".
REPORT_DIR = 'D:\\python学习文件\\python生成读写的文件\\'

# Every backslash is escaped on purpose: an unescaped "\51" is an octal
# escape for ")" and would silently name the database ").db".
DB_PATH = 'D:\\python学习文件\\python数据库\\51.db'

REPORT_HEADER = '关键字'+'        '+'地点'+'        '+'薪资min'+'         '+'薪资max'
COLUMN_GAP = '          '

# Captures (city, low, high, unit, period) from one result row.
JOB_PATTERN = re.compile(
    r'<span class="t3">(北京|上海|深圳|广州).*</span>\s*'
    r'<span class="t4">(\d+\.*\d*)-(\d+\.*\d*)(千|万)/(年|月)</span>'
)


def build_url(keyword, page):
    """Return the search URL for *keyword* on result page *page*.

    The keyword is percent-encoded so that non-ASCII input (for example
    Chinese) yields a URL that ``urlopen`` can send. ``%`` is left alone so
    a keyword that is already percent-encoded is not encoded twice.
    """
    quoted = parse.quote(keyword, safe='%')
    return ('https://search.51job.com/list/010000%252C020000%252C030200%252C040000,000000,0000,00,9,99,'
            + quoted + ',2,' + str(page) + '.html')


def fetch_html(url):
    """Download *url* and return the body decoded as GBK text."""
    # The context manager closes the connection even if read/decode raises.
    with request.urlopen(url) as response:
        return response.read().decode('gbk')


def normalize_salary(low, high, unit, period):
    """Convert a salary range to 万 per month, rounded to two decimals.

    *low* and *high* are numeric strings, *unit* is '千' or '万' and
    *period* is '年' or '月'. Returns a ``(min, max)`` tuple of floats.
    """
    salary_min = float(low)
    salary_max = float(high)
    if unit == '千':
        # 10 千 == 1 万
        salary_min /= 10
        salary_max /= 10
    if period == '年':
        salary_min /= 12
        salary_max /= 12
    return round(salary_min, 2), round(salary_max, 2)


def parse_jobs(html):
    """Extract ``(city, min, max)`` string tuples from a result page.

    Only rows matched by ``JOB_PATTERN`` are returned; salaries are
    normalized with ``normalize_salary`` and rendered with ``str``.
    """
    rows = []
    for city, low, high, unit, period in JOB_PATTERN.findall(html):
        salary_min, salary_max = normalize_salary(low, high, unit, period)
        rows.append((city, str(salary_min), str(salary_max)))
    return rows


def format_row(keyword, city, salary_min, salary_max):
    """Return one report line (without a trailing newline)."""
    return COLUMN_GAP.join((keyword, city, salary_min, salary_max))


def write_report(path, keyword, rows):
    """Write the header and one line per row to the text file *path*.

    The file is written as UTF-8 so the result does not depend on the
    platform's default encoding.
    """
    with open(path, 'w', encoding='utf-8') as report:
        # Plain '\n' everywhere: text mode already translates it to the
        # platform line ending, so a literal '\r\n' would double the '\r'.
        report.write(REPORT_HEADER + '\n')
        for city, salary_min, salary_max in rows:
            report.write(format_row(keyword, city, salary_min, salary_max) + '\n')


def save_to_db(db_path, keyword, rows):
    """Replace the contents of the ``jobs`` table in *db_path* with *rows*.

    The database file and the table are created when missing. The delete
    and the inserts are committed together, so a failure part-way through
    does not leave the table emptied.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS jobs
              (url_print text,addr text, min text, max text)''')
        cursor.execute('''delete from jobs''')
        cursor.executemany(
            "INSERT INTO jobs VALUES (?,?,?,?)",
            [(keyword, city, salary_min, salary_max)
             for city, salary_min, salary_max in rows],
        )
        conn.commit()
    finally:
        conn.close()


def main():
    """Prompt for a keyword and page, then scrape, print and persist."""
    keyword = input('请输入您想要搜索的内容：').strip()
    page = input('请输入你想要搜索第几页:').strip()

    # Fetch before touching any output so a network failure cannot leave a
    # truncated report behind.
    rows = parse_jobs(fetch_html(build_url(keyword, page)))

    for city, salary_min, salary_max in rows:
        print(format_row(keyword, city, salary_min, salary_max))

    write_report(REPORT_DIR + keyword + '.txt', keyword, rows)
    save_to_db(DB_PATH, keyword, rows)


if __name__ == '__main__':
    main()

来源：CSDN

作者：G_AOFAN

链接：https://blog.csdn.net/G_AOFAN/article/details/84109494

标签

python

python爬虫

sqlite3

python 爬虫，将数据转存入sqlite3数据库并写入文件

python 爬虫，将数据转存入sqlite3数据库并写入文件 - 51job.com

python 爬虫，将数据转存入sqlite3数据库并写入文件
51job.com