问题
I made the following code that I will present below to create a web crawler (elaborated in scrapy) and I want to put this data in a database, the one being mysql. For this I used the pipeline file and made the following configurations:
pipeline.py:
class ScrapySpiderPipeline(object): def __init__(self): engine = db_connect() create_table(engine) self.Session = sessionmaker(bind=engine) def process_item(self, item, spider): session = self.Session() quotedb = QuoteDB() quotedb.Titulo = item["Titulo"] quotedb.Tipo_Negocio = item["Tipo_Negocio"] quotedb.Preco = item["Preco"] quotedb.Tipo_Imovel = item["Tipo_Imovel"] try: session.add(quotedb) session.commit() except: session.rollback() raise finally: session.close() return item
models.py:
from scrapy.utils.project import get_project_settings DeclarativeBase = declarative_base() def db_connect(): return create_engine(get_project_settings().get("CONNECTION_STRING")) def create_table(engine): DeclarativeBase.metadata.create_all(engine) class QuoteDB(DeclarativeBase): __tablename__ = "imovel_table" id = Column(Integer, primary_key=True) Titulo = Column('Titulo', VARCHAR(200)) Tipo_Negocio = Column('Tipo_Negocio', String(100)) Preco = Column('Preco', Text()) Tipo_Imovel = Column('Tipo_Imovel', Text()) print(QuoteDB.__table__)
items.py:
class QuoteItem(scrapy.Item): Titulo = scrapy.Field() Tipo_Negocio = scrapy.Field() Preco = scrapy.Field() Tipo_Imovel = scrapy.Field()
setting.py:
ITEM_PIPELINES = { 'novo.pipelines.ScrapySpiderPipeline' : 300, } CONNECTION_STRING = "{drivername}://{user}:{passwd}@{host}:{port}/{db_name}?charset=utf8".format( drivername="mysql", user="root", passwd="", host="localhost", port="3306", db_name="projeto", )
crawler.py:
class SapoSpider(scrapy.Spider): name = "imoveis" allowed_domains = ["maisconsultores.pt"] start_urls = ["https://www.maisconsultores.pt/properties?page=%d&s=eedce" % i for i in range(23)] def parse(self,response): subpage_links = [] for i in response.css('div.item.col-sm-4'): youritem = { 'Titulo':i.css('div[class=image] h3::text').extract(), 'Tipo_Negocio':i.css('div.price::text').re('[^\t\n\r\a]+'), 'Preco':i.css('div.price span::text').extract(), } subpage_link = i.css('div[class=image] a::attr(href)').extract_first() full_url = response.urljoin(subpage_link) yield scrapy.Request(full_url, callback=self.parse_subpage, meta={'item':youritem}) def parse_subpage(self,response): youritem = response.meta.get('item') youritem['Tipo_Imovel'] = response.xpath('//ul[@class="amenities"]//li[1]/text()').extract() yield youritem
Can someone tell me where the error is? I'm in it for a long time and I can not find it. Any idea ? I will appreciate it if someone can help me!
Backtrace:
2018-06-27 17:17:04 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.maisconsultores.pt/1651/apartamento-t2-em-linda-a-velha> (referer: https://www.maisconsultores.pt/properties?page=18&s=eedce)
2018-06-27 17:17:04 [scrapy.core.scraper] ERROR: Error processing {'Titulo': ['Apartamento T2 em Linda-a-Velha '], 'Tipo_Negocio': [' Venda', ' ', ' '], 'Preco': ['240 000€', 'Novo Preço!'], 'Localizacao': [' Algés (Algés)'], 'Tipo_Imovel': [' Apartamento'], 'Condicao': [' Usado'], 'Numero_Divisoes': [' 3'], 'Numero_Quartos': [' 2'], 'Numero_Casas_Banho': [' 1'], 'Certificado_Energetico': ' Avaliação em Curso', 'Ano_Construcao': ' 1980', 'Area_Util': ' ', 'Area_Bruta': ' ', 'Piso': [' 2º andar', ' ']}
Traceback (most recent call last):
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1193, in _execute_context
context)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py", line 508, in do_execute
cursor.execute(statement, parameters)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 250, in execute
self.errorhandler(self, exc, value)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\connections.py", line 50, in defaulterrorhandler
raise errorvalue
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 247, in execute
res = self._query(query)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 411, in _query
rowcount = self._do_query(q)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 374, in _do_query
db.query(q)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\connections.py", line 277, in query
_mysql.connection.query(self, query)
_mysql_exceptions.OperationalError: (1241, 'Operand should contain 1 column(s)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\joani\Anaconda3\lib\site-packages\twisted\internet\defer.py", line 653, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "C:\Users\joani\Desktop\outra_tentativa\novo\novo\pipelines.py", line 111, in process_item
session.commit()
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 943, in commit
self.transaction.commit()
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 467, in commit
self._prepare_impl()
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 447, in _prepare_impl
self.session.flush()
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 2254, in flush
self._flush(objects)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 2380, in _flush
transaction.rollback(_capture_exception=True)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\util\langhelpers.py", line 66, in __exit__
compat.reraise(exc_type, exc_value, exc_tb)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 187, in reraise
raise value
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\session.py", line 2344, in _flush
flush_context.execute()
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\unitofwork.py", line 391, in execute
rec.execute(self)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\unitofwork.py", line 556, in execute
uow
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\persistence.py", line 181, in save_obj
mapper, table, insert)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\orm\persistence.py", line 866, in _emit_insert_statements
execute(statement, params)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 948, in execute
return meth(self, multiparams, params)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\sql\elements.py", line 269, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1060, in _execute_clauseelement
compiled_sql, distilled_params
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1200, in _execute_context
context)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1413, in _handle_dbapi_exception
exc_info
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 203, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\util\compat.py", line 186, in reraise
raise value.with_traceback(tb)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1193, in _execute_context
context)
File "C:\Users\joani\Anaconda3\lib\site-packages\sqlalchemy\engine\default.py", line 508, in do_execute
cursor.execute(statement, parameters)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 250, in execute
self.errorhandler(self, exc, value)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\connections.py", line 50, in defaulterrorhandler
raise errorvalue
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 247, in execute
res = self._query(query)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 411, in _query
rowcount = self._do_query(q)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\cursors.py", line 374, in _do_query
db.query(q)
File "C:\Users\joani\Anaconda3\lib\site-packages\MySQLdb\connections.py", line 277, in query
_mysql.connection.query(self, query)
sqlalchemy.exc.OperationalError: <unprintable OperationalError object>
来源:https://stackoverflow.com/questions/51062223/python-sqlalchemy-exc-operationalerror-unprintable-operationalerror-object