I'm trying to extract data from this script tag using Scrapy:
This script requires Mozilla Firefox and python-selenium to be installed. I ran my tests using a file called script.txt, which contains the script surrounded by <script> tags. Here's the code:
from selenium import webdriver
# Purpose: run the page's inline JavaScript in a real browser and pull the
# `data` object it builds back into Python as `result`.

# Read the saved <script> element; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("script.txt") as script_file:
    script_content = script_file.read()
# Removing script tags: only the JavaScript body may be passed to
# execute_script, the surrounding markup is not valid JavaScript.
# NOTE(review): this matches a bare "<script>" opening tag only; if the saved
# tag carries attributes (e.g. type="text/javascript") adjust the literal.
exec_script = script_content.replace("<script>", "").replace("</script>", "")
# Removing jq function call so the body runs immediately and `data` is a
# local of the executed snippet rather than of a deferred callback.
# NOTE(review): this drops every "});" in the script, not just the one that
# closes the jq(...) wrapper - confirm the script has no other occurrences.
exec_script = exec_script.replace("jq(function() {", "").replace("});", "")
# Setting some helper functions to avoid javascript errors: the page script
# calls these factories, which do not exist in a blank browser window.
helper_functions = """function hardwareTemplateFunctions(){
return {init: function(){}};};
accessoryFunctions = additionalServiceFunctions =
hardwareTemplateFunctions;"""
# Returning data variable
return_statement = "return data;"
wd = webdriver.Firefox()
try:
    # Getting data variable in result: execute_script wraps the snippet in an
    # anonymous function, so the trailing "return data;" hands the object back.
    result = wd.execute_script(helper_functions + exec_script + return_statement)
finally:
    # Always shut the browser down, even if the script raises, so no Firefox
    # process is left running.
    wd.quit()
The result variable looks like this:
{u'bundles': {u'KONTANT_KOMPLETT.REGULAR': {u'commitmentTime': 0,
u'monthlyPrice': 0,
u'newMsisdnFee': 0,
u'offeringTitle': u'SMART Kontant',
u'offeringType': u'PREPAID',
u'signupFee': 0,
u'upfrontPrice': 3499},
u'SMART_BASIC.TLF12PLEAS': {u'commitmentTime': 12,
u'monthlyPrice': 299,
u'newMsisdnFee': 199,
u'offeringTitle': u'SMART Basis',
u'offeringType': u'VOICE',
u'signupFee': 0,
u'upfrontPrice': 2199},
u'SMART_MINI.TLF12PLEAS': {u'commitmentTime': 12,
u'monthlyPrice': 199,
u'newMsisdnFee': 199,
u'offeringTitle': u'SMART Mini',
u'offeringType': u'VOICE',
u'signupFee': 0,
u'upfrontPrice': 2999},
u'SMART_PLUSS.TLF12PLEAS': {u'commitmentTime': 12,
u'monthlyPrice': 399,
u'newMsisdnFee': 199,
u'offeringTitle': u'SMART Pluss',
u'offeringType': u'VOICE',
u'signupFee': 0,
u'upfrontPrice': 1599},
u'SMART_SUPERX.TLF12PLEAS': {u'commitmentTime': 12,
u'monthlyPrice': 499,
u'newMsisdnFee': 199,
u'offeringTitle': u'SMART Super',
u'offeringType': u'VOICE',
u'signupFee': 0,
u'upfrontPrice': 1099}},
u'categoryId': 10001,
u'defaultTab': u'',
u'hardwareProductCode': u'9054832',
u'offeringCode': u'SMART_BASIC.TLF12PLEAS',
u'reviewJson': {},
u'reviewSummaryBox': None}