问题
I want to monitor the network activity of a page and get the URLs of all JavaScript network requests, similar to what PhantomJS's page.onResourceRequested does, but I couldn't figure out how to do this with Google Chrome's Puppeteer.
I've been dabbling with Puppeteer, but I couldn't figure out how to make it work: all I get as output is the Page object itself, which looks like this:
Page {
domain: null,
_events: {
request: [Function]
},
_eventsCount: 1,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: {
'Page.frameAttached': [Function],
'Page.frameNavigated': [Function],
'Page.frameDetached': [Function],
'Runtime.executionContextCreated': [Function],
'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
'Network.responseReceived': [Function: bound _onResponseReceived],
'Network.loadingFinished': [Function: bound _onLoadingFinished],
'Network.loadingFailed': [Function: bound _onLoadingFailed],
'Page.loadEventFired': [Function],
'Runtime.consoleAPICalled': [Function],
'Page.javascriptDialogOpening': [Function],
'Runtime.exceptionThrown': [Function],
'Security.certificateError': [Function],
'Inspector.targetCrashed': [Function]
},
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: Connection {
domain: null,
_events: {},
_eventsCount: 0,
_maxListeners: undefined,
_url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
_lastId: 17,
_callbacks: Map {},
_delay: 0,
_ws: [Object],
_sessions: [Object]
},
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_modifiers: 0,
_pressedKeys: Set {}
},
_mouse: Mouse {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: [Object],
_modifiers: 0,
_pressedKeys: Set {}
},
_x: 0,
_y: 0,
_button: 'none'
},
_frameManager: FrameManager {
domain: null,
_events: {
frameattached: [Function],
framedetached: [Function],
framenavigated: [Function]
},
_eventsCount: 3,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_mouse: Mouse {
_client: [Object],
_keyboard: [Object],
_x: 0,
_y: 0,
_button: 'none'
},
_frames: Map {
'232.1' => [Object]
},
_mainFrame: Frame {
_client: [Object],
_mouse: [Object],
_parentFrame: null,
_url: 'http://mytestdomain.com/',
_id: '232.1',
_defaultContextId: 4,
_waitTasks: Set {},
_childFrames: Set {},
_name: undefined,
_loadingFailed: false
}
},
_networkManager: NetworkManager {
domain: null,
_events: {
request: [Function],
response: [Function],
requestfailed: [Function],
requestfinished: [Function]
},
_eventsCount: 4,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_requestIdToRequest: Map {},
_interceptionIdToRequest: Map {
null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
},
_extraHTTPHeaders: Map {},
_requestInterceptionEnabled: true,
_requestHashToRequestIds: Multimap {
_map: [Object]
},
_requestHashToInterceptions: Multimap {
_map: Map {}
}
},
_emulationManager: EmulationManager {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_emulatingMobile: false,
_injectedTouchScriptId: null
},
_tracing: Tracing {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_recording: false,
_path: ''
},
_pageBindings: Map {},
_ignoreHTTPSErrors: false,
_screenshotTaskQueue: TaskQueue {
_chain: Promise {
undefined
}
},
_viewport: {
width: 800,
height: 600
}
}
Can you please tell me how I can get the URLs of all JavaScript network requests with Puppeteer?
回答1:
Check out the sample that intercepts image requests. It is easy to modify it to look at other types of resource requests:
// Turn on request interception so that every outgoing request is handed to
// the 'request' listener below. (This method was previously named
// page.setRequestInterceptionEnabled.)
await page.setRequestInterception(true);
page.on('request', request => {
  // request.url() is a method in Puppeteer 1.0+. Match on the pathname only,
  // so a query string such as "app.js?v=2" does not defeat the suffix check.
  const { pathname } = new URL(request.url());
  if (/\.js$/i.test(pathname)) {
    // request for js resource
  }
  // With interception enabled, a request stalls until it is explicitly
  // continued, responded to, or aborted.
  request.continue();
});
await page.goto('https://example.com');
回答2:
I came across this post; note that page.setRequestInterceptionEnabled has since been renamed to
page.setRequestInterception(value)
Here is a piece of code I found in the documentation:
const puppeteer = require('puppeteer');
// Launch a browser, abort every request for a .png/.jpg URL, let all other
// requests through, then load the page and shut the browser down.
puppeteer.launch().then(async browser => {
  const page = await browser.newPage();
  // Interception must be enabled before abort()/continue() may be called.
  await page.setRequestInterception(true);
  page.on('request', interceptedRequest => {
    // url() is a method in Puppeteer 1.0+; reading it as a property would
    // yield the function itself and make .endsWith throw.
    const url = interceptedRequest.url();
    if (url.endsWith('.png') || url.endsWith('.jpg')) {
      interceptedRequest.abort();
    } else {
      interceptedRequest.continue();
    }
  });
  await page.goto('https://example.com');
  await browser.close();
});
NOTE: Enabling request interception disables page caching.
Here is the link to the Puppeteer documentation: https://pptr.dev
来源:https://stackoverflow.com/questions/45941601/how-to-inspect-network-traffic-and-get-the-url-of-resource-requests