I'm trying to crawler weather data from a website , simulate the web browser and the windows configuration of proxy, my web browser need to configure net proxy settings so that web browser can access internet website. I have the proxy ip and port. I want to reappear web browser process of accessing website by nodejs.
I use nodejs:http,nodejs:https and nodejs:tls packs.
I have done this before by using C language, so I am familiar with the http request procedure.
1. client need to build a tcp connect with proxy
2. client send http CONNECT request and the proxy may response connection established.
3. client start negotiation with proxy , say hello , exchange cypher suites and so on, which I'm not quite exactly clear but the OPENSSL do most of the job.
4. client send and recv through the read bio and the write bio which provide by openssl. Here you could send your GET/POST requests as it like in http.
I'm reading the nodejs mdn, seemed http,https,tls have something with to do my job. I also tried the requst pack, which seemed to be the encapsulation of of these metion before. So I dropped it.
I have write a little piece of code below.
What I have tried:
var http=require('http')
var https=require('https')
var tls=require('tls')
var opt={
host:'172.254.18.15',
port: 8080,
keepAlive:true,
method:'CONNECT',
headers:{
host:'www.163.com',
path:'/',
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
}
}
var req=http.request(opt, function(res) {
console.log("Got response: " + res.statusCode)
if(res.statusCode===301||res.statusCode===302) {
var raw_headers=res.rawHeaders;
console.log('Please redirect '+raw_headers[raw_headers.indexOf('Location')+1]);
}
let body='';
res.on('data',function(d){
body += d
}).on('end', function(){
console.log(body)
});
}).on('connect',(res,socket,head)=>{
console.log('request method '+this.method)
console.log("Got response: " + res.statusCode)
console.log(res.rawHeaders)
console.log('connected.')
console.log(head.toString())
let tls_opts={
socket:socket,
method:'GET',
host:'www.baidu.com',
path:'/'
}
let tls_req=https.request(tls_opts,(res)=>{
let tls_data=''
res.on('data',(data)=>{
tls_data+=data
this.end()
}).on('end',()=>{
console.log(tls_data)
console.log('disconnected.')
})
}).on('error',(err)=>{
console.log('tls error:'+err);
})
tls_req.end()
}).on('error', function(e) {
console.log("Got error: " + e.message);
}).setTimeout(3000)
req.end()
It did not work. In my case, the CONNECT request sended and the proxy responsed the 'connection established', but the tls was wrong. I'm not familiar with nodejs. Much of the code on the internet did not show the detail of proxy.