javascript - 关于nodejs爬虫的问题?
PHP中文网
PHP中文网 2017-04-11 12:52:39
[JavaScript讨论组]

var request = require('request');
var fs = require('fs');
var cheerio = require("cheerio");
var url = 'http://www.fssxhsd.com/category.php?id=332';

request(url,function(err,result){
    if(err){
        console.log(err);
    }
    var $ = cheerio.load(result.body);
    $('dl img').each(function(index,element){
        var img_src = 'www.fssxhsd.com/' + $(this).attr('src');
        console.log(img_src)
        //采用request模块,向服务器发起一次请求,获取图片资源
        request.head(img_src,function(err,res,body){
            if(err){
                console.log(err);
            }
        });
        request(img_src).pipe(fs.createWriteStream('./image/'+index));
    })
})

这是我的代码,但是报错:

Error: Invalid URI "www.fssxhsd.com/images/201703/thumb_img/277292_thumb_G_1489165224300.jpg"
    at Request.init (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:276:31)
    at new Request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:130:8)
    at request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:54:10)
    at Function.head (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:62:12)
    at Object. (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:16:17)
    at initialize.exports.each (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\cheerio\lib\api\traversing.js:300:24)
    at Request._callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:12:17)
    at Request.self.callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:188:22)
    at emitTwo (events.js:106:13)
    at Request.emit (events.js:194:7)
events.js:163
      throw er; // Unhandled 'error' event
      ^

Error: Invalid URI "www.fssxhsd.com/images/201703/thumb_img/277292_thumb_G_1489165224300.jpg"
    at Request.init (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:276:31)
    at new Request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:130:8)
    at request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:54:10)
    at Object. (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:21:9)
    at initialize.exports.each (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\cheerio\lib\api\traversing.js:300:24)
    at Request._callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:12:17)
    at Request.self.callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:188:22)
    at emitTwo (events.js:106:13)
    at Request.emit (events.js:194:7)
    at Request. (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:1171:10)

请问这是为什么啊,它这个报错说无效的url但是这个url是可以进入的啊?
谢谢!

PHP中文网
PHP中文网

认证高级PHP讲师

全部回复(2)
迷茫
  var img_src = 'www.fssxhsd.com/' + $(this).attr('src');

加上 http或者https,谢谢

怪我咯

前面要加 http://

热门教程
更多>
最新下载
更多>
网站特效
网站源码
网站素材
前端模板
关于我们 免责申明 举报中心 意见反馈 讲师合作 广告合作 最新更新 English
php中文网:公益在线php培训,帮助PHP学习者快速成长!
关注服务号 技术交流群
PHP中文网订阅号
每天精选资源文章推送
PHP中文网APP
随时随地碎片化学习

Copyright 2014-2025 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号