批量下载页面img元素图片

来源：本站原创发布时间：2023-10-10 11:46:18 浏览次数：次【字体：小大】

最近在做适老化配置时会发现，搭建的测试站如果没有图片就不方便测试。那么能否做一个工具，只批量下载正式站栏目页上的 img 图片呢？下面为大家提供一个用 Node.js 实现的工具：

新建：downloadimg.js

const axios = require('axios');
const fs = require('fs');
const path = require('path');
const cheerio = require('cheerio');

// Address of the original (production) site; it is prefixed to every image path before download
const downloadWeb='http://www.gov.cn';

// URLs of the pages that are scanned for <img> elements
// NOTE(review): the first entry uses host "www.test.net" while the others use "www.test" — confirm this is intentional
const webpageUrls = [
    'http://www.test.net:9558/',
    'http://www.test:9558/xwzx',
    'http://www.test:9558/zwgk',
    // Add more page URLs here
];

/**
 * Returns a new array without the entries that contain `specificString`.
 * The input array is left untouched.
 *
 * @param {string[]} array - Entries to examine.
 * @param {string} specificString - Substring that disqualifies an entry.
 * @returns {string[]} Entries that do not contain `specificString`, in their original order.
 */
function filterArrayByString(array, specificString) {
    const lacksSpecificString = (entry) => {
        const containsIt = entry.includes(specificString);
        return !containsIt;
    };

    return array.filter(lacksSpecificString);
}

// Shared Referer header value; some sites enable hotlink protection (e.g. the Jiading and Qingyang site clusters).
const referer = downloadWeb; // Reuse the original site address as the Referer

// Root directory for downloaded files: "downloads" next to this script
const rootDownloadDir = path.join(__dirname, './downloads');

// Create the root directory if it does not exist yet
if (!fs.existsSync(rootDownloadDir)) {
    fs.mkdirSync(rootDownloadDir, { recursive: true });
}

/**
 * Downloads one image and stores it under rootDownloadDir, mirroring the
 * URL's host name and directory path on disk.
 *
 * Every failure (invalid URL, HTTP error, file-system error, broken stream)
 * is logged and swallowed so that one bad image never aborts the batch.
 *
 * @param {string} imageUrl - Absolute URL of the image to fetch.
 * @returns {Promise<void>} Settles after the file has been completely written
 *     or after the failure has been logged; it never rejects.
 */
async function downloadAndSaveImage(imageUrl) {
    try {
        const urlObj = new URL(imageUrl);

        // Decode percent-escapes (e.g. Chinese file names). basename is applied
        // again after decoding so an encoded separator such as %2F cannot
        // place the file outside imageDir.
        const encodedFileName = path.basename(urlObj.pathname);
        const decodedFileName = path.basename(decodeURIComponent(encodedFileName));

        // Target directory: <root>/<host>/<directory part of the URL path>
        const imageDir = path.join(rootDownloadDir, urlObj.hostname, path.dirname(urlObj.pathname));

        // recursive: true creates missing parents and does nothing when the
        // directory already exists, so no existence check is needed.
        fs.mkdirSync(imageDir, { recursive: true });

        const imagePath = path.join(imageDir, decodedFileName);

        // Request the image as a stream, sending the shared Referer header
        const response = await axios.get(imageUrl, {
            responseType: 'stream',
            headers: {
                Referer: referer,
            },
        });

        // pipe() does not forward errors between streams, and an 'error' event
        // without a listener terminates the process. Listen on both ends and
        // wait for 'finish' so the try/catch below sees every failure.
        await new Promise((resolve, reject) => {
            const fileStream = fs.createWriteStream(imagePath);

            fileStream.on('finish', resolve);
            fileStream.on('error', (writeError) => {
                response.data.destroy();
                reject(writeError);
            });
            response.data.on('error', (readError) => {
                fileStream.destroy();
                reject(readError);
            });

            response.data.pipe(fileStream);
        });

        console.log(`图片已下载到：${imagePath}`);
    } catch (error) {
        console.error(`下载图片时发生错误：${imageUrl}`, error);
    }
}

/**
 * Converts the raw `src` values collected from one page into the list of
 * image URLs to download from the original site (downloadWeb).
 *
 * - Values that already contain downloadWeb are dropped.
 * - Values matching `_<digits>_<digits>` are dropped.
 * - Each remaining value is resolved against the page URL, so page-relative
 *   paths such as "images/a.png" work as well as root-relative ones.
 * - Values that resolve to another origin (external hosts, data: URIs) are
 *   skipped because they cannot be mapped onto downloadWeb.
 * - Duplicates are removed so the same file is never written twice in parallel.
 *
 * @param {string[]} srcList - `src` attribute values found on the page.
 * @param {string} webpageUrl - URL of the page the values were read from.
 * @returns {string[]} Unique absolute image URLs on downloadWeb.
 */
function buildImageUrls(srcList, webpageUrl) {
    const pageOrigin = new URL(webpageUrl).origin;
    const candidates = filterArrayByString(srcList, downloadWeb)
        .filter((src) => !/_\d+_\d+/.test(src));

    const uniqueUrls = new Set();
    for (const src of candidates) {
        let resolved;
        try {
            resolved = new URL(src, webpageUrl);
        } catch (error) {
            console.error(`无法解析图片地址：${src}`);
            continue;
        }

        if (resolved.origin !== pageOrigin) {
            continue;
        }

        // pathname always starts with "/", so this equals the previous
        // prefixing for root-relative sources and is well-formed for the rest.
        uniqueUrls.add(`${downloadWeb}${resolved.pathname}${resolved.search}`);
    }

    return [...uniqueUrls];
}

/**
 * Fetches one page, extracts the `src` of every <img> element and downloads
 * the corresponding images. Errors are logged, never thrown.
 *
 * @param {string} webpageUrl - URL of the page to scan.
 * @returns {Promise<void>} Settles once all downloads for the page have settled.
 */
async function processWebpage(webpageUrl) {
    try {
        const response = await axios.get(webpageUrl);

        if (response.status !== 200) {
            console.error(`无法获取网页内容。状态码：${response.status}`);
            return;
        }

        const $ = cheerio.load(response.data);

        // Collect every non-empty src attribute
        const imageSources = [];
        $('img').each((index, element) => {
            const imgSrc = $(element).attr('src');
            if (imgSrc) {
                imageSources.push(imgSrc);
            }
        });

        const resultArray = buildImageUrls(imageSources, webpageUrl);
        console.log('所有图片地址：', resultArray);

        // downloadAndSaveImage logs its own failures, so this only waits
        await Promise.all(resultArray.map((imageUrl) => downloadAndSaveImage(imageUrl)));
    } catch (error) {
        console.error(`发生错误：${error.message}`);
    }
}

// Process all pages concurrently
webpageUrls.forEach((webpageUrl) => {
    // processWebpage handles its own errors and never rejects
    void processWebpage(webpageUrl);
});

安装依赖

npm install axios
npm install cheerio
npm install decode-uri-component   # 可选：脚本使用的是 JavaScript 内置的 decodeURIComponent，并未引用该包，可以不安装

//如果因为墙的问题导致安装失败，可以安装使用国内镜像（npmmirror，原淘宝镜像）的 cnpm 来执行，安装命令：
npm install -g cnpm --registry=https://registry.npmmirror.com

利用nodejs运行js下载图片

node downloadimg.js

前端技术

批量下载页面img元素图片

新建：downloadimg.js

安装依赖

利用nodejs运行js下载图片

相关内容

用户登录

前端技术

批量下载页面img元素图片

新建：downloadimg.js

安装依赖

利用nodejs运行js下载图片

相关内容

用户登录

还没有账号？