2024/10/15

Asynchronous Crawler In PHP, Node.js And Python

Every so often I need to brush up on how to write a crawler, otherwise the skills get rusty from not being used. So here is an async version in each of these three languages.

Node.js

const axios = require('axios');  
const urls = [  
    'https://jsonplaceholder.typicode.com/todos/1',  
    'https://jsonplaceholder.typicode.com/todos/2',  
    'https://jsonplaceholder.typicode.com/todos/3',  
];  
  
(async () => {  
    try {  
        // fire all requests concurrently, then wait until every one has resolved
        const requests = urls.map(url => axios.get(url));
        const responses = await Promise.all(requests);
  
        responses.forEach(response => {  
            console.log(response.data.id);  
        });  
    } catch (error) {  
        console.error(error.message);  
    }  
})();
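One thing to keep in mind: Promise.all is all-or-nothing. If any single request fails, the whole batch rejects and only the error message gets logged; if you would rather keep the successful responses, Promise.allSettled is the drop-in alternative.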

Python

import aiohttp  
import asyncio  
  
urls = [  
    'https://jsonplaceholder.typicode.com/todos/1',  
    'https://jsonplaceholder.typicode.com/todos/2',  
    'https://jsonplaceholder.typicode.com/todos/3',  
]  
  
  
async def fetch_url(session, url):  
    async with session.get(url) as response:  
        return await response.json()  
  
  
async def main():  
    async with aiohttp.ClientSession() as session:  
        # schedule all requests and run them concurrently
        tasks = [fetch_url(session, url) for url in urls]
        responses = await asyncio.gather(*tasks)

        for response in responses:
            print(response.get('id'))
  
  
if __name__ == '__main__':  
    asyncio.run(main())
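asyncio.gather behaves the same way by default and raises on the first failed request. Below is a minimal sketch (reusing the fetch_url and urls defined above; main_settled is just a name I picked) that passes return_exceptions=True so one bad URL doesn't throw away the rest.

async def main_settled():
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        # collect exceptions as results instead of raising the first one
        results = await asyncio.gather(*tasks, return_exceptions=True)

        for url, result in zip(urls, results):
            if isinstance(result, Exception):
                print(f'{url} failed: {result}')
            else:
                print(result.get('id'))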

PHP

<?php

use GuzzleHttp\Client;
use GuzzleHttp\Promise\Utils;

include "vendor/autoload.php";

$urls = [
    'https://jsonplaceholder.typicode.com/todos/1',
    'https://jsonplaceholder.typicode.com/todos/2',
    'https://jsonplaceholder.typicode.com/todos/3',
];

$client = new Client([
    'verify' => false, // skip TLS certificate verification; convenient for testing, avoid in production
]);
$promises = [];

foreach ($urls as $url) {
    $promises[] = $client->getAsync($url);
}

$results = Utils::all($promises)->wait();

foreach ($results as $result) {
    echo json_decode($result->getBody()->getContents(), true)['id'], PHP_EOL;
}
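The same caveat applies here: Utils::all rejects as soon as any promise fails. If memory serves, Utils::settle($promises)->wait() from the same GuzzleHttp\Promise namespace waits for every request and hands back each one's state and value, so partial failures can be handled per URL instead of aborting the whole loop.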
