Scraping Free Proxy Listing Website

Find the data you need here

We provide programming data of 20 most popular languages, hope to help you!

Previous Post | Next Post

Scraping free proxy listing website

IP Address Port Code Country Anonymity Google Https Last Checked 0 2.50.154.155 53281.0 AE United Arab Emirates elite proxy no yes 6 seconds ago 1 134.249.165.49 53281.0 UA Ukraine elite proxy no yes 6 seconds ago 2 158.58.133.106 41258.0 RU Russian Federation elite proxy no yes 6 seconds ago 3 92.52.186.123 32329.0 UA Ukraine elite proxy no

import re

# Matches a dotted-quad IPv4 address with an optional :port suffix.
# Fixes two bugs in the original pattern:
#  * capturing groups made re.findall return per-group tuples such as
#    ('155.', ':8080') instead of the full matched text — use
#    non-capturing (?:...) groups so findall yields whole matches;
#  * the port quantifier {2,4} silently truncated five-digit ports such
#    as 53281 (which the site actually serves) — widened to {2,5}.
PROXY_RE = re.compile(r'(?:[0-9]{1,3}\.){3}[0-9]{1,3}(?::[0-9]{2,5})?')


def extract_proxies(text):
    """Return every IP[:port] string found in *text*, in document order."""
    return PROXY_RE.findall(text)


def main():
    # Local import so extract_proxies stays importable without requests.
    import requests

    url = 'https://free-proxy-list.net/'
    # NOTE(review): "Cafari" looks like a typo for "Safari" — confirm intent
    # before changing the User-Agent string actually sent to the site.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'}

    source = requests.get(url, headers=headers, timeout=10).text
    print(extract_proxies(source))


if __name__ == '__main__':
    main()
import re


def parse_proxy_rows(html):
    """Pull the <td> cell texts out of *html* and group them into dicts.

    Each proxy row contributes four cells we care about: ip, port,
    country code, and anonymity level.  Cells appear either with
    class="hm" or with no class, hence the two-branch pattern.
    """
    # findall returns 2-tuples (one slot per alternative); exactly one
    # slot is non-empty per match, so filter(None, ...) picks it out.
    # NOTE(review): a completely empty <td></td> would leave both slots
    # empty and raise IndexError — the original snippet shares this limit.
    cells = [list(filter(None, m))[0]
             for m in re.findall('<td class="hm">(.*?)</td>|<td>(.*?)</td>', html)]
    return [dict(zip(['ip', 'port', 'code', 'using_anonymous'], cells[i:i + 4]))
            for i in range(0, len(cells), 4)]


def format_proxies(groupings):
    """Collapse each {'ip': ..., 'port': ...} dict into {'full_ip': 'ip:port'}."""
    return [{'full_ip': "{ip}:{port}".format(**g)} for g in groupings]


def main():
    # Local import keeps the parsing helpers importable without requests
    # (the original snippet forgot to import requests at all).
    import requests

    url = 'https://free-proxy-list.net/'
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'}
    # .text is already a str — the original str(...) wrapper was redundant.
    source = requests.get(url, headers=headers, timeout=10).text

    groupings = parse_proxy_rows(source)
    # e.g. [{'ip': '47.88.242.10', 'port': '80', 'code': 'SG',
    #        'using_anonymous': 'anonymous'}, ...]
    final_groupings = format_proxies(groupings)
    # e.g. [{'full_ip': '47.88.242.10:80'}, {'full_ip': '118.189.172.136:80'}, ...]
    print(final_groupings)


if __name__ == '__main__':
    main()
import requests
from bs4 import BeautifulSoup

# Fetch the proxy listing page and print the first two columns (IP:port)
# of every row in the main table.
res = requests.get('https://free-proxy-list.net/', headers={'User-Agent': 'Mozilla/5.0'})
# NOTE(review): "lxml" requires the third-party lxml package; the stdlib
# "html.parser" is a drop-in fallback if lxml is not installed.
soup = BeautifulSoup(res.text, "lxml")
# NOTE(review): the #proxylisttable id reflects the site's markup at the
# time of writing — verify it still matches before relying on this.
for items in soup.select("#proxylisttable tbody tr"):
    # The first two <td> cells of each row are the IP address and the port.
    proxy_list = ':'.join([item.text for item in items.select("td")[:2]])
    print(proxy_list)

# Sample output (was pasted as bare text, which is a syntax error in a
# Python file — kept here as comments):
# 122.183.139.109:8080
# 154.66.122.130:53281
# 110.77.183.158:42619
# 159.192.226.247:54214
# 47.89.41.164:80
import requests
from io import StringIO

import pandas as pd

# Fetch the listing page and let pandas parse the first HTML <table> into
# a DataFrame (columns: IP Address, Port, Code, Country, Anonymity,
# Google, Https, Last Checked).
resp = requests.get('https://free-proxy-list.net/')
# StringIO wrapper: passing a literal HTML string to read_html is
# deprecated in recent pandas releases in favour of file-like objects.
df = pd.read_html(StringIO(resp.text))[0]
print(df)

# Sample output (was pasted as bare text, which breaks the file as Python
# — kept as comments; the trailing all-NaN row is a parsed footer row):
#          IP Address     Port Code               Country    Anonymity Google Https    Last Checked
# 0      2.50.154.155  53281.0   AE  United Arab Emirates  elite proxy     no   yes   6 seconds ago
# 1    134.249.165.49  53281.0   UA               Ukraine  elite proxy     no   yes   6 seconds ago
# 2    158.58.133.106  41258.0   RU    Russian Federation  elite proxy     no   yes   6 seconds ago
# ..              ...      ...  ...                   ...          ...    ...   ...             ...
# 299  166.150.32.182  56074.0   US         United States  elite proxy     no   yes  22 minutes ago
# 300             NaN      NaN  NaN                   NaN          NaN    NaN   NaN             NaN
#
# [301 rows x 8 columns]
#
# Filtering, e.g. df[df['Code'] == 'US'], keeps only United States rows:
#          IP Address     Port Code        Country    Anonymity Google Https    Last Checked
# 32    138.68.53.220   5836.0   US  United States  elite proxy     no   yes   6 seconds ago
# 76   173.217.255.36  33351.0   US  United States  elite proxy     no    no  10 seconds ago
# ...
# 299  166.150.32.182  56074.0   US  United States  elite proxy     no   yes  22 minutes ago

Use Rotating Proxy in Scraping Scripts

Use a proxy from the list to scrape web pages, and switch to another proxy to avoid any single IP being blocked. After a while (about one hour), fetch a fresh proxy list (Step 1). Rotating proxy: when using our rotating proxy service, your script needs only one proxy to …

# curl downloads: https://curl.haxx.se/download.html
# Route the request through the authenticated proxy gateway.
# Change the URL to your target website
curl --proxy http://user:[email protected]:2000/ http://checkip.amazonaws.com

# Sample output
# 149.90.31.59
import requests

# Route both plain-HTTP and HTTPS traffic through the authenticated proxy.
proxies = {
    "http": "http://user:[email protected]:2000",
    "https": "http://user:[email protected]:2000"
}

# Pretend to be Firefox
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0',
    'Accept-Language': 'en-US,en;q=0.5'
}

# Change the URL to your target website
url = "http://checkip.amazonaws.com"
try:
    r = requests.get(url, proxies=proxies, headers=headers, timeout=20)
    # Surface HTTP 4xx/5xx responses as errors instead of printing an
    # error page as if it were the payload.
    r.raise_for_status()
    print(r.text)
except requests.exceptions.RequestException as e:
    # Narrowed from the original bare `except Exception`: only
    # network/HTTP failures are expected here; any other exception is a
    # programming error and should propagate.
    print(e)
# scrapy.org - a scraping framework for Python
# 1. Enable HttpProxyMiddleware in your settings.py
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 1
}

# 2. Pass proxy to request via request.meta
# Change the URL to your target website
# NOTE(review): this fragment is illustrative, not a standalone script —
# it belongs inside a spider callback (e.g. parse), `Request` must be
# imported (`from scrapy import Request`), and `yield` is only valid
# inside a generator method.
request = Request(url="http://checkip.amazonaws.com")
# HttpProxyMiddleware reads the proxy URL (credentials included) from meta.
request.meta['proxy'] = "http://user:[email protected]:2000"
yield request
// use https://www.npmjs.com/package/request
// NOTE(review): the `request` package is deprecated; prefer axios or
// node-fetch for new code.
var request = require('request');

// Change the URL to your target website
var url = 'http://checkip.amazonaws.com';
var proxy = 'http://user:[email protected]:2000';

request({
    url: url,
    proxy: proxy
}, function (error, response, body) {
    if (error) {
        console.log(error);
    } else {
        // Fix: log the response body (the caller's public IP) — the
        // original logged the entire response object, burying the payload
        // in metadata and leaving the `body` parameter unused.
        console.log(body);
    }
});
<?php
// Fetch a page through an authenticated HTTP proxy with cURL.
// Change the URL to your target website
$url = 'http://checkip.amazonaws.com';
$proxy_ip = 'gate.proxy.com';
$proxy_port = '2000';
$userpass = 'username:password';

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXYPORT, $proxy_port);
// Bug fix: CURLOPT_PROXYTYPE expects a CURLPROXY_* constant, not the
// string 'HTTP' (the original only worked because (int)'HTTP' === 0,
// which happens to equal CURLPROXY_HTTP).
curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $userpass);
$data = curl_exec($ch);
if ($data === false) {
    // Surface transport errors instead of silently echoing an empty string.
    $data = 'cURL error: ' . curl_error($ch);
}
curl_close($ch);

echo $data;
import org.apache.http.HttpHost;
import org.apache.http.client.fluent.*;

/**
 * Fetches a page through an authenticated HTTP proxy using the
 * Apache HttpClient fluent API and prints the response body.
 */
public class Example {
    public static void main(String[] args) throws Exception {
        // The proxy gateway all traffic is tunnelled through.
        HttpHost proxy = new HttpHost("gate.proxy.com", 2000);

        // Register the proxy credentials once on the executor.
        Executor executor = Executor.newInstance()
            .auth(proxy, "username", "password");

        // Change the URL to your target website
        Request checkIp = Request.Get("http://checkip.amazonaws.com")
            .viaProxy(proxy);

        String body = executor.execute(checkIp).returnContent().asString();
        System.out.println(body);
    }
}
using System;
using System.Net;

// Fetches a page through an authenticated HTTP proxy and prints the body.
class Example
{
    static void Main()
    {
        // Fix: WebClient implements IDisposable — wrap it in `using` so
        // the underlying connection resources are released
        // deterministically instead of leaking until GC finalization.
        using (var client = new WebClient())
        {
            client.Proxy = new WebProxy("gate.proxy.com:2000");
            client.Proxy.Credentials =
              new NetworkCredential("username", "password");

            // Change the URL to your target website
            Console.WriteLine(client.DownloadString("http://checkip.amazonaws.com"));
        }
    }
}
#!/usr/bin/ruby

# Fetch a page through an authenticated HTTP proxy and print the body.
require 'uri'
require 'net/http'

uri = URI.parse('http://checkip.amazonaws.com')
proxy = Net::HTTP::Proxy('gate.proxy.com', 2000, 'user', 'pass')
# Bug fix: for a URL with no trailing slash URI#path is "" and
# Net::HTTP::Get.new("") builds an invalid request line ("GET  HTTP/1.1");
# fall back to "/" so the request is well-formed.
path = uri.path.empty? ? '/' : uri.path
req = Net::HTTP::Get.new(path)

result = proxy.start(uri.host, uri.port) do |http|
    http.request(req)
end

puts result.body
Imports System.Net

' Fetches a page through an authenticated HTTP proxy and prints the body.
Module Example
    Sub Main()
        ' Fix: WebClient implements IDisposable - dispose it
        ' deterministically with a Using block instead of leaking the
        ' instance until GC finalization.
        Using Client As New WebClient
            Client.Proxy = New WebProxy("http://gate.proxy.com:2000")
            Client.Proxy.Credentials = _
              New NetworkCredential("username", "password")
            ' Change the URL to your target website
            Console.WriteLine(Client.DownloadString("http://checkip.amazonaws.com"))
        End Using
    End Sub
End Module
# youtube-dl: https://github.com/ytdl-org/youtube-dl
# Download a video while routing traffic through the authenticated proxy.
youtube-dl --proxy http://user:[email protected]:2000/ https://www.youtube.com/watch?v=xxxx

Previous Post | Next Post

Related code examples