Examples
All examples are also available at GitHub
"""Find and show 10 working HTTP(S) proxies."""
import asyncio
from proxybroker import Broker
async def show(proxies):
    """Print each proxy taken from the queue until a ``None`` item arrives."""
    # The ``None`` item is consumed from the queue but never printed.
    while (found := await proxies.get()) is not None:
        print("Found proxy: %s" % found)
async def main():
    """Search for 10 HTTP(S) proxies and print them while the search runs."""
    queue = asyncio.Queue()
    finder = Broker(queue)
    # The search and the printer share one queue and run concurrently.
    search = finder.find(types=["HTTP", "HTTPS"], limit=10)
    await asyncio.gather(search, show(queue))
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
"""Find 10 working HTTP(S) proxies and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write one ``scheme://host:port`` line per queued proxy to ``filename``.

    Reading stops at the first ``None`` item taken from the queue.
    """
    with open(filename, "w") as out:
        while (proxy := await proxies.get()) is not None:
            # Use "https" when the proxy lists HTTPS among its types.
            scheme = "http"
            if "HTTPS" in proxy.types:
                scheme = "https"
            out.write("%s://%s:%d\n" % (scheme, proxy.host, proxy.port))
async def main():
    """Search for 10 HTTP(S) proxies and store them in ``proxies.txt``."""
    queue = asyncio.Queue()
    finder = Broker(queue)
    # The search and the file writer share one queue and run concurrently.
    search = finder.find(types=["HTTP", "HTTPS"], limit=10)
    writer = save(queue, filename="proxies.txt")
    await asyncio.gather(search, writer)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
"""Find working proxies and use them concurrently.
Note: Pay attention to Broker.serve() as an alternative to the code listed below.
It may be much more useful and friendlier.
"""
import asyncio
from urllib.parse import urlparse
import aiohttp
from proxybroker import Broker, ProxyPool
from proxybroker.errors import NoProxyError
async def fetch(url, proxy_pool, timeout=10):
    """Download ``url`` as text through a proxy taken from ``proxy_pool``.

    Args:
        url: Address to request; its scheme is passed to ``proxy_pool.get()``.
        proxy_pool: Object providing an awaitable ``get(scheme=...)`` and a
            ``put(proxy)`` method used to hand the proxy back.
        timeout: Total request timeout, passed to ``aiohttp.ClientTimeout``.

    Returns:
        A ``(url, text)`` tuple. ``text`` is ``None`` when the request failed
        with one of the handled errors.
    """
    resp, proxy = None, None
    try:
        print("Waiting a proxy...")
        proxy = await proxy_pool.get(scheme=urlparse(url).scheme)
        print("Found proxy:", proxy)
        proxy_url = "http://%s:%d" % (proxy.host, proxy.port)
        _timeout = aiohttp.ClientTimeout(total=timeout)
        async with (
            aiohttp.ClientSession(timeout=_timeout) as session,
            session.get(url, proxy=proxy_url) as response,
        ):
            resp = await response.text()
    except (
        aiohttp.ClientOSError,
        aiohttp.ClientResponseError,
        aiohttp.ServerDisconnectedError,
        asyncio.TimeoutError,
        NoProxyError,
    ) as e:
        # Interpolate with ``%``; passing the values as extra print()
        # arguments would output the raw format string instead.
        print("Error!\nURL: %s;\nError: %r\n" % (url, e))
    finally:
        # Hand the proxy back to the pool whether or not the request worked.
        if proxy:
            proxy_pool.put(proxy)
    # Returned after (not inside) ``finally`` so that unexpected exceptions
    # and task cancellation are not silently discarded.
    return (url, resp)
async def get_pages(urls, proxy_pool, timeout=10):
    """Fetch every URL concurrently and print each result as it completes."""
    separator = "-" * 20
    pending = [fetch(address, proxy_pool, timeout) for address in urls]
    # as_completed yields awaitables in completion order, not input order.
    for finished in asyncio.as_completed(pending):
        page_url, body = await finished
        print("%s\nDone!\nURL: %s;\nContent: %s" % (separator, page_url, body))
async def main():
    """Find filtered HTTP proxies and fetch a set of test URLs through them.

    The broker and the ``ProxyPool`` are both built on the same queue, and the
    proxy search runs concurrently with the page downloads.
    """
    proxies = asyncio.Queue()
    proxy_pool = ProxyPool(proxies)
    judges = [
        "http://httpbin.org/get?show_env",
        "https://httpbin.org/get?show_env",
    ]
    providers = [
        "http://www.proxylists.net/",
        "http://ipaddress.com/proxy-list/",
        "https://www.sslproxies.org/",
    ]
    broker = Broker(
        proxies,
        timeout=8,
        max_conn=200,
        max_tries=3,
        verify_ssl=False,
        judges=judges,
        providers=providers,
    )
    types = [("HTTP", ("Anonymous", "High"))]
    # ISO 3166-1 alpha-2 country codes: the United Kingdom is "GB", not "UK".
    countries = ["US", "GB", "DE", "FR"]
    urls = [
        "http://httpbin.org/get",
        "http://httpbin.org/redirect/1",
        "http://httpbin.org/anything",
        "http://httpbin.org/status/404",
    ]
    await asyncio.gather(
        broker.find(types=types, countries=countries, strict=True, limit=10),
        get_pages(urls, proxy_pool),
    )
    # broker.show_stats(verbose=True)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
"""Find 10 working proxies supporting the CONNECT method
to port 25 (SMTP) and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write one ``smtp://host:port`` line per queued proxy to ``filename``.

    Reading stops at the first ``None`` item taken from the queue.
    """
    with open(filename, "w") as sink:
        proxy = await proxies.get()
        while proxy is not None:
            sink.write("smtp://%s:%d\n" % (proxy.host, proxy.port))
            proxy = await proxies.get()
def main():
    """Find 10 proxies allowing CONNECT to port 25 and save them to ``proxies.txt``.

    The function stays synchronous for its caller; the asynchronous work runs
    in an inner coroutine driven by ``asyncio.run()``. This avoids calling
    ``asyncio.gather()`` and ``asyncio.get_event_loop()`` without a running
    loop, which is deprecated and fails on recent Python versions.
    """
    # Check proxy in spam databases (DNSBL). Disabled by default.
    # More databases: http://www.dnsbl.info/dnsbl-database-check.php
    dnsbl = [
        "bl.spamcop.net",
        "cbl.abuseat.org",
        "dnsbl.sorbs.net",
        "zen.spamhaus.org",
        "bl.mcafee.com",
        "spam.spamrats.com",
    ]

    async def _run():
        # Create the queue and the broker while the event loop is running so
        # that they are tied to the loop that actually executes the tasks.
        proxies = asyncio.Queue()
        broker = Broker(proxies, judges=["smtp://smtp.gmail.com"], max_tries=1)
        await asyncio.gather(
            broker.find(types=["CONNECT:25"], dnsbl=dnsbl, limit=10),
            save(proxies, filename="proxies.txt"),
        )

    asyncio.run(_run())
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
"""Gather proxies from the providers without
checking and save them to a file."""
import asyncio
from proxybroker import Broker
async def save(proxies, filename):
    """Write one ``host:port`` line per queued proxy to ``filename``.

    Reading stops at the first ``None`` item taken from the queue.
    """
    with open(filename, "w") as handle:
        while (entry := await proxies.get()) is not None:
            line = "%s:%d\n" % (entry.host, entry.port)
            handle.write(line)
async def main():
    """Grab up to 10 proxies for the listed countries and store them in ``proxies.txt``."""
    queue = asyncio.Queue()
    collector = Broker(queue)
    # The grab and the file writer share one queue and run concurrently.
    grabbing = collector.grab(countries=["US", "GB"], limit=10)
    writing = save(queue, filename="proxies.txt")
    await asyncio.gather(grabbing, writing)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
"""Run a local proxy server that distributes
incoming requests to external proxies."""
import asyncio
import aiohttp
from proxybroker import Broker
async def fetch(url, proxy_url):
    """Request ``url`` through ``proxy_url`` and decode the response as JSON.

    Args:
        url: Address to request.
        proxy_url: Proxy address passed to ``session.get(..., proxy=...)``.

    Returns:
        A ``(url, data)`` tuple. ``data`` is ``None`` when the request failed
        with one of the handled client errors.
    """
    resp = None
    try:
        async with (
            aiohttp.ClientSession() as session,
            session.get(url, proxy=proxy_url) as response,
        ):
            resp = await response.json()
    except (
        # These exceptions live at the top level of the aiohttp package; the
        # old ``aiohttp.errors`` module does not exist in current releases.
        aiohttp.ClientOSError,
        aiohttp.ClientResponseError,
        aiohttp.ServerDisconnectedError,
    ) as e:
        print("Error!\nURL: %s;\nError: %r" % (url, e))
    # A plain return (rather than ``return`` inside ``finally``) lets
    # unexpected exceptions and task cancellation propagate to the caller.
    return (url, resp)
async def get_pages(urls, proxy_url):
    """Fetch every URL concurrently through ``proxy_url`` and print each result."""
    rule = "-" * 20
    # as_completed yields awaitables in completion order, not input order.
    for finished in asyncio.as_completed([fetch(address, proxy_url) for address in urls]):
        page_url, payload = await finished
        print("%s\nDone!\nURL: %s;\nContent: %s" % (rule, page_url, payload))
def main():
    """Start the local proxy server on 127.0.0.1:8888 via ``Broker.serve()``."""
    host, port = "127.0.0.1", 8888  # by default
    # Broker.serve() also supports all arguments that are accepted by the
    # Broker.find() method: data, countries, post, strict, dnsbl.
    serve_options = {
        "host": host,
        "port": port,
        "types": [("HTTP", "High"), "HTTPS", "CONNECT:80"],
        "limit": 10,
        "max_tries": 3,
        "prefer_connect": True,
        "min_req_proxy": 5,
        "max_error_rate": 0.5,
        "max_resp_time": 8,
        "http_allowed_codes": [200, 301, 302],
        "backlog": 100,
    }
    broker = Broker(max_tries=1)
    # Note: serve() creates its own event loop
    broker.serve(**serve_options)
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()