# ---- download.py ----
# Helpers for listing image links from the Imgur API and saving them to disk.
import json
import logging
import os
from pathlib import Path
from typing import List
from urllib.parse import urlparse
from urllib.request import urlopen, Request

logger = logging.getLogger(__name__)

# MIME types of gallery items that are downloaded; everything else is skipped.
types = {'image/jpeg', 'image/png'}

# Seconds to wait on a network operation before giving up. Without a timeout a
# stalled connection would block the caller (or a worker thread) forever.
DEFAULT_TIMEOUT = 30


class MissingClientIdError(Exception):
    """Raised when the IMGUR_CLIENT_ID environment variable is not set."""


def get_links(client_id: str, timeout: float = DEFAULT_TIMEOUT) -> List[str]:
    """Return the links of the JPEG/PNG items in a random Imgur gallery page.

    Args:
        client_id: Imgur API client id, sent in the ``Authorization`` header.
        timeout: Seconds to wait for the API before ``urlopen`` gives up.

    Returns:
        The ``link`` value of every item whose ``type`` is listed in ``types``.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    headers = {'Authorization': 'Client-ID {}'.format(client_id)}
    req = Request('https://api.imgur.com/3/gallery/random/random/',
                  headers=headers, method='GET')
    with urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read().decode('utf-8'))
    # Items without a 'type' key yield None from .get() and are filtered out.
    return [item['link'] for item in data['data'] if item.get('type') in types]


def download_link(directory: Path, link: str, timeout: float = DEFAULT_TIMEOUT) -> None:
    """Download ``link`` into ``directory``, named after the last URL path segment.

    Args:
        directory: Existing directory the file is written to.
        link: URL of the file to download.
        timeout: Seconds to wait for the server before ``urlopen`` gives up.

    Raises:
        urllib.error.URLError: If the download fails or times out.
        OSError: If the target file cannot be written.
    """
    # Take the name from the URL *path* only, so a query string or fragment
    # (e.g. "abc.jpg?1") does not end up in the file name.
    file_name = os.path.basename(urlparse(link).path)
    download_path = directory / file_name
    with urlopen(link, timeout=timeout) as image, download_path.open('wb') as f:
        f.write(image.read())
    logger.info('Downloaded %s', link)


def setup_download_dir() -> Path:
    """Create the ``images`` directory if needed and return its path."""
    download_dir = Path('images')
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    download_dir.mkdir(exist_ok=True)
    return download_dir


# ---- single.py ----
# Sequential downloader: fetches every link one after another.
import logging
import os
from time import time

from download import MissingClientIdError, setup_download_dir, get_links, download_link

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def main() -> None:
    """Download all links sequentially and log the elapsed time.

    Raises:
        MissingClientIdError: If IMGUR_CLIENT_ID is not set in the environment.
    """
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise MissingClientIdError("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    # Use the module logger (not the root logger) like the rest of the file.
    logger.info('Took %s seconds', time() - ts)


if __name__ == '__main__':
    main()


# ---- single2.py ----
# Threaded downloader: a pool of worker threads consumes links from a queue.
import logging
import os
from queue import Queue
from threading import Thread
from time import time

from download import MissingClientIdError, setup_download_dir, get_links, download_link

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class DownloadWorker(Thread):
    """Thread that repeatedly takes ``(directory, link)`` tuples off a queue and downloads them."""

    def __init__(self, queue: Queue) -> None:
        super().__init__()
        self.queue = queue

    def run(self) -> None:
        """Process queue items forever; the thread is expected to run as a daemon."""
        while True:
            # Get the work from the queue and expand the tuple
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            except Exception:
                # A failed download must not kill the worker: with every worker
                # dead the queue would never drain and queue.join() would hang.
                logger.exception('Failed to download %s', link)
            finally:
                # Always mark the item done so queue.join() can return.
                self.queue.task_done()


def main() -> None:
    """Download all links using 8 worker threads and log the elapsed time.

    Raises:
        MissingClientIdError: If IMGUR_CLIENT_ID is not set in the environment.
    """
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise MissingClientIdError("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    for _ in range(8):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing %s', link)
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logger.info('Took %s', time() - ts)


if __name__ == '__main__':
    main()
Public https://www.toptal.com/python/beginners-guide-to-concurrency-and-parallelism-in-python
Type hints help humans and linters (like `mypy`) understand what a function expects "in" and returns "out". Not only do they serve as documentation for others (and for you after some time, when the code has been wiped from your "brain cache"), but they also allow automated tools to find type errors.
This code is not really needed or may be simplified
return [item['link'] for item in data['data'] if item.get('type') in types]
`print()` is a nice way to write to stdout. But one day you'll need to write not only to stdout but also, say, to a file. Another day you'll need to output only the messages for severe errors, and nothing else. All of this can be solved by using the `logging` module. Usually it's as easy as `from logging import getLogger; log = getLogger(__name__)`.
This code is not really needed or may be simplified
download_dir.mkdir(exist_ok=True)
Exceptions should be easy to catch. If your code raises only `Exception` or `ValueError`, then it's very hard to catch specific errors, because all the raised exception classes are the same. Create application-specific exceptions, so that every logical error has its own exception class: `class VerySpecificException(Exception): pass`
Calling `super()` is a readable way to show that you want to call a parent-class method. If you want to call the direct parent's method, just use `super().method(args)`. Isn't it beautiful?
Developers always write code without bugs. So there's no chance of writing code that never exits an infinite loop, right? However, if it happens anyway and your program ends up in an infinite loop, it won't just break and raise an exception. It will get stuck, become unresponsive, and may eat all the memory if you allocate something inside the loop. Very dangerous, but easy to avoid - use `for` loops with an iteration limit: `for _ in range(10_000): <your code> else: raise LoopError("Ran code 10k times and didn't break from loop, something went terribly wrong")`
Create new review request