# ---- download.py ----
import json
import logging
import os
from pathlib import Path
from urllib.request import urlopen, Request

logger = logging.getLogger(__name__)

types = {'image/jpeg', 'image/png'}


def get_links(client_id):
    headers = {'Authorization': 'Client-ID {}'.format(client_id)}
    req = Request('https://api.imgur.com/3/gallery/random/random/', headers=headers, method='GET')
    with urlopen(req) as resp:
        data = json.loads(resp.read().decode('utf-8'))
    return [item['link'] for item in data['data'] if 'type' in item and item['type'] in types]


def download_link(directory, link):
    download_path = directory / os.path.basename(link)
    with urlopen(link) as image, download_path.open('wb') as f:
        f.write(image.read())
    logger.info('Downloaded %s', link)


def setup_download_dir():
    download_dir = Path('images')
    if not download_dir.exists():
        download_dir.mkdir()
    return download_dir


# ---- single.py ----
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)

if __name__ == '__main__':
    main()
    
    
# ---- single2.py ----
import logging
import os
from queue import Queue
from threading import Thread
from time import time

from download import setup_download_dir, get_links, download_link


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)


class DownloadWorker(Thread):

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # Get the work from the queue and expand the tuple
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            finally:
                self.queue.task_done()


def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    for x in range(8):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)

if __name__ == '__main__':
    main()

 Public https://www.toptal.com/python/beginners-guide-to-concurrency-and-parallelism-in-python

Share a link to this review

5.22% issue ratio

R1 Missing type hints

Type hints help humans and linters (like mypy) to understand what to expect "in" and "out" for a function. Not only it serves as a documentation for others (and you after some time, when the code is wiped from your "brain cache"), but also allows using automated tools to find type errors.

L12 Redundant code / overengineering

This code is not really needed or may be simplified

Suggested change:
return [item['link'] for item in data['data'] if item.get('type') in types]
Nice!
You did a great job avoiding this case. Many developers don't.
O15 Using print()

print() is a nice way to output to stdout. But one day you'll need to not only write to stdout, but also, say, to a file. Another day you'll need to output only severe errors' messages, and nothing else. This all could be solved if using logging module. Usually it's as easy as from logging import getLogger; log = getLogger(__name__).

L12 Redundant code / overengineering

This code is not really needed or may be simplified

Suggested change:
download_dir.mkdir(exist_ok=True)
L39 Using generic exception

Exceptions should be easy to catch. If your code throws only Exception or ValueError, then it's very hard to catch specific errors, because all thrown exception classes are the same. Create application-specific exceptions, so that every logical error has its own exception class: class VerySpecificException(Exception): pass

L49 Not using super()

Calling super() is a well-readable way to show that you want to call super method. If you want to call direct parent's method, just use super().method(args). Isn't it beautiful?

O16 While True

Developers always write code without bugs. So there's no chance of writing a code that will never exit infinite loop, right? However, if it happens anyway and your program ends up in an infinite loop, it won't just break and raise an exception. It will get stuck, become unresponsive, and maybe will eat all memory if you allocate something inside the loop. Very dangerous, but easy to avoid - use for loops with iteration limit: for _ in range(10_000): <your code> else: raise LoopError('Ran code 10k times and didn't break from loop, something went terribly wrong')


Create new review request