使用@classmethod的类方法实现更通用的代码(书)

import os
from threading import Thread

class InputData(object):
    """Abstract base class for an input source.

    Subclasses are expected to override ``read``; the base implementation
    only raises ``NotImplementedError``.
    """

    def read(self):
        """Placeholder for the subclass-provided read operation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()


class Worker(object):
    """Abstract base class for a map/reduce worker.

    Subclasses are expected to override both ``map`` and ``reduce``; the
    base implementations only raise ``NotImplementedError``.
    """

    def map(self):
        """Placeholder for the subclass-provided map step.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def reduce(self, other):
        """Placeholder for the subclass-provided reduce step.

        Args:
            other: Another worker to be combined with this one.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()


class PathInputData(InputData):
    """Input source that reads the contents of a single file."""

    def __init__(self, path):
        """Store the path of the file to read.

        Args:
            path: Path of the file whose contents ``read`` returns.
        """
        super(PathInputData, self).__init__()
        self.path = path

    def read(self):
        """Return the entire contents of the file at ``self.path`` as text.

        The file is opened inside a ``with`` block so the handle is closed
        as soon as the read finishes, rather than being left open until the
        garbage collector reclaims it.
        """
        with open(self.path, "r") as handle:
            return handle.read()


class LineCountWorker(Worker):
    """Worker that counts occurrences of the character '1' in its input.

    Despite the class name, ``map`` counts '1' characters, not lines.
    """

    def __init__(self, input_data):
        """Bind the worker to its input and start with a zero result.

        Args:
            input_data: Object exposing a ``read()`` method that returns text.
        """
        self.result = 0
        self.input_data = input_data

    def map(self):
        """Read the input and store its number of '1' characters in ``result``."""
        text = self.input_data.read()
        self.result = text.count('1')

    def reduce(self, other):
        """Add another worker's ``result`` onto this worker's ``result``.

        Args:
            other: Worker whose ``result`` is accumulated into this one.
        """
        self.result += other.result


def generate_inputs(data_dir):
    """Lazily yield one ``PathInputData`` per entry found in ``data_dir``.

    Args:
        data_dir: Directory whose entries are listed with ``os.listdir``.

    Yields:
        A ``PathInputData`` built from ``data_dir`` joined with each entry name.
    """
    entries = os.listdir(data_dir)
    for entry in entries:
        full_path = os.path.join(data_dir, entry)
        yield PathInputData(full_path)


def create_workers(input_list):
    """Wrap every input object in its own ``LineCountWorker``.

    Args:
        input_list: Iterable of input objects (e.g. ``PathInputData`` instances).

    Returns:
        A list with one ``LineCountWorker`` per item, in iteration order.
    """
    return [LineCountWorker(item) for item in input_list]


def execute(workers):
    """Run each worker's ``map`` in its own thread, then fold the results.

    Args:
        workers: Iterable of objects exposing ``map()``, ``reduce(other)``
            and a ``result`` attribute.

    Returns:
        The ``result`` of the first worker after all remaining workers have
        been reduced into it, or ``None`` when ``workers`` is empty.
    """
    # Materialise once so generators and other one-shot iterables are accepted.
    workers = list(workers)
    if not workers:
        # No worker to reduce into; indexing workers[0] would raise IndexError.
        return None

    threads = [Thread(target=worker.map) for worker in workers]
    for thread in threads:
        thread.start()
    # Wait for every map step to finish before combining results.
    for thread in threads:
        thread.join()

    first, rest = workers[0], workers[1:]
    for worker in rest:
        first.reduce(worker)
    return first.result


def mapreduce(data_dir):
    """Run the whole pipeline over ``data_dir`` and return the combined result.

    The inputs produced by ``generate_inputs`` are wrapped in workers by
    ``create_workers`` and the workers are then run by ``execute``.

    Args:
        data_dir: Directory passed to ``generate_inputs``.

    Returns:
        Whatever ``execute`` returns for the created workers.
    """
    return execute(create_workers(generate_inputs(data_dir)))


if __name__ == "__main__":
    # Script entry point: run mapreduce over a hard-coded directory and print the result.
    print(mapreduce("e:/ddd/"))  # count how many "1" characters there are

import os
from threading import Thread

class GenerateInputData(object):
    """Abstract base class for an input source that can also enumerate itself.

    Subclasses are expected to override ``read`` and the ``generate_inputs``
    class method; the base implementations only raise ``NotImplementedError``.
    """

    def read(self):
        """Placeholder for the subclass-provided read operation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    @classmethod
    def generate_inputs(cls, config):
        """Placeholder for the subclass-provided factory of input objects.

        Args:
            config: Configuration object handed to the subclass implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()


class GenerateWorker(object):
    """Abstract base class for a map/reduce worker that can build its own instances.

    Subclasses are expected to override ``map``, ``reduce`` and the
    ``create_workers`` class method; the base implementations only raise
    ``NotImplementedError``.
    """

    def map(self):
        """Placeholder for the subclass-provided map step.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def reduce(self, other):
        """Placeholder for the subclass-provided reduce step.

        Args:
            other: Another worker to be combined with this one.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Placeholder for the subclass-provided factory of workers.

        Args:
            input_class: Class used by the subclass to produce input objects.
            config: Configuration object handed to the subclass implementation.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()


class PathInputData(GenerateInputData):
    """Input source that reads a single file and can enumerate a directory."""

    def __init__(self, path):
        """Store the path of the file to read.

        Args:
            path: Path of the file whose contents ``read`` returns.
        """
        self.path = path

    def read(self):
        """Return the entire contents of the file at ``self.path`` as text.

        The file is opened inside a ``with`` block so the handle is closed
        as soon as the read finishes, rather than being left open until the
        garbage collector reclaims it.
        """
        with open(self.path) as handle:
            return handle.read()

    @classmethod
    def generate_inputs(cls, config):
        """Lazily yield one instance of ``cls`` per entry in the configured directory.

        Args:
            config: Mapping with a ``'data_dir'`` key naming the directory to list.

        Yields:
            ``cls`` instances built from the directory joined with each entry name.
        """
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))


class LineCountWorker(GenerateWorker):
    """Worker that counts occurrences of the character '1' in its input.

    Despite the class name, ``map`` counts '1' characters, not lines.
    """

    def __init__(self, input_data):
        """Bind the worker to its input and start with a zero result.

        Args:
            input_data: Object exposing a ``read()`` method that returns text.
        """
        self.result = 0
        self.input_data = input_data

    def map(self):
        """Read the input and store its number of '1' characters in ``result``."""
        text = self.input_data.read()
        self.result = text.count('1')

    def reduce(self, other):
        """Add another worker's ``result`` onto this worker's ``result``.

        Args:
            other: Worker whose ``result`` is accumulated into this one.
        """
        self.result += other.result

    @classmethod
    def create_workers(cls, input_class, config):
        """Build one worker of type ``cls`` per input produced by ``input_class``.

        Args:
            input_class: Class whose ``generate_inputs(config)`` yields inputs.
            config: Configuration object forwarded to ``generate_inputs``.

        Returns:
            A list of ``cls`` instances, one per generated input, in order.
        """
        return [cls(item) for item in input_class.generate_inputs(config)]


def execute(workers):
    """Run each worker's ``map`` in its own thread, then fold the results.

    Args:
        workers: Iterable of objects exposing ``map()``, ``reduce(other)``
            and a ``result`` attribute.

    Returns:
        The ``result`` of the first worker after all remaining workers have
        been reduced into it, or ``None`` when ``workers`` is empty.
    """
    # Materialise once so generators and other one-shot iterables are accepted.
    workers = list(workers)
    if not workers:
        # No worker to reduce into; indexing workers[0] would raise IndexError.
        return None

    threads = [Thread(target=worker.map) for worker in workers]
    for thread in threads:
        thread.start()
    # Wait for every map step to finish before combining results.
    for thread in threads:
        thread.join()

    first, rest = workers[0], workers[1:]
    for worker in rest:
        first.reduce(worker)
    return first.result


def mapreduce(work_class, input_class, config):
    """Create workers through ``work_class`` and run them with ``execute``.

    Args:
        work_class: Class providing ``create_workers(input_class, config)``.
        input_class: Input class forwarded to ``create_workers``.
        config: Configuration object forwarded to ``create_workers``.

    Returns:
        Whatever ``execute`` returns for the created workers.
    """
    return execute(work_class.create_workers(input_class, config))


if __name__ == "__main__":
    # Script entry point: the directory to process is supplied through a config mapping.
    config = {'data_dir': 'e:/ddd/'}
    # Plug the concrete worker and input classes into the generic mapreduce and print the result.
    print(mapreduce(LineCountWorker, PathInputData, config))

posted @ 2018-03-26 20:47  lilied  阅读(155)  评论(0)  编辑  收藏  举报