#!/usr/bin/python2.7
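"""Top-level DNN optimizer.

Dispatches each layer of a DNN description to a per-layer optimizer
(constrained, exhaustive, or combined; 2D or 3D), with special handling
for deconvolution layers, which can be split into sub-kernels or
combined into a single equivalent convolution.
"""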
# import my own modules
import layer_optimizer
import layer_static_method
import layer_exhaustive_searcher
import deconv_exhaustive_searcher
import layer3d_optimizer
import layer3d_exhaustive_searcher
method = None
buffer_partition = None
enable = {
"static" : False,
"combine" : False,
"split" : False,
}
def setup(meta_data, hardware_constraints):
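    """Configure the module-level search method and schedule policy.

    Raises an Exception when static scheduling is requested without a
    buffer partition, since the static method requires one.
    """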
global enable, method, buffer_partition
# define the search method
method = meta_data["method"]
    if meta_data["schedule"]["static"] and \
            "buffer_partition" not in meta_data:
        raise Exception("Static scheduling is not supported"
                        " without specifying a buffer partition.")
if "buffer_partition" in meta_data:
buffer_partition = meta_data["buffer_partition"]
# set the schedule policy
enable["static"] = meta_data["schedule"]["static"]
enable["combine"] = meta_data["schedule"]["combine"]
enable["split"] = meta_data["schedule"]["split"]
def single_layer_optimization(data, sys_info):
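    """Optimize a single layer with the configured search method.

    The static method, when enabled, takes priority; otherwise the
    layer is dispatched by method ("Constrained", "Exhaustive", or
    "Combined") and by dimensionality ("2D" or 3D).
    """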
global method, enable, buffer_partition
# if "static" option is enabled, it will be prioritized
if enable["static"]:
return layer_static_method.\
LayerStaticMethod(data, sys_info, buffer_partition).optimize()
    # dispatch on the configured search method
if method == "Constrained":
if data["type"] == "2D":
return layer_optimizer.\
LayerOptimizer(data, sys_info).optimize()
else:
return layer3d_optimizer.\
Layer3dOptimizer(data, sys_info).optimize()
elif method == "Exhaustive":
if data["type"] == "2D":
return layer_exhaustive_searcher.\
LayerExhaustiveSearcher(data, sys_info).optimize()
else:
return layer3d_exhaustive_searcher.\
Layer3dExhaustiveSearcher(data, sys_info).optimize()
elif method == "Combined":
return layer_optimizer.\
LayerOptimizer(data, sys_info, True).optimize()
else:
raise Exception("Unknown search method: {}".format(method))
def single_combine_optimization(data, sys_info):
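    """Optimize one combined deconv layer.

    Unlike single_layer_optimization, the exhaustive method dispatches
    to the deconv-specific searcher.
    """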
global method
if method == "Constrained":
return layer_optimizer.\
LayerOptimizer(data, sys_info).optimize()
elif method == "Exhaustive":
return deconv_exhaustive_searcher.\
DeconvExhaustiveSearcher(data, sys_info).optimize()
elif method == "Combined":
return layer_optimizer.\
LayerOptimizer(data, sys_info, True).optimize()
else:
raise Exception("Unknown search method: {}".format(method))
def sub_kernel_sizes(layer):
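    """Enumerate all sub-kernel sizes for splitting a deconv kernel.

    Each kernel dimension k is split into a ceil(k/2) and a floor(k/2)
    half, so an n-dimensional kernel yields 2**n size combinations.
    """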
    add_one = [(i + 1) // 2 for i in layer["kernel"]]
    sub_one = [i // 2 for i in layer["kernel"]]
sizes = [[]]
for i in range(len(layer["kernel"])):
tmp = []
for j in sizes:
e1 = list(j) + [add_one[i]]
e2 = list(j) + [sub_one[i]]
tmp += [e1, e2]
sizes = tmp
return sizes
def single_split_optimization(layer, sys_info):
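    """Optimize every sub-kernel of a split deconv layer independently."""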
subs = []
    # iterate over all possible sub-kernel sizes
for sub_size in sub_kernel_sizes(layer):
sub = dict(layer)
sub["kernel"] = sub_size
subs.append(single_layer_optimization(sub, sys_info))
return subs
def opti_deconv(layer, sys_info):
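    """Optimize a deconv layer by decomposing it into sub-kernels.

    An odd-sized kernel is either combined into one equivalent layer or
    split into sub-kernels. An even-sized kernel is halved into four
    same-size sub-kernels, which are either merged into one layer with
    4x the output channels or optimized once with the resulting memory
    traffic and cycles scaled by 4.
    """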
global method, enable
    # collect the individual results from the sub-kernels
subs = []
    # if the kernel size is odd
    if layer["kernel"][0] % 2 == 1:
if enable["combine"]:
subs.append(single_combine_optimization(layer, sys_info))
else:
subs = single_split_optimization(layer, sys_info)
    # if the kernel size is even
else:
        sub = dict(layer)
        # copy the kernel list so the original layer is not mutated
        sub["kernel"] = list(layer["kernel"])
        sub["kernel"][0] = sub["kernel"][0] // 2
        sub["kernel"][1] = sub["kernel"][1] // 2
        if enable["combine"]:
# this will consider four same-size sub-kernels
# as one sub-kernel with more channels
sub["out_channel"] = sub["out_channel"]*4
            subs.append(single_layer_optimization(sub, sys_info))
else:
# without combining sub-kernels
res = single_layer_optimization(sub, sys_info)
            # scale each individual sub-kernel's
            # memory traffic and cycles by 4.
res["total_traffic"] = res["total_traffic"]*4
res["total_cycle"] = res["total_cycle"]*4
subs.append(res)
return subs
# the main routine for optimizing the DNN.
def opti_dnn(meta_data, hardware_constraints):
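    """Optimize each layer of the DNN and return the per-layer results."""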
# set up the configurations;
setup(meta_data, hardware_constraints)
dnn = meta_data["dnn"]
sys_info = meta_data["system_info"]
results = []
# optimize for each layer
    for layer in dnn:
        # copy the layer description before deriving shapes
        data = dict(layer)
        # check whether this layer is a Deconv layer
        if layer["Deconv?"]:
if enable["split"]:
                # split the deconv into smaller sub-kernels
results.append({
"data" : data,
"result" : opti_deconv(layer, sys_info)
})
else:
data["ofmap"] = [0] * len(data["ifmap"])
# scale up the ifmap to the ifmap based on the stride size.
for i in range(len(data["ifmap"])-1):
data["ifmap"][i] = layer["ifmap"][i]*2/layer["stride"]
data["ofmap"][j] = layer["ifmap"][j]/layer["stride"]
# the last element is ofmap channel, so treat it separately
data["ofmap"][-1] = data["out_channel"]
# add the result
results.append({
"data" : data,
"result" : single_layer_optimization(data, sys_info)
})
else:
data["ofmap"] = [0] * len(data["ifmap"])
# scale down the ifmap to the ifmap based on the stride size.
for j in range(len(data["ifmap"])-1):
data["ofmap"][j] = layer["ifmap"][j]/layer["stride"]
# the last element is ofmap channel, so treat it separately
data["ofmap"][-1] = data["out_channel"]
results.append({
"data" : data,
"result" : single_layer_optimization(data, sys_info)
})
return results
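

# A minimal usage sketch (not part of the original module): the concrete
# layer values and the empty "system_info" below are illustrative
# assumptions; the keys mirror the ones this module actually reads, and
# the per-layer optimizer modules define what "system_info" must contain.
if __name__ == "__main__":
    example_meta_data = {
        "method": "Constrained",
        "schedule": {"static": False, "combine": True, "split": True},
        "system_info": {},  # placeholder for the hardware description
        "dnn": [{
            "type": "2D",
            "Deconv?": False,
            "kernel": [3, 3],
            "ifmap": [32, 32, 16],
            "stride": 1,
            "out_channel": 32,
        }],
    }
    for entry in opti_dnn(example_meta_data, hardware_constraints=None):
        print(entry["result"])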