The following 9 code examples, extracted from open-source Python projects, illustrate how to use mxnet.AttrScope(). All of them assume import mxnet as mx (and import pickle as pkl where pickling is used); other helpers and constants, such as cfg, reldiff, or MAX_LEN, come from the examples' original projects.
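As a quick orientation before the examples (this sketch is ours, not from any of the projects below): mx.AttrScope is a context manager whose keyword arguments, which must be strings, are attached as attributes to every symbol created inside the with block; nested scopes merge, with inner values taking precedence.

import mxnet as mx

# Symbols created inside the scope inherit its attributes as strings.
with mx.AttrScope(ctx_group='dev1', lr_mult='2.'):
    x = mx.sym.Variable('x')

print(x.attr('ctx_group'))  # 'dev1'
print(x.attr('lr_mult'))    # '2.'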
def bn(data, name, eps=1.001e-5, fix_gamma=False, use_global_stats=None):
    if use_global_stats is None:
        use_global_stats = cfg.get('bn_use_global_stats', False)
    if fix_gamma:
        with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                eps=eps, fix_gamma=True,
                                use_global_stats=use_global_stats)
    else:
        lr_type = cfg.get('lr_type', 'torch')
        with _attr_scope_lr(lr_type, 'weight'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
        with _attr_scope_lr(lr_type, 'bias'):
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                eps=eps, fix_gamma=False,
                                use_global_stats=use_global_stats)
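In the fix_gamma branch, lr_mult='0.' and wd_mult='0.' zero the learning-rate and weight-decay multipliers for gamma and beta, which effectively freezes them during training. One way to see which attributes a scope attached to which parameter is Symbol.attr_dict(); a minimal sketch of ours, with made-up names:

with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
    gamma = mx.sym.Variable('bn_gamma')
    beta = mx.sym.Variable('bn_beta')
net = mx.sym.BatchNorm(data=mx.sym.Variable('x'), gamma=gamma, beta=beta,
                       name='bn', fix_gamma=True)
# e.g. {'bn_gamma': {'lr_mult': '0.', 'wd_mult': '0.'}, ...}
print(net.attr_dict())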
def encode(self,
           data: mx.sym.Symbol,
           data_length: Optional[mx.sym.Symbol],
           seq_len: int) -> Tuple[mx.sym.Symbol, mx.sym.Symbol, int]:
    """
    Encodes data given sequence lengths of individual examples and maximum sequence length.

    :param data: Input data.
    :param data_length: Vector with sequence lengths.
    :param seq_len: Maximum sequence length.
    :return: Encoded versions of input data (data, data_length, seq_len).
    """
    with mx.AttrScope(__layout__=C.TIME_MAJOR):
        return mx.sym.swapaxes(data=data, dim1=0, dim2=1), data_length, seq_len
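This example appears to come from Sockeye, whose constant C.TIME_MAJOR names the time-major layout; the __layout__ attribute records that the returned symbol carries that layout. The swap itself is an ordinary axis transpose; a minimal NDArray illustration (the shapes are our own):

import mxnet as mx

x = mx.nd.zeros((2, 3, 4))             # batch-major: (batch, time, hidden)
y = mx.nd.swapaxes(x, dim1=0, dim2=1)  # time-major: (time, batch, hidden)
print(y.shape)                         # (3, 2, 4)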
def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {
        'stage1': mx.cpu(1),
        'stage2': mx.cpu(2)
    }

    texec = mlp.simple_bind(mx.cpu(0), group2ctx=group2ctx, data=(1, 200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
def test_chain():
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data1

    with mx.Context(mx.cpu(0)):
        shape = (4, 5)
        arr = [mx.nd.empty(shape) for i in range(n)]
        arr_grad = [mx.nd.empty(shape) for i in range(n)]

        exec1 = net.bind(mx.cpu(),
                         args=arr,
                         args_grad=arr_grad,
                         group2ctx={'dev1': mx.cpu(0), 'dev2': mx.cpu(1)})
        arr[0][:] = 1.0
        arr[1][:] = 2.0
        arr2 = [a.copyto(mx.cpu()) for a in arr]
        arr_grad2 = [a.copyto(mx.cpu()) for a in arr_grad]
        exec2 = net.bind(mx.cpu(),
                         args=arr2,
                         args_grad=arr_grad2)

        # Show the execution plan that involves copynode
        print(exec1.debug_str())

        exec1.forward()
        exec2.forward()
        assert reldiff(exec1.outputs[0].asnumpy(),
                       exec2.outputs[0].asnumpy()) < 1e-6

        out_grad = mx.nd.empty(shape, mx.cpu(1))
        out_grad[:] = 1.0
        exec1.backward([out_grad])
        exec2.backward([out_grad.copyto(mx.cpu())])
        for a, b in zip(arr_grad, arr_grad2):
            assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
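Both tests above rely on the same two-step mechanism: ctx_group only tags symbols with a group name, and the group2ctx argument at bind time maps each group to a concrete device, with untagged symbols falling back to the default context. Stripped to its essentials (a sketch of ours):

import mxnet as mx

with mx.AttrScope(ctx_group='dev1'):
    a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
net = a + b

exe = net.simple_bind(mx.cpu(0), group2ctx={'dev1': mx.cpu(1)},
                      a=(2, 2), b=(2, 2))
print([arr.context for arr in exe.arg_arrays])  # [cpu(1), cpu(0)]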
def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data', attr={'dtype': 'data', 'group': '1'})
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'

    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')
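Note the precedence this test pins down: the explicit attr= dict passed to 'data' wins over the enclosing scope (its group stays '1'), while 'data2', created without explicit attributes, inherits group='4' from the scope. The last two lines additionally check that attributes survive pickling.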
def test_operator():
    data = mx.symbol.Variable('data')
    with mx.AttrScope(group='4', data='great'):
        fc1 = mx.symbol.Activation(data, act_type='relu')
        with mx.AttrScope(init_bias='0.0'):
            fc2 = mx.symbol.FullyConnected(fc1, num_hidden=10, name='fc2')
    assert fc1.attr('data') == 'great'
    assert fc2.attr('data') == 'great'
    assert fc2.attr('init_bias') == '0.0'
    fc2copy = pkl.loads(pkl.dumps(fc2))
    assert fc2copy.tojson() == fc2.tojson()
    fc2weight = fc2.get_internals()['fc2_weight']
def _attr_scope_lr(lr_type, lr_owner):
    assert lr_type in ('alex', 'alex10', 'torch', 'fixed')
    # 'alex': weight (lr_mult, wd_mult) = 1, 1; bias = 2, 0
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope()
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
        else:
            assert False
    # 'alex10': weight = 10, 1; bias = 20, 0
    if lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
        else:
            assert False
    # 'fixed': weight = 0, 0; bias = 0, 0; so apply this to both
    if lr_type == 'fixed':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # 'torch': weight = 1, 1; bias = 1, 1; so do nothing
    return mx.AttrScope()
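A hypothetical use of this helper, giving a bias variable the 'alex' treatment (doubled learning rate, no weight decay); the variable name is illustrative:

with _attr_scope_lr('alex', 'bias'):
    bias = mx.sym.Variable('conv1_bias')
print(bias.attr('lr_mult'), bias.attr('wd_mult'))  # '2.' '0.'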
def get_dssm():
    doc_pos = mx.sym.Variable('doc_pos')
    doc_neg = mx.sym.Variable('doc_neg')
    data_usr = mx.sym.Variable("data_usr", stype='csr')
    #with mx.AttrScope(ctx_group="cpu"):
    w_usr = mx.sym.Variable('usr_weight', stype='row_sparse',
                            shape=(USR_NUM, OUT_DIM))
    # shared weights
    w1 = mx.sym.Variable('fc1_doc_weight')
    w2 = mx.sym.Variable('fc2_doc_weight')
    w3 = mx.sym.Variable('fc3_doc_weight')
    b1 = mx.sym.Variable('fc1_doc_bias')
    b2 = mx.sym.Variable('fc2_doc_bias')
    b3 = mx.sym.Variable('fc3_doc_bias')

    def cosine(usr, doc):
        dot = usr * doc
        dot = mx.sym.sum_axis(dot, axis=1)
        return dot

    def doc_mlp(data):
        fc1 = mx.sym.FullyConnected(data=data, num_hidden=num_hidden,
                                    name='fc1', weight=w1, bias=b1)
        fc1 = mx.sym.Activation(data=fc1, act_type='relu')
        fc2 = mx.sym.FullyConnected(data=fc1, num_hidden=num_hidden,
                                    name='fc2', weight=w2, bias=b2)
        fc2 = mx.sym.Activation(data=fc2, act_type='relu')
        fc3 = mx.sym.FullyConnected(data=fc2, num_hidden=OUT_DIM,
                                    name='fc3', weight=w3, bias=b3)
        fc3 = mx.sym.Activation(data=fc3, act_type='relu')
        fc3 = mx.sym.L2Normalization(data=fc3)
        return fc3

    # usr net
    #with mx.AttrScope(ctx_group="cpu"):
    usr1 = mx.sym.dot(data_usr, w_usr)
    usr = mx.sym.L2Normalization(data=usr1)

    # doc net
    mlp_pos = doc_mlp(doc_pos)
    mlp_neg = doc_mlp(doc_neg)

    cosine_pos = cosine(usr, mlp_pos)
    cosine_neg = cosine(usr, mlp_neg)

    exp = mx.sym.exp(data=(cosine_neg - cosine_pos))
    pred = mx.sym.log1p(data=exp)
    out = mx.sym.MAERegressionOutput(data=pred, name='mae')
    return out
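The two commented-out AttrScope(ctx_group="cpu") lines mark where this project presumably intended to pin the sparse user embedding (w_usr and the dot/L2Normalization over it) to a "cpu" device group; with them disabled, those symbols are simply placed on the default context at bind time.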
def lstm_unroll(num_lstm_layer, num_hidden, dropout=0.,
                concat_decode=True, use_loss=False):
    """unrolled lstm network"""
    # lstm, LSTMParam, LSTMState, MAX_LEN and n_classes come from the
    # surrounding project.
    with mx.AttrScope(ctx_group='decode'):
        cls_weight = mx.sym.Variable("cls_weight")
        cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        with mx.AttrScope(ctx_group='layer%d' % i):
            param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                         i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                         h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                         h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
            state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                              h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)

    # stack LSTM
    hidden = mx.sym.SliceChannel(data=mx.sym.Variable("data"),
                                 num_outputs=MAX_LEN, squeeze_axis=0)
    for i in range(num_lstm_layer):
        next_hidden = []
        for t in range(MAX_LEN):
            with mx.AttrScope(ctx_group='layer%d' % i):
                next_state = lstm(num_hidden, indata=hidden[t],
                                  prev_state=last_states[i],
                                  param=param_cells[i],
                                  layeridx=i, dropout=dropout)
            next_hidden.append(next_state.h)
            last_states[i] = next_state
        hidden = next_hidden[:]

    sm = []
    labels = mx.sym.SliceChannel(data=mx.sym.Variable("labels"),
                                 num_outputs=MAX_LEN, squeeze_axis=0)
    for t in range(MAX_LEN):
        fc = mx.sym.FullyConnected(data=hidden[t],
                                   weight=cls_weight,
                                   bias=cls_bias,
                                   num_hidden=n_classes)
        sm.append(mx.sym.softmax_cross_entropy(fc, labels[t], name="sm"))

    for i in range(num_lstm_layer):
        state = last_states[i]
        state = LSTMState(c=mx.sym.BlockGrad(state.c, name="l%d_last_c" % i),
                          h=mx.sym.BlockGrad(state.h, name="l%d_last_h" % i))
        last_states[i] = state

    unpack_c = [state.c for state in last_states]
    unpack_h = [state.h for state in last_states]
    list_all = sm + unpack_c + unpack_h
    return mx.sym.Group(list_all)
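The ctx_group tags in lstm_unroll only name device groups ('decode', 'layer0', 'layer1', ...); the actual model-parallel placement happens when the resulting symbol is bound. A sketch of ours for building such a mapping (the GPU assignment is hypothetical):

import mxnet as mx

num_lstm_layer = 2
group2ctx = {'decode': mx.gpu(0)}  # hypothetical: decoder on gpu(0)
group2ctx.update({'layer%d' % i: mx.gpu(i % 2)  # layers alternate GPUs
                  for i in range(num_lstm_layer)})
# group2ctx is then passed to bind()/simple_bind(), as in the tests above.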