
Anders221/project_3408244

forked from Carl-Xie/AutodiffEngine 
This repository has not declared an open-source license file (LICENSE); before using it, check the project description and the upstream dependencies of its code.
autodiff.py 18.78 KB
Carl-Xie committed on 2018-03-18 20:33 · Migration
import numpy as np


class Node(object):
    """Node in a computation graph."""
    def __init__(self):
        """Constructor, new node is indirectly created by Op object __call__ method.

        Instance variables
        ------------------
        self.inputs: the list of input nodes.
        self.op: the associated op object,
            e.g. add_op object if this node is created by adding two other nodes.
        self.const_attr: the add or multiply constant,
            e.g. self.const_attr=5 if this node is created by x+5.
        self.name: node name for debugging purposes.
        """
        self.inputs = []
        self.op = None
        self.const_attr = None
        self.name = ""

    def __add__(self, other):
        """Adding two nodes returns a new node."""
        if isinstance(other, Node):
            new_node = add_op(self, other)
        else:
            # Adding a constant stores the constant in the new node's
            # const_attr field; the 'other' argument is a constant here.
            new_node = add_byconst_op(self, other)
        return new_node

    def __mul__(self, other):
        if isinstance(other, Node):
            new_node = mul_op(self, other)
        else:
            new_node = mul_byconst_op(self, other)
        return new_node

    def __truediv__(self, other):
        if isinstance(other, Node):
            new_node = div_op(self, other)
        else:
            new_node = div_byconst_op(self, other)
        return new_node

    def __rtruediv__(self, other):
        # Called for other / self, so the operands must be swapped.
        if isinstance(other, Node):
            new_node = div_op(other, self)
        else:
            new_node = rdiv_byconst_op(self, other)
        return new_node

    def __sub__(self, other):
        if isinstance(other, Node):
            new_node = sub_op(self, other)
        else:
            new_node = sub_byconst_op(self, other)
        return new_node

    def __rsub__(self, other):
        # Called for other - self, so the operands must be swapped.
        if isinstance(other, Node):
            new_node = sub_op(other, self)
        else:
            new_node = rsub_byconst_op(self, other)
        return new_node

    def __neg__(self):
        return neg_op(self)

    # Allow left-hand-side add and multiply.
    __radd__ = __add__
    __rmul__ = __mul__

    def __str__(self):
        """Allow print to display node name."""
        return self.name


def Variable(name):
    """User defined variables in an expression.
    e.g. x = Variable(name="x")
    """
    placeholder_node = placeholder_op()
    placeholder_node.name = name
    return placeholder_node


class Op(object):
    """Op represents operations performed on nodes."""
    def __call__(self):
        """Create a new node and associate the op object with the node.

        Returns
        -------
        The new node object.
        """
        new_node = Node()
        new_node.op = self
        return new_node

    def compute(self, node, input_vals):
        """Given values of input nodes, compute the output value.

        Parameters
        ----------
        node: node that performs the compute.
        input_vals: values of input nodes.

        Returns
        -------
        An output value of the node.
        """
        assert False, "Implemented in subclass"

    def gradient(self, node, output_grad):
        """Given value of output gradient, compute gradient contributions to each input node.

        Parameters
        ----------
        node: node that performs the gradient.
        output_grad: value of output gradient summed from children nodes' contributions.

        Returns
        -------
        A list of gradient contributions to each input node respectively.
        """
        assert False, "Implemented in subclass"


class NegOp(Op):
    def __call__(self, node):
        new_node = Op.__call__(self)
        new_node.inputs = [node]
        new_node.name = "-%s" % node.name
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return -input_vals[0]

    def gradient(self, node, output_grad):
        return [-output_grad]


class AddOp(Op):
    """Op to element-wise add two nodes."""
    def __call__(self, node_A, node_B):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A, node_B]
        new_node.name = "(%s+%s)" % (node_A.name, node_B.name)
        return new_node

    def compute(self, node, input_vals):
        """Given values of two input nodes, return result of element-wise addition."""
        assert len(input_vals) == 2
        return input_vals[0] + input_vals[1]

    def gradient(self, node, output_grad):
        """Given gradient of add node, return gradient contributions to each input."""
        return [output_grad, output_grad]


class SubOp(Op):
    def __call__(self, node_A, node_B):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A, node_B]
        new_node.name = "%s-%s" % (node_A.name, node_B.name)
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 2
        return input_vals[0] - input_vals[1]

    def gradient(self, node, output_grad):
        return [output_grad, -output_grad]


class AddByConstOp(Op):
    """Op to element-wise add a node and a constant."""
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.const_attr = const_val
        new_node.inputs = [node_A]
        new_node.name = "(%s+%s)" % (node_A.name, str(const_val))
        return new_node

    def compute(self, node, input_vals):
        """Given value of input node, return result of element-wise addition."""
        assert len(input_vals) == 1
        return input_vals[0] + node.const_attr

    def gradient(self, node, output_grad):
        """Given gradient of add node, return gradient contribution to input."""
        return [output_grad]


class SubByConstOp(Op):
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.const_attr = const_val
        new_node.inputs = [node_A]
        new_node.name = "(%s-%s)" % (node_A.name, str(const_val))
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return input_vals[0] - node.const_attr

    def gradient(self, node, output_grad):
        return [output_grad]


class RSubByConstOp(Op):
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.const_attr = const_val
        new_node.inputs = [node_A]
        new_node.name = "(%s-%s)" % (str(const_val), node_A.name)
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return node.const_attr - input_vals[0]

    def gradient(self, node, output_grad):
        return [-output_grad]


class MulOp(Op):
    """Op to element-wise multiply two nodes."""
    def __call__(self, node_A, node_B):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A, node_B]
        new_node.name = "(%s*%s)" % (node_A.name, node_B.name)
        return new_node

    def compute(self, node, input_vals):
        """Given values of two input nodes, return result of element-wise multiplication."""
        assert len(input_vals) == 2
        return input_vals[0] * input_vals[1]

    def gradient(self, node, output_grad):
        """Given gradient of multiply node, return gradient contributions to each input."""
        return [node.inputs[1] * output_grad, node.inputs[0] * output_grad]


class DivOp(Op):
    def __call__(self, node_A, node_B):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A, node_B]
        new_node.name = "%s/%s" % (node_A.name, node_B.name)
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 2
        return input_vals[0] / input_vals[1]

    def gradient(self, node, output_grad):
        # d(a/b)/da = 1/b and d(a/b)/db = -a/b^2.
        return [output_grad / node.inputs[1],
                -output_grad * node.inputs[0] / (node.inputs[1] * node.inputs[1])]


class DivByConstOp(Op):
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A]
        new_node.const_attr = const_val
        new_node.name = "%s/%s" % (node_A.name, str(const_val))
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return input_vals[0] / node.const_attr

    def gradient(self, node, output_grad):
        return [output_grad / node.const_attr]


class RDivByConstOp(Op):
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.inputs = [node_A]
        new_node.const_attr = const_val
        new_node.name = "%s/%s" % (str(const_val), node_A.name)
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return node.const_attr / input_vals[0]

    def gradient(self, node, output_grad):
        # d(c/x)/dx = -c/x^2.
        return [-output_grad * node.const_attr / (node.inputs[0] * node.inputs[0])]


class MulByConstOp(Op):
    """Op to element-wise multiply a node by a constant."""
    def __call__(self, node_A, const_val):
        new_node = Op.__call__(self)
        new_node.const_attr = const_val
        new_node.inputs = [node_A]
        new_node.name = "(%s*%s)" % (node_A.name, str(const_val))
        return new_node

    def compute(self, node, input_vals):
        """Given value of input node, return result of element-wise multiplication."""
        assert len(input_vals) == 1
        return input_vals[0] * node.const_attr

    def gradient(self, node, output_grad):
        """Given gradient of multiplication node, return gradient contribution to input."""
        return [output_grad * node.const_attr]


class MatMulOp(Op):
    """Op to matrix multiply two nodes."""
    def __call__(self, node_A, node_B, trans_A=False, trans_B=False):
        """Create a new node that is the result of matrix multiplying two input nodes.

        Parameters
        ----------
        node_A: lhs of matrix multiply
        node_B: rhs of matrix multiply
        trans_A: whether to transpose node_A
        trans_B: whether to transpose node_B

        Returns
        -------
        Returns a node that is the result of matrix multiplying two input nodes.
        """
        new_node = Op.__call__(self)
        new_node.matmul_attr_trans_A = trans_A
        new_node.matmul_attr_trans_B = trans_B
        new_node.inputs = [node_A, node_B]
        new_node.name = "MatMul(%s,%s,%s,%s)" % (node_A.name, node_B.name, str(trans_A), str(trans_B))
        return new_node

    def compute(self, node, input_vals):
        """Given values of input nodes, return result of matrix multiplication."""
        mat_A = input_vals[0]
        mat_B = input_vals[1]
        if node.matmul_attr_trans_A:
            mat_A = mat_A.T
        if node.matmul_attr_trans_B:
            mat_B = mat_B.T
        return np.matmul(mat_A, mat_B)

    def gradient(self, node, output_grad):
        """Given gradient of multiply node, return gradient contributions to each input.

        Useful formula: if Y = A B, then dA = dY B^T and dB = A^T dY.
        (The shapes check out: for A of shape (m, k) and B of shape (k, n),
        dY has shape (m, n), so dY B^T is (m, k) and A^T dY is (k, n).)
        """
        return [matmul_op(output_grad, node.inputs[1], False, True),
                matmul_op(node.inputs[0], output_grad, True, False)]


class PlaceholderOp(Op):
    """Op to feed a value to a node."""
    def __call__(self):
        """Creates a variable node."""
        new_node = Op.__call__(self)
        return new_node

    def compute(self, node, input_vals):
        """No compute function since node value is fed directly in Executor."""
        assert False, "placeholder values provided by feed_dict"

    def gradient(self, node, output_grad):
        """No gradient function since node has no inputs."""
        return None


class ZerosLikeOp(Op):
    """Op that represents a constant np.zeros_like."""
    def __call__(self, node_A):
        """Creates a node that represents an np.zeros array of same shape as node_A."""
        new_node = Op.__call__(self)
        new_node.inputs = [node_A]
        new_node.name = "Zeroslike(%s)" % node_A.name
        return new_node

    def compute(self, node, input_vals):
        """Returns zeros_like of the same shape as input."""
        assert isinstance(input_vals[0], np.ndarray)
        return np.zeros(input_vals[0].shape)

    def gradient(self, node, output_grad):
        return [zeroslike_op(node.inputs[0])]


class OnesLikeOp(Op):
    """Op that represents a constant np.ones_like."""
    def __call__(self, node_A):
        """Creates a node that represents an np.ones array of same shape as node_A."""
        new_node = Op.__call__(self)
        new_node.inputs = [node_A]
        new_node.name = "Oneslike(%s)" % node_A.name
        return new_node

    def compute(self, node, input_vals):
        """Returns ones_like of the same shape as input."""
        assert isinstance(input_vals[0], np.ndarray)
        return np.ones(input_vals[0].shape)

    def gradient(self, node, output_grad):
        # The output is constant with respect to the input, so the gradient is zero.
        return [zeroslike_op(node.inputs[0])]


class LogOp(Op):
    def __call__(self, node):
        new_node = Op.__call__(self)
        new_node.inputs = [node]
        new_node.name = "log(%s)" % node.name
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return np.log(input_vals[0])

    def gradient(self, node, output_grad):
        return [output_grad / node.inputs[0]]


class ExpOp(Op):
    def __call__(self, node):
        new_node = Op.__call__(self)
        new_node.inputs = [node]
        new_node.name = "exp(%s)" % node.name
        return new_node

    def compute(self, node, input_vals):
        assert len(input_vals) == 1
        return np.exp(input_vals[0])

    def gradient(self, node, output_grad):
        return [output_grad * exp_op(node.inputs[0])]


class ReduceSumOp(Op):
    def __call__(self, node):
        new_node = Op.__call__(self)
        new_node.inputs = [node]
        new_node.name = "reduce_sum(%s)" % node.name
        return new_node

    def compute(self, node, input_vals):
        assert isinstance(input_vals[0], np.ndarray)
        return np.sum(input_vals[0])

    def gradient(self, node, output_grad):
        return [output_grad * oneslike_op(node.inputs[0])]


# Create global singletons of operators.
add_op = AddOp()
mul_op = MulOp()
div_op = DivOp()
sub_op = SubOp()
neg_op = NegOp()
add_byconst_op = AddByConstOp()
rsub_byconst_op = RSubByConstOp()
sub_byconst_op = SubByConstOp()
mul_byconst_op = MulByConstOp()
div_byconst_op = DivByConstOp()
rdiv_byconst_op = RDivByConstOp()
matmul_op = MatMulOp()
placeholder_op = PlaceholderOp()
oneslike_op = OnesLikeOp()
zeroslike_op = ZerosLikeOp()
log_op = LogOp()
exp_op = ExpOp()
reduce_sum = ReduceSumOp()


def exp(val):
    if isinstance(val, Node):
        return exp_op(val)
    return np.exp(val)


def log(val):
    if isinstance(val, Node):
        return log_op(val)
    return np.log(val)


class Executor:
    """Executor computes values for a given subset of nodes in a computation graph."""
    def __init__(self, eval_node_list):
        """
        Parameters
        ----------
        eval_node_list: list of nodes whose values need to be computed.
        """
        self.eval_node_list = eval_node_list

    def run(self, feed_dict):
        """Computes values of nodes in eval_node_list given computation graph.

        Parameters
        ----------
        feed_dict: dict mapping variable (placeholder) nodes to values supplied by the user.

        Returns
        -------
        A list of values for nodes in eval_node_list.
        """
        node_to_val_map = dict(feed_dict)
        # Traverse graph in topological sort order and compute values for all nodes.
        topo_order = find_topo_sort(self.eval_node_list)
        for node in topo_order:
            if isinstance(node.op, PlaceholderOp):
                continue
            vals = [node_to_val_map[n] for n in node.inputs]
            compute_val = node.op.compute(node, vals)
            node_to_val_map[node] = compute_val if isinstance(compute_val, np.ndarray) else np.array(compute_val)
        # Collect node values.
        node_val_results = [node_to_val_map[node] for node in self.eval_node_list]
        return node_val_results
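

# A minimal forward-only sketch of the Executor API above: feed_dict maps each
# placeholder node to a numpy value, and run() returns values in the same order
# as eval_node_list.
#
#   x = Variable(name="x")
#   executor = Executor([x * 2 + 1])
#   (val,) = executor.run(feed_dict={x: np.array([1.0, 2.0])})  # -> [3., 5.]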


def gradients(output_node, node_list):
    """Take gradient of output node with respect to each node in node_list.

    Parameters
    ----------
    output_node: output node that we are taking derivative of.
    node_list: list of nodes that we are taking derivative wrt.

    Returns
    -------
    A list of gradient values, one for each node in node_list respectively.
    """
    # A map from node to a list of gradient contributions from each output node.
    node_to_output_grads_list = {}
    # Special note on initializing gradient of output_node as oneslike_op(output_node):
    # we are really taking a derivative of the scalar reduce_sum(output_node)
    # instead of the vector output_node, but this is the common case for loss functions.
    node_to_output_grads_list[output_node] = [oneslike_op(output_node)]
    # A map from node to the gradient of that node.
    node_to_output_grad = {}
    # Traverse graph in reverse topological order given the output_node that we are taking gradient wrt.
    reverse_topo_order = reversed(find_topo_sort([output_node]))
    for node in reverse_topo_order:
        grad = sum_node_list(node_to_output_grads_list[node])
        node_to_output_grad[node] = grad
        # Compute the contributions to all inputs once, then distribute them.
        grads = node.op.gradient(node, grad)
        for i in range(len(node.inputs)):
            ch = node.inputs[i]
            grads_list = node_to_output_grads_list.get(ch, [])
            grads_list.append(grads[i])
            node_to_output_grads_list[ch] = grads_list
    # Collect results for the gradients requested.
    grad_node_list = [node_to_output_grad[node] for node in node_list]
    return grad_node_list


##############################
####### Helper Methods #######
##############################

def find_topo_sort(node_list):
    """Given a list of nodes, return a topological sort list of nodes ending in them.

    A simple algorithm is to do a post-order DFS traversal on the given nodes,
    going backwards based on input edges. Since a node is added to the ordering
    after all its predecessors are traversed due to post-order DFS, we get a
    topological sort.
    """
    visited = set()
    topo_order = []
    for node in node_list:
        topo_sort_dfs(node, visited, topo_order)
    return topo_order


def topo_sort_dfs(node, visited, topo_order):
    """Post-order DFS."""
    if node in visited:
        return
    visited.add(node)
    for n in node.inputs:
        topo_sort_dfs(n, visited, topo_order)
    topo_order.append(node)


def sum_node_list(node_list):
    """Custom sum function to avoid creating redundant nodes in Python's sum implementation."""
    from operator import add
    from functools import reduce
    return reduce(add, node_list)
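

# A minimal end-to-end sketch of the API defined above: build y = x1*x2 + x1,
# evaluate it, and check the symbolic gradients against values worked out by
# hand (dy/dx1 = x2 + 1 and dy/dx2 = x1).
if __name__ == "__main__":
    x1 = Variable(name="x1")
    x2 = Variable(name="x2")
    y = x1 * x2 + x1

    # gradients() returns new graph nodes; an Executor evaluates them.
    grad_x1, grad_x2 = gradients(y, [x1, x2])
    executor = Executor([y, grad_x1, grad_x2])
    y_val, grad_x1_val, grad_x2_val = executor.run(
        feed_dict={x1: np.array([2.0]), x2: np.array([3.0])})

    assert np.array_equal(y_val, np.array([8.0]))        # 2*3 + 2
    assert np.array_equal(grad_x1_val, np.array([4.0]))  # x2 + 1
    assert np.array_equal(grad_x2_val, np.array([2.0]))  # x1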