-
Notifications
You must be signed in to change notification settings - Fork 131
Add R2 score function #253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from chainer.backends import cuda | ||
from chainer import function | ||
from chainer.utils import type_check | ||
|
||
|
||
class R2Score(function.Function):
    """Forward-only function computing the R^2 score of two float arrays.

    Args:
        sample_weight: Must be ``None``; any other value raises
            ``NotImplementedError``.
        multioutput (str): ``'uniform_average'`` to average the per-output
            scores, or ``'raw_values'`` to return them unreduced. Any other
            value raises ``ValueError``.
        ignore_nan (bool): If ``True``, NaN entries of the residual and of
            the deviation from the mean are replaced by zero before the
            sums of squares are taken.
    """

    def __init__(self, sample_weight, multioutput, ignore_nan=False):
        if sample_weight is not None:
            raise NotImplementedError()
        if multioutput not in ['uniform_average', 'raw_values']:
            raise ValueError("invalid multioutput argument")
        self.multioutput = multioutput
        self.ignore_nan = ignore_nan

    def check_type_forward(self, in_types):
        """Require exactly two floating-point inputs of identical shape."""
        type_check.expect(in_types.size() == 2)
        y_type, t_type = in_types

        type_check.expect(
            y_type.dtype.kind == 'f',
            t_type.dtype.kind == 'f',
            y_type.shape == t_type.shape,
        )

    def forward(self, inputs):
        """Return a 1-tuple holding the R^2 score(s) of ``(pred, true)``.

        Sums are taken along axis 0. Outputs whose total sum of squares is
        zero are assigned a score of 0.0.
        """
        xp = cuda.get_array_module(*inputs)
        pred, true = inputs
        residual = pred - true
        deviation = true - xp.mean(true, axis=0)
        if self.ignore_nan:
            # NOTE(review): a NaN in ``true`` makes the axis-0 mean NaN, so
            # every deviation in that column is zeroed here and the column
            # ends up scored as 0.0 -- confirm this is the intended meaning
            # of ``ignore_nan``.
            residual[xp.isnan(residual)] = 0.
            deviation[xp.isnan(deviation)] = 0.
        ss_res = xp.asarray(xp.sum(residual ** 2, axis=0))
        ss_tot = xp.asarray(xp.sum(deviation ** 2, axis=0))
        degenerate = ss_tot == 0
        # Put a dummy denominator where the variance is zero so the division
        # below is safe; those entries are overwritten with 0.0 by ``where``.
        ss_tot[degenerate] = 1
        scores = xp.where(degenerate, 0.0, 1 - ss_res / ss_tot)
        scores = scores.astype(pred.dtype)
        if self.multioutput == 'uniform_average':
            return (xp.asarray(scores.mean()),)
        elif self.multioutput == 'raw_values':
            return (scores,)
|
||
|
||
def r2_score(pred, true, sample_weight=None, multioutput='uniform_average',
             ignore_nan=False):
    """Computes R^2 (coefficient of determination) regression score.

    Args:
        pred (Variable): Variable holding a vector, matrix or tensor of
            estimated target values.
        true (Variable): Variable holding a vector, matrix or tensor of
            correct target values.
        sample_weight: Present for compatibility with scikit-learn's
            ``r2_score``. Only ``None`` is accepted.
        multioutput (str): ``'uniform_average'`` or ``'raw_values'``.
            With ``'uniform_average'`` the per-output R^2 scores are
            averaged; with ``'raw_values'`` they are returned individually.
        ignore_nan (bool): If ``True``, NaN entries of the residual and of
            the deviation from the mean are replaced by zero before the
            sums of squares are taken.

    Returns:
        Variable: A Variable holding a scalar array of the R^2 score if
        ``multioutput`` is ``'uniform_average'``, or a vector of R^2 scores
        if ``multioutput`` is ``'raw_values'``.

    .. note:: This function is non-differentiable.
    """
    scorer = R2Score(
        sample_weight=sample_weight,
        multioutput=multioutput,
        ignore_nan=ignore_nan,
    )
    return scorer(pred, true)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
from chainer_chemistry.training.extensions import batch_evaluator # NOQA | ||
from chainer_chemistry.training.extensions import roc_auc_evaluator # NOQA | ||
from chainer_chemistry.training.extensions import r2_score_evaluator # NOQA | ||
|
||
# import class and function | ||
from chainer_chemistry.training.extensions.batch_evaluator import BatchEvaluator # NOQA | ||
from chainer_chemistry.training.extensions.roc_auc_evaluator import ROCAUCEvaluator # NOQA | ||
from chainer_chemistry.training.extensions.r2_score_evaluator import R2ScoreEvaluator # NOQA |
101 changes: 101 additions & 0 deletions
101
chainer_chemistry/training/extensions/r2_score_evaluator.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from chainer.backends import cuda | ||
from chainer.dataset import convert | ||
|
||
from chainer_chemistry.training.extensions.batch_evaluator import BatchEvaluator # NOQA | ||
|
||
|
||
class R2ScoreEvaluator(BatchEvaluator):

    """Evaluator which calculates R^2 (coefficient of determination)
    regression score.

    Args:
        iterator: Dataset iterator for the dataset to calculate
            R^2 (coefficient of determination) regression score.
            It can also be a dictionary of iterators. If this is just an
            iterator, the iterator is registered by the name ``'main'``.
        target: Link object or a dictionary of links to evaluate. If this is
            just a link object, the link is registered by the name ``'main'``.
        converter: Converter function to build input arrays and true label.
            :func:`~chainer.dataset.concat_examples` is used by default.
            It is expected to return input arrays of the form
            `[x_0, ..., x_n, t]`, where `x_0, ..., x_n` are the inputs to
            the evaluation function and `t` is the true label.
        device: Device to which the training data is sent. Negative value
            indicates the host memory (CPU).
        eval_hook: Function to prepare for each evaluation process. It is
            called at the beginning of the evaluation. The evaluator extension
            object is passed at each call.
        eval_func: Evaluation function called at each iteration. The target
            link to evaluate as a callable is used by default.
        name (str): name of this extension. When `name` is None,
            `default_name='validation'` which is defined in super class
            `Evaluator` is used as extension name. This name affects to the
            reported key name.
        pos_label (int): Only stored on the instance; it is not read by the
            R^2 computation in this class.
        ignore_labels (int or list or None): Only stored on the instance;
            it is not read by the R^2 computation in this class.
        raise_value_error (bool): Only stored on the instance; it is not
            read by the R^2 computation in this class.
        logger: Passed through to :class:`BatchEvaluator`.
        sample_weight: This argument is for compatibility with
            scikit-learn's implementation of r2_score. Current
            implementation admits None only.
        multioutput (str): If 'uniform_average', the reported value is the
            average of the R^2 scores of the multiple outputs. If
            'raw_values', the reported value is the set of R^2 scores of
            the multiple outputs.
        ignore_nan (bool): If `True`, NaN entries of the residual and of the
            deviation from the mean are replaced by zero before the sums of
            squares are taken.

    Attributes:
        converter: Converter function.
        device: Device to which the training data is sent.
        eval_hook: Function to prepare for each evaluation process.
        eval_func: Evaluation function called at each iteration.
        pos_label (int): value given at construction (unused here).
        ignore_labels: value given at construction (unused here).
        raise_value_error (bool): value given at construction (unused here).
        sample_weight: value given at construction; must be None.
        multioutput (str): 'uniform_average' or 'raw_values'.
        ignore_nan (bool): whether NaN entries are zeroed before summing.

    """

    def __init__(self, iterator, target, converter=convert.concat_examples,
                 device=None, eval_hook=None, eval_func=None, name=None,
                 pos_label=1, ignore_labels=None, raise_value_error=True,
                 logger=None, sample_weight=None,
                 multioutput='uniform_average', ignore_nan=False):
        metrics_fun = {'r2_score': self.r2_score}
        super(R2ScoreEvaluator, self).__init__(
            iterator, target, converter=converter, device=device,
            eval_hook=eval_hook, eval_func=eval_func, metrics_fun=metrics_fun,
            name=name, logger=logger)

        self.pos_label = pos_label
        self.ignore_labels = ignore_labels
        self.raise_value_error = raise_value_error
        self.sample_weight = sample_weight
        self.multioutput = multioutput
        self.ignore_nan = ignore_nan

    def r2_score(self, pred, true, sample_weight=None,
                 multioutput='uniform_average', ignore_nan=False):
        """Compute the R^2 score of ``pred`` against ``true``.

        The ``sample_weight``, ``multioutput`` and ``ignore_nan`` arguments
        of this method are accepted for signature compatibility only; the
        values configured on the instance are the ones that take effect.

        Args:
            pred: Array of estimated target values.
            true: Array of correct target values.

        Returns:
            The score array itself (not wrapped in a tuple): a
            zero-dimensional array for ``'uniform_average'`` or the
            per-output scores for ``'raw_values'``.

        Raises:
            NotImplementedError: If ``self.sample_weight`` is not ``None``.
            ValueError: If ``self.multioutput`` is not a supported option.
        """
        if self.sample_weight is not None:
            raise NotImplementedError()
        if self.multioutput not in ['uniform_average', 'raw_values']:
            raise ValueError('invalid multioutput argument')

        xp = cuda.get_array_module(pred)
        diff = pred - true
        dev = true - xp.mean(true, axis=0)
        if self.ignore_nan:
            diff[xp.isnan(diff)] = 0.
            dev[xp.isnan(dev)] = 0.
        SS_res = xp.asarray(xp.sum(diff ** 2, axis=0))
        SS_tot = xp.asarray(xp.sum(dev ** 2, axis=0))
        SS_tot_iszero = SS_tot == 0
        SS_tot[SS_tot_iszero] = 1  # Assign dummy value to avoid zero-division
        ret = xp.where(
            SS_tot_iszero, 0.0, 1 - SS_res / SS_tot).astype(pred.dtype)
        # Return the value directly: unlike ``Function.forward``, a metric
        # callback is expected to yield the metric itself, not a 1-tuple.
        if self.multioutput == 'uniform_average':
            return xp.asarray(ret.mean())
        elif self.multioutput == 'raw_values':
            return ret
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import numpy | ||
import pytest | ||
|
||
from chainer import cuda | ||
|
||
import chainer_chemistry | ||
|
||
|
||
def r2_score(pred, true, sample_weight=None, multioutput="uniform_average",
             ignore_nan=False):
    """NumPy reference implementation of the R^2 score used as test oracle.

    Args:
        pred: Array of estimated target values.
        true: Array of correct target values, same shape as ``pred``.
        sample_weight: Unused; kept for signature parity with the function
            under test.
        multioutput (str): ``'uniform_average'`` returns the mean of the
            per-output scores, ``'raw_values'`` returns them unreduced.
        ignore_nan (bool): If ``True``, NaN entries of the residual and of
            the deviation from the mean are replaced by zero before summing.

    Returns:
        The averaged score or the array of per-output scores. An output
        whose total sum of squares is zero contributes a score of 0.0.

    Raises:
        ValueError: If ``multioutput`` is not a supported option.
    """
    diff = pred - true
    dev = true - numpy.mean(true, axis=0)
    if ignore_nan:
        diff[numpy.isnan(diff)] = 0.
        dev[numpy.isnan(dev)] = 0.
    ss_res = numpy.asarray(numpy.sum(diff ** 2, axis=0))
    ss_tot = numpy.asarray(numpy.sum(dev ** 2, axis=0))

    # Guard the division per output column: only the zero-variance columns
    # are forced to 0.0, the remaining columns keep their real score.
    ss_tot_iszero = ss_tot == 0
    safe_tot = numpy.where(ss_tot_iszero, 1, ss_tot)
    scores = numpy.where(ss_tot_iszero, 0.0, 1 - ss_res / safe_tot)

    if multioutput == 'uniform_average':
        return scores.mean()
    elif multioutput == 'raw_values':
        return scores
    raise ValueError('invalid multioutput argument')
|
||
|
||
@pytest.fixture
def inputs():
    """Build the three float32 (4, 3) arrays shared by the tests.

    Returns:
        tuple: ``(x0, x1, x2)`` where ``x0`` is seeded uniform noise, ``x1``
        is ``x0`` plus a perturbation, and ``x2`` is a fixed array
        containing NaN entries.
    """
    numpy.random.seed(0)
    shape = (4, 3)
    x0 = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    # Add sufficient margin to prevent computational error
    noise = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    noise[abs(noise) < 0.01] = 0.5
    x1 = x0 + noise
    nan = numpy.nan
    x2 = numpy.asarray([
        [0.3, nan, 0.2],
        [nan, 0.1, 0.5],
        [0.9, 0.7, nan],
        [0.2, -0.3, 0.4],
    ]).astype(numpy.float32)
    return x0, x1, x2
|
||
|
||
def check_forward(inputs):
    """Compare the library R^2 score with the NumPy oracle on (x0, x1)."""
    pred, true, _ = inputs
    result = chainer_chemistry.functions.r2_score(pred, true)
    assert result.data.dtype == 'f'
    assert result.data.shape == ()

    expected = r2_score(pred, true)
    assert numpy.allclose(result.data, expected)
|
||
|
||
def check_forward_ignore_nan(inputs):
    """Compare library and oracle with ``ignore_nan=True`` on (x0, x2)."""
    pred, _, true_with_nan = inputs
    result = chainer_chemistry.functions.r2_score(
        pred, true_with_nan, ignore_nan=True)
    assert result.data.dtype == 'f'
    assert result.data.shape == ()

    expected = r2_score(pred, true_with_nan, ignore_nan=True)
    assert numpy.allclose(result.data, expected)
|
||
|
||
def check_forward_ignore_nan_with_nonnan_value(inputs):
    """Compare library and oracle with ``ignore_nan=True`` on (x0, x1)."""
    pred, true, _ = inputs
    result = chainer_chemistry.functions.r2_score(
        pred, true, ignore_nan=True)
    assert result.data.dtype == 'f'
    assert result.data.shape == ()

    expected = r2_score(pred, true, ignore_nan=True)
    assert numpy.allclose(result.data, expected)
|
||
|
||
def test_forward_cpu(inputs):
    """Run every forward check, in order, on the fixture arrays."""
    checks = (
        check_forward,
        check_forward_ignore_nan,
        check_forward_ignore_nan_with_nonnan_value,
    )
    for check in checks:
        check(inputs)
|
||
|
||
@pytest.mark.gpu
def test_forward_gpu(inputs):
    """Run the forward checks on arrays transferred with ``cuda.to_gpu``.

    Mirrors ``test_forward_cpu``, including the ``ignore_nan=True`` check
    on NaN-free data. Slots a check does not unpack are filled with
    ``None``.
    """
    x0, x1, x2 = inputs
    x0_gpu = cuda.to_gpu(x0)
    x1_gpu = cuda.to_gpu(x1)
    check_forward((x0_gpu, x1_gpu, None))
    check_forward_ignore_nan((x0_gpu, None, cuda.to_gpu(x2)))
    check_forward_ignore_nan_with_nonnan_value((x0_gpu, x1_gpu, None))
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == '__main__':
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
memo: apply pep8