'''Some functions for working with histograms.
'''
import numpy as np
from scipy.spatial.distance import cosine
from scipy.spatial.distance import jensenshannon
from scipy.stats import wasserstein_distance
[docs]def dH(H1, H2, mode='l2'):
'''Histogram metrics.
Parameters
==========
H1 : array_like
1d histogram.
H2 : array_like
1d histogram with bins matched to H1.
mode : {'l2', 'l1', 'vcos', 'intersect', 'chi2', 'jsd', 'emd'}, optional
Metric to use.
Returns
=======
float
Distance between H1, H2.
Notes
=====
Similar bins means the same number and size over the same range.
Modes:
- l2 -- Euclidean distance
- l1 -- Manhattan distance
- vcos -- Vector cosine distance
- intersect -- Histogram intersection distance
- chi2 -- Chi square distance
- jsd -- Jensen-Shannan Divergence
- emd -- Earth Mover's Distance
Issues:
- I'm not completely convinced that intersect is doing the right thing.
The quality of the metric will depend a lot on the qaulity of the
histograms themselves. Obviously more samples and well-chosen bins will
help out in the comparisons.
'''
val = None
if mode == 'l2':
val = np.linalg.norm(H1 - H2, ord=2)
if mode == 'l1':
val = np.linalg.norm(H1 - H2, ord=1)
if mode == 'vcos':
val = cosine(H1, H2)
if mode == 'intersect':
val = np.sum(np.min(np.stack((H1, H2)), axis=0))
if mode == 'chi2':
a = 2*((H1 - H2)**2).astype(float)
b = H1 + H2
val = np.sum(np.divide(a, b, out=np.zeros_like(a), where=b != 0))
if mode == 'jsd':
val = jensenshannon(H1, H2)
if mode == 'emd':
val = wasserstein_distance(H1, H2)
if val is None:
raise NotImplementedError()
return val
[docs]def hist_match(source, template):
'''Histogram matching.
Adjust the pixel values of a grayscale image such that its histogram
matches that of a target image
Parameters
----------
source: np.ndarray
Image to transform; the histogram is computed over the flattened
array
template: np.ndarray
Template image; can have different dimensions to source
Returns
-------
matched: np.ndarray
The transformed output image
Notes
=====
https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x
'''
oldshape = source.shape
source = source.ravel()
template = template.ravel()
# get the set of unique pixel values and their corresponding indices and
# counts
_s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
return_counts=True)
t_values, t_counts = np.unique(template, return_counts=True)
# take the cumsum of the counts and normalize by the number of pixels to
# get the empirical cumulative distribution functions for the source and
# template images (maps pixel value --> quantile)
s_quantiles = np.cumsum(s_counts).astype(np.float64)
s_quantiles /= s_quantiles[-1]
t_quantiles = np.cumsum(t_counts).astype(np.float64)
t_quantiles /= t_quantiles[-1]
# interpolate linearly to find the pixel values in the template image
# that correspond most closely to the quantiles in the source image
interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
return interp_t_values[bin_idx].reshape(oldshape)