# Source code for tagit.controller.sidebox.tags_suggested

"""

Part of the tagit module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2016

"""
# IMPORTS
import operator
from math import log

# INNER-MODULE IMPORTS
from ...basics import difference, fst, snd, split
from tags import CSidebox_Tags
from tagit.external.memoize import memoized
from ...token import Token_Tag

# EXPORTS
__all__ = ('CSidebox_Tags_Suggested', )

## CODE ##
class CSidebox_Tags_Suggested(CSidebox_Tags):
    """Give a recommendation which tags to include next in the filter.

    The sidebox listens to the parent's ``on_results_change`` event and,
    on every change, ranks the tags that occur in the current results but
    are not yet part of the filter.  The best three are passed to the
    widget.
    """

    def __init__(self, widget, model, settings, parent=None):
        """Initialize the base sidebox and subscribe to result changes.

        All arguments are forwarded unchanged to the base class.
        """
        super(CSidebox_Tags_Suggested, self).__init__(widget, model, settings, parent)

        # Event bindings
        # NOTE(review): presumably the base class stores *parent* as
        # ``self.parent``; confirm that it is never None at this point.
        self.parent.bind(on_results_change=self.update)

    def __del__(self):
        # Unbind events
        self.parent.unbind(on_results_change=self.update)

    def update(self, filter_, images):
        """Update the sidebox.

        :param filter_: Object whose ``get_tokens()`` yields the tokens of
                        the current filter.
        :param images:  The current results; used to collect the candidate
                        tags and handed to :class:`Score`.

        The widget receives a list of at most three strings of the form
        ``'<score rounded to two digits> <tag>'``, best score first.
        """
        t_tags, specials = split(Token_Tag.match, filter_.get_tokens())

        # Candidates for the next token: tags that occur at least once in
        # the results but are not used in the filter yet.
        # A real list (not map()) so the value can be consumed repeatedly.
        filtered = [token.get() for token in t_tags]
        tags = self._tags_from_images(images)
        tags = difference(tags, filtered)

        # Compute one score per candidate tag.
        scorer = Score(self.model, images, filtered, specials)
        scores = [scorer.get_score(tag) for tag in tags]

        # Order by score (best first) and pick the top three *before*
        # stringifying, so only the displayed entries are formatted.
        ranked = sorted(zip(tags, scores), key=snd, reverse=True)
        suggested = [
            str(round(score, 2)) + ' ' + tag
            for tag, score in ranked[:3]
        ]

        # Draw
        self.widget.update(suggested)
def log2(x):
    """Return the base-2 logarithm of *x* (*x* must be positive)."""
    return log(x) / log(2.0)


class Score(object):
    """Score candidate tags with respect to the current results.

    :param model:    Data model; the methods below call its
                     ``num_images()``, ``query()``, ``tags.get_all()`` and
                     ``tags.histogram()``.
    :param results:  The current results (must support ``len()``).
    :param filtered: Tags already present in the filter (stored only).
    :param specials: Non-tag tokens of the filter (stored only);
                     defaults to an empty list.

    ``self.scores`` holds ``(weight, function)`` pairs; :meth:`get_score`
    returns their weighted sum.  If there are no results or no images at
    all, the ratio-based functions would divide by zero, so the constant
    score is used instead.
    """

    def __init__(self, model, results, filtered, specials=None):
        if specials is None:
            specials = []

        self._specials = specials
        self._results = results
        self._model = model
        self._filtered = filtered
        self._constant = 1.0

        # Active score functions
        self.scores = [
            #(1.0, self.split_ratio),
            (1.0, self.evenness),
            #(1.0, self.entropy),
            #(1.0, self.self_information),
            #(1.0, self.mutual_information),
            #(1.0, self.specificality),
        ]

        # Fall back to a constant score when the ratios are undefined.
        if len(self._results) == 0 or self.images() == 0:
            self.scores = [(1.0, self.constant)]

    def get_score(self, tag):
        """Compute the weighted score for *tag*.
        """
        # Weighted sum over all active score functions.
        return sum(weight * part(tag) for weight, part in self.scores)

    ###########################################################################
    #                             SCORING METHODS                             #
    ###########################################################################

    @memoized
    def images(self):
        """Total number of images in the model, as float."""
        return 1.0 * self._model.num_images()

    @memoized
    def tags(self):
        """Total number of tags in the model, as float."""
        return 1.0 * len(self._model.tags.get_all())

    @memoized
    def associations(self):
        """Total number of associations. Not implemented."""
        raise NotImplementedError()

    @memoized
    def results(self):
        """Number of current results, as float."""
        return 1.0 * len(self._results)

    @memoized
    def tags_hist(self):
        """Tag histogram of the model, restricted to the current results."""
        return self._model.tags.histogram(include=self._results)

    @memoized
    def results_with_tag(self, tag):
        """Histogram count of *tag* within the current results, as float."""
        hist = self.tags_hist()
        return 1.0 * hist[tag]

    @memoized
    def results_without_tag(self, tag):
        """Number of current results without *tag*."""
        return self.results() - self.results_with_tag(tag)

    @memoized
    def images_with_tag(self, tag):
        """Number of items the model returns when queried for *tag* alone."""
        return 1.0 * len(self._model.query([Token_Tag(tag)]))

    @memoized
    def images_without_tag(self, tag):
        """Number of images without *tag*."""
        return self.images() - self.images_with_tag(tag)

    @memoized
    def tags_with_image(self, image):
        """Number of tags of *image*. Not implemented."""
        raise NotImplementedError()

    @memoized
    def tags_without_image(self, image):
        """Number of tags with no association to *image*."""
        return self.tags() - self.tags_with_image(image)

    def constant(self, tag):
        """Return the same score for every tag."""
        return self._constant

    @memoized
    def split_ratio(self, tag):
        """# current results with tag / # current results = p(x | y)"""
        return self.results_with_tag(tag) / self.results()

    @memoized
    def evenness(self, tag):
        """1.0 - 2.0 * abs(0.5 - split ratio)

        1.0 if the tag splits the results into two equal halves,
        0.0 if all or none of the results carry the tag.
        """
        return 1.0 - 2.0 * abs(0.5 - self.split_ratio(tag))

    @memoized
    def entropy(self, tag):
        """H(X | Y=y) = - sum_x p(x|y) * log2 p(x|y), x in {has tag, has not}.

        Terms with zero probability contribute 0 (0 * log 0 := 0).
        """
        p_with = self.results_with_tag(tag) / self.results()
        h_with = p_with * log2(p_with) if p_with > 0 else 0.0

        p_without = self.results_without_tag(tag) / self.results()
        h_without = p_without * log2(p_without) if p_without > 0 else 0.0

        return - (h_with + h_without)

    @memoized
    def self_information(self, tag):
        """Id(x) = -log p(x)
                 = -log (# images w/ tag x / # images)

        Raises ValueError if no image carries the tag (log of zero).
        """
        return -1.0 * log2(self.images_with_tag(tag) / self.images())

    @memoized
    def mutual_information(self, tag):
        """Pseudo mutual information between *tag* and the current results.

        Sums a "with tag" and a "without tag" term; a term whose joint
        ratio is zero contributes 0 (0 * log 0 := 0).
        """
        # NOTE(review): the denominators below use raw counts, whereas the
        # formula in the notes at the end of this file divides by the
        # probabilities p(x) * p(y). Kept unchanged; confirm whether the
        # missing normalization by # images is intended.
        r_with = self.results_with_tag(tag) / self.images()
        if r_with > 0.0:
            # Guard against log2(0) when no result carries the tag.
            r_with = r_with * log2(
                r_with / (self.images_with_tag(tag) * self.results())
            )

        r_without = self.results_without_tag(tag) / self.images()
        if r_without > 0.0:
            # images_without_tag() == 0 -> r_without == 0 -> set 0!
            r_without = r_without * log2(
                r_without / (self.images_without_tag(tag) * self.results())
            )

        return r_with + r_without

    def specificality(self, tag):
        """# images with tag in total / # current results with tag."""
        # NOTE(review): the notes at the end of this file describe the
        # inverse ratio (results / images with tag) - confirm which one is
        # intended. Divides by zero if no result carries the tag.
        return self.images_with_tag(tag) / self.results_with_tag(tag)


"""

restrictiveness/specificality:
    # images within current results / # images with tag in total

tag frequency:
    # images with tag assigned / # total assignments
    # images with tag assigned / # images
    # images with tag assigned / # tags


# results
# images with tag
# images without tag
# images
# tags
# total assignments
# current results w/ tag
# current results w/o tag

the results is the intersection of sets

event x
    occurrence of tag x in an image

event !x
    1 - event x (absence of tag x in an image)

probability
    p(x) = # occurrences of (not) tag x / # total assignments
         = # images w/ (w/o) tag / # total assignments

conditional probability
    p(x|y)  = p(x, y) / p(y) (Bayes)
    p(x, y) = # occurrences of tag x and y / # total images
            = # occurrences of x in current results / # total images
    p(y)    = # occurrences of y / # total images
            = # current results / # total images
    p(x|y)  = # occurrences of tag x and y / # occurrences of y
    p(x|y)  = (# occurrences of x in current results / # total images )
              / (# current results / # total images)
            = # occurrences of x in current results / # current results
    p(x|y)  = # current results w/ tag / # results

self-information
    Id(x) = - log p(x)
    Id(x) = - log (# images w/(w/o) tag x / # images)

conditional self-information
    Id(x|y) = - log p(x|y)

entropy
    H(X | Y=y) = - sum_x p(x|y) * log p(x|y)
    sum goes over has(x) and !has(x)

pseudo mutual information
    I(X; Y=y) = sum_x p(x, y) log p(x, y) / (p(x) * p(y))
    sum goes over has(x) and !has(x)

split ratio
    # current results with tag / # current results
    p(x|y)

evenness
    1.0 - 2.0 * abs(0.5 - split ratio)


example:
    tags:           A, B, C, D
    images:         1, 2, 3
    assignments:    (A, 1), (A, 2), (A, 3), (B, 1), (B, 2), (C, 1)

Probability of an image having tag x:
p(x) = # images with tag x / # images

        x               not x           sum
A       3 / 3 = 1       0 / 3 = 0       1
B       2 / 3           1 / 3           1
C       1 / 3           2 / 3           1
D       0 / 3 = 0       3 / 3 = 1       1
sum     6 / 3 = 2       6 / 3 = 2       4

A, B    2 / 3
A, C    1 / 3
A, D    0 / 3
B, C    1 / 3
B, D    0 / 3
C, D    0 / 3

Probability of an assignment being tag x:
p(x) = # assignments of tag x / # total assignments

        x               not x           sum
A       3 / 6 = 1 / 2   3 / 6 = 1 / 2   1
B       2 / 6 = 1 / 3   4 / 6 = 2 / 3   1
C       1 / 6           5 / 6           1
D       0 / 6           6 / 6 = 1       1
sum     6 / 6 = 1       18 / 6 = 3      4

Probability of a tag including image x:


Functions
---------

* images                Total number of images
* tags                  Total number of tags
* associations          Total number of associations
* results               Total number of current results
* results_with_tag      Number of current results with tag x
* images_with_tag       Number of images with tag x
* tags_with_image       Number of tags of image x

Derived functions:

* results_without_tag   Number of current results without tag x
                        results - results_with_tag
* images_without_tag    Number of images without tag x
                        images - images_with_tag
* tags_without_image    Number of tags with no association to image x
                        tags - tags_with_image

Score functions:

* split_ratio
* evenness
* entropy
* self_information
* cond_self_information
* mutual_information

"""

## EOF ##