# Source code for tagit.tools

"""Console tools for tag management

Part of the tagit module.
A copy of the license is provided with the project.
Author: Matthias Baumgartner, 2016

"""
# IMPORTS
import errno
import operator
import os
import shutil
import sys
#import collections

# INNER-MODULE IMPORTS
from basics import fst, snd, unique, duplicates, item_count
from metrics import tags_similarity, stat_per_image, stat_per_tag

# CONSTANTS

# EXPORTS
# Public names exposed by a star-import of this module.
__all__ = ('tags_collect', 'tags_stats', 'tags_histogram', 'images_export')

## CODE ##

def tags_histogram(model):
    """Print the tag histogram, largest value first.

    The histogram is read from ``model.tags.histogram()``, which has to
    return a mapping from tag name to a sortable value (presumably the
    number of assignments of that tag -- confirm against the model).

    One line per tag is written to stdout. Tag names are left-aligned and
    padded to the length of the longest name so that the values line up in
    a column. Nothing is printed if the histogram is empty.

    """
    hist = model.tags.histogram()
    if not hist:
        # max() below raises ValueError on an empty sequence.
        return

    width = max(len(tag) for tag in hist.keys())
    ranked = sorted(hist.items(), key=operator.itemgetter(1), reverse=True)
    for tag, value in ranked:
        # A single pre-formatted argument prints the same text under the
        # print statement (Python 2) and the print function (Python 3).
        print("%s %s" % (tag.ljust(width, ' '), value))
def _print_stats(heading, titles, stats, width):
    """Print a *heading* banner followed by one ``title value`` line per stat.

    The i-th value of *stats* is labelled with the i-th entry of *titles*.
    Values beyond the end of *titles* are printed with a blank label instead
    of failing. Labels are padded to *width* characters.

    """
    print("\n########### %s ###########" % heading)
    for idx, value in enumerate(stats):
        title = titles[idx] if idx < len(titles) else ''
        print("%s %s" % (title.ljust(width, ' '), value))


def tags_stats(model):
    """Print summary statistics about tags and about images to stdout.

    Two sections are written:

    * TAGS     The values produced by ``stat_per_tag(model)``
    * IMAGES   The values produced by ``stat_per_image(model)``

    Each value is printed on its own line, preceded by a label. The labels
    are assigned purely by position, so both metric functions have to
    produce their values in the order of the title lists below (assumed
    from the labels; confirm against the metrics module).

    Each section has its own list of labels. If a metric function yields
    fewer or more values than expected, only that section is affected and
    surplus values are printed without a label.

    """
    tag_titles = [
        'Number of tags', 'Minimum assignments', 'Maximum assignments',
        'Average', 'St\'dev', 'Variance', '25-percentile', 'median',
        '75-percentile', 'Attached to all', 'Attached at most once',
    ]
    image_titles = [
        'Number of images', 'Minimum assigned', 'Maximum assigned',
        'Average', 'St\'dev', 'Variance', '25-percentile', 'median',
        '75-percentile', 'Images with all', 'Images with less than 5',
    ]

    # One common column width so that both sections are aligned.
    width = max(len(title) for title in tag_titles + image_titles)

    _print_stats('TAGS', tag_titles, stat_per_tag(model), width)
    _print_stats('IMAGES', image_titles, stat_per_image(model), width)
def _rename_in_pairs(pairs, old, new):
    """Return *pairs* with tag *old* replaced by *new* on either side.

    Pairs whose two sides become identical are dropped, since there is
    nothing left to merge.

    """
    renamed = []
    for lhs, rhs in pairs:
        lhs = new if lhs == old else lhs
        rhs = new if rhs == old else rhs
        if lhs != rhs:
            renamed.append((lhs, rhs))
    return renamed


def tags_collect(model):
    """Goes through all tags and queries the user if it should be merged
    with a similar one.

    Candidate pairs are built from ``model.tags.get_all()`` and the
    per-tag lists returned by ``tags_similarity``. The pairs are presented
    ordered by their lower-cased left tag. A pair and its mirrored pair
    are only asked once.

    The user can choose the following actions for each pair of tags:

    * l     Merge to the left; Keeps the left tag, removes the right one
    * r     Merge to the right; Keeps the right tag, removes the left one
    * n     Don't merge, keep both tags (default for any other input)
    * s     Skip this pair, ask later
    * q     Quit

    A merge is executed through ``model.tags.rename`` and is applied to all
    outstanding pairs, including the skipped ones, so that a removed tag
    is never offered again. Skipped pairs are asked again once all other
    pairs have been handled.

    """
    try:
        read_answer = raw_input  # Python 2
    except NameError:
        read_answer = input  # Python 3

    tags = model.tags.get_all()
    # Flatten the similarity lists into (tag, similar tag) pairs. Works for
    # an empty tag list as well.
    queries = [(tag, other)
               for tag, similar in zip(tags, tags_similarity(tags))
               for other in similar
               if tag != other]
    queries = list(unique(queries))
    skip = []

    while queries:
        queries = sorted(queries, key=lambda pair: fst(pair).lower())
        left, right = queries.pop(0)
        if (right, left) in queries:
            # Same question with swapped sides
            queries.remove((right, left))

        # Using prompt in raw_input leads to error (can't encode character)
        sys.stdout.write(left + " || " + right + " [lrNsq]: ")
        sys.stdout.flush()
        ans = read_answer().strip().lower()

        if ans == 'l':  # Merge left
            model.tags.rename(right, left)
            queries = list(unique(_rename_in_pairs(queries, right, left)))
            skip = list(unique(_rename_in_pairs(skip, right, left)))
        elif ans == 'r':  # Merge right
            model.tags.rename(left, right)
            queries = list(unique(_rename_in_pairs(queries, left, right)))
            skip = list(unique(_rename_in_pairs(skip, left, right)))
        elif ans == 'q':  # Ignore and abort
            return
        elif ans == 's':  # Skip
            skip.append((left, right))
        # Anything else == 'n': Ignore, keep both tags

        if not queries and skip:
            # Second round for the pairs the user postponed
            queries, skip = skip, []
def _ensure_directory(path):
    """Create directory *path* including its parents unless it exists."""
    if not path:
        return  # Target lies in the current directory, nothing to create
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise  # Only an already existing directory is acceptable


def _link_file(link, src, trg):
    """Call *link(src, trg)* and leave an already existing *trg* untouched."""
    try:
        link(src, trg)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise  # Only an already existing file is acceptable


def _copy_file(src, trg):
    """Copy *src* to *trg* via ``shutil.copy2``; ignore copying onto itself."""
    try:
        shutil.copy2(src, trg)
    except shutil.Error:
        pass  # Same file


def images_export(images, target, method=None, keep_structure=False, simulate=False, verbose=False):
    """Export *images* to a *target* directory.

    Method can be either of:

    * symlink   Create symlinks in the *target* directory (default)
    * hardlink  Create hardlinks in the *target* directory
    * copy      Copy the images to the *target* directory

    If *keep_structure* is True, the original directory structure is
    recreated below *target*, starting after the deepest directory shared
    by all *images*.
    Otherwise, the images will be exported to the *target* folder directly.
    If so, naming conflicts are handled by adding a zero-padded sequence
    number to every conflicting image name. Conflicts are detected with the
    ``duplicates`` and ``item_count`` helpers.
    E.g. /foo/image.jpg, /bar/image.jpg -> export/image-0.jpg, export/image-1.jpg

    If *simulate* is True, path translations are returned as a list of
    (src, trg) pairs but no action is actually taken. In that mode the
    translations are also printed if *verbose* is True. Without *simulate*
    nothing is returned.

    Missing target directories are created. Links whose target already
    exists are left as they are; any other file system error is raised.

    Raises ValueError if *method* is not one of the names above.

    """
    method = 'symlink' if method is None else method.lower()
    if method not in ('symlink', 'hardlink', 'copy'):
        raise ValueError('export method not understood')

    images = list(images)

    if keep_structure:
        # commonprefix() compares character by character and may end in the
        # middle of a directory or file name. dirname() cuts it back to a
        # complete directory.
        prefix = os.path.dirname(os.path.commonprefix(images))
        separators = os.sep + (os.altsep or '')
        # A leading separator would make join() discard *target*.
        targets = [os.path.join(target, src[len(prefix):].lstrip(separators))
                   for src in images]
    else:
        targets = [os.path.join(target, os.path.basename(src)) for src in images]
        dups = duplicates(targets)
        for dup in dups:
            num_dups = int(item_count(dup, targets))
            root, ext = os.path.splitext(dup)
            width = len(str(num_dups))
            for cnt in range(num_dups):
                # Rename the first occurrence that still has the old name
                targets[targets.index(dup)] = root + '-' + str(cnt).rjust(width, '0') + ext  # root-00123.ext

    translations = list(zip(images, targets))

    if simulate:
        if verbose:
            for src, trg in translations:
                print("%s -> %s" % (src, trg))  # FIXME: Pretty printing
        return translations

    for src, trg in translations:
        _ensure_directory(os.path.dirname(trg))
        if method == 'symlink':
            _link_file(os.symlink, src, trg)
        elif method == 'hardlink':
            _link_file(os.link, src, trg)
        else:  # 'copy', the method has been validated above
            _copy_file(src, trg)