Source code for lerot.comparison.OptimizedInterleaveVa

# This file is part of Lerot.
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot.  If not, see <http://www.gnu.org/licenses/>.

import argparse
import collections
import itertools

from lerot.comparison.OptimizedInterleave import OptimizedInterleave
from lerot.utils import get_class, split_arg_str


def enumerate_allowed_dispositions(pos_limits, size_limits):
    """ Generator that yields position and block size pairs. """
    return itertools.product(
        range(pos_limits[0], pos_limits[1] + 1),
        range(size_limits[0], size_limits[1] + 1),
    )


def check_disposition(disposition, length):
    """ Receives an iterable of (size, pos) pairs and check if they are OK.

    The disposition is said to be OK if it is possible to place verticals
    of the size `size` at positions given by `pos` without intersection.
    """
    ranking = [0] * length
    for pos, size in disposition:
        end = pos + size
        if end > length:
            return False
        for rank in xrange(pos, end):
            if ranking[rank] != 0:
                return False
            ranking[rank] = 1
    return True


def fill_ranking(disposition, block_rankings, length):
    """ Fill the ranking with docs from block_rankings according to disposition. """
    ranking = [None] * length
    # Ignore the 'Web' ranking for now.
    web_ranking = block_rankings[-1]
    for disp, block_ranking in zip(disposition, block_rankings[:-1]):
        pos, size = disp
        for rank, doc in zip(xrange(pos, pos + size), block_ranking[:size]):
            ranking[rank] = doc
    # Fill the rest with 'Web' documents.
    idx = 0
    for rank in xrange(length):
        if ranking[rank] is None:
            ranking[rank] = web_ranking[idx]
            idx += 1
    return ranking


[docs]class OptimizedInterleaveVa(OptimizedInterleave): def __init__(self, arg_str=None): OptimizedInterleave.__init__(self, arg_str) parser = argparse.ArgumentParser() parser.add_argument('--allowed_leavings', choices=['prefix_constraint', 'prefix_constraint_va'], default='prefix_constraint_va') parser.add_argument("--credit_va", action="store_true", default=False) parser.add_argument("--um_class", type=str, default="environment.FederatedClickModel") parser.add_argument("--um_args", type=str, default="0.2 0.1") args = vars(parser.parse_known_args(split_arg_str(arg_str))[0]) self.allowed_leavings = getattr(self, args['allowed_leavings']) if args["credit_va"]: self.precompute_rank = self.precompute_rank_va self.um_class = get_class(args["um_class"]) self.um = self.um_class(args["um_args"])
[docs] def precompute_rank_va(self, R): sortR = R[:] examination = self.um.get_examination_prob(sortR) sortR.sort(key=dict(zip(sortR, examination)).get, reverse=True) return OptimizedInterleave.precompute_rank(self, sortR)
[docs] def prefix_constraint_va(self, rankings, length): rankings_by_type = collections.defaultdict(lambda: [[] for r in rankings]) for idx, ranking in enumerate(rankings): for doc in ranking: rankings_by_type[doc.get_type()][idx].append(doc) block_size_limits = {} block_pos_limits = {} for t, block_rankings in rankings_by_type.iteritems(): # Eq. (11). block_size_limits[t] = ( min(len(b) for b in block_rankings), max(len(b) for b in block_rankings) ) if t == 'Web': continue # Find the ranks (0-based) of all the vertical blocks # in each ranking: rank(X_t, X). block_poss = [] for ranking in rankings: for i, doc in enumerate(ranking): if doc.get_type() == t: block_poss.append(i) break else: block_poss.append(len(ranking)) # Eq. (12)-(13). block_pos_limits[t] = (min(block_poss), max(block_poss)) # print block_pos_limits # print block_size_limits nblocks = [len(set(d.get_type() for d in r if d.get_type() != 'Web')) for r in rankings] # Eq. (14)-(15). v_types = [t for t in rankings_by_type if t != 'Web'] # Now we iterate over all possible combinations of selected verticals, # positions and the sizes of the vertical blocks. Only if # such disposition is possible, i.e., the blocks don't overlap, # we iterate over all the possible contents of the blocks and # the 'Web' documents surrounding the blocks. L = [] for num_blocks in xrange(min(nblocks), max(nblocks) + 1): # All the combinations of num_blocks verticals for selected_verts in itertools.combinations(v_types, num_blocks): # All the possible pairs of position / size for each selected vertical. for disposition in itertools.product(*[enumerate_allowed_dispositions( block_pos_limits[t], block_size_limits[t]) for t in selected_verts]): if not check_disposition(disposition, length): continue # Eq. (9)-(10) -- the main prefix constraint (extended). allowed_leavings_by_type = dict(( t, self.prefix_constraint(rankings_by_type[t], disp[1]) ) for (t, disp) in zip(selected_verts, disposition)) selected_verts_list = list(selected_verts) num_insert_web_docs = length - sum(disp[1] for disp in disposition) if num_insert_web_docs > 0: allowed_leavings_by_type['Web'] = self.prefix_constraint( rankings_by_type['Web'], num_insert_web_docs ) selected_verts_list += ['Web'] # Now enumerate all the possible rankings for all vertical + Web for block_rankings in itertools.product( *[allowed_leavings_by_type[t] for t in selected_verts_list]): # Note: it is essential that block_rankings[-1] contains web ranking. ranking = fill_ranking(disposition, block_rankings, length) L.append(ranking) return L
import unittest from lerot.ranker import SyntheticDeterministicRankingFunction from lerot.document import Document class TestEvaluation(unittest.TestCase): def setUp(self): pass def makeSerps(self, templates): types = {'w': 'Web', 'i': 'Image', 'n': 'News'} serps = [] mapping = {} for template in templates: counter = 0 serp = [] for d in template: while counter in mapping and mapping[counter] != d: counter += 1 mapping[counter] = d counter += 1 serp.append(Document(counter, types[d])) serps.append(serp) return serps def testVA(self): #strA = 'wwwnnwwiiiwww' #strB = 'wnnnwiiwwwwww' #strA = 'wwwwwwwiiiwnw' #strB = 'wwwwwwwiiiwwn' strA = 'wni' strB = 'win' dA, dB = self.makeSerps([strA, strB]) #Web, News = 'Web', 'News' #dA, dB = [Document(1, Web), Document(0, Web), Document(5, Web), Document(9, Web), Document(4, Web), Document(7, Web), Document(10, Web), Document(2, News), Document(3, News), Document(6, News)], [Document(0, Web), Document(1, Web), Document(4, Web), Document(5, Web), Document(9, Web), Document(7, Web), Document(2, News), Document(3, News), Document(6, News), Document(8, News)] comparison = OptimizedInterleaveVa('') A = SyntheticDeterministicRankingFunction(dA) B = SyntheticDeterministicRankingFunction(dB) dI, a = comparison.interleave(A, B, None, min(len(dA), len(dB))) print dA print dB print list(dI) if __name__ == '__main__': # from pycallgraph import PyCallGraph # from pycallgraph.output import GraphvizOutput # # with PyCallGraph(output=GraphvizOutput()): unittest.main()