# This file is part of Lerot.
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot. If not, see <http://www.gnu.org/licenses/>.
import argparse
import collections
import itertools
from lerot.comparison.OptimizedInterleave import OptimizedInterleave
from lerot.utils import get_class, split_arg_str
def enumerate_allowed_dispositions(pos_limits, size_limits):
    """ Generator that yields position and block size pairs.

    Yields every (position, size) pair with position in
    [pos_limits[0], pos_limits[1]] and size in
    [size_limits[0], size_limits[1]], both limits inclusive,
    in row-major (position-first) order.
    """
    min_pos, max_pos = pos_limits
    min_size, max_size = size_limits
    for pos in range(min_pos, max_pos + 1):
        for size in range(min_size, max_size + 1):
            yield (pos, size)
def check_disposition(disposition, length):
    """ Receives an iterable of (pos, size) pairs and checks if they are OK.

    The disposition is said to be OK if it is possible to place vertical
    blocks of the given sizes at the given 0-based positions inside a
    ranking of `length` slots without any block running past the end and
    without any two blocks overlapping.
    """
    # Occupancy map: 1 marks a slot already claimed by some block.
    ranking = [0] * length
    for pos, size in disposition:
        end = pos + size
        if end > length:
            # Block would extend beyond the ranking.
            return False
        # `range` (not py2-only `xrange`) keeps this runnable on Python 3;
        # iteration behavior is identical.
        for rank in range(pos, end):
            if ranking[rank] != 0:
                # Slot already taken by a previous block.
                return False
            ranking[rank] = 1
    return True
def fill_ranking(disposition, block_rankings, length):
    """ Fill the ranking with docs from block_rankings according to disposition.

    `disposition` is a sequence of (pos, size) pairs, one per vertical
    block; `block_rankings` holds one document list per vertical block in
    the same order, with the 'Web' document list as the LAST element.
    Vertical documents are placed at their block's slots first; every
    remaining slot is then filled with 'Web' documents in order.
    Returns a list of `length` documents.
    """
    ranking = [None] * length
    # The last block ranking is by convention the 'Web' ranking; it is
    # used only for the fill-in pass below.
    web_ranking = block_rankings[-1]
    for disp, block_ranking in zip(disposition, block_rankings[:-1]):
        pos, size = disp
        # Place up to `size` docs of this vertical block at its slots.
        # `range` (not py2-only `xrange`) keeps this runnable on Python 3.
        for rank, doc in zip(range(pos, pos + size), block_ranking[:size]):
            ranking[rank] = doc
    # Fill the rest with 'Web' documents, preserving their order.
    idx = 0
    for rank in range(length):
        if ranking[rank] is None:
            ranking[rank] = web_ranking[idx]
            idx += 1
    return ranking
class OptimizedInterleaveVa(OptimizedInterleave):
    """Vertical-aware (VA) variant of optimized interleaving.

    Extends OptimizedInterleave so that the set of allowed interleaved
    rankings respects vertical-block constraints: documents of each
    non-'Web' vertical type appear as a contiguous block whose position
    and size are bounded by what the input rankings exhibit
    (Eq. (11)-(15) in the comments below refer to the corresponding
    paper equations).

    NOTE(review): the original source carried Sphinx HTML "[docs]"
    artifacts before this class and two methods; they were scraping junk
    (syntax errors) and have been removed.
    """

    def __init__(self, arg_str=None):
        """Parse VA-specific options on top of OptimizedInterleave's.

        Recognized options: --allowed_leavings (constraint method name),
        --credit_va (use examination-based rank precomputation),
        --um_class / --um_args (user model used by --credit_va).
        """
        OptimizedInterleave.__init__(self, arg_str)
        parser = argparse.ArgumentParser()
        parser.add_argument('--allowed_leavings',
                            choices=['prefix_constraint',
                                     'prefix_constraint_va'],
                            default='prefix_constraint_va')
        parser.add_argument("--credit_va", action="store_true", default=False)
        # User (click) model: instantiated below and queried for
        # examination probabilities in precompute_rank_va.
        parser.add_argument("--um_class", type=str,
                            default="environment.FederatedClickModel")
        parser.add_argument("--um_args", type=str,
                            default="0.2 0.1")
        args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])
        # Resolve the chosen constraint method by name on this instance.
        self.allowed_leavings = getattr(self, args['allowed_leavings'])
        if args["credit_va"]:
            # Swap in the examination-aware rank precomputation.
            self.precompute_rank = self.precompute_rank_va
        self.um_class = get_class(args["um_class"])
        self.um = self.um_class(args["um_args"])

    def precompute_rank_va(self, R):
        """Precompute ranks after sorting docs by examination probability.

        Documents with higher examination probability (per the user
        model) are ranked first; the sorted list is then passed to the
        base-class precompute_rank.
        """
        sortR = R[:]
        examination = self.um.get_examination_prob(sortR)
        # Map each doc to its examination probability and sort descending.
        sortR.sort(key=dict(zip(sortR, examination)).get, reverse=True)
        return OptimizedInterleave.precompute_rank(self, sortR)

    def prefix_constraint_va(self, rankings, length):
        """Enumerate allowed interleavings under vertical-block constraints.

        Returns the list L of all rankings of `length` documents in which
        each selected vertical appears as one contiguous block, with block
        positions/sizes bounded by the input `rankings`, and with the
        remaining slots filled by 'Web' documents satisfying the base
        prefix constraint.
        """
        # Split each input ranking by document type:
        # rankings_by_type[t][i] = docs of type t in rankings[i], in order.
        rankings_by_type = collections.defaultdict(
            lambda: [[] for r in rankings])
        for idx, ranking in enumerate(rankings):
            for doc in ranking:
                rankings_by_type[doc.get_type()][idx].append(doc)
        block_size_limits = {}
        block_pos_limits = {}
        # `items()` (not py2-only `iteritems()`) keeps this runnable on
        # Python 3; iteration behavior is identical.
        for t, block_rankings in rankings_by_type.items():
            # Eq. (11).
            block_size_limits[t] = (
                min(len(b) for b in block_rankings),
                max(len(b) for b in block_rankings)
            )
            if t == 'Web':
                continue
            # Find the ranks (0-based) of all the vertical blocks
            # in each ranking: rank(X_t, X).
            block_poss = []
            for ranking in rankings:
                for i, doc in enumerate(ranking):
                    if doc.get_type() == t:
                        block_poss.append(i)
                        break
                else:
                    # Vertical t absent from this ranking: treat it as
                    # ranked just past the end.
                    block_poss.append(len(ranking))
            # Eq. (12)-(13).
            block_pos_limits[t] = (min(block_poss), max(block_poss))
        # Number of distinct vertical types present in each input ranking.
        nblocks = [len(set(d.get_type() for d in r if d.get_type() != 'Web'))
                   for r in rankings]
        # Eq. (14)-(15).
        v_types = [t for t in rankings_by_type if t != 'Web']
        # Now we iterate over all possible combinations of selected verticals,
        # positions and the sizes of the vertical blocks. Only if
        # such disposition is possible, i.e., the blocks don't overlap,
        # we iterate over all the possible contents of the blocks and
        # the 'Web' documents surrounding the blocks.
        L = []
        for num_blocks in range(min(nblocks), max(nblocks) + 1):
            # All the combinations of num_blocks verticals.
            for selected_verts in itertools.combinations(v_types, num_blocks):
                # All the possible pairs of position / size for each
                # selected vertical.
                for disposition in itertools.product(
                        *[enumerate_allowed_dispositions(
                            block_pos_limits[t], block_size_limits[t])
                          for t in selected_verts]):
                    if not check_disposition(disposition, length):
                        continue
                    # Eq. (9)-(10) -- the main prefix constraint (extended):
                    # per-vertical allowed leavings of the block's size.
                    allowed_leavings_by_type = dict((
                        t, self.prefix_constraint(rankings_by_type[t], disp[1])
                    ) for (t, disp) in zip(selected_verts, disposition))
                    selected_verts_list = list(selected_verts)
                    num_insert_web_docs = length - sum(
                        disp[1] for disp in disposition)
                    if num_insert_web_docs > 0:
                        allowed_leavings_by_type['Web'] = self.prefix_constraint(
                            rankings_by_type['Web'], num_insert_web_docs
                        )
                        selected_verts_list += ['Web']
                    # Now enumerate all the possible rankings for all
                    # verticals + Web.
                    for block_rankings in itertools.product(
                            *[allowed_leavings_by_type[t]
                              for t in selected_verts_list]):
                        # Note: it is essential that block_rankings[-1]
                        # contains the web ranking (fill_ranking relies
                        # on that convention).
                        ranking = fill_ranking(disposition, block_rankings,
                                               length)
                        L.append(ranking)
        return L
import unittest
from lerot.ranker import SyntheticDeterministicRankingFunction
from lerot.document import Document
class TestEvaluation(unittest.TestCase):
    """Smoke test: interleave two tiny synthetic vertical-aware SERPs.

    Uses single-arg print() calls and no print statements so the module
    parses under both Python 2 and Python 3.
    """

    def setUp(self):
        pass

    def makeSerps(self, templates):
        """Build Document SERPs from template strings such as 'wni'.

        Each character selects a type: 'w' -> Web, 'i' -> Image,
        'n' -> News. A mapping shared across templates tries to reuse
        the same document id for the same type at compatible slots, so
        the resulting SERPs overlap in documents.
        """
        types = {'w': 'Web', 'i': 'Image', 'n': 'News'}
        serps = []
        mapping = {}
        for template in templates:
            counter = 0
            serp = []
            for d in template:
                # Skip ids already assigned to a different type.
                while counter in mapping and mapping[counter] != d:
                    counter += 1
                mapping[counter] = d
                counter += 1
                # NOTE(review): Document receives the post-increment
                # counter, i.e. one past the mapping key — looks like an
                # off-by-one, but preserved here; confirm intent before
                # changing.
                serp.append(Document(counter, types[d]))
            serps.append(serp)
        return serps

    def testVA(self):
        strA = 'wni'
        strB = 'win'
        dA, dB = self.makeSerps([strA, strB])
        comparison = OptimizedInterleaveVa('')
        A = SyntheticDeterministicRankingFunction(dA)
        B = SyntheticDeterministicRankingFunction(dB)
        dI, a = comparison.interleave(A, B, None, min(len(dA), len(dB)))
        # Single-argument print() is equivalent to the old print
        # statement form and valid on Python 3.
        print(dA)
        print(dB)
        print(list(dI))
if __name__ == '__main__':
    # Optional call-graph profiling, kept for reference (disabled):
    # from pycallgraph import PyCallGraph
    # from pycallgraph.output import GraphvizOutput
    #
    # with PyCallGraph(output=GraphvizOutput()):
    # Run the TestEvaluation suite defined above.
    unittest.main()