# Source code for lerot.comparison.HistTeamDraft

# This file is part of Lerot.
#
# Lerot is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Lerot is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Lerot.  If not, see <http://www.gnu.org/licenses/>.

# KH, 2012/08/14

from random import randint

from .AbstractHistInterleavedComparison import AbstractHistInterleavedComparison


class HistTeamDraft(AbstractHistInterleavedComparison):
    """Team draft method, applied to historical data.

    Given a result list ``l`` that was shown earlier and the clicks ``c``
    observed on it, try to explain ``l`` as a list that team-draft
    interleaving of two target rankers could have produced, and credit the
    clicks to the rankers according to that explanation.
    """

    def __init__(self, arg_str=None):
        # arg_str is accepted but not used; there is nothing to configure.
        pass

    @staticmethod
    def _picks_next(ranking, start, shown, target):
        """Return True if a ranker would contribute *target* next.

        Scans ``ranking[start:]`` for the first document that is not in
        *shown* (documents already placed in the result list) and reports
        whether that document equals *target*. Returns False when every
        remaining document of *ranking* is already in *shown*.
        """
        for doc in ranking[start:]:
            if doc not in shown:
                return doc == target
        return False

    def _get_possible_assignment(self, l, r1, r2, query):
        """Find a team assignment under which r1 and r2 could have produced l.

        Arguments:
        l -- the result list that was shown
        r1, r2 -- target rankers; must provide init_ranking(query),
            document_count() and next()
        query -- passed through to init_ranking of both rankers

        Only the first min(r1.document_count(), r2.document_count(), len(l))
        positions are considered.

        Returns a list with one entry per considered position: -1 for a
        document in the common top-ranked prefix of both rankers (credited to
        neither), 0 for a document contributed by r1, 1 for a document
        contributed by r2. Returns None if l cannot be explained this way.
        When more than one assignment is possible, one is picked at random.
        """
        r1.init_ranking(query)
        r2.init_ranking(query)
        length = min(r1.document_count(), r2.document_count(), len(l))
        # get ranked list for each ranker
        l1, l2 = [], []
        for _ in range(length):
            l1.append(r1.next())
            l2.append(r2.next())

        a = []
        # determine overlap in top results (and whether the overlap matches l)
        for i in range(length):
            if l1[i] == l2[i]:
                if l[i] == l1[i]:
                    a.append(-1)
                else:
                    return None
            else:
                break
        # both rankers have used up exactly the common prefix
        i1 = i2 = len(a)

        # now check pairwise; per rank pair, one document needs to come from
        # each ranker
        while len(a) < length:
            shown = l[:len(a)]
            # forward i1 and i2 to point to the next documents not yet in
            # l[0:len(a)]
            while i1 < len(a) and l1[i1] in shown:
                i1 += 1
            while i2 < len(a) and l2[i2] in shown:
                i2 += 1

            if length - len(a) == 1:
                # if there is only one document left, we're fine with a
                # document from either list
                next_doc = l[len(a)]
                from_1 = l1[i1] == next_doc
                from_2 = l2[i2] == next_doc
                if from_1 and from_2:
                    a.append(randint(0, 1))
                elif from_1:
                    a.append(0)
                elif from_2:
                    a.append(1)
                else:
                    return None
                # a is complete now, so i1 / i2 need no further update
            else:
                next_1 = l[len(a)]
                next_2 = l[len(a) + 1]
                assert next_1 != next_2
                # documents that are placed once the first pick is made
                after_first = l[:len(a) + 1]
                # r1 goes first if its next document is next_1 and the next
                # document of r2 that is not yet placed is next_2. All
                # documents that are already placed have to be skipped here,
                # not only next_1: r2 may rank documents contributed earlier
                # by r1 right behind next_1.
                match_1 = (l1[i1] == next_1 and
                    self._picks_next(l2, i2, after_first, next_2))
                # or the same with the roles of r1 and r2 swapped
                match_2 = (l2[i2] == next_1 and
                    self._picks_next(l1, i1, after_first, next_2))
                # two matches: delete one at random
                if match_1 and match_2:
                    if randint(0, 1):
                        match_2 = False
                    else:
                        match_1 = False
                # now we have at most one match left
                if match_1:
                    a.extend((0, 1))
                elif match_2:
                    a.extend((1, 0))
                else:
                    return None
        return a

    def infer_outcome(self, l, a, c, target_r1, target_r2, query):
        """Assign clicks for contributed documents.

        Arguments:
        l -- the result list that was shown
        a -- ignored; a new assignment is derived for the target rankers
        c -- click vector aligned with l (1 marks a clicked position)
        target_r1, target_r2 -- the rankers to compare
        query -- the query for which l was shown

        Returns -1 if documents assigned to target_r1 received more clicks,
        1 if documents assigned to target_r2 received more clicks, and 0 for
        a tie or when l is not a possible team-draft list of the two target
        rankers.
        """
        a = self._get_possible_assignment(l, target_r1, target_r2, query)
        if a is None:
            return 0

        c1 = sum(1 for val_a, val_c in zip(a, c)
            if val_a == 0 and val_c == 1)
        c2 = sum(1 for val_a, val_c in zip(a, c)
            if val_a == 1 and val_c == 1)
        return -1 if c1 > c2 else 1 if c2 > c1 else 0