有两种方法实现容斥原理,一种是遍历,一种是递归(DFS),对于hyperloglog求交集而言选择遍历。
第一步初始化 redis
import redis
r = redis.Redis(host='localhost', port=6379, db=0)
r.pfadd('day1',*['user_1','user_2','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day2',*['user_8','user_2','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day3',*['user_8','user_9','user_3','user_4','user_5','user_6','user_7'])
r.pfadd('day4',*['user_8','user_9','user_10','user_4','user_5','user_6','user_7'])
r.pfadd('day5',*['user_8','user_9','user_10','user_11','user_5','user_6','user_7'])
r.pfadd('day6',*['user_8','user_9','user_10','user_11','user_12','user_6','user_7'])
r.pfadd('day7',*['user_8','user_9','user_10','user_11','user_12','user_13','user_7'])
r.pfadd('day8',*['user_8','user_9','user_10','user_11','user_12','user_13','user_14'])
第二部实现排列函数和归属函数
#求排列
from itertools import combinations
def arrangement(length):
combins = {}
for i in range(length):
combins[i+1] = [c for c in combinations(range(length), (i+1))]
return combins
#lst1是否包含lst2
def is_exist_in_tuple(lst1,lst2):
d1 = {}
for i in lst1:
if i in d1:
d1[i] += 1
else:
d1[i] = 1
d2 = {}
for i in lst2:
if i in d2:
d2[i] += 1
else:
d2[i] = 1
for key,value in d2.items():
if key not in d1 or value>d1[key]:
return False
return True
第三步 实现交集函数
#combins--排列字典
#combins_intersection--交集
#i--第几层
#j--第几个
#k--第几个的第几个
#下面开始计算交集
#l--同i
#m--同j
def intersection(lst):
count_intersection = 0
combins = arrangement(len(lst))
combins_intersection = combins
for i in range(len(combins)):
for j in range(len(combins[i+1])):
for k in range(len(combins[i+1][j])):
r.pfmerge("combins_intersection%s_%s" % (i+1,j), lst[combins[i+1][j][k]])
combins = arrangement(len(lst))
combins_intersection[i+1][j] = r.pfcount("combins_intersection%s_%s" % (i+1,j))
if(i>0):
for l in range((i)):
for m in range(len(combins[l+1])):
if(is_exist_in_tuple(combins[i+1][j], combins[l+1][m])):
combins_intersection[i+1][j] += ((-1)**(l+1))*abs(combins_intersection[l+1][m])
for n in range(len(combins[i+1])):
combins_intersection[i+1][n] = abs(combins_intersection[i+1][n])
for i in range(len(combins)):
for j in range(len(combins[i+1])):
r.delete("combins_intersection%s_%s" % (i+1,j))
return combins_intersection[i+1][0]
测试
combins_intersection1 = intersection(["day1","day2","day3","day4","day5","day6","day7","day8"])
combins_intersection = intersection(["day1","day2","day3","day4","day5","day6","day7"])
看结果输出为
print combins_intersection
print "----------------"
print combins_intersection1
1
----------------
0