Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/bones/randomslice.py: 18%

60 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-03 13:41 +0000

1from random import random, sample, shuffle 

2import typing as t 

3 

4from itertools import chain 

5from math import ceil 

6 

7from viur.core import db 

8from viur.core.bones.base import BaseBone 

9 

10 

class RandomSliceBone(BaseBone):
    """
    This class is particularly useful when you want to retrieve a random sample of elements from a
    larger dataset without needing to fetch all the data from the database. By performing multiple
    subqueries and processing the results, RandomSliceBone provides an efficient way to get a
    randomized selection of elements from the database query.
    Simulates the orderby=random from SQL.
    If you sort by this bone, the query will return a random set of elements from that query.

    :param visible: Must be False; the bone cannot be made visible.
    :param readOnly: Must be True; the bone cannot be made writable.
    :param slices: The number of random slice centers to use, defaults to 2. Each slice
        contributes two subqueries (one on each side of its center).
    :param sliceSize: Fraction of the requested amount each subquery fetches, defaults to 0.5.
    :param kwargs: Additional keyword arguments, forwarded to :class:`BaseBone`.
    """

    type = "randomslice"

    def __init__(self, *, visible=False, readOnly=True, slices=2, sliceSize=0.5, **kwargs):
        """
        Initializes a new RandomSliceBone.

        The bone is always created as indexed, invisible and read-only, regardless of kwargs.

        :raises NotImplementedError: If ``visible`` is truthy or ``readOnly`` is falsy.
        """
        if visible or not readOnly:
            # NotImplemented is a non-callable constant; NotImplementedError is the exception.
            raise NotImplementedError("A RandomSliceBone must not visible and readonly!")
        super().__init__(indexed=True, visible=False, readOnly=True, **kwargs)
        self.slices = slices
        self.sliceSize = sliceSize

    def serialize(self, skel: 'SkeletonInstance', name: str, parentIndexed: bool) -> bool:
        """
        Serializes the bone into a format that can be written into the datastore. Instead of using
        the existing value, it writes a randomly chosen float in the range [0, 1) as the value for
        this bone.

        :param SkeletonInstance skel: The SkeletonInstance this bone is part of.
        :param str name: The property name this bone has in its Skeleton (not the description).
        :param bool parentIndexed: Indicates if the parent bone is indexed (not used here).
        :return: Always True.
        :rtype: bool
        """
        skel.dbEntity[name] = random()
        # Random bones can never be not indexed: sorting by this bone relies on the index.
        skel.dbEntity.exclude_from_indexes.discard(name)
        return True

    def buildDBSort(
        self,
        name: str,
        skel: 'viur.core.skeleton.SkeletonInstance',
        dbFilter: db.Query,
        rawFilter: dict
    ) -> t.Optional[db.Query]:
        """
        Modifies the database query to return a random selection of elements by creating multiple
        subqueries, each covering a slice of the data. This method doesn't just change the order of
        the selected elements, but also changes which elements are returned.

        The query is only rewritten when ``rawFilter["orderby"]`` equals ``name``; otherwise it is
        returned untouched.

        :param str name: The property name this bone has in its Skeleton (not the description).
        :param SkeletonInstance skel: The :class:`viur.core.skeleton.Skeleton` instance this bone
            is part of.
        :param db.Query dbFilter: The current :class:`viur.core.db.Query` instance the filters
            should be applied to.
        :param Dict rawFilter: The dictionary of filters the client wants to have applied.
        :return: The (possibly modified) :class:`viur.core.db.Query` instance.
        :rtype: Optional[db.Query]
        :raises RuntimeError: If the query's filter hook fails while rewriting the slice filters.
        :raises AssertionError: If the query already consists of multiple subqueries.

        .. note:: The rawFilter is controlled by the client, so you must expect and safely handle
            malformed data.
        """

        def applyFilterHook(query, prop, value):
            """
            Applies query._filterHook to the given filter if set,
            else returns the unmodified filter.
            Allows orderby=random to also be used in relational queries.
            """
            if query._filterHook is None:
                return prop, value
            try:
                return query._filterHook(query, prop, value)
            except Exception as err:
                # Either, the filterHook tried to do something special to the query (which won't
                # work as we are currently rewriting the core part of it) or it thinks that the
                # query is unsatisfiable (fe. because of a missing ref/parent key in
                # RelationalBone). In each case we kill the query here.
                # NOTE(review): presumably the caller catches RuntimeError and turns the query
                # into one that returns no results - confirm against the query-merging code.
                raise RuntimeError("filter hook failed while building the random-slice query") from err

        if "orderby" not in rawFilter or rawFilter["orderby"] != name:
            return dbFilter

        # We select a random set of elements from that collection
        assert not isinstance(dbFilter.queries, list), \
            "Orderby random is not possible on a query that already uses an IN-filter!"
        origFilter: dict = dbFilter.queries.filters
        origKind = dbFilter.getKind()

        # For every slice: one subquery walking down from the center, one walking up from it.
        sides = (
            ("<=", db.SortOrder.Descending),
            (">", db.SortOrder.Ascending),
        )
        queries = []
        for _ in range(self.slices):  # Fetch self.slices slices from the set
            rndVal = random()  # Choose our slice center
            for operator, direction in sides:
                q = db.QueryDefinition(origKind, {}, [])
                prop, value = applyFilterHook(dbFilter, f"{name} {operator}", rndVal)
                q.filters[prop] = value
                q.orders = [(name, direction)]
                queries.append(q)
        dbFilter.queries = queries

        # Map the original filter back in (applied to the freshly built subqueries)
        for k, v in origFilter.items():
            dbFilter.filter(k, v)

        dbFilter._customMultiQueryMerge = self.customMultiQueryMerge
        dbFilter._calculateInternalMultiQueryLimit = self.calculateInternalMultiQueryLimit
        return dbFilter

    def calculateInternalMultiQueryLimit(self, query: db.Query, targetAmount: int) -> int:
        """
        Calculates the number of entries to be fetched in each subquery.

        :param db.Query query: The :class:`viur.core.db.Query` instance (not used here).
        :param int targetAmount: The number of entries to be returned from the db.Query.
        :return: ``targetAmount * sliceSize``, rounded up.
        :rtype: int
        """
        return ceil(targetAmount * self.sliceSize)

    def customMultiQueryMerge(self, dbFilter: db.Query, result: list[db.Entity], targetAmount: int) \
            -> list[db.Entity]:
        """
        Merges the results of multiple subqueries by randomly selecting 'targetAmount' elements
        from the combined 'result' list.

        :param db.Query dbFilter: The db.Query instance calling this function (not used here).
        :param List[db.Entity] result: The list of results for each subquery that has been run.
        :param int targetAmount: The number of results to be returned from the db.Query.
        :return: Up to 'targetAmount' distinct entities in random order.
        :rtype: List[db.Entity]
        """
        # result holds one iterable per subquery; flatten them and drop duplicates by key
        # (overlapping slices may return the same entity more than once).
        unique = {str(entity.key): entity for entity in chain.from_iterable(result)}
        candidates = list(unique.values())
        # Pick the requested amount out of the larger candidate set. sample() already returns
        # the elements in random selection order, so no extra shuffle is needed.
        return sample(candidates, min(len(candidates), targetAmount))