Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/bones/randomslice.py: 18%
60 statements
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
1from random import random, sample, shuffle
2import typing as t
4from itertools import chain
5from math import ceil
7from viur.core import db
8from viur.core.bones.base import BaseBone
class RandomSliceBone(BaseBone):
    """
    Bone that simulates SQL's ``orderby=random`` for database queries.

    This class is particularly useful when you want to retrieve a random sample of elements from a
    larger dataset without needing to fetch all the data from the database. By performing multiple
    subqueries and processing the results, RandomSliceBone provides an efficient way to get a
    randomized selection of elements from the database query.

    If you sort by this bone, the query will return a random set of elements from that query.

    :param visible: Indicates if the bone is visible, defaults to False. Must stay False.
    :param readOnly: Indicates if the bone is read-only, defaults to True. Must stay True.
    :param slices: The number of slices to use, defaults to 2. Each slice produces two subqueries.
    :param sliceSize: The size of each slice as a fraction of the requested amount, defaults to 0.5.
    :param kwargs: Additional keyword arguments, forwarded to the base class.
    """

    type = "randomslice"

    def __init__(
        self,
        *,
        visible: bool = False,
        readOnly: bool = True,
        slices: int = 2,
        sliceSize: float = 0.5,
        **kwargs
    ):
        """
        Initializes a new RandomSliceBone.

        :raises NotImplementedError: If ``visible`` is truthy or ``readOnly`` is falsy.
        """
        if visible or not readOnly:
            # ``NotImplemented`` is a comparison sentinel and not callable; the exception
            # class that signals an unsupported configuration is ``NotImplementedError``.
            raise NotImplementedError("A RandomSliceBone must not visible and readonly!")
        super().__init__(indexed=True, visible=False, readOnly=True, **kwargs)
        self.slices = slices
        self.sliceSize = sliceSize

    def serialize(self, skel: 'SkeletonInstance', name: str, parentIndexed: bool) -> bool:
        """
        Serializes the bone into a format that can be written into the datastore. Instead of using
        the existing value, it writes a randomly chosen float in the range [0, 1) as the value for
        this bone.

        :param SkeletonInstance skel: The SkeletonInstance this bone is part of.
        :param str name: The property name this bone has in its Skeleton (not the description).
        :param bool parentIndexed: Indicates if the parent bone is indexed (not used here).
        :return: Always True.
        :rtype: bool
        """
        skel.dbEntity[name] = random()
        # Random bones can never be not indexed: sorting/filtering by this value needs the index.
        skel.dbEntity.exclude_from_indexes.discard(name)
        return True

    def buildDBSort(
        self,
        name: str,
        skel: 'viur.core.skeleton.SkeletonInstance',
        dbFilter: db.Query,
        rawFilter: dict
    ) -> t.Optional[db.Query]:
        """
        Modifies the database query to return a random selection of elements by creating multiple
        subqueries, each covering a slice of the data. This method doesn't just change the order of
        the selected elements, but also changes which elements are returned.

        The query is only rewritten if ``rawFilter["orderby"]`` equals ``name``; otherwise it is
        handed back untouched.

        :param str name: The property name this bone has in its Skeleton (not the description).
        :param SkeletonInstance skel: The :class:viur.core.skeleton.Skeleton instance this bone is part of.
        :param db.Query dbFilter: The current :class:viur.core.db.Query instance the filters should be applied to.
        :param Dict rawFilter: The dictionary of filters the client wants to have applied.
        :return: The (possibly modified) :class:viur.core.db.Query instance.
        :rtype: Optional[db.Query]
        :raises RuntimeError: If the query's filter hook fails while the subqueries are built.

        .. note:: The rawFilter is controlled by the client, so you must expect and safely handle
            malformed data.
        """

        def applyFilterHook(dbfilter, prop, value):
            """
            Applies dbfilter._filterHook to the given filter if set,
            else return the unmodified filter.
            Allows orderby=random also be used in relational-queries.
            """
            if dbfilter._filterHook is None:
                return prop, value
            try:
                return dbfilter._filterHook(dbfilter, prop, value)
            except Exception as err:
                # Either, the filterHook tried to do something special to dbFilter (which won't
                # work as we are currently rewriting the core part of it) or it thinks that the query
                # is unsatisfiable (fe. because of a missing ref/parent key in RelationalBone).
                # In each case we kill the query here - making it to return no results
                raise RuntimeError() from err

        if "orderby" in rawFilter and rawFilter["orderby"] == name:
            # We select a random set of elements from that collection
            assert not isinstance(dbFilter.queries,
                                  list), "Orderby random is not possible on a query that already uses an IN-filter!"
            origFilter: dict = dbFilter.queries.filters
            origKind = dbFilter.getKind()
            queries = []
            for _ in range(self.slices):  # Two subqueries per configured slice
                rndVal = random()  # Choose our slice center
                # Entries at or below the center, closest to the center first
                q = db.QueryDefinition(origKind, {}, [])
                prop, value = applyFilterHook(dbFilter, f"{name} <=", rndVal)
                q.filters[prop] = value
                q.orders = [(name, db.SortOrder.Descending)]
                queries.append(q)
                # Entries above the center, closest to the center first
                q = db.QueryDefinition(origKind, {}, [])
                prop, value = applyFilterHook(dbFilter, f"{name} >", rndVal)
                q.filters[prop] = value
                q.orders = [(name, db.SortOrder.Ascending)]
                queries.append(q)
            dbFilter.queries = queries
            # Map the original filter back in
            for k, v in origFilter.items():
                dbFilter.filter(k, v)
            # Install this bone's hooks for the per-subquery limit and the final merge
            dbFilter._customMultiQueryMerge = self.customMultiQueryMerge
            dbFilter._calculateInternalMultiQueryLimit = self.calculateInternalMultiQueryLimit
        return dbFilter

    def calculateInternalMultiQueryLimit(self, query: db.Query, targetAmount: int) -> int:
        """
        Calculates the number of entries to be fetched in each subquery.

        :param db.Query query: The :class:viur.core.db.Query instance (not used here).
        :param int targetAmount: The number of entries to be returned from the db.Query.
        :return: ``targetAmount * sliceSize``, rounded up to the next integer.
        :rtype: int
        """
        return ceil(targetAmount * self.sliceSize)

    def customMultiQueryMerge(self, dbFilter: db.Query, result: list[db.Entity], targetAmount: int) \
            -> list[db.Entity]:
        """
        Merges the results of multiple subqueries by randomly selecting 'targetAmount' elements
        from the combined 'result' list.

        :param db.Query dbFilter: The db.Query instance calling this function (not used here).
        :param List[db.Entity] result: The list of results for each subquery that has been run.
        :param int targetAmount: The number of results to be returned from the db.Query.
        :return: At most ``targetAmount`` distinct elements, in random order.
        :rtype: List[db.Entity]
        """
        # result holds one iterable per subquery; flatten them and drop duplicates. Entries
        # are identified by the string form of their key, and the last occurrence wins.
        uniqueEntries = {str(item.key): item for item in chain.from_iterable(result)}
        res = list(uniqueEntries.values())
        # Pick the requested amount of results from the (usually larger) merged set
        res = sample(res, min(len(res), targetAmount))
        shuffle(res)
        return res