Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/request.py: 7%
343 statements
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
1"""
2 This module implements the WSGI (Web Server Gateway Interface) layer for ViUR. This is the main entry
3 point for incoming http requests. The main class is the :class:`Router`. Each request will get its
4 own instance of that class which then holds the reference to the request and response object.
5 Additionally, this module defines the RequestValidator interface which provides a very early hook into the
6 request processing (useful for global ratelimiting, DDoS prevention or access control).
7"""
8import fnmatch
9import json
10import logging
11import os
12import time
13import traceback
14import typing as t
15import unicodedata
16import webob
17from abc import ABC, abstractmethod
18from urllib import parse
19from urllib.parse import unquote, urljoin, urlparse
20from viur.core import current, db, errors, session, utils
21from viur.core.config import conf
22from viur.core.logging import client as loggingClient, requestLogger, requestLoggingRessource
23from viur.core.module import Method
24from viur.core.securityheaders import extendCsp
25from viur.core.tasks import _appengineServiceIPs
# Name of the request parameter that selects the template style. Router._route stores its value
# in `Router.template_style` and does not forward it to the routed function's keyword arguments.
TEMPLATE_STYLE_KEY = "style"
class RequestValidator(ABC):
    """
    RequestValidators can be used to validate a request very early on. If the validate method returns a tuple,
    the request is aborted. Can be used to block requests from bots.

    To register or remove a validator, access it in main.py through
    :attr:`viur.core.request.Router.requestValidators`
    """
    # Internal name to trace which validator aborted the request
    name = "RequestValidator"

    @staticmethod
    @abstractmethod
    def validate(request: "Router") -> t.Optional[tuple[int, str, str]]:
        """
        The function that checks the current request. If the request is valid, simply return None.
        If the request should be blocked, it must return a tuple of

        - The HTTP status code (as int)
        - The description of that status code (eg "Forbidden")
        - The response body (can be a simple string or an HTML-Page)

        :param request: The request instance (the :class:`Router` handling this request) to check
        :return: None on success, an error-tuple otherwise
        """
        raise NotImplementedError()
class FetchMetaDataValidator(RequestValidator):
    """
    This validator examines the headers "sec-fetch-site", "sec-fetch-mode" and "sec-fetch-dest" as
    recommended by https://web.dev/fetch-metadata/
    """
    name = "FetchMetaDataValidator"

    @staticmethod
    def validate(request: "Router") -> t.Optional[tuple[int, str, str]]:
        """
        Accept or reject the request based on its fetch metadata headers.

        :param request: The request instance to check; its ``request.headers`` and
            ``isPostRequest`` attributes are read.
        :return: None if the request may proceed, otherwise the tuple
            ``(403, "Forbidden", "Request rejected due to fetch metadata")``.
        """
        headers = request.request.headers
        fetch_site = headers.get("sec-fetch-site")

        if not fetch_site:  # These headers are not sent by all browsers
            return None

        if fetch_site in {"same-origin", "none"}:  # A request from our site
            return None

        # Use .get() here: a missing GAE_ENV variable must not crash request validation.
        if os.environ.get("GAE_ENV") == "localdev" and fetch_site == "same-site":
            # We are accepting a request with same-site only in local dev mode
            return None

        if (
            headers.get("sec-fetch-mode") == "navigate"
            and not request.isPostRequest
            and headers.get("sec-fetch-dest") not in {"object", "embed"}
        ):  # Incoming navigation GET request
            return None

        return 403, "Forbidden", "Request rejected due to fetch metadata"
class Router:
    """
    This class accepts the requests, collects their parameters and routes each request
    to its destination function.
    The basic control flow is

    - Setting up internal variables
    - Running the Request validators
    - Emitting the headers (especially the security related ones)
    - Run the TLS check (ensure it's a secure connection or check if the URL is whitelisted)
    - Load or initialize a new session
    - Set up i18n (choosing the language etc)
    - Run the request preprocessor (if any)
    - Normalize & sanity check the parameters
    - Resolve the exposed function and call it
    - Save the session / tear down the request
    - Return the response generated

    :warning: Don't instantiate! Don't subclass! DON'T TOUCH! ;)
    """

    # List of requestValidators used to preflight-check a request before it's being dispatched within ViUR
    requestValidators = [FetchMetaDataValidator]

    def __init__(self, environ: dict):
        """
        Build the request/response pair from the WSGI environment and process the request right away.

        The context variables in :mod:`viur.core.current` are populated before processing and are
        always reset afterwards, even if processing raises.

        :param environ: The WSGI environment handed to :class:`webob.Request`.
        """
        super().__init__()
        self.startTime = time.time()

        self.request = webob.Request(environ)
        self.response = webob.Response()

        self.maxLogLevel = logging.DEBUG
        # Use the trace id supplied via header if there is one, otherwise make one up
        self._traceID = \
            self.request.headers.get("X-Cloud-Trace-Context", "").split("/")[0] or utils.string.random()
        self.is_deferred = False
        self.path = ""
        self.path_list = ()

        self.skey_checked = False  # indicates whether @skey-decorator-check has already performed within a request
        self.internalRequest = False
        self.disableCache = False  # Shall this request bypass the caches?
        self.pendingTasks = []
        self.args = ()
        self.kwargs = {}
        self.context = {}
        self.template_style: str | None = None

        # Remember the HTTP method; _process() decides whether it is one we support
        self.method = self.request.method.lower()
        self.isPostRequest = self.method == "post"
        self.isSSLConnection = self.request.host_url.lower().startswith("https://")  # We have an encrypted channel

        db.currentDbAccessLog.set(set())

        # Set context variables
        current.language.set(conf.i18n.default_language)
        current.request.set(self)
        current.session.set(session.Session())
        current.request_data.set({})

        try:
            # Process actual request
            self._process()
        finally:
            # Unset context variables. This lives in a finally-clause so that an exception which is
            # re-raised by _process() does not leave this request's state behind.
            current.language.set(None)
            current.request_data.set(None)
            current.session.set(None)
            current.request.set(None)
            current.user.set(None)

    @property
    def isDevServer(self) -> bool:
        """Deprecated alias for ``conf.instance.is_dev_server``; warns on every access."""
        import warnings
        msg = "Use of `isDevServer` is deprecated; Use `conf.instance.is_dev_server` instead!"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg)
        return conf.instance.is_dev_server

    @staticmethod
    def _is_known_language(lang: str) -> bool:
        """Return True if *lang* is an available language or a configured language alias."""
        return lang in conf.i18n.available_languages or lang in conf.i18n.language_alias_map

    def _select_language(self, path: str) -> str:
        """
        Tries to select the best language for the current request. Depending on the value of
        conf.i18n.language_method, we'll either try to load it from the session, determine it by the domain
        or extract it from the URL.

        :param path: The request path.
        :return: The path to route. It is returned unchanged unless the language method is "url" and
            the path starts with a known language segment, in which case that segment is stripped.
        """
        session_ref = current.session.get()

        if not conf.i18n.available_languages:
            # This project doesn't use the multi-language feature, nothing to do here
            return path

        method = conf.i18n.language_method

        if method == "session":
            # We store the language inside the session, try to load it from there
            if "lang" in session_ref:
                current.language.set(session_ref["lang"])
            elif "X-Appengine-Country" in self.request.headers:
                lng = self.request.headers["X-Appengine-Country"].lower()
                if self._is_known_language(lng):
                    session_ref["lang"] = lng
                    current.language.set(lng)
                else:
                    session_ref["lang"] = conf.i18n.default_language

        elif method == "domain":
            host = self.request.host_url.lower()
            host = host[host.find("://") + 3:].strip(" /")  # strip http(s)://
            if host.startswith("www."):
                host = host[4:]

            if host in conf.i18n.domain_language_mapping:
                current.language.set(conf.i18n.domain_language_mapping[host])
            elif "lang" in session_ref:
                # We have no language configured for this domain, try to read it from session
                current.language.set(session_ref["lang"])

        elif method == "url":
            # Keep the raw (still percent-encoded) first segment around: the amount of characters to
            # strip from `path` is the length of the raw segment, not of its decoded form.
            raw_segment = urlparse(path).path.strip("/").split("/")[0]
            lang = unquote(raw_segment.lower())

            if self._is_known_language(lang):
                current.language.set(lang)
                return path[len(raw_segment) + 1:]  # Return the path stripped by its language segment

            # This URL doesn't contain a language prefix, try to read it from session
            if "lang" in session_ref:
                current.language.set(session_ref["lang"])
            elif "X-Appengine-Country" in self.request.headers.keys():
                lng = self.request.headers["X-Appengine-Country"].lower()
                if self._is_known_language(lng):
                    current.language.set(lng)

        return path

    def _emit_security_headers(self) -> None:
        """Copy the security related headers configured in ``conf.security`` into the response."""
        security = conf.security
        headers = self.response.headers

        # Add CSP headers early (if any)
        if security.content_security_policy and security.content_security_policy["_headerCache"]:
            for k, v in security.content_security_policy["_headerCache"].items():
                headers[k] = v

        if self.isSSLConnection:  # Emit the HSTS header only if we have a secure channel.
            if security.strict_transport_security:
                headers["Strict-Transport-Security"] = security.strict_transport_security

        # Check for X-Security-Headers we shall emit
        if security.x_content_type_options:
            headers["X-Content-Type-Options"] = "nosniff"

        if security.x_xss_protection is not None:
            if security.x_xss_protection:
                headers["X-XSS-Protection"] = "1; mode=block"
            elif security.x_xss_protection is False:
                headers["X-XSS-Protection"] = "0"

        if security.x_frame_options is not None and isinstance(security.x_frame_options, tuple):
            mode, uri = security.x_frame_options
            if mode in ["deny", "sameorigin"]:
                headers["X-Frame-Options"] = mode
            elif mode == "allow-from":
                headers["X-Frame-Options"] = f"allow-from {uri}"

        if security.x_permitted_cross_domain_policies is not None:
            headers["X-Permitted-Cross-Domain-Policies"] = security.x_permitted_cross_domain_policies

        if security.referrer_policy:
            headers["Referrer-Policy"] = security.referrer_policy

        if security.permissions_policy.get("_headerCache"):
            headers["Permissions-Policy"] = security.permissions_policy["_headerCache"]

        if security.enable_coep:
            headers["Cross-Origin-Embedder-Policy"] = "require-corp"

        if security.enable_coop:
            headers["Cross-Origin-Opener-Policy"] = security.enable_coop

        if security.enable_corp:
            headers["Cross-Origin-Resource-Policy"] = security.enable_corp

    @staticmethod
    def _is_ssl_check_exempt(req_path: str) -> bool:
        """
        Return True if *req_path* is listed in ``conf.security.no_ssl_check_urls``.

        An entry ending in ``*`` matches every path starting with the text before the ``*``;
        any other entry has to match the path exactly.
        """
        for test_url in conf.security.no_ssl_check_urls:
            if test_url.endswith("*"):
                if req_path.startswith(test_url[:-1]):
                    return True
            elif test_url == req_path:
                return True

        return False

    def _render_default_error(self, e: Exception) -> str:
        """
        Render the built-in error output for *e*; used when no custom error handler produced a result.

        Returns JSON if the request targets the "vi" or "json" renderer or if the response is already
        typed as JSON, otherwise HTML (from an error template if one is found, else a minimal page).
        Must be called while *e* is being handled, as the traceback is read from the active exception.
        """
        descr = "The server encountered an unexpected error and is unable to process your request."

        if isinstance(e, errors.HTTPException):
            error_info = {
                "status": e.status,
                "reason": e.name,
                "title": str(translate(e.name)),
                "descr": e.descr,
            }
        else:
            error_info = {
                "status": 500,
                "reason": "Internal Server Error",
                "title": str(translate("Internal Server Error")),
                "descr": descr
            }

        if conf.instance.is_dev_server:
            error_info["traceback"] = traceback.format_exc()

        error_info["logo"] = conf.error_logo

        response = current.request.get().response
        # .get() instead of [...]: a missing Content-Type must not raise from within the error handling
        if (len(self.path_list) > 0 and self.path_list[0] in ("vi", "json")) or \
                response.headers.get("Content-Type") == "application/json":
            response.headers["Content-Type"] = "application/json"
            return json.dumps(error_info)

        # We render the error in html
        # Try to get the template from html/error/
        if filename := conf.main_app.render.getTemplateFileName((f"{error_info['status']}", "error"),
                                                                raise_exception=False):
            template = conf.main_app.render.getEnv().get_template(filename)
            nonce = utils.string.random(16)
            res = template.render(error_info, nonce=nonce)
            extendCsp({"style-src": [f"nonce-{nonce}"]})
            return res

        return (f'<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">'
                f'<title>{error_info["status"]} - {error_info["reason"]}</title>'
                f'</head><body><h1>{error_info["status"]} - {error_info["reason"]}</h1>')

    def _write_error_response(self, e: Exception) -> None:
        """
        Turn the exception *e* into the response: set the status, log it and write an error body.

        The body comes from ``conf.error_handler`` if that is set and returns something truthy,
        otherwise from :meth:`_render_default_error`. Must be called from within the ``except``
        block handling *e*.
        """
        self.response.body = b""

        if isinstance(e, errors.HTTPException):
            logging.info(f"[{e.status}] {e.name}: {e.descr}", exc_info=conf.debug.trace)
            self.response.status = f"{e.status} {e.name}"
            # Set machine-readable x-viur-error response header in case there is an exception description.
            if e.descr:
                # Header values must not contain line breaks; strip CR as well as LF.
                self.response.headers["x-viur-error"] = e.descr.replace("\r", "").replace("\n", "")
        else:
            self.response.status = 500
            logging.error("ViUR has caught an unhandled exception!")
            logging.exception(e)

        res = None
        if conf.error_handler:
            try:
                res = conf.error_handler(e)
            except Exception as newE:
                logging.error("viur.error_handler failed!")
                logging.exception(newE)
                res = None

        if not res:
            res = self._render_default_error(e)

        self.response.write(res.encode("UTF-8"))

    def _emit_dev_server_request_log(self) -> None:
        """Write the outer request log entry through the cloud logging client (used on the dev server)."""
        if self.maxLogLevel >= logging.CRITICAL:
            severity = "CRITICAL"
        elif self.maxLogLevel >= logging.ERROR:
            severity = "ERROR"
        elif self.maxLogLevel >= logging.WARNING:
            severity = "WARNING"
        elif self.maxLogLevel >= logging.INFO:
            severity = "INFO"
        else:
            severity = "DEBUG"

        trace = f"projects/{loggingClient.project}/traces/{self._traceID}"

        http_request = {
            'requestMethod': self.request.method,
            'requestUrl': self.request.url,
            'status': self.response.status_code,
            'userAgent': self.request.headers.get('USER-AGENT'),
            'responseSize': self.response.content_length,
            'latency': "%0.3fs" % (time.time() - self.startTime),
            'remoteIp': self.request.environ.get("HTTP_X_APPENGINE_USER_IP")
        }

        requestLogger.log_text(
            "",
            client=loggingClient,
            severity=severity,
            http_request=http_request,
            trace=trace,
            resource=requestLoggingRessource,
            operation={
                "first": True,
                "last": True,
                "id": self._traceID
            }
        )

    def _process(self):
        """
        Run the request through validation, header emission, TLS check, session and language setup
        and finally the routing. Errors raised on the way are converted into an error response,
        unless ``conf.debug.trace_exceptions`` is set, in which case they are re-raised.
        """
        if self.method not in ("get", "post", "head"):
            # NOTE(review): the response status is not touched here, so the client presumably
            # receives the default status with an empty body - consider answering with 405.
            logging.error(f"{self.method=} not supported")
            return

        if self.request.headers.get("X-AppEngine-TaskName", None) is not None:  # Check if we run in the appengine
            if self.request.environ.get("HTTP_X_APPENGINE_USER_IP") in _appengineServiceIPs:
                self.is_deferred = True
            elif os.getenv("TASKS_EMULATOR") is not None:
                self.is_deferred = True

        current.language.set(conf.i18n.default_language)

        # Check if we should process or abort the request.
        # Validators run one by one, so the first rejection stops the remaining ones from running.
        for validator in self.requestValidators:
            validator_result = validator.validate(self)
            if validator_result is None:
                continue

            logging.warning(f"Request rejected by validator {validator.name}")
            status_code, status_str, status_descr = validator_result
            self.response.status = f"{status_code} {status_str}"
            self.response.write(status_descr)
            return

        path = self.request.path

        self._emit_security_headers()

        # Ensure that TLS is used if required
        if conf.security.force_ssl and not self.isSSLConnection and not conf.instance.is_dev_server:
            # Some URLs need to be whitelisted (as f.e. the Tasks-Queue doesn't call using https)
            if not self._is_ssl_check_exempt(self.request.path):
                # Redirect the user to the startpage (using ssl this time)
                host = self.request.host_url.lower()
                host = host[host.find("://") + 3:].strip(" /")  # strip http(s)://
                self.response.status = "302 Found"
                self.response.headers['Location'] = f"https://{host}/"
                return

        if path.startswith("/_ah/warmup"):
            self.response.write("okay")
            return

        try:
            current.session.get().load(self)

            # Load current user into context variable if user module is there.
            if user_mod := getattr(conf.main_app.vi, "user", None):
                current.user.set(user_mod.getCurrentUser())

            path = self._select_language(path)[1:]

            # Check for closed system
            if conf.security.closed_system and not current.user.get():
                if not any(fnmatch.fnmatch(path, pat) for pat in conf.security.closed_system_allowed_paths):
                    raise errors.Unauthorized()

            if conf.request_preprocessor:
                path = conf.request_preprocessor(path)

            self._route(path)

        except errors.Redirect as e:
            if conf.debug.trace_exceptions:
                logging.warning("""conf.debug.trace_exceptions is set, won't handle this exception""")
                raise

            self.response.status = f"{e.status} {e.name}"
            url = e.url
            if url.startswith(('.', '/')):
                # Relative target; resolve it against the URL of this request
                url = str(urljoin(self.request.url, url))
            self.response.headers['Location'] = url

        except Exception as e:
            if conf.debug.trace_exceptions:
                logging.warning("""conf.debug.trace_exceptions is set, won't handle this exception""")
                raise

            self._write_error_response(e)

        finally:
            self.saveSession()
            if conf.instance.is_dev_server and conf.debug.dev_server_cloud_logging:
                # Emit the outer log only on dev_appserver (we'll use the existing request log when live)
                self._emit_dev_server_request_log()

        if conf.instance.is_dev_server:
            self.is_deferred = True

            # Deferred task emulation: run whatever has been queued in pendingTasks during this request
            while self.pendingTasks:
                task = self.pendingTasks.pop()
                logging.debug(f"Deferred task emulation, executing {task=}")
                try:
                    task()
                except Exception:  # noqa
                    logging.exception(f"Deferred Task emulation {task} failed")

    def _route(self, path: str) -> None:
        """
        Does the actual work of sanitizing the parameter, determine which exposed-function to call
        (and with which parameters)

        :param path: The path to route.
        :raises errors.BadRequest: Too many parameters, invalid unicode, or a reserved parameter name.
        :raises errors.Unauthorized: A ``canAccess`` guard on the way returned a falsy value.
        :raises errors.NotFound: The path cannot be resolved or the target is not exposed.
        :raises errors.MethodNotAllowed: No callable target was found, or POST is required.
        :raises errors.PreconditionFailed: The target requires SSL but the request is unencrypted.
        """

        # Parse the URL
        if path := parse.urlparse(path).path:
            self.path = path
            self.path_list = tuple(unicodedata.normalize("NFC", parse.unquote(part))
                                   for part in path.strip("/").split("/"))

        # Prevent Hash-collision attacks
        if len(self.request.params) > conf.max_post_params_count:
            raise errors.BadRequest(
                f"Too many arguments supplied, exceeding maximum"
                f" of {conf.max_post_params_count} allowed arguments per request"
            )

        param_filter = conf.param_filter_function
        if param_filter and not callable(param_filter):
            raise ValueError(f"""{param_filter=} is not callable""")

        for key, value in self.request.params.items():
            try:
                key = unicodedata.normalize("NFC", key)
                value = unicodedata.normalize("NFC", value)
            except UnicodeError:
                # We received invalid unicode data (usually happens when
                # someone tries to exploit unicode normalisation bugs)
                raise errors.BadRequest()

            if param_filter and param_filter(key, value):
                continue

            if key == TEMPLATE_STYLE_KEY:
                self.template_style = value
                continue

            if key not in self.kwargs:
                self.kwargs[key] = value
            elif isinstance(self.kwargs[key], list):
                self.kwargs[key].append(value)
            else:  # Convert that key to a list
                self.kwargs[key] = [self.kwargs[key], value]

        if "self" in self.kwargs or "return" in self.kwargs:  # self or return is reserved for bound methods
            raise errors.BadRequest()

        caller = conf.main_resolver
        idx = 0  # Count how many items of the path we've consumed (so the rest can go into *args of the called func)
        path_found = True

        for part in self.path_list:
            # TODO: Remove canAccess guards... solve differently.
            if "canAccess" in caller and not caller["canAccess"]():
                # We have a canAccess function guarding that object,
                # and it returns False...
                raise errors.Unauthorized()

            idx += 1

            if part not in caller:
                part = "index"

            if caller := caller.get(part):
                if isinstance(caller, Method):
                    if part == "index":
                        # "index" was substituted, so this path part is an argument, not a consumed name
                        idx -= 1

                    self.args = tuple(self.path_list[idx:])
                    break

                elif part == "index":
                    path_found = False
                    break

            else:
                path_found = False
                break

        if not path_found:
            raise errors.NotFound(
                f"""The path {utils.string.escape("/".join(self.path_list[:idx]))} could not be found""")

        if not isinstance(caller, Method):
            # try to find "index" function
            if (index := caller.get("index")) and isinstance(index, Method):
                caller = index
            else:
                raise errors.MethodNotAllowed()

        # Check for internal exposed
        if caller.exposed is False and not self.internalRequest:
            raise errors.NotFound()

        # Check for @force_ssl flag
        if not self.internalRequest \
                and caller.ssl \
                and not self.isSSLConnection \
                and not conf.instance.is_dev_server:
            raise errors.PreconditionFailed("You must use SSL to access this resource!")

        # Check for @force_post flag
        if not self.isPostRequest and caller.methods == ("POST", ):
            raise errors.MethodNotAllowed("You must use POST to access this resource!")

        # Check if this request should bypass the caches
        if self.request.headers.get("X-Viur-Disable-Cache"):
            # No cache requested, check if the current user is allowed to do so
            if (user := current.user.get()) and "root" in user["access"]:
                logging.debug("Caching disabled by X-Viur-Disable-Cache header")
                self.disableCache = True

        # Destill context as self.context, if available
        if context := {k: v for k, v in self.kwargs.items() if k.startswith("@")}:
            # Remove context parameters from kwargs
            kwargs = {k: v for k, v in self.kwargs.items() if k not in context}
            # Remove leading "@" from context parameters
            self.context |= {k[1:]: v for k, v in context.items() if len(k) > 1}
        else:
            kwargs = self.kwargs

        if ((self.internalRequest and conf.debug.trace_internal_call_routing)
                or conf.debug.trace_external_call_routing):
            logging.debug(
                f"Calling {caller._func!r} with args={self.args!r}, {kwargs=} within context={self.context!r}"
            )

        # Now call the routed method!
        res = caller(*self.args, **kwargs)

        if not isinstance(res, bytes):  # Convert the result to bytes if it is not already!
            res = str(res).encode("UTF-8")

        self.response.write(res)

    def saveSession(self) -> None:
        """Save the session held in the ``current.session`` context variable."""
        current.session.get().save(self)
575from .i18n import translate # noqa: E402