Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/request.py: 9%

343 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-03 13:41 +0000

1""" 

2 This module implements the WSGI (Web Server Gateway Interface) layer for ViUR. This is the main entry 

3 point for incoming http requests. The main class is the :class:`Router`. Each request will get its 

4 own instance of that class which then holds the reference to the request and response object. 

5 Additionally, this module defines the RequestValidator interface which provides a very early hook into the 

6 request processing (useful for global ratelimiting, DDoS prevention or access control). 

7""" 

8import fnmatch 

9import json 

10import logging 

11import os 

12import time 

13import traceback 

14import typing as t 

15import unicodedata 

16import webob 

17from abc import ABC, abstractmethod 

18from urllib import parse 

19from urllib.parse import unquote, urljoin, urlparse 

20from viur.core import current, db, errors, session, utils 

21from viur.core.config import conf 

22from viur.core.logging import client as loggingClient, requestLogger, requestLoggingRessource 

23from viur.core.module import Method 

24from viur.core.securityheaders import extendCsp 

25from viur.core.tasks import _appengineServiceIPs 

26 

# Name of the request parameter that selects the template style. `Router._route` stores the value of
# this parameter in `Router.template_style` instead of passing it on to the called function.
TEMPLATE_STYLE_KEY = "style"

28 

29 

class RequestValidator(ABC):
    """
    RequestValidators can be used to validate a request very early on. If the validate method returns a tuple,
    the request is aborted. Can be used to block requests from bots.

    To register or remove a validator, modify the list
    :attr:`viur.core.request.Router.requestValidators`.
    """
    # Internal name to trace which validator aborted the request
    name = "RequestValidator"

    @staticmethod
    @abstractmethod
    def validate(request: 'Router') -> t.Optional[tuple[int, str, str]]:
        """
        The function that checks the current request. If the request is valid, simply return None.
        If the request should be blocked, it must return a tuple of

        - The HTTP status code (as int)
        - The description of that status code (eg "Forbidden")
        - The response body (can be a simple string or an HTML-Page)

        :param request: The Request instance to check
        :return: None on success, an Error-Tuple otherwise
        :raises NotImplementedError: Always, unless the method is overridden by a subclass
        """
        raise NotImplementedError()

54 

55 

class FetchMetaDataValidator(RequestValidator):
    """
    This validator examines the headers "Sec-Fetch-Site", "Sec-Fetch-Mode" and "Sec-Fetch-Dest" as
    recommended by https://web.dev/fetch-metadata/
    """
    name = "FetchMetaDataValidator"

    @staticmethod
    def validate(request: 'Router') -> t.Optional[tuple[int, str, str]]:
        """
        Accept or reject the request based on its fetch metadata headers.

        A request is accepted if

        - it carries no "Sec-Fetch-Site" header at all,
        - it originates from the same origin or was initiated directly by the user ("none"),
        - it is a same-site request and the environment variable GAE_ENV equals "localdev",
        - it is a navigation request that is not a POST and does not target an object or embed element.

        :param request: The request to check. Its ``request.headers`` and ``isPostRequest`` are evaluated.
        :return: None if the request is accepted, otherwise the tuple
            ``(403, "Forbidden", "Request rejected due to fetch metadata")``.
        """
        headers = request.request.headers
        fetch_site = headers.get("sec-fetch-site")

        if not fetch_site:  # These headers are not sent by all browsers
            return None

        if fetch_site in {"same-origin", "none"}:  # A request from our site
            return None

        # We are accepting a request with same-site only in local dev mode.
        # GAE_ENV is read with .get() as the variable is not guaranteed to exist in every environment.
        if fetch_site == "same-site" and os.environ.get("GAE_ENV") == "localdev":
            return None

        if (
            headers.get("sec-fetch-mode") == "navigate"
            and not request.isPostRequest
            and headers.get("sec-fetch-dest") not in {"object", "embed"}
        ):  # Incoming navigation GET request
            return None

        return 403, "Forbidden", "Request rejected due to fetch metadata"

77 

78 

class Router:
    """
    This class accepts the requests, collects their parameters and routes each request
    to its destination function.
    The basic control flow is

    - Setting up internal variables
    - Running the request validators
    - Emitting the headers (especially the security related ones)
    - Run the TLS check (ensure it's a secure connection or check if the URL is whitelisted)
    - Load or initialize a new session
    - Set up i18n (choosing the language etc)
    - Run the request preprocessor (if any)
    - Normalize & sanity check the parameters
    - Resolve the exposed function and call it
    - Save the session / tear down the request
    - Return the response generated

    :warning: Don't instantiate! Don't subclass! DON'T TOUCH! ;)
    """

    # List of requestValidators used to preflight-check a request before it's being dispatched within ViUR
    requestValidators = [FetchMetaDataValidator]

    def __init__(self, environ: dict):
        """
        Set up the request state, process the request and reset the context variables afterwards.

        The request is processed inside the constructor; the outcome is written to ``self.response``.

        :param environ: The WSGI environment the ``webob.Request`` is created from.
        """
        super().__init__()
        self.startTime = time.time()

        self.request = webob.Request(environ)
        self.response = webob.Response()

        self.maxLogLevel = logging.DEBUG
        # First segment of the X-Cloud-Trace-Context header, or a random string if the header is missing
        self._traceID = \
            self.request.headers.get("X-Cloud-Trace-Context", "").split("/")[0] or utils.string.random()
        self.is_deferred = False
        self.path = ""
        self.path_list = ()

        self.skey_checked = False  # indicates whether @skey-decorator-check has already performed within a request
        self.internalRequest = False
        self.disableCache = False  # Shall this request bypass the caches?
        self.pendingTasks = []
        self.args = ()
        self.kwargs = {}
        self.context = {}
        self.template_style: str | None = None

        # Check if it's a HTTP-Method we support
        self.method = self.request.method.lower()
        self.isPostRequest = self.method == "post"
        self.isSSLConnection = self.request.host_url.lower().startswith("https://")  # We have an encrypted channel

        db.currentDbAccessLog.set(set())

        # Set context variables
        current.language.set(conf.i18n.default_language)
        current.request.set(self)
        current.session.set(session.Session())
        current.request_data.set({})

        try:
            # Process actual request
            self._process()
        finally:
            # Unset context variables, even if _process re-raised an exception,
            # so no state of this request remains in the context.
            current.language.set(None)
            current.request_data.set(None)
            current.session.set(None)
            current.request.set(None)
            current.user.set(None)

    @property
    def isDevServer(self) -> bool:
        """
        Deprecated alias for ``conf.instance.is_dev_server``.

        Every access emits a DeprecationWarning and a warning log entry.
        """
        import warnings
        msg = "Use of `isDevServer` is deprecated; Use `conf.instance.is_dev_server` instead!"
        warnings.warn(msg, DeprecationWarning, stacklevel=2)
        logging.warning(msg)
        return conf.instance.is_dev_server

    @staticmethod
    def _is_known_language(lng: str) -> bool:
        """Return True if *lng* is an available language or an alias of one."""
        return lng in conf.i18n.available_languages or lng in conf.i18n.language_alias_map

    def _select_language(self, path: str) -> str:
        """
        Tries to select the best language for the current request. Depending on the value of
        conf.i18n.language_method, we'll either try to load it from the session, determine it by the domain
        or extract it from the URL.

        :param path: The requested path.
        :return: The path to continue routing with. It is only modified for the "url" method,
            where a leading language segment is stripped.
        """
        session_ref = current.session.get()
        if not conf.i18n.available_languages:
            # This project doesn't use the multi-language feature, nothing to do here
            return path

        language_method = conf.i18n.language_method

        if language_method == "session":
            # We store the language inside the session, try to load it from there
            if "lang" in session_ref:
                current.language.set(session_ref["lang"])
            elif "X-Appengine-Country" in self.request.headers:
                lng = self.request.headers["X-Appengine-Country"].lower()
                if self._is_known_language(lng):
                    session_ref["lang"] = lng
                    current.language.set(lng)
                else:
                    session_ref["lang"] = conf.i18n.default_language

        elif language_method == "domain":
            host = self.request.host_url.lower()
            host = host[host.find("://") + 3:].strip(" /")  # strip http(s)://
            if host.startswith("www."):
                host = host[4:]
            if host in conf.i18n.domain_language_mapping:
                current.language.set(conf.i18n.domain_language_mapping[host])
            elif "lang" in session_ref:
                # We have no language configured for this domain, try to read it from session
                current.language.set(session_ref["lang"])

        elif language_method == "url":
            segments = [unquote(x) for x in urlparse(path).path.lower().strip("/").split("/")]
            if segments and self._is_known_language(segments[0]):
                current.language.set(segments[0])
                return path[len(segments[0]) + 1:]  # Return the path stripped by its language segment

            # This URL doesn't contain a language prefix, try to read it from session
            if "lang" in session_ref:
                current.language.set(session_ref["lang"])
            elif "X-Appengine-Country" in self.request.headers:
                lng = self.request.headers["X-Appengine-Country"].lower()
                if self._is_known_language(lng):
                    current.language.set(lng)

        return path

    def _apply_security_headers(self) -> None:
        """Copy the security related headers configured in ``conf.security`` into the response."""
        security = conf.security
        headers = self.response.headers

        # Add CSP headers early (if any)
        if security.content_security_policy and security.content_security_policy["_headerCache"]:
            for key, value in security.content_security_policy["_headerCache"].items():
                headers[key] = value

        # Emit the HSTS header only if we have a secure channel.
        if self.isSSLConnection and security.strict_transport_security:
            headers["Strict-Transport-Security"] = security.strict_transport_security

        # Check for X-Security-Headers we shall emit
        if security.x_content_type_options:
            headers["X-Content-Type-Options"] = "nosniff"

        # None (and any other falsy value except False) leaves the header unset
        if security.x_xss_protection:
            headers["X-XSS-Protection"] = "1; mode=block"
        elif security.x_xss_protection is False:
            headers["X-XSS-Protection"] = "0"

        if isinstance(security.x_frame_options, tuple):
            mode, uri = security.x_frame_options
            if mode in ("deny", "sameorigin"):
                headers["X-Frame-Options"] = mode
            elif mode == "allow-from":
                headers["X-Frame-Options"] = f"allow-from {uri}"

        if security.x_permitted_cross_domain_policies is not None:
            headers["X-Permitted-Cross-Domain-Policies"] = security.x_permitted_cross_domain_policies
        if security.referrer_policy:
            headers["Referrer-Policy"] = security.referrer_policy
        if security.permissions_policy.get("_headerCache"):
            headers["Permissions-Policy"] = security.permissions_policy["_headerCache"]
        if security.enable_coep:
            headers["Cross-Origin-Embedder-Policy"] = "require-corp"
        if security.enable_coop:
            headers["Cross-Origin-Opener-Policy"] = security.enable_coop
        if security.enable_corp:
            headers["Cross-Origin-Resource-Policy"] = security.enable_corp

    def _handle_exception(self, e: Exception) -> None:
        """
        Turn an exception raised while processing the request into an error response.

        Must be called from within the ``except`` block handling *e*, as the traceback of the
        exception currently being handled is logged (and rendered on the dev server).

        :param e: The exception that has been caught.
        """
        is_http_exception = isinstance(e, errors.HTTPException)

        self.response.body = b""
        if is_http_exception:
            logging.info(f"[{e.status}] {e.name}: {e.descr}", exc_info=conf.debug.trace)
            self.response.status = f"{e.status} {e.name}"
            # Set machine-readable x-viur-error response header in case there is an exception description.
            if e.descr:
                # Header values must not contain line breaks, so both CR and LF are removed
                self.response.headers["x-viur-error"] = e.descr.replace("\r", "").replace("\n", "")
        else:
            self.response.status = 500
            logging.error("ViUR has caught an unhandled exception!")
            logging.exception(e)

        res = None
        if conf.error_handler:
            try:
                res = conf.error_handler(e)
            except Exception as newE:
                logging.error("viur.error_handler failed!")
                logging.exception(newE)
                res = None

        if not res:
            # No (working) custom error handler, render the error ourselves
            if is_http_exception:
                error_info = {
                    "status": e.status,
                    "reason": e.name,
                    "title": str(translate(e.name)),
                    "descr": e.descr,
                }
            else:
                error_info = {
                    "status": 500,
                    "reason": "Internal Server Error",
                    "title": str(translate("Internal Server Error")),
                    "descr": "The server encountered an unexpected error and is unable to process your request.",
                }

            if conf.instance.is_dev_server:
                error_info["traceback"] = traceback.format_exc()

            error_info["logo"] = conf.error_logo

            response_headers = current.request.get().response.headers
            wants_json = (
                (len(self.path_list) > 0 and self.path_list[0] in ("vi", "json"))
                # .get() as the Content-Type header is not guaranteed to be present
                or response_headers.get("Content-Type") == "application/json"
            )

            if wants_json:
                response_headers["Content-Type"] = "application/json"
                res = json.dumps(error_info)
            # We render the error in html; try to get the template from html/error/
            elif filename := conf.main_app.render.getTemplateFileName(
                (f"{error_info['status']}", "error"),
                raise_exception=False,
            ):
                template = conf.main_app.render.getEnv().get_template(filename)
                nonce = utils.string.random(16)
                res = template.render(error_info, nonce=nonce)
                extendCsp({"style-src": [f"nonce-{nonce}"]})
            else:
                res = (
                    '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">'
                    f'<title>{error_info["status"]} - {error_info["reason"]}</title>'
                    f'</head><body><h1>{error_info["status"]} - {error_info["reason"]}</h1>'
                    '</body></html>'
                )

        # A custom error handler may already return bytes
        if isinstance(res, str):
            res = res.encode("UTF-8")
        self.response.write(res)

    def _emit_dev_server_request_log(self) -> None:
        """Write the outer request log entry (we'll use the existing request log when live)."""
        severity = "DEBUG"
        for threshold, label in ((50, "CRITICAL"), (40, "ERROR"), (30, "WARNING"), (20, "INFO")):
            if self.maxLogLevel >= threshold:
                severity = label
                break

        http_request = {
            'requestMethod': self.request.method,
            'requestUrl': self.request.url,
            'status': self.response.status_code,
            'userAgent': self.request.headers.get('USER-AGENT'),
            'responseSize': self.response.content_length,
            'latency': "%0.3fs" % (time.time() - self.startTime),
            'remoteIp': self.request.environ.get("HTTP_X_APPENGINE_USER_IP")
        }
        requestLogger.log_text(
            "",
            client=loggingClient,
            severity=severity,
            http_request=http_request,
            trace=f"projects/{loggingClient.project}/traces/{self._traceID}",
            resource=requestLoggingRessource,
            operation={
                "first": True,
                "last": True,
                "id": self._traceID
            }
        )

    def _process(self):
        """
        Process the request: validate it, emit the headers, enforce TLS, load the session and the
        current user, select the language, route the request and render errors.

        The outcome is written to ``self.response``. Exceptions are only re-raised if
        ``conf.debug.trace_exceptions`` is set.
        """
        if self.method not in ("get", "post", "head"):
            logging.error(f"{self.method=} not supported")
            # Tell the client that the method is not supported instead of answering with "200 OK"
            self.response.status = "405 Method Not Allowed"
            self.response.headers["Allow"] = "GET, HEAD, POST"
            return

        # Only a request carrying the task header can be flagged as deferred
        if self.request.headers.get("X-AppEngine-TaskName", None) is not None:
            if self.request.environ.get("HTTP_X_APPENGINE_USER_IP") in _appengineServiceIPs:
                self.is_deferred = True
            elif os.getenv("TASKS_EMULATOR") is not None:
                self.is_deferred = True

        current.language.set(conf.i18n.default_language)

        # Check if we should process or abort the request.
        # The validators are evaluated one by one, so nothing runs after the first rejection.
        for validator in self.requestValidators:
            validator_result = validator.validate(self)
            if validator_result is not None:
                logging.warning(f"Request rejected by validator {validator.name}")
                status_code, status_str, status_descr = validator_result
                self.response.status = f"{status_code} {status_str}"
                self.response.write(status_descr)
                return

        path = self.request.path

        self._apply_security_headers()

        # Ensure that TLS is used if required
        if conf.security.force_ssl and not self.isSSLConnection and not conf.instance.is_dev_server:
            # Some URLs need to be whitelisted (as f.e. the Tasks-Queue doesn't call using https).
            # An entry ending with "*" matches by prefix, any other entry must match exactly.
            is_whitelisted = any(
                path.startswith(test_url[:-1]) if test_url.endswith("*") else test_url == path
                for test_url in conf.security.no_ssl_check_urls
            )
            if not is_whitelisted:
                # Redirect the user to the startpage (using ssl this time)
                host = self.request.host_url.lower()
                host = host[host.find("://") + 3:].strip(" /")  # strip http(s)://
                self.response.status = "302 Found"
                self.response.headers['Location'] = f"https://{host}/"
                return

        if path.startswith("/_ah/warmup"):
            self.response.write("okay")
            return

        try:
            current.session.get().load(self)

            # Load current user into context variable if user module is there.
            if user_mod := getattr(conf.main_app.vi, "user", None):
                current.user.set(user_mod.getCurrentUser())

            path = self._select_language(path)[1:]

            # Check for closed system
            if conf.security.closed_system and not current.user.get():
                if not any(fnmatch.fnmatch(path, pat) for pat in conf.security.closed_system_allowed_paths):
                    raise errors.Unauthorized()

            if conf.request_preprocessor:
                path = conf.request_preprocessor(path)

            self._route(path)

        except errors.Redirect as e:
            if conf.debug.trace_exceptions:
                logging.warning("""conf.debug.trace_exceptions is set, won't handle this exception""")
                raise
            self.response.status = f"{e.status} {e.name}"
            url = e.url
            if url.startswith(('.', '/')):
                # Relative targets are resolved against the URL of the current request
                url = str(urljoin(self.request.url, url))
            self.response.headers['Location'] = url

        except Exception as e:
            if conf.debug.trace_exceptions:
                logging.warning("""conf.debug.trace_exceptions is set, won't handle this exception""")
                raise
            self._handle_exception(e)

        finally:
            self.saveSession()
            if conf.instance.is_dev_server and conf.debug.dev_server_cloud_logging:
                # Emit the outer log only on dev_appserver (we'll use the existing request log when live)
                self._emit_dev_server_request_log()

        if conf.instance.is_dev_server:
            self.is_deferred = True

            # Execute the tasks collected in self.pendingTasks, last one first
            while self.pendingTasks:
                task = self.pendingTasks.pop()
                logging.debug(f"Deferred task emulation, executing {task=}")
                try:
                    task()
                except Exception:  # noqa
                    logging.exception(f"Deferred Task emulation {task} failed")

    def _route(self, path: str) -> None:
        """
        Does the actual work of sanitizing the parameters, determining which exposed function to call
        (and with which parameters) and calling it. The result is written to the response.

        :param path: The path to route.
        :raises errors.BadRequest: If too many parameters are supplied, a parameter contains invalid
            unicode data or a reserved parameter name ("self", "return") is used.
        :raises errors.Unauthorized: If a "canAccess" guard on the path returns a falsy value.
        :raises errors.NotFound: If the path can't be resolved or the target is not exposed.
        :raises errors.MethodNotAllowed: If the target has no callable "index" or requires POST.
        :raises errors.PreconditionFailed: If the target requires SSL, but the connection is not encrypted.
        """

        # Parse the URL
        if path := parse.urlparse(path).path:
            self.path = path
            self.path_list = tuple(
                unicodedata.normalize("NFC", parse.unquote(part))
                for part in path.strip("/").split("/")
            )

        # Prevent Hash-collision attacks
        if len(self.request.params) > conf.max_post_params_count:
            raise errors.BadRequest(
                f"Too many arguments supplied, exceeding maximum"
                f" of {conf.max_post_params_count} allowed arguments per request"
            )

        param_filter = conf.param_filter_function
        if param_filter and not callable(param_filter):
            raise ValueError(f"""{param_filter=} is not callable""")

        for key, value in self.request.params.items():
            try:
                key = unicodedata.normalize("NFC", key)
                value = unicodedata.normalize("NFC", value)
            except UnicodeError:
                # We received invalid unicode data (usually happens when
                # someone tries to exploit unicode normalisation bugs)
                raise errors.BadRequest()

            # Parameters the filter function returns a truthy value for are dropped
            if param_filter and param_filter(key, value):
                continue

            if key == TEMPLATE_STYLE_KEY:
                self.template_style = value
                continue

            if key not in self.kwargs:
                self.kwargs[key] = value
            elif isinstance(self.kwargs[key], list):
                self.kwargs[key].append(value)
            else:  # Convert that key to a list
                self.kwargs[key] = [self.kwargs[key], value]

        if "self" in self.kwargs or "return" in self.kwargs:  # self or return is reserved for bound methods
            raise errors.BadRequest()

        caller = conf.main_resolver
        idx = 0  # Count how many items of path_list we'd have consumed (the rest goes into *args of the called func)
        path_found = True

        for part in self.path_list:
            # TODO: Remove canAccess guards... solve differently.
            if "canAccess" in caller and not caller["canAccess"]():
                # We have a canAccess function guarding that object,
                # and it returns False...
                raise errors.Unauthorized()

            idx += 1

            if part not in caller:
                part = "index"

            if caller := caller.get(part):
                if isinstance(caller, Method):
                    if part == "index":
                        # "index" was substituted, so the current path segment belongs to the arguments
                        idx -= 1

                    self.args = tuple(self.path_list[idx:])
                    break

                elif part == "index":
                    path_found = False
                    break

            else:
                path_found = False
                break

        if not path_found:
            raise errors.NotFound(
                f"""The path {utils.string.escape("/".join(self.path_list[:idx]))} could not be found""")

        if not isinstance(caller, Method):
            # try to find "index" function
            if (index := caller.get("index")) and isinstance(index, Method):
                caller = index
            else:
                raise errors.MethodNotAllowed()

        # Check for internal exposed
        if caller.exposed is False and not self.internalRequest:
            raise errors.NotFound()

        # Check for @force_ssl flag
        if (
            not self.internalRequest
            and caller.ssl
            and not self.isSSLConnection
            and not conf.instance.is_dev_server
        ):
            raise errors.PreconditionFailed("You must use SSL to access this resource!")

        # Check for @force_post flag
        if not self.isPostRequest and caller.methods == ("POST", ):
            raise errors.MethodNotAllowed("You must use POST to access this resource!")

        # Check if this request should bypass the caches
        if self.request.headers.get("X-Viur-Disable-Cache"):
            # No cache requested, check if the current user is allowed to do so
            if (user := current.user.get()) and "root" in (user["access"] or ()):
                logging.debug("Caching disabled by X-Viur-Disable-Cache header")
                self.disableCache = True

        # Distill context as self.context, if available
        if context := {k: v for k, v in self.kwargs.items() if k.startswith("@")}:
            # Remove context parameters from kwargs
            kwargs = {k: v for k, v in self.kwargs.items() if k not in context}
            # Remove leading "@" from context parameters
            self.context |= {k[1:]: v for k, v in context.items() if len(k) > 1}
        else:
            kwargs = self.kwargs

        if ((self.internalRequest and conf.debug.trace_internal_call_routing)
                or conf.debug.trace_external_call_routing):
            logging.debug(
                f"Calling {caller._func!r} with args={self.args!r}, {kwargs=} within context={self.context!r}"
            )

        # Now call the routed method!
        res = caller(*self.args, **kwargs)

        if not isinstance(res, bytes):  # Convert the result to bytes if it is not already!
            res = str(res).encode("UTF-8")
        self.response.write(res)

    def saveSession(self) -> None:
        """Save the session stored in the ``current.session`` context variable for this request."""
        current.session.get().save(self)

573 

574 

575from .i18n import translate # noqa: E402