Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%

580 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-03 13:41 +0000

1import base64 

2import datetime 

3import google.auth 

4import hashlib 

5import hmac 

6import html 

7import io 

8import json 

9import logging 

10import PIL 

11import PIL.ImageCms 

12import re 

13import requests 

14import string 

15import typing as t 

16from collections import namedtuple 

17from urllib.parse import quote as urlquote 

18from urllib.request import urlopen 

19from google.cloud import storage 

20from google.oauth2.service_account import Credentials as ServiceAccountCredentials 

21from viur.core import conf, current, db, errors, utils 

22from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone 

23from viur.core.decorators import * 

24from viur.core.prototypes.tree import SkelType, Tree, TreeSkel 

25from viur.core.skeleton import SkeletonInstance, skeletonByKind 

26from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask 

27 

28 

# Globals for connectivity

# Pattern a filename has to match to be accepted:
#   - the whole name may not be one of CON, PRN, AUX, NUL, COM1-COM9 or LPT1-LPT9 (case-insensitive),
#   - no character may be a control character (\x00-\x1F) or one of < > : " / \ | ? *,
#   - the last character may additionally not be a dot or a space.
VALID_FILENAME_REGEX = re.compile(
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    re.IGNORECASE
)

# Application default credentials and project id, resolved once at import time.
_CREDENTIALS, __PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(__PROJECT_ID, _CREDENTIALS)
# The bucket all blobs of this module are read from and written to ("<project-id>.appspot.com").
GOOGLE_STORAGE_BUCKET = GOOGLE_STORAGE_CLIENT.lookup_bucket(f"""{__PROJECT_ID}.appspot.com""")

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ("dlkey", "is_derived", "filename"))

43 

44 

def importBlobFromViur2(dlKey, fileName):
    """
    Import a blob referenced by a legacy download key into this module's storage layout.

    The blob is looked up below ``conf.viur2import_blobsource["gsdir"]`` and renamed to
    ``<dlKey>/source/<fileName>``. The outcome of every attempt is stored in a
    ``viur-viur2-blobimport`` marker entity keyed by ``dlKey``; if such a marker already exists,
    its result is returned without trying again.

    :param dlKey: The download key of the blob to import.
    :param fileName: The file name the blob receives in its new location.
    :return: The download URL of the imported blob, or False if the import is disabled or failed.
    """
    source = conf.viur2import_blobsource
    if not source:
        return False

    def fail(error, **extra):
        # Persist a failure marker (so the import is not attempted again) and report the failure.
        marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        marker["success"] = False
        marker["error"] = error
        for name, value in extra.items():
            marker[name] = value
        db.Put(marker)
        return False

    if existingImport := db.Get(db.Key("viur-viur2-blobimport", dlKey)):
        if existingImport["success"]:
            return existingImport["dlurl"]
        return False

    if source["infoURL"]:
        # The old blob name has to be resolved by asking the configured info URL first.
        try:
            importDataReq = urlopen(source["infoURL"] + dlKey)
        except Exception:  # any fetch problem is recorded; SystemExit/KeyboardInterrupt still propagate
            return fail("Failed URL-FETCH 1")

        with importDataReq:  # make sure the connection is released
            if importDataReq.status != 200:
                return fail("Failed URL-FETCH 2")
            importData = json.loads(importDataReq.read())

        oldBlobName = source["gsdir"] + "/" + importData["key"]
    else:
        oldBlobName = source["gsdir"] + "/" + dlKey

    srcBlob = storage.Blob(bucket=GOOGLE_STORAGE_BUCKET, name=oldBlobName)
    if not srcBlob.exists():
        return fail("Local SRC-Blob missing", oldBlobName=oldBlobName)

    GOOGLE_STORAGE_BUCKET.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")

    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]

90 

91 

def thumbnailer(fileSkel, existingFiles, params):
    """
    Create resized variants of an image stored in the bucket and upload them as derived files.

    :param fileSkel: File skeleton providing the "dlkey" and "name" of the source blob.
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts describing the wanted variants. Each one needs a "width" and
        may have a "height"; "fileExtension" (default "webp") and "mimeType" (default "image/webp")
        are optional. Entries without a "width" are ignored.
    :return: A list of ``(name, size, mimetype, customData)`` tuples, one per uploaded variant.
        None if the source blob is missing, an empty list if the blob is not a readable image.
    """
    source_name = html.unescape(fileSkel["name"])
    source_path = f"""{fileSkel["dlkey"]}/source/{source_name}"""

    source_blob = GOOGLE_STORAGE_BUCKET.get_blob(source_path)
    if not source_blob:
        logging.warning(f"Blob {source_path} is missing from cloud storage!")
        return

    source_buffer = io.BytesIO()
    source_blob.download_to_file(source_buffer)

    results = []
    for spec in params:
        # Every variant starts again from the untouched source data
        source_buffer.seek(0)
        try:
            img = PIL.Image.open(source_buffer)
        except PIL.Image.UnidentifiedImageError:
            # Not an image Pillow understands - the other resolutions can't succeed either
            return []

        if icc_profile := img.info.get('icc_profile'):
            # The embedded color profile would get lost on conversion, so the image is converted to sRGB first
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_profile)),
                    outputProfile=PIL.ImageCms.createProfile('sRGB'),
                    outputMode="RGBA" if img.has_transparency_data else "RGB",
                )
            except Exception as e:
                logging.exception(e)
                continue

        extension = spec.get("fileExtension", "webp")
        if "width" not in spec:
            # No default fallback - ignore
            continue

        width = spec["width"]
        if "height" in spec:
            height = spec["height"]
            target_name = f"thumbnail-{width}-{height}.{extension}"
        else:
            # Derive the height from the width, keeping the aspect ratio
            height = int(float(img.size[1]) * float(width / float(img.size[0])))
            target_name = f"thumbnail-w{width}.{extension}"

        mime_type = spec.get("mimeType", "image/webp")

        rendered = io.BytesIO()
        img.resize((width, height), PIL.Image.LANCZOS).save(rendered, extension)
        rendered_size = rendered.tell()
        rendered.seek(0)

        GOOGLE_STORAGE_BUCKET.blob(f"""{fileSkel["dlkey"]}/derived/{target_name}""").upload_from_file(
            rendered, content_type=mime_type
        )
        results.append(
            (target_name, rendered_size, mime_type, {"mimetype": mime_type, "width": width, "height": height})
        )

    return results

144 

145 

def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External Thumbnailer for images.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:
    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    The cloudfunction is called twice: first with "nameOnly" set, to learn the names of the files it
    is going to create, then again together with one resumable upload URL per announced file.

    :param fileSkel: File skeleton providing "dlkey", "name" and "mimetype" of the source file.
    :param existingFiles: Not used by this implementation.
    :param params: The derive parameters, passed on to the cloudfunction unchanged.
    :return: A list of ``(name, size, mimetype, customData)`` tuples, or None if the source blob is
        missing or one of the requests to the cloudfunction failed.
    :raises ValueError: If conf.file_thumbnailer_url is not set.
    """
    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    source_name = html.unescape(fileSkel["name"])

    def signed_source_url():
        # Returns an URL the cloudfunction can fetch the source file from (None if the blob is gone).
        if conf.instance.is_dev_server:
            return File.create_download_url(fileSkel["dlkey"], fileSkel["name"])

        path = f"""{fileSkel["dlkey"]}/source/{source_name}"""
        blob = GOOGLE_STORAGE_BUCKET.get_blob(path)
        if not blob:
            logging.warning(f"Blob {path} is missing from cloud storage!")
            return None

        auth_request = google.auth.transport.requests.Request()
        expires_at = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(auth_request, "")
        return blob.generate_signed_url(
            expires_at,
            credentials=signing_credentials,
            response_disposition=f"""filename={fileSkel["name"]}""",
            version="v4",
        )

    def call_cloudfunction(payload):
        # Sends the hmac-signed payload to the cloudfunction; returns its JSON answer or None on any failure.
        encoded = base64.b64encode(json.dumps(payload).encode("UTF-8"))
        body = json.dumps({"dataStr": encoded.decode('ASCII'), "sign": File.hmac_sign(encoded)})
        resp = requests.post(
            conf.file_thumbnailer_url,
            data=body,
            headers={"Content-Type": "application/json"},
            allow_redirects=False,
        )

        if resp.status_code != 200:  # Error Handling
            if resp.status_code in (302, 404):
                # Google answers with a 302 to an auth site when the cloudfunction was not found
                # https://cloud.google.com/functions/docs/troubleshooting#login
                logging.error("Cloudfunction not found")
            elif resp.status_code == 403:
                logging.error("No permission for the Cloudfunction")
            else:
                logging.error(
                    f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return None

        try:
            answer = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return None

        if "error" in answer:
            logging.error(f"cloudfunction_thumbnailer failed with: {answer.get('error')}")
            return None

        return answer

    # First round trip: only ask which files are going to be created
    if not (url := signed_source_url()):
        return

    request_data = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (announced := call_cloudfunction(request_data)):
        return

    # Open one resumable upload session per announced file
    upload_urls = {}
    for entry in announced["values"]:
        derived_name = File.sanitize_filename(entry["name"])
        target = GOOGLE_STORAGE_BUCKET.blob(f"""{fileSkel["dlkey"]}/derived/{derived_name}""")
        upload_urls[fileSkel["dlkey"] + derived_name] = target.create_resumable_upload_session(
            timeout=60,
            content_type=entry["mimeType"],
        )

    # Second round trip: a fresh source URL plus the upload targets, this time rendering for real
    if not (url := signed_source_url()):
        return

    request_data["url"] = url
    request_data["nameOnly"] = False
    request_data["uploadUrls"] = upload_urls

    if not (rendered := call_cloudfunction(request_data)):
        return

    results = []
    try:
        for derived in rendered["values"]:
            for key, value in derived.items():
                results.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        # Whatever had been collected up to the malformed entry is still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")

    return results

267 

268 

class DownloadUrlBone(BaseBone):
    """
    Bone that provides a freshly signed download url for a FileSkel each time it is unserialized.
    """

    def unserialize(self, skel, name):
        """
        Store a newly signed download url as the value of this bone.

        :param skel: The skeleton currently being unserialized.
        :param name: The name of this bone within the skeleton.
        :return: True if the url was created, False if the entity has no "dlkey" or no "name".
        """
        entity = skel.dbEntity
        if not ("dlkey" in entity and "name" in entity):
            return False

        skel.accessedValues[name] = File.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True

282 

283 

class FileLeafSkel(TreeSkel):
    """
    Skeleton used for the leafs (the files themselves) of the file module.
    """
    kindName = "file"

    size = StringBone(descr="Size", readOnly=True, searchable=True)

    # Name of the folder inside the bucket that holds this file
    dlkey = StringBone(descr="Download-Key", readOnly=True)

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(descr="MIME-Type", readOnly=True)

    weak = BooleanBone(descr="Weak reference", readOnly=True, visible=False)

    pending = BooleanBone(descr="Pending upload", readOnly=True, visible=False, defaultValue=False)

    width = NumericBone(descr="Width", readOnly=True, searchable=True)

    height = NumericBone(descr="Height", readOnly=True, searchable=True)

    downloadUrl = DownloadUrlBone(descr="Download-URL", readOnly=True, visible=False)

    derived = BaseBone(descr="Derived Files", readOnly=True, visible=False)

    pendingparententry = KeyBone(descr="Pending key Reference", readOnly=True, visible=False)

    def preProcessBlobLocks(self, locks):
        """
        Add the own dlkey to the locks, as this skeleton doesn't need a filebone to reference a blob.

        :param locks: The set of dlkeys collected so far.
        :return: The same set, extended by the own dlkey unless the entry is weak or has no dlkey.
        """
        dlkey = None if self["weak"] else self["dlkey"]
        if dlkey:
            locks.add(dlkey)

        return locks

    @classmethod
    def refresh(cls, skelValues):
        """
        Refresh the skeleton and, if a blob source is configured, try to import its blob from there.

        :param skelValues: The skeleton values to refresh.
        """
        super().refresh(skelValues)

        if not conf.viur2import_blobsource:
            return

        imported_url = importBlobFromViur2(skelValues["dlkey"], skelValues["name"])
        if not imported_url:
            return

        if not skelValues["downloadUrl"]:
            skelValues["downloadUrl"] = imported_url

        skelValues["pendingparententry"] = False

373 

374 

class FileNodeSkel(TreeSkel):
    """
    Skeleton used for the nodes (the folders) of the file module.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    name = StringBone(descr="Name", required=True, searchable=True)

    rootNode = BooleanBone(descr="Is RootNode", defaultValue=False)

391 

392 

393class File(Tree): 

394 PENDING_POSTFIX = " (pending)" 

395 DOWNLOAD_URL_PREFIX = "/file/download/" 

396 MAX_FILENAME_LEN = 256 

397 

398 leafSkelCls = FileLeafSkel 

399 nodeSkelCls = FileNodeSkel 

400 

401 handler = "tree.simple.file" 

402 adminInfo = { 

403 "icon": "folder-fill", 

404 "handler": handler, # fixme: Use static handler; Remove with VIUR4! 

405 } 

406 

407 roles = { 

408 "*": "view", 

409 "editor": ("add", "edit"), 

410 "admin": "*", 

411 } 

412 

413 default_order = "name" 

414 

415 # Helper functions currently resist here 

416 

@staticmethod
def is_valid_filename(filename: str) -> bool:
    """
    Check whether a filename is acceptable.

    A filename is accepted if it is at most MAX_FILENAME_LEN characters long and matches
    VALID_FILENAME_REGEX, which is meant to make it valid on Linux, Mac OS and Windows.

    Rule set: https://stackoverflow.com/a/31976060/3749896
    Regex test: https://regex101.com/r/iBYpoC/1

    :param filename: The filename to check.
    :return: True if the filename is valid, False otherwise.
    """
    fits_length = len(filename) <= File.MAX_FILENAME_LEN
    return fits_length and VALID_FILENAME_REGEX.match(filename) is not None

432 

@staticmethod
def hmac_sign(data: t.Any) -> str:
    """
    Sign data with conf.file_hmac_key using HMAC-SHA3-384.

    :param data: The data to sign. Anything that is not bytes is signed as the UTF-8 encoding of its str().
    :return: The signature as hex string.
    """
    assert conf.file_hmac_key is not None, "No hmac-key set!"
    payload = data if isinstance(data, bytes) else str(data).encode("UTF-8")
    return hmac.new(conf.file_hmac_key, msg=payload, digestmod=hashlib.sha3_384).hexdigest()

439 

@staticmethod
def hmac_verify(data: t.Any, signature: str) -> bool:
    """
    Check a signature created by hmac_sign, comparing it in constant time.

    :param data: The signed data; it is encoded as ASCII before signing it again.
    :param signature: The hex signature to check.
    :return: True if the signature matches the data, False otherwise.
    """
    expected = File.hmac_sign(data.encode("ASCII"))
    return hmac.compare_digest(expected, signature)

443 

@staticmethod
def create_download_url(
    dlkey: str,
    filename: str,
    derived: bool = False,
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    download_filename: t.Optional[str] = None
) -> str:
    """
    Build a signed download-url for a file stored below the given download-key.

    :param dlkey: The download-key (= the folder inside the bucket) of the file.
    :param filename: The name of the file. Either the original filename or the name of a derived file.
    :param derived: True, if it points to a derived file, False if it points to the original uploaded file
    :param expires:
        None (or anything else that is falsy) for a link that never expires, otherwise a
        datetime.timedelta of how long the link should be valid. An int is taken as minutes.
    :param download_filename: If set, this alternate filename is made part of the signed data.
    :return: The signed download-url relative to the current domain (eg /file/download/...)
    :raises errors.UnprocessableEntity: If download_filename is not a valid filename.
    """
    if isinstance(expires, int):
        expires = datetime.timedelta(minutes=expires)

    # Undo escaping on ()= performed on fileNames
    for entity, char in (("&#040;", "("), ("&#041;", ")"), ("&#061;", "=")):
        filename = filename.replace(entity, char)

    folder = "derived" if derived else "source"
    filepath = f"{dlkey}/{folder}/{filename}"

    if download_filename:
        if not File.is_valid_filename(download_filename):
            raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")

        download_filename = urlquote(download_filename)

    if expires:
        valid_until = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M")
    else:
        valid_until = 0  # marks a link without expiration

    # The signed payload consists of three NUL-separated fields: path, expiration, download filename
    payload = f"""{filepath}\0{valid_until}\0{download_filename or ""}"""
    data = base64.urlsafe_b64encode(payload.encode("UTF-8"))
    signature = File.hmac_sign(data)

    return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={signature}"""

484 

485 @staticmethod 

486 def parse_download_url(url) -> t.Optional[FilePath]: 

487 """ 

488 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath. 

489 

490 If the URL cannot be parsed, the function returns None. 

491 

492 :param url: The file download URL to be parsed. 

493 :return: A FilePath on success, None otherwise. 

494 """ 

495 if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url: 

496 return None 

497 

498 data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?" 

499 sig = sig.removeprefix("sig=") 

500 

501 if not File.hmac_verify(data, sig): 

502 # Invalid signature 

503 return None 

504 

505 # Split the blobKey into the individual fields it should contain 

506 data = base64.urlsafe_b64decode(data).decode("UTF-8") 

507 

508 match data.count("\0"): 

509 case 3: 

510 dlpath, valid_until, _ = data.split("\0") 

511 case 2: 

512 # It's the old format, without an downloadFileName 

513 dlpath, valid_until = data.split("\0") 

514 case _: 

515 # Invalid path 

516 return None 

517 

518 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now(): 

519 # Signature expired 

520 return None 

521 

522 if dlpath.count("/") != 3: 

523 # Invalid path 

524 return None 

525 

526 dlkey, derived, filename = dlpath.split("/", 3) 

527 return FilePath(dlkey, derived != "source", filename) 

528 

@staticmethod
def create_src_set(
    file: t.Union["SkeletonInstance", dict, str],
    expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
    width: t.Optional[list[int]] = None,
    height: t.Optional[list[int]] = None
) -> str:
    """
    Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
    with a list of images in different sizes and allows it to choose the smallest file that will fill its
    viewport without upscaling.

    :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
        A string is taken as dlkey and the file entry is looked up by it.
    :param expires:
        None if the links are not supposed to expire, otherwise their lifetime as datetime.timedelta.
        The value is handed to create_download_url, which takes an int as minutes.
    :param width:
        A list of widths that should be included in the srcset.
        If a given width is not available, it will be skipped.
    :param height: A list of heights that should be included in the srcset. If a given height is not available,
        it will be skipped.
    :return: The srctag generated or an empty string if an invalid file object was supplied
    """
    if not width and not height:
        logging.error("Neither width or height supplied")
        return ""

    if isinstance(file, str):
        # Resolve a dlkey to the oldest file entry using it
        file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()

    if not file:
        return ""

    # A relation value carries the referenced file skeleton in "dest"
    if "dlkey" not in file and "dest" in file:
        file = file["dest"]

    if not (
        isinstance(file, (SkeletonInstance, dict))
        and "dlkey" in file
        and "derived" in file
    ):
        logging.error("Invalid file supplied")
        return ""

    if not isinstance(file["derived"], dict):
        logging.error("No derives available")
        return ""

    src_set = []
    # A derived-dict without any "files" yields an empty srcset instead of a KeyError
    for filename, derivate in (file["derived"].get("files") or {}).items():
        customData = derivate.get("customData", {})

        if width and customData.get("width") in width:
            src_set.append(
                f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
            )

        if height and customData.get("height") in height:
            src_set.append(
                f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
            )

    return ", ".join(src_set)

594 

def write(self, filename: str, content: t.Any, mimetype: str = "text/plain", width: int = None,
          height: int = None) -> db.Key:
    """
    Store the content of a buffer as a new file of the file module.

    The content is uploaded to `<random dlkey>/source/<filename>` and a weak leaf entry is created for it.

    :param filename: Filename to be written.
    :param content: The file content to be written, as bytes-like object.
    :param mimetype: The file's mimetype.
    :param width: Optional width information for the file.
    :param height: Optional height information for the file.

    :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
    :raises ValueError: If the filename is not valid.
    """
    if not File.is_valid_filename(filename):
        raise ValueError(f"{filename=} is invalid")

    dl_key = utils.string.random()

    blob = GOOGLE_STORAGE_BUCKET.blob(f"{dl_key}/source/{filename}")
    blob.upload_from_file(io.BytesIO(content), content_type=mimetype)

    skel = self.addSkel("leaf")
    values = {
        "name": filename,
        "size": blob.size,
        "mimetype": mimetype,
        "dlkey": dl_key,
        "weak": True,
        "width": width,
        "height": height,
    }
    for bone_name, value in values.items():
        skel[bone_name] = value

    return skel.toDB()

626 

def read(self, key: db.Key | int | str | None = None, path: str | None = None) -> tuple[io.BytesIO, str]:
    """
    Fetch the content of a file from the Cloud Storage.

    If both a key and a path are given, the key wins: the path is only used when no entry
    exists in the database for that key.

    :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
    :param path: The path of the file in the Cloud Storage Bucket.

    :return: Returns the file as a io.BytesIO buffer and the content-type
    :raises ValueError: If neither key nor path is given, or the key is unknown and there is no path.
    """
    if not (key or path):
        raise ValueError("Please provide a key or a path")

    if key:
        skel = self.viewSkel("leaf")
        if skel.fromDB(db.keyHelper(key, skel.kindName)):
            path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
        elif not path:
            raise ValueError("This skeleton is not in the database!")

    blob = GOOGLE_STORAGE_BUCKET.blob(path)
    content = io.BytesIO(blob.download_as_bytes())
    return content, blob.content_type

651 

@CallDeferred
def deleteRecursive(self, parentKey):
    """
    Delete all files and folders below the given node, descending into every sub-folder.

    The dlkey of each file is marked for deletion before its entry is removed.

    :param parentKey: The key of the node whose content is to be deleted.
    """
    files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
    for fileEntry in files:
        self.mark_for_deletion(fileEntry["dlkey"])
        skel = self.leafSkelCls()

        # The key of an entity is an attribute (see the folder loop below), not a method
        if skel.fromDB(fileEntry.key):
            skel.delete()

    dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
    for d in dirs:
        self.deleteRecursive(d.key)
        skel = self.nodeSkelCls()
        if skel.fromDB(d.key):
            skel.delete()

667 

@exposed
@skey
def getUploadURL(
    self,
    fileName: str,
    mimeType: str,
    size: t.Optional[int] = None,
    node: t.Optional[str | db.Key] = None,
    authData: t.Optional[str] = None,
    authSig: t.Optional[str] = None
):
    """
    Create a resumable upload session and a pending file entry for a file about to be uploaded.

    The upload is either authorized by signed authData (which then dictates the target node, the
    accepted mime-types and the maximum size), or by the add-permission of the current user.

    :param fileName: The name of the file to upload.
    :param mimeType: The mime-type of the file to upload.
    :param size: The size of the file; required if the signed authData limits the size.
    :param node: The node to upload to. Overridden by the node named in authData, if given.
    :param authData: Base64 encoded JSON with "validUntil", "validMimeTypes", "node" and "maxSize".
    :param authSig: The hmac signature of authData.
    :return: The rendered "uploadUrl" and "uploadKey" (the encoded key of the pending file entry).
    :raises errors.UnprocessableEntity: Invalid filename or mime-type, or size missing or too large.
    :raises errors.Unauthorized: The signature of authData is invalid.
    :raises errors.Gone: The signed authData has expired.
    :raises errors.NotFound: No root node could be found for the given node.
    :raises errors.Forbidden: The current user may not add files.
    """
    filename = fileName.strip()  # VIUR4 FIXME: just for compatiblity of the parameter names

    if not File.is_valid_filename(filename):
        raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")

    # Validate the mimetype from the client seems legit
    mimetype = mimeType.strip().lower()
    if not (
        mimetype
        and mimetype.count("/") == 1
        and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
    ):
        raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")

    # Validate authentication data
    if authData and authSig:
        # First, validate the signature, otherwise we don't need to proceed further
        if not self.hmac_verify(authData, authSig):
            raise errors.Unauthorized()

        authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))

        if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
            raise errors.Gone("The upload URL has expired")

        if authData["validMimeTypes"]:
            for validMimeType in authData["validMimeTypes"]:
                if (
                    validMimeType == mimetype
                    or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
                ):
                    break
            else:
                raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")

        node = authData["node"]
        maxSize = authData["maxSize"]

    else:
        rootNode = None
        if node and not (rootNode := self.getRootNode(node)):
            raise errors.NotFound(f"No valid root node found for {node=}")

        if not self.canAdd("leaf", rootNode):
            raise errors.Forbidden()

        maxSize = None  # The user has some file/add permissions, don't restrict fileSize

    if maxSize:
        # A limit can only be enforced against a declared size; comparing None would fail with a TypeError
        if size is None:
            raise errors.UnprocessableEntity(f"A size must be provided, the maximum size is {maxSize}")
        if size > maxSize:
            raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
    else:
        size = None

    # Create upload-URL and download key
    dlkey = utils.string.random()  # let's roll a random key
    blob = GOOGLE_STORAGE_BUCKET.blob(f"{dlkey}/source/{filename}")
    upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)

    # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
    # the user creates matches the file he had uploaded
    file_skel = self.addSkel("leaf")

    file_skel["name"] = filename + self.PENDING_POSTFIX
    file_skel["size"] = 0
    file_skel["mimetype"] = "application/octetstream"
    file_skel["dlkey"] = dlkey
    file_skel["parentdir"] = None
    file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
    file_skel["pending"] = True
    file_skel["weak"] = True
    file_skel["width"] = 0
    file_skel["height"] = 0

    key = db.encodeKey(file_skel.toDB())

    # Mark that entry dirty as we might never receive an add
    self.mark_for_deletion(dlkey)

    # In this case, we'd have to store the key in the users session so he can call add() later on
    if authData and authSig:
        session = current.session.get()

        if "pendingFileUploadKeys" not in session:
            session["pendingFileUploadKeys"] = []

        session["pendingFileUploadKeys"].append(key)

        # Clamp to the latest 50 pending uploads
        session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
        session.markChanged()

    return self.render.view({
        "uploadUrl": upload_url,
        "uploadKey": key,
    })

775 

@exposed
def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
    """
    Download a file.

    Without a signature, blobKey is the path of the blob inside the bucket and the current user
    needs "root" or "file-view" access. With a signature, blobKey is the signed, base64 encoded
    payload created by create_download_url.

    :param blobKey: The unique blob key of the file.
    :param fileName: Optional filename to provide in the header.
    :param download: Set header to attachment retrieval, set explicitly to "1" if download is wanted.
    :param sig: The signature of blobKey, if it is a signed download url.
    :return: The content of the blob, if it is served directly.
    :raises errors.Redirect: If the blob is served by redirecting to a signed storage URL.
    :raises errors.UnprocessableEntity: If fileName is not a valid filename.
    :raises errors.Unauthorized: No signature given and no user is logged in.
    :raises errors.Forbidden: The user lacks the required access or the signature is invalid.
    :raises errors.Gone: The link has expired or the blob does not exist.
    """
    if filename := fileName.strip():
        if not File.is_valid_filename(filename):
            raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")

    download_filename = ""

    if not sig:
        # Check if the current user has the right to download *any* blob present in this application.
        # blobKey is then the path inside cloudstore - not a base64 encoded tuple
        if not (usr := current.user.get()):
            raise errors.Unauthorized()
        if "root" not in usr["access"] and "file-view" not in usr["access"]:
            raise errors.Forbidden()
        validUntil = "-1"  # Prevent this from being cached down below
        blob = GOOGLE_STORAGE_BUCKET.get_blob(blobKey)

    else:
        # We got an request including a signature (probably a guest or a user without file-view access)
        # First, validate the signature, otherwise we don't need to proceed any further
        if not self.hmac_verify(blobKey, sig):
            raise errors.Forbidden()

        # Split the blobKey into the individual fields it should contain
        fields = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
        try:
            dlPath, validUntil, download_filename = fields
        except ValueError:  # It's the old format, without an downloadFileName
            dlPath, validUntil = fields

        if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
            blob = None
        else:
            blob = GOOGLE_STORAGE_BUCKET.get_blob(dlPath)

    if not blob:
        raise errors.Gone("The requested blob has expired.")

    if not filename:
        filename = download_filename or urlquote(blob.name.split("/")[-1])

    content_disposition = "; ".join(
        item for item in (
            "attachment" if download else None,
            f"filename={filename}" if filename else None,
        ) if item
    )

    if isinstance(_CREDENTIALS, ServiceAccountCredentials):
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
        raise errors.Redirect(signedUrl)

    elif conf.instance.is_dev_server:  # No Service-Account to sign with - Serve everything directly
        response = current.request.get().response
        response.headers["Content-Type"] = blob.content_type
        if content_disposition:
            response.headers["Content-Disposition"] = content_disposition
        return blob.download_as_bytes()

    if validUntil == "0":  # Its an indefinitely valid URL
        if blob.size < 5 * 1024 * 1024:  # Less than 5 MB - Serve directly and push it into the edge caches
            response = current.request.get().response
            response.headers["Content-Type"] = blob.content_type
            response.headers["Cache-Control"] = "public, max-age=604800"  # 7 Days
            if content_disposition:
                response.headers["Content-Disposition"] = content_disposition
            return blob.download_as_bytes()

    # Default fallback - create a signed URL and redirect
    authRequest = google.auth.transport.requests.Request()
    expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
    signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
    signedUrl = blob.generate_signed_url(
        expiresAt,
        credentials=signing_credentials,
        response_disposition=content_disposition,
        version="v4")

    raise errors.Redirect(signedUrl)

861 

@exposed
@force_ssl
@force_post
@skey(allow_empty=True)
def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
    """
    Add a node, or finalize a pending file entry once its content has been uploaded.

    Files can't be added directly: the leaf entry named by the "key" argument was created by
    getUploadURL and is completed here with the data of the uploaded blob.

    :param skelType: "leaf" to finalize an upload; anything else is handled by the parent class.
    :param node: The node to add to; only used for skelTypes other than "leaf".
    :return: The rendered result.
    :raises errors.NotFound: There is no file entry for the given key.
    :raises errors.PreconditionFailed: The entry is not pending, or its folder doesn't hold exactly one blob.
    :raises errors.Forbidden: The user may not add files and the upload wasn't registered in his session.
    """
    if skelType != "leaf":
        return super().add(skelType, node, *args, **kwargs)

    # We need to handle leafs separately here
    targetKey = kwargs.get("key")
    skel = self.addSkel("leaf")

    if not skel.fromDB(targetKey):
        raise errors.NotFound()

    if not skel["pending"]:
        raise errors.PreconditionFailed()

    skel["pending"] = False
    skel["parententry"] = skel["pendingparententry"]

    rootNode = self.getRootNode(skel["parententry"]) if skel["parententry"] else None

    if not self.canAdd("leaf", rootNode):
        # Check for a marker in this session (created if using a signed upload URL)
        session = current.session.get()
        pending_keys = session.get("pendingFileUploadKeys") or []
        if targetKey not in pending_keys:
            raise errors.Forbidden()

        session["pendingFileUploadKeys"].remove(targetKey)
        session.markChanged()

    # Now read the blob from the dlkey folder; only one item is allowed there!
    blobs = list(GOOGLE_STORAGE_BUCKET.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
    if len(blobs) != 1:
        logging.error("Invalid number of blobs in folder")
        logging.error(targetKey)
        raise errors.PreconditionFailed()

    (blob,) = blobs

    # update the corresponding file skeleton
    skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
    skel["mimetype"] = utils.string.escape(blob.content_type)
    skel["size"] = blob.size
    skel["parentrepo"] = rootNode["key"] if rootNode else None
    skel["weak"] = rootNode is None

    skel.toDB()

    # Add updated download-URL as the auto-generated isn't valid yet
    skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])
    return self.render.addSuccess(skel)

918 

919 def onEdit(self, skelType: SkelType, skel: SkeletonInstance): 

920 super().onEdit(skelType, skel) 

921 old_skel = self.editSkel(skelType) 

922 old_skel.setEntity(skel.dbEntity) 

923 

924 if old_skel["name"] == skel["name"]: # name not changed we can return 

925 return 

926 

927 # Move Blob to new name 

928 # https://cloud.google.com/storage/docs/copying-renaming-moving-objects 

929 old_path = f"{skel['dlkey']}/source/{html.unescape(old_skel['name'])}" 

930 new_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}" 

931 

932 if not (old_blob := GOOGLE_STORAGE_BUCKET.get_blob(old_path)): 

933 raise errors.Gone() 

934 

935 GOOGLE_STORAGE_BUCKET.copy_blob(old_blob, GOOGLE_STORAGE_BUCKET, new_path, if_generation_match=0) 

936 GOOGLE_STORAGE_BUCKET.delete_blob(old_path) 

937 

938 def mark_for_deletion(self, dlkey: str) -> None: 

939 """ 

940 Adds a marker to the datastore that the file specified as *dlkey* can be deleted. 

941 

942 Once the mark has been set, the data store is checked four times (default: every 4 hours) 

943 if the file is in use somewhere. If it is still in use, the mark goes away, otherwise 

944 the mark and the file are removed from the datastore. These delayed checks are necessary 

945 due to database inconsistency. 

946 

947 :param dlkey: Unique download-key of the file that shall be marked for deletion. 

948 """ 

949 fileObj = db.Query("viur-deleted-files").filter("dlkey", dlkey).getEntry() 

950 

951 if fileObj: # Its allready marked 

952 return 

953 

954 fileObj = db.Entity(db.Key("viur-deleted-files")) 

955 fileObj["itercount"] = 0 

956 fileObj["dlkey"] = str(dlkey) 

957 

958 db.Put(fileObj) 

959 

960 

# Enable the File module for both renderers
# NOTE(review): presumably these flags expose the module under the json and html renderers - confirm
File.json = File.html = True

963 

964 

@PeriodicTask(60 * 4)
def startCheckForUnreferencedBlobs():
    """
    Periodic entry point that kicks off the scan for blob locks with old blob references.

    The actual work is done by the deferred task ``doCheckForUnreferencedBlobs``.
    """
    doCheckForUnreferencedBlobs()

971 

972 

@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Scan blob locks for released blob references and mark unreferenced blobs for deletion.

    Processes up to 100 ``viur-blob-locks`` entities that have ``has_old_blob_references``
    set. For each of them the old references are fetched and cleared in a transaction.
    Every blob key that is not listed in ``active_blob_references`` of any lock gets a
    marker entity of kind ``viur-deleted-files`` unless one already exists.
    If the query provides a cursor afterwards, the task re-schedules itself with it.

    :param cursor: Cursor to continue a previous run of the query, or None to start over.
    """

    def getOldBlobKeysTxn(dbKey):
        """
        Fetch the old blob references of a lock and reset them (or drop a stale lock).

        :param dbKey: Key of the ``viur-blob-locks`` entity.
        :return: The list of blob keys that were stored as old references.
        """
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # Its already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Skip only this blob. The transaction above has already cleared the old
                # references, so aborting here would lose the remaining keys for good and
                # would also skip all further locks and the cursor continuation.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)

1007 

1008 

@PeriodicTask(0)
def startCleanupDeletedFiles():
    """
    Periodic entry point for the cleanup of files that are marked for deletion.

    Delegates to the deferred task ``doCleanupDeletedFiles``, which raises the counter
    of each marked blob that is currently unreferenced and removes the blob once
    the counter has exceeded its limit.
    """
    doCleanupDeletedFiles()

1016 

1017 

@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Work through the ``viur-deleted-files`` markers and delete blobs that stayed unreferenced.

    Up to 100 markers are handled per run:

    - A marker without a ``dlkey`` is removed.
    - A marker whose ``dlkey`` is listed in ``active_blob_references`` of any
      ``viur-blob-locks`` entity is removed; the blob is kept.
    - Otherwise the marker's ``itercount`` is increased, until it exceeds the limit.
      Then all blobs below ``<dlkey>/``, the marker and the file skeletons carrying
      this ``dlkey`` are deleted.

    If the query provides a cursor afterwards, the task re-schedules itself with it.

    :param cursor: Cursor to continue a previous run of the query, or None to start over.
    """
    max_iterations = 2  # How often a file will be checked for deletion

    marker_query = db.Query("viur-deleted-files")
    if cursor:
        marker_query.setCursor(cursor)

    for marker in marker_query.run(100):
        # Broken marker, nothing we could clean up with it
        if "dlkey" not in marker:
            db.Delete(marker.key)
            continue

        dlkey = marker["dlkey"]

        # The blob is in use (again), so drop the marker and keep the blob
        if db.Query("viur-blob-locks").filter("active_blob_references =", dlkey).getEntry():
            logging.info(f"is referenced, {dlkey}")
            db.Delete(marker.key)
            continue

        if marker["itercount"] > max_iterations:
            logging.info(f"Finally deleting, {dlkey}")
            for stored_blob in GOOGLE_STORAGE_BUCKET.list_blobs(prefix=f"{dlkey}/"):
                stored_blob.delete()
            db.Delete(marker.key)
            # There should be exactly 1 or 0 of these
            for file_skel in skeletonByKind("file")().all().filter("dlkey =", dlkey).fetch(99):
                file_skel.delete()
        else:
            logging.debug(f"Increasing count, {dlkey}")
            marker["itercount"] += 1
            db.Put(marker)

    next_cursor = marker_query.getCursor()
    if next_cursor:
        doCleanupDeletedFiles(next_cursor)

1047 

1048 

@PeriodicTask(60 * 4)
def start_delete_pending_files():
    """
    Kick off the deletion of file leafs that are still pending and were created more than 7 days ago.

    The matching query is handed over to ``DeleteEntitiesIter``, which performs the deletion.
    """
    pending_leafs = FileLeafSkel().all().filter("pending =", True)
    oldest_allowed = utils.utcNow() - datetime.timedelta(days=7)
    DeleteEntitiesIter.startIterOnQuery(pending_leafs.filter("creationdate <", oldest_allowed))