Coverage for /home/runner/work/viur-core/viur-core/viur/src/viur/core/modules/file.py: 0%
580 statements
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
« prev ^ index » next coverage.py v7.6.3, created at 2024-10-16 22:16 +0000
1import base64
2import datetime
3import google.auth
4import hashlib
5import hmac
6import html
7import io
8import json
9import logging
10import PIL
11import PIL.ImageCms
12import re
13import requests
14import string
15import typing as t
16from collections import namedtuple
17from urllib.parse import quote as urlquote
18from urllib.request import urlopen
19from google.cloud import storage
20from google.oauth2.service_account import Credentials as ServiceAccountCredentials
21from viur.core import conf, current, db, errors, utils
22from viur.core.bones import BaseBone, BooleanBone, KeyBone, NumericBone, StringBone
23from viur.core.decorators import *
24from viur.core.prototypes.tree import SkelType, Tree, TreeSkel
25from viur.core.skeleton import SkeletonInstance, skeletonByKind
26from viur.core.tasks import CallDeferred, DeleteEntitiesIter, PeriodicTask
# Globals for connectivity

# Pattern used to validate file names (see File.is_valid_filename).
VALID_FILENAME_REGEX = re.compile(
    # || MAY NOT BE THE NAME | MADE OF SPECIAL CHARS | SPECIAL CHARS + `. `|`
    r"^(?!^(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$)[^\x00-\x1F<>:\"\/\\|?*]*[^\x00-\x1F<>:\"\/\\|?*. ]$",
    flags=re.IGNORECASE,
)

# Default application credentials and project id, resolved once at import time.
_CREDENTIALS, __PROJECT_ID = google.auth.default()
GOOGLE_STORAGE_CLIENT = storage.Client(project=__PROJECT_ID, credentials=_CREDENTIALS)
# The bucket is looked up by the name "<project-id>.appspot.com".
GOOGLE_STORAGE_BUCKET = GOOGLE_STORAGE_CLIENT.lookup_bucket(f"{__PROJECT_ID}.appspot.com")

# FilePath is a descriptor for ViUR file components
FilePath = namedtuple("FilePath", ["dlkey", "is_derived", "filename"])
def importBlobFromViur2(dlKey, fileName):
    """
    Import a blob from a ViUR2 blob source into the current bucket layout.

    The outcome of every attempt is persisted as a ``viur-viur2-blobimport`` entity keyed by
    *dlKey*, so that each blob is only tried once.

    :param dlKey: The download key of the blob to import.
    :param fileName: The file name the blob is stored under after the import.
    :return: The download url of the imported blob, or False if nothing was (or could be) imported.
    """
    source_conf = conf.viur2import_blobsource
    if not source_conf:
        return False

    def _store_failure(error, **extra):
        # Persist a failed-import marker so this dlKey is not retried; always returns False.
        failed = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
        failed["success"] = False
        failed["error"] = error
        for prop, value in extra.items():
            failed[prop] = value
        db.Put(failed)
        return False

    previous = db.Get(db.Key("viur-viur2-blobimport", dlKey))
    if previous:
        # A former attempt exists; reuse its result instead of importing again.
        return previous["dlurl"] if previous["success"] else False

    if source_conf["infoURL"]:
        # The name of the blob has to be resolved through the remote info endpoint first.
        try:
            response = urlopen(source_conf["infoURL"] + dlKey)
        except Exception:  # any fetch problem is recorded as a failed import
            return _store_failure("Failed URL-FETCH 1")

        with response:  # make sure the connection is released again
            if response.status != 200:
                return _store_failure("Failed URL-FETCH 2")
            import_data = json.loads(response.read())

        oldBlobName = source_conf["gsdir"] + "/" + import_data["key"]
    else:
        oldBlobName = source_conf["gsdir"] + "/" + dlKey

    srcBlob = storage.Blob(bucket=GOOGLE_STORAGE_BUCKET, name=oldBlobName)
    if not srcBlob.exists():
        return _store_failure("Local SRC-Blob missing", oldBlobName=oldBlobName)

    # Move the blob to its new location: <dlkey>/source/<filename>
    GOOGLE_STORAGE_BUCKET.rename_blob(srcBlob, f"{dlKey}/source/{fileName}")

    marker = db.Entity(db.Key("viur-viur2-blobimport", dlKey))
    marker["success"] = True
    marker["old_src_key"] = dlKey
    marker["old_src_name"] = fileName
    marker["dlurl"] = File.create_download_url(dlKey, fileName, False, None)
    db.Put(marker)
    return marker["dlurl"]
def thumbnailer(fileSkel, existingFiles, params):
    """
    Render resized variants of an image and upload them as derived files.

    :param fileSkel: The file skeleton; its "dlkey" and "name" locate the source blob.
    :param existingFiles: Not used by this implementation.
    :param params: Iterable of dicts describing the variants. Each one needs a "width" (and optionally
        a "height"); "fileExtension" and "mimeType" default to "webp" / "image/webp".
    :return: A list of ``(name, size, mimetype, customData)`` tuples for all uploaded variants, an empty
        list if the source is not an image, or None if the source blob is missing.
    """
    source_name = html.unescape(fileSkel["name"])
    source_blob = GOOGLE_STORAGE_BUCKET.get_blob(f"""{fileSkel["dlkey"]}/source/{source_name}""")
    if not source_blob:
        logging.warning(f"""Blob {fileSkel["dlkey"]}/source/{source_name} is missing from cloud storage!""")
        return

    source_buffer = io.BytesIO()
    source_blob.download_to_file(source_buffer)

    results = []
    for spec in params:
        # Every variant is rendered from a freshly opened copy of the source
        source_buffer.seek(0)
        target_buffer = io.BytesIO()

        try:
            img = PIL.Image.open(source_buffer)
        except PIL.Image.UnidentifiedImageError:  # Can't load this image; so there's no need to try other resolutions
            return []

        icc_bytes = img.info.get('icc_profile')
        if icc_bytes:
            # JPEGs might be encoded with a non-standard color-profile; we need to compensate for this if we convert
            # to WEBp as we'll loose this color-profile information
            src_profile = PIL.ImageCms.ImageCmsProfile(io.BytesIO(icc_bytes))
            dst_profile = PIL.ImageCms.createProfile('sRGB')
            target_mode = "RGBA" if img.has_transparency_data else "RGB"
            try:
                img = PIL.ImageCms.profileToProfile(
                    img,
                    inputProfile=src_profile,
                    outputProfile=dst_profile,
                    outputMode=target_mode,
                )
            except Exception as e:
                logging.exception(e)
                continue

        fileExtension = spec.get("fileExtension", "webp")

        if "width" not in spec:  # No default fallback - ignore
            continue

        width = spec["width"]
        if "height" in spec:
            height = spec["height"]
            targetName = f"thumbnail-{width}-{height}.{fileExtension}"
        else:
            # Only the width is given: scale the height proportionally
            height = int((float(img.size[1]) * float(width / float(img.size[0]))))
            targetName = f"thumbnail-w{width}.{fileExtension}"

        mimeType = spec.get("mimeType", "image/webp")

        img = img.resize((width, height), PIL.Image.LANCZOS)
        img.save(target_buffer, fileExtension)
        written = target_buffer.tell()
        target_buffer.seek(0)

        derived_blob = GOOGLE_STORAGE_BUCKET.blob(f"""{fileSkel["dlkey"]}/derived/{targetName}""")
        derived_blob.upload_from_file(target_buffer, content_type=mimeType)

        custom_data = {"mimetype": mimeType, "width": width, "height": height}
        results.append((targetName, written, mimeType, custom_data))

    return results
def cloudfunction_thumbnailer(fileSkel, existingFiles, params):
    """External Thumbnailer for images.

    Delegates the rendering to a cloud function, which is called twice: first to learn the names of the
    files it is going to derive, then - with one upload session per file - to actually render them.

    The corresponding cloudfunction can be found here:
    https://github.com/viur-framework/viur-cloudfunctions/tree/main/thumbnailer

    You can use it like so:
    main.py:

    .. code-block:: python

        from viur.core.modules.file import cloudfunction_thumbnailer

        conf.file_thumbnailer_url = "https://xxxxx.cloudfunctions.net/imagerenderer"
        conf.file_derivations = {"thumbnail": cloudfunction_thumbnailer}

        conf.derives_pdf = {
            "thumbnail": [{"width": 1920,"sites":"1,2"}]
        }

    skeletons/xxx.py:

    .. code-block:: python

        test = FileBone(derive=conf.derives_pdf)

    :param fileSkel: The file skeleton of the source file.
    :param existingFiles: Not used by this implementation.
    :param params: Derivation parameters, forwarded unchanged to the cloud function.
    :return: A list of ``(name, size, mimetype, customData)`` tuples, or None if a step failed.
    :raises ValueError: If ``conf.file_thumbnailer_url`` is not set.
    """

    if not conf.file_thumbnailer_url:
        raise ValueError("conf.file_thumbnailer_url is not set")

    def _signed_source_url():
        # Build a short-lived url from which the cloud function can fetch the source file.
        if conf.instance.is_dev_server:
            return File.create_download_url(fileSkel["dlkey"], fileSkel["name"])

        path = f"""{fileSkel["dlkey"]}/source/{file_name}"""
        if not (blob := GOOGLE_STORAGE_BUCKET.get_blob(path)):
            logging.warning(f"Blob {path} is missing from cloud storage!")
            return None

        authRequest = google.auth.transport.requests.Request()
        expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
        signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
        content_disposition = f"""filename={fileSkel["name"]}"""
        return blob.generate_signed_url(
            expiresAt,
            credentials=signing_credentials,
            response_disposition=content_disposition,
            version="v4",
        )

    def _call_cloudfunction(payload):
        # POST the signed payload; returns the decoded JSON answer or None on any failure.
        encoded = base64.b64encode(json.dumps(payload).encode("UTF-8"))
        body = json.dumps({"dataStr": encoded.decode('ASCII'), "sign": File.hmac_sign(encoded)})
        resp = requests.post(
            conf.file_thumbnailer_url,
            data=body,
            headers={"Content-Type": "application/json"},
            allow_redirects=False,
        )

        if resp.status_code != 200:  # Error Handling
            known_failures = {
                # The problem is Google resposen 302 to an auth Site when the cloudfunction was not found
                # https://cloud.google.com/functions/docs/troubleshooting#login
                302: "Cloudfunction not found",
                404: "Cloudfunction not found",
                403: "No permission for the Cloudfunction",
            }
            if resp.status_code in known_failures:
                logging.error(known_failures[resp.status_code])
            else:
                logging.error(
                    f"cloudfunction_thumbnailer failed with code: {resp.status_code} and data: {resp.content}")
            return

        try:
            response_data = resp.json()
        except Exception as e:
            logging.error(f"response could not be converted in json failed with: {e=}")
            return

        if "error" in response_data:
            logging.error(f"cloudfunction_thumbnailer failed with: {response_data.get('error')}")
            return

        return response_data

    file_name = html.unescape(fileSkel["name"])

    # First pass: only ask for the names of the files that will be derived
    if not (url := _signed_source_url()):
        return

    dataDict = {
        "url": url,
        "name": fileSkel["name"],
        "params": params,
        "minetype": fileSkel["mimetype"],
        "baseUrl": current.request.get().request.host_url.lower(),
        "targetKey": fileSkel["dlkey"],
        "nameOnly": True
    }
    if not (announced := _call_cloudfunction(dataDict)):
        return

    # Open one resumable upload session for each announced file
    uploadUrls = {}
    for entry in announced["values"]:
        derived_name = File.sanitize_filename(entry["name"])
        target = GOOGLE_STORAGE_BUCKET.blob(f"""{fileSkel["dlkey"]}/derived/{derived_name}""")
        uploadUrls[fileSkel["dlkey"] + derived_name] = target.create_resumable_upload_session(
            timeout=60,
            content_type=entry["mimeType"],
        )

    # Second pass: fresh source url, this time the files are rendered and uploaded
    if not (url := _signed_source_url()):
        return

    dataDict["url"] = url
    dataDict["nameOnly"] = False
    dataDict["uploadUrls"] = uploadUrls

    if not (rendered := _call_cloudfunction(dataDict)):
        return

    reslist = []
    try:
        for derived in rendered["values"]:
            for key, value in derived.items():
                reslist.append((key, value["size"], value["mimetype"], value["customData"]))
    except Exception as e:
        # Entries collected before the malformed one are still returned
        logging.error(f"cloudfunction_thumbnailer failed with: {e=}")
    return reslist
class DownloadUrlBone(BaseBone):
    """
    This bone is used to inject a freshly signed download url into a FileSkel.
    """

    def unserialize(self, skel, name):
        """
        Compute the download url from the skeleton's "dlkey" and "name".

        :param skel: The skeleton instance that is being unserialized.
        :param name: The name of this bone inside the skeleton.
        :return: True if a url has been stored, False if the entity lacks "dlkey" or "name".
        """
        entity = skel.dbEntity
        if not ("dlkey" in entity and "name" in entity):
            return False

        # The url is signed on every read and expires as configured for rendering
        skel.accessedValues[name] = File.create_download_url(
            skel["dlkey"],
            skel["name"],
            expires=conf.render_json_download_url_expiration,
        )
        return True
class FileLeafSkel(TreeSkel):
    """
    Default file leaf skeleton.
    """
    kindName = "file"

    size = StringBone(descr="Size", readOnly=True, searchable=True)

    # Identifies the folder of the file in the storage bucket
    dlkey = StringBone(descr="Download-Key", readOnly=True)

    name = StringBone(
        descr="Filename",
        caseSensitive=False,
        searchable=True,
        # Returns None for acceptable names, otherwise the error message
        vfunc=lambda val: None if File.is_valid_filename(val) else "Invalid filename provided",
    )

    mimetype = StringBone(descr="MIME-Type", readOnly=True)

    weak = BooleanBone(descr="Weak reference", readOnly=True, visible=False)

    # True between requesting an upload url and confirming the upload
    pending = BooleanBone(descr="Pending upload", readOnly=True, visible=False, defaultValue=False)

    width = NumericBone(descr="Width", readOnly=True, searchable=True)

    height = NumericBone(descr="Height", readOnly=True, searchable=True)

    downloadUrl = DownloadUrlBone(descr="Download-URL", readOnly=True, visible=False)

    derived = BaseBone(descr="Derived Files", readOnly=True, visible=False)

    pendingparententry = KeyBone(descr="Pending key Reference", readOnly=True, visible=False)

    def preProcessBlobLocks(self, locks):
        """
        Ensure that our dlkey is locked even if we don't have a filebone here

        :param locks: The set of download keys collected so far.
        :return: The same set, extended by this file's dlkey unless the file is weak.
        """
        if self["dlkey"] and not self["weak"]:
            locks.add(self["dlkey"])
        return locks

    @classmethod
    def refresh(cls, skelValues):
        """
        Refresh the skeleton and, if a ViUR2 blob source is configured, try to import its blob.

        :param skelValues: The skeleton values to refresh.
        """
        super().refresh(skelValues)

        if not conf.viur2import_blobsource:
            return

        imported_url = importBlobFromViur2(skelValues["dlkey"], skelValues["name"])
        if not imported_url:
            return

        if not skelValues["downloadUrl"]:
            skelValues["downloadUrl"] = imported_url
        skelValues["pendingparententry"] = False
class FileNodeSkel(TreeSkel):
    """
    Default file node skeleton.
    """
    kindName = "file_rootNode"  # FIXME: VIUR4, don't use "_rootNode" kindname

    # Display name of the node
    name = StringBone(descr="Name", required=True, searchable=True)

    # Marks whether this node is a root node
    rootNode = BooleanBone(descr="Is RootNode", defaultValue=False)
393class File(Tree):
394 PENDING_POSTFIX = " (pending)"
395 DOWNLOAD_URL_PREFIX = "/file/download/"
396 MAX_FILENAME_LEN = 256
398 leafSkelCls = FileLeafSkel
399 nodeSkelCls = FileNodeSkel
401 handler = "tree.simple.file"
402 adminInfo = {
403 "icon": "folder-fill",
404 "handler": handler, # fixme: Use static handler; Remove with VIUR4!
405 }
407 roles = {
408 "*": "view",
409 "editor": ("add", "edit"),
410 "admin": "*",
411 }
413 default_order = "name"
415 # Helper functions currently resist here
417 @staticmethod
418 def is_valid_filename(filename: str) -> bool:
419 """
420 Verifies a valid filename.
422 The filename should be valid on Linux, Mac OS and Windows.
423 It should not be longer than MAX_FILENAME_LEN chars.
425 Rule set: https://stackoverflow.com/a/31976060/3749896
426 Regex test: https://regex101.com/r/iBYpoC/1
427 """
428 if len(filename) > File.MAX_FILENAME_LEN:
429 return False
431 return bool(re.match(VALID_FILENAME_REGEX, filename))
433 @staticmethod
434 def hmac_sign(data: t.Any) -> str:
435 assert conf.file_hmac_key is not None, "No hmac-key set!"
436 if not isinstance(data, bytes):
437 data = str(data).encode("UTF-8")
438 return hmac.new(conf.file_hmac_key, msg=data, digestmod=hashlib.sha3_384).hexdigest()
440 @staticmethod
441 def hmac_verify(data: t.Any, signature: str) -> bool:
442 return hmac.compare_digest(File.hmac_sign(data.encode("ASCII")), signature)
444 @staticmethod
445 def create_download_url(
446 dlkey: str,
447 filename: str,
448 derived: bool = False,
449 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
450 download_filename: t.Optional[str] = None
451 ) -> str:
452 """
453 Utility function that creates a signed download-url for the given folder/filename combination
455 :param folder: The GCS-Folder (= the download-key) for that file
456 :param filename: The name of the file. Either the original filename or the name of a derived file.
457 :param derived: True, if it points to a derived file, False if it points to the original uploaded file
458 :param expires:
459 None if the file is supposed to be public (which causes it to be cached on the google ede caches),
460 otherwise a datetime.timedelta of how long that link should be valid
461 :param download_filename: If set, browser is enforced to download this blob with the given alternate
462 filename
463 :return: The signed download-url relative to the current domain (eg /download/...)
464 """
465 if isinstance(expires, int):
466 expires = datetime.timedelta(minutes=expires)
468 # Undo escaping on ()= performed on fileNames
469 filename = filename.replace("(", "(").replace(")", ")").replace("=", "=")
470 filepath = f"""{dlkey}/{"derived" if derived else "source"}/{filename}"""
472 if download_filename:
473 if not File.is_valid_filename(download_filename):
474 raise errors.UnprocessableEntity(f"Invalid download_filename {download_filename!r} provided")
476 download_filename = urlquote(download_filename)
478 expires = (datetime.datetime.now() + expires).strftime("%Y%m%d%H%M") if expires else 0
480 data = base64.urlsafe_b64encode(f"""{filepath}\0{expires}\0{download_filename or ""}""".encode("UTF-8"))
481 sig = File.hmac_sign(data)
483 return f"""{File.DOWNLOAD_URL_PREFIX}{data.decode("ASCII")}?sig={sig}"""
485 @staticmethod
486 def parse_download_url(url) -> t.Optional[FilePath]:
487 """
488 Parses a file download URL in the format `/file/download/xxxx?sig=yyyy` into its FilePath.
490 If the URL cannot be parsed, the function returns None.
492 :param url: The file download URL to be parsed.
493 :return: A FilePath on success, None otherwise.
494 """
495 if not url.startswith(File.DOWNLOAD_URL_PREFIX) or "?" not in url:
496 return None
498 data, sig = url.removeprefix(File.DOWNLOAD_URL_PREFIX).split("?", 1) # Strip "/file/download/" and split on "?"
499 sig = sig.removeprefix("sig=")
501 if not File.hmac_verify(data, sig):
502 # Invalid signature
503 return None
505 # Split the blobKey into the individual fields it should contain
506 data = base64.urlsafe_b64decode(data).decode("UTF-8")
508 match data.count("\0"):
509 case 2:
510 dlpath, valid_until, _ = data.split("\0")
511 case 1:
512 # It's the old format, without an downloadFileName
513 dlpath, valid_until = data.split("\0")
514 case _:
515 # Invalid path
516 return None
518 if valid_until != "0" and datetime.strptime(valid_until, "%Y%m%d%H%M") < datetime.now():
519 # Signature expired
520 return None
522 if dlpath.count("/") != 3:
523 # Invalid path
524 return None
526 dlkey, derived, filename = dlpath.split("/", 3)
527 return FilePath(dlkey, derived != "source", filename)
529 @staticmethod
530 def create_src_set(
531 file: t.Union["SkeletonInstance", dict, str],
532 expires: t.Optional[datetime.timedelta | int] = datetime.timedelta(hours=1),
533 width: t.Optional[int] = None,
534 height: t.Optional[int] = None
535 ) -> str:
536 """
537 Generates a string suitable for use as the srcset tag in html. This functionality provides the browser
538 with a list of images in different sizes and allows it to choose the smallest file that will fill it's
539 viewport without upscaling.
541 :param file: The file skeleton (or if multiple=True a single value from it) to generate the srcset.
542 :param expires:
543 None if the file is supposed to be public (which causes it to be cached on the google edecaches),
544 otherwise it's lifetime in seconds
545 :param width:
546 A list of widths that should be included in the srcset.
547 If a given width is not available, it will be skipped.
548 :param height: A list of heights that should be included in the srcset. If a given height is not available,
549 it will be skipped.
550 :return: The srctag generated or an empty string if a invalid file object was supplied
551 """
552 if not width and not height:
553 logging.error("Neither width or height supplied")
554 return ""
556 if isinstance(file, str):
557 file = db.Query("file").filter("dlkey =", file).order(("creationdate", db.SortOrder.Ascending)).getEntry()
559 if not file:
560 return ""
562 if "dlkey" not in file and "dest" in file:
563 file = file["dest"]
565 from viur.core.skeleton import SkeletonInstance # avoid circular imports
567 if not (
568 isinstance(file, (SkeletonInstance, dict))
569 and "dlkey" in file
570 and "derived" in file
571 ):
572 logging.error("Invalid file supplied")
573 return ""
575 if not isinstance(file["derived"], dict):
576 logging.error("No derives available")
577 return ""
579 src_set = []
580 for filename, derivate in file["derived"]["files"].items():
581 customData = derivate.get("customData", {})
583 if width and customData.get("width") in width:
584 src_set.append(
585 f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["width"]}w"""
586 )
588 if height and customData.get("height") in height:
589 src_set.append(
590 f"""{File.create_download_url(file["dlkey"], filename, True, expires)} {customData["height"]}h"""
591 )
593 return ", ".join(src_set)
595 def write(self, filename: str, content: t.Any, mimetype: str = "text/plain", width: int = None,
596 height: int = None) -> db.Key:
597 """
598 Write a file from any buffer into the file module.
600 :param filename: Filename to be written.
601 :param content: The file content to be written, as bytes-like object.
602 :param mimetype: The file's mimetype.
603 :param width: Optional width information for the file.
604 :param height: Optional height information for the file.
606 :return: Returns the key of the file object written. This can be associated e.g. with a FileBone.
607 """
608 if not File.is_valid_filename(filename):
609 raise ValueError(f"{filename=} is invalid")
611 dl_key = utils.string.random()
613 blob = GOOGLE_STORAGE_BUCKET.blob(f"{dl_key}/source/{filename}")
614 blob.upload_from_file(io.BytesIO(content), content_type=mimetype)
616 skel = self.addSkel("leaf")
617 skel["name"] = filename
618 skel["size"] = blob.size
619 skel["mimetype"] = mimetype
620 skel["dlkey"] = dl_key
621 skel["weak"] = True
622 skel["width"] = width
623 skel["height"] = height
625 return skel.toDB()
627 def read(self, key: db.Key | int | str | None = None, path: str | None = None) -> tuple[io.BytesIO, str]:
628 """
629 Read a file from the Cloud Storage.
631 If a key and a path are provided, the key is preferred.
632 This means that the entry in the db is searched first and if this is not found, the path is used.
634 :param key: Key of the LeafSkel that contains the "dlkey" and the "name".
635 :param path: The path of the file in the Cloud Storage Bucket.
637 :return: Returns the file as a io.BytesIO buffer and the content-type
638 """
639 if not key and not path:
640 raise ValueError("Please provide a key or a path")
641 if key:
642 skel = self.viewSkel("leaf")
643 if not skel.fromDB(db.keyHelper(key, skel.kindName)):
644 if not path:
645 raise ValueError("This skeleton is not in the database!")
646 else:
647 path = f"""{skel["dlkey"]}/source/{skel["name"]}"""
649 blob = GOOGLE_STORAGE_BUCKET.blob(path)
650 return io.BytesIO(blob.download_as_bytes()), blob.content_type
652 @CallDeferred
653 def deleteRecursive(self, parentKey):
654 files = db.Query(self.leafSkelCls().kindName).filter("parentdir =", parentKey).iter()
655 for fileEntry in files:
656 self.mark_for_deletion(fileEntry["dlkey"])
657 skel = self.leafSkelCls()
659 if skel.fromDB(str(fileEntry.key())):
660 skel.delete()
661 dirs = db.Query(self.nodeSkelCls().kindName).filter("parentdir", parentKey).iter()
662 for d in dirs:
663 self.deleteRecursive(d.key)
664 skel = self.nodeSkelCls()
665 if skel.fromDB(d.key):
666 skel.delete()
668 @exposed
669 @skey
670 def getUploadURL(
671 self,
672 fileName: str,
673 mimeType: str,
674 size: t.Optional[int] = None,
675 node: t.Optional[str | db.Key] = None,
676 authData: t.Optional[str] = None,
677 authSig: t.Optional[str] = None
678 ):
679 filename = fileName.strip() # VIUR4 FIXME: just for compatiblity of the parameter names
681 if not File.is_valid_filename(filename):
682 raise errors.UnprocessableEntity(f"Invalid filename {filename!r} provided")
684 # Validate the mimetype from the client seems legit
685 mimetype = mimeType.strip().lower()
686 if not (
687 mimetype
688 and mimetype.count("/") == 1
689 and all(ch in string.ascii_letters + string.digits + "/-.+" for ch in mimetype)
690 ):
691 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype!r} provided")
693 # Validate authentication data
694 if authData and authSig:
695 # First, validate the signature, otherwise we don't need to proceed further
696 if not self.hmac_verify(authData, authSig):
697 raise errors.Unauthorized()
699 authData = json.loads(base64.b64decode(authData.encode("ASCII")).decode("UTF-8"))
701 if datetime.datetime.strptime(authData["validUntil"], "%Y%m%d%H%M") < datetime.datetime.now():
702 raise errors.Gone("The upload URL has expired")
704 if authData["validMimeTypes"]:
705 for validMimeType in authData["validMimeTypes"]:
706 if (
707 validMimeType == mimetype
708 or (validMimeType.endswith("*") and mimetype.startswith(validMimeType[:-1]))
709 ):
710 break
711 else:
712 raise errors.UnprocessableEntity(f"Invalid mime-type {mimetype} provided")
714 node = authData["node"]
715 maxSize = authData["maxSize"]
717 else:
718 rootNode = None
719 if node and not (rootNode := self.getRootNode(node)):
720 raise errors.NotFound(f"No valid root node found for {node=}")
722 if not self.canAdd("leaf", rootNode):
723 raise errors.Forbidden()
725 maxSize = None # The user has some file/add permissions, don't restrict fileSize
727 if maxSize:
728 if size > maxSize:
729 raise errors.UnprocessableEntity(f"Size {size} exceeds maximum size {maxSize}")
730 else:
731 size = None
733 # Create upload-URL and download key
734 dlkey = utils.string.random() # let's roll a random key
735 blob = GOOGLE_STORAGE_BUCKET.blob(f"{dlkey}/source/{filename}")
736 upload_url = blob.create_resumable_upload_session(content_type=mimeType, size=size, timeout=60)
738 # Create a corresponding file-lock object early, otherwise we would have to ensure that the file-lock object
739 # the user creates matches the file he had uploaded
740 file_skel = self.addSkel("leaf")
742 file_skel["name"] = filename + self.PENDING_POSTFIX
743 file_skel["size"] = 0
744 file_skel["mimetype"] = "application/octetstream"
745 file_skel["dlkey"] = dlkey
746 file_skel["parentdir"] = None
747 file_skel["pendingparententry"] = db.keyHelper(node, self.addSkel("node").kindName) if node else None
748 file_skel["pending"] = True
749 file_skel["weak"] = True
750 file_skel["width"] = 0
751 file_skel["height"] = 0
753 key = db.encodeKey(file_skel.toDB())
755 # Mark that entry dirty as we might never receive an add
756 self.mark_for_deletion(dlkey)
758 # In this case, we'd have to store the key in the users session so he can call add() later on
759 if authData and authSig:
760 session = current.session.get()
762 if "pendingFileUploadKeys" not in session:
763 session["pendingFileUploadKeys"] = []
765 session["pendingFileUploadKeys"].append(key)
767 # Clamp to the latest 50 pending uploads
768 session["pendingFileUploadKeys"] = session["pendingFileUploadKeys"][-50:]
769 session.markChanged()
771 return self.render.view({
772 "uploadUrl": upload_url,
773 "uploadKey": key,
774 })
776 @exposed
777 def download(self, blobKey: str, fileName: str = "", download: bool = False, sig: str = "", *args, **kwargs):
778 """
779 Download a file.
780 :param blobKey: The unique blob key of the file.
781 :param fileName: Optional filename to provide in the header.
782 :param download: Set header to attachment retrival, set explictly to "1" if download is wanted.
783 """
784 if filename := fileName.strip():
785 if not File.is_valid_filename(filename):
786 raise errors.UnprocessableEntity(f"The provided filename {filename!r} is invalid!")
788 download_filename = ""
790 if not sig:
791 # Check if the current user has the right to download *any* blob present in this application.
792 # blobKey is then the path inside cloudstore - not a base64 encoded tuple
793 if not (usr := current.user.get()):
794 raise errors.Unauthorized()
795 if "root" not in usr["access"] and "file-view" not in usr["access"]:
796 raise errors.Forbidden()
797 validUntil = "-1" # Prevent this from being cached down below
798 blob = GOOGLE_STORAGE_BUCKET.get_blob(blobKey)
800 else:
801 # We got an request including a signature (probably a guest or a user without file-view access)
802 # First, validate the signature, otherwise we don't need to proceed any further
803 if not self.hmac_verify(blobKey, sig):
804 raise errors.Forbidden()
805 # Split the blobKey into the individual fields it should contain
806 try:
807 dlPath, validUntil, download_filename = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
808 except: # It's the old format, without an downloadFileName
809 dlPath, validUntil = base64.urlsafe_b64decode(blobKey).decode("UTF-8").split("\0")
811 if validUntil != "0" and datetime.datetime.strptime(validUntil, "%Y%m%d%H%M") < datetime.datetime.now():
812 blob = None
813 else:
814 blob = GOOGLE_STORAGE_BUCKET.get_blob(dlPath)
816 if not blob:
817 raise errors.Gone("The requested blob has expired.")
819 if not filename:
820 filename = download_filename or urlquote(blob.name.split("/")[-1])
822 content_disposition = "; ".join(
823 item for item in (
824 "attachment" if download else None,
825 f"filename={filename}" if filename else None,
826 ) if item
827 )
829 if isinstance(_CREDENTIALS, ServiceAccountCredentials):
830 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
831 signedUrl = blob.generate_signed_url(expiresAt, response_disposition=content_disposition, version="v4")
832 raise errors.Redirect(signedUrl)
834 elif conf.instance.is_dev_server: # No Service-Account to sign with - Serve everything directly
835 response = current.request.get().response
836 response.headers["Content-Type"] = blob.content_type
837 if content_disposition:
838 response.headers["Content-Disposition"] = content_disposition
839 return blob.download_as_bytes()
841 if validUntil == "0": # Its an indefinitely valid URL
842 if blob.size < 5 * 1024 * 1024: # Less than 5 MB - Serve directly and push it into the ede caches
843 response = current.request.get().response
844 response.headers["Content-Type"] = blob.content_type
845 response.headers["Cache-Control"] = "public, max-age=604800" # 7 Days
846 if content_disposition:
847 response.headers["Content-Disposition"] = content_disposition
848 return blob.download_as_bytes()
850 # Default fallback - create a signed URL and redirect
851 authRequest = google.auth.transport.requests.Request()
852 expiresAt = datetime.datetime.now() + datetime.timedelta(seconds=60)
853 signing_credentials = google.auth.compute_engine.IDTokenCredentials(authRequest, "")
854 signedUrl = blob.generate_signed_url(
855 expiresAt,
856 credentials=signing_credentials,
857 response_disposition=content_disposition,
858 version="v4")
860 raise errors.Redirect(signedUrl)
862 @exposed
863 @force_ssl
864 @force_post
865 @skey(allow_empty=True)
866 def add(self, skelType: SkelType, node: db.Key | int | str | None = None, *args, **kwargs):
867 # We can't add files directly (they need to be uploaded
868 if skelType == "leaf": # We need to handle leafs separately here
869 targetKey = kwargs.get("key")
870 skel = self.addSkel("leaf")
872 if not skel.fromDB(targetKey):
873 raise errors.NotFound()
875 if not skel["pending"]:
876 raise errors.PreconditionFailed()
878 skel["pending"] = False
879 skel["parententry"] = skel["pendingparententry"]
881 if skel["parententry"]:
882 rootNode = self.getRootNode(skel["parententry"])
883 else:
884 rootNode = None
886 if not self.canAdd("leaf", rootNode):
887 # Check for a marker in this session (created if using a signed upload URL)
888 session = current.session.get()
889 if targetKey not in (session.get("pendingFileUploadKeys") or []):
890 raise errors.Forbidden()
891 session["pendingFileUploadKeys"].remove(targetKey)
892 session.markChanged()
894 # Now read the blob from the dlkey folder
895 blobs = list(GOOGLE_STORAGE_BUCKET.list_blobs(prefix=f"""{skel["dlkey"]}/"""))
896 if len(blobs) != 1:
897 logging.error("Invalid number of blobs in folder")
898 logging.error(targetKey)
899 raise errors.PreconditionFailed()
901 # only one item is allowed here!
902 blob = blobs[0]
904 # update the corresponding file skeleton
905 skel["name"] = skel["name"].removesuffix(self.PENDING_POSTFIX)
906 skel["mimetype"] = utils.string.escape(blob.content_type)
907 skel["size"] = blob.size
908 skel["parentrepo"] = rootNode["key"] if rootNode else None
909 skel["weak"] = rootNode is None
911 skel.toDB()
913 # Add updated download-URL as the auto-generated isn't valid yet
914 skel["downloadUrl"] = self.create_download_url(skel["dlkey"], skel["name"])
915 return self.render.addSuccess(skel)
917 return super().add(skelType, node, *args, **kwargs)
def onEdit(self, skelType: SkelType, skel: SkeletonInstance):
    """
    Edit hook that keeps the stored blob's path in sync with the skeleton's name.

    After delegating to the parent implementation, the name stored in the
    skeleton's current database entity is compared with the name now held by *skel*.
    If they differ, the blob below ``<dlkey>/source/`` is copied to the new
    name and the old object is removed afterwards.

    :param skelType: Type of the skeleton being edited; passed on to the parent
        hook and to ``editSkel``.
    :param skel: The skeleton instance carrying the new values.
    :raises errors.Gone: If no blob exists at the old path.
    """
    super().onEdit(skelType, skel)

    # Load the entity behind `skel` into a fresh skeleton to compare the names
    previous = self.editSkel(skelType)
    previous.setEntity(skel.dbEntity)

    name_before = previous["name"]
    if name_before == skel["name"]:
        # Nothing was renamed, so the blob can stay where it is
        return

    # Cloud Storage has no rename operation, so copy to the new name and delete the old one
    # https://cloud.google.com/storage/docs/copying-renaming-moving-objects
    # NOTE(review): names are presumably stored HTML-escaped, hence the unescape - confirm
    source_path = f"{skel['dlkey']}/source/{html.unescape(name_before)}"
    target_path = f"{skel['dlkey']}/source/{html.unescape(skel['name'])}"

    bucket = GOOGLE_STORAGE_BUCKET
    source_blob = bucket.get_blob(source_path)
    if not source_blob:
        raise errors.Gone()

    # if_generation_match=0 lets the copy only succeed when the target does not exist yet
    bucket.copy_blob(source_blob, bucket, target_path, if_generation_match=0)
    bucket.delete_blob(source_path)
def mark_for_deletion(self, dlkey: str) -> None:
    """
    Flag the file identified by *dlkey* as a candidate for deletion.

    A ``viur-deleted-files`` entity with an ``itercount`` of 0 is written to the
    datastore, unless such a marker already exists for this *dlkey*.
    The file itself is not touched here; the deferred cleanup
    (see ``doCleanupDeletedFiles``) evaluates these markers later, dropping the
    marker if the file is still referenced and removing the file only after
    several consecutive unreferenced checks.

    :param dlkey: Unique download-key of the file that shall be marked for deletion.
    """
    marker_kind = "viur-deleted-files"

    if db.Query(marker_kind).filter("dlkey", dlkey).getEntry():
        # A marker already exists for this dlkey, nothing to do
        return

    marker = db.Entity(db.Key(marker_kind))
    marker["itercount"] = 0
    marker["dlkey"] = str(dlkey)
    db.Put(marker)
# Set the `json` and `html` flags on the File module
# (presumably these enable the module for the respective renderers - confirm).
File.json = File.html = True
@PeriodicTask(60 * 4)  # interval is presumably given in minutes, i.e. every four hours - confirm
def startCheckForUnreferencedBlobs():
    """
    Periodic entry point for the search for recently freed blob locks.

    Only kicks off :func:`doCheckForUnreferencedBlobs`, which does the actual work.
    """
    doCheckForUnreferencedBlobs()
@CallDeferred
def doCheckForUnreferencedBlobs(cursor=None):
    """
    Mark blobs that were released by their blob-locks for deletion.

    Processes up to 100 ``viur-blob-locks`` entities that have
    ``has_old_blob_references`` set. For each lock, the list of old blob
    references is fetched and cleared inside a transaction (stale locks are
    deleted entirely). Every released blob key that is not listed in any
    lock's ``active_blob_references`` gets a ``viur-deleted-files`` marker
    with an ``itercount`` of 0, unless it is already marked.
    If the query provides a cursor afterwards, the function calls itself again
    with that cursor to process the next batch.

    :param cursor: Query cursor to continue from, or None to start from the beginning.
    """

    def getOldBlobKeysTxn(dbKey):
        """Fetch and reset the old blob references of one lock; returns the fetched keys."""
        obj = db.Get(dbKey)
        res = obj["old_blob_references"] or []
        if obj["is_stale"]:
            db.Delete(dbKey)
        else:
            obj["has_old_blob_references"] = False
            obj["old_blob_references"] = []
            db.Put(obj)
        return res

    query = db.Query("viur-blob-locks").filter("has_old_blob_references", True).setCursor(cursor)
    for lockObj in query.run(100):
        oldBlobKeys = db.RunInTransaction(getOldBlobKeysTxn, lockObj.key)
        for blobKey in oldBlobKeys:
            if db.Query("viur-blob-locks").filter("active_blob_references =", blobKey).getEntry():
                # This blob is referenced elsewhere
                logging.info(f"Stale blob is still referenced, {blobKey}")
                continue
            # Add a marker and schedule it for deletion
            fileObj = db.Query("viur-deleted-files").filter("dlkey", blobKey).getEntry()
            if fileObj:  # It's already marked
                logging.info(f"Stale blob already marked for deletion, {blobKey}")
                # Only skip this blob. The transaction above has already cleared the
                # lock's old references, so leaving the function here would drop all
                # remaining blob keys and skip the follow-up batch.
                continue
            fileObj = db.Entity(db.Key("viur-deleted-files"))
            fileObj["itercount"] = 0
            fileObj["dlkey"] = str(blobKey)
            logging.info(f"Stale blob marked dirty, {blobKey}")
            db.Put(fileObj)
    newCursor = query.getCursor()
    if newCursor:
        doCheckForUnreferencedBlobs(newCursor)
@PeriodicTask(0)
def startCleanupDeletedFiles():
    """
    Periodic entry point for cleaning up files that are marked for deletion.

    Only kicks off :func:`doCleanupDeletedFiles`, which raises the deletion
    counter of each currently unreferenced blob and deletes the blob once that
    counter exceeds its limit.
    """
    doCleanupDeletedFiles()
@CallDeferred
def doCleanupDeletedFiles(cursor=None):
    """
    Work through one batch (up to 100) of ``viur-deleted-files`` markers.

    For each marker one of the following happens:

    - it has no ``dlkey``: the marker is deleted;
    - its ``dlkey`` is listed in a blob-lock's ``active_blob_references``:
      the marker is deleted, the file stays;
    - its ``itercount`` does not exceed the limit yet: the counter is increased by one;
    - otherwise: all blobs below ``<dlkey>/`` are deleted from the bucket,
      the marker is deleted, and the file skeletons with that ``dlkey`` are deleted.

    If the query provides a cursor afterwards, the function calls itself again
    with that cursor to process the next batch.

    :param cursor: Query cursor to continue from, or None to start from the beginning.
    """
    max_iter_count = 2  # How often a file will be checked for deletion

    query = db.Query("viur-deleted-files")
    if cursor:
        query.setCursor(cursor)

    for marker in query.run(100):
        if "dlkey" not in marker:
            # Marker without a download key is useless, drop it
            db.Delete(marker.key)
            continue

        dlkey = marker["dlkey"]

        if db.Query("viur-blob-locks").filter("active_blob_references =", dlkey).getEntry():
            # The file is in use again, so it must not be deleted
            logging.info(f"""is referenced, {dlkey}""")
            db.Delete(marker.key)
            continue

        if marker["itercount"] <= max_iter_count:
            # Not checked often enough yet, count this pass and retry on a later run
            logging.debug(f"""Increasing count, {dlkey}""")
            marker["itercount"] += 1
            db.Put(marker)
            continue

        logging.info(f"""Finally deleting, {dlkey}""")
        for blob in GOOGLE_STORAGE_BUCKET.list_blobs(prefix=f"""{dlkey}/"""):
            blob.delete()
        db.Delete(marker.key)
        # At most one file skeleton is expected per dlkey
        for file_skel in skeletonByKind("file")().all().filter("dlkey =", dlkey).fetch(99):
            file_skel.delete()

    next_cursor = query.getCursor()
    if next_cursor:
        doCleanupDeletedFiles(next_cursor)
@PeriodicTask(60 * 4)
def start_delete_pending_files():
    """
    Kick off the deletion of FileLeafSkel entries that are still pending
    and whose creation date lies more than 7 days in the past.
    """
    pending_query = FileLeafSkel().all().filter("pending =", True)
    max_age = datetime.timedelta(days=7)
    outdated_query = pending_query.filter("creationdate <", utils.utcNow() - max_age)
    DeleteEntitiesIter.startIterOnQuery(outdated_query)