Michael Pilosov
6 months ago
commit 3c2d665bfd
14 changed files with 913 additions and 0 deletions
@@ -0,0 +1,6 @@
out/
*.png
*.gif
*.mp4
__pycache__/
*.db
@@ -0,0 +1,51 @@
# Use the Python 3.10.11 slim image as the base image
FROM python:3.10.11-slim

# Set environment variables to avoid writing .pyc files and to keep stdout/stderr unbuffered
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1

# Create a new user 'user' with UID and GID of 1000
RUN groupadd -g 1000 user && \
    useradd -m -s /bin/bash -u 1000 -g user user

# Put user-level installs on the PATH
ENV PATH=/home/user/.local/bin:$PATH

# Install system dependencies as root
RUN apt-get update && \
    apt-get install -y --no-install-recommends make ffmpeg dumb-init && \
    rm -rf /var/lib/apt/lists/*

# Set the home directory
WORKDIR /home/user/
RUN chown -R user:user /home/user

# Switch to the non-root user before copying files and installing Python packages
USER user

# Copy the requirements file to /tmp and install Python dependencies with the --user flag
COPY --chown=user:user requirements.txt /tmp/requirements.txt
RUN python -m pip install --upgrade pip
RUN pip install --no-cache-dir --user -r /tmp/requirements.txt

# APPLICATION SETUP

# Copy the default profiles file into the Prefect config location
COPY --chown=user:user profiles.default.toml /home/user/.prefect/profiles.toml

# Copy the application files
COPY --chown=user:user app ./app
COPY --chown=user:user noaa_animate.py .
COPY --chown=user:user start.sh .
COPY --chown=user:user init_db.py .
RUN chmod +x start.sh
RUN mkdir -p out
RUN python init_db.py /home/user/.prefect/prefect.db

# No separate chown step needed for the application files:
# they are already owned by 'user' due to --chown on the COPY commands above

# Define the entrypoint and the command to execute
ENTRYPOINT ["dumb-init", "--"]
CMD ["./start.sh"]
@@ -0,0 +1,19 @@
# NOAA Animation Web App
![preview](preview.jpg)

Animate images from a website full of URLs, by way of a proxy server and some Python functions wrapped with the `prefect` decorator.
Uses https://services.swpc.noaa.gov as an example.


## Instructions
If you have `docker` and `make` installed, just run:

```bash
make
```

and visit `localhost:4200` to see Prefect, and `localhost:9021/iframe` to view the UI.

An example Flow of about 100 images:
![prefect](prefect.jpg)
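If `make` is unavailable, the entry point above reduces to two plain `docker` commands. A rough equivalent, mirroring the top-level Makefile added in this commit (the `noaa` image tag is the Makefile's choice):

```bash
docker build -t noaa .
docker run --rm -ti --name noaa -e HOST_NAME=localhost -p 9021:9021 -p 4200:4200 noaa
```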
@@ -0,0 +1,150 @@
import logging
import os
import re
import time
from datetime import datetime

import requests
from flask import Flask, Response, render_template, request, send_from_directory
from prefect.deployments import run_deployment

PORT = 9021
app = Flask(__name__)

logging.basicConfig(level=logging.DEBUG)


def deploy_name():
    return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") + "Z"


def get_host():
    host = os.environ.get("LIGHTNING_CLOUDSPACE_HOST")
    if host is None:
        default_host = os.environ.get("HOST_NAME", "0.0.0.0")
        return f"{default_host}:{PORT}"
    else:
        return f"{PORT}-{host}"


@app.route("/iframe")
@app.route("/iframe/")
@app.route("/iframe/<path:subpath>")
def home(subpath="images/animations/"):
    host = get_host()
    initial_url = f"http://{host}/{subpath}"
    api_url = f"http://{host}/api"
    return render_template(
        "index.html", initial_url=initial_url, host=f"http://{host}", api_url=api_url
    )


@app.route("/api", methods=["POST"])
def handle_api():
    data = request.json  # This assumes the client sends JSON data.
    url = data.get("url")
    if not url.endswith("/"):
        url += "/"

    logging.debug(f"Received URL: {url}")
    params = {"url": url, "limit": 24 * 60, "ext": None}
    response = run_deployment(
        name="create-animations/noaa-animate",
        parameters=params,
        flow_run_name=f"{deploy_name()}.webapp.{url}",
    )
    # response is a FlowRun - need to get what we want from it.

    # Process the data as needed.
    return {
        "status": "success",
        "message": f"{url} processed successfully",
        # "response": response,
    }, 200


@app.route("/videos/<path:filename>")
def custom_static(filename):
    return send_from_directory("../out", filename)


@app.route("/", methods=["GET"])
@app.route("/<path:url>", methods=["GET"])
def proxy(url=""):
    original_base_url = "https://services.swpc.noaa.gov"
    host = get_host()
    proxy_base_url = f"http://{host}/"

    target_url = f"{original_base_url}/{url}"
    logging.debug(f"Fetching URL: {target_url}")

    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
        }
        response = requests.get(target_url, headers=headers, stream=True)
        excluded_headers = [
            "content-encoding",
            "content-length",
            "transfer-encoding",
            "connection",
        ]
        headers = [
            (name, value)
            for (name, value) in response.raw.headers.items()
            if name.lower() not in excluded_headers
        ]

        if "text/html" in response.headers.get("Content-Type", ""):
            content = response.content.decode("utf-8")
            content = re.sub(r"'http://", "'https://", content)
            content = re.sub(
                r"https?://services.swpc.noaa.gov", proxy_base_url, content
            )

            content = content.replace(
                "</body>",
                f"""
                <script>
                window.addEventListener('load', function() {{
                    var hasSufficientImages = false;
                    var observer = new MutationObserver(function(mutations) {{
                        mutations.forEach(function(mutation) {{
                            if (mutation.type === 'childList') {{
                                console.log('observer');
                                checkImages();
                            }}
                        }});
                    }});
                    observer.observe(document.body, {{ childList: true, subtree: true }});

                    function checkImages() {{
                        var links = document.querySelectorAll('a');
                        var imageLinkRegex = /\.(jpg|jpeg|png)(\\?.*)?(#.*)?$/i;
                        var numImages = Array.from(links).filter(link => imageLinkRegex.test(link.href)).length;
                        console.log('Number of eligible links:', numImages);
                        hasSufficientImages = numImages >= 60;
                        window.parent.postMessage({{ type: 'urlUpdate', url: '{original_base_url}/{url}', eligible: hasSufficientImages }}, '*');
                    }}

                    // Run initial check in case content is already loaded
                    console.log('initial');
                    checkImages();
                }});
                </script>
                </body>""",
            )
            content = content.encode("utf-8")
            return Response(content, status=response.status_code, headers=headers)
        else:
            return Response(
                response.content, status=response.status_code, headers=headers
            )

    except Exception as e:
        logging.error(f"Error fetching URL: {e}")
        return Response(f"Error fetching URL: {e}", status=500)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=9021, debug=True)
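The `/api` handler above kicks off the `create-animations/noaa-animate` deployment from a JSON POST carrying a `url` key. A hypothetical way to exercise it by hand against a locally running instance (any image-folder URL with enough images would do):

```bash
curl -X POST http://localhost:9021/api \
  -H 'Content-Type: application/json' \
  -d '{"url": "https://services.swpc.noaa.gov/images/animations/geospace/velocity/"}'
```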
@@ -0,0 +1,5 @@
start:
	gunicorn --worker-class gevent --bind 0.0.0.0:9021 app:app

dev:
	python app.py
@@ -0,0 +1,271 @@
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Animator</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            display: flex;
            flex-direction: column;
            align-items: center;
            margin: 0;
            padding: 20px;
        }

        iframe {
            width: 99%;
            /* Full width to use available space */
            height: 500px;
            border: 1px solid #ccc;
            margin-bottom: 20px;
        }

        #current-url {
            font-size: 16px;
            color: #333;
            width: 100%;
            /* Makes sure it takes the full container width */
            word-wrap: break-word;
            /* Ensures the text wraps in the div */
            margin-bottom: 10px;
            /* Space before the navigation buttons */
        }

        .button-group {
            display: flex;
            /* Aligns buttons in a row */
            justify-content: center;
            /* Centers buttons horizontally */
            gap: 10px;
            /* Adds space between buttons */
        }

        button {
            padding: 5px 10px;
            margin: 10px;
            border-radius: 5px;
            /* Rounded corners */
            border: 1px solid #ccc;
            /* Grey border */
            display: inline-block;
            /* Ensures buttons are inline and can control additional layout properties */
            font-size: 16px;
        }

        #backButton,
        #forwardButton {
            background-color: #f0f0f0;
            /* Light grey background */
            color: #333;
            /* Dark text */
            padding: 0px 10px;
            /* Reduced vertical padding for narrow height */
            cursor: pointer;
            /* Cursor indicates button */
            height: 24px;
            /* Fixed height for a narrower button */
            line-height: 16px;
            /* Adjust line height to vertically center the text */
            margin: 2px;
            /* Small margin to separate buttons slightly */
        }

        #backButton:hover,
        #forwardButton:hover {
            background-color: #e8e8e8;
            /* Slightly darker background on hover */
        }

        video {
            display: none;
            /* Initially hide the video player */
            width: 99%;
            /* Adjust based on your layout needs, or use max-width for responsiveness */
            height: auto;
            /* Maintain aspect ratio */
            margin-top: 20px;
            /* Ensure it's centered properly */
            max-width: 640px;
            /* Max width of the video */
            border: 1px solid #ccc;
            /* Optional, adds a border for better visibility */
        }
    </style>
</head>

<body>
    <h1>Animate a Folder of Images</h1>
    <p>Navigate to a folder of 60+ images.</p>
    <iframe id="iframe" src="{{ initial_url }}"></iframe>
    <div class="button-group"> <!-- Button group for inline display -->
        <button id="backButton" onclick="goBack()">←</button>
        <button id="forwardButton" onclick="goForward()">→</button>
    </div>
    <button id="submit-button" onclick="submitUrl()" style="display:none;">Create Latest Movie</button>
    <div id="loading-spinner" style="display: none;">
        <div class="spinner"></div>
    </div>

    <style>
        .spinner {
            border: 8px solid #f3f3f3;
            /* Light grey */
            border-top: 8px solid #3498db;
            /* Blue */
            border-radius: 50%;
            width: 50px;
            height: 50px;
            animation: spin 45s linear infinite;
        }

        @keyframes spin {
            0% {
                transform: rotate(0deg);
            }

            100% {
                transform: rotate(360deg);
            }
        }
    </style>

    <video id="video-player" controls loop style="display: none;">
        <source id="video-source" type="video/mp4">
        Your browser does not support the video tag.
    </video>
    <script>
        function goBack() {
            document.getElementById('iframe').contentWindow.history.back();
        }

        function goForward() {
            document.getElementById('iframe').contentWindow.history.forward();
        }

        function updateUrl(url) {
            document.getElementById('url').textContent = url;
        }

        function handleVideoSuccess() {
            console.log("Video loaded successfully.");
            document.getElementById('video-player').style.display = 'block'; // Show the video player only if the video loads successfully
        }

        function handleVideoError() {
            console.log("Unable to load video.");
            document.getElementById('video-player').style.display = 'none'; // Hide the video player
            document.getElementById('submit-button').textContent = 'Generate Movie';
        }

        function updateVideo(url) {
            // Convert the full URL to a format suitable for the video path
            let formattedPath = url.replace(/https?:\/\//, '') // Remove the protocol part
                .replace(/\./g, '_') // Replace dots with underscores
                .replace(/\//g, '-'); // Replace slashes with hyphens
            // Check if the formattedPath ends with a hyphen; if not, append '-'
            if (!formattedPath.endsWith('-')) {
                formattedPath += '-';
            }

            // Append 'latest.mp4' to the formatted path
            let videoPath = `${formattedPath}latest.mp4`;
            let videoPlayer = document.getElementById('video-player');
            let videoSource = document.getElementById('video-source');

            videoPlayer.muted = true;
            // Set up event listeners before setting the source
            videoSource.onerror = handleVideoError;
            // videoSource.onloadedmetadata = handleVideoSuccess;

            console.log("Fetched latest");
            videoSource.src = `/videos/${videoPath}`;
            videoPlayer.load();
            // videoPlayer.style.display = 'block';
            videoPlayer.play().then(() => {
                // The video is playing, show the player
                console.log("Video loaded and playing.");
                videoPlayer.style.display = 'block';
            }).catch(error => {
                // Error playing the video
                console.log("Failed to play video: ", error);
                videoPlayer.style.display = 'none';
            });
            document.getElementById('submit-button').textContent = 'Generate Latest Movie';
        }

        window.addEventListener('message', function (event) {
            if (event.origin === '{{ host }}') {
                var data = event.data;
                if (data && data.type === 'urlUpdate') {
                    const submitButton = document.getElementById('submit-button');
                    const videoPlayer = document.getElementById('video-player');
                    updateUrl(data.url);
                    if (data.eligible) {
                        submitButton.style.display = 'block'; // Show the button
                        updateVideo(data.url);
                    } else {
                        submitButton.style.display = 'none'; // Hide the button
                        videoPlayer.style.display = 'none'; // Hide the video
                    }
                    const newSubpath = new URL(data.url).pathname; // Extract the path from the URL
                    // Update the browser's URL to reflect the iframe's navigation
                    const newPath = `/iframe${newSubpath}`; // Construct the new path
                    document.getElementById('share-button').setAttribute('data-url', window.location.origin + newPath);
                    // history.pushState({ path: newPath }, '', newPath);
                }
            }
        });

        function submitUrl() {
            const url = document.getElementById('url').textContent;
            const payload = { url: url };
            document.getElementById('loading-spinner').style.display = 'block'; // Show the loading spinner
            document.getElementById('submit-button').style.display = 'none'; // Hide the button
            console.log("Requesting new video.");
            fetch('{{ api_url }}', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(payload)
            }).then(response => response.json())
                .then(data => {
                    console.log(data);
                    // Hide the loading spinner
                    document.getElementById('loading-spinner').style.display = 'none';
                    document.getElementById('submit-button').style.display = 'block';
                    updateUrl(url);
                    // Re-attempt to load the video
                    updateVideo(url);
                })
                .catch(error => {
                    console.error('Error:', error);
                    // Hide the loading spinner
                    document.getElementById('loading-spinner').style.display = 'none';
                    document.getElementById('submit-button').style.display = 'block';
                });
        }

        function copyUrlToClipboard() {
            const url = document.getElementById('share-button').getAttribute('data-url');
            navigator.clipboard.writeText(url).then(() => {
                alert('URL copied to clipboard!');
            }).catch(err => {
                console.error('Failed to copy: ', err);
            });
        }
    </script>
    <button id="share-button" onclick="copyUrlToClipboard()">Share Link</button>

    <div align="center" id="current-url">Source: <span id="url">Loading...</span></div>
</body>

</html>
@@ -0,0 +1,97 @@
import argparse
import logging
import os
import sqlite3

# Set up basic configuration for logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


def initialize_db(db_path):
    # Check if the database file already exists
    db_exists = os.path.exists(db_path)
    if db_exists:
        logging.info(f"{db_path} exists")  # Log a message if the database exists
        return
    try:
        with sqlite3.connect(
            db_path
        ) as conn:  # Using 'with' to ensure that the connection is closed automatically
            configure_database(conn)
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")  # Log any SQLite errors that occur
    except Exception as e:
        logging.error(
            f"Exception in initialize_db: {e}"
        )  # Log any other exceptions that occur


def configure_database(conn):
    cursor = conn.cursor()
    # Setting the journal mode to WAL for better concurrency
    cursor.execute("PRAGMA journal_mode = WAL;")
    # Setting synchronous to NORMAL for a balance between speed and reliability
    cursor.execute("PRAGMA synchronous = NORMAL;")
    # Setting a busy timeout to prevent immediate failures when the database is locked
    cursor.execute("PRAGMA busy_timeout = 5000;")
    # Increasing the cache size to reduce the number of disk I/O operations
    cursor.execute("PRAGMA cache_size = -32000;")
    # Enabling memory-mapped I/O for potentially faster file operations
    cursor.execute("PRAGMA mmap_size = 536870912;")
    # Setting locking mode to EXCLUSIVE can enhance performance for single-user scenarios
    cursor.execute("PRAGMA locking_mode = EXCLUSIVE;")
    # Ensuring foreign key constraints are enforced for data integrity
    cursor.execute("PRAGMA foreign_keys = ON;")
    conn.commit()  # Commit all PRAGMA configurations

    logging.info("Set up database with multi-user optimizations.")


def batch_transact(db_path, operations):
    try:
        with sqlite3.connect(
            db_path
        ) as conn:  # Ensure that the connection is handled properly
            cursor = conn.cursor()
            cursor.execute(
                "BEGIN TRANSACTION;"
            )  # Start a transaction for batch operations
            for operation in operations:
                cursor.execute(
                    operation
                )  # Execute each SQL operation provided in the operations list
            cursor.execute("COMMIT;")  # Commit all operations at once
    except sqlite3.Error as e:
        logging.error(f"Database error during batch transaction: {e}")
    except Exception as e:
        logging.error(f"Exception in batch_transact: {e}")


def maintenance(db_path):
    try:
        with sqlite3.connect(db_path) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "PRAGMA optimize;"
            )  # Optimize the database to maintain performance
            cursor.execute("VACUUM;")  # Reclaim space and defragment the database file
    except sqlite3.Error as e:
        logging.error(f"Database error during maintenance: {e}")
    except Exception as e:
        logging.error(f"Exception in maintenance: {e}")


def parse_args():
    parser = argparse.ArgumentParser(
        description="Initialize and manage an SQLite database."
    )
    parser.add_argument("db_path", type=str, help="Path to the SQLite database file.")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()  # Parse the command-line arguments for the database path
    initialize_db(args.db_path)  # Use the parsed path to initialize the database
@@ -0,0 +1,10 @@
run: build
	docker run --rm -ti --name noaa -e HOST_NAME=localhost -p 9021:9021 -p 4200:4200 noaa

build:
	docker build -t noaa .

lint:
	isort --profile=black .
	black .
@@ -0,0 +1,266 @@
import os
import re
from datetime import datetime, timedelta
from io import BytesIO
from typing import Dict, Iterator, List

import httpx

# import imageio
import numpy as np
from moviepy.editor import ImageSequenceClip
from PIL import Image
from prefect import flow, task
from prefect.task_runners import ConcurrentTaskRunner
from prefect.tasks import task_input_hash

BASE_URL = "https://services.swpc.noaa.gov/images/animations/geospace/"


@task(
    retries=3,
    retry_delay_seconds=5,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=2),
    log_prints=True,
)
def get_file_links(url: str, ext: str | None = None) -> Iterator[str]:
    response = httpx.get(url)
    response.raise_for_status()
    webpage_content = response.text
    if ext is None:
        print("Extension not supplied. Inferring (less efficient) png/jpg/jpeg")
        exts = ["png", "jpg", "jpeg"]
    else:
        exts = [ext.lower()]
    lines = webpage_content.split("\n")
    for line in lines:
        for ext in exts:
            if ext in line:  # need to parse the href link
                start_pos = line.find('href="') + len('href="')
                end_pos = line.find('"', start_pos)
                href = line[start_pos:end_pos]
                if href.endswith(f"latest.{ext}"):
                    print("Skipping latest")
                    continue
                if href.endswith(ext):
                    if not href.startswith("http"):
                        href = url + href
                    yield href
                break  # Exit the inner loop to avoid duplicate yields for multiple exts


def url_tail_hash(context, parameters):
    # return a constant
    return parameters["url"].split("/")[-1]


def out_path_hash(context, parameters):
    return parameters["output_path"] + f"_L{len(parameters['images'])}"


@task(
    retries=5,
    retry_delay_seconds=1,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=5),
    result_storage_key="{parameters[url]}",
)
def get_content(url: str, params: Dict[str, any] | None = None):
    response = httpx.get(f"https://{url}", params=params)
    try:
        response.raise_for_status()
        return response.content
    except httpx.HTTPStatusError:
        return None


def preview_urls(urls):
    print("URLS (head):")
    print(urls[:5])
    print("URLS (tail):")
    print(urls[-5:])


@task(
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(hours=1),
)
def get_images(urls: List[str], limit: int = 0):
    if limit > 0:
        print(f"Limiting to {limit} urls")
        urls = urls[-limit:]

    urls = [url.replace("https://", "").replace("http://", "") for url in urls]
    preview_urls(urls)

    futures = get_content.map(urls)
    images = [
        (urls[i], f.result()) for i, f in enumerate(futures) if f.result() is not None
    ]
    return images


def extract_timestamp_from_url(url: str) -> str:
    # Assumes the URL contains a timestamp of the form YYYYMMDD_HHMMSS
    match = re.search(r"\d{8}_\d{6}", url)
    return match.group(0) if match else ""


# @task(
#     cache_key_fn=out_path_hash,
#     cache_expiration=timedelta(minutes=3),
#     result_storage_key="{parameters[output_path]}",
# )
# def create_animation(
#     images: List[bytes], output_path: str, duration: float = 0.5
# ) -> None:
#     if not images:
#         raise ValueError("No images!")
#     pil_images = [Image.open(BytesIO(img_data)).convert("RGB") for img_data in images]
#     imageio.mimsave(output_path, pil_images, duration=duration)
#     return output_path


def make_even_dimensions(image):
    width, height = image.size
    if width % 2 == 1:
        width -= 1
    if height % 2 == 1:
        height -= 1
    # LANCZOS replaces the ANTIALIAS constant removed in Pillow 10
    return image.resize((width, height), Image.LANCZOS)


def crop_to_even(image):
    width, height = image.size
    # Adjust width and height to be even
    if width % 2 == 1:
        width -= 1
    if height % 2 == 1:
        height -= 1
    return image.crop((0, 0, width, height))


@task(
    cache_key_fn=out_path_hash,
    cache_expiration=timedelta(hours=4),
    result_storage_key="{parameters[output_path]}",
)
def create_mp4_animation(images: List[bytes], output_path: str, fps: int = 24) -> None:
    # Convert bytes to PIL images and then to numpy arrays
    frames = [
        np.array(crop_to_even(Image.open(BytesIO(img_data)).convert("RGB")))
        for img_data in images
    ]

    # Create a video clip from the image sequence
    clip = ImageSequenceClip(frames, fps=fps)

    # Write the video clip to a file
    clip.write_videofile(
        output_path,
        codec="libx264",
        ffmpeg_params=["-pix_fmt", "yuv420p"],
        preset="medium",
        bitrate="800k",
    )

    return output_path


def format_output_name(url: str, latest: bool = False):
    if latest:
        now = "latest"
    else:
        now = datetime.now().strftime("%Y%m%d-%H:%M:%S")
    return (
        url.replace("https://", "")
        .replace("http://", "")
        .replace("/", "-")
        .replace(".", "_")
        + now
    )


@task(
    name="animate",
    retries=0,
    retry_delay_seconds=1,
    log_prints=True,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=3),
)
def animate(
    url: str = "https://services.swpc.noaa.gov/images/animations/geospace/density/",
    ext: str = "png",
    latest: bool = True,
    limit: int = 0,
):
    urls = get_file_links(url, ext)
    if len(urls) == 0:
        raise ValueError("No urls scraped")
    images = get_images(list(sorted(urls)), limit=limit)
    if len(images) == 0:
        raise ValueError("No images retrieved.")
    print(f"Retrieved {len(images)} images.")
    sorted_images = sorted(images, key=lambda x: extract_timestamp_from_url(x[0]))
    print("Head:")
    print([u for u, i in sorted_images[:5]])
    frames = [s[1] for s in sorted_images]
    # create_animation(frames, "out.gif", duration=5)
    out_name = format_output_name(url, latest=latest)
    create_mp4_animation(frames, f"out/{out_name}.mp4")


def deploy_name():
    return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") + "Z"


@flow(
    name="create-animations",
    retries=0,
    retry_delay_seconds=1,
    log_prints=True,
    task_runner=ConcurrentTaskRunner(),
    flow_run_name=None,
    timeout_seconds=90,
)
def create_animations(
    url: str | List[str] = BASE_URL + "velocity/",
    ext: str | None = None,
    latest: bool = False,
    limit: int = 0,
):
    if isinstance(url, str):
        url = [url]

    futures = animate.map(url, ext, latest, limit)
    return futures


if __name__ == "__main__":
    # make_animation.from_source(
    #     source=TEST_REPO,
    #     entrypoint="noaa_animate.py:make_animation",
    # ).deploy(
    #     name="noaa-animate", work_pool_name="process"
    # )

    from prefect.client.schemas.schedules import CronSchedule

    sched = CronSchedule(cron="*/15 * * * *", timezone="America/Denver")

    links = [
        BASE_URL + "density/",
        BASE_URL + "velocity/",
        BASE_URL + "pressure/",
    ]
    sched_params = {
        "latest": True,
        "url": links,
        "ext": "png",
        "limit": 0,
    }
    create_animations.serve(
        "noaa-animate", limit=8, schedule=None, parameters=sched_params
    )
    # make_animation(url)
Two binary image files added (448 KiB and 408 KiB), referenced by the README.
@@ -0,0 +1,14 @@
active = "default"
PREFECT_API_URL = "http://0.0.0.0:4200/api"

[profiles.default]
PREFECT_TASK_SCHEDULING_MAX_SCHEDULED_QUEUE_SIZE = 4

PREFECT_API_SERVICES_SCHEDULER_DEPLOYMENT_BATCH_SIZE = 100
PREFECT_API_SERVICES_SCHEDULER_ENABLED = true
PREFECT_API_SERVICES_SCHEDULER_INSERT_BATCH_SIZE = 500
PREFECT_API_SERVICES_SCHEDULER_LOOP_SECONDS = 60
PREFECT_API_SERVICES_SCHEDULER_MIN_RUNS = 3
PREFECT_API_SERVICES_SCHEDULER_MAX_RUNS = 100
PREFECT_API_SERVICES_SCHEDULER_MIN_SCHEDULED_TIME = '0:30:00'
PREFECT_API_SERVICES_SCHEDULER_MAX_SCHEDULED_TIME = '0 days, 8:00:00'
@@ -0,0 +1,9 @@
prefect==2.17.1
Flask==3.0.3
gunicorn==22.0.0
gevent==24.2.1
moviepy==1.0.3
pillow==10.3.0
requests==2.32.3
httpx==0.27.0
# imageio==2.34.1
@@ -0,0 +1,15 @@
#!/bin/bash
# Start the web app
cd app && make &

# Start Prefect in the background
prefect server start --host 0.0.0.0 &

sleep 10

# Start the deployment
python noaa_animate.py &

# Wait for all background jobs to finish
wait