Michael Pilosov
6 months ago
commit
3c2d665bfd
14 changed files with 913 additions and 0 deletions
@ -0,0 +1,6 @@ |
|||||
|
out/ |
||||
|
*.png |
||||
|
*.gif |
||||
|
*.mp4 |
||||
|
__pycache__/ |
||||
|
*.db |
@ -0,0 +1,51 @@ |
|||||
|
# Base image: slim build of CPython 3.10.11
FROM python:3.10.11-slim

# Do not write .pyc files; do not buffer stdout/stderr so logs appear immediately.
# (key=value form: the whitespace-separated `ENV key value` syntax is legacy.)
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Create an unprivileged account 'user' with UID and GID 1000
RUN groupadd -g 1000 user && \
    useradd -m -s /bin/bash -u 1000 -g user user

# `pip install --user` puts console scripts in ~/.local/bin
ENV PATH=/home/user/.local/bin:$PATH

# Install system dependencies while still root.
# make runs app/Makefile from start.sh; dumb-init is the entrypoint below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends make ffmpeg dumb-init && \
    rm -rf /var/lib/apt/lists/*

# Work from the user's home directory and make sure the user owns it
WORKDIR /home/user/
RUN chown -R user:user /home/user

# Switch to the non-root user before copying files and installing Python packages
USER user

# Install Python dependencies into the user site; one layer, no pip cache kept
COPY --chown=user:user requirements.txt /tmp/requirements.txt
RUN python -m pip install --no-cache-dir --user --upgrade pip && \
    python -m pip install --no-cache-dir --user -r /tmp/requirements.txt

# APPLICATION SETUP

# Prefect profile shipped with the image
COPY --chown=user:user profiles.default.toml /home/user/.prefect/profiles.toml

# Application files (already owned by 'user' thanks to --chown, so no extra chown is needed)
COPY --chown=user:user app ./app
COPY --chown=user:user noaa_animate.py .
COPY --chown=user:user start.sh .
COPY --chown=user:user init_db.py .
RUN chmod +x start.sh
RUN mkdir -p out
RUN python init_db.py /home/user/.prefect/prefect.db

# dumb-init is PID 1 and runs start.sh as its child
ENTRYPOINT ["dumb-init", "--"]
CMD ["./start.sh"]
@ -0,0 +1,19 @@ |
|||||
|
# NOAA Animation Web App |
||||
|
![preview](preview.jpg) |
||||
|
|
||||
|
Animate a folder of images found on a website: browse the site through a proxy server, then build the animation with Python functions wrapped in `prefect` decorators (`@flow` / `@task`). |
||||
|
Uses https://services.swpc.noaa.gov as an example. |
||||
|
|
||||
|
|
||||
|
## Instructions |
||||
|
If you have `docker` and `make` installed, just run: |
||||
|
|
||||
|
```bash |
||||
|
make |
||||
|
``` |
||||
|
|
||||
|
and visit `localhost:4200` to see Prefect, and `localhost:9021/iframe` to view the UI. |
||||
|
|
||||
|
An example Flow of about 100 images: |
||||
|
![prefect](prefect.jpg) |
||||
|
|
@ -0,0 +1,150 @@ |
|||||
|
import logging |
||||
|
import os |
||||
|
import re |
||||
|
import time |
||||
|
from datetime import datetime |
||||
|
|
||||
|
import requests |
||||
|
from flask import Flask, Response, render_template, request, send_from_directory |
||||
|
from prefect.deployments import run_deployment |
||||
|
|
||||
|
PORT = 9021 |
||||
|
app = Flask(__name__) |
||||
|
|
||||
|
logging.basicConfig(level=logging.DEBUG) |
||||
|
|
||||
|
|
||||
|
def deploy_name():
    """Return the current UTC time formatted like ``2024-05-01T12:34:56Z``.

    The value is used as the leading part of flow-run names created by the
    ``/api`` endpoint.
    """
    timestamp = datetime.utcnow()
    return f"{timestamp:%Y-%m-%dT%H:%M:%S}Z"
||||
|
|
||||
|
|
||||
|
def get_host():
    """Return the host string under which this app is reachable.

    If ``LIGHTNING_CLOUDSPACE_HOST`` is set, the result is ``"<PORT>-<host>"``.
    Otherwise it is ``"<HOST_NAME>:<PORT>"``, where ``HOST_NAME`` defaults to
    ``0.0.0.0`` when the variable is unset.
    """
    cloudspace_host = os.environ.get("LIGHTNING_CLOUDSPACE_HOST")
    if cloudspace_host is not None:
        return f"{PORT}-{cloudspace_host}"

    fallback_host = os.environ.get("HOST_NAME", "0.0.0.0")
    return f"{fallback_host}:{PORT}"
||||
|
|
||||
|
|
||||
|
@app.route("/iframe")
@app.route("/iframe/")
@app.route("/iframe/<path:subpath>")
def home(subpath="images/animations/"):
    """Render ``index.html`` with the iframe pointed at the proxied ``subpath``.

    The template receives the iframe's start URL, the app origin (used by the
    page to filter ``postMessage`` events) and the URL of the ``/api`` endpoint.
    """
    origin = f"http://{get_host()}"
    template_context = {
        "initial_url": f"{origin}/{subpath}",
        "host": origin,
        "api_url": f"{origin}/api",
    }
    return render_template("index.html", **template_context)
||||
|
|
||||
|
|
||||
|
@app.route("/api", methods=["POST"])
def handle_api():
    """Trigger the ``create-animations/noaa-animate`` deployment for a URL.

    Expects a JSON object body with a string ``url`` field. A trailing slash is
    appended to the URL when missing, and the deployment is run with a limit
    of ``24 * 60`` images and no fixed extension.

    Returns:
        ``({"status": "success", ...}, 200)`` once ``run_deployment`` returns,
        or ``({"status": "error", ...}, 400)`` when the body is not a JSON
        object carrying a non-empty string ``url``.
    """
    # silent=True: a malformed or non-JSON body yields None instead of raising.
    data = request.get_json(silent=True)
    url = data.get("url") if isinstance(data, dict) else None
    if not isinstance(url, str) or not url:
        return {
            "status": "error",
            "message": "Request body must be JSON with a non-empty 'url' string",
        }, 400

    if not url.endswith("/"):
        url += "/"

    logging.debug(f"Received URL: {url}")
    params = {"url": url, "limit": 24 * 60, "ext": None}
    # The returned flow-run object is not inspected yet.
    # NOTE(review): "success" below is therefore reported whatever the final
    # state of the run was.
    run_deployment(
        name="create-animations/noaa-animate",
        parameters=params,
        flow_run_name=f"{deploy_name()}.webapp.{url}",
    )

    return {
        "status": "success",
        "message": f"{url} processed successfully",
    }, 200
||||
|
|
||||
|
|
||||
|
@app.route("/videos/<path:filename>")
def custom_static(filename):
    """Serve ``filename`` from the ``../out`` directory."""
    video_directory = "../out"
    return send_from_directory(video_directory, filename)
||||
|
|
||||
|
|
||||
|
@app.route("/", methods=["GET"])
@app.route("/<path:url>", methods=["GET"])
def proxy(url=""):
    """Proxy ``https://services.swpc.noaa.gov/<url>`` through this app.

    Non-HTML responses are passed through unchanged. HTML responses have
    links to the upstream site rewritten to point at this proxy, and a
    script is injected before ``</body>``. That script counts image links on
    the page and posts a ``urlUpdate`` message to the parent window; the page
    is "eligible" when it has at least 60 image links.

    Hop-by-hop and encoding headers are dropped because the body is re-sent
    by Flask after being decoded by ``requests``.

    Returns:
        A Flask ``Response`` mirroring the upstream status code, or a
        plain-text 500 response if fetching or rewriting fails.
    """
    import json  # local import keeps this block self-contained

    original_base_url = "https://services.swpc.noaa.gov"
    host = get_host()
    proxy_base_url = f"http://{host}/"

    target_url = f"{original_base_url}/{url}"
    logging.debug(f"Fetching URL: {target_url}")

    try:
        request_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
        }
        # A timeout stops a stalled upstream from blocking the worker forever.
        response = requests.get(
            target_url, headers=request_headers, stream=True, timeout=30
        )
        excluded_headers = {
            "content-encoding",
            "content-length",
            "transfer-encoding",
            "connection",
        }
        headers = [
            (name, value)
            for (name, value) in response.raw.headers.items()
            if name.lower() not in excluded_headers
        ]

        if "text/html" not in response.headers.get("Content-Type", ""):
            return Response(
                response.content, status=response.status_code, headers=headers
            )

        # errors="replace": a page that is not valid UTF-8 still renders.
        content = response.content.decode("utf-8", errors="replace")
        content = content.replace("'http://", "'https://")
        # Dots are escaped so only the real host matches. The optional trailing
        # slash is consumed because proxy_base_url already ends with "/",
        # which avoids producing "http://host//path".
        content = re.sub(
            r"https?://services\.swpc\.noaa\.gov/?",
            lambda _match: proxy_base_url,
            content,
        )

        # Encode the page URL as a JS string literal. The path is caller
        # controlled, so it must not be able to terminate the string or the
        # surrounding <script> element.
        page_url_js = json.dumps(f"{original_base_url}/{url}").replace("</", "<\\/")

        injected_script = f"""
<script>
window.addEventListener('load', function() {{
    var hasSufficientImages = false;
    var observer = new MutationObserver(function(mutations) {{
        mutations.forEach(function(mutation) {{
            if (mutation.type === 'childList') {{
                console.log('observer');
                checkImages();
            }}
        }});
    }});
    observer.observe(document.body, {{ childList: true, subtree: true }});

    function checkImages() {{
        var links = document.querySelectorAll('a');
        var imageLinkRegex = /\\.(jpg|jpeg|png)(\\?.*)?(#.*)?$/i;
        var numImages = Array.from(links).filter(link => imageLinkRegex.test(link.href)).length;
        console.log('Number of eligible links:', numImages);
        hasSufficientImages = numImages >= 60;
        window.parent.postMessage({{ type: 'urlUpdate', url: {page_url_js}, eligible: hasSufficientImages }}, '*');
    }}

    // Run initial check in case content is already loaded
    console.log('initial');
    checkImages();
}});
</script>
</body>"""
        content = content.replace("</body>", injected_script)
        return Response(
            content.encode("utf-8"), status=response.status_code, headers=headers
        )

    except Exception as e:
        # Top-level boundary for the proxy: log and report, never crash the worker.
        logging.error(f"Error fetching URL: {e}")
        # text/plain: the message can echo the requested path, so it must not
        # be interpreted as HTML by the browser.
        return Response(f"Error fetching URL: {e}", status=500, mimetype="text/plain")
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Development entry point (the Makefile's `dev` target runs `python app.py`).
    # Uses the shared PORT constant so this server and get_host() cannot drift apart.
    # NOTE(review): debug=True enables the interactive debugger while binding
    # to all interfaces; do not expose this entry point on an untrusted network.
    app.run(host="0.0.0.0", port=PORT, debug=True)
@ -0,0 +1,5 @@ |
|||||
|
# Neither target produces a file; declare them phony so a stray file named
# "start" or "dev" can never make them look up to date.
.PHONY: start dev

# Default target: serve the app with gunicorn (gevent workers) on port 9021
start:
	gunicorn --worker-class gevent --bind 0.0.0.0:9021 app:app

# Run the Flask development server instead
dev:
	python app.py
@ -0,0 +1,271 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html lang="en"> |
||||
|
|
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>Animator</title> |
||||
|
<style> |
||||
|
body { |
||||
|
font-family: Arial, sans-serif; |
||||
|
display: flex; |
||||
|
flex-direction: column; |
||||
|
align-items: center; |
||||
|
margin: 0; |
||||
|
padding: 20px; |
||||
|
} |
||||
|
|
||||
|
iframe { |
||||
|
width: 99%; |
||||
|
/* Full width to use available space */ |
||||
|
height: 500px; |
||||
|
border: 1px solid #ccc; |
||||
|
margin-bottom: 20px; |
||||
|
} |
||||
|
|
||||
|
#current-url { |
||||
|
font-size: 16px; |
||||
|
color: #333; |
||||
|
width: 100%; |
||||
|
/* Makes sure it takes the full container width */ |
||||
|
word-wrap: break-word; |
||||
|
/* Ensures the text wraps in the div */ |
||||
|
margin-bottom: 10px; |
||||
|
/* Space before the navigation buttons */ |
||||
|
} |
||||
|
|
||||
|
.button-group { |
||||
|
display: flex; |
||||
|
/* Aligns buttons in a row */ |
||||
|
justify-content: center; |
||||
|
/* Centers buttons horizontally */ |
||||
|
gap: 10px; |
||||
|
/* Adds space between buttons */ |
||||
|
} |
||||
|
|
||||
|
button { |
||||
|
padding: 5px 10px; |
||||
|
margin: 10px; |
||||
|
border-radius: 5px; |
||||
|
/* Rounded corners */ |
||||
|
border: 1px solid #ccc; |
||||
|
/* Grey border */ |
||||
|
display: inline-block; |
||||
|
/* Ensures buttons are inline and can control additional layout properties */ |
||||
|
font-size: 16px; |
||||
|
} |
||||
|
|
||||
|
#backButton, |
||||
|
#forwardButton { |
||||
|
background-color: #f0f0f0; |
||||
|
/* Light grey background */ |
||||
|
color: #333; |
||||
|
/* Dark text */ |
||||
|
padding: 0px 10px; |
||||
|
/* Reduced vertical padding for narrow height */ |
||||
|
cursor: pointer; |
||||
|
/* Cursor indicates button */ |
||||
|
height: 24px; |
||||
|
/* Fixed height for a narrower button */ |
||||
|
line-height: 16px; |
||||
|
/* Adjust line height to vertically center the text */ |
||||
|
margin: 2px; |
||||
|
/* Small margin to separate buttons slightly */ |
||||
|
} |
||||
|
|
||||
|
#backButton:hover, |
||||
|
#forwardButton:hover { |
||||
|
background-color: #e8e8e8; |
||||
|
/* Slightly darker background on hover */ |
||||
|
} |
||||
|
|
||||
|
video { |
||||
|
display: none; |
||||
|
/* Initially hide the video player */ |
||||
|
width: 99%; |
||||
|
/* Adjust based on your layout needs, or use max-width for responsiveness */ |
||||
|
height: auto; |
||||
|
/* Maintain aspect ratio */ |
||||
|
margin-top: 20px; |
||||
|
/* Ensure it's centered properly */ |
||||
|
max-width: 640px; |
||||
|
/* Max width of the video */ |
||||
|
border: 1px solid #ccc; |
||||
|
/* Optional, adds a border for better visibility */ |
||||
|
} |
||||
|
</style> |
||||
|
</head> |
||||
|
|
||||
|
<body> |
||||
|
<h1>Animate a Folder of Images</h1> |
||||
|
<p>Navigate to a folder of 60+ images.</p> |
||||
|
<iframe id="iframe" src="{{ initial_url }}"></iframe> |
||||
|
<div class="button-group"> <!-- Button group for inline display --> |
||||
|
<button id="backButton" onclick="goBack()">←</button> |
||||
|
<button id="forwardButton" onclick="goForward()">→</button> |
||||
|
</div> |
||||
|
<button id="submit-button" onclick="submitUrl()" style="display:none;">Create Latest Movie</button> |
||||
|
<div id="loading-spinner" style="display: none;"> |
||||
|
<div class="spinner"></div> |
||||
|
</div> |
||||
|
|
||||
|
<style> |
||||
|
.spinner { |
||||
|
border: 8px solid #f3f3f3; |
||||
|
/* Light grey */ |
||||
|
border-top: 8px solid #3498db; |
||||
|
/* Blue */ |
||||
|
border-radius: 50%; |
||||
|
width: 50px; |
||||
|
height: 50px; |
||||
|
animation: spin 45s linear infinite; |
||||
|
} |
||||
|
|
||||
|
@keyframes spin { |
||||
|
0% { |
||||
|
transform: rotate(0deg); |
||||
|
} |
||||
|
|
||||
|
100% { |
||||
|
transform: rotate(360deg); |
||||
|
} |
||||
|
} |
||||
|
</style> |
||||
|
|
||||
|
<video id="video-player" controls loop style="display: none;"> |
||||
|
<source id="video-source" type="video/mp4"> |
||||
|
Your browser does not support the video tag. |
||||
|
</video> |
||||
|
<script> |
||||
|
// Move the iframe one entry back in its own session history.
function goBack() {
    const frame = document.getElementById('iframe');
    frame.contentWindow.history.back();
}
||||
|
|
||||
|
// Move the iframe one entry forward in its own session history.
function goForward() {
    const frame = document.getElementById('iframe');
    frame.contentWindow.history.forward();
}
||||
|
|
||||
|
// function updateUrl(url) { |
||||
|
// document.getElementById('url').textContent = url; |
||||
|
// } |
||||
|
|
||||
|
// Show `url` in the "Source:" label at the bottom of the page.
function updateUrl(url) {
    const label = document.getElementById('url');
    label.textContent = url;
}
||||
|
|
||||
|
// Reveal the video player once a video has loaded.
// Currently not wired to any event: the onloadedmetadata hookup in updateVideo is commented out.
function handleVideoSuccess() {
    console.log("Video loaded successfully.");
    const player = document.getElementById('video-player');
    player.style.display = 'block';
}
||||
|
|
||||
|
// Error handler for the <source> element: hide the player and relabel the
// submit button so the user can generate a movie.
function handleVideoError() {
    console.log("Unable to load video.");
    const player = document.getElementById('video-player');
    const submitButton = document.getElementById('submit-button');
    player.style.display = 'none';
    submitButton.textContent = 'Generate Movie';
}
||||
|
|
||||
|
// Try to load and play the "latest" movie that belongs to `url`.
//
// The file name is derived from the URL: the protocol is dropped, dots become
// underscores, slashes become hyphens, and "latest.mp4" is appended after a
// trailing hyphen. (Presumably this mirrors the names the backend writes into
// its output folder; confirm against the animation code.)
// The player is shown only when playback actually starts.
function updateVideo(url) {
    const withoutProtocol = url.replace(/https?:\/\//, '');
    const underscored = withoutProtocol.replace(/\./g, '_');
    let formattedPath = underscored.replace(/\//g, '-');

    // A URL without a trailing slash still needs the separator before "latest".
    if (!formattedPath.endsWith('-')) {
        formattedPath += '-';
    }

    const videoPath = `${formattedPath}latest.mp4`;
    const videoPlayer = document.getElementById('video-player');
    const videoSource = document.getElementById('video-source');

    videoPlayer.muted = true;
    // Register the error handler before assigning the source.
    videoSource.onerror = handleVideoError;

    console.log("Fetched latest")
    videoSource.src = `/videos/${videoPath}`;
    videoPlayer.load();

    const onPlaying = () => {
        console.log("Video loaded and playing.");
        videoPlayer.style.display = 'block';
    };
    const onFailure = (error) => {
        console.log("Failed to play video: ", error);
        videoPlayer.style.display = 'none';
    };
    videoPlayer.play().then(onPlaying).catch(onFailure);

    document.getElementById('submit-button').textContent = 'Generate Latest Movie';
}
||||
|
|
||||
|
// React to 'urlUpdate' messages posted by the proxied page inside the iframe:
// refresh the source label, show or hide the submit button and video
// depending on eligibility, and update the link behind the share button.
window.addEventListener('message', function (event) {
    // Ignore anything that does not come from the app's own origin.
    if (event.origin !== '{{ host }}') {
        return;
    }

    const message = event.data;
    if (!message || message.type !== 'urlUpdate') {
        return;
    }

    const submitButton = document.getElementById('submit-button');
    const videoPlayer = document.getElementById('video-player');
    updateUrl(message.url);

    if (message.eligible) {
        submitButton.style.display = 'block';
        updateVideo(message.url);
    } else {
        submitButton.style.display = 'none';
        videoPlayer.style.display = 'none';
    }

    // Build the /iframe/... address that reproduces the current location.
    const newSubpath = new URL(message.url).pathname;
    const newPath = `/iframe${newSubpath}`;
    const shareButton = document.getElementById('share-button');
    shareButton.setAttribute('data-url', window.location.origin + newPath);
    // history.pushState({ path: newPath }, '', newPath);
});
||||
|
|
||||
|
// Ask the backend to build a new movie for the URL currently shown in the
// "Source:" label. While the request is in flight the spinner replaces the
// submit button; afterwards the button is restored whatever the outcome.
// On success the video for that URL is reloaded.
function submitUrl() {
    const url = document.getElementById('url').textContent;
    const payload = { url: url };
    const spinner = document.getElementById('loading-spinner');
    const submitButton = document.getElementById('submit-button');

    spinner.style.display = 'block';
    submitButton.style.display = 'none';
    console.log("Requesting new video.")

    fetch('{{ api_url }}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(payload)
    })
        .then(response => {
            // fetch() resolves for HTTP error statuses too; route them to the
            // catch handler instead of treating them as a successful run.
            if (!response.ok) {
                throw new Error(`API request failed with status ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            console.log(data);
            updateUrl(url);
            // Re-attempt to load the video
            updateVideo(url);
        })
        .catch(error => {
            console.error('Error:', error);
        })
        .finally(() => {
            // Single place that restores the UI for both outcomes.
            spinner.style.display = 'none';
            submitButton.style.display = 'block';
        });
}
||||
|
|
||||
|
// Copy the share button's `data-url` attribute to the clipboard and confirm
// with an alert; failures are only logged to the console.
function copyUrlToClipboard() {
    const shareButton = document.getElementById('share-button');
    const url = shareButton.getAttribute('data-url');

    const onCopied = () => {
        alert('URL copied to clipboard!');
    };
    const onFailed = (err) => {
        console.error('Failed to copy: ', err);
    };
    navigator.clipboard.writeText(url).then(onCopied).catch(onFailed);
}
||||
|
|
||||
|
</script> |
||||
|
<button id="share-button" onclick="copyUrlToClipboard()">Share Link</button> |
||||
|
|
||||
|
<div align="center" id="current-url">Source: <span id="url">Loading...</span></div> |
||||
|
</body> |
||||
|
|
||||
|
</html> |
@ -0,0 +1,97 @@ |
|||||
|
import argparse |
||||
|
import logging |
||||
|
import os |
||||
|
import sqlite3 |
||||
|
|
||||
|
# Setup basic configuration for logging |
||||
|
logging.basicConfig( |
||||
|
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" |
||||
|
) |
||||
|
|
||||
|
|
||||
|
def initialize_db(db_path):
    """Create and configure the SQLite database at ``db_path`` if it is missing.

    An existing file is left untouched (only a log line is emitted).
    Otherwise the database is created and ``configure_database`` is applied
    to it. Errors are logged rather than raised, so callers always get
    ``None`` back.

    Args:
        db_path: Filesystem path of the SQLite database file.
    """
    from contextlib import closing  # local import keeps this block self-contained

    if os.path.exists(db_path):
        logging.info(f"{db_path} exists")
        return

    try:
        # sqlite3's own connection context manager only commits or rolls back;
        # it does NOT close the connection. closing() guarantees the handle
        # (and any lock it holds) is released before returning.
        with closing(sqlite3.connect(db_path)) as conn:
            configure_database(conn)
    except sqlite3.Error as e:
        logging.error(f"Database error: {e}")
    except Exception as e:
        logging.error(f"Exception in initialize_db: {e}")
||||
|
|
||||
|
|
||||
|
def configure_database(conn):
    """Apply a fixed list of PRAGMA settings to ``conn`` and commit.

    The settings are, in order: WAL journal mode, NORMAL synchronous level, a
    5000 ms busy timeout, a cache size of -32000, a 536870912-byte mmap size,
    EXCLUSIVE locking mode and foreign-key enforcement.

    NOTE(review): the final log line says "multi-user" while the locking mode
    requested is EXCLUSIVE. Also, per the SQLite PRAGMA documentation most of
    these settings apply to the current connection only; confirm which ones
    are expected to persist in the file.

    Args:
        conn: An open ``sqlite3`` connection.
    """
    pragma_statements = (
        "PRAGMA journal_mode = WAL;",
        "PRAGMA synchronous = NORMAL;",
        "PRAGMA busy_timeout = 5000;",
        "PRAGMA cache_size = -32000;",
        "PRAGMA mmap_size = 536870912;",
        "PRAGMA locking_mode = EXCLUSIVE;",
        "PRAGMA foreign_keys = ON;",
    )
    cursor = conn.cursor()
    for statement in pragma_statements:
        cursor.execute(statement)
    conn.commit()

    logging.info("Set up database with multi-user optimizations.")
||||
|
|
||||
|
|
||||
|
def batch_transact(db_path, operations):
    """Run every SQL statement in ``operations`` inside a single transaction.

    Either all statements are committed or, if any of them fails, the whole
    batch is rolled back. Errors are logged rather than raised.

    Args:
        db_path: Filesystem path of the SQLite database file.
        operations: Iterable of SQL statement strings, executed in order.
    """
    from contextlib import closing  # local import keeps this block self-contained

    try:
        # closing() releases the connection; the inner `conn` context manager
        # commits on success and rolls back if a statement raises.
        with closing(sqlite3.connect(db_path)) as conn, conn:
            cursor = conn.cursor()
            # Explicit BEGIN so statements that do not open a transaction
            # implicitly (e.g. DDL) are part of the same atomic batch.
            cursor.execute("BEGIN TRANSACTION;")
            for operation in operations:
                cursor.execute(operation)
    except sqlite3.Error as e:
        logging.error(f"Database error during batch transaction: {e}")
    except Exception as e:
        logging.error(f"Exception in batch_transact: {e}")
||||
|
|
||||
|
|
||||
|
def maintenance(db_path):
    """Run ``PRAGMA optimize`` followed by ``VACUUM`` on the database.

    Errors are logged rather than raised.

    Args:
        db_path: Filesystem path of the SQLite database file.
    """
    from contextlib import closing  # local import keeps this block self-contained

    try:
        # closing() makes sure the connection is released; sqlite3's own
        # context manager would only commit, not close.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.execute("PRAGMA optimize;")
            conn.execute("VACUUM;")
    except sqlite3.Error as e:
        logging.error(f"Database error during maintenance: {e}")
    except Exception as e:
        logging.error(f"Exception in maintenance: {e}")
||||
|
|
||||
|
|
||||
|
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``db_path``, the path to the SQLite
    database file.
    """
    cli = argparse.ArgumentParser(
        description="Initialize and manage an SQLite database."
    )
    cli.add_argument("db_path", type=str, help="Path to the SQLite database file.")
    return cli.parse_args()
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Script entry point: take the database path from the command line and
    # create/configure the database there if it does not exist yet.
    initialize_db(parse_args().db_path)
@ -0,0 +1,10 @@ |
|||||
|
# None of these targets produce files; declare them phony so a file or
# directory with the same name can never make them look up to date.
.PHONY: run build lint

# Default target: build the image, then run it with the web app (9021) and
# Prefect (4200) ports published
run: build
	docker run --rm -ti --name noaa -e HOST_NAME=localhost -p 9021:9021 -p 4200:4200 noaa

build:
	docker build -t noaa .

# Sort imports and format the code
lint:
	isort --profile=black .
	black .
||||
|
|
@ -0,0 +1,266 @@ |
|||||
|
import os |
||||
|
import re |
||||
|
from datetime import datetime, timedelta |
||||
|
from io import BytesIO |
||||
|
from typing import Dict, Iterator, List |
||||
|
|
||||
|
import httpx |
||||
|
# import imageio |
||||
|
import numpy as np |
||||
|
from moviepy.editor import ImageSequenceClip |
||||
|
from PIL import Image |
||||
|
from prefect import flow, task |
||||
|
from prefect.task_runners import ConcurrentTaskRunner |
||||
|
from prefect.tasks import task_input_hash |
||||
|
|
||||
|
BASE_URL = "https://services.swpc.noaa.gov/images/animations/geospace/" |
||||
|
|
||||
|
|
||||
|
def _extract_image_links(webpage_content: str, base_url: str, exts: List[str]) -> List[str]:
    """Return absolute URLs of all ``href`` targets ending in one of ``exts``.

    Links whose file name ends in ``latest.<ext>`` are skipped. Relative links
    are resolved against ``base_url``. Duplicates are dropped while keeping
    first-seen order.
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    # Require the dot so e.g. "foopng" is not mistaken for a PNG file.
    suffixes = tuple("." + e.lower().lstrip(".") for e in exts)
    latest_suffixes = tuple("latest" + suffix for suffix in suffixes)

    links = []
    for href in re.findall(r'href="([^"]*)"', webpage_content):
        lowered = href.lower()
        if not lowered.endswith(suffixes):
            continue
        if lowered.endswith(latest_suffixes):
            print("Skipping latest")
            continue
        links.append(href if href.startswith("http") else urljoin(base_url, href))
    return list(dict.fromkeys(links))


@task(
    retries=3,
    retry_delay_seconds=5,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=2),
    log_prints=True,
)
def get_file_links(url: str, ext: str | None = None) -> List[str]:
    """Scrape the page at ``url`` and return the image links found on it.

    Args:
        url: Address of the page to scrape; also the base for relative links.
        ext: File extension to look for (e.g. ``"png"``). When ``None``,
            ``png``, ``jpg`` and ``jpeg`` are all accepted.

    Returns:
        A list of absolute image URLs. A concrete list is returned, not a
        generator, so callers can take its length and sort it.

    Raises:
        httpx.HTTPStatusError: If the page request returns an error status.
    """
    response = httpx.get(url)
    response.raise_for_status()
    if ext is None:
        print("Extension not supplied. Inferring (less efficient) png/jpg/jpeg")
        exts = ["png", "jpg", "jpeg"]
    else:
        exts = [ext]
    return _extract_image_links(response.text, url, exts)
||||
|
|
||||
|
|
||||
|
def url_tail_hash(context, parameters):
    """Cache-key function: return the text after the last ``/`` of ``parameters["url"]``.

    For a URL ending in ``/`` the result is the empty string. ``context`` is
    not used.
    """
    url = parameters["url"]
    return url.rsplit("/", 1)[-1]
||||
|
|
||||
|
|
||||
|
def out_path_hash(context, parameters):
    """Cache-key function: output path suffixed with ``_L<number of images>``.

    ``context`` is not used.
    """
    output_path = parameters["output_path"]
    image_count = len(parameters["images"])
    return output_path + "_L" + str(image_count)
||||
|
|
||||
|
|
||||
|
@task(
    retries=5,
    retry_delay_seconds=1,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=5),
    result_storage_key="{parameters[url]}",
)
def get_content(url: str, params: Dict[str, object] | None = None):
    """Download ``https://<url>`` and return the response body as bytes.

    Args:
        url: Host and path WITHOUT a scheme; ``https://`` is prepended here.
        params: Optional query parameters passed through to ``httpx.get``.

    Returns:
        The raw response content, or ``None`` when the server answers with an
        error status. Transport-level failures are not caught and propagate
        to the caller.
    """
    # The annotation uses `object` for the values: the previous `any` is the
    # builtin function, not a type.
    response = httpx.get(f"https://{url}", params=params)
    try:
        response.raise_for_status()
    except httpx.HTTPStatusError:
        return None
    return response.content
||||
|
|
||||
|
|
||||
|
def preview_urls(urls):
    """Print the first five and the last five entries of ``urls``."""
    sections = (
        ("URLS (head):", urls[:5]),
        ("URLS (tail):", urls[-5:]),
    )
    for heading, sample in sections:
        print(heading)
        print(sample)
||||
|
|
||||
|
|
||||
|
@task(
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(hours=1),
)
def get_images(urls: List[str], limit: int = 0):
    """Download the images behind ``urls`` and pair each with its URL.

    Args:
        urls: Image URLs. Any ``http://`` / ``https://`` prefix is stripped
            before the URLs are handed to ``get_content``.
        limit: When positive, only the last ``limit`` URLs are fetched.

    Returns:
        A list of ``(url_without_scheme, content_bytes)`` tuples, in input
        order, containing only the downloads that returned content.
    """
    if limit > 0:
        print(f"Limiting to {limit} urls")
        urls = urls[-limit:]

    urls = [url.replace("https://", "").replace("http://", "") for url in urls]
    preview_urls(urls)

    futures = get_content.map(urls)
    images = []
    for url, future in zip(urls, futures):
        content = future.result()  # resolve each future exactly once
        if content is not None:
            images.append((url, content))
    return images
||||
|
|
||||
|
|
||||
|
def extract_timestamp_from_url(url: str) -> str:
    """Return the first ``YYYYMMDD_HHMMSS``-shaped digit group found in ``url``.

    The pattern is eight digits, an underscore, then six digits. An empty
    string is returned when the URL contains no such group.
    """
    found = re.search(r"\d{8}_\d{6}", url)
    if found is None:
        return ""
    return found.group(0)
||||
|
|
||||
|
|
||||
|
# @task( |
||||
|
# cache_key_fn=out_path_hash, |
||||
|
# cache_expiration=timedelta(minutes=3), |
||||
|
# result_storage_key="{parameters[output_path]}", |
||||
|
# ) |
||||
|
# def create_animation( |
||||
|
# images: List[bytes], output_path: str, duration: float = 0.5 |
||||
|
# ) -> None: |
||||
|
# if not images: |
||||
|
# raise ValueError("No images!") |
||||
|
# pil_images = [Image.open(BytesIO(img_data)).convert("RGB") for img_data in images] |
||||
|
# imageio.mimsave(output_path, pil_images, duration=duration) |
||||
|
# return output_path |
||||
|
|
||||
|
|
||||
|
def make_even_dimensions(image):
    """Return ``image`` resized so that width and height are both even.

    An odd dimension is reduced by one pixel; even dimensions are unchanged.

    Args:
        image: A PIL image (anything exposing ``size`` and ``resize``).
    """
    width, height = image.size
    if width % 2 == 1:
        width -= 1
    if height % 2 == 1:
        height -= 1
    # Image.ANTIALIAS no longer exists in Pillow 10 (the version pinned in
    # requirements.txt); LANCZOS is the same filter under its current name.
    return image.resize((width, height), Image.LANCZOS)
||||
|
|
||||
|
|
||||
|
def crop_to_even(image):
    """Return ``image`` cropped from the top-left so both dimensions are even.

    At most one pixel column and one pixel row are removed.
    """
    width, height = image.size
    # Subtracting the remainder drops one pixel only when the size is odd.
    even_width = width - (width % 2)
    even_height = height - (height % 2)
    return image.crop((0, 0, even_width, even_height))
||||
|
|
||||
|
|
||||
|
@task(
    cache_key_fn=out_path_hash,
    cache_expiration=timedelta(hours=4),
    result_storage_key="{parameters[output_path]}",
)
def create_mp4_animation(images: List[bytes], output_path: str, fps: int = 24) -> str:
    """Encode ``images`` into an H.264 MP4 file at ``output_path``.

    Each image is decoded, converted to RGB and cropped to even dimensions
    before being used as a frame.

    Args:
        images: Encoded image files (bytes), in playback order.
        output_path: Destination file path; its directory is created if missing.
        fps: Frames per second of the resulting clip.

    Returns:
        ``output_path``, for convenience.
    """
    frames = [
        np.array(crop_to_even(Image.open(BytesIO(img_data)).convert("RGB")))
        for img_data in images
    ]

    # Make sure the destination folder exists so the encoder can open the file.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(
        output_path,
        codec="libx264",
        ffmpeg_params=["-pix_fmt", "yuv420p"],
        preset="medium",
        bitrate="800k",
    )

    return output_path
||||
|
|
||||
|
|
||||
|
def format_output_name(url: str, latest: bool = False):
    """Turn ``url`` into a flat file-name stem.

    The ``https://`` / ``http://`` markers are removed, ``/`` becomes ``-``
    and ``.`` becomes ``_``. The stem ends with ``latest`` when ``latest`` is
    true, otherwise with the current local time as ``YYYYMMDD-HH:MM:SS``.
    """
    suffix = "latest" if latest else datetime.now().strftime("%Y%m%d-%H:%M:%S")
    without_scheme = url.replace("https://", "").replace("http://", "")
    flattened = without_scheme.translate(str.maketrans({"/": "-", ".": "_"}))
    return flattened + suffix
||||
|
|
||||
|
|
||||
|
@task(
    name="animate",
    retries=0,
    retry_delay_seconds=1,
    log_prints=True,
    cache_key_fn=task_input_hash,
    cache_expiration=timedelta(minutes=3),
)
def animate(
    url: str = "https://services.swpc.noaa.gov/images/animations/geospace/density/",
    ext: str | None = "png",
    latest: bool = True,
    limit: int = 0,
):
    """Build an MP4 animation from the images linked on the page at ``url``.

    Steps: scrape the image links, download the images, order them by the
    timestamp embedded in each URL, and encode them into
    ``out/<formatted name>.mp4``.

    Args:
        url: Page listing the images.
        ext: Image extension to look for; ``None`` lets the scraper infer it.
        latest: Name the output ``...latest.mp4`` instead of time-stamping it.
        limit: When positive, only the last ``limit`` links (after sorting)
            are downloaded.

    Returns:
        Whatever ``create_mp4_animation`` returns for the written file.

    Raises:
        ValueError: If no links were scraped or no images could be retrieved.
    """
    # Materialize the links: the scraper may hand back a lazy iterable, and
    # both the emptiness check and the sort need a real sequence.
    urls = list(get_file_links(url, ext))
    if not urls:
        raise ValueError("No urls scraped")

    images = get_images(sorted(urls), limit=limit)
    if not images:
        raise ValueError("No images retrieved.")
    print(f"Retrieved {len(images)} images.")

    sorted_images = sorted(images, key=lambda x: extract_timestamp_from_url(x[0]))
    print("Head:")
    print([u for u, _ in sorted_images[:5]])

    frames = [content for _, content in sorted_images]
    out_name = format_output_name(url, latest=latest)
    return create_mp4_animation(frames, f"out/{out_name}.mp4")
||||
|
|
||||
|
|
||||
|
def deploy_name():
    """Return the current UTC time formatted like ``2024-05-01T12:34:56Z``."""
    timestamp = datetime.utcnow()
    return f"{timestamp:%Y-%m-%dT%H:%M:%S}Z"
||||
|
|
||||
|
|
||||
|
@flow(
    name="create-animations",
    retries=0,
    retry_delay_seconds=1,
    log_prints=True,
    task_runner=ConcurrentTaskRunner(),
    flow_run_name=None,
    timeout_seconds=90,
)
def create_animations(
    url: str | List[str] = BASE_URL + "velocity/",
    ext: str | None = None,
    latest: bool = False,
    limit: int = 0,
):
    """Flow: run the ``animate`` task for one URL or for each URL in a list.

    A single string is treated as a one-element list. ``ext``, ``latest`` and
    ``limit`` are passed to every mapped ``animate`` call.

    Returns:
        The futures produced by ``animate.map``.
    """
    targets = [url] if isinstance(url, str) else url
    return animate.map(targets, ext, latest, limit)
||||
|
|
||||
|
|
||||
|
if __name__ == "__main__":
    # Script entry point: serve the flow as the "noaa-animate" deployment with
    # default parameters covering the three geospace animation folders.
    from prefect.client.schemas.schedules import CronSchedule

    # Every 15 minutes, Denver time. Defined but not attached: serve() below
    # is called with schedule=None.
    sched = CronSchedule(cron="*/15 * * * *", timezone="America/Denver")

    links = [BASE_URL + folder for folder in ("density/", "velocity/", "pressure/")]
    sched_params = {
        "latest": True,
        "url": links,
        "ext": "png",
        "limit": 0,
    }
    create_animations.serve(
        "noaa-animate",
        limit=8,
        schedule=None,
        parameters=sched_params,
    )
After Width: | Height: | Size: 448 KiB |
After Width: | Height: | Size: 408 KiB |
@ -0,0 +1,14 @@ |
|||||
|
active = "default"

[profiles.default]
# Settings only take effect inside a [profiles.<name>] table, so the API URL
# lives here rather than at the top level of the file.
# NOTE(review): with this in effect, clients talk to the Prefect server over
# HTTP, so the server must be up before they connect. Confirm start.sh's
# startup delay is sufficient.
PREFECT_API_URL = "http://0.0.0.0:4200/api"

PREFECT_TASK_SCHEDULING_MAX_SCHEDULED_QUEUE_SIZE = 4

PREFECT_API_SERVICES_SCHEDULER_DEPLOYMENT_BATCH_SIZE = 100
PREFECT_API_SERVICES_SCHEDULER_ENABLED = true
PREFECT_API_SERVICES_SCHEDULER_INSERT_BATCH_SIZE = 500
PREFECT_API_SERVICES_SCHEDULER_LOOP_SECONDS = 60
PREFECT_API_SERVICES_SCHEDULER_MIN_RUNS = 3
PREFECT_API_SERVICES_SCHEDULER_MAX_RUNS = 100
PREFECT_API_SERVICES_SCHEDULER_MIN_SCHEDULED_TIME = '0:30:00'
PREFECT_API_SERVICES_SCHEDULER_MAX_SCHEDULED_TIME = '0 days, 8:00:00'
@ -0,0 +1,9 @@ |
|||||
|
prefect==2.17.1 |
||||
|
Flask==3.0.3 |
||||
|
gunicorn==22.0.0 |
||||
|
gevent==24.2.1 |
||||
|
moviepy==1.0.3 |
||||
|
pillow==10.3.0 |
||||
|
requests==2.32.3 |
||||
|
httpx==0.27.0 |
||||
|
# imageio==2.34.1 |
@ -0,0 +1,15 @@ |
|||||
|
#!/bin/bash
# Container start script: launches the web app, the Prefect server and the
# flow deployment as background jobs, then blocks until they exit.

# Web app: run the default target of app/Makefile from inside app/
(cd app && make) &

# Prefect server, listening on all interfaces
prefect server start --host 0.0.0.0 &

# Fixed pause before starting the deployment process
sleep 10

# Serve the deployment
python noaa_animate.py &

# Keep the script (and with it the container) alive while the jobs run
wait
||||
|
|
Loading…
Reference in new issue