-
Notifications
You must be signed in to change notification settings - Fork 8
fix: resolve unsafe concurrency in Android JNI layer #261
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,105 +1,60 @@ | ||
| //! Android JNI bindings for Forge App | ||
| //! | ||
| //! This module provides JNI functions to start and stop the Forge server from Android. | ||
| //! | ||
| //! ## Concurrency Safety | ||
| //! | ||
| //! This module uses proper synchronization to handle concurrent JNI calls: | ||
| //! - `LazyLock` for safe static initialization of the mutex | ||
| //! - Mutex lock error handling to avoid panics on poisoned mutexes | ||
| //! - Double-start prevention to avoid spawning multiple servers | ||
|
|
||
| #[cfg(target_os = "android")] | ||
| #[cfg(feature = "android")] | ||
| use jni::JNIEnv; | ||
| #[cfg(target_os = "android")] | ||
| use jni::objects::{JClass, JString}; | ||
| #[cfg(target_os = "android")] | ||
| #[cfg(feature = "android")] | ||
| use jni::objects::JClass; | ||
| #[cfg(feature = "android")] | ||
| use jni::sys::jint; | ||
| use std::sync::{Mutex, OnceLock}; | ||
| use std::sync::{LazyLock, Mutex, OnceLock}; | ||
| use tokio::runtime::Runtime; | ||
| use tokio::sync::oneshot; | ||
|
|
||
| static RUNTIME: OnceLock<Runtime> = OnceLock::new(); | ||
| static SERVER_HANDLE: Mutex<Option<tokio::task::JoinHandle<()>>> = Mutex::new(None); | ||
| static LAST_ERROR: Mutex<Option<String>> = Mutex::new(None); | ||
| static SERVER_HANDLE: LazyLock<Mutex<Option<tokio::task::JoinHandle<()>>>> = | ||
| LazyLock::new(|| Mutex::new(None)); | ||
|
|
||
| /// Initialize the Tokio runtime (called once) | ||
| fn get_runtime() -> &'static Runtime { | ||
| RUNTIME.get_or_init(|| { | ||
| // Initialize android_logger for Android (once) | ||
| #[cfg(target_os = "android")] | ||
| android_logger::init_once( | ||
| android_logger::Config::default() | ||
| .with_max_level(log::LevelFilter::Debug) | ||
| .with_tag("ForgeApp"), | ||
| ); | ||
|
|
||
| #[cfg(target_os = "android")] | ||
| unsafe { | ||
| std::env::set_var("RUST_LOG", "debug"); | ||
| } | ||
|
|
||
| // Initialize tracing subscriber | ||
| #[cfg(not(target_os = "android"))] | ||
| // Initialize tracing for Android (once) | ||
| tracing_subscriber::fmt::init(); | ||
|
|
||
| Runtime::new().expect("Failed to create Tokio runtime") | ||
| }) | ||
| } | ||
|
|
||
| fn set_last_error(error: String) { | ||
| *LAST_ERROR.lock().unwrap() = Some(error.clone()); | ||
|
|
||
| if let Ok(data_dir) = std::env::var("FORGE_DATA_DIR") { | ||
| let error_file = format!("{}/forge-last-error.txt", data_dir); | ||
| if let Err(e) = std::fs::write(&error_file, &error) { | ||
| tracing::error!("Failed to write error to file {}: {}", error_file, e); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[cfg(target_os = "android")] | ||
| #[unsafe(no_mangle)] | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_getLastError<'local>( | ||
| env: JNIEnv<'local>, | ||
| _class: JClass<'local>, | ||
| ) -> JString<'local> { | ||
| let error = LAST_ERROR | ||
| .lock() | ||
| .unwrap() | ||
| .clone() | ||
| .unwrap_or_else(|| "Unknown error".to_string()); | ||
|
|
||
| env.new_string(error) | ||
| .unwrap_or_else(|_| env.new_string("Failed to create error string").unwrap()) | ||
| } | ||
|
|
||
| #[cfg(target_os = "android")] | ||
| #[unsafe(no_mangle)] | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_setDataDir( | ||
| mut env: JNIEnv, | ||
| _class: JClass, | ||
| data_dir: JString, | ||
| ) { | ||
| let data_dir_str: String = env | ||
| .get_string(&data_dir) | ||
| .expect("Failed to get data_dir string") | ||
| .into(); | ||
|
|
||
| unsafe { | ||
| std::env::set_var("FORGE_DATA_DIR", &data_dir_str); | ||
| std::env::set_var( | ||
| "DATABASE_URL", | ||
| format!("sqlite:///{}/forge.db", data_dir_str), | ||
| ); | ||
| std::env::set_var( | ||
| "SQLX_DATABASE_URL", | ||
| format!("sqlite:///{}/forge.db", data_dir_str), | ||
| ); | ||
| } | ||
|
|
||
| tracing::info!("Android data directory set to: {}", data_dir_str); | ||
| } | ||
| /// Error code returned when the server is already running | ||
| #[cfg(feature = "android")] | ||
| const ERR_ALREADY_RUNNING: jint = -1; | ||
| /// Error code returned when mutex lock fails (poisoned) | ||
| #[cfg(feature = "android")] | ||
| #[allow(dead_code)] // Reserved for future use | ||
| const ERR_LOCK_FAILED: jint = -2; | ||
| /// Error code returned when server fails to start | ||
| #[cfg(feature = "android")] | ||
| const ERR_SERVER_START_FAILED: jint = -3; | ||
|
|
||
| /// Start the Forge server and return the port number | ||
| /// | ||
| /// This function blocks until the server successfully binds to the port, | ||
| /// preventing race conditions where the WebView tries to connect before | ||
| /// the server is ready. | ||
| #[cfg(target_os = "android")] | ||
| /// | ||
| /// # Returns | ||
| /// - Positive value: The port number the server is listening on | ||
| /// - `-1` (`ERR_ALREADY_RUNNING`): Server is already running | ||
| /// - `-2` (`ERR_LOCK_FAILED`): Failed to acquire lock (mutex poisoned) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The documentation states that |
||
| /// - `-3` (`ERR_SERVER_START_FAILED`): Server failed to start | ||
| #[cfg(feature = "android")] | ||
| #[unsafe(no_mangle)] | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_startServer( | ||
| _env: JNIEnv, | ||
|
Comment on lines
+57
to
60
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The Android JNI module now only exports Useful? React with 👍 / 👎. |
||
|
|
@@ -114,95 +69,91 @@ pub extern "C" fn Java_ai_namastex_forge_MainActivity_startServer( | |
| .and_then(|p| p.parse().ok()) | ||
| .unwrap_or(8887); | ||
|
|
||
| tracing::info!("Starting Forge server on port {}", port); | ||
| // Acquire lock with proper error handling for poisoned mutex | ||
| let mut guard = match SERVER_HANDLE.lock() { | ||
| Ok(guard) => guard, | ||
| Err(poisoned) => { | ||
| tracing::warn!("SERVER_HANDLE mutex was poisoned, recovering"); | ||
| // Recover from poisoned mutex - this is safe because we just store a JoinHandle | ||
| poisoned.into_inner() | ||
| } | ||
| }; | ||
|
|
||
| // Check if server is already running (double-start prevention) | ||
| if let Some(ref handle) = *guard { | ||
| if !handle.is_finished() { | ||
| tracing::warn!("Server already running, ignoring duplicate start request"); | ||
| return ERR_ALREADY_RUNNING; | ||
| } | ||
| // Previous server finished, clean up the old handle | ||
| tracing::info!("Previous server finished, starting new instance"); | ||
| } | ||
|
|
||
| // Create oneshot channel to signal when server is ready | ||
| let (ready_tx, ready_rx) = oneshot::channel(); | ||
|
|
||
| // Spawn server in background with error capture | ||
| // Spawn server in background | ||
| let handle = runtime.spawn(async move { | ||
| if let Err(e) = crate::run_server_with_readiness(Some(ready_tx)).await { | ||
| let error_msg = format!("Server initialization failed: {}", e); | ||
| tracing::error!("{}", error_msg); | ||
| set_last_error(error_msg); | ||
| tracing::error!("Server error: {}", e); | ||
| } | ||
| }); | ||
|
|
||
| // Block until server is ready to accept connections (with timeout) | ||
| let ready_result = runtime.block_on(async { | ||
| tokio::time::timeout(std::time::Duration::from_secs(10), ready_rx).await | ||
| }); | ||
| // Store handle before blocking to allow stopServer to work during startup | ||
| *guard = Some(handle); | ||
|
|
||
| match ready_result { | ||
| Ok(Ok(_)) => { | ||
| tracing::info!("Server ready on port {}", port); | ||
| *SERVER_HANDLE.lock().unwrap() = Some(handle); | ||
| port as jint | ||
| } | ||
| Ok(Err(_)) => { | ||
| runtime.block_on(async { | ||
| for _ in 0..10 { | ||
| if LAST_ERROR.lock().unwrap().is_some() { | ||
| break; | ||
| } | ||
| tokio::time::sleep(std::time::Duration::from_millis(50)).await; | ||
| } | ||
| }); | ||
|
|
||
| let has_specific_error = LAST_ERROR.lock().unwrap().is_some(); | ||
| if !has_specific_error { | ||
| set_last_error( | ||
| "Server failed to signal readiness - check initialization".to_string(), | ||
| ); | ||
| // Release the lock before blocking to avoid holding it during potentially long wait | ||
| drop(guard); | ||
|
|
||
| // Block until server is ready to accept connections | ||
| let ready = runtime.block_on(async { | ||
| match ready_rx.await { | ||
| Ok(_) => { | ||
| tracing::info!("Server ready on port {}", port); | ||
| true | ||
| } | ||
| tracing::error!("Server failed to signal readiness"); | ||
| handle.abort(); | ||
| -1 | ||
| } | ||
| Err(_) => { | ||
| runtime.block_on(async { | ||
| for _ in 0..10 { | ||
| if LAST_ERROR.lock().unwrap().is_some() { | ||
| break; | ||
| } | ||
| tokio::time::sleep(std::time::Duration::from_millis(50)).await; | ||
| } | ||
| }); | ||
|
|
||
| let has_specific_error = LAST_ERROR.lock().unwrap().is_some(); | ||
| if !has_specific_error { | ||
| set_last_error( | ||
| "Server startup timeout (10s) - initialization took too long".to_string(), | ||
| ); | ||
| Err(_) => { | ||
| tracing::error!("Server failed to signal readiness"); | ||
| false | ||
| } | ||
| tracing::error!("Server startup timeout"); | ||
| handle.abort(); | ||
| -1 | ||
| } | ||
| }); | ||
|
|
||
| if ready { | ||
| port as jint | ||
| } else { | ||
| ERR_SERVER_START_FAILED | ||
| } | ||
| } | ||
|
|
||
| /// Stop the Forge server | ||
| #[cfg(target_os = "android")] | ||
| /// | ||
| /// This function safely stops the server if it is running. | ||
| /// It handles mutex poisoning gracefully and logs all operations. | ||
| #[cfg(feature = "android")] | ||
| #[unsafe(no_mangle)] | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_getLogsPath<'local>( | ||
| env: JNIEnv<'local>, | ||
| _class: JClass<'local>, | ||
| ) -> JString<'local> { | ||
| let logs_path = if let Ok(data_dir) = std::env::var("FORGE_DATA_DIR") { | ||
| format!("{}/forge-debug.log", data_dir) | ||
| } else { | ||
| "/tmp/forge-debug.log".to_string() | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_stopServer( | ||
| _env: JNIEnv, | ||
| _class: JClass, | ||
| ) { | ||
| // Acquire lock with proper error handling for poisoned mutex | ||
| let mut guard = match SERVER_HANDLE.lock() { | ||
| Ok(guard) => guard, | ||
| Err(poisoned) => { | ||
| tracing::warn!("SERVER_HANDLE mutex was poisoned during stop, recovering"); | ||
| poisoned.into_inner() | ||
| } | ||
| }; | ||
|
|
||
| env.new_string(logs_path) | ||
| .unwrap_or_else(|_| env.new_string("").unwrap()) | ||
| } | ||
|
|
||
| #[cfg(target_os = "android")] | ||
| #[unsafe(no_mangle)] | ||
| pub extern "C" fn Java_ai_namastex_forge_MainActivity_stopServer(_env: JNIEnv, _class: JClass) { | ||
| if let Some(handle) = SERVER_HANDLE.lock().unwrap().take() { | ||
| handle.abort(); | ||
| if let Some(handle) = guard.take() { | ||
| if handle.is_finished() { | ||
| tracing::info!("Server was already stopped"); | ||
| } else { | ||
| tracing::info!("Stopping server..."); | ||
| handle.abort(); | ||
| tracing::info!("Server stop requested"); | ||
| } | ||
| } else { | ||
| tracing::debug!("stopServer called but no server was running"); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
android.rs now defines only the server handle and the start/stop JNI functions, removing the
setDataDir/getLastErrorbindings that previously usedLAST_ERROR. MainActivity.kt still declares those native methods (android/app/src/main/java/ai/namastex/forge/MainActivity.kt lines 23-27), so loading the library and invokingsetDataDirduring app startup will throwUnsatisfiedLinkError: Native method not found, preventing the Android app from running or reporting backend errors. The missing JNI exports need to be reinstated or the Kotlin declarations removed to avoid the crash.Useful? React with 👍 / 👎.