moksha/src/provider/namespace_provider.rs

373 lines
12 KiB
Rust

use crate::namespace::{NamespaceOption, NamespaceType};
use crate::unix::mounts::{FsType, MountDesc, MountOpt};
use crate::unix::MountHandler;
use crate::unix::ProcHandler;
use crate::provider::JailProvider;
use crate::errors::{Errno, Error};
use crate::types::Result;
use std::ffi::CString;
use std::path::PathBuf;
use std::fs;
use std::fs::File;
use std::os::unix::io::AsRawFd;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::str::FromStr;
use num_traits::FromPrimitive;
use syscalls::*;
// #define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
// #define CLONE_VM 0x00000100 /* set if VM shared between processes */
// #define CLONE_FS 0x00000200 /* set if fs info shared between processes */
// #define CLONE_FILES 0x00000400 /* set if open files shared between processes */
// #define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
// #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
// #define CLONE_IO 0x80000000 /* Clone io context */
/// Jail provider that runs a command inside Linux namespaces.
///
/// Holds the arguments given to `NamespaceProvider::new` plus two pieces of
/// interior-mutable state (`root_dir`, `mount_handler`) that `pivot_root`
/// replaces through a shared `&self` reference.
pub struct NamespaceProvider {
// Current root directory; replaced by `pivot_root` with `POSTP_ROOT` joined
// with `PIVOT_OLD_ROOT`, and used as the umount target in `setup_umount_old_root`.
root_dir: RefCell<PathBuf>,
// Directory to pivot into; `None` means no pivoting.
new_root: Option<PathBuf>,
// Pid whose namespaces are entered via `setns`; `None` means no entering
// (fresh namespaces are created with `unshare` instead).
pid: Option<u64>,
// Namespaces to unshare or enter.
namespaces: Vec<NamespaceType>,
// Path of the program handed to `execve`.
command: PathBuf,
// Argument vector handed to `execve`.
argv: Vec<String>,
// Environment handed to `execve`; `new` initialises it to an empty vector.
envp: Vec<String>,
// Helper used for mount/umount/chdir calls; replaced by `pivot_root` with a
// handler rooted at `POSTP_ROOT`.
mount_handler: RefCell<MountHandler>,
}
// Root path used once the pivot has happened: `pivot_root` installs it as the
// mount handler's `root_dir` and changes directory into it.
static POSTP_ROOT: &str = "/";
// Directory name joined onto `POSTP_ROOT` by `pivot_root` to form the new
// `root_dir` value, which `setup_umount_old_root` later unmounts.
static PIVOT_OLD_ROOT: &str = "mnt";
fn map_namespace_type(tp: NamespaceType) -> NamespaceOption {
match tp {
NamespaceType::Cgroup => NamespaceOption::Cgroup,
NamespaceType::Ipc => NamespaceOption::Ipc,
NamespaceType::Mnt => NamespaceOption::Mnt,
NamespaceType::Net => NamespaceOption::Net,
NamespaceType::Pid => NamespaceOption::PidForChildren,
NamespaceType::Time => NamespaceOption::TimeForChildren,
NamespaceType::User => NamespaceOption::User,
NamespaceType::Uts => NamespaceOption::Uts,
}
}
impl NamespaceProvider {
pub fn new(
root_dir: PathBuf,
new_root: Option<PathBuf>, // None = no pivoting
pid: Option<u64>, // None = no entering
namespaces: Vec<NamespaceType>,
command: PathBuf,
argv: Vec<String>,
) -> NamespaceProvider {
NamespaceProvider {
root_dir: RefCell::new(root_dir.clone()),
new_root,
pid,
namespaces,
command,
argv,
envp: vec![],
mount_handler: RefCell::new(MountHandler {
root_dir: root_dir.clone(),
}),
}
}
fn chdir_root(&self) -> Result<()> {
self.mount_handler.borrow().chdir(
self.new_root
.clone()
.unwrap_or(self.root_dir.take())
.to_str()
.unwrap()
.to_string(),
)?;
Ok(())
}
/// Makes up to three passes over the current mount table, trying to unmount
/// every entry.
///
/// `Proc` mounts are skipped until the final pass. The final pass hands `true`
/// as the second `umount` argument. Individual umount failures are logged at
/// debug level and otherwise ignored; only a failure to list the mounts is
/// returned. Does nothing when no `Mnt` namespace was requested.
fn setup_umounts(&self) -> Result<()> {
    if !self.namespaces.contains(&NamespaceType::Mnt) {
        log::info!("can't set up mounts without a 'mnt' namespace");
        return Ok(());
    }
    let max_attempts = 3;
    for attempt in 0..max_attempts {
        let is_last_attempt = attempt == (max_attempts - 1);
        // Re-read the mount table on every pass.
        let mounts = self.mount_handler.borrow().get_mounts()?;
        for desc in mounts.iter() {
            // proc is left alone until the last pass.
            if matches!(desc.partition_type, FsType::Proc) && !is_last_attempt {
                continue;
            }
            let failed = self
                .mount_handler
                .borrow()
                .umount(desc.clone(), is_last_attempt, false)
                .is_err();
            if failed {
                log::debug!("attempt #{} failed for {}", attempt, desc.target);
            }
        }
    }
    Ok(())
}
/// Unmounts the directory currently stored in `root_dir`, passing `true` for
/// both trailing `umount` arguments.
///
/// Does nothing (beyond logging) when no `Mnt` namespace was requested.
/// Panics if `root_dir` is not valid UTF-8.
fn setup_umount_old_root(&self) -> Result<()> {
    let has_mnt_ns = self.namespaces.contains(&NamespaceType::Mnt);
    if !has_mnt_ns {
        log::info!("can't set up mounts without a 'mnt' namespace");
        return Ok(());
    }
    let target = self.root_dir.borrow().to_str().unwrap().to_string();
    // Only the target matters here; the remaining fields are placeholders.
    let old_root_mount = MountDesc {
        source: String::new(),
        target,
        partition_type: FsType::NoneFs,
        options: Vec::new(),
        data: None,
    };
    let handler = self.mount_handler.borrow();
    handler.umount(old_root_mount, true, true)
}
/// Mounts `proc`, `sys` and `dev/pts` below the mount handler's root
/// directory, creating the mount point directories first.
///
/// Does nothing (beyond logging) when no `Mnt` namespace was requested.
/// Panics if any of the target paths is not valid UTF-8.
fn setup_mounts(&self) -> Result<()> {
    if !self.namespaces.contains(&NamespaceType::Mnt) {
        log::info!("can't set up mounts without a 'mnt' namespace");
        return Ok(());
    }
    let handler = self.mount_handler.borrow();
    // dir creation will have to be moved before pivoting
    let wanted = vec![
        ("proc", FsType::Proc, handler.root_dir.join("proc")),
        ("sys", FsType::Sysfs, handler.root_dir.join("sys")),
        (
            "devpts",
            FsType::Devpts,
            handler.root_dir.join("dev").join("pts"),
        ),
    ];
    // Build every descriptor before touching the filesystem.
    let mut mount_points = Vec::with_capacity(wanted.len());
    let mut descriptors = Vec::with_capacity(wanted.len());
    for (source, fs_type, path) in wanted {
        descriptors.push(MountDesc {
            source: source.to_string(),
            target: path.to_str().unwrap().to_string(),
            partition_type: fs_type,
            options: vec![],
            data: None,
        });
        mount_points.push(path);
    }
    for path in mount_points {
        fs::create_dir_all(path)?;
    }
    for descriptor in descriptors {
        handler.mount(descriptor)?;
    }
    Ok(())
}
fn pivot_root(&self) -> Result<i64> {
if !self.namespaces.contains(&NamespaceType::Mnt) {
log::error!("can't pivot root without a 'mnt' namespace");
return Ok(0);
}
let new_root_str = self.new_root.clone().unwrap().to_str().unwrap().to_string();
let new_root_mount = MountDesc {
source: new_root_str.clone(),
target: new_root_str.clone(),
partition_type: FsType::NoneFs,
options: vec![MountOpt::Bind],
data: None,
};
log::debug!("bind mounting new root");
self.mount_handler.borrow().mount(new_root_mount)?;
let put_old_path = self.new_root.clone().unwrap().join("mnt");
let put_old = put_old_path.to_str().unwrap().to_string();
fs::create_dir_all(put_old_path)?;
cpointer_string!(new_root_cptr, new_root_cstring, new_root_str.clone());
cpointer_string!(put_old_cptr, put_old_cstring, put_old);
log::debug!("pivoting on {:?}", self.new_root.clone());
let syscall_result = unsafe { syscall!(SYS_pivot_root, new_root_cptr, put_old_cptr) }
.map_err(|errno| {
Error::ProviderError(format!("pivot_root, errno: {:?}", Errno::from_i64(errno)))
})?;
self.mount_handler.replace(MountHandler {
root_dir: PathBuf::from_str(POSTP_ROOT)?,
});
self.root_dir.replace(
self.mount_handler
.borrow()
.root_dir
.clone()
.join(PIVOT_OLD_ROOT),
);
self.mount_handler.borrow().chdir(POSTP_ROOT.to_string())?;
Ok(syscall_result)
}
fn fork(&self) -> Result<i64> {
let syscall_result = unsafe { syscall!(SYS_fork) };
syscall_result.map_err(|errno| {
Error::ProviderError(format!("fork, errno: {:?}", Errno::from_i64(errno)))
})
}
fn unshare(&self) -> Result<i64> {
let ns_flags = self
.namespaces
.iter()
.fold(0, |c, namespace| c | (*namespace as i32));
let syscall_result = unsafe { syscall!(SYS_unshare, ns_flags) };
syscall_result.map_err(|errno| {
Error::ProviderError(format!("unshare, errno: {:?}", Errno::from_i64(errno)))
})
}
// TODO: proc enter should come last
fn enter(&self) -> Result<i64> {
self.pid.as_ref().map_or(
Err(Error::ProviderError("no pid args supplied".to_string())),
|pid| {
let proc = ProcHandler::new(self.root_dir.take().clone());
let mut namespaces_w_ordering = self.namespaces.clone();
namespaces_w_ordering.sort_by(|a, b| match (a, b) {
(NamespaceType::Mnt, _) => Ordering::Greater,
(_, NamespaceType::Mnt) => Ordering::Less,
_ => Ordering::Equal,
});
namespaces_w_ordering
.iter()
.map(|ns_type| {
let new_ns_path = proc.ns_path(*pid, map_namespace_type(*ns_type));
log::debug!("entering {:?}", new_ns_path);
let new_ns_file = File::open(new_ns_path)?;
let new_ns_fd = new_ns_file.as_raw_fd();
let syscall_result = unsafe { syscall!(SYS_setns, new_ns_fd, 0) };
syscall_result.map_err(|errno| {
Error::ProviderError(format!(
"setns, errno: {:?}",
Errno::from_i64(errno)
))
})
})
.collect::<Result<Vec<i64>>>()
.map(|_| 0)
},
)
}
fn execve(&self) -> Result<i64> {
cpointer_pathbuf!(command_arg_cptr, command_cstring, self.command);
cpointer_stringvec!(argv_arg_cptr, argv_vec, self.argv);
cpointer_stringvec!(envp_arg_cptr, envp_vec, self.envp);
let syscall_result =
unsafe { syscall!(SYS_execve, command_arg_cptr, argv_arg_cptr, envp_arg_cptr) };
syscall_result.map_err(|errno| {
Error::ProviderError(format!("execve, errno: {:?}", Errno::from_i64(errno)))
})
}
fn wait_children(&self) -> Result<i64> {
let syscall_result = unsafe { syscall!(SYS_wait4, (-1) as i64, 0, 0, 0) };
syscall_result.map_err(|errno| {
Error::ProviderError(format!("execve, errno: {:?}", Errno::from_i64(errno)))
})
}
}
impl JailProvider for NamespaceProvider {
    /// Runs the configured command inside the jail.
    ///
    /// Without a pid the process changes into the root directory and unshares
    /// the requested namespaces; with a pid it enters that process' namespaces
    /// instead. It then forks. The child prepares the filesystem (only when no
    /// pid was given: unmount, optional pivot, mount, optional old-root
    /// unmount) and calls `execve`; the parent waits for a child and returns.
    fn exec(&self) -> Result<()> {
        // Neither field is modified below, so the predicates can be read once.
        let creating_namespaces = self.pid.is_none();
        let pivoting = self.new_root.is_some();

        if creating_namespaces {
            self.chdir_root()?;
            log::debug!("unsharing");
            self.unshare()?;
        } else {
            log::debug!("entering");
            self.enter()?;
        }

        log::debug!("forking");
        let child_pid = self.fork()?;

        // Parent side: wait and report.
        if child_pid != 0 {
            log::info!("jailing: {}", child_pid);
            self.wait_children()?;
            log::info!("execution terminated");
            return Ok(());
        }

        // Child side from here on.
        if creating_namespaces {
            log::debug!("unmounting filesystems");
            self.setup_umounts()?;
            if pivoting {
                log::debug!("pivoting root");
                self.pivot_root()?;
                // fd handling should go here
            }
            log::debug!("setting up mounts");
            self.setup_mounts()?;
            if pivoting {
                log::debug!("unmounting old root");
                self.setup_umount_old_root()?;
            }
        }
        log::debug!("execveing");
        self.execve()?;
        Ok(())
    }
}