Files
TumMediasiteDownloader/src/main.rs
2017-07-20 03:54:51 +02:00

315 lines
9.8 KiB
Rust

extern crate clap;
extern crate cookie;
extern crate dotenv;
extern crate json;
#[macro_use]
extern crate lazy_static;
extern crate reqwest;
extern crate zip;
mod catalog;
mod presentation;
use std::env;
use std::collections::HashMap;
use std::io::{self, Read};
use std::path::Path;
use std::sync::Mutex;
use clap::{App, Arg};
use reqwest::{Client, ClientBuilder, Response, RedirectPolicy, RequestBuilder};
use reqwest::header::{Cookie, SetCookie};
use catalog::*;
use presentation::Presentation;
// TODO: Old "Publish To Go" packages get swept from the server and you have to request a new
// one; implement this
// TODO: Sometimes you need to access the video listing at least once using Moodle; emulate this
/// Maximum number of attempts, both for a single HTTP request (`try_to_get_valid_response`)
/// and for downloading a single presentation (`download_catalog`).
const MAX_RETRIES: u8 = 10;
// Process-wide state shared by all request helpers in this file.
lazy_static! {
// HTTP client handed to every request-building closure by `try_to_get_valid_response`.
static ref CLIENT: Client = ClientBuilder::new()
.expect("Failed to create ClientBuilder!")
// The login site redirects to itself if no redirect parameter is given,
// so redirects are not followed.
.redirect(RedirectPolicy::none())
.build()
.expect("Failed to create Client!");
// Login credentials; they start out empty and are filled in by `main` before the first login.
static ref USERNAME: Mutex<String> = Mutex::new(String::new());
static ref PASSWORD: Mutex<String> = Mutex::new(String::new());
// Value of the `MediasiteAuth` cookie; overwritten by `get_auth` on every (re-)login and
// read by `construct_cookie`.
static ref AUTH: Mutex<String> = Mutex::new(String::new());
}
/// Reasons why downloading a presentation can fail, as handled by `download_catalog`.
#[derive(Debug)]
enum DownloadError {
/// An I/O error occurred; `download_catalog` logs it and retries the download.
IoError(io::Error),
/// The current login is no longer accepted; `download_catalog` logs in again via `get_auth`
/// before retrying.
AuthorizationTimeout,
}
/// Entry point: parses the command line, determines the login, and downloads the catalog.
///
/// The catalog name and login are resolved as follows:
/// 1. If `resolve_alias` recognizes `CATALOG_NAME`, its result (catalog name plus optional
///    login) is used and any `-u`/`-p` given on the command line is ignored.
/// 2. Otherwise the name is used verbatim together with the `-u`/`-p` login, if given.
/// 3. If no login is available after that, `get_default_login` supplies one.
///
/// # Panics
/// Panics if `OUTPUT_DIRECTORY` exists but is not a directory, if it cannot be created, or if
/// any of the later steps (login, catalog lookup) panics.
fn main() {
    let matches = App::new("TumMediasiteDownloader")
        .author("Boris-Chengbiao Zhou <bobo1239@web.de>")
        .about(
            "Downloads \'catalogs\' from the TUM's Mediasite lecture archive.",
        )
        .arg(
            Arg::with_name("CATALOG_NAME")
                .help(
                    "name of the catalog e.g. from the URL:\n\
                     https://streams.tum.de/Mediasite/Catalog/catalogs/era-2016 -> era-2016\n\
                     special cases (WS16/17; login included): DS, EIDI, ERA, ...",
                )
                .required(true)
                .index(1),
        )
        .arg(
            Arg::with_name("OUTPUT_DIRECTORY")
                .help("where to output the downloaded files")
                .required(true)
                .index(2),
        )
        .arg(
            Arg::with_name("username")
                .short("u")
                .help(
                    "username for login; can be omitted if the user from .env should be used",
                )
                .requires("password")
                .takes_value(true),
        )
        .arg(
            Arg::with_name("password")
                .short("p")
                .help(
                    "password for login; can be omitted if the user from .env should be used",
                )
                .requires("username")
                .takes_value(true),
        )
        .get_matches();

    let catalog_name = matches.value_of("CATALOG_NAME").unwrap();
    // `username` and `password` require each other, so the password is present whenever the
    // username is.
    let login = if let Some(username) = matches.value_of("username") {
        let password = matches.value_of("password").unwrap();
        Some((username, password))
    } else {
        None
    };
    let catalog_def = resolve_alias(catalog_name).unwrap_or((catalog_name, login));

    let out_dir = Path::new(matches.value_of("OUTPUT_DIRECTORY").unwrap());
    if out_dir.exists() {
        // Tell the user what is wrong instead of failing with a bare assertion.
        assert!(
            out_dir.is_dir(),
            "Output path \"{}\" exists but is not a directory!",
            out_dir.display()
        );
    } else {
        ::std::fs::create_dir_all(out_dir).expect("Failed to create output directory!");
    }

    let (catalog_name, login) = catalog_def;
    let (username, password) = match login {
        Some((user, pass)) => (user.to_string(), pass.to_string()),
        // Neither the alias nor the command line provided a login.
        None => get_default_login(),
    };

    println!("Preparing to download catalog \"{}\"!", catalog_name);
    // Publish the credentials so that `get_auth` can (re-)login at any later point.
    USERNAME.lock().unwrap().push_str(&username);
    PASSWORD.lock().unwrap().push_str(&password);
    get_auth();
    download_catalog(catalog_name, out_dir);
}
/// Returns the `(username, password)` pair taken from the environment.
///
/// A `.env` file is loaded first if one can be found; its absence is only reported, since the
/// variables may already be set in the process environment.
///
/// # Panics
/// Panics if `TUM_USERNAME` or `TUM_PASSWORD` is not set.
fn get_default_login() -> (String, String) {
    match dotenv::dotenv() {
        Ok(_) => {}
        Err(_) => println!("No .env found!"),
    }
    (
        env::var("TUM_USERNAME").expect("Missing TUM_USERNAME environment variable!"),
        env::var("TUM_PASSWORD").expect("Missing TUM_PASSWORD environment variable!"),
    )
}
/// Logs in to Mediasite and stores the value of the `MediasiteAuth` cookie in `AUTH`.
///
/// The credentials are read from `USERNAME` and `PASSWORD` and posted as form data to the
/// login endpoint. The request is retried (see `try_to_get_valid_response`) until a response
/// carrying a `Set-Cookie` header arrives.
///
/// # Panics
/// Panics if no response with a `Set-Cookie` header is received or if none of the received
/// cookies is named `MediasiteAuth`.
fn get_auth() {
    println!("Logging in!");
    let username = USERNAME.lock().unwrap();
    let password = PASSWORD.lock().unwrap();
    let mut form_data = HashMap::new();
    form_data.insert("UserName", &*username);
    form_data.insert("Password", &*password);

    let res = try_to_get_valid_response(
        |client| {
            let mut request_builder = client
                .post("https://streams.tum.de/Mediasite/Login")
                .unwrap();
            request_builder
                .form(&form_data)
                .expect("Failed to serialize form_data!");
            request_builder
        },
        |response| response.headers().get::<SetCookie>().is_some(),
    ).expect(
        "Didn't receive a valid response trying to login! Maybe wrong login data?",
    );

    // FIXME: We're somehow only getting "302 Object moved" instead of the actual response
    //        => We can't determine if the login was successful
    //        (we still get a MediasiteAuth cookie that is useless)
    //        Once the real body is available, it should be checked for
    //        "Unknown username or bad password." and the login rejected accordingly.

    let set_cookie = res.headers()
        .get::<SetCookie>()
        .expect("Set-Cookie header vanished after validation");
    // The server may send several cookies in any order, so look for the auth cookie by name
    // instead of assuming it is the first one.
    let cookie = set_cookie
        .0
        .iter()
        .filter_map(|raw| cookie::Cookie::parse(raw.to_string()).ok())
        .find(|candidate| candidate.name() == "MediasiteAuth")
        .expect("Login response did not contain a MediasiteAuth cookie!");

    let mut auth = AUTH.lock().unwrap();
    auth.clear();
    auth.push_str(cookie.value());
}
/// Downloads every presentation of the catalog `catalog_name` into `out_dir`.
///
/// Each presentation is attempted up to `MAX_RETRIES` times. I/O errors are logged and
/// retried; an expired authorization triggers a fresh login via `get_auth` before the next
/// attempt. Presentations that still fail after the last attempt are reported instead of
/// being skipped silently, and a summary is printed at the end.
fn download_catalog(catalog_name: &str, out_dir: &Path) {
    let catalog_id = get_catalog_id(catalog_name);
    let json = get_json(&catalog_id);
    let presentations = json_to_presentations(&json);
    println!(
        "Starting to download {} presentations!",
        presentations.len()
    );

    let mut failed = 0;
    for (i, presentation) in presentations.iter().enumerate() {
        println!(
            "\nDownloading {}/{}: {}",
            i + 1,
            presentations.len(),
            presentation.name()
        );

        let mut downloaded = false;
        for attempt in 1..(MAX_RETRIES + 1) {
            match presentation.download(out_dir) {
                Ok(()) => {
                    downloaded = true;
                    break;
                }
                Err(DownloadError::IoError(e)) => {
                    println!("Error during download: {:?}", e);
                    // Only announce a retry if there actually is another attempt.
                    if attempt < MAX_RETRIES {
                        println!("Retrying!");
                    }
                }
                Err(DownloadError::AuthorizationTimeout) => {
                    println!("Authorization is not valid anymore. Refreshing!");
                    // Refresh even on the last attempt so the next presentation starts with a
                    // valid login.
                    get_auth();
                }
            }
        }

        if !downloaded {
            failed += 1;
            println!(
                "Giving up on \"{}\" after {} attempts!",
                presentation.name(),
                MAX_RETRIES
            );
        }
    }

    if failed > 0 {
        println!(
            "\n{} of {} presentations could not be downloaded!",
            failed,
            presentations.len()
        );
    }
}
/// Fetches the catalog page for `name` and returns the catalog id embedded in it.
///
/// # Panics
/// Panics if the page does not contain a `CatalogId: '<id>'` entry with a non-empty id.
fn get_catalog_id(name: &str) -> String {
    println!("Fetching catalog id!");
    let url = format!("https://streams.tum.de/Mediasite/Catalog/catalogs/{}", name);
    let mut res = try_to_get_response(|client| {
        let mut request_builder = client.get(&url).unwrap();
        request_builder.header(construct_cookie());
        request_builder
    });
    let body = read_response_body(&mut res);

    extract_catalog_id(&body)
        .expect(
            "Failed to find CatalogId on the catalog page! Perhaps you got the wrong catalog \
             name or an invalid login? Maybe you need to open the page in a browser once...",
        )
        .to_string()
}

/// Returns the id between the quotes of the first `CatalogId: '<id>'` entry in `body`, or
/// `None` if there is no such entry, its closing quote is missing, or the id is empty.
fn extract_catalog_id(body: &str) -> Option<&str> {
    let prefix = "CatalogId: '";
    let start = match body.find(prefix) {
        Some(idx) => idx + prefix.len(),
        None => return None,
    };
    // `start` lies directly behind the ASCII prefix, so it is a valid char boundary.
    let rest = &body[start..];
    // Read up to the closing quote instead of assuming a fixed id length; this cannot slice
    // out of bounds on truncated pages and also works for ids of a different length.
    match rest.find('\'') {
        Some(end) if end > 0 => Some(&rest[..end]),
        _ => None,
    }
}
/// Converts the catalog JSON returned by the server into a list of presentations.
///
/// # Panics
/// Panics if `json_str` is not valid JSON or if the number of entries in
/// `PresentationDetailsList` does not equal `TotalItems`.
fn json_to_presentations(json_str: &str) -> Vec<Presentation> {
    let parsed = json::parse(json_str).expect("Failed parsing the json!");
    let presentations: Vec<Presentation> = parsed["PresentationDetailsList"]
        .members()
        .map(|entry| Presentation::from(entry))
        .collect();
    // Maybe there are multiple pages
    assert_eq!(presentations.len(), parsed["TotalItems"]);
    presentations
}
/// Requests the presentation listing of the catalog `catalog_id` and returns the raw response
/// body (expected to be JSON, see `json_to_presentations`).
fn get_json(catalog_id: &str) -> String {
    println!("Fetching catalog!");
    // The catalog id doubles as the id of the folder to list.
    let payload: HashMap<&str, &str> = [
        ("CatalogId", catalog_id),
        ("CurrentFolderId", catalog_id),
        ("ItemsPerPage", "500"),
    ].iter()
        .cloned()
        .collect();

    let mut response = try_to_get_response(|client| {
        let mut builder = client
            .post(
                "https://streams.tum.de/Mediasite/Catalog/Data/GetPresentationsForFolder",
            )
            .unwrap();
        builder.header(construct_cookie());
        builder
            .json(&payload)
            .expect("Failed to serialize json!");
        builder
    });
    read_response_body(&mut response)
}
/// Builds the `Cookie` request header carrying the current `MediasiteAuth` value from `AUTH`.
fn construct_cookie() -> Cookie {
    // Copy the value out so the lock is released right away.
    let auth_value = AUTH.lock().unwrap().clone();
    let mut header = Cookie::new();
    header.append("MediasiteAuth", auth_value);
    header
}
/// Sends the request built by `f`, retrying on transport errors, and returns the response.
///
/// Any response that arrives is accepted, regardless of its contents.
///
/// # Panics
/// Panics if no attempt of `try_to_get_valid_response` yields a response.
fn try_to_get_response<F>(f: F) -> Response
where
    F: Fn(&Client) -> RequestBuilder,
{
    let accept_any = |_: &Response| true;
    try_to_get_valid_response(f, accept_any).unwrap()
}
/// Sends the request built by `f1` up to `MAX_RETRIES` times until `f2` accepts a response.
///
/// `f1` is called with the shared `CLIENT` to build a fresh request for every attempt; `f2`
/// decides whether a received response is valid.
///
/// Returns `Ok` with the first accepted response. If every attempt fails, returns `Err` with
/// the outcome of the last attempt, which is either a rejected response or the transport
/// error.
fn try_to_get_valid_response<F1, F2>(f1: F1, f2: F2) -> Result<Response, reqwest::Result<Response>>
where
    F1: Fn(&Client) -> RequestBuilder,
    F2: Fn(&Response) -> bool,
{
    let mut last_outcome = None;
    for attempt in 0..MAX_RETRIES {
        if attempt != 0 {
            println!("Retrying request!");
        }
        match f1(&*CLIENT).send() {
            Ok(response) => {
                if f2(&response) {
                    return Ok(response);
                }
                // Received, but rejected by the validity check.
                last_outcome = Some(Ok(response));
            }
            Err(error) => last_outcome = Some(Err(error)),
        }
    }
    println!(
        "Failed to get valid response! (tried {} times)",
        MAX_RETRIES
    );
    Err(last_outcome.unwrap())
}
/// Reads the complete body of `response` into a `String`.
///
/// # Panics
/// Panics if reading fails or the body is not valid UTF-8.
fn read_response_body(response: &mut Response) -> String {
    let mut body = String::new();
    Read::read_to_string(response, &mut body).expect("Failed to read body");
    body
}