Browse Source

initial commit

master
Erik Zscheile 10 months ago
commit
5bec05ff82
6 changed files with 743 additions and 0 deletions
  1. +1
    -0
      .gitignore
  2. +486
    -0
      Cargo.lock
  3. +15
    -0
      Cargo.toml
  4. +3
    -0
      README.md
  5. +148
    -0
      src/fetch.rs
  6. +90
    -0
      src/main.rs

+ 1
- 0
.gitignore View File

@@ -0,0 +1 @@
/target

+ 486
- 0
Cargo.lock View File

@@ -0,0 +1,486 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi",
]

[[package]]
name = "atty"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
dependencies = [
"libc",
"winapi",
]

[[package]]
name = "autocfg"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"

[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"

[[package]]
name = "cc"
version = "1.0.48"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52a465a666ca3d838ebbf08b241383421412fe7ebb463527bba275526d89f76"

[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"

[[package]]
name = "clap"
version = "2.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]

[[package]]
name = "clicolors-control"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90082ee5dcdd64dc4e9e0d37fbf3ee325419e39c0092191e0393df65518f741e"
dependencies = [
"atty",
"lazy_static",
"libc",
"winapi",
]

[[package]]
name = "console"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5d540c2d34ac9dd0deb5f3b5f54c36c79efa78f6b3ad19106a554d07a7b5d9f"
dependencies = [
"clicolors-control",
"encode_unicode",
"lazy_static",
"libc",
"regex",
"termios",
"unicode-width",
"winapi",
]

[[package]]
name = "curl"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06aa71e9208a54def20792d877bc663d6aae0732b9852e612c4a933177c31283"
dependencies = [
"curl-sys",
"libc",
"openssl-probe",
"openssl-sys",
"schannel",
"socket2",
"winapi",
]

[[package]]
name = "curl-sys"
version = "0.4.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f659f3ffac9582d6177bb86d1d2aa649f4eb9d0d4de9d03ccc08b402832ea340"
dependencies = [
"cc",
"libc",
"libz-sys",
"openssl-sys",
"pkg-config",
"vcpkg",
"winapi",
]

[[package]]
name = "dsbfnr"
version = "0.0.0"
dependencies = [
"clap",
"curl",
"encoding",
"indicatif",
"thiserror",
"url",
]

[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"

[[package]]
name = "encoding"
version = "0.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
dependencies = [
"encoding-index-japanese",
"encoding-index-korean",
"encoding-index-simpchinese",
"encoding-index-singlebyte",
"encoding-index-tradchinese",
]

[[package]]
name = "encoding-index-japanese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
dependencies = [
"encoding_index_tests",
]

[[package]]
name = "encoding-index-korean"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
dependencies = [
"encoding_index_tests",
]

[[package]]
name = "encoding-index-simpchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
dependencies = [
"encoding_index_tests",
]

[[package]]
name = "encoding-index-singlebyte"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
dependencies = [
"encoding_index_tests",
]

[[package]]
name = "encoding-index-tradchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
dependencies = [
"encoding_index_tests",
]

[[package]]
name = "encoding_index_tests"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"

[[package]]
name = "idna"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]

[[package]]
name = "indicatif"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8572bccfb0665e70b7faf44ee28841b8e0823450cd4ad562a76b5a3c4bf48487"
dependencies = [
"console",
"lazy_static",
"number_prefix",
"regex",
]

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
version = "0.2.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558"

[[package]]
name = "libz-sys"
version = "1.0.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eb5e43362e38e2bca2fd5f5134c4d4564a23a5c28e9b95411652021a8675ebe"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]

[[package]]
name = "matches"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"

[[package]]
name = "number_prefix"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"

[[package]]
name = "openssl-probe"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"

[[package]]
name = "openssl-sys"
version = "0.9.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f"
dependencies = [
"autocfg",
"cc",
"libc",
"pkg-config",
"vcpkg",
]

[[package]]
name = "percent-encoding"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"

[[package]]
name = "pkg-config"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677"

[[package]]
name = "proc-macro2"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
dependencies = [
"unicode-xid",
]

[[package]]
name = "quote"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
dependencies = [
"proc-macro2",
]

[[package]]
name = "redox_syscall"
version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"

[[package]]
name = "regex"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
dependencies = [
"regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"

[[package]]
name = "schannel"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f550b06b6cba9c8b8be3ee73f391990116bf527450d2556e9b9ce263b9a021"
dependencies = [
"lazy_static",
"winapi",
]

[[package]]
name = "smallvec"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86"

[[package]]
name = "socket2"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8b74de517221a2cb01a53349cf54182acdc31a074727d3079068448c0676d85"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"winapi",
]

[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"

[[package]]
name = "syn"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff0acdb207ae2fe6d5976617f887eb1e35a2ba52c13c7234c790960cdad9238"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]

[[package]]
name = "termios"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b620c5ea021d75a735c943269bb07d30c9b77d6ac6b236bc8b5c496ef05625"
dependencies = [
"libc",
]

[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]

[[package]]
name = "thiserror"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f357d1814b33bc2dc221243f8424104bfe72dbe911d5b71b3816a2dff1c977e"
dependencies = [
"thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb2e25d25307eb8436894f727aba8f65d07adf02e5b35a13cebed48bd282bfef"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

[[package]]
name = "unicode-bidi"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
dependencies = [
"matches",
]

[[package]]
name = "unicode-normalization"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
dependencies = [
"smallvec",
]

[[package]]
name = "unicode-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"

[[package]]
name = "unicode-xid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"

[[package]]
name = "url"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b414f6c464c879d7f9babf951f23bc3743fb7313c081b2e6ca719067ea9d61"
dependencies = [
"idna",
"matches",
"percent-encoding",
]

[[package]]
name = "vcpkg"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fc439f2794e98976c88a2a2dafce96b930fe8010b0a256b3c2199a773933168"

[[package]]
name = "vec_map"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"

[[package]]
name = "winapi"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

+ 15
- 0
Cargo.toml View File

@@ -0,0 +1,15 @@
[package]
name = "dsbfnr"
version = "0.0.0"
authors = ["Erik Zscheile <erik.zscheile@gmail.com>"]
edition = "2018"
license = "MIT OR Apache-2.0"
repository = "https://gitlab.com/zserik/dsbfnr"

[dependencies]
clap = "2.33"
curl = "0.4"
encoding = "0.2"
indicatif = "0.13"
thiserror = "1.0"
url = "2.1"

+ 3
- 0
README.md View File

@@ -0,0 +1,3 @@
# dsbfnr

a dsbcontrol.de CMS data fetcher + (planned) evaluator for personal purposes

+ 148
- 0
src/fetch.rs View File

@@ -0,0 +1,148 @@
use {
curl::easy::Easy,
std::{
fs::File,
io::{BufWriter, Write},
path::{Path, PathBuf},
sync::Arc,
},
thiserror::Error,
};

/// Per-worker download state: a curl handle together with the destination
/// directory, the base URL and the progress bar used while fetching.
struct ConnCache {
/// curl "easy" handle; `fetch_single` resets and reconfigures it on every call.
curldat: Easy,
/// Directory that destination postfixes are joined onto.
destination: Arc<PathBuf>,
/// Base URL that relative file URLs are joined onto.
urlbase: Arc<url::Url>,
/// Progress bar of this worker; added to a `MultiProgress` in `new`.
progbar: indicatif::ProgressBar,
}

/// Errors reported by `ConnCache::fetch_single`.
#[derive(Debug, Error)]
pub enum ConnError {
/// Wraps a `std::io::Error` (converted automatically via `?`).
#[error("I/O error")]
Io(#[from] std::io::Error),
/// Wraps a `curl::Error` (converted automatically via `?`).
#[error("curl error")]
Curl(#[from] curl::Error),
/// The transfer finished, but the HTTP response code was not 200.
#[error("HTTP code {0}")]
Http(u32),
/// The transfer finished with code 200, but the written file is empty.
#[error("no data available")]
NoData,
}

impl ConnCache {
    /// Creates a new cache with a fresh curl handle and a styled progress bar
    /// that is registered with `progbar_master`.
    fn new(
        destination: Arc<PathBuf>,
        urlbase: Arc<url::Url>,
        progbar_master: &indicatif::MultiProgress,
    ) -> Self {
        // set progbar style
        let progbar = progbar_master.add(indicatif::ProgressBar::new(1));
        let sty = indicatif::ProgressStyle::default_bar()
            .template("[{elapsed_precise}] {bar:80.cyan/blue} {pos:>7}/{len:7} {msg}")
            .progress_chars("#:-");
        progbar.set_style(sty);

        // construct from parts
        ConnCache {
            curldat: Easy::new(),
            destination,
            urlbase,
            progbar,
        }
    }

    /// Downloads `url` (relative to `urlbase`) and stores it, re-encoded, at
    /// `destination/dstpostfix`.
    ///
    /// The body is converted from Latin-1 to UTF-8, CRLF line endings become
    /// LF, and the first Latin-1 `<meta>` charset marker is rewritten to
    /// UTF-8. The whole body is buffered in memory and transformed once, so
    /// a `"\r\n"` pair or the marker is handled correctly even when curl
    /// delivers it split across two chunks.
    ///
    /// The destination file is only created after the transfer succeeded, so
    /// a failed transfer leaves no empty or partial file behind. It *is*
    /// created (empty) when the server sent no data, because callers clean
    /// that file up when they receive [`ConnError::NoData`].
    ///
    /// # Errors
    ///
    /// * [`ConnError::Curl`] - configuring or performing the transfer failed
    ///   (this includes HTTP codes >= 400 because of `fail_on_error`).
    /// * [`ConnError::Io`] - creating, writing, flushing or syncing the file failed.
    /// * [`ConnError::Http`] - the transfer finished with a code other than 200.
    /// * [`ConnError::NoData`] - the transfer finished with code 200 but no data.
    ///
    /// # Panics
    ///
    /// Panics if `dstpostfix` has no valid UTF-8 file name or if `url` cannot
    /// be joined onto the base URL.
    fn fetch_single<P: AsRef<Path>>(&mut self, url: &str, dstpostfix: P) -> Result<(), ConnError> {
        use encoding::types::{DecoderTrap, Encoding};

        const LATIN1_META: &str =
            r#"<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">"#;
        const UTF8_META: &str =
            r#"<meta http-equiv="Content-Type" content="text/html; charset=utf-8">"#;

        let rdstp = self.destination.join(dstpostfix);
        let filenam = rdstp
            .file_name()
            .and_then(|i| i.to_str())
            .expect("invalid file name");

        // the handle is reused between calls, so start from a clean state
        self.curldat.reset();
        self.curldat.progress(true)?;
        self.curldat.get(true)?;
        self.curldat.fail_on_error(true)?;
        self.curldat
            .url(self.urlbase.join(url).expect("invalid URL").as_str())?;
        self.progbar.reset();
        self.progbar.set_position(0);
        self.progbar.set_message(&format!("fetching: {}", filenam));

        // Collect the raw body first; the callback does nothing that can
        // fail, so nothing can panic while libcurl is driving the transfer.
        let mut raw: Vec<u8> = Vec::new();
        {
            let pb = &self.progbar;
            let mut transfer = self.curldat.transfer();
            transfer.write_function(|new_data| {
                raw.extend_from_slice(new_data);
                Ok(new_data.len())
            })?;
            transfer.progress_function(|total, cur, _, _| {
                pb.set_length(total.round() as u64);
                pb.set_position(cur.round() as u64);
                true
            })?;
            transfer.perform()?;
        }

        let text = encoding::all::ISO_8859_1
            // 1. Latin-1 to UTF-8
            .decode(&raw, DecoderTrap::Replace)
            .expect("got invalid latin-1 data")
            // 2. dos2unix
            .replace("\r\n", "\n")
            // 3. get rid of Latin-1 marker
            .replacen(LATIN1_META, UTF8_META, 1);

        let mut writer = BufWriter::new(File::create(&rdstp)?);
        writer.write_all(text.as_bytes())?;
        // flush explicitly: dropping a BufWriter silently discards flush errors
        writer.flush()?;
        writer.get_ref().sync_all()?;

        let httpsc = self.curldat.response_code()?;
        if httpsc != 200 {
            Err(ConnError::Http(httpsc))
        } else if text.is_empty() {
            Err(ConnError::NoData)
        } else {
            Ok(())
        }
    }
}

// Finishes the progress bar when a `ConnCache` is dropped.
impl Drop for ConnCache {
fn drop(&mut self) {
self.progbar.finish();
}
}

/// Spawns a worker thread that downloads every file of one 'web-folder'.
///
/// This implementation assumes that all files in the 'web-folder' have the
/// following format: "X/XNNNNN.htm". Files are requested with increasing
/// `NNNNN`, starting at 1, until the first fetch fails.
///
/// destination and urlbase should have a trailing '/'.
///
/// A fetch that yields no data ends the sequence quietly and the empty file
/// is removed again. Any other failure is printed above the progress bar
/// together with its underlying causes.
///
/// # Panics
///
/// Panics (on the calling thread) if the category directory below
/// `destination` cannot be created.
pub fn spawn_fetch_folder(
    destination: &Arc<PathBuf>,
    urlbase: &Arc<url::Url>,
    fld: &str,
    progbar_master: &indicatif::MultiProgress,
) -> std::thread::JoinHandle<()> {
    let mut conn = ConnCache::new(Arc::clone(destination), Arc::clone(urlbase), progbar_master);
    std::fs::create_dir_all(conn.destination.join(fld))
        .expect("unable to create directory for category");

    let fld = fld.to_string();
    std::thread::spawn(move || {
        for i in 1.. {
            let postfix = format!("{}/{}{:05}.htm", fld, fld, i);
            let err = match conn.fetch_single(&postfix, &postfix) {
                Ok(()) => continue,
                Err(err) => err,
            };

            match err {
                ConnError::NoData => {
                    // Report a failed clean-up instead of panicking the worker.
                    if let Err(rm_err) = std::fs::remove_file(conn.destination.join(&postfix)) {
                        conn.progbar.println(format!(
                            "Error @ {}: unable to delete null-file: {}",
                            postfix, rm_err
                        ));
                    }
                }
                other => {
                    // The Display text of ConnError is generic ("curl error"),
                    // so append the source chain to show what actually failed.
                    let mut msg = format!("Error @ {}: {}", postfix, other);
                    let mut cause = std::error::Error::source(&other);
                    while let Some(inner) = cause {
                        msg.push_str(&format!(": {}", inner));
                        cause = std::error::Error::source(inner);
                    }
                    conn.progbar.println(msg);
                }
            }
            break;
        }
    })
}

+ 90
- 0
src/main.rs View File

@@ -0,0 +1,90 @@
mod fetch;

/// Parses the command line, spawns one fetch worker per category and waits
/// for all of them to finish.
fn main() {
    use clap::Arg;
    use std::sync::Arc;

    let matches = clap::App::new("dsbfnr")
        .version(clap::crate_version!())
        .author("Erik Zscheile <erik.zscheile@gmail.com>")
        .about("a dsbcontrol.de CMS data fetcher + evaluator for personal purposes")
        .arg(
            Arg::with_name("destdir")
                .short("d")
                .long("destdir")
                .takes_value(true)
                .help("sets the destination dir (defaults to '.')"),
        )
        .arg(
            Arg::with_name("dsbpath")
                .short("k")
                .long("dsbpath")
                .takes_value(true)
                .required(true)
                .help("sets the DSB path / key (known format: 'ORGID/REVID/WEEK')"),
        )
        .arg(
            Arg::with_name("categories")
                .short("c")
                .long("categories")
                .takes_value(true)
                .multiple(true)
                .help("append additional categories ('r' will always be fetched)"),
        )
        .arg(
            Arg::with_name("eval-rooms")
                .short("r")
                .long("eval-rooms")
                .help("enable evaluation of rooms"),
        )
        .get_matches();

    // 'r' is always fetched. Skip duplicates: two workers for the same
    // category would write to the same files at the same time.
    let mut categories = vec!["r"];
    if let Some(ctgs) = matches.values_of("categories") {
        for ctg in ctgs {
            if !categories.contains(&ctg) {
                categories.push(ctg);
            }
        }
    }

    let destdir = Arc::new(std::path::PathBuf::from(
        matches.value_of("destdir").unwrap_or(".").to_string() + "/",
    ));
    let urlbase = Arc::new(
        url::Url::parse(&format!(
            "https://app.dsbcontrol.de/data/{}/",
            matches
                .value_of("dsbpath")
                .expect("clap enforces the required 'dsbpath' argument")
        ))
        // the path comes from the user: report it instead of panicking
        .unwrap_or_else(|err| {
            eprintln!("invalid dsbpath: {}", err);
            std::process::exit(1)
        }),
    );

    curl::init();
    let progbar_master = indicatif::MultiProgress::new();

    let ths: Vec<_> = categories
        .iter()
        .map(|cat| {
            (
                cat,
                fetch::spawn_fetch_folder(&destdir, &urlbase, cat, &progbar_master),
            )
        })
        .collect();

    progbar_master.join_and_clear().unwrap();

    {
        let artific_bar = progbar_master.add(indicatif::ProgressBar::new_spinner());
        for (cat, handle) in ths.into_iter() {
            artific_bar.set_message(&format!("waiting for: {}", cat));
            handle.join().unwrap();
            artific_bar.set_message(&format!("done waiting for: {}", cat));
            artific_bar.tick();
        }
        artific_bar.finish_with_message("done");
    }

    progbar_master.join_and_clear().unwrap();

    if matches.is_present("eval-rooms") {
        // TODO: impl eval-rooms
        unimplemented!();
    }
}

Loading…
Cancel
Save