You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

149 lines
4.7KB

  1. use {
  2. curl::easy::Easy,
  3. std::{
  4. fs::File,
  5. io::{BufWriter, Write},
  6. path::{Path, PathBuf},
  7. sync::Arc,
  8. },
  9. thiserror::Error,
  10. };
  11. struct ConnCache {
  12. curldat: Easy,
  13. destination: Arc<PathBuf>,
  14. urlbase: Arc<url::Url>,
  15. progbar: indicatif::ProgressBar,
  16. }
  17. #[derive(Debug, Error)]
  18. pub enum ConnError {
  19. #[error("I/O error")]
  20. Io(#[from] std::io::Error),
  21. #[error("curl error")]
  22. Curl(#[from] curl::Error),
  23. #[error("HTTP code {0}")]
  24. Http(u32),
  25. #[error("no data available")]
  26. NoData,
  27. }
  28. impl ConnCache {
  29. fn new(
  30. destination: Arc<PathBuf>,
  31. urlbase: Arc<url::Url>,
  32. progbar_master: &indicatif::MultiProgress,
  33. ) -> Self {
  34. // set progbar style
  35. let progbar = progbar_master.add(indicatif::ProgressBar::new(1));
  36. let sty = indicatif::ProgressStyle::default_bar()
  37. .template("[{elapsed_precise}] {bar:80.cyan/blue} {pos:>7}/{len:7} {msg}")
  38. .progress_chars("#:-");
  39. progbar.set_style(sty);
  40. // construct from parts
  41. ConnCache {
  42. curldat: Easy::new(),
  43. destination,
  44. urlbase,
  45. progbar,
  46. }
  47. }
  48. fn fetch_single<P: AsRef<Path>>(&mut self, url: &str, dstpostfix: P) -> Result<(), ConnError> {
  49. let rdstp = self.destination.join(dstpostfix);
  50. let filenam = rdstp
  51. .file_name()
  52. .and_then(|i| i.to_str())
  53. .expect("invalid file name");
  54. self.curldat.reset();
  55. self.curldat.progress(true)?;
  56. self.curldat.get(true)?;
  57. self.curldat.fail_on_error(true)?;
  58. self.curldat
  59. .url(self.urlbase.join(url).expect("invalid URL").as_str())?;
  60. self.progbar.reset();
  61. self.progbar.set_position(0);
  62. self.progbar.set_message(&format!("fetching: {}", filenam));
  63. let mut f = File::create(&rdstp)?;
  64. {
  65. let mut writer = BufWriter::new(&mut f);
  66. let pb = &mut self.progbar;
  67. let mut transfer = self.curldat.transfer();
  68. transfer.write_function(|new_data| {
  69. use encoding::types::{DecoderTrap, Encoding};
  70. // we re-encode the data while downloading
  71. writer
  72. .write_all(
  73. encoding::all::ISO_8859_1
  74. // 1. Latin-1 to UTF-8
  75. .decode(new_data, DecoderTrap::Replace)
  76. .expect("got invalid latin-1 data")
  77. // 2. dos2unix
  78. .replace("\r\n", "\n")
  79. // 3. get rid of Latin-1 marker
  80. .replacen(r#"<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">"#, r#"<meta http-equiv="Content-Type" content="text/html; charset=utf-8">"#, 1)
  81. .as_bytes(),
  82. )
  83. .expect("write failed");
  84. Ok(new_data.len())
  85. })?;
  86. transfer.progress_function(|total, cur, _, _| {
  87. pb.set_length(total.round() as u64);
  88. pb.set_position(cur.round() as u64);
  89. true
  90. })?;
  91. transfer.perform()?;
  92. }
  93. f.sync_all()?;
  94. let httpsc = self.curldat.response_code()?;
  95. if httpsc != 200 {
  96. Err(ConnError::Http(httpsc))
  97. } else if f.metadata()?.len() == 0 {
  98. Err(ConnError::NoData)
  99. } else {
  100. Ok(())
  101. }
  102. }
  103. }
  104. impl Drop for ConnCache {
  105. fn drop(&mut self) {
  106. self.progbar.finish();
  107. }
  108. }
  109. /// This implementation assumes that all files in the 'web-folder' have the
  110. /// following format: "X/XNNNNN.htm"
  111. ///
  112. /// destination and urlbase should have a trailing '/'.
  113. pub fn spawn_fetch_folder(
  114. destination: &Arc<PathBuf>,
  115. urlbase: &Arc<url::Url>,
  116. fld: &str,
  117. progbar_master: &indicatif::MultiProgress,
  118. ) -> std::thread::JoinHandle<()> {
  119. let mut c2 = ConnCache::new(destination.clone(), urlbase.clone(), progbar_master);
  120. {
  121. let fpath = c2.destination.join(fld);
  122. std::fs::create_dir_all(fpath).expect("unable to create directory for category");
  123. }
  124. let fld = fld.to_string();
  125. std::thread::spawn(move || {
  126. for i in 1.. {
  127. let postfix = format!("{}/{}{:05}.htm", fld, fld, i);
  128. if let Err(x) = c2.fetch_single(&postfix, &postfix) {
  129. match &x {
  130. ConnError::NoData => std::fs::remove_file(c2.destination.join(postfix))
  131. .expect("unable to delete null-file"),
  132. _ => c2.progbar.println(format!("Error @ {}: {}", postfix, x)),
  133. }
  134. break;
  135. }
  136. }
  137. })
  138. }