Skip to content

Commit

Permalink
let's fly 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
matoous committed Mar 4, 2024
1 parent f3c206d commit 285ec71
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 83 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
target/
db/
index/
wiki/
71 changes: 41 additions & 30 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 15 additions & 25 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
[package]
name = "mwp"
authors.workspace = true
version.workspace = true
edition.workspace = true
categories.workspace = true
repository.workspace = true
rust-version.workspace = true

[workspace]
resolver = "2"
members = [
Expand All @@ -16,6 +7,21 @@ members = [
"mwp-search",
]

default-members = [
"mwp-web"
]

[profile.release]
lto = "thin"
# debug = true

[profile.opt]
inherits = "release"
lto = "fat"
codegen-units = 1
# strip = "debuginfo" # TODO: or strip = true
opt-level = 3

[workspace.package]
name = "mwp"
version = "0.1.0"
Expand All @@ -25,19 +31,3 @@ categories = ["wiki", "knowledge-bage"]
repository = "https://github.com/matoous/mwp"
homepage = "https://github.com/matoous/mwp"
rust-version = "1.70"

[dependencies]
pulldown-cmark = "0.9.3"
tantivy = { version = "0.21.1", features = ["mmap"] }
tokio = { version = "1.36.0", features= ["full"]}
walkdir = "2.4.0"
lazy_static = "1.4.0"
time = "0.3.31"
url = { version = "2.5.0", features = ["serde"] }
sled = "0.34.7"
serde = "1.0.196"
serde_json = "1.0.113"
mwp-scraper = { path="./mwp-scraper" }
mwp-content = { path="./mwp-content" }
mwp-search = { path="./mwp-search" }
rusqlite = { version = "0.30.0", features = ["time", "url", "bundled"]}
8 changes: 5 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rust:1.70-slim-bookworm as builder
FROM rust:1.74-slim-bookworm as builder

RUN apt update \
&& apt install -y libssl-dev pkg-config
Expand Down Expand Up @@ -26,8 +26,10 @@ RUN apt update \
ENV TZ=Etc/UTC

COPY --from=builder /app/target/release/mwp mwp
COPY db.db3 ./
COPY ./db.db3 ./
COPY ./wiki ./wiki
COPY ./mwp-web/static ./

EXPOSE 4444

CMD ["/app/mwp"]
CMD ["/app/mwp", "--adr", "0.0.0.0:4444"]
22 changes: 22 additions & 0 deletions fly.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# fly.toml app configuration file generated for mwp on 2024-03-04T11:18:35+01:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = 'mwp'
primary_region = 'ams'

[build]

[http_service]
internal_port = 4444
force_https = true
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 0
processes = ['app']

[[vm]]
memory = '1gb'
cpu_kind = 'shared'
cpus = 1
14 changes: 13 additions & 1 deletion mwp-scraper/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ repository.workspace = true
html-escape = "0.2.13"
lazy_static = "1.4.0"
lol_html = "1.2.0"
pulldown-cmark = "0.9.3"
regex = "1.10.3"
reqwest = "0.11.24"
url = "2.5.0"
serde = "1.0.196"
serde_json = "1.0.113"
sled = "0.34.7"
tantivy = { version = "0.21.1", features = ["mmap"] }
time = "0.3.31"
tokio = { version = "1.36.0", features= ["full"]}
url = { version = "2.5.0", features = ["serde"] }
walkdir = "2.4.0"
rusqlite = { version = "0.30.0", features = ["time", "url", "bundled"]}

mwp-content = { path="../mwp-content" }
mwp-search = { path="../mwp-search" }
16 changes: 0 additions & 16 deletions mwp-scraper/src/lib.rs

This file was deleted.

20 changes: 17 additions & 3 deletions src/main.rs → mwp-scraper/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@ use mwp_content::Link;
use mwp_search::Doc;
use rusqlite::Connection;
use time::OffsetDateTime;
use url::Url;

mod parser;

use crate::parser::{DomParser, DomParserResult};

/// Fetches `link` over HTTP and feeds the response body through [`DomParser`].
///
/// # Errors
///
/// Returns an error when the request itself fails, when the server answers
/// with a 4xx/5xx status, when the body cannot be read as text, or when the
/// parser reports a failure while consuming the HTML.
pub async fn scrape(link: &Url) -> Result<DomParserResult, Box<dyn std::error::Error>> {
    // Turn 4xx/5xx responses into errors so that an error page is never
    // parsed and handed back to the caller as if it were the real document.
    let response = reqwest::get(link.clone()).await?.error_for_status()?;

    let html_text = response.text().await?;

    let mut rewriter = DomParser::new();
    rewriter.write(html_text.as_bytes())?;

    Ok(rewriter.wrap())
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -92,9 +108,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
},
};

println!("scraping {}", link.url);

let data = mwp_scraper::scrape(&link.url).await;
let data = scrape(&link.url).await;
let data = match data {
Ok(data) => data,
Err(err) => {
Expand Down
3 changes: 2 additions & 1 deletion mwp-web/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ maud = { version = "0.26.0", features = ["actix-web"] }
serde = "1.0.196"
serde_json = "1.0.113"
tantivy = "0.21.1"
rusqlite = { version = "0.30.0", features = ["time", "url"]}
rusqlite = { version = "0.30.0", features = ["time", "url", "bundled"]}
clap = { version = "4.5.1", features = ["derive"]}

mwp-content = { path="../mwp-content" }
mwp-search = { path="../mwp-search" }
Expand Down
26 changes: 23 additions & 3 deletions mwp-web/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use actix_web::{
guard::{Guard, GuardContext},
web, App, HttpServer, Result as AwResult,
};
use clap::{command, Parser};
use maud::{html, Markup, PreEscaped};
use mwp_content::Content;
use mwp_search::{Doc, SearchIndex};
Expand Down Expand Up @@ -180,13 +181,31 @@ impl Guard for ContentGuard {
}
}

// Command-line options of the web binary, parsed with clap's derive API.
// The `///` comments on the fields are picked up by clap as `--help` text,
// so they are runtime strings and are intentionally left untouched.
#[derive(Debug, Parser)]
#[command(version, about, long_about = None)]
struct Args {
    /// Source of the wikipedia
    #[arg(short = 's', long = "src", default_value = "./wiki")]
    src: String,

    /// The database file
    #[arg(short = 'd', long = "db", default_value = "./db.db3")]
    db: String,

    /// Address to serve on
    // Long flag only; the default binds to loopback on port 4444.
    #[arg(long = "adr", default_value = "127.0.0.1:4444")]
    adr: String,
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
env_logger::init_from_env(env_logger::Env::new().default_filter_or("info"));

let args = Args::parse();

let index = SearchIndex::new().unwrap();

let conn = Connection::open("./db.db3").unwrap();
let conn = Connection::open(args.db).unwrap();
let mut stmt = conn
.prepare("SELECT title, url, domain, body, tags, created_at, scraped_at FROM links")
.unwrap();
Expand All @@ -211,7 +230,8 @@ async fn main() -> std::io::Result<()> {
builder.add(doc.unwrap()).unwrap();
}
builder.commit();
let content = Content::from_dir("../wiki").await;

let content = Content::from_dir(&args.src).await;

HttpServer::new(move || {
App::new()
Expand All @@ -229,7 +249,7 @@ async fn main() -> std::io::Result<()> {
)
.service(Files::new("/", "./mwp-web/static/"))
})
.bind(("127.0.0.1", 4444))?
.bind(&args.adr)?
.run()
.await
}
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[toolchain]
channel = "1.70.0"
channel = "1.74.0"
components = ["rustfmt", "rust-src", "clippy"]

0 comments on commit 285ec71

Please sign in to comment.