Skip to content

Commit

Permalink
feature(web): Preserve title, tags and createdAt when importing a net…
Browse files Browse the repository at this point in the history
…scape html. Fixes #401
  • Loading branch information
MohamedBassem committed Sep 21, 2024
1 parent 52024ab commit d62c972
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 45 deletions.
133 changes: 103 additions & 30 deletions apps/web/components/dashboard/settings/ImportExport.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
"use client";

import assert from "assert";
import { useRouter } from "next/navigation";
import FilePickerButton from "@/components/ui/file-picker-button";
import { toast } from "@/components/ui/use-toast";
import { parseNetscapeBookmarkFile } from "@/lib/netscapeBookmarkParser";
import { useMutation } from "@tanstack/react-query";
import { TRPCClientError } from "@trpc/client";
import { Upload } from "lucide-react";

import { useCreateBookmarkWithPostHook } from "@hoarder/shared-react/hooks/bookmarks";
import {
useCreateBookmarkWithPostHook,
useUpdateBookmark,
useUpdateBookmarkTags,
} from "@hoarder/shared-react/hooks/bookmarks";
import {
useAddBookmarkToList,
useCreateBookmarkList,
Expand All @@ -17,29 +21,112 @@ import { BookmarkTypes } from "@hoarder/shared/types/bookmarks";

export function Import() {
const router = useRouter();
const { mutateAsync: createBookmark } = useCreateBookmarkWithPostHook();

const { mutateAsync: createBookmark } = useCreateBookmarkWithPostHook();
const { mutateAsync: updateBookmark } = useUpdateBookmark();
const { mutateAsync: createList } = useCreateBookmarkList();
const { mutateAsync: addToList } = useAddBookmarkToList();
const { mutateAsync: updateTags } = useUpdateBookmarkTags();

/**
 * Imports a single parsed bookmark.
 *
 * Creates a LINK bookmark for the parsed URL, then concurrently:
 *  - copies the imported title / creation date onto the bookmark (only the
 *    fields that are actually present),
 *  - adds the bookmark to the import list (`listId`), and
 *  - attaches the imported tags (only when there are any).
 *
 * Rejects when the URL is missing or malformed, or when any of the follow-up
 * mutations fails. An "already in the list" error from `addToList` is
 * tolerated. Resolves to whatever `createBookmark` returned.
 */
const { mutateAsync: parseAndCreateBookmark } = useMutation({
  mutationFn: async (toImport: {
    bookmark: {
      title: string;
      url: string | undefined;
      tags: string[];
      addDate?: number;
    };
    listId: string;
  }) => {
    const bookmark = toImport.bookmark;
    if (bookmark.url === undefined) {
      throw new Error("URL is undefined");
    }
    // `new URL` throws on malformed input, which rejects this mutation.
    const url = new URL(bookmark.url);
    const created = await createBookmark({
      type: BookmarkTypes.LINK,
      url: url.toString(),
    });

    // Only forward a title that has visible content. Sending "" would ask the
    // server to store an empty title (assumption: the update mutation writes
    // the value verbatim — confirm against the router).
    const title =
      bookmark.title.trim().length > 0 ? bookmark.title : undefined;

    // `addDate` is in seconds. Drop values that do not produce a valid Date
    // (0, NaN, Infinity, out-of-range) instead of sending an Invalid Date.
    const parsedDate = new Date(
      bookmark.addDate ? bookmark.addDate * 1000 : NaN,
    );
    const createdAt = Number.isNaN(parsedDate.getTime())
      ? undefined
      : parsedDate;

    await Promise.all([
      // Update title and createdAt if at least one of them is set
      title !== undefined || createdAt !== undefined
        ? updateBookmark({
            bookmarkId: created.id,
            title,
            createdAt,
          })
        : undefined,

      // Add to import list
      addToList({
        bookmarkId: created.id,
        listId: toImport.listId,
      }).catch((e: unknown) => {
        const alreadyInList =
          e instanceof TRPCClientError &&
          e.message.includes("already in the list");
        if (!alreadyInList) {
          throw e;
        }
        // The bookmark being in the list already is the desired end state.
      }),

      // Update tags; skip the request entirely when there is nothing to attach
      bookmark.tags.length > 0
        ? updateTags({
            bookmarkId: created.id,
            attach: bookmark.tags.map((t) => ({ tagName: t })),
            detach: [],
          })
        : undefined,
    ]);
    return created;
  },
});

const { mutateAsync: runUploadBookmarkFile } = useMutation({
mutationFn: async (file: File) => {
return await parseNetscapeBookmarkFile(file);
},
onSuccess: async (resp) => {
const results = await Promise.allSettled(
resp.map((url) =>
createBookmark({ type: BookmarkTypes.LINK, url: url.toString() }),
),
);

const failed = results.filter((r) => r.status == "rejected");
const successes = results.filter(
(r) => r.status == "fulfilled" && !r.value.alreadyExists,
);
const alreadyExisted = results.filter(
(r) => r.status == "fulfilled" && r.value.alreadyExists,
);
const importList = await createList({
name: `Imported Bookmarks`,
icon: "⬆️",
});

let done = 0;
const { id, update } = toast({
description: `Processed 0 bookmarks of ${resp.length}`,
variant: "default",
});

const successes = [];
const failed = [];
const alreadyExisted = [];
// Do the imports one by one
for (const parsedBookmark of resp) {
try {
const result = await parseAndCreateBookmark({
bookmark: parsedBookmark,
listId: importList.id,
});
if (result.alreadyExists) {
alreadyExisted.push(parsedBookmark);
} else {
successes.push(parsedBookmark);
}
} catch (e) {
failed.push(parsedBookmark);
}

update({
id,
description: `Processed ${done + 1} bookmarks of ${resp.length}`,
});
done++;
}

if (successes.length > 0 || alreadyExisted.length > 0) {
toast({
Expand All @@ -55,20 +142,6 @@ export function Import() {
});
}

const importList = await createList({
name: `Imported Bookmarks`,
icon: "⬆️",
});

if (successes.length > 0) {
await Promise.allSettled(
successes.map((r) => {
assert(r.status == "fulfilled");
addToList({ bookmarkId: r.value.id, listId: importList.id });
}),
);
}

router.push(`/dashboard/lists/${importList.id}`);
},
onError: (error) => {
Expand Down
35 changes: 23 additions & 12 deletions apps/web/lib/netscapeBookmarkParser.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
/**
 * Returns the `href` value of every `<a>` tag in `html` whose href is
 * double-quoted and begins with "http" (matched case-insensitively), in
 * document order.
 */
function extractUrls(html: string): string[] {
  const anchorHref = /<a\s+(?:[^>]*?\s+)?href="(http[^"]*)"/gi;
  return Array.from(html.matchAll(anchorHref), (found) => found[1]);
}
// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9
import * as cheerio from "cheerio";

/**
 * Parses a Netscape bookmark HTML export and returns one entry per `<a>`
 * element found in it.
 *
 * Each entry carries:
 *  - `title`: the anchor's text content,
 *  - `url`: the raw `href` attribute (`undefined` when absent; not validated),
 *  - `tags`: the comma-separated `tags` attribute, trimmed, without empty items,
 *  - `addDate`: the `add_date` attribute parsed as a base-10 integer, or
 *    `undefined` when the attribute is missing or not numeric.
 *
 * @throws Error when the file does not start with the Netscape bookmark
 *   DOCTYPE (leading whitespace / BOM is ignored).
 */
export async function parseNetscapeBookmarkFile(file: File) {
  const textContent = await file.text();

  // trimStart() also strips a leading byte-order mark, so exports saved with
  // a BOM or a leading newline are still recognised.
  if (
    !textContent.trimStart().startsWith("<!DOCTYPE NETSCAPE-Bookmark-file-1>")
  ) {
    throw Error("The uploaded html file does not seem to be a bookmark file");
  }

  const $ = cheerio.load(textContent);

  return $("a")
    .map(function (_index, a) {
      const $a = $(a);

      // A missing or non-numeric add_date becomes `undefined`, never NaN.
      const rawAddDate = $a.attr("add_date");
      const addDate =
        rawAddDate === undefined ? NaN : parseInt(rawAddDate, 10);

      // `tags=""` or "a,,b" must not produce empty tag names.
      const tags = ($a.attr("tags") ?? "")
        .split(",")
        .map((tag) => tag.trim())
        .filter((tag) => tag.length > 0);

      return {
        title: $a.text(),
        url: $a.attr("href"),
        tags,
        addDate: Number.isNaN(addDate) ? undefined : addDate,
      };
    })
    .get();
}
1 change: 1 addition & 0 deletions apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"@trpc/react-query": "11.0.0-next-beta.308",
"@trpc/server": "11.0.0-next-beta.308",
"better-sqlite3": "^9.4.3",
"cheerio": "^1.0.0",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"dayjs": "^1.11.10",
Expand Down
6 changes: 3 additions & 3 deletions docs/docs/10-import.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

## Import using the WebUI

Hoarder supports importing bookmarks using the Netscape HTML Format.
Hoarder supports importing bookmarks using the Netscape HTML Format. Titles, tags, and the date each bookmark was added are preserved during the import, and all imported bookmarks are placed in an automatically created list.

Simply open the WebUI of your Hoarder instance and drag and drop the bookmarks file into the UI.
To import the bookmark file, go to the settings and click "Import Bookmarks from HTML file".

:::info
All the URLs in the bookmarks file will be added automatically; you will not be able to pick and choose which bookmarks to import!
Expand Down Expand Up @@ -42,4 +42,4 @@ cat all_links.txt | xargs -I{} hoarder --api-key <key> --server-addr <addr> book
- Click on the three dots on the top right corner and choose `Export bookmarks`
- This will download an html file with all of your bookmarks.

You can use this file to import the bookmarks using the UI or CLI method described above
You can use this file to import the bookmarks using the UI or CLI method described above
1 change: 1 addition & 0 deletions packages/shared/types/bookmarks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ export const zUpdateBookmarksRequestSchema = z.object({
favourited: z.boolean().optional(),
note: z.string().optional(),
title: z.string().max(MAX_TITLE_LENGTH).nullish(),
createdAt: z.date().optional(),
});
export type ZUpdateBookmarksRequest = z.infer<
typeof zUpdateBookmarksRequestSchema
Expand Down
1 change: 1 addition & 0 deletions packages/trpc/routers/bookmarks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ export const bookmarksAppRouter = router({
archived: input.archived,
favourited: input.favourited,
note: input.note,
createdAt: input.createdAt,
})
.where(
and(
Expand Down
57 changes: 57 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d62c972

Please sign in to comment.