This commit is contained in:
45mg 2024-04-20 09:49:30 +00:00
commit 35755f0b05
17 changed files with 297 additions and 0 deletions

36
README.md Normal file
View file

@ -0,0 +1,36 @@
This is a set of rough scripts that I used to help me convert my Logseq graph to org-roam. Use at your own risk, and please read the code before you use it.
As I no longer have any use for this code (having finished converting my Logseq graph), I will not be maintaining it in any way. If you need any improvements, you'll have to fork the repo and work on them yourself.
# Supported
- conversion of Logseq's weird markdown dialect to org-mode: pandoc does most of the actual conversion, but various scripts are needed to massage Logseq's markdown into something it can understand
- converting links: Logseq links/tags are converted to org-roam links; page aliases are supported
# Not Supported
- queries and embeds
- images and other file assets
- journals (see Instructions below)
- many other things I didn't think of, no doubt
# Requirements
Tested with:
```
pandoc 3.1.9
emacs 29.1
```
In theory newer versions should work.
# Instructions
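- Clone this repo. If the scripts later fail to find each other, check the hard-coded `~/bin/scripts/logseq-migration` path assigned to `scriptdir`/`DIR` at the top of `logseq-migration`, `preproc`, `convert` and `postproc`.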
- Back up your existing Logseq graph folder, just in case.
- These scripts are designed to run on the 'pages' folder of your graph; decide what you're going to do with your journals. I combined them all into a single page like this:
``` sh
for file in journals/*; do
cat "$file" >> pages/journals.md
done
```
- run the shell script `logseq-migration` on your graph's `pages` folder:
`path/to/this/repo/logseq-migration pages`
This will create a folder named `pages_` containing converted org-mode files; see the comments in `logseq-migration` for details.
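- In Emacs (with org-roam installed), open `logseq-migration.el`, change the `graphdir` path in it to point at your `pages_` folder (preferably as an absolute path), and run the code. This will convert links and do some other post-processing.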

9
add_title Executable file
View file

@ -0,0 +1,9 @@
#!/bin/bash
# Prepend an org-mode "#+title:" keyword to a converted file.
# Usage: add_title GRAPHDIR FILE
# The title is FILE's path with the GRAPHDIR prefix and one leading slash
# removed, minus the ".org" suffix.
root=$1
relative=$(sed 's/^\///' <<<"${2#"$root"}")
heading=${relative%.org}
# Assemble the title line plus the original contents in a scratch file, then
# move the scratch file over the original.
{
    echo "#+title: $heading"
    cat "$2"
} > "$2".temp
mv "$2".temp "$2"

20
backtick Executable file
View file

@ -0,0 +1,20 @@
#!/bin/bash
# Wrap Logseq block ids, block embeds and queries in backticks, so that pandoc
# emits them as Org-mode 'verbatim' text (eg. =foo=) which the elisp pass can
# then find and process.
# Usage: backtick FILE   (FILE is edited in place)
# set -x
IFS=$'\n'
target=$1
# Extended regex for a Logseq block id
id_eregex='[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}'
# Extended regex for the target of embed syntax: hex digits, parentheses,
# square brackets and dashes
target_eregex='([0-9a-f]|\(|\)|\[|\]|-)+'
# Build the three substitutions up front; they are applied in this order.
block_id_subst='s/id:: '"$id_eregex"'/`\0`/'
embed_subst='s/\{\{ *embed '"$target_eregex"' *\}\}/`\0`/'
query_subst='s/{{ *query.*}}/`\0`/'
# block ids
sed -E -i "$block_id_subst" "$target"
# embeds
sed -E -i "$embed_subst" "$target"
# queries (this expression is a basic regex, hence no -E)
sed -i "$query_subst" "$target"

5
convert Executable file
View file

@ -0,0 +1,5 @@
#!/bin/bash
# Convert every Markdown file under a directory to Org with pandoc-cmd, and
# delete each .md file afterwards.
# Usage: convert [DIR]   (DIR defaults to the current directory)
#
# Look for pandoc-cmd next to this script first, so the repo works from
# wherever it was cloned; fall back to the historical install location.
DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
[[ -x $DIR/pandoc-cmd ]] || DIR=~/bin/scripts/logseq-migration
# find only evaluates the second -exec when the first command exited with
# status 0, so a .md file is removed only after pandoc-cmd succeeded on it.
find "${1:-.}" -wholename "*.md" -exec "$DIR"/pandoc-cmd '{}' \; -exec rm '{}' \;

106
convert-links.el Executable file
View file

@ -0,0 +1,106 @@
;;; convert-links.el --- convert logseq links to org-id links -*- lexical-binding: t; -*-
;; These globals are declared with `defconst' (rather than assigned with a bare
;; `setq') so that they are proper special variables with docstrings; like
;; `setq', `defconst' re-assigns the value every time the file is loaded.
(defconst logseq/id-regexp
  "[0-9a-f]\\{8\\}-\\(?:[0-9a-f]\\{4\\}-\\)\\{3\\}[0-9a-f]\\{12\\}"
  "Regexp matching a Logseq block id (hex digits grouped 8-4-4-4-12).")

(defconst logseq/id-spec-regexp
  (concat "=id:: " logseq/id-regexp "=\\( \\|\n\\)")
  "Regexp matching a verbatim `=id:: ID=' marker.
The match includes the single space or newline that follows the marker.")

(defconst logseq/filelink-target-regexp "[^]]*"
  "Regexp matching the target of a file link: any run of non-`]' characters.")

(defconst logseq/alias-regexp "^alias:: \\(.*?\\)$"
  "Regexp matching an `alias::' line; group 1 is the alias list.")
(defun logseq/--get-id-part (match)
  "Return the lookup key contained in the string MATCH, or nil if none.
If MATCH contains a Logseq id (`logseq/id-regexp'), return that id.
Otherwise, if MATCH is a file link (`logseq/filelink-regexp'), return the
link target."
  (cond
   ;; id link: the key is the id itself
   ((string-match logseq/id-regexp match)
    (match-string-no-properties 0 match))
   ;; file link: the key is the target captured by group 1
   ((string-match logseq/filelink-regexp match)
    (match-string-no-properties 1 match))))
(defun logseq/--convert-id-links-in-file (file hmap)
  "Generate org-ids in FILE, and return the association with their contexts.
First, convert the file itself into an org-roam node; then, remove logseq
'id::'s and convert the headlines they apply to into org-roam nodes (by
assigning an org id).

Add the associations to HMAP.  For file nodes, associate the page title
\(and every alias listed on an `alias::' line) with the node's id.  For
headline nodes, associate the replaced logseq id with the node id.

FILE must contain a `#+title:' line; a search error is signalled otherwise.
FILE is saved after processing.  Return HMAP."
  (require 'org-id)
  ;; Visit FILE without selecting it.  `find-file' would leave the visited
  ;; buffer current, so its `default-directory' would be used to resolve the
  ;; next (possibly relative) file name the caller passes in.
  (with-current-buffer (find-file-noselect file)
    (goto-char (point-min))
    ;; convert the file itself into a node
    (re-search-forward "#\\+title: \\(.*\\)$")
    ;; `let*': the title must be read from the match data before
    ;; `org-id-get-create' runs.
    (let* ((title (match-string-no-properties 1))
           (id (org-id-get-create)))
      (puthash title id hmap)
      ;; aliases resolve to the same file node
      (when (re-search-forward logseq/alias-regexp nil t)
        (let ((aliases (split-string (match-string-no-properties 1) ", *")))
          (message "%s" aliases)
          (dolist (alias aliases)
            (puthash alias id hmap))))
      ;; search for logseq ids
      (while (re-search-forward logseq/id-spec-regexp nil t)
        (let ((match (match-string-no-properties 0)))
          ;; delete logseq id (literally, with no case conversion)
          (replace-match "" t t)
          ;; key is the old logseq id; value is newly created org id for
          ;; the entry at point
          (puthash (logseq/--get-id-part match) (org-id-get-create) hmap))))
    (save-buffer))
  hmap)
(defun logseq/convert-id-links (graphdir)
  "Assign org ids throughout the Org files under GRAPHDIR.
Run `logseq/--convert-id-links-in-file' on every file whose name ends in
\".org\", and return the hash table it fills in: page titles, aliases and
old Logseq block ids mapped to the new org ids."
  ;; Keys are strings, so the table must compare with `equal':
  ;; (eq "a" "a") -> nil; (eql "a" "a") -> nil; (equal "a" "a") -> t
  (let ((hmap (make-hash-table :test 'equal)))
    ;; Expand GRAPHDIR first: `directory-files-recursively' returns names
    ;; prefixed with the directory as given, and relative names would be
    ;; resolved against whatever buffer is current when each file is visited.
    (dolist (file (directory-files-recursively (expand-file-name graphdir)
                                               "\\.org\\'"))
      (logseq/--convert-id-links-in-file file hmap))
    hmap))
;; Declared with `defconst' (rather than a bare `setq') so that they are proper
;; special variables with docstrings; the value is still re-assigned on every
;; load.
(defconst logseq/embed-regexp
  (concat "={{ *\\(embed\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim block embed; group 1 is the word \"embed\".")

(defconst logseq/query-regexp
  (concat "={{ *\\(query\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim query on an id; group 1 is the word \"query\".")

;; The description is matched non-greedily: with a greedy `.*', two links on
;; the same line would be swallowed as a single match.
(defconst logseq/link-regexp
  (concat "\\[\\[file:((" logseq/id-regexp "))\\]\\[\\(.*?\\)\\]\\]")
  "Regexp matching a described link to a block id; group 1 is the description.")

(defconst logseq/filelink-regexp
  (concat "\\[\\[file:\\(" logseq/filelink-target-regexp "\\)\\]\\]")
  "Regexp matching a description-less file link; group 1 is the target.")

(defconst logseq/blockref-regexp
  (concat "((" logseq/id-regexp "))")
  "Regexp matching a bare block reference of the form ((ID)).")
(defun logseq/--replace-with-org-links-in-file (file hmap)
  "Replace Logseq link syntax in FILE with org-id links, based on the
associations in HMAP.

Embeds, queries, described id links, file links and block references are
looked up in HMAP via `logseq/--get-id-part'.  When an org-roam node exists
for the resulting id, the match is replaced by a link inserted with
`org-roam-node-insert'.  Otherwise the match is replaced by its first
regexp group (or removed when there is none) and a message is logged.
FILE is saved afterwards."
  (require 'cl-lib)
  (require 'org-roam)
  ;; Visit FILE without selecting it, so the caller's current buffer (and
  ;; therefore its `default-directory') is left alone.
  (with-current-buffer (find-file-noselect file)
    (goto-char (point-min))
    (while (or (re-search-forward logseq/embed-regexp nil t)
               (re-search-forward logseq/query-regexp nil t)
               (re-search-forward logseq/link-regexp nil t)
               (re-search-forward logseq/filelink-regexp nil t)
               (re-search-forward logseq/blockref-regexp nil t))
      (let* ((match (match-string-no-properties 0))
             ;; text that replaces the match when no node is found
             (fallback (or (match-string-no-properties 1) ""))
             ;; `logseq/--get-id-part' runs `string-match', which clobbers
             ;; the match data that `replace-match' needs below
             (saved-match-data (match-data))
             (id-part (logseq/--get-id-part match))
             (org-id (gethash id-part hmap))
             (node-struct (and org-id (org-roam-node-from-id org-id))))
        (set-match-data saved-match-data)
        (if node-struct
            (progn
              (replace-match "" t t)
              ;; HACK: prevent org-roam-node-insert from reading a node from
              ;; user; instead just use our node
              (cl-letf (((symbol-function 'org-roam-node-read)
                         (lambda (&rest _args) node-struct)))
                (org-roam-node-insert)))
          ;; Insert the fallback literally: it comes from the file, so it may
          ;; contain backslashes, and its case must not be adjusted.
          (replace-match fallback t t)
          ;; MATCH is passed as an argument, never as the format string, so a
          ;; `%' in it cannot break `message'.
          (message "No node found for %s" match)))
      ;; Restart from the top: the searches above are tried in priority
      ;; order, each from point.
      (goto-char (point-min)))
    (save-buffer)))
(defun logseq/replace-with-org-links (graphdir hmap)
  "Replace Logseq links with org-id links in the Org files under GRAPHDIR.
Run `logseq/--replace-with-org-links-in-file' with HMAP on every file whose
name ends in \".org\"."
  ;; Expand GRAPHDIR so the file names handed to the per-file function are
  ;; absolute and do not depend on the current buffer's `default-directory'.
  (dolist (file (directory-files-recursively (expand-file-name graphdir)
                                             "\\.org\\'"))
    (logseq/--replace-with-org-links-in-file file hmap)))
;; (let* ((file "~/scratchdir/logseq_main_/pages/linux/arch/installation.org")
;; (table (logseq/--convert-id-links-in-file file (make-hash-table))))
;; (org-roam-db-sync t)
;; (logseq/--replace-with-org-links-in-file file table))
(defun logseq/convert-links (graphdir)
  "Convert all Logseq links in the Org files under GRAPHDIR to org-id links.
First assign org ids (`logseq/convert-id-links'), then sync the org-roam
database with `org-roam-directory' bound to GRAPHDIR, and finally rewrite
the links (`logseq/replace-with-org-links')."
  ;; Load org-roam before binding its variable: `org-roam-directory' has to
  ;; be a special variable for the `let*' below to bind it dynamically.
  (require 'org-roam)
  (defvar org-roam-directory)
  ;; NOTE(review): `org-roam-db-location' is not rebound here, so the forced
  ;; sync below goes to whatever database org-roam is configured to use.
  (let* ((graphdir (expand-file-name graphdir))
         (table (logseq/convert-id-links graphdir))
         (org-roam-directory graphdir))
    (org-roam-db-sync t)
    (logseq/replace-with-org-links graphdir table)))

4
delete Executable file
View file

@ -0,0 +1,4 @@
#!/bin/bash
# Strip every 'collapsed:: ' property, together with the rest of its line,
# from a file.
# Usage: delete FILE   (FILE is edited in place)
page=$1
sed -i 's/collapsed:: .*//' "$page"

20
headings Executable file
View file

@ -0,0 +1,20 @@
#!/bin/bash
# Convert nested lists to #, ##, ###... so that pandoc's Org converter will turn
# them into nested headings.
# Usage: headings FILE   (FILE is edited in place)
#
# Indentation is matched with explicit \t escapes: one Tab per nesting level.
# Literal whitespace inside the regexes is easy to lose or confuse with a
# space, and a plain space there would also rewrite ordinary " -" text.
perl -pi -e '
# Add an additional layer of nesting - all text in org files should be under a
# heading
s/^/\t/;
# Discard any existing Markdown header syntax ("#" characters after the list bullet)
s/(\t*- )#* /$1/;
# For each level of indentation, add a "#"
s/\t(?=\t*-)/#/g;
# Finally, remove list bullets
s/^(#*)-/$1/;
# Remove any Tab characters remaining (eg. they will still be present in code
# blocks)
s/^\t+//' "$1"
# Add newlines between headings - apparently this is required by Markdown syntax
sed -i -E 's/^#/\'$'\n#/' "$1"

14
links Executable file
View file

@ -0,0 +1,14 @@
#!/bin/bash
# Convert Logseq link syntax into standard Markdown link syntax.
# Usage: links FILE   (FILE is edited in place)
# Changing the path of links to the assets folder - if you don't know why you'd
# need this, then comment it out
sed -i 's/\[\.\.\/assets/\[assets/g' "$1"
# Convert '[[pagename]]' links.
# The page name is matched as "anything but ]" rather than ".*": a greedy ".*"
# would treat '[[a]] and [[b]]' on one line as a single link.
sed -E -i 's/\[\[([^]]*)\]\]/[\1](\1)/g' "$1"
# Convert '#pagename' links
sed -E -i '/```/,/```/ !s/ #([^ ]+)/ [\1](\1)/g' "$1"
# ('!' inverts range - everywhere except in code blocks. We need this because
# code comments may begin with '#')
# ('!' inverts range - everywhere except in code blocks. We need this because
# code comments may begin with '#')

16
logseq-migration Executable file
View file

@ -0,0 +1,16 @@
#!/bin/bash
# Main script, used to convert all the files in a directory to org files. Call
# it with the directory as argument: for example, `./logseq-migration pages`.
# This will create a copy of `pages` called `pages_`, and the conversion will be
# applied to all files in `pages_` (the original `pages` is left untouched).
# Then it will copy `pages_` to `pages__`, which will serve as a backup for when
# you do further processing on `pages_`.
if [[ -z $1 ]]; then
    echo "usage: ${0##*/} DIRECTORY" >&2
    exit 1
fi
# Drop a trailing slash, so that "$src"_ names a sibling directory ("pages_")
# and not a child of the source ("pages/_").
src=${1%/}
if [[ ! -d $src ]]; then
    echo "${0##*/}: not a directory: $src" >&2
    exit 1
fi
# Look for the helper scripts next to this one first, so the repo works from
# wherever it was cloned; fall back to the historical install location.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
[[ -x $scriptdir/preproc ]] || scriptdir=~/bin/scripts/logseq-migration
export scriptdir
# -f: the output directories do not exist yet on a first run
rm -rf -- "$src"_ "$src"__
cp -r -- "$src" "$src"_
"$scriptdir"/preproc "$src"_
"$scriptdir"/convert "$src"_
"$scriptdir"/postproc "$src"_
cp -r -- "$src"_ "$src"__

10
logseq-migration.el Normal file
View file

@ -0,0 +1,10 @@
;;; logseq-migration.el --- elisp processing of converted logseq graph -*- lexical-binding: t; -*-
;; Load the helper files from this file's own directory, so it does not matter
;; what `default-directory' is when this code is loaded or evaluated.
(let ((here (file-name-directory
             (or load-file-name buffer-file-name default-directory))))
  (load-file (expand-file-name "convert-links.el" here))
  (load-file (expand-file-name "remove-custom-ids.el" here))
  (load-file (expand-file-name "remove-logseq-property-entries.el" here)))
;; The graph directory is expanded once, up front: the conversion visits many
;; files, and a relative name must not be re-resolved against any of them.
(let ((graphdir (expand-file-name "path/to/my/graph/pages_"))) ;; change as needed
  (logseq/remove-custom-ids graphdir)
  (logseq/convert-links graphdir)
  ;; comment out the below line if you don't want to delete logseq properties
  (logseq/remove-logseq-property-entries graphdir))

20
namespaces Executable file
View file

@ -0,0 +1,20 @@
#!/bin/bash
# Represent Logseq's 'namespaces' by moving the page files into directories.
# For example, a page named 'a/b/c' in Logseq, whose file is named 'a___b___c',
# will be stored as 'c' in the path 'a/b' (relative to the directory the file
# was in).
# Usage: namespaces FILE
src=$1
srcdir=$(dirname -- "$src")
name=$(basename -- "$src")
# If the page is not under a namespace there is nothing to move.
[[ $name == *___* ]] || exit 0
# Only the file name is rewritten; the directory it lives in is used as given,
# so both relative and absolute paths work.
dest=$srcdir/${name//___//}
mkdir -p -- "$(dirname -- "$dest")"
mv -- "$src" "$dest"

4
pandoc-cmd Executable file
View file

@ -0,0 +1,4 @@
#!/bin/bash
# Run pandoc on one Markdown file, writing the Org result next to it: the same
# path with the final extension replaced by .org.
# Usage: pandoc-cmd FILE
input=$1
output=${input%.*}.org
pandoc --wrap=none -f markdown -t org -o "$output" "$input"

5
postproc Executable file
View file

@ -0,0 +1,5 @@
#!/bin/bash
# Post-processing of the converted files: give every .org file under the graph
# directory a "#+title:" line via add_title.
# Usage: postproc [GRAPHDIR]   (GRAPHDIR defaults to the current directory)
#
# Look for add_title next to this script first, so the repo works from
# wherever it was cloned; fall back to the historical install location.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
[[ -x $scriptdir/add_title ]] || scriptdir=~/bin/scripts/logseq-migration
# Resolve the default once, so that find and add_title agree on the prefix
# that add_title strips from each path.
graphdir=${1:-.}
find "$graphdir" -wholename "*.org" -exec "$scriptdir"/add_title "$graphdir" '{}' \;

6
preproc Executable file
View file

@ -0,0 +1,6 @@
#!/bin/bash
# Overall preprocessing of Logseq markdown before converting to org
# Usage: preproc [DIR]   (DIR defaults to the current directory)
#
# Look for the helper scripts next to this one first, so the repo works from
# wherever it was cloned; fall back to the historical install location.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
[[ -x $scriptdir/properties ]] || scriptdir=~/bin/scripts/logseq-migration
root=${1:-.}
# Pass 1: in-place text rewrites. Each step runs only if the previous one
# succeeded for that file.
find "$root" -wholename "*.md" -exec "$scriptdir"/properties '{}' \; -exec "$scriptdir"/backtick '{}' \; -exec "$scriptdir"/delete '{}' \; -exec "$scriptdir"/headings '{}' \; -exec "$scriptdir"/links '{}' \;
# Pass 2: namespaces moves files into newly created subdirectories. It gets
# its own traversal so that a moved file which find happens to encounter again
# can never be sent through the text rewrites a second time.
find "$root" -wholename "*.md" -exec "$scriptdir"/namespaces '{}' \;

5
properties Executable file
View file

@ -0,0 +1,5 @@
#!/bin/sh
# Move any property into a block, otherwise the pandoc org parser merges them
# all into a single line for some reason
# Usage: properties FILE   (FILE is edited in place)
# Every line that starts with lowercase letters followed by '::' is replaced by
# a '- !property-deleteme!' bullet line, followed by the original line on a new
# line after one space of indentation. The '!property-deleteme!' marker is what
# logseq/remove-logseq-property-entries looks for in the elisp post-processing.
sed -E -i 's/^[a-z]+::.*$/- !property-deleteme!\n \0/' "$1"

7
remove-custom-ids.el Normal file
View file

@ -0,0 +1,7 @@
;;; remove-custom-ids.el --- Remove CUSTOM_ID property assigned by pandoc org parser -*- lexical-binding: t; -*-
(defun logseq/remove-custom-ids (graphdir)
  "Delete every CUSTOM_ID property from the Org files under GRAPHDIR.
Each file whose name ends in \".org\" is read into a temporary buffer,
stripped of its CUSTOM_ID properties with `org-delete-property-globally',
and written back in place."
  (require 'org)
  (dolist (file (directory-files-recursively graphdir "\\.org\\'"))
    (with-temp-file file
      (insert-file-contents file)
      ;; Org's property functions are meant to run in Org buffers, so turn on
      ;; `org-mode' in the temporary buffer; mode hooks are suppressed because
      ;; this buffer is never shown to the user.
      (delay-mode-hooks (org-mode))
      (org-delete-property-globally "CUSTOM_ID"))))

View file

@ -0,0 +1,10 @@
;;; remove-logseq-property-entries.el --- Remove logseq property subtrees -*- lexical-binding: t; -*-
;; this will write to your kill ring!
(defun logseq/remove-logseq-property-entries (graphdir)
  "In the 'properties' shell script, we moved each logseq property into a block
to prevent pandoc's org parser from messing them up. Now that we don't need the
properties anymore, we delete them.

Operates on every file under GRAPHDIR whose name ends in \".org\".  The
entries are removed with `org-cut-subtree', so they end up in the kill ring."
  ;; NOTE(review): the second argument is an Org match string; confirm that it
  ;; selects exactly the `!property-deleteme!' headings.
  ;; NOTE(review): nothing here saves the visited buffers -- confirm whether a
  ;; separate save step is expected afterwards.
  (org-map-entries #'org-cut-subtree
                   "!property-deleteme!"
                   (directory-files-recursively graphdir "\\.org\\'")))