init
This commit is contained in:
commit
35755f0b05
17 changed files with 297 additions and 0 deletions
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
This is a set of rough scripts that I used to help me convert my Logseq graph to org-roam. Use at your own risk, and please read the code before you use it.
|
||||||
|
|
||||||
|
As I no longer have any use for this code (having finished converting my Logseq graph), I will not be maintaining it in any way. If you need any improvements, you'll have to fork the repo and work on them yourself.
|
||||||
|
|
||||||
|
# Supported
|
||||||
|
- conversion of Logseq's weird markdown dialect to org-mode: pandoc does most of the actual conversion, but various scripts are needed to massage Logseq's markdown into something it can understand
|
||||||
|
- converting links: Logseq links/tags are converted to org-roam links; page aliases are supported
|
||||||
|
|
||||||
|
# Not Supported
|
||||||
|
- queries and embeds
|
||||||
|
- images and other file assets
|
||||||
|
- journals (see Instructions below)
|
||||||
|
- many other things I didn't think of, no doubt
|
||||||
|
|
||||||
|
# Requirements
|
||||||
|
Tested with:
|
||||||
|
|
||||||
|
```
|
||||||
|
pandoc 3.1.9
|
||||||
|
emacs 29.1
|
||||||
|
```
|
||||||
|
In theory newer versions should work.
|
||||||
|
|
||||||
|
# Instructions
|
||||||
|
- Clone this repo.
|
||||||
|
- Back up your existing Logseq graph folder, just in case.
|
||||||
|
- These scripts are designed to run on the 'pages' folder of your graph; decide what you're going to do with your journals. I combined them all into a single page like this:
|
||||||
|
``` sh
|
||||||
|
for file in journals/*; do
|
||||||
|
cat "$file" >> pages/journals.md
|
||||||
|
done
|
||||||
|
```
|
||||||
|
- run the shell script `logseq-migration` on your graph's `pages` folder:
|
||||||
|
`path/to/this/repo/logseq-migration pages`
|
||||||
|
This will create a folder named `pages_` containing converted org-mode files, plus a copy of it named `pages__` that serves as a backup; see the comments in `logseq-migration` for details.
|
||||||
|
- In Emacs, edit the `graphdir` path in `logseq-migration.el` so it points at your `pages_` folder, then run the code in that file. This will convert links and do some other post-processing.
|
9
add_title
Executable file
9
add_title
Executable file
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
# Prepend an Org "#+title:" line to a converted page.
#   $1 - graph directory that the page lives under
#   $2 - path of the .org file; it is rewritten in place
# The title is the page's path with the graph directory prefix, one leading
# slash and the ".org" suffix removed.

root=$1
page=$2

# Path of the page relative to the graph directory.
relative=$(sed 's/^\///' <<<"${page#"$root"}")
heading=${relative%.org}

# Build the new contents next to the page, then swap it into place.
scratch="$page".temp
echo "#+title: $heading" > "$scratch"
cat "$page" >> "$scratch"
mv "$scratch" "$page"
|
20
backtick
Executable file
20
backtick
Executable file
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
# Wrap Logseq block ids, block embeds and queries in backticks, editing the
# file given as $1 in place.
# pandoc turns backticked text into Org-mode 'verbatim' notation (eg. =foo=),
# which the elisp post-processing can then find.

# set -x
IFS=$'\n'

target=$1

# Extended regex for a Logseq block id: 8-4-4-4-12 lowercase hex digits.
id_eregex='[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}'

# Extended regex for what may follow "embed": hex digits, parentheses, square
# brackets and dashes.
target_eregex='([0-9a-f]|\(|\)|\[|\]|-)+'

block_id_rule='s/id:: '"$id_eregex"'/`\0`/'
embed_rule='s/\{\{ *embed '"$target_eregex"' *\}\}/`\0`/'
query_rule='s/{{ *query.*}}/`\0`/'

# block ids
sed -E -i "$block_id_rule" "$target"
# embeds
sed -E -i "$embed_rule" "$target"
# queries (no -E here: in a basic regex the braces are literal)
sed -i "$query_rule" "$target"
|
5
convert
Executable file
5
convert
Executable file
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/bash
# Convert every Markdown file under a directory to Org with pandoc-cmd, then
# delete the Markdown original.
#   $1 - directory to search (defaults to the current directory)

# Find the helper scripts next to this script instead of assuming the repo
# was installed at ~/bin/scripts/logseq-migration.
DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd) || exit 1

# The second -exec only runs when pandoc-cmd succeeded, so a file that failed
# to convert is kept rather than deleted.
find "${1:-.}" -wholename "*.md" -exec "$DIR"/pandoc-cmd '{}' \; -exec rm '{}' \;
|
106
convert-links.el
Executable file
106
convert-links.el
Executable file
|
@ -0,0 +1,106 @@
|
||||||
|
;;; convert-links.el --- convert logseq links to org-id links -*- lexical-binding: t; -*-
|
||||||
|
|
||||||
|
;; Declared with `defconst' (rather than a bare `setq') so the variables are
;; known to the byte compiler and carry documentation.

(defconst logseq/id-regexp
  "[0-9a-f]\\{8\\}-\\(?:[0-9a-f]\\{4\\}-\\)\\{3\\}[0-9a-f]\\{12\\}"
  "Regexp matching a Logseq block id (8-4-4-4-12 lowercase hex digits).")

(defconst logseq/id-spec-regexp
  (concat "=id:: " logseq/id-regexp "=\\( \\|\n\\)")
  "Regexp matching a verbatim \"=id:: ID=\" marker and the space or newline after it.")

(defconst logseq/filelink-target-regexp "[^]]*"
  "Regexp matching the target of a file link: anything up to a closing bracket.")

(defconst logseq/alias-regexp "^alias:: \\(.*?\\)$"
  "Regexp matching an \"alias:: \" line; group 1 is the alias list.")
|
||||||
|
|
||||||
|
(defun logseq/--get-id-part (match)
  "Return the lookup key contained in the Logseq link text MATCH.
If MATCH contains a Logseq block id (`logseq/id-regexp'), return that id.
Otherwise, if MATCH is a file link (`logseq/filelink-regexp'), return the
link target.  Return nil when neither applies.

This uses `string-match', so it overwrites the match data."
  (cond
   ;; an id link
   ((string-match logseq/id-regexp match)
    (match-string-no-properties 0 match))
   ;; a file link
   ((string-match logseq/filelink-regexp match)
    (match-string-no-properties 1 match))))
|
||||||
|
|
||||||
|
(defun logseq/--convert-id-links-in-file (file hmap)
  "Generate org-ids in FILE, and record what they stand for in HMAP.
First, convert the file itself into an org-roam node; then, remove logseq
\"id::\" markers and convert the headlines they apply to into org-roam nodes
\(by assigning an org id).

Add the associations to HMAP.  For file nodes, associate the page title, and
every alias listed on an \"alias:: \" line, with the node's id.  For headline
nodes, associate the replaced logseq id with the node id.

FILE is visited with `find-file' and saved afterwards.  Signal an error if
FILE has no \"#+title: \" line.  Return HMAP."
  (find-file file)
  (goto-char (point-min))
  ;; convert the file itself into a node
  (re-search-forward "#\\+title: \\(.*\\)$")
  (let* ((title (match-string-no-properties 1))
         (id (org-id-get-create)))
    (puthash title id hmap)
    ;; Look for aliases without moving point: the scan for logseq ids below
    ;; must start right after the title, otherwise any "id::" marker located
    ;; before the alias line would be skipped.
    (save-excursion
      (when (re-search-forward logseq/alias-regexp nil t)
        (let ((aliases (split-string (match-string-no-properties 1) ", *")))
          (message "%s" aliases)
          (dolist (alias aliases)
            (puthash alias id hmap))))))
  ;; search for logseq ids
  (while (re-search-forward logseq/id-spec-regexp nil t)
    (let ((match (match-string-no-properties 0)))
      ;; Delete the logseq id.  This has to happen before calling
      ;; `logseq/--get-id-part', which overwrites the match data.
      (replace-match "" nil nil)
      (let ((id-part (logseq/--get-id-part match)))
        ;; key is the old logseq id; value is newly created org id for this entry
        (puthash id-part (org-id-get-create) hmap))))
  (save-buffer)
  hmap)
|
||||||
|
|
||||||
|
(defun logseq/convert-id-links (graphdir)
  "Run `logseq/--convert-id-links-in-file' on the org files under GRAPHDIR.
Files are found recursively by matching their name against \"org$\".
Return the hash table in which all the associations were collected."
  ;; The keys are strings, so the table has to compare them with `equal':
  ;; (eq "a" "a") -> nil; (eql "a" "a") -> nil; (equal "a" "a") -> t
  (let ((table (make-hash-table :test 'equal)))
    (mapc (lambda (path)
            (logseq/--convert-id-links-in-file path table))
          (directory-files-recursively graphdir "org$"))
    table))
|
||||||
|
|
||||||
|
;; Declared with `defconst' (rather than a bare `setq') so the variables are
;; known to the byte compiler and carry documentation.

(defconst logseq/embed-regexp
  (concat "={{ *\\(embed\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim block embed; group 1 is the word \"embed\".")

(defconst logseq/query-regexp
  (concat "={{ *\\(query\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim query on a block id; group 1 is the word \"query\".")

;; The description is matched non-greedily.  A greedy ".*" would run up to the
;; last "]]" on the line, so two links on one line would be treated as a
;; single match and the text between them would be lost on replacement.
(defconst logseq/link-regexp
  (concat "\\[\\[file:((" logseq/id-regexp "))\\]\\[\\(.*?\\)\\]\\]")
  "Regexp matching an Org file link to a block id; group 1 is the description.")

(defconst logseq/filelink-regexp
  (concat "\\[\\[file:\\(" logseq/filelink-target-regexp "\\)\\]\\]")
  "Regexp matching an Org file link without description; group 1 is the target.")

(defconst logseq/blockref-regexp
  (concat "((" logseq/id-regexp "))")
  "Regexp matching a Logseq block reference: an id in double parentheses.")
|
||||||
|
|
||||||
|
(defun logseq/--replace-with-org-links-in-file (file hmap)
  "Replace Logseq link syntax in FILE with org-id links.
HMAP holds the associations from Logseq ids, page titles and aliases to
org ids.

Embeds, queries, id links, file links and block references are handled.
When the target is found in HMAP and exists as an org-roam node, the match
is replaced by a link inserted with `org-roam-node-insert'.  Otherwise the
match is replaced by its first regexp group (or by nothing if it has none)
and a message is logged.

FILE is visited with `find-file' and saved afterwards."
  (find-file file)
  (goto-char (point-min))
  (while (or (re-search-forward logseq/embed-regexp nil t)
             (re-search-forward logseq/query-regexp nil t)
             (re-search-forward logseq/link-regexp nil t)
             (re-search-forward logseq/filelink-regexp nil t)
             (re-search-forward logseq/blockref-regexp nil t))
    (let* ((match (match-string-no-properties 0))
           (saved-match-data (match-data))
           (id-part (logseq/--get-id-part match))
           (node-struct (org-roam-node-from-id
                         (gethash id-part hmap))))
      ;; The lookups above overwrite the match data; restore the buffer match
      ;; before replacing anything.
      (set-match-data saved-match-data)
      (if node-struct
          (progn
            (replace-match "" t t)
            ;; HACK: prevent org-roam-node-insert from reading a node from
            ;; user; instead just use our node
            (cl-letf (((symbol-function 'org-roam-node-read)
                       (lambda (&rest _args) node-struct)))
              ;; ((symbol-function 'org-roam-node-formatted)
              ;;  (lambda (node) (org-roam-node-title node))))
              (org-roam-node-insert)))
        ;; FIXEDCASE and LITERAL are both set: the replacement is text taken
        ;; from the file, so it must be inserted as is, even if it contains a
        ;; backslash.
        (replace-match (or (match-string-no-properties 1) "") t t)
        ;; Pass MATCH as an argument, not as the format string, so that a "%"
        ;; in the link text cannot be misread as a format directive.
        (message "No node found for %s" match)))
    ;; Start over from the top: every search above scans the whole buffer.
    (goto-char (point-min)))
  (save-buffer))
|
||||||
|
|
||||||
|
(defun logseq/replace-with-org-links (graphdir hmap)
  "Run `logseq/--replace-with-org-links-in-file' on the org files under GRAPHDIR.
Files are found recursively by matching their name against \"org$\".  HMAP is
passed on unchanged for every file."
  (mapc (lambda (path)
          (logseq/--replace-with-org-links-in-file path hmap))
        (directory-files-recursively graphdir "org$")))
|
||||||
|
|
||||||
|
;; (let* ((file "~/scratchdir/logseq_main_/pages/linux/arch/installation.org")
|
||||||
|
;; (table (logseq/--convert-id-links-in-file file (make-hash-table))))
|
||||||
|
;; (org-roam-db-sync t)
|
||||||
|
;; (logseq/--replace-with-org-links-in-file file table))
|
||||||
|
|
||||||
|
(defun logseq/convert-links (graphdir)
  "Convert the Logseq links of all org files under GRAPHDIR to org-id links.
First assign org ids and collect the associations with
`logseq/convert-id-links'.  Then, with `org-roam-directory' bound to GRAPHDIR,
force a sync of the org-roam database and rewrite the links with
`logseq/replace-with-org-links'."
  ;; `let*' evaluates in order: the id pass runs first, before
  ;; `org-roam-directory' is rebound.
  (let* ((id-table (logseq/convert-id-links graphdir))
         (org-roam-directory graphdir))
    (org-roam-db-sync t)
    (logseq/replace-with-org-links graphdir id-table)))
|
4
delete
Executable file
4
delete
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/bin/bash

# Strip Logseq's 'collapsed:: ' markers from the file given as $1, editing it
# in place: the marker and the rest of its line are removed.
strip_collapsed='s/collapsed:: .*//'

sed -i "$strip_collapsed" "$1"
|
20
headings
Executable file
20
headings
Executable file
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
# Rewrite the nested list items of the file given as $1 (edited in place) as
# Markdown headings (#, ##, ###...), so that pandoc's Org converter will turn
# them into nested headings.
#
# NOTE(review): the comments in this script talk about Tab characters, but the
# patterns as written contain single spaces -- confirm which whitespace
# character is intended for the indentation.

# perl joins the -e fragments in order and runs them against every line.
rules=(
    # Add an additional layer of nesting - all text in org files should be
    # under a heading.
    -e 's/^/ /;'
    # Discard any existing Markdown header syntax ("#" characters after the
    # list bullet).
    -e 's/( *- )#* /$1/;'
    # For each level of indentation in front of a bullet, add a "#".
    -e 's/ (?= *-)/#/g;'
    # Finally, remove list bullets.
    -e 's/^(#*)-/$1/;'
    # Remove any indentation still remaining (eg. it will still be present in
    # code blocks).
    -e 's/^ +//'
)
perl -pi "${rules[@]}" "$1"

# Add newlines between headings - apparently this is required by Markdown syntax
sed -i -E 's/^#/\'$'\n#/' "$1"
|
14
links
Executable file
14
links
Executable file
|
@ -0,0 +1,14 @@
|
||||||
|
#!/bin/bash
# Convert Logseq link syntax into standard Markdown link syntax.
# The file given as $1 is edited in place.

# Changing the path of links to the assets folder - if you don't know why you'd
# need this, then comment it out
sed -i 's/\[\.\.\/assets/\[assets/g' "$1"

# Convert '[[pagename]]' links.
# The page name is matched as "anything except square brackets".  A greedy
# ".*" would run from the first "[[" to the last "]]" on the line, so a line
# holding two links would be collapsed into one broken link.
sed -E -i 's/\[\[([^][]*)\]\]/[\1](\1)/g' "$1"

# Convert '#pagename' links
# ('!' inverts range - everywhere except in code blocks. We need this because
# code comments may begin with '#')
sed -E -i '/```/,/```/ !s/ #([^ ]+)/ [\1](\1)/g' "$1"
|
16
logseq-migration
Executable file
16
logseq-migration
Executable file
|
@ -0,0 +1,16 @@
|
||||||
|
#!/bin/bash

# Main script, used to convert all the files in a directory to org files. Call
# it with the directory as argument: for example, `./logseq-migration pages`.
# This will create a copy of `pages` called `pages_`, and the conversion will be
# applied to all files in `pages_`. Then it will copy `pages_` to `pages__`,
# which will serve as a backup for when you do further processing on `pages_`.
# Any `pages_` and `pages__` left over from a previous run are deleted first.

# Drop a trailing slash: with "pages/" the copy would otherwise be created
# inside the source directory, as "pages/_".
src=${1%/}

if [[ -z $src || ! -d $src ]]; then
    echo "usage: ${0##*/} DIRECTORY" >&2
    exit 1
fi

# The helper scripts live next to this one; do not assume the repo was
# installed at ~/bin/scripts/logseq-migration.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd) || exit 1
export scriptdir

# -f: it is not an error if there is nothing left over from a previous run.
rm -rf -- "${src}_" "${src}__"
# Without the working copy there is nothing to convert.
cp -r -- "$src" "${src}_" || exit 1
"$scriptdir"/preproc "${src}_"
"$scriptdir"/convert "${src}_"
"$scriptdir"/postproc "${src}_"
cp -r -- "${src}_" "${src}__"
|
10
logseq-migration.el
Normal file
10
logseq-migration.el
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
;;; logseq-migration.el --- elisp processing of converted logseq graph -*- lexical-binding: t; -*-
|
||||||
|
|
||||||
|
;; Load the helper files from the directory this file lives in, whether it is
;; being loaded or evaluated from its buffer.  A bare relative name would be
;; resolved against `default-directory', which is only used as a last resort
;; here.
(let ((here (file-name-directory
             (or load-file-name buffer-file-name default-directory))))
  (dolist (lib '("convert-links.el"
                 "remove-custom-ids.el"
                 "remove-logseq-property-entries.el"))
    (load-file (expand-file-name lib here))))

(let ((graphdir "path/to/my/graph/pages_")) ;; change as needed
  (logseq/remove-custom-ids graphdir)
  (logseq/convert-links graphdir)
  ;; comment out the line below if you don't want to delete logseq properties
  (logseq/remove-logseq-property-entries graphdir)
  ) ;; kept on its own line so the line above can be commented out safely
|
20
namespaces
Executable file
20
namespaces
Executable file
|
@ -0,0 +1,20 @@
|
||||||
|
#!/bin/bash
# Represent Logseq's 'namespaces' by moving the page files into directories.
# For example, a page named 'a/b/c' in Logseq, whose file is named 'a___b___c',
# will be stored as 'c' in the path 'a/b'.
#   $1 - path of the page file, relative to the current directory or absolute

file=$1
parent=$(dirname -- "$file")
name=$(basename -- "$file")

# Only the file name encodes namespaces, so the directory part of the path is
# left alone.  If the page is not under a namespace, this yields the same name.
slash=/
newname=${name//___/$slash}

# To avoid same-file errors, run `mv` only if the file would actually be moved.
[[ $newname == "$name" ]] && exit 0

new=$parent/$newname

# echo "$new"
mkdir -p -- "$(dirname -- "$new")" && mv -- "$file" "$new"
|
4
pandoc-cmd
Executable file
4
pandoc-cmd
Executable file
|
@ -0,0 +1,4 @@
|
||||||
|
#!/bin/bash
# Run pandoc on the Markdown file given as $1, writing an Org file with the
# same name but with the extension replaced by ".org".

markdown_file=$1
org_file="${markdown_file%.*}.org"

pandoc --wrap=none -f markdown -t org -o "$org_file" "$markdown_file"
|
5
postproc
Executable file
5
postproc
Executable file
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/bash
# Post-processing of the converted files: run add_title on every Org file
# under the graph directory.
#   $1 - graph directory (defaults to the current directory)

# Find the helper scripts next to this script instead of assuming the repo
# was installed at ~/bin/scripts/logseq-migration.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd) || exit 1

# Resolve the default once, so that find and add_title are given the same
# directory even when no argument was passed.
graphdir=${1:-.}

find "$graphdir" -wholename "*.org" -exec "$scriptdir"/add_title "$graphdir" '{}' \;
|
6
preproc
Executable file
6
preproc
Executable file
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
# Overall preprocessing of Logseq markdown before converting to org
#   $1 - directory to process (defaults to the current directory)

# Find the helper scripts next to this script instead of assuming the repo
# was installed at ~/bin/scripts/logseq-migration.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd) || exit 1

# Take a snapshot of the Markdown files before touching anything.  The last
# step (namespaces) moves files into newly created subdirectories, and find
# does not specify whether files added to the tree during the walk are
# visited; a moved file that got visited again would be preprocessed twice.
# `sort -z` reads all of find's output before printing anything, which is what
# turns the stream into a snapshot.
#
# The list is read on file descriptor 3 so that the helper scripts cannot
# consume it through stdin.
while IFS= read -r -d '' -u 3 page; do
    # Same order as before; as with chained -exec, a failing step skips the
    # remaining steps for that file.
    "$scriptdir"/properties "$page" &&
        "$scriptdir"/backtick "$page" &&
        "$scriptdir"/delete "$page" &&
        "$scriptdir"/headings "$page" &&
        "$scriptdir"/links "$page" &&
        "$scriptdir"/namespaces "$page"
done 3< <(find "${1:-.}" -wholename "*.md" -print0 | sort -z)
|
5
properties
Executable file
5
properties
Executable file
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/sh

# Move any property (a line starting with lowercase letters followed by "::")
# into a block of its own, headed "!property-deleteme!"; otherwise the pandoc
# org parser merges them all into a single line for some reason.
# The file given as $1 is edited in place.
wrap_property='s/^[a-z]+::.*$/- !property-deleteme!\n \0/'

sed -E -i "$wrap_property" "$1"
|
7
remove-custom-ids.el
Normal file
7
remove-custom-ids.el
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
;;; remove-custom-ids.el --- Remove CUSTOM_ID property assigned by pandoc org parser -*- lexical-binding: t; -*-
|
||||||
|
|
||||||
|
(defun logseq/remove-custom-ids (graphdir)
  "Delete every CUSTOM_ID property from the org files under GRAPHDIR.
Files are found recursively by matching their name against \"org$\"; each
one is rewritten in place."
  (require 'org)
  (dolist (file (directory-files-recursively graphdir "org$"))
    (with-temp-file file
      (insert-file-contents file)
      ;; The temporary buffer starts out in `fundamental-mode'; switch to Org
      ;; so that Org's property functions run in the kind of buffer they are
      ;; written for.  Mode hooks are not needed for a throw-away buffer.
      (delay-mode-hooks (org-mode))
      (org-delete-property-globally "CUSTOM_ID"))))
|
10
remove-logseq-property-entries.el
Normal file
10
remove-logseq-property-entries.el
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
;;; remove-logseq-property-entries.el --- Remove logseq property subtrees -*- lexical-binding: t; -*-
|
||||||
|
|
||||||
|
;; Declared so that the assignment below refers to Org's dynamic variable
;; even when this file is compiled before Org is loaded.
(defvar org-map-continue-from)

(defun logseq/remove-logseq-property-entries (graphdir)
  "Delete the \"!property-deleteme!\" subtrees from the org files under GRAPHDIR.
In the \"properties\" shell script, we moved each logseq property into a block
to prevent pandoc's org parser from messing them up.  Now that we don't need
the properties anymore, we delete them.

Files are found recursively by matching their name against \"org$\".  Each
file is visited, changed and saved.  The kill ring is left untouched."
  (require 'org)
  (dolist (file (directory-files-recursively graphdir "org$"))
    (with-current-buffer (find-file-noselect file)
      (org-map-entries
       (lambda ()
         ;; The heading text is compared here, for every entry, because the
         ;; MATCH argument of `org-map-entries' selects on tags, properties
         ;; and TODO keywords rather than on heading text.
         (when (string= (org-get-heading t t t t) "!property-deleteme!")
           ;; Once this entry is gone the next heading sits at point.  Tell
           ;; `org-map-entries' to resume from here, otherwise it would skip
           ;; that heading (property entries usually come in a row).
           (setq org-map-continue-from (point))
           (delete-region (point)
                          (save-excursion
                            (org-end-of-subtree t t)
                            (point)))))
       nil 'file)
      (save-buffer))))
|
Loading…
Reference in a new issue