init
This commit is contained in:
commit
35755f0b05
17 changed files with 297 additions and 0 deletions
36
README.md
Normal file
36
README.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
This is a set of rough scripts that I used to help me convert my Logseq graph to org-roam. Use at your own risk, and please read the code before you use it.
|
||||
|
||||
As I no longer have any use for this code (having finished converting my Logseq graph), I will not be maintaining it in any way. If you need any improvements, you'll have to fork the repo and work on them yourself.
|
||||
|
||||
# Supported
|
||||
- conversion of Logseq's weird markdown dialect to org-mode: pandoc does most of the actual conversion, but various scripts are needed to massage Logseq's markdown into something it can understand
|
||||
- converting links: Logseq links/tags are converted to org-roam links; page aliases are supported
|
||||
|
||||
# Not Supported
|
||||
- queries and embeds
|
||||
- images and other file assets
|
||||
- journals (see Instructions below)
|
||||
- many other things I didn't think of, no doubt
|
||||
|
||||
# Requirements
|
||||
Tested with:
|
||||
|
||||
```
|
||||
pandoc 3.1.9
|
||||
emacs 29.1
|
||||
```
|
||||
In theory newer versions should work.
|
||||
|
||||
# Instructions
|
||||
- Clone this repo.
|
||||
- Back up your existing Logseq graph folder, just in case.
|
||||
- These scripts are designed to run on the 'pages' folder of your graph; decide what you're going to do with your journals. I combined them all into a single page like this:
|
||||
``` sh
|
||||
for file in journals/*; do
|
||||
cat "$file" >> pages/journals.md
|
||||
done
|
||||
```
|
||||
- run the shell script `logseq-migration` on your graph's `pages` folder:
|
||||
`path/to/this/repo/logseq-migration pages`
|
||||
This will create a folder named `pages_` containing converted org-mode files, plus a backup copy of it named `pages__`; see the comments in `logseq-migration` for details.
|
||||
- In Emacs, set `graphdir` in `logseq-migration.el` to the path of your `pages_` folder, then run the code in that file. This will convert links and do some other post-processing.
|
9
add_title
Executable file
9
add_title
Executable file
|
@ -0,0 +1,9 @@
|
|||
#!/bin/bash
# Prepend an Org "#+title:" keyword to a converted page.
#
# Usage: add_title GRAPHDIR FILE
#
# The title is FILE's path relative to GRAPHDIR with the ".org" suffix
# removed, so "GRAPHDIR/a/b/c.org" gets the title "a/b/c".

graphdir=$1
page=$2

# Path of the page relative to the graph directory, without the leading slash.
pagepath=${page#"$graphdir"}
pagepath=${pagepath#/}
title=${pagepath%.org}

# Build the new contents in a temporary file and replace the page only when
# the contents were written successfully, so a failed read/write never
# clobbers the original file.
{
    printf '#+title: %s\n' "$title"
    cat "$page"
} > "$page".temp && mv "$page".temp "$page"
|
20
backtick
Executable file
20
backtick
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
# Wrap Logseq block ids, block embeds and queries in backticks.
# pandoc turns backticked text into Org-mode 'verbatim' notation (eg. =foo=),
# which can then be processed with elisp.
#
# Usage: backtick FILE   (FILE is edited in place)

IFS=$'\n'

page=$1

# Extended regex matching a Logseq id
id_eregex='[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}'

# Extended regex matching the target of an embed
target_eregex='([0-9a-f]|\(|\)|\[|\]|-)+'

# Block ids ("id:: <id>") first, then embeds ("{{embed <target>}}").  Both
# expressions use extended regex syntax, so they share a single sed run; sed
# applies them to each line in the order given.
sed -E -i \
    -e 's/id:: '"$id_eregex"'/`\0`/' \
    -e 's/\{\{ *embed '"$target_eregex"' *\}\}/`\0`/' \
    "$page"

# Queries ("{{query ...}}").  This expression is written in basic regex
# syntax, so it runs separately, without -E.
sed -i 's/{{ *query.*}}/`\0`/' "$page"
|
5
convert
Executable file
5
convert
Executable file
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
# Convert every Markdown file under a directory to Org, deleting each ".md"
# file once it has been converted.
#
# Usage: convert [DIR]   (DIR defaults to the current directory)

# Find the helper scripts next to this script instead of assuming a fixed
# install location, so the repo works from wherever it was cloned.
DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)

# find evaluates the second -exec only when the first one exited successfully,
# so a Markdown file is removed only after pandoc-cmd succeeded on it.
find "${1:-.}" -wholename "*.md" -exec "$DIR"/pandoc-cmd '{}' \; -exec rm '{}' \;
|
106
convert-links.el
Executable file
106
convert-links.el
Executable file
|
@ -0,0 +1,106 @@
|
|||
;;; convert-links.el --- convert logseq links to org-id links -*- lexical-binding: t; -*-
|
||||
|
||||
(defconst logseq/id-regexp
  "[0-9a-f]\\{8\\}-\\(?:[0-9a-f]\\{4\\}-\\)\\{3\\}[0-9a-f]\\{12\\}"
  "Regexp matching a Logseq id: 8-4-4-4-12 groups of lowercase hex digits.")

(defconst logseq/id-spec-regexp
  (concat "=id:: " logseq/id-regexp "=\\( \\|\n\\)")
  "Regexp matching a verbatim \"=id:: ID=\" followed by a space or a newline.")

(defconst logseq/filelink-target-regexp "[^]]*"
  "Regexp matching a file link target: any run of characters except \"]\".")

(defconst logseq/alias-regexp "^alias:: \\(.*?\\)$"
  "Regexp matching an \"alias:: ...\" line.
Group 1 is the text that follows \"alias:: \".")
|
||||
|
||||
(defun logseq/--get-id-part (match)
  "Return the part of the string MATCH that identifies its target.
If MATCH contains a Logseq id (see `logseq/id-regexp'), return that id.
Otherwise, if MATCH is a file link (see `logseq/filelink-regexp'), return
the link target.  Return nil when neither pattern matches."
  (cond
   ;; an id link: the id itself is the key
   ((string-match logseq/id-regexp match)
    (match-string-no-properties 0 match))
   ;; a file link: the key is the link target (group 1)
   ((string-match logseq/filelink-regexp match)
    (match-string-no-properties 1 match))))
|
||||
|
||||
(defun logseq/--convert-id-links-in-file (file hmap)
  "Generate org-ids in FILE, and return the association with their contexts.
First, convert the file itself into an org-roam node; then, remove Logseq
\"id::\" properties and convert the headlines they apply to into org-roam
nodes (by assigning an org id).

Add the associations to HMAP.  For file nodes, associate the page title (and
each alias listed on an \"alias::\" line, if there is one) with the node's id.
For headline nodes, associate the replaced Logseq id with the node id.

FILE is visited with `find-file' and saved afterwards.  Signal an error if
FILE has no \"#+title:\" line.  Return HMAP."
  (find-file file)
  (goto-char (point-min))
  ;; convert the file itself into a node
  (unless (re-search-forward "#\\+title: \\(.*\\)$" nil t)
    (error "No #+title: line found in %s" file))
  ;; `let*' so the title is read from the match data before
  ;; `org-id-get-create' gets a chance to change it
  (let* ((title (match-string-no-properties 1))
         (id (org-id-get-create)))
    (puthash title id hmap)
    ;; page aliases resolve to the same node as the title
    (when (re-search-forward logseq/alias-regexp nil t)
      (let ((aliases (split-string (match-string-no-properties 1) ", *")))
        (message "%s" aliases)
        (dolist (alias aliases) (puthash alias id hmap))))
    ;; search for logseq ids
    (while (re-search-forward logseq/id-spec-regexp nil t)
      (let ((match (match-string-no-properties 0)))
        ;; delete logseq id
        (replace-match "" nil nil)
        (let ((id-part (logseq/--get-id-part match)))
          ;; key is the old logseq id; value is newly created org id for
          ;; this entry
          (puthash id-part (org-id-get-create) hmap)))))
  (save-buffer)
  hmap)
|
||||
|
||||
(defun logseq/convert-id-links (graphdir)
  "Run `logseq/--convert-id-links-in-file' on the Org files under GRAPHDIR.
Every file found recursively under GRAPHDIR whose name matches \"org$\" is
processed with one shared hash table, which is returned."
  ;; The keys are strings, so the table has to compare them with `equal':
  ;; (eq "a" "a") -> nil; (eql "a" "a") -> nil; (equal "a" "a") -> t
  (let ((hmap (make-hash-table :test 'equal))
        (org-files (directory-files-recursively graphdir "org$")))
    (mapc (lambda (file) (logseq/--convert-id-links-in-file file hmap))
          org-files)
    hmap))
|
||||
|
||||
(defconst logseq/embed-regexp
  (concat "={{ *\\(embed\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim \"={{embed ID}}=\"; group 1 is \"embed\".")

(defconst logseq/query-regexp
  (concat "={{ *\\(query\\) " logseq/id-regexp " *}}=")
  "Regexp matching a verbatim \"={{query ID}}=\"; group 1 is \"query\".")

(defconst logseq/link-regexp
  (concat "\\[\\[file:((" logseq/id-regexp "))\\]\\[\\(.*\\)\\]\\]")
  "Regexp matching \"[[file:((ID))][DESCRIPTION]]\"; group 1 is DESCRIPTION.")

(defconst logseq/filelink-regexp
  (concat "\\[\\[file:\\(" logseq/filelink-target-regexp "\\)\\]\\]")
  "Regexp matching \"[[file:TARGET]]\"; group 1 is TARGET.")

(defconst logseq/blockref-regexp
  (concat "((" logseq/id-regexp "))")
  "Regexp matching a block reference, \"((ID))\".")
|
||||
|
||||
(defun logseq/--replace-with-org-links-in-file (file hmap)
  "Replace Logseq link syntax in FILE with org-roam links, based on HMAP.
Embeds, queries, id links, file links and block references are searched for
in that order.  For each match, the key returned by `logseq/--get-id-part'
is looked up in HMAP and the resulting id is resolved with
`org-roam-node-from-id'.

If a node is found, the match is replaced by the link that
`org-roam-node-insert' produces for it.  Otherwise the match is replaced by
its first regexp group (or by nothing if there is none) and a message is
logged.  The search restarts from the beginning of the buffer after every
replacement.  FILE is visited with `find-file' and saved at the end."
  (find-file file)
  (goto-char (point-min))
  (while (or (re-search-forward logseq/embed-regexp nil t)
             (re-search-forward logseq/query-regexp nil t)
             (re-search-forward logseq/link-regexp nil t)
             (re-search-forward logseq/filelink-regexp nil t)
             (re-search-forward logseq/blockref-regexp nil t))
    (let* ((match (match-string-no-properties 0))
           ;; the lookups below run their own searches, so remember the
           ;; buffer match and restore it before replacing
           (saved-match-data (match-data))
           (id-part (logseq/--get-id-part match))
           (node-struct (org-roam-node-from-id
                         (gethash id-part hmap))))
      (set-match-data saved-match-data)
      (if node-struct
          (progn
            (replace-match "" nil t)
            ;; HACK: prevent org-roam-node-insert from reading a node from
            ;; user; instead just use our node
            (cl-letf (((symbol-function 'org-roam-node-read)
                       (lambda (&rest _args) node-struct)))
              (org-roam-node-insert)))
        ;; Insert the fallback text literally: it comes from the file and
        ;; may contain backslashes, which `replace-match' would otherwise
        ;; treat as special.
        (replace-match (or (match-string-no-properties 1) "") nil t)
        ;; Pass MATCH as an argument, not as the format string, so a "%"
        ;; in the matched text cannot break the call.
        (message "No node found for %s" match)))
    (goto-char (point-min)))
  (save-buffer))
|
||||
|
||||
(defun logseq/replace-with-org-links (graphdir hmap)
  "Run `logseq/--replace-with-org-links-in-file' on Org files under GRAPHDIR.
Every file found recursively under GRAPHDIR whose name matches \"org$\" is
processed, using the associations in HMAP.  Return nil."
  (let ((pending (directory-files-recursively graphdir "org$")))
    (while pending
      (logseq/--replace-with-org-links-in-file (pop pending) hmap))))
|
||||
|
||||
;; (let* ((file "~/scratchdir/logseq_main_/pages/linux/arch/installation.org")
|
||||
;; (table (logseq/--convert-id-links-in-file file (make-hash-table))))
|
||||
;; (org-roam-db-sync t)
|
||||
;; (logseq/--replace-with-org-links-in-file file table))
|
||||
|
||||
;; Declare the variable special so that the `let' below binds it dynamically
;; even if this file is evaluated or compiled before org-roam is loaded
;; (this file uses lexical binding).
(defvar org-roam-directory)

(defun logseq/convert-links (graphdir)
  "Convert the Logseq links of every Org file under GRAPHDIR to org-roam links.
First assign org ids with `logseq/convert-id-links'.  Then, with
`org-roam-directory' bound to GRAPHDIR, call `org-roam-db-sync' with
argument t and rewrite the links with `logseq/replace-with-org-links'."
  (require 'org-roam)
  (let ((table (logseq/convert-id-links graphdir))
        (org-roam-directory graphdir))
    (org-roam-db-sync t)
    (logseq/replace-with-org-links graphdir table)))
|
4
delete
Executable file
4
delete
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash
# Strip Logseq 'collapsed:: ...' properties from a file, in place.
#
# Usage: delete FILE

page=$1

# Erase everything from 'collapsed:: ' up to the end of the line.
sed -i -e 's/collapsed:: .*//' "$page"
|
20
headings
Executable file
20
headings
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
# Convert nested lists to #, ##, ###... so that pandoc's Org converter will turn
# them into nested headings.
#
# Usage: headings FILE   (FILE is edited in place)
#
# Indentation is matched as Tab characters.  They are written as \t escapes
# rather than literal tabs so the patterns stay visible and survive being
# copied or re-indented.
# NOTE: the perl program is single-quoted for the shell, so the comments
# inside it must not contain a single quote.

perl -pi -e '
    # Add an additional layer of nesting - all text in org files should be
    # under a heading
    s/^/\t/;
    # Discard any existing Markdown header syntax ("#" characters after the
    # list bullet)
    s/(\t*- )#* /$1/;
    # For each level of indentation, add a "#"
    s/\t(?=\t*-)/#/g;
    # Finally, remove list bullets
    s/^(#*)-/$1/;
    # Remove any Tab characters remaining (eg. they will still be present in
    # code blocks)
    s/^\t+//' "$1"

# Add newlines between headings - apparently this is required by Markdown syntax
sed -i -E 's/^#/\'$'\n#/' "$1"
|
14
links
Executable file
14
links
Executable file
|
@ -0,0 +1,14 @@
|
|||
#!/bin/bash
# Convert Logseq link syntax into standard Markdown link syntax.
#
# Usage: links FILE   (FILE is edited in place)

# Changing the path of links to the assets folder - if you don't know why you'd
# need this, then comment it out
sed -i 's/\[\.\.\/assets/\[assets/g' "$1"

# Convert '[[pagename]]' links.  The page name is matched as "anything but ]"
# so that two links on the same line are converted separately instead of being
# swallowed into one match.
sed -E -i 's/\[\[([^]]*)\]\]/[\1](\1)/g' "$1"

# Convert '#pagename' links
sed -E -i '/```/,/```/ !s/ #([^ ]+)/ [\1](\1)/g' "$1"
# ('!' inverts range - everywhere except in code blocks. We need this because
# code comments may begin with '#')
|
16
logseq-migration
Executable file
16
logseq-migration
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

# Main script, used to convert all the files in a directory to org files. Call
# it with the directory as argument: for example, `./logseq-migration pages`.
# This will create a copy of `pages` called `pages_`, and the conversion will be
# applied to all files in `pages_`. Then it will copy `pages_` to `pages__`,
# which will serve as a backup for when you do further processing on `pages_`.
# Any `pages_` and `pages__` left over from an earlier run are deleted first.

if [[ -z $1 ]]; then
    echo "usage: ${0##*/} DIRECTORY" >&2
    exit 1
fi

# Drop a trailing slash, otherwise "pages/" would yield "pages/_", ie. a copy
# of the folder inside itself.
src=${1%/}

# Find the helper scripts next to this script instead of assuming a fixed
# install location, so the repo works from wherever it was cloned.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)
export scriptdir

# -f: the output folders do not exist yet on the first run
rm -rf -- "$src"_ "$src"__
cp -r -- "$src" "$src"_ || exit
"$scriptdir"/preproc "$src"_
"$scriptdir"/convert "$src"_
"$scriptdir"/postproc "$src"_
cp -r -- "$src"_ "$src"__
|
10
logseq-migration.el
Normal file
10
logseq-migration.el
Normal file
|
@ -0,0 +1,10 @@
|
|||
;;; logseq-migration.el --- elisp processing of converted logseq graph -*- lexical-binding: t; -*-
|
||||
|
||||
;; Load the helper libraries from the directory that contains this file, so
;; this works both with `load-file' and when evaluating the buffer, whatever
;; `default-directory' happens to be.
(let ((here (file-name-directory
             (or load-file-name buffer-file-name default-directory))))
  (dolist (lib '("convert-links.el"
                 "remove-custom-ids.el"
                 "remove-logseq-property-entries.el"))
    (load-file (expand-file-name lib here))))

(let ((graphdir "path/to/my/graph/pages_")) ;; change as needed
  (logseq/remove-custom-ids graphdir)
  (logseq/convert-links graphdir)
  ;; comment out the next line if you don't want to delete logseq properties
  (logseq/remove-logseq-property-entries graphdir)
  )
|
20
namespaces
Executable file
20
namespaces
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
# Represent Logseq's 'namespaces' by moving the page files into directories.
# For example, a page named 'a/b/c' in Logseq, whose file is named 'a___b___c',
# will be stored as 'c' in the path 'a/b'.
#
# Usage: namespaces FILE   (FILE is taken relative to the current directory)

# Turn every '___' into '/'; a page that is not under a namespace keeps its
# filename unchanged.
slash=/
target=${1//___/$slash}

# Directory part of the new path
parent=${target%/*.*}
mkdir -p "$PWD/$parent"

from=$PWD/$1
to=$PWD/$target

# To avoid same-file errors, only call `mv` when the file would actually move
# (for a page outside any namespace, $from and $to are the same file).
if [[ $(realpath "$from") != "$(realpath "$to")" ]]; then
    mv "$from" "$to"
fi
|
4
pandoc-cmd
Executable file
4
pandoc-cmd
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash
# Run pandoc on a Markdown file, writing an Org file with the same name but
# the extension replaced by '.org'.
#
# Usage: pandoc-cmd FILE.md

input=$1
output=${input%.*}.org

pandoc --wrap=none --from=markdown --to=org --output="$output" "$input"
|
5
postproc
Executable file
5
postproc
Executable file
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
# Post-processing of the converted files: add a '#+title:' to every '.org'
# file under a directory.
#
# Usage: postproc [DIR]   (DIR defaults to the current directory)

# Find the helper scripts next to this script instead of assuming a fixed
# install location, so the repo works from wherever it was cloned.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)

# Use the same directory for the search and for add_title, which strips it
# from each path to build the title.
graphdir=${1:-.}

find "$graphdir" -wholename "*.org" -exec "$scriptdir"/add_title "$graphdir" '{}' \;
|
6
preproc
Executable file
6
preproc
Executable file
|
@ -0,0 +1,6 @@
|
|||
#!/bin/bash
# Overall preprocessing of Logseq markdown before converting to org
#
# Usage: preproc [DIR]   (DIR defaults to the current directory)

# Find the helper scripts next to this script instead of assuming a fixed
# install location, so the repo works from wherever it was cloned.
scriptdir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)

root=${1:-.}

# Pass 1: in-place edits.  Each step only runs on a file if the previous step
# succeeded on it.
find "$root" -wholename "*.md" \
    -exec "$scriptdir"/properties '{}' \; \
    -exec "$scriptdir"/backtick '{}' \; \
    -exec "$scriptdir"/delete '{}' \; \
    -exec "$scriptdir"/headings '{}' \; \
    -exec "$scriptdir"/links '{}' \;

# Pass 2: move namespaced pages into directories.  This is kept apart from the
# edits because it adds files to the tree while find is walking it, and find
# may or may not visit such files again; the edits must not run twice on a
# page, whereas running namespaces again on an already moved page does nothing.
find "$root" -wholename "*.md" -exec "$scriptdir"/namespaces '{}' \;
|
5
properties
Executable file
5
properties
Executable file
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh
# Give every property line (a line starting with 'name::') a block of its own,
# headed by the marker '!property-deleteme!'.  Without this the pandoc org
# parser merges the properties into a single line for some reason.
#
# Usage: properties FILE   (FILE is edited in place)

page=$1

sed -E -i -e 's/^[a-z]+::.*$/- !property-deleteme!\n \0/' "$page"
|
7
remove-custom-ids.el
Normal file
7
remove-custom-ids.el
Normal file
|
@ -0,0 +1,7 @@
|
|||
;;; remove-custom-ids.el --- Remove CUSTOM_ID property assigned by pandoc org parser -*- lexical-binding: t; -*-
|
||||
|
||||
(require 'org)

(defun logseq/remove-custom-ids (graphdir)
  "Delete the CUSTOM_ID property from every Org file under GRAPHDIR.
Each file found recursively under GRAPHDIR whose name matches \"org$\" is
read into a temporary buffer, stripped of all CUSTOM_ID properties with
`org-delete-property-globally', and written back."
  (dolist (file (directory-files-recursively graphdir "org$"))
    (with-temp-file file
      (insert-file-contents file)
      ;; Put the temporary buffer in Org mode before calling Org functions
      ;; on it; skip startup folding and user hooks, which are not needed
      ;; for a batch edit.
      (let ((org-inhibit-startup t))
        (delay-mode-hooks (org-mode)))
      (org-delete-property-globally "CUSTOM_ID"))))
|
10
remove-logseq-property-entries.el
Normal file
10
remove-logseq-property-entries.el
Normal file
|
@ -0,0 +1,10 @@
|
|||
;;; remove-logseq-property-entries.el --- Remove logseq property subtrees -*- lexical-binding: t; -*-
|
||||
|
||||
;; this will write to your kill ring!
(defun logseq/remove-logseq-property-entries (graphdir)
  "Cut the entries that hold Logseq properties in the Org files of GRAPHDIR.
In the `properties' shell script, each Logseq property was moved into a
block of its own so that the org parser of pandoc would not mess it up.
Those properties are not needed anymore, so `org-cut-subtree' is mapped
over the entries selected by the match string \"!property-deleteme!\" in
every file found recursively under GRAPHDIR whose name matches \"org$\"."
  (let ((org-files (directory-files-recursively graphdir "org$")))
    ;; NOTE(review): `org-map-entries' treats its second argument as an Org
    ;; match string; presumably this selects the placeholder headlines
    ;; created by the `properties' script -- confirm on a copy of the graph.
    ;; NOTE(review): nothing here saves the visited files explicitly --
    ;; confirm the changes end up on disk.
    (org-map-entries #'org-cut-subtree "!property-deleteme!" org-files)))
|
Loading…
Reference in a new issue