(defpackage geiriadur-test (:use common-lisp)) (in-package geiriadur-test) (defparameter *orb* (CORBA:ORB_init)) (defun file-to-object (fn) (op:string_to_object *orb* (with-open-file (rd fn :direction :input) (read-line rd)))) (defvar *slovar* ()) (defvar *dict* ()) (defvar *master* ()) (defun connect-to-dictionary () (setf *master* (file-to-object "gweithdy.ior") *slovar* (file-to-object "geiriadur.ior") *dict* (file-to-object "geiriadur1.ior"))) (defvar *parts* #2A(("v." "г." "b." "v." "br.") ("n." "с." "e." "n." "ainm.") ("a." "п." "a." "a." "aid.") ("pron." "м." "rh." "pron." "for.") ("adv." "н." "adf." "adv." "dobhr.") ("num." "ч." "rhif." "num." "uimh.") ("praep." "пред." "ardd." "prep." "réamhfh.") ("art." "арт." "ban." "art." "alt") ("cj." "сз." "cs." "cj." "cón.") ("part." "част." "geir." "part." "cón.") ("praef." "прист." "rhagdd." "pref." "réimír"))) (defvar *welsh-letter-table* (make-hash-table :test #'equal)) (defvar *welsh-letters* '(("ch" . "cя") ("dd" . "dя") ("ff" . "fя") ("ng" . "gя") ("ll" . "lя") ("ph" . "pя") ("rh" . "rя") ("th" . "tя"))) (defvar *welsh-scanner* (cl-ppcre:create-scanner "ch|dd|ff|ng|ll|ph|rh|th")) (defun init () (dolist (elt *welsh-letters*) (setf (gethash (car elt) *welsh-letter-table*) (cdr elt))) (connect-to-dictionary)) (defun simplify (str) (map 'string (lambda (c) (case c (#\â #\a) (#\ê #\e) (#\î #\i) (#\ô #\o) (#\û #\u) (#\á #\a) (#\é #\e) (#\í #\i) (#\ó #\o) (#\ú #\u) (#\ä #\a) (#\ë #\e) (#\ï #\i) (#\ö #\o) (#\ü #\u) (#\à #\a) (#\è #\e) (#\ì #\i) (#\ò #\o) (#\ù #\u) (#\ŵ #\w) (#\ŷ #\y) (t c))) (string-downcase str))) (defun replacement (match &rest registers) (declare (ignore registers)) (gethash match *welsh-letter-table* match)) (defun welsh-abc (str) (cl-ppcre:regex-replace-all *welsh-scanner* str #'replacement :simple-calls t)) (defun welsh< (str1 str2) (string< (welsh-abc str1) (welsh-abc str2))) (defun get-available-words (fr to) (loop for w across (op:enumerate *slovar* fr to) collect (cons (op:id w) (simplify (op:value w))))) (defmethod print-object ((word dictionary:word) stream) (format stream "~a [~a] ~@[~a ~]~a" (op:value word) (op:pron word) (when (op:prt word) (aref *parts* (op:prt word) 0)) (substitute #\Space #\| (if (not (zerop (op:lang word))) (op:attr word) (cl-ppcre:regex-replace "\\|" (op:attr word) ""))))) (defun format-articles (stream fr to) (dolist (art (sort (get-available-words fr to) (if (= 1 fr) #'welsh< #'string<) :key #'cdr)) (let ((cur 0) (entry (svref (op:find *dict* (car art) to) 0)) (forms (when (> fr 0) (op:flist *master* (car art) t)))) (format stream "~&~%~a" (op:headword entry)) (when (and (< 0 (length forms)) (> 10 (length forms))) (map nil #'(lambda (f) (setf (op:prt f) nil) (format stream " {~a}" f)) forms)) (map nil #'(lambda (mean) (let ((us (floor (op:usuality mean) 10))) (format stream "~%~:[~8T~1*~;~6@a. ~]~@[~a ~]~{~a ~}" (when (/= cur us) (setf cur us)) us (when (< 10 (op:id (op:second mean))) (op:second mean)) (let ((notel (cl-ppcre:split "\\|" (op:note mean)))) (when (> 10 (op:id (op:second mean))) (setf (car notel) (string-trim "( )" (car notel)))) notel))) (map nil #'(lambda (sampla) (format stream "~%~12T~a ~:[=~;≈~] ~a" (op:first sampla) (char= #\► (schar (op:second sampla) 0)) (op:second sampla))) (op:usage mean))) (op:meanings entry)))))