(defpackage geiriadur-steel (:use common-lisp geiriadur)) (in-package geiriadur-steel) (defparameter *orb* (CORBA:ORB_init)) (defun file-to-object (fn) (op:string_to_object *orb* (with-open-file (rd fn :direction :input) (read-line rd)))) (defvar *dict* nil) (defvar *master* nil) (defvar *once* nil) (defvar *user* nil) (defvar *password*) (defun connect-to-dictionary () (setf *master* (file-to-object "gweithdy.ior") *dict* (file-to-object "geiriadur.ior"))) (defun init () (setf drakma:*drakma-default-external-format* :utf-8) (connect-to-dictionary)) (defun corba-call (fn &rest args) (handler-case (apply fn args) (omg.org/corba:object_not_exist () (progn (connect-to-dictionary) (apply fn args))) (omg.org/corba:comm_failure () (progn (connect-to-dictionary) (apply fn args))) (omg.org/corba:transient () (progn (connect-to-dictionary) (apply fn args))))) (defun add-writers-digest (args) (append args (list *user* (md5:md5sum-sequence (format nil "~a~a~a" *user* *once* *password*))))) (defun secure-corba-call (&rest fargus) (multiple-value-bind (ret once) (handler-case (apply #'corba-call (add-writers-digest fargus)) (dictionary:nopermission () (progn (setf *once* (corba-call #'op:once *master* *user*)) (apply #'corba-call (add-writers-digest fargus))))) (setf *once* once) ret)) (defun starling-transliteration (word) "почему-то не работает с буквой Ф" (with-output-to-string (s) (princ "[" s) (loop for w across word do (princ (case w (#\а #\a) (#\б #\b) (#\в #\v) (#\г #\g) (#\д #\d) (#\е "je") (#\ё "jo") (#\ж "zh") (#\з #\z) (#\и #\i) (#\й #\j) (#\к #\k) (#\л #\l) (#\м #\m) (#\н #\n) (#\о #\o) (#\п #\p) (#\р #\r) (#\с #\s) (#\т #\t) (#\у #\u) (#\ф #\f) (#\х "kh") (#\ц #\c) (#\ч "ch") (#\ш "sh") (#\щ "shh") (#\ъ #\`) (#\ы #\y) (#\ь #\') (#\э #\e) (#\ю "ju") (#\я "ja")) s)) (princ "]" s) s)) (defun neither-scripts-nor-empty-texts (node) (stp:filter-children #'(lambda (c) (not (typecase c (stp:element (member (stp:local-name c) '("script" "br") :test #'string=)) (stp:text (or (string= " " (stp:data c)) (string= " " (stp:data c))))))) node)) (defun dwyn (word) (split-sequence:split-sequence-if #'(lambda (n) (and (eq 'stp:element (type-of n)) (string= "hr" (stp:local-name n)))) (neither-scripts-nor-empty-texts (stp:nth-child 1 (stp:first-child (chtml:parse (substitute #\Space #\Dle (drakma:http-request "http://starling.rinet.ru/cgi-bin/morph.cgi" :parameters `(("word" . ,(format nil "[~a]" word))))) (stp:make-builder))))) :remove-empty-subseqs t)) (defun choose (nodes word part i) (let ((options (remove-if #'null (mapcar #'(lambda (n) (when (typecase n (stp:text t) (stp:element (and (string= "p" (stp:local-name n)) (stp:list-children n) (eq 'stp:text (type-of (stp:first-child n)))))) (multiple-value-bind (match regs) (cl-ppcre:scan-to-strings (format nil "([0-9]+\\. )*Исходная форма: ~a$" word) (cl-ppcre:regex-replace-all "\"|'" (stp:data (typecase n (stp:text n) (t (stp:first-child n)))) "")) (when match (cl-ppcre:register-groups-bind (p r) ("^Словарная информация:[ ]+([^ ]+) (.*)$" (stp:data (stp:first-child (stp:next-sibling n)))) (when (member p (nth part '(("нсв" "св-нсв" "св" "св," "нсв," "св-нсв,") ("м" "ж" "с" "мо" "жо" "мн." "мо//жо," "с//м," "мо-жо" "со" "м//ж," "жо//ж," "мо//м," "ж," "м//с," "м," "ж," "с," "мо," "жо," "ж//жо," "м//мо," ) ("п" "п,") ("мс" "мп" "мс-п") ("н") ("ч" "числ.-п" "числ.-п.") )) :test #'string=) (list (if (null (aref regs 0)) 1 (parse-integer (aref regs 0) :junk-allowed t)) (concatenate 'string p " " r) p))))))) (car nodes))))) (assert (not (null options))) (nth (min i (1- (length options))) options))) (defun genitive (attr pron) "иго, благо" (if (not (cl-ppcre:scan "Gen|Acc" attr)) pron (cl-ppcre:regex-replace "gə(sʲə)*$" pron "və\\1"))) (defun form-form (id text attr) (mapcar #'(lambda (t1) (let ((txt (cl-ppcre:regex-replace "([аэыоуяеиёю])['\"]" t1 (lambda (match reg1) (if (string= "е\"" match) "Ё" (string-upcase reg1))) :simple-calls t))) (dictionary:word :id 0 :lang 0 :prt id :attr attr :value (remove #\` (string-downcase txt)) :pron (genitive attr (remove #\* (geiriadur:transcribe txt)))))) (cl-ppcre:split ", |//" (string-trim " " text)))) (defun noun (id nodes) (when nodes (assert (= 1 (length nodes))) (assert (and (eq 'stp:element (type-of (car nodes))) (string= "table" (stp:local-name (car nodes))))) (let (forms) (dolist (row (cdr (stp:list-children (stp:first-child (car nodes))))) (let* ((cas (cdr (assoc (stp:data (stp:first-child (stp:first-child row))) '(("Nominative" . "Nom.") ("Genitive" . "Gen.") ("Genitive 2" . "Part.") ("Dative" . "Dat.") ("Accusative inanimate" . "Acc.|Inan.") ("Accusative animate" . "Acc.|Anim.") ("Instrumental" . "Ins.") ("Locative" . "Praep.") ("Locative 2" . "Loc.")) :test #'string=))) (cells (cdr (stp:list-children row))) (sg (remove-if #'(lambda (c) (zerop (stp:number-of-children c))) (butlast cells))) (pl (remove-if #'(lambda (c) (zerop (stp:number-of-children c))) (last cells)))) (assert (not (null cas))) (assert (> 2 (length sg))) (dolist (c sg) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string cas "|Sg."))) (push f forms))) (dolist (c pl) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string cas "|Pl."))) (push f forms))))) (nreverse forms)))) (defun adjective (id nodes &optional (abut "")) (when nodes (assert (> 4 (length nodes))) (assert (and (eq 'stp:element (type-of (car nodes))) (string= "table" (stp:local-name (car nodes))))) (let (forms) (dolist (row (cdr (stp:list-children (stp:first-child (car nodes))))) (let* ((cas (cdr (assoc (stp:data (stp:first-child (stp:first-child row))) '(("Nominative" . "Nom.") ("Genitive" . "Gen.") ("Genitive 2" . "Part.") ("Dative" . "Dat.") ("Accusative inanimate" . "Acc.|Inan.") ("Accusative animate" . "Acc.|Anim.") ("Instrumental" . "Ins.") ("Locative" . "Praep.") ("Locative 2" . "Loc.") ("Short form" . "Brev.")) :test #'string=))) (cells (cdr (stp:list-children row)))) (assert (not (null cas))) (assert (> 5 (length cells))) (map nil #'(lambda (c a) (when (not (zerop (stp:number-of-children c))) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string abut cas a))) (push f forms)))) cells '("|Sg.|Masc." "|Sg.|Fem." "|Sg.|Neut." "|Pl.")))) (when (third nodes) (dolist (f (form-form id (stp:data (third nodes)) (concatenate 'string abut "Comp."))) (push f forms))) (nreverse forms)))) (defun collect-positions-if (predicate seq &optional (start 0) positions) (let ((p (position-if predicate seq :start start))) (if (null p) (nreverse positions) (collect-positions-if predicate seq (1+ p) (push p positions))))) (defun preserving-split-if (predicate seq) (labels ((splitter (seq positions parts) (if (or (null (car positions)) (null (cadr positions))) (nreverse parts) (splitter seq (cdr positions) (push (subseq seq (car positions) (cadr positions)) parts))))) (splitter seq (sort (adjoin (length seq) (adjoin 0 (collect-positions-if predicate seq))) #'<) nil))) (defun verb (id nodes) (when nodes (let ((forms) (i+voices (preserving-split-if #'(lambda (n) (and (eq 'stp:element (type-of n)) (string= "h2" (stp:local-name n)))) nodes))) (assert (> 4 (length i+voices))) (assert (string= "Infinitive:" (stp:data (stp:first-child (caar i+voices))))) (dolist (f (form-form id (stp:data (second (car i+voices))) "Inf.")) (push f forms)) (dolist (voice (cdr i+voices)) (let ((voi (cdr (assoc (stp:data (stp:first-child (car voice))) '(("Active voice" . "") ("Passive voice" . "Pass.|")) :test #'string=)))) (assert (not (null voi))) (dolist (tense (preserving-split-if #'(lambda (n) (and (eq 'stp:element (type-of n)) (string= "h3" (stp:local-name n)))) (cdr voice))) (assert (member (stp:data (stp:first-child (car tense))) '("Present/future tense" "Past tense" "Imperative" "Present participle" "Past participle") :test #'string=)) (cond ((string= "Present/future tense" (stp:data (stp:first-child (car tense)))) (assert (> 5 (length tense))) (assert (and (eq 'stp:element (type-of (second tense))) (string= "table" (stp:local-name (second tense))))) (let ((time "Praes.")) (dolist (row (cdr (stp:list-children (stp:first-child (second tense))))) (let ((per (cdr (assoc (stp:data (stp:first-child (stp:first-child row))) '(("1 person" . "|1") ("2 person" . "|2") ("3 person" . "|3")) :test #'string=))) (cells (cdr (stp:list-children row)))) (assert (not (null per))) (assert (> 3 (length cells))) (map nil #'(lambda (c a) (when (not (zerop (stp:number-of-children c))) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string voi time per a))) (push f forms)))) cells '("|Sg." "|Pl.")))) (when (fourth tense) (dolist (f (form-form id (stp:data (fourth tense)) (concatenate 'string voi time "|Gerund."))) (push f forms))))) ((string= "Past tense" (stp:data (stp:first-child (car tense)))) (assert (> 5 (length tense))) (assert (and (eq 'stp:element (type-of (second tense))) (string= "table" (stp:local-name (second tense))))) (let ((time "Praet.")) (dolist (row (cdr (stp:list-children (stp:first-child (second tense))))) (let ((cells (stp:list-children row))) (assert (= 4 (length cells))) (map nil #'(lambda (c a) (when (not (zerop (stp:number-of-children c))) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string voi time a))) (push f forms)))) cells '("|Sg.|Masc." "|Sg.|Fem." "|Sg.|Neut." "|Pl.")))) (when (fourth tense) (dolist (f (form-form id (stp:data (fourth tense)) (concatenate 'string voi time "|Gerund."))) (push f forms))))) ((string= "Imperative" (stp:data (stp:first-child (car tense)))) (assert (> 3 (length tense))) (assert (and (eq 'stp:element (type-of (second tense))) (string= "table" (stp:local-name (second tense))))) (let ((time "Imp.")) (dolist (row (cdr (stp:list-children (stp:first-child (second tense))))) (let ((cells (stp:list-children row))) (assert (= 2 (length cells))) (map nil #'(lambda (c a) (when (not (zerop (stp:number-of-children c))) (dolist (f (form-form id (stp:data (stp:first-child c)) (concatenate 'string voi time a))) (push f forms)))) cells '("|Sg." "|Pl.")))))) ((string= "Present participle" (stp:data (stp:first-child (car tense)))) (assert (> 3 (length tense))) (assert (and (eq 'stp:element (type-of (second tense))) (string= "table" (stp:local-name (second tense))))) (let ((time "Particip.|")) (dolist (f (adjective id (cdr tense) (concatenate 'string voi time))) (push f forms)))) ((string= "Past participle" (stp:data (stp:first-child (car tense)))) (assert (> 3 (length tense))) (assert (and (eq 'stp:element (type-of (second tense))) (string= "table" (stp:local-name (second tense))))) (let ((time "Particip.II|")) (dolist (f (adjective id (cdr tense) (concatenate 'string voi time))) (push f forms)))) )))) (nreverse forms)))) (defun get-words (&optional (o 0) words) (let* ((i 2000) (w (corba-call #'op:list *master* :s 0 ".*" t 0 nil o i))) (loop for v across w do (push v words)) (if (> i (length w)) (sort words #'< :key #'op:id) (get-words (+ o i) words)))) (defun get-word (nodes w n &optional write) (let ((ch (choose nodes (op:value w) (op:prt w) n))) (when write (setf (op:attr w) (cl-ppcre:regex-replace "(\\|)*$" (op:attr w) (format nil "|" (cadr ch)))) (secure-corba-call #'op:edit *master* :s w) (loop for f across (corba-call #'op:flist *master* (op:id w) t) do (secure-corba-call #'op:destroy *master* :f (op:id f)))) (let ((forms (funcall (case (op:prt w) (0 #'verb) (1 #'noun) (2 #'adjective) (3 (cond ((string= "мс-п" (third ch)) #'adjective) ((string= "мс" (third ch)) #'noun) ((string= "мп" (third ch)) #'adjective) )) (5 (cond ((string= "ч" (third ch)) #'noun) ((search "числ.-п" (third ch)) #'adjective) )) ) (op:id w) (remove "variant:" (nth (car ch) nodes) :key #'(lambda (c) (if (not (eq 'stp:text (type-of c))) "" (stp:data c))) :test #'search)))) (when write (dolist (f forms) (secure-corba-call #'op:create *master* :f f nil))) (format t " ~a ~a" (cadr ch) (length forms))))) (defun dur (words part &key (err t) (start 0) (end most-positive-fixnum)) (let ((word "") (n 0) nodes) (dolist (w (remove-if-not #'(lambda (v) (and (< start (op:id v) end) (= part (op:prt v)))) words)) (format t "~a ~a" (op:id w) (op:value w)) (if (string= word (op:value w)) (incf n) (setf word (op:value w) nodes (dwyn (op:value w)) n 0)) (when (cdr nodes) (if err (get-word nodes w n) (handler-case (get-word nodes w n t) (condition (c) (format t " ~a" c))))) (format t "~%")))) (defun corba-violation () (dolist (d (clorb::io-descriptions-of clorb::*io-system*)) (clorb::connection-close (clorb::io-descriptor-connection d)))) (defun local-name (c) (when (eq 'stp:element (type-of c)) (stp:local-name c))) (defun delete-by-name (name c) (stp:delete-child name c :test #'string-equal :key #'local-name)) (defun irish (&optional (word "Aidbhint")) (let ((body (stp:nth-child 1 ; body (stp:first-child ; html (chtml:parse (remove #\Lf (remove #\Cr (drakma:http-request "http://193.1.97.44/scripts/focweb/Exe/focloir.exe" :parameters `(("VERBFORM" . "Dearfach") ("LANGUAGE" . "gaeilge") ("WORD" . ,word)) :method :post))) (stp:make-builder)))))) (assert (= 9 (stp:number-of-children body))) (let* ((dl (stp:nth-child 3 body)) (next nil) (len (or (stp:child-position-if #'(lambda (c) (and (eq 'stp:text (type-of (stp:first-child c))) (or (eql 0 (search "Is foirm de" (stp-data (stp:first-child c)))) (eql 0 (search "Tagann" (stp-data (stp:first-child c)))) (eql 0 (search "Níl an focal faighte" (stp-data (stp:first-child c))))))) dl) (stp:number-of-children dl)))) (when (> len 1) (assert (equal "dt" (local-name (stp:first-child dl)))) (do ((1st 0 2nd) (2nd (or (stp:child-position-if #'(lambda (c) (equal "dt" (local-name c))) dl :start 1 :end len) len) (or (when (> len 2nd) (stp:child-position-if #'(lambda (c) (equal "dt" (local-name c))) dl :start (1+ 2nd) :end len)) len))) ((= 1st len)) (let ((dt (stp:nth-child (+ 0 1st) dl))) (delete-by-name "a" dt) (stp:delete-child-if #'(lambda (c) (and (eq 'stp:text (type-of c)) (zerop (length (string-trim " " (stp-data c)))))) dt) (cond ((= 2 (stp:number-of-children dt)) (format t "~a ~a~%" (stp:data (stp:first-child (stp:nth-child 0 dt))) (stp:data (stp:nth-child 1 dt)))) ((and (= 1 (stp:number-of-children dt)) (eq 'stp:text (type-of (stp:first-child (stp:nth-child (+ 1 1st) dl)))) (or (eql 0 (search "=" (stp-data (stp:first-child (stp:nth-child (+ 1 1st) dl))))) (eql 0 (search "faoi" (stp-data (stp:first-child (stp:nth-child (+ 1 1st) dl))))))) nil) (t (assert (= 2 (stp:number-of-children dt))))) (loop for i from (+ 1 1st) below 2nd do (assert (equal "dd" (local-name (stp:nth-child i dl))))) ; может не быть определения? (let* ((fn (if (and (< 1 (stp:number-of-children (stp:nth-child (+ 1 1st) dl))) (equal "form" (local-name (stp:nth-child 1 (stp:nth-child (+ 1 1st) dl))))) (+ 1 1st) (+ 2 1st))) (form (stp:nth-child 1 (stp:nth-child fn dl)))) (assert (= 5 (stp:number-of-children form))) (setf next (string-trim " " (stp:attribute-value (stp:last-child form) "value")))))) (when (< len (stp:number-of-children dl)) (format t "~tIs foirm de~%"))) ;baininscneach = Fem. ;firinscneach = Masc. ;chéad, dara, tríú, cheathrú díochlaonadh = Decl.1,2,3,4 (when (null next) (cond ((string= word "caidhne") (setf next "caidhp")) ((string= word "comaoineach") (setf next "comard")) ((string= word "croisíní") (setf next "croite")) ((string= word "críostaí") (setf next "críostúil")) ((string= word "díorthach") (setf next "díorthú")) ((string= word "grafadh") (setf next "grafán")) ((string= word "gríobhán") (setf next "gríos")) ;((string= word "gríodán") (setf next "gríos")) ((string= word "guine") (setf next "guiséad")) ((string= word "gáirí") (setf next "gáitéar")) ((string= word "gúshnath") (setf next "gúta")) ((string= word "ina") (setf next "inbhear")) ((string= word "inár") (setf next "iníoctha")) ((string= word "lena") (setf next "leochaileach")) ((string= word "lúipin") (setf next "lúircín")) ((string= word "máistreás") (setf next "máistriúil")) ((string= word "neamhdhlisteanach") (setf next "neamhdhuine")) ((string= word "spéirbhean") (setf next "spéireata")) ((string= word "stráisiun") (setf next "stríoc")) ((string= word "séipéilin") (setf next "séire")) ((string= word "uaibhéallacht") (setf next "uaiféalta")) ((string= word "ule") (setf next "ullamh")) ((string= word "ín") (setf next "íobairt")) ((string= word "óráid") (setf next "óráidí")) (t (error "Níl")))) (when (string= word next) (cond ((string= next "ais-") (setf next "ais")) ((string= next "don") (setf next "dona")) ((string= next "dé") (setf next "déach")) ((string= next "glinne") (setf next "glinniúint")) ((string= next "lúide") (setf next "lúidín")) ((string= next "plás") (setf next "plástar")) ((string= next "raibh") (setf next "raibí")) ((string= next "túis") (setf next "túisce")) ((string= next "úth") (setf next nil)) (t (error next)))) next))) (defun gwyddeleg (&optional (word "Aidbhint")) (when word (gwyddeleg (irish word)))) (defun irish1 (&optional (word "Aidbhint")) (let ((body (stp:nth-child 1 ; body (stp:first-child ; html (chtml:parse (remove #\Lf (remove #\Cr (drakma:http-request "http://193.1.97.44/scripts/focweb/Exe/focloir.exe" :parameters `(("VERBFORM" . "Dearfach") ("LANGUAGE" . "gaeilge") ("WORD" . ,word)) :method :post))) (stp:make-builder)))))) (assert (= 9 (stp:number-of-children body))) (let ((dl (stp:nth-child 3 body))) dl)))