#!/bin/sh
#
# By Aleksey Cheusov
# The following text is encoded with cp1251 character set
#
# latinica: transliterate between Cyrillic text (cp1251) and its Latin
# ("latinica") spelling.  The whole converter is a single gawk program;
# this wrapper only forwards the command-line arguments to it.
#
# Usage: latinica [OPTIONS] [files...]
# Reads the named files (or standard input) and writes to standard output.
#
# NOTE: every Cyrillic literal below is a single cp1251 byte; keep this
# file in cp1251 when editing it.
# NOTE: the program text is a shell single-quoted string, so a literal
# apostrophe inside it has to be spelled with the close-quote,
# double-quoted apostrophe, reopen-quote sequence used throughout.
#

gawk '
# Print the help screen to standard output.
function usage (){
    print "Convert cyrillic text to latinica and vice versa"
    print "using http://www.geocities.com/Athens/Forum/5344/RL/latinica-ref.html"
    print "as a specification"
    print ""
    print "CP-1251 charset is used!!!"
    print ""
    print "Usage: latinica [OPTIONS] [files...]"
    print "OPTIONS:"
    print "    -h --help         display this screen"
    print "    -V --version      display version"
    print "    -r --rus2lat      translates from Russian to latinica"
    print "    -l --lat2rus      translates from latinica to Russian (default)"
}

# Print the program name, version and author to standard output.
function show_version (){
    print "Latinica 0.0.2"
    print "Written by Aleksey Cheusov "
}

# Option parsing.  Recognised options are blanked out of ARGV so that awk
# does not try to open them as input files.
#   mode_lat2rus : direction chosen by the user (1 = lat2rus, 2 = rus2lat)
#   mode         : current translation state, toggled by the main rules
BEGIN {
    mode_lat2rus = 1 # 2 for rus2lat

    # ARGV is indexed 0 .. ARGC-1 and ARGV[0] is the program name.
    for (i = 1; i < ARGC; ++i){
        if (ARGV [i] == "-r" || ARGV [i] == "--rus2lat"){
            mode_lat2rus = 2
            ARGV [i] = ""
        }else if (ARGV [i] == "-l" || ARGV [i] == "--lat2rus"){
            mode_lat2rus = 1
            ARGV [i] = ""
        }else if (ARGV [i] == "-h" || ARGV [i] == "--help"){
            usage()
            exit 0
        }else if (ARGV [i] == "-V" || ARGV [i] == "--version"){
            show_version()
            exit 0
        }
    }

    mode = mode_lat2rus
}

# Convert the latinica string s to Cyrillic and return the result.
# The substitutions are order dependent: longer letter combinations are
# replaced before the shorter ones they contain, sign markers and
# backslash escapes are resolved next, and the one-letter tables run last.
function lat2rus (s) {
    # letters written with a trailing apostrophe/backtick (or leading backtick)
    gsub(/E[\'"'"'`]|`E/, "Ý", s)
    gsub(/J[\'"'"'`]/, "É", s)
    gsub(/e[\'"'"'`]|`e/, "ý", s)
    gsub(/j[\'"'"'`]/, "é", s)

    # four-letter combinations
    gsub(/S[Hh][Cc][hH]/, "Ù", s)
    gsub(/s[hH][cC][hH]/, "ù", s)

    # two-letter combinations
    gsub(/[YJ][Oo]/, "¨", s)
    gsub(/[YJ][Uu]/, "Þ", s)
    gsub(/[YJ][Aa]/, "ß", s)
    gsub(/Z[Hh]/, "Æ", s)
    gsub(/C[Hh]/, "×", s)
    gsub(/S[Hh]/, "Ø", s)

    gsub(/[yj][oO]/, "¸", s)
    gsub(/[yj][uU]/, "þ", s)
    gsub(/[yj][aA]/, "ÿ", s)
    gsub(/z[hH]/, "æ", s)
    gsub(/c[hH]/, "÷", s)
    gsub(/s[hH]/, "ø", s)

    gsub(/J[Ee]/, "Å", s)
    gsub(/j[eE]/, "å", s)

    # explicit case prefixes: ^ forces uppercase, _ forces lowercase
    gsub(/\^[@]/, "Ý", s)
    gsub(/\_[@]/, "ý", s)

    # sign characters between letters; the case of the result follows
    # the neighbouring letters.
    # NOTE(review): "\?" in three of the patterns below looks like a
    # literal question mark in gawk extended regexps rather than an
    # "optional" operator - confirm the intent before changing it.
    s = gensub(/([[:upper:][:lower:]]\?)[\'"'"']([[:lower:]])/, "\\1ü\\2", "g", s)
    s = gensub(/([[:upper:][:lower:]])[`]([[:lower:]])/, "\\1ü\\2", "g", s)
    s = gensub(/([[:upper:][:lower:]]\?)[~]([[:lower:]])/, "\\1ú\\2", "g", s)
    s = gensub(/([[:upper:][:lower:]]\?)[@]([[:lower:]])/, "\\1ý\\2", "g", s)

    # sign characters directly after a lowercase letter
    s = gensub(/([[:lower:]])[\'"'"']/, "\\1ü", "g", s)
    s = gensub(/([[:lower:]])[`]/, "\\1ü", "g", s)
    s = gensub(/([[:lower:]])[~]/, "\\1ú", "g", s)
    s = gensub(/([[:lower:]])[@]/, "\\1ý", "g", s)

    # sign characters directly after an uppercase letter
    s = gensub(/([[:upper:]])[\'"'"']/, "\\1Ü", "g", s)
    s = gensub(/([[:upper:]])[`]/, "\\1Ü", "g", s)
    s = gensub(/([[:upper:]])[~]/, "\\1Ú", "g", s)

    # any remaining @ that is not backslash-escaped, then unescape \@
    s = gensub(/([^\\])@/, "\\1Ý", "g", s)
    gsub(/^@/, "Ý", s)
    gsub(/\\@/, "@", s)

    # explicit case prefixes for the sign characters
    gsub(/\^[\'"'"'`]/, "Ü", s)
    gsub(/\_[\'"'"'`]/, "ü", s)
    gsub(/\^[~]/, "Ú", s)
    gsub(/\_[~]/, "ú", s)

    # drop the backslash from the remaining escaped special characters
    gsub(/\\\x27/, "'"'"'", s)
    gsub(/\\`/, "`", s)
    gsub(/\\~/, "~", s)
    gsub(/\\\\/, "\\", s)

    # single letters, uppercase
    gsub(/A/, "À", s)
    gsub(/B/, "Á", s)
    gsub(/V/, "Â", s)
    gsub(/G/, "Ã", s)
    gsub(/D/, "Ä", s)
    gsub(/E/, "Å", s)
    gsub(/Z/, "Ç", s)
    gsub(/I/, "È", s)
    gsub(/J/, "É", s)
    gsub(/K/, "Ê", s)
    gsub(/L/, "Ë", s)
    gsub(/M/, "Ì", s)
    gsub(/N/, "Í", s)
    gsub(/O/, "Î", s)
    gsub(/P/, "Ï", s)
    gsub(/R/, "Ð", s)
    gsub(/S/, "Ñ", s)
    gsub(/T/, "Ò", s)
    gsub(/U/, "Ó", s)
    gsub(/F/, "Ô", s)
    gsub(/[XH]/, "Õ", s)
    gsub(/C/, "Ö", s)
    gsub(/[WQ]/, "Ù", s)
    gsub(/Y/, "Û", s)

    # single letters, lowercase
    gsub(/a/, "à", s)
    gsub(/b/, "á", s)
    gsub(/v/, "â", s)
    gsub(/g/, "ã", s)
    gsub(/d/, "ä", s)
    gsub(/e/, "å", s)
    gsub(/z/, "ç", s)
    gsub(/i/, "è", s)
    gsub(/j/, "é", s)
    gsub(/k/, "ê", s)
    gsub(/l/, "ë", s)
    gsub(/m/, "ì", s)
    gsub(/n/, "í", s)
    gsub(/o/, "î", s)
    gsub(/p/, "ï", s)
    gsub(/r/, "ð", s)
    gsub(/s/, "ñ", s)
    gsub(/t/, "ò", s)
    gsub(/u/, "ó", s)
    gsub(/f/, "ô", s)
    gsub(/[xh]/, "õ", s)
    gsub(/c/, "ö", s)
    gsub(/[wq]/, "ù", s)
    gsub(/y/, "û", s)

    return s
}

# Convert the Cyrillic string s to latinica and return the result.
# Characters that carry a special meaning in latinica are backslash
# escaped first, so that they are not confused with the markers that the
# letter tables below emit.
function rus2lat (s){
    # special characters
    gsub(/[\\]/, "\\\\", s)
    gsub(/[\'"'"']/, "\\'"'"'", s)
    gsub(/~/, "\\~", s)
    gsub(/`/, "\\`", s)
    gsub(/@/, "\\@", s)

    # lowercase
    gsub(/à/, "a", s)
    gsub(/á/, "b", s)
    gsub(/â/, "v", s)
    gsub(/ã/, "g", s)
    gsub(/ä/, "d", s)
    gsub(/å/, "e", s)
    gsub(/¸/, "yo", s)
    gsub(/æ/, "zh", s)
    gsub(/ç/, "z", s)
    gsub(/è/, "i", s)
    gsub(/é/, "j'"'"'", s)
    gsub(/ê/, "k", s)
    gsub(/ë/, "l", s)
    gsub(/ì/, "m", s)
    gsub(/í/, "n", s)
    gsub(/î/, "o", s)
    gsub(/ï/, "p", s)
    gsub(/ð/, "r", s)
    gsub(/ñ/, "s", s)
    gsub(/ò/, "t", s)
    gsub(/ó/, "u", s)
    gsub(/ô/, "f", s)
    gsub(/õ/, "x", s)
    gsub(/ö/, "c", s)
    gsub(/÷/, "ch", s)
    gsub(/ø/, "sh", s)
    gsub(/ù/, "shch", s)
    gsub(/ú/, "~", s)
    gsub(/û/, "y", s)
    gsub(/ü/, "'"'"'", s)
    gsub(/ý/, "e'"'"'", s)
    gsub(/þ/, "yu", s)
    gsub(/ÿ/, "ya", s)

    # uppercase
    gsub(/À/, "A", s)
    gsub(/Á/, "B", s)
    gsub(/Â/, "V", s)
    gsub(/Ã/, "G", s)
    gsub(/Ä/, "D", s)
    gsub(/Å/, "E", s)
    gsub(/¨/, "YO", s)
    gsub(/Æ/, "ZH", s)
    gsub(/Ç/, "Z", s)
    gsub(/È/, "I", s)
    gsub(/É/, "J'"'"'", s)
    gsub(/Ê/, "K", s)
    gsub(/Ë/, "L", s)
    gsub(/Ì/, "M", s)
    gsub(/Í/, "N", s)
    gsub(/Î/, "O", s)
    gsub(/Ï/, "P", s)
    gsub(/Ð/, "R", s)
    gsub(/Ñ/, "S", s)
    gsub(/Ò/, "T", s)
    gsub(/Ó/, "U", s)
    gsub(/Ô/, "F", s)
    gsub(/Õ/, "X", s)
    gsub(/Ö/, "C", s)
    gsub(/×/, "CH", s)
    gsub(/Ø/, "SH", s)
    gsub(/Ù/, "SHCH", s)
    gsub(/Ú/, "~", s)
    gsub(/Û/, "Y", s)
    gsub(/Ü/, "'"'"'", s)
    gsub(/Ý/, "E'"'"'", s)
    gsub(/Þ/, "YU", s)
    gsub(/ß/, "YA", s)

    return s
}

# Translate s according to the current state and return the result:
# lat2rus while mode is 1, rus2lat whenever the selected direction is 2,
# and s unchanged otherwise (the verbatim state of lat2rus mode).
function trans (s){
    if (mode == 1){
        return lat2rus(s)
    }else if (mode_lat2rus == 2){
        return rus2lat(s)
    }else{
        return s
    }
}

#### latinica to russian translation
# Each backslash-space pair is removed from the output and flips mode
# between 1 (translate) and 0 (copy verbatim).  mode is not reset at the
# end of a record, so the state carries over to the following lines.
mode_lat2rus == 1 {
    while (idx = index($0, "\\ ")){
        printf "%s", trans(substr($0, 1, idx-1))
        mode = mode_lat2rus - mode
        $0 = substr($0, idx + 2)
    }
    print trans($0)

    next
}

#### russian to latinica translation
# Every run of ASCII letters is followed by a backslash-space marker, and
# so is the text in front of it, so that the run can be told apart from
# transliterated text when converting back.
mode_lat2rus == 2 {
    while (idx = match($0, /[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]+/)){
        printf "%s\\ ", trans(substr($0, 1, idx-1))
        # In this direction trans() dispatches on mode_lat2rus, so the
        # toggle below does not change which conversion is applied.
        mode = mode_lat2rus - mode
        printf "%s\\ ", trans(substr($0, idx, RLENGTH))
        $0 = substr($0, idx + RLENGTH)
    }
    print trans($0)

    next
}

# Reached only if mode_lat2rus is neither 1 nor 2.
{
    print "It'"'"'s impossible" > "/dev/stderr"
    exit 1
}' "$@"