Usuario:Ignacio Rodríguez/TemplateScript.js
(Redirigido desde «Usuario:Ninovolador/TemplateScript.js»)
Nota: Después de publicar, quizás necesite actualizar la caché de su navegador para ver los cambios.
- Firefox/Safari: mantenga presionada la tecla Shift mientras pulsa el botón Actualizar, o presione Ctrl+F5 o Ctrl+R (⌘+R en Mac)
- Google Chrome: presione Ctrl+Shift+R (⌘+Shift+R en Mac)
- Internet Explorer/Edge: mantenga presionada Ctrl mientras pulsa Actualizar, o presione Ctrl+F5
- Opera: presione Ctrl+F5.
/*
This page defines a TemplateScript library. It's not meant to be referenced
directly. See [[Wikisource:TemplateScript]] for usage.
*/
/* global $, pathoschild */
/**
* TemplateScript adds configurable templates and scripts to the sidebar, and adds an example regex editor.
* @see https://meta.wikimedia.org/wiki/TemplateScript
* @update-token [[File:Pathoschild/templatescript.js]]
*/
// <nowiki>
// Declared outside the loader callback below and assigned inside it, so the
// header/footer tools are reachable from outside that callback as well.
var addPageHeader;
var addPageFooter;
// Load the `es.js` i18n script (presumably the Spanish TemplateScript
// translations — confirm against the tool's documentation).
mw.loader.load('//tools-static.wmflabs.org/meta/scripts/i18n/es.js');
$.ajax('//tools-static.wmflabs.org/meta/scripts/pathoschild.templatescript.js', { dataType:'script', cache:true }).then(function() {
/*********
** Define library
*********/
// Sidebar entries of the transcription toolbox. Every entry wraps its handler
// in a function so the handler is looked up when the tool is invoked (the
// handlers are defined further down in this callback).
var proofreadingScripts = [
    {
        key: 'add-header',
        name: 'Agregar encabezado',
        scriptUrl: 'Special:MyPage/titulos.js',
        script: function(editor) { addPageHeader(editor); },
        forNamespaces: 'page',
        accessKey: '3'
    },
    {
        key: 'add-footer',
        name: 'Agregar pie de página',
        scriptUrl: 'Special:MyPage/titulos.js',
        script: function(editor) { addPageFooter(editor); },
        forNamespaces: 'page',
        accessKey: '4'
    },
    {
        key: 'ocr',
        name: 'OCR (reconocimiento automático de texto)',
        script: function(editor) { do_hocr(); },
        forNamespaces: 'page',
        accessKey: '1'
    },
    {
        key: 'cleanup-ocr',
        name: 'Limpiar OCR',
        script: function(editor) { pageCleanup(editor); },
        forNamespaces: 'page',
        accessKey: '2'
    },
    {
        key: 'make-refs',
        name: 'Hacer referencias',
        script: function(editor) { makeReference(editor); },
        forNamespaces: 'page',
        accessKey: 'h'
    },
    {
        key: 'smallcaps',
        name: 'A versalitas',
        script: function(editor) { smallcaps(editor); },
        forNamespaces: 'page',
        accessKey: '+'
    },
    {
        key: 'uppercase',
        name: 'A mayúsculas',
        script: function(editor) { upper(editor); },
        forNamespaces: 'page'
    },
    {
        key: 'lowercase',
        name: 'A minúsculas',
        script: function(editor) { lower(editor); },
        forNamespaces: 'page',
        accessKey: '-'
    },
    {
        key: 'bold',
        name: 'Negrita',
        script: function(editor) { bold(editor); },
        forNamespaces: 'page',
        accessKey: 'n'
    },
    {
        key: 'italics',
        name: 'Cursiva',
        script: function(editor) { italics(editor); },
        forNamespaces: 'page',
        accessKey: 'i'
    },
    {
        key: 'ortho-old',
        name: 'Ortografía antigua',
        script: function(editor) { orthoOld(editor); },
        forNamespaces: 'page'
    }
];

// Register the library with TemplateScript under a single category.
pathoschild.TemplateScript.library.define({
    key: 'wikisource.proofreading',
    name: 'Herramientas de transcripción',
    url: '//es.wikisource.org/wiki/Wikisource:TemplateScript',
    description: 'Una serie de herramientas para facilitar la <a href="/wiki/Ayuda:Páginas de transcripción">transcripción en el espacio de nombres <tt>Página:</tt></a> Incluye herramientas para mejorar el OCR, agregar encabezados, y otras herramientas de formato.',
    categories: [
        {
            name: 'Herramientas de transcripción',
            scripts: proofreadingScripts
        }
    ]
});
/*********
** Page context
*********/
// Shared page context, refreshed by _initialise().
var state = {};
// whether the page context has been initialised
state.initialised = false;
// number: the djvu page number extracted from the URL
state.page = { number: null, proofed: null };
// work-specific header template formats
state.specialFormats = [];
/*********
** Private methods
*********/
/**
* Refresh the data needed by the page tools.
*
* The page metadata (page number and proofread status) is re-read on every
* call. The user-defined work formats in `window.specialFormats` are merged
* into `state.specialFormats` only on the first call: every tool calls this
* function, so merging each time would keep prepending duplicate entries.
*/
var _initialise = function() {
    // get page metadata
    var heading = $("#firstHeading").html();
    var pn = /\.(?:djvu|pdf)\/([0-9]+)/.exec(heading || '');
    var pq = document.getElementById('pagequality');
    var pqClass = pq && pq.getAttribute('class');
    state.page = {
        number: pn !== null ? parseInt(pn[1], 10) : null,
        // truthy (a match array) when the quality class is 0, 2, 3 or 4
        proofed: pqClass && pqClass.match(/quality0|quality[2-4]/)
    };

    // one-time work below this line
    if (state.initialised)
        return;
    state.initialised = true;

    // get user-defined work formats
    // expected format:
    // {
    //   title: /History of England /,
    //   evenHeader: '{{rh|...}}',
    //   oddHeader: '{{rh|...}}',
    //   footer: '',
    //   footerWithReferences: '{{smallrefs}}'
    // }
    if (Array.isArray(window.specialFormats))
        state.specialFormats = window.specialFormats.concat(state.specialFormats);
};
/**
* Convert the text to title case based on Spanish rules.
*
* The text is lower-cased and split on single spaces. Every word is
* capitalised except articles, short prepositions and single-letter
* conjunctions; the first word is always capitalised. A word that starts
* with an opening parenthesis has the letter after the parenthesis capitalised.
* @param {string} text The text to convert.
* @returns {string} The converted text.
*/
var _titlecase = function(text) {
    // words that stay in lower case unless they open the title
    var minorWords = [
        "a", "á", "e", "é", "i", "y", "o", "ó", "u",
        "el", "la", "los", "las",
        "un", "una", "unos", "unas",
        "de", "del"
    ];

    // upper-case the first letter, skipping a leading "("
    var capitalise = function(word) {
        var lead = word.substring(0, 1) == '(' ? 2 : 1;
        return word.substring(0, lead).toUpperCase() + word.substring(lead);
    };

    var words = text.toLowerCase().split(" ").map(function(word) {
        return minorWords.indexOf(word) !== -1 ? word : capitalise(word);
    });

    // capitalise first word regardless
    words[0] = words[0].substring(0, 1).toUpperCase() + words[0].substring(1);

    // reconstruct title
    return words.join(' ');
};
/*********
** Script methods
*********/
/**
* Split the wikitext of a page into its header, body and footer.
*
* The header is everything up to and including the first "</noinclude>";
* the footer is everything from the last "<noinclude>" onwards, provided it
* starts after the header and is closed. A section that is not present is
* returned as an empty string, so text without any <noinclude> section comes
* back entirely as the body.
* @param {string} tpp The wikitext of the page.
* @returns {string[]} A three-item array: [header, body, footer].
*/
function splitPagina(tpp) {
    var openTag = "<noinclude>";
    var closeTag = "</noinclude>";

    var headerEnd = tpp.indexOf(closeTag);
    var header = headerEnd === -1 ? "" : tpp.substring(0, headerEnd + closeTag.length);

    // the last opening tag only marks a footer when it lies after the header
    // and is actually closed
    var footerStart = tpp.lastIndexOf(openTag);
    var hasFooter = footerStart !== -1
        && footerStart >= header.length
        && tpp.indexOf(closeTag, footerStart) !== -1;
    var footer = hasFooter ? tpp.substring(footerStart) : "";

    var body = tpp.substring(header.length, tpp.length - footer.length);
    return [header, body, footer];
}
/**
* Fill the header box of the page being edited from the header of a
* preceding page.
*
* The headers of the two preceding pages (N-1 and N-2) are fetched through
* the API. The header of page N-2 is preferred (falling back to N-1); the
* first number found in it is increased by 2 (or by 1 for N-1) and the
* result is written to #wpHeaderTextbox. If the header contains no number
* it is copied unchanged.
* @param {object} editor The script helpers for the page (unused).
*/
addPageHeader = function(editor) {
    // Read the title from the DOM rather than mw.config (kept from the
    // original for EIS compatibility). The text content is used so that
    // entities and nested markup in the heading do not end up in the title.
    var pageName = $.trim($("#firstHeadingTitle").text());
    var parts = (/(.+\/)(.+)$/).exec(pageName);
    var pageNumber = parts ? Number(parts[2]) : NaN;
    if (isNaN(pageNumber))
        return; // not a "Book/123" style title: nothing to derive a header from

    var p_prec1 = parts[1] + (pageNumber - 1);
    var p_prec2 = parts[1] + (pageNumber - 2);
    var emptyPage = ["", "", ""];

    // strip the <noinclude> wrapper and the <pagequality> tag from a header
    var stripHeader = function(header) {
        return $.trim(header.replace(/<noinclude\><pagequality.+?>/, "").replace("</noinclude>", ""));
    };

    var api = new mw.Api();
    api.get( {
        action: 'query',
        prop: 'revisions',
        titles: p_prec2 + "|" + p_prec1,
        rvprop: "content"
    } ).done( function ( data ) {
        var pagine = {};
        var pages = (data && data.query && data.query.pages) || {};
        $.each(pages, function(index, page) {
            var hasContent = page.missing === undefined && page.revisions && page.revisions[0];
            pagine[page.title] = hasContent ? splitPagina(page.revisions[0]["*"]) : emptyPage;
        });

        // a page absent from the response is treated like an empty page
        var cab1Prev = stripHeader((pagine[p_prec1] || emptyPage)[0]);
        var cab2Prev = stripHeader((pagine[p_prec2] || emptyPage)[0]);
        var cab = cab2Prev.length === 0 ? cab1Prev : cab2Prev;
        var q = cab2Prev.length === 0 ? 1 : 2;

        var num = cab.match(/\d+/);
        var nuevoCab = num ? cab.replace(/\d+/, Number(num[0]) + q) : cab;
        $("#wpHeaderTextbox").val(nuevoCab);
    });
};
/**
* Clean up OCR errors in the text, and push <noinclude> content at the top
* & bottom of the page into the header & footer boxes respectively.
*
* The clean-up is a long ordered list of regex replacements applied through
* `editor.replace`. Later rules see the output of earlier ones, so the order
* of the rules matters.
* @param {object} editor The script helpers for the page.
*/
var pageCleanup = function(editor) {
    _initialise();

    // push <noinclude> content at the top & bottom into the header & footer
    if (editor.get().match(/^<noinclude\>/)) {
        var headerSource = editor.get();
        var headerEnd = headerSource.indexOf("</noinclude>");
        // an unclosed <noinclude> is left alone rather than sliced blindly
        if (headerEnd !== -1) {
            $('#wpHeaderTextbox').val(function(i, val) {
                return $.trim(val + "\n" + headerSource.substr(11, headerEnd - 11).replace(/^\s+|\s+$/g, ''));
            });
            editor.set(headerSource.substr(headerEnd + 12));
        }
    }
    if (editor.get().match(/<\/noinclude\>$/)) {
        var footerSource = editor.get();
        var footerStart = footerSource.lastIndexOf("<noinclude>");
        // a closing tag without its opening tag is left alone
        if (footerStart !== -1) {
            $('#wpFooterTextbox').val(function(i, val) {
                return $.trim(footerSource.substr(footerStart + 11, footerSource.length - footerStart - 11 - 12).replace(/^\s+|\s+$/g, '') + "\n" + val);
            });
            editor.set(footerSource.substr(0, footerStart));
        }
    }

    // clean up text
    editor
        // remove trailing spaces at the end of each line
        .replace(/ +\n/g, '\n')
        // remove trailing whitespace preceding a hard line break
        .replace(/ +<br *\/?>/g, '<br />')
        // remove trailing whitespace and numerals at the end of page text
        // (numerals are nearly always page numbers in the footer)
        .replace(/[\s\d]+$/g, '')
        .replace(/^[\s\d]+/g, '')
        // remove leftovers of the Google OCR
        .replace(/Digitized.{8,15}$/g, '')
        // remove "Biblioteca Nacional de España" stamps
        .replace(/(© )?Biblioteca Nacional de España/g, '')
        // remove trailing spaces at the end of refs
        .replace(/ +<\/ref>/g, '</ref>')
        // remove trailing spaces at the end of template calls
        .replace(/ +}}/g, '}}')
        // convert double-hyphen to mdash (avoiding breaking HTML comment syntax)
        .replace(/([^\!])--([^>])/g, '$1—$2')
        // remove spacing around mdash, but only if it has spaces on both sides
        // (we don't want to remove the trailing space from "...as follows:— ",
        // bearing in mind that the space will already be gone if at end of line).
        .replace(/ +— +/g, '—')
        // join words that are hyphenated across a line break, including
        // soft hyphens (but leave "|-" table syntax alone)
        .replace(/([^\|])[\-¬\u00AD]\n/g, '$1');

    // clean up pages if they don't have <poem>
    if (!editor.contains('<poem>')) {
        editor
            // lines that start with " should probably be new lines,
            // if the previous line ends in punctuation,
            // other than a comma or semicolon
            // and let's get rid of trailing space while we're at it
            .replace(/([^\n\w,;])\n\" */g, '$1\n\n"')
            // a new line opening (or sometimes closing) quotes is probably a
            // continuation of the quotation, unless a full stop or colon
            // precedes it (the previous rule should already cover that case,
            // but better safe than sorry)
            .replace(/([^\n\.:])\n["«»]/g, '$1 ')
            // lines that end with " should probably precede a new line,
            // unless preceded by a comma,
            // or unless the new line starts with a lower-case letter;
            // and let's get rid of preceding space while we're at it
            .replace(/([^,])\ *\"\n([^a-z\n])/g, '$1"\n\n$2')
            // a new line starting with an em dash is probably dialogue and
            // should be a new paragraph
            .replace(/([^\n])\n—/g, '$1\n\n—')
            // remove single line breaks; preserve multiple.
            // but not if there's a tag, template or table syntax either side of the line break
            .replace(/([^>}\|\n])\n([^:#\*<{\|\n])/g, '$1 $2')
            // collapse sequences of spaces into a single space
            .replace(/ +/g, ' ');
    }

    // more page cleanup
    editor
        // dump spurious hard breaks at the end of paragraphs
        .replace(/<br *\/?>\n\n/g, '\n\n')
        // ghost characters (U+FFFD replacement character)
        .replace(/\uFFFD/g, '')
        // remove unwanted spaces around punctuation marks
        .replace(/ ([;:\?!,\.])/g, '$1')
        // unicodify: turn HTML entities into the characters they stand for
        .replace(/&mdash;/g, '—')
        .replace(/&ndash;/g, '–')
        .replace(/&quot;/g, '"')
        // straighten quotes and apostrophes.
        .replace(/[“”]/g, '"')
        .replace(/[‘’`]/g, '\'')

        // OCR fixes
        // digits misread inside words
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])1([a-zA-ZáéíoúüÁÉÍÓU])/g, '$1l$2')
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])0([a-zA-ZáéíoúüÁÉÍÓU])/g, '$1o$2')
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])[58~]([a-zA-ZáéíoúüÁÉÍÓU])/g, '$1s$2')
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])6/g, '$1ó')
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])0/g, '$1o')
        .replace(/([a-zA-ZáéíoúüÁÉÍÓU])6([a-zA-ZáéíoúüÁÉÍÓU])/g, '$1é$2')
        .replace(/!([a-záéíóú])/g, 'l$1')
        .replace(/([a-záéíóú])l·([a-záéíóú])/g, '$1r$2')
        .replace(/_/g, '—')
        .replace(/([^IVXLCDM])I\)/g, '$1P') // guard against Roman numerals
        .replace(/([a-záéíóú])U/g, '$1u')

        // particles and consonant clusters that do not (or hardly) exist in Spanish
        .replace(/aiia/g, 'ana')
        .replace(/abn([.,;:!?}{\]\[\|\s]|$)/g, 'aba$1')
        .replace(/aiit/g, 'ant')
        .replace(/ hl([aeiouáéíóú])/g, 'bl$1')
        .replace(/bn([^eiou ,.\n])/g, 'bu$1')
        .replace(/cb/g, 'ch')
        .replace(/Cb/g, 'Ch')
        .replace(/chn([.,;:!?}{\]\[\|\s]|$)/g, 'cha$1')
        .replace(/([ae])iit([eo])/g,'$1nt$2')
        .replace(/([ae])ute(s?[.,;:!?}{\]\[\|\s]|$)/g, '$1nte$2')
        .replace(/([scgp])nn/g,'$1un')
        .replace(/dn([ds])([.,;:!?}{\]\[\|\s]|$)/g,'da$1$2')
        .replace(/ rlic/g, ' dic')
        .replace(/ [rc]lil/g, ' dil')
        .replace(/clel/g, 'del')
        .replace(/ cles/g, ' des')
        .replace(/ clis/g, ' dis')
        .replace(/clici/g, 'dici')
        .replace(/clig/g, 'dig')
        .replace(/clv/g, 'dv')
        .replace(/cou/g, 'con')
        .replace(/ lr/g, ' h')
        .replace(/ hn/g, ' hu')
        .replace(/nii/g, 'mi')
        .replace(/ iia/g, ' na')
        .replace(/iinp/g, 'imp')
        .replace(/inip/g, 'imp')
        .replace(/inb/g, 'mb')
        .replace(/cne/g, 'cue')
        .replace(/cleb/g, 'deb')
        .replace(/snf/g, 'suf')
        .replace(/ln([dn])/g, 'lu$1')
        .replace(/rn([pb])/g, 'm$1')
        .replace(/ iuc/g, ' inc')
        .replace(/ llc/g, ' lle')
        .replace(/ inu(?!b)/g, ' mu') // keeps "inubicable"
        .replace(/ rn/g, ' m')
        .replace(/rrn/g, 'rm')
        .replace(/mh/g, 'mb')
        .replace(/nibr/g, 'mbr')
        .replace(/mcn/g, 'men')
        .replace(/iiip/g, 'mp')
        .replace(/inp/g, 'mp')
        .replace(/enip(?!o)/g, 'emp')
        .replace(/onip/g, 'omp')
        .replace(/ nne/g, ' nue')
        .replace(/clia/g, 'cha')
        .replace(/ lia/g, ' ha')
        .replace(/([^um])irn([^o])/g, '$1im$2')
        .replace(/([ae])iid/g, '$1nd')
        .replace(/((?!eu|Eu).{2})cli([jrd])/g, '$1di$2')
        .replace(/clist/g, 'dist')
        .replace(/cnto([.,;:!?}{\]\[\|\s]|$)/g,'ento$1')
        .replace(/nndo([.,;:!?}{\]\[\|\s]|$)/g,'ando$1')
        .replace(/ii([.,;:!?}{\]\[\|\s]|$)/g,'n$1')
        .replace(/cl([.,;:!?}{\]\[\|\s]|$)/g,'d$1')
        .replace(/fc/g, 'fe')
        .replace(/gnl/g,'gul')
        .replace(/jni/g,'jui')
        .replace(/lln([.,;:!?}{\]\[\|\s]|$)/g,'lla$1')
        .replace(/nlt/g,'nst')
        .replace(/iic/g,'nc')
        .replace(/oncli/g,'ondi')
        .replace(/pcr/g,'per')
        .replace(/pne/g,'pue')
        .replace(/oue/g,'one')
        .replace(/([rl])cs([.,;:!?}{\]\[\|\s]|$)/g,'$1es$2')
        .replace(/rcg/g,'reg')
        .replace(/([^eé])tn/g, '$1tu') // keeps "etnia", "étnico"
        .replace(/ sne/g,' sue')
        .replace(/tc([lm])/g,'te$1')
        .replace(/tiir/g,'tur')
        .replace(/ Jni/g,' Uni')
        .replace(/ vne/g,' vue')
        .replace(/xol/g,'xcl')
        .replace(/ gn/g,' gu')
        .replace(/dn([oa])/g,'du$1')
        .replace(/ oo/g,' co')
        .replace(/([n])eoe/g,'$1ece')
        .replace(/ncs/g,'nes')
        .replace(/i\//g,'y')
        .replace(/ i[)>]/g,' p')
        .replace(/(?<=[a-z] )Ja (?![Jj]a)/g, 'la ') // avoid touching "Ja ja ja"

        // fi --> ñ
        .replace(/ afio/g,' año')
        .replace(/ afiad/g,' añad')
        .replace(/ifia/g,'iña')
        .replace(/compafi/g, 'compañ')
        .replace(/([Ee])spa[fi]ia/g,'$1spaña')
        .replace(/([Ee])spa[fi]iol/g,'$1spañol')
        .replace(/afier/g,'añer')
        .replace(/tafia/g,'taña')
        .replace(/([Ss])efia([lrb])/g,'$1eña$2')
        .replace(/([Ss])efior/g,'$1eñor')
        .replace(/efio([ .,\n])/g,'eño$1')
        .replace(/mafio([ .,\n])/g,'maño$1')
        .replace(/trafio/g,'traño')
        .replace(/empefi(?=o|a)/g, 'empeñ')

        // tl --> d (except in cases like "atlas" and Aztec words)
        .replace(/otl/g,'od')
        .replace(/utl/g,'ud')
        .replace(/tlo([.,;:!?}{\]\[\|\s]|$)/g,'do$1')
        .replace(/itla/g,'ida')

        // OCR to Spanish (thanks Aleator)
        .replace(/([Qq])(?:ll|n|li)/g, '$1u')
        .replace(/([Qq])(?:o|a|tl|u)([eié])/g, '$1u$2')
        .replace(/([Qq])[aou][cers]/g, '$1ue')
        .replace(/([Qq])lll/g, '$1ue')
        .replace(/d([A-ZÁÉÍÓÚ])/g,'¿$1')
        .replace(/ d([qcdhp])/g,' ¿$1')
        .replace(/([a-zó])[P]"/g,'$1?"')
        .replace(/([a-zó])[P]([ ,\n])/g,'$1?$2')
        .replace(/ DO /g,' no ')
        .replace(/ UTI /g,' un ')
        .replace(/ UD /g,' un ')
        .replace(/ UDA /g,' una ')
        .replace(/ u n /g,' un ')
        .replace(/ n /g,' a ')
        .replace(/ d /g,' el ')
        .replace(/ im /g, ' un ')
        .replace(/ ima /g, ' una ')
        .replace(/ tari /g, ' tan ')
        // "$1" keeps the case of the captured A/a ("Asl" -> "Así")
        .replace(/ ([Aa])sl /g,' $1sí ')
        .replace(/ \'[lI]\'/g, ' T')
        .replace(/]([a-záéíóú])/g,'l$1')
        // expand the fi / fl typographic ligatures into plain letters
        .replace(/\uFB01/g,'fi')
        .replace(/\uFB02/g,'fl')
        .replace(/ £[1l] /g,' El ')
        .replace(/((?!para).{4})noi([ao])/g,'$1nci$2')
        .replace(/au([.,;:!?}{\]\[\|\s]|$)/g, 'an$1')
        .replace(/aha([.,;:!?}{\]\[\|\s]|$)/g, 'aba$1')
        .replace(/ahan([.,;:!?}{\]\[\|\s]|$)/g, 'aban$1')
        .replace(/ (a[iïíìl!1I]g[anou][nu])/g, ' algun')
        .replace(/(ag[un]a)/g, 'agua')
        .replace(/ afio /g, ' año ')
        .replace(/ ao/g, ' au')
        .replace(/hle([.,;:!?}{\]\[\|\s]|$)/g, 'ble$1')
        .replace(/ oen/g, ' cen')
        .replace(/c[iïíìl!1I][56d][un]([.,;:!?}{\]\[\|\s]|$)/g, 'ción$1')
        .replace(/oio[un]/g, 'cion ')
        .replace(/oió[un]/g, 'ción ')
        .replace(/(c[iïíìl!1I][56d][ns][ce][8s] )/g, 'ciones ')
        .replace(/( co[ir]no )/g, ' como ')
        .replace(/( co[un][iïíìl!1I]o )/g, ' como ')
        .replace(/( eomo )/g, ' como ')
        .replace(/( [oce][op]n )/g, ' con ')
        .replace(/cb/g, 'ch')
        .replace(/ Kn/g, 'En')
        .replace(/c[ïl!1I]o([.,;:!?}{\]\[\|\s]|$)/g, 'cio$1')
        .replace(/dn([.,;:!?}{\]\[\|\s]|$)/g, 'da$1')
        .replace(/tlar([.,;:!?}{\]\[\|\s]|$)/g, 'dar$1')
        .replace(/tla([.,;:!?}{\]\[\|\s]|$)/g, 'da$1')
        .replace(/( [tc][li]e )/g, ' de ')
        .replace(/( d[ocs] )/g, ' de ')
        .replace(/( ¿c )/g, ' de ')
        .replace(/( c[li][ce]l )/g, ' del ')
        .replace(/( d[ce][iïíìl!1I] )/g, ' del ')
        .replace(/(d[ce][s8])/g, 'des')
        .replace(/(d[ïì1I])/g, 'di')
        .replace(/([BE][iïíìl!1I] )/g, 'El ')
        .replace(/(EUa )/g, 'Ella ')
        .replace(/ eua /g, ' ella')
        .replace(/( [ce][iïíìl!1I] )/g, ' el ')
        .replace(/( [ce][iïíìl!1I][iïíìl!1I]a)/g, ' ella')
        .replace(/( [ce][iïíìl!1I][iïíìl!1I]o)/g, ' ello')
        .replace(/(eU)/g, 'ell')
        .replace(/cm(?!\.)/g, 'em')
        .replace(/[BE][nqu] /g, 'En ')
        .replace(/ cnal/g, ' cual')
        .replace(/ cn/g, ' en')
        .replace(/(?!cudo)[ce][un]do([.,;:!?}{\]\[\|\s]|$)/g, 'endo$1') // escudo, zancudo, picudo...
        .replace(/ [ce][un] /g, ' en ')
        .replace(/[ce]n[lt] /g, 'ent ')
        .replace(/[ce][un][lt]o[un][ce][ce]s/g, 'entonces')
        .replace(/E[un][lt]o[un][ce][ce]s/g, 'Entonces')
        .replace(/ [ce]n[it]r[ce]/g, ' entre')
        .replace(/ [ce][as] /g, ' es ')
        .replace(/[ce][8s][ce]ri/g, 'escri')
        .replace(/ [ce][s8][lt]([aáà])([ t])/g, ' est$1$2')
        .replace(/ [ce]t[ce]([.,;:!?}{\]\[\|\s]|$)/g, ' etc$1')
        .replace(/ é[8s][tl]([aoe])/g, ' ést$1')
        .replace(/exlr/g, 'extr')
        .replace(/(g[iïíìl!1I][6é]s)/g, 'glés')
        .replace(/(?!ma)(..)g[nu]é/g, '$1gué') // with an exception for "magnético"
        .replace(/gn[ce]/g, 'gne')
        .replace(/gu[ce]/g, 'gue')
        .replace(/ha[nu] /g, 'han ')
        .replace(/ ba /g, ' ha ')
        .replace(/ h[ns]b/g, ' hab')
        .replace(/ H[ns]b/g, ' Hab')
        .replace(/ hah/g, ' hab')
        .replace(/ Hah/g, ' Hab')
        .replace(/[bh][oe]mbr[ce]/g, 'hombre')
        .replace(/horn([^eoi])/g, 'hom$1')
        .replace(/ (?!in|Ia)[iïíìl!1I][na] /g, ' la ')
        .replace(/ [iïíìl!1I][na][a8s] /g, ' las ')
        .replace(/ [iïíìl!1I]o /g, ' lo ')
        .replace(/ [iïíìl!1I][oO0][a8s] /g, ' los ')
        .replace(/lloa([.,;:!?}{\]\[\|\s]|$)/g, 'llos$1')
        .replace(/llaa([.,;:!?}{\]\[\|\s]|$)/g, 'llas$1')
        .replace(/loient[ec]([.,;:!?}{\]\[\|\s]|$)/g, 'lmente$1')
        .replace(/m[ce][un]t[ce]([.,;:!?}{\]\[\|\s]|$)/g, 'mente$1')
        .replace(/m[ce]ot[ce]([.,;:!?}{\]\[\|\s]|$)/g, 'mente$1')
        // exception for teniente, poniente, conveniente, interviniente, concerniente and the like
        .replace(/(?!te|ve|po|.r|vi|Te|Po)(..)ni[ce]nt[ce]([.,;:!?}{\]\[\|\s]|$)/g, '$1mente$2')
        .replace(/ mne/g, ' mue')
        .replace(/ misino([.,;:!?}{\]\[\|\s]|$)/g, ' mismo$1')
        .replace(/(Q[anou][ce])/g, 'Que')
        .replace(/([çq][anou][ce])/g, 'que')
        .replace(/([çq]ii[ce])/g, 'que')
        .replace(/q[a-záéíóú]e/g, 'que')
        .replace(/q[a-záéíóú]é/g, 'qué')
        .replace(/([çq][anou][óé])/g, 'qué')
        .replace(/ [sB][ec] /g, ' se ')
        .replace(/ sn /g, ' su ')
        .replace(/ tau /g, ' tan ')
        .replace(/trn([.,;:!?}{\]\[\|\s]|$)/g, 'tra$1')
        .replace(/ TJ([a-z])/g, ' U$1')
        .replace(/ [un][un] /g, ' un ')
        .replace(/ [un](?:[un]|ii)a /g, ' una ')
        .replace(/ o[un]a /g, ' una ')
        .replace(/ \'?"?[lv] /g, ' y ')
        .replace(/(•)/g, '.')
        .replace(/i([A-ZÁÉÍÓÚ])/g,'¡$1')

        // parentheses inside words
        .replace(/\(\)/g,'o')
        .replace(/\(\'\)/g,'ó')
        .replace(/l\)([a-záéíóú])/g,'b$1')
        .replace(/\(\(/g,'«')
        .replace(/\)\)/g,'»')
        .replace(/\(pi/g,'qu')
        .replace(/\(]/g,'q')
        .replace(/([^\]])]\)/g,'$1p')

        // space after a full stop or comma
        .replace(/(\.)([A-ZÁÉÍÓÚ])/g,'$1 $2')
        .replace(/([,;:!\?])([a-záéíóú])/g,'$1 $2')
        .replace(/([a-záéíóú])\'([a-záéíóú])/g,'$1 $2')
        .replace(/\.-/g, '.—')
        .replace(/\^ /g, ', ')
        .replace(/[.,][.,][.,]/g, '...')
        .replace(/([¿¡]) /g, '$1')
        .replace(/\( /g, '(')
        .replace(/ \)/g, ')');
};
/**
* As you work your way through the page, when you encounter a reference, just mark it with <ref></ref> tags and continue.
* Once you've got to the end of the page and proofed the references, simply highlight each reference in turn,
* and use this function to move it to its proper position.
*
* The selected text is moved into the first empty <ref></ref> pair of the
* text box. Nothing is moved when there is no empty pair, when nothing is
* selected, or when the selection overlaps the pair itself. The footer is
* refreshed through addPageFooter in every case.
* @param {object} editor The script helpers for the page.
*/
var makeReference = function(editor) {
    _initialise();
    var editbox = $('#wpTextbox1').get(0);
    editbox.focus();

    var marker = '<ref></ref>';
    var text = editbox.value;
    var refStart = editbox.selectionStart;
    var refEnd = editbox.selectionEnd;
    var firstref = text.indexOf(marker);

    if (firstref != -1 && refEnd > refStart) {
        var insertAt = firstref + 5; // just after "<ref>"
        var markerEnd = firstref + marker.length;
        var selected = text.slice(refStart, refEnd);

        if (refStart >= markerEnd) {
            // usual case: the note text sits after the marker
            editbox.value = text.slice(0, insertAt)
                + selected
                + text.slice(insertAt, refStart)
                + text.slice(refEnd);
        } else if (refEnd <= firstref) {
            // the note text sits before the marker
            editbox.value = text.slice(0, refStart)
                + text.slice(refEnd, insertAt)
                + selected
                + text.slice(insertAt);
        }
        // a selection overlapping the marker cannot be moved into it: leave the text alone
    }
    addPageFooter(editor);
};
/**
* Insert formatted references into the footer box if needed.
*
* When the page text contains references and the footer has no {{listaref}}
* yet, {{listaref}} is appended to the footer, plus a grouped
* {{listaref|group=...}} for the first reference group found in the text.
* If the text ends on an open table row, the table is closed first. The
* footer value is trimmed in every case.
* @param {object} editor The script helpers for the page (unused).
*/
addPageFooter = function(editor) {
    _initialise();
    var pageText = $('#wpTextbox1').get(0).value;
    var footval = $('#wpFooterTextbox').val();
    var footerbox = '';

    var hasRefs = pageText.indexOf("<ref") != -1 || pageText.indexOf("{{#tag:ref") != -1;
    if (hasRefs && footval.indexOf("{{listaref") == -1) {
        // if the text ends with a table row, add what is needed to close the table properly
        if (/\n\|-\s*$/.test(pageText)) {
            footerbox = "{{npt}}\n|}";
        }
        footerbox += '\n{{listaref}}';

        // group attribute in any position, quoted or bare; the quotes are
        // not part of the captured name
        var group = pageText.match(/<ref\s[^>]*?\bgroup\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>\/]+))/);
        var groupName = group && (group[1] || group[2] || group[3]);
        if (groupName) {
            footerbox += '\n{{listaref|group=' + groupName + '}}';
        }
    }
    $('#wpFooterTextbox').val((footval + footerbox).trim());
};
/**
* Wrap the selected text in {{may}}. Text that is entirely upper case is
* converted to title case first.
* @param {object} editor The script helpers for the page.
*/
var smallcaps = function(editor) {
    _initialise();
    var wrapSelection = function(selected) {
        // All-caps input is usually OCR of text that was really set in
        // small caps, so it is title-cased before being wrapped.
        var isAllCaps = selected == selected.toUpperCase();
        var content = isAllCaps ? _titlecase(selected) : selected;
        return '{{may|' + content + '}}';
    };
    editor.replaceSelection(wrapSelection);
};
/**
* Replace the selected text with its upper-case form.
* @param {object} editor The script helpers for the page.
*/
var upper = function(editor) {
    _initialise();
    var toUpper = function(selected) {
        return selected.toUpperCase();
    };
    editor.replaceSelection(toUpper);
};
/**
* Replace the selected text with its lower-case form.
* @param {object} editor The script helpers for the page.
*/
var lower = function(editor) {
    _initialise();
    var toLower = function(selected) {
        return selected.toLowerCase();
    };
    editor.replaceSelection(toLower);
};
/**
* Wrap the selected text in wikitext bold markup (three apostrophes).
* @param {object} editor The script helpers for the page.
*/
var bold = function(editor) {
    _initialise();
    var marker = "'''";
    var embolden = function(selected) {
        return marker + selected + marker;
    };
    editor.replaceSelection(embolden);
};
/**
* Wrap the selected text in wikitext italic markup (two apostrophes).
* @param {object} editor The script helpers for the page.
*/
var italics = function(editor) {
    _initialise();
    var marker = "''";
    var italicise = function(selected) {
        return marker + selected + marker;
    };
    editor.replaceSelection(italicise);
};
/*jshint boss:true*/
/*global $, mw*/
/*
* Query an OCR for a given Page:. First try to get the hOCR text layer, as it is available
* for most books, fast and of better quality. If that fails, fall back to the older and slower
* OCR method. hOCR fails for around 1 in 5000 books. OCR should never fail, as it uses the image
* visible on the Page:.
*/
// Content language of the wiki; sent as the `lang` parameter of the OCR requests below.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the edit box while an OCR request is running.
 *
 * While locked, the Escape key unlocks it again and the #wsOcr1 button is
 * unbound; unlocking binds the button to do_hocr again. Handlers are
 * removed before being added, so repeated calls never stack duplicates.
 * @param {boolean} set true to disable the text box, false to re-enable it.
 */
function disable_input(set)
{
    // namespaced so that only this script's Escape listener is removed
    var escapeEvent = 'keyup.wsOcrEscape';
    $(document).off(escapeEvent);

    if (set) {
        $(document).on(escapeEvent, function(e) {
            if (e.which == 27) { disable_input(false); }
        });
        $('#wsOcr1').off('click');
    } else {
        $('#wsOcr1').off('click').on('click', do_hocr);
    }
    $('#wpTextbox1').prop('disabled', set);
}
/**
 * Handle the response of the plain OCR service: show the message on error,
 * otherwise put the recognised text into the edit box, then unlock the input.
 * @param {object} data The service response (`error` and `text` are read).
 */
function ocr_callback(data) {
    if (!data.error) {
        // The box is only filled while it is still disabled: with Chrome,
        // ESC re-enables it but doesn't kill the query.
        var tb = document.getElementById("wpTextbox1");
        if (tb.disabled) {
            tb.value = data.text;
        }
    } else {
        alert(data.text);
    }
    disable_input(false);
}
/**
 * Handle the response of the hOCR service.
 *
 * A usable response carries hOCR markup starting with "<?xml" in `text`.
 * Anything else (an error, or a missing or non-XML text, which works around
 * T228594) falls back to the slower plain OCR. A usable response is reduced
 * to its plain text and written into the edit box.
 * @param {object} data The service response (`error` and `text` are read).
 */
function hocr_callback(data) {
    var hasHocr = Boolean(data) && !data.error
        && typeof data.text === 'string'
        && data.text.substring(0, 5) === '<?xml';
    if (!hasHocr) {
        // Fallback to the slow way.
        disable_input(false);
        do_ocr();
        return;
    }

    // Checking if tb is disabled is required with chrome as ESC doesn't kill
    // the query.
    var tb = document.getElementById("wpTextbox1");
    if (tb.disabled) {
        try {
            localStorage.ws_hOCR = data.text;
        } catch (e) {
            // Storage may be full or unavailable. Nothing in this file reads
            // the cached value back, so the OCR result is still delivered.
        }
        // Parse the remote markup without attaching it to the live document.
        var text = $($.parseHTML(data.text)).text();
        // Line breaks of the OCR layer: a single break becomes a space,
        // 2-3 breaks a line break, 4 or more a paragraph break.
        text = text.replace(/^ +/mg, '')
            .replace(/\n{4,}/g, '@_@_@_@')
            .replace(/\n{2,}/g, '____SPACE____')
            .replace(/\n/g, ' ')
            .replace(/____SPACE____/g, '\n')
            .replace(/@_@_@_@/g, '\n\n');
        tb.value = text.trim();
    }
    disable_input(false);
}
/**
 * Request the hOCR text layer of the current page. On success the response
 * goes to hocr_callback; if the request itself fails, the input is unlocked
 * and the plain OCR is tried instead.
 */
function do_hocr() {
    disable_input(true);
    // every parameter is encoded: titles and user names may contain spaces, "&", "#"...
    var request_url = '//phetools.toolforge.org/hocr_cgi.py?cmd=hocr&book='
        + encodeURIComponent(mw.config.get('wgTitle'))
        + '&lang=' + encodeURIComponent(lang)
        + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON(request_url).done(hocr_callback).fail(function() {
        // same fallback as hocr_callback: unlock first, so the input is not
        // left disabled when do_ocr finds no page image to work on
        disable_input(false);
        do_ocr();
    });
}
/**
 * Request a plain OCR of the page image shown on the Page:. Does nothing
 * when the page shows no image. The response goes to ocr_callback; if the
 * request fails, the input is unlocked again.
 */
function do_ocr() {
    var image = $( '.prp-page-image img' );
    if (!image.length) {
        return;
    }
    disable_input(true);
    // server side can't use protocol relative url, request it as https:
    // (a src that already carries a scheme is passed through unchanged)
    var src = image.attr('src');
    var url_image = src.indexOf('//') === 0 ? 'https:' + src : src;
    // the image URL is encoded so its own "%", "&" and "?" survive as one parameter
    var request_url = "//phetools.toolforge.org/ocr.php?cmd=ocr&url=" + encodeURIComponent(url_image)
        + "&lang=" + encodeURIComponent(lang)
        + "&user=" + encodeURIComponent(mw.config.get('wgUserName'));
    $.getJSON( request_url ).done( ocr_callback ).fail( function() {
        // without this the text box would stay disabled after a failed request
        disable_input(false);
    } );
}
/**
 * Convert the text to the old orthography: accent the stand-alone words
 * "a", "o" and "e", and drop the accent of words ending in "ión".
 * @param {object} editor The script helpers for the page.
 */
function orthoOld(editor) {
    // [pattern, replacement] pairs, applied in this order
    var rules = [
        [/ a /g, ' á '],
        [/ o /g, ' ó '],
        [/ e /g, ' é '],
        [/ión\b/g, 'ion']
    ];
    // each call runs on whatever the previous replace returned, like a chained call
    rules.reduce(function(target, rule) {
        return target.replace(rule[0], rule[1]);
    }, editor);
}
});
// </nowiki>