MediaWiki:Gadget-Fill Index.js

De Wikisource, la biblioteca libre.

Nota: Después de publicar, quizás necesite actualizar la caché de su navegador para ver los cambios.

  • Firefox/Safari: Mantenga presionada la tecla Shift mientras pulsa el botón Actualizar, o presione Ctrl+F5 o Ctrl+R (⌘+R en Mac)
  • Google Chrome: presione Ctrl+Shift+R (⌘+Shift+R en Mac)
  • Internet Explorer/Edge: mantenga presionada Ctrl mientras pulsa Actualizar, o presione Ctrl+F5
  • Opera: presione Ctrl+F5.
/*
 * Author: w:fr:Phe
 *
 * Import the contents of the "Book" template from Commons into the Index
 * page fields at Wikisource
 *
 * Modified: 2020-11-10:    More robust template handling to deal with Faebot
 *                          uploads (Inductiveload)
 *           2020-11-27:    Some simple heuristics to improve IA metadata
 *           2021-04-03:    Supports authors set with {{creator|wikidata=Qxxxx}}
 */

/* eslint-disable camelcase, one-var, vars-on-top */

( function ( mw, $, Promise ) {
	'use strict';

	// var FillIndex = {
	// };

	/**
	 * Extract the parameters of a template invocation from wikitext.
	 *
	 * Language wrappers such as {{es|1=...}} are unwrapped first. Parameters are
	 * then read one at a time from the front of the template; a parameter value
	 * may contain one level of nested {{...}} templates and [[...]] links.
	 *
	 * @param {string} text     wikitext to search
	 * @param {string} template template name, matched case-insensitively
	 * @return {Object|null} map of parameter name (first letter upper-cased) to
	 *  trimmed value, or null when the template is not present
	 */
	function parse_template( text, template ) {
		// non-english text in Commons is surrounded by {{es|1=}}, let's get rid of that
		text = text.replace( /{{[a-z][a-z][a-z]?\|(?:[ ]*1[ ]*=[ ]*)*[ ]*(.*?)}}/g, '$1' );

		// escape the name so it is matched literally inside the expressions below
		var escaped = template.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' );

		// find the start of the template in the wikitext
		var start_re = new RegExp( '{{ *' + escaped + '[ \\n]*\\|', 'i' );

		// the template is not present at all
		if ( text.search( start_re ) < 0 ) {
			return null;
		}

		// The regular expression of death: matches the template opening plus its
		// FIRST parameter. Group 1 is the name (with any "="), group 2 the value;
		// the lookahead stops at the next "|" or at the closing "}}".
		var param_re = new RegExp(
			'{{ *' + escaped + '\\s*\\|\\s*([\\w ]*=*)' +
			'((?:[^{}\\[\\]\\|]|{{[^}]+}}|\\[\\[[^\\]]+\\]\\])*)(?=\\||}})',
			'i'
		);

		var params = {};
		var found = text.match( param_re );

		while ( found ) {
			var param_name = found[ 1 ].replace( /=/g, '' ).trim();

			// an empty name ("{{Book||...") has nothing to index by: skip it
			// instead of failing on param_name[ 0 ]
			if ( param_name.length > 0 ) {
				param_name = param_name[ 0 ].toUpperCase() + param_name.slice( 1 );
				params[ param_name ] = found[ 2 ].trim();
			}

			// drop the parameter just read so the next one becomes the first;
			// a replacer function keeps "$" in the name from being interpreted
			text = text.replace( param_re, function () {
				return '{{' + template;
			} );
			found = text.match( param_re );
		}

		return params;
	}

	/*
	 * Title-case a string.
	 *
	 * The input is lower-cased and split on single spaces, then every word is
	 * re-cased in order:
	 *   - a run of roman-numeral letters followed by exactly one more
	 *     character (e.g. "iv.") is upper-cased entirely
	 *   - the first word, and any word following one that ends in ".", gets an
	 *     initial capital
	 *   - words in the exception list stay lower case; every other word gets
	 *     an initial capital
	 */
	var toTitleCase = function ( str ) {

		// words left in lower case unless they open a sentence (translatable)
		var minor_words = [ 'en', 'y', 'de', 'la', 'el', 'las', 'los', 'un', 'una', 'unas', 'unos' ];

		// upper-case only the first character of a word
		function capitalise( w ) {
			if ( w.length === 0 ) {
				return w;
			}
			return w.charAt( 0 ).toUpperCase() + w.slice( 1 );
		}

		// roman numeral letters plus one trailing character
		function looks_roman( w ) {
			return /^[ivxlcdm]+\b.$/.test( w );
		}

		// true while the next word opens a sentence
		var starts_sentence = true;

		return str.toLowerCase().split( ' ' ).map( function ( w ) {
			var recased;

			if ( looks_roman( w ) ) {
				recased = w.toUpperCase();
			} else if ( starts_sentence || minor_words.indexOf( w ) === -1 ) {
				recased = capitalise( w );
			} else {
				recased = w;
			}

			starts_sentence = /\.$/.test( w );
			return recased;
		} ).join( ' ' );
	};

	// Lookup tables used while filling the form. Both start out empty and are
	// replaced by setup_extract_dict().
	var extract_dict = {};
	var field_names = {};

	/**
	 * Point extract_dict and field_names at the tables configured in
	 * self.fill_index_data.
	 */
	function setup_extract_dict() {
		var configured = self.fill_index_data;

		extract_dict = configured.extract_dict;
		field_names = configured.field_names;
	}

	/**
	 * Set the appropriate input field
	 *
	 * The value is tidied before it is written: the space in front of the first
	 * ";", ":" or "," is moved behind it, and runs of spaces are collapsed.
	 * Nothing is written when idx has no entry in field_names, when the content
	 * is not a string, or when the form has no matching input. A rejected
	 * content Promise is logged rather than left unhandled.
	 *
	 * @param {string} idx     the field index (a key of field_names)
	 * @param {string|Promise} content the new content, or a Promise that resolves it
	 */
	function set_field( idx, content ) {
		// this resolves with either the raw value, or the resolution of the Promise
		// eslint-disable-next-line compat/compat
		Promise.resolve( content ).then( function ( content_value ) {
			var field_name = field_names[ idx ];

			// no form field mapped to this index, or nothing usable to write
			if ( field_name === undefined || typeof content_value !== 'string' ) {
				return;
			}

			content_value = content_value.replace( / ([;:,]) ?/, '$1 ' );

			// fix any sneaky double spaces
			content_value = content_value.replace( / +/g, ' ' );

			var f = document.getElementsByName( 'wpprpindex-' + field_name )[ 0 ];

			if ( f ) {
				f.value = content_value;
			}
		} ).catch( function ( error ) {
			// e.g. a failed Wikidata lookup: report it and leave the field alone
			console.error( 'Fill Index: could not set field "' + idx + '"', error );
		} );
	}

	/**
	 * Look up the page title that a Wikidata item links to on this wiki.
	 *
	 * @param {string} qid Wikidata item ID, e.g. "Q42"
	 * @return {Promise} resolves with the title of the item's sitelink for the
	 *  wiki named by mw.config 'wgWikiID'; rejects when the request fails, the
	 *  response carries no such entity, or the entity has no sitelink here
	 */
	function get_wd_author( qid ) {

		// eslint-disable-next-line compat/compat
		return new Promise( function ( resolve, reject ) {
			$.ajax( {
				url: '//wikidata.org/w/api.php',
				data: {
					format: 'json',
					action: 'wbgetentities',
					ids: qid,
					props: 'sitelinks'
				},
				dataType: 'jsonp',
				cache: true,
				success: function ( data ) {
					// Walk the response defensively: an exception thrown in this
					// callback would leave the promise pending forever
					var entity = data && data.entities && data.entities[ qid ],
						sitelink = entity && entity.sitelinks &&
							entity.sitelinks[ mw.config.get( 'wgWikiID' ) ];

					if ( sitelink && sitelink.title ) {
						resolve( sitelink.title );
					} else {
						reject( new Error( 'No sitelink on this wiki for Wikidata item ' + qid ) );
					}
				},
				error: function ( error ) {
					reject( error );
				}
			} );
		} );
	}

	/**
	 * Turn one raw author entry into a wikilink to the author page.
	 *
	 * An entry that is exactly a Wikidata ID ("Q123") is resolved through
	 * get_wd_author(); anything else is treated as a name and cleaned up with a
	 * few heuristics (dates, initial expansions, titles, "Last, First" order).
	 *
	 * @param {string} str raw author text, optionally prefixed with "*" or ":"
	 * @return {Promise} resolves with "[[<author namespace>:<name>|]]"
	 */
	function process_author( str ) {
		str = str.replace( /^[*:][ ]*/, '' ).trim();

		var author_promise;

		// Only a bare QID is looked up; an unanchored test would also send any
		// name that merely contains "Q" plus a digit to Wikidata
		if ( /^Q[0-9]+$/.test( str ) ) {

			author_promise = get_wd_author( str );
		} else {

			// strip dates - these are nearly always not needed
			str = str.replace( /(?:, )?(?:(?:ca\.|fl\.) )?(\(?\d+-\d+\)?).?$/, '' );

			// strip birth date
			str = str.replace( /(?:, )(?:b\.|d\.) +\d{3,4}$/, '' );

			// strip initial expansions
			str = str.replace( /(?:[A-Z]. ?)+ \((.*)\)/, '$1' );

			str = str.replace( /, (Sir|Lord)$/, '' );

			// Last, First -> First Last
			str = str.replace( /^([^,]+), ([^,]+)$/, '$2 $1' );

			// Fix initials without dots
			str = str.replace( / ([A-Z]) /g, ' $1. ' );

			// just resolve right now
			// eslint-disable-next-line compat/compat
			author_promise = Promise.resolve( str );
		}

		return author_promise.then( function ( author ) {
			var ns_prefix = self.fill_index_data.ns_author_name + ':';

			// A title loaded from Wikidata already starts with the author
			// namespace; drop that leading prefix so it is not doubled
			if ( author.indexOf( ns_prefix ) === 0 ) {
				author = author.slice( ns_prefix.length );
			}

			// prevent the pipe trick triggering on the JS
			// eslint-disable-next-line no-useless-concat
			return '[' + '[' + ns_prefix + author + '|]]';
		} );
	}

	/**
	 * Convert a raw author field, possibly holding several authors, into a
	 * comma-separated list of author links.
	 *
	 * @param {string} str raw field text, one author per line and/or wrapped in
	 *  creator templates
	 * @return {Promise} resolves with the process_author() results joined by ", "
	 */
	function process_authors( str ) {
		var unwrapped = str
			// {{Creator:Name}} -> "Name" on its own line
			.replace( /{{[ ]*[Cc]reator[ ]*:[ ]*(.*?)[ ]*}}/g, '$1\n' )
			// TODO: fix wikidata here
			.replace( /{{[ ]*[Cc]reator[ ]*\|[ ]*[Ww]ikidata[ ]*=[ ]*(Q[0-9]*)}}/g, '$1\n' );

		// one pending lookup per non-blank line
		var pending = [];
		unwrapped.split( '\n' ).forEach( function ( line ) {
			if ( line.trim() ) {
				pending.push( process_author( line ) );
			}
		} );

		// eslint-disable-next-line compat/compat
		return Promise.all( pending ).then( function ( links ) {
			return links.join( ', ' );
		} );
	}

	/**
	 * Split a Commons "publisher" value that may also contain the place of
	 * publication.
	 *
	 * "City: Publisher" is always split at the first colon. Without a colon,
	 * "City, Publisher" / "City; Publisher" is split only when the first part
	 * matches one of the known cities. "[s.l.]" / "[s.n.]" placeholders are
	 * removed before anything else.
	 *
	 * @param {string} str raw publisher text
	 * @return {string[]} [ publisher, city ], both trimmed; city is '' when none
	 *  was recognised
	 */
	function split_city_publisher( str ) {

		// most books are published in a few cities
		// Edit 2024: Most of our books are published somewhere else
		var cities = [
					/Buenos[ \-]*Aires/,
					/Santiago(?: de Chile)?/, /Valpara[íi]so/,
					/Bogot[áa]/,
					/(?:La )?Habana/,
					/M[ée][xjg]ico(?: City|D[\. ]*F[\. ]*)?/, /Vera[ ]*Cruz/,
					/Puebla/, /Guadalajara/, /Monter+ey/, /Zacatecas/,
					/Lima/,
					/Montevideo/,
					/Caracas/,
					/Madrid/, /Barcelona/, /Murcia/, /Se[uv]illa/, /Valencia/,
					/Tarragona/, /Zaragoza/, /C[áa]diz/, /M[áa]laga/,

					/(?:Nueva|New) York/, /Lond(?:on|res)/, /Par[ií]s/, /Rom[ae]/,

				// the entries below are kept as-is for compatibility
				/London/, /Edinburgh/, /Oxford/, /Cambridge/,
				/New York/, /Boston/, /Philadelphia/, /Washington D. ?C./,
				/Paris/,
				/Berlin/, /Stuttgart/, /Jena/,
				/Hong Kong/, /Shanghai/,
				/Calcutta/, /Bombay/, /Delhi/ ],

			city = '',
			publisher,
			parts;

		// get rid of "no data" claims first, so that the default publisher
		// below is the cleaned text and not the raw input
		str = str.replace( /\[[Ss]\. *[ln]\. *\] *:* */g, '' );
		publisher = str;

		if ( str.indexOf( ':' ) > -1 ) {
			// a colon: assume this is a city: publisher
			parts = str.split( ':' );
			city = parts[ 0 ];
			publisher = parts.slice( 1 ).join( ':' );
		} else {

			parts = str.split( /[,;:] / );

			if ( parts.length > 1 ) {
				for ( var i = 0; i < cities.length; i++ ) {
					if ( parts[ 0 ].match( cities[ i ] ) ) {
						city = parts[ 0 ];
						publisher = parts.slice( 1 ).join( ', ' );
						break;
					}
				}
			}
		}

		return [ publisher.trim(), city.trim() ];
	}

	/**
	 * Split a volume value into the volume label and an optional description.
	 *
	 * A trailing "(Foo)" or ": Foo" is taken as the description. A bare number,
	 * optionally prefixed with "v." or "vol.", is rewritten as "Volumen <n>".
	 *
	 * @param {string} v raw volume text
	 * @return {string[]} [ volume label, description ]; description is '' when
	 *  there is none
	 */
	function processVolume( v ) {

		// first, strip off either : Foo or (Foo):
		// group 2 holds a parenthesised description, group 3 one after a colon
		var match = v.match( /(.*?) *(?:\((.*)\)|: *(.*))?$/ );

		var vol = v;
		var v_desc = '';
		if ( match ) {
			vol = match[ 1 ];
			v_desc = match[ 2 ] || match[ 3 ] || '';
		}

		// Add "Volumen " if it looks like we need it
		vol = vol.replace( /^(?:(?:vol|v)\. ?)?([-0-9]+)$/i, 'Volumen $1' );

		return [ vol, v_desc ];
	}

	/**
	 * Unwrap the first {{City|...}} template in a value, keeping only its
	 * argument.
	 *
	 * @param {string} c raw city text
	 * @return {string} the text with the template replaced by its argument
	 */
	function processCity( c ) {
		var city_template = /\{\{ *City *\| *(.*?) *\}\}/i;

		return c.replace( city_template, '$1' );
	}
	/**
	 * Rewrite known source templates into the short forms used on this wiki.
	 *
	 * - {{BDH|<bdh.bne.es detail URL>}} becomes {{BDH|<id>}}; both http and
	 *   https URLs are accepted
	 * - {{Internet Archive link|<id>}}, together with any leading ":" and all
	 *   text after the template, becomes {{IA|<id>}}
	 *
	 * @param {string} s raw source text
	 * @return {string} the rewritten text
	 */
	function processSource( s ) {
		s = s.replace( /\{\{ *BDH *\| *1* *=* *https?:\/\/bdh\.bne\.es\/bnesearch\/detalle\/(.*?) *\}\}/i, '{{BDH|$1}}' );
		// the "s" flag lets the trailing .* swallow following lines as well
		s = s.replace( /:*\{\{ *Internet Archive link *\| *(.*?) *\}\}.*/si, '{{IA|$1}}' );
		return s;
	}
	/**
	 * Fill the Index form from an API "query" response holding page revisions.
	 *
	 * For every page with a non-negative ID the Book template is parsed and its
	 * parameters are written to the form through set_field(). A page without
	 * revisions, or whose Book template cannot be parsed, is skipped.
	 *
	 * @param {Object} data API response; data.query.pages maps page IDs to pages
	 * @return {boolean} true when at least one page was imported
	 */
	function extract_content( data ) {
		var importationDone = false;

		// nothing usable in the response (e.g. an API error payload)
		if ( !data || !data.query || !data.query.pages ) {
			return importationDone;
		}

		// until Object entries is allowed
		// eslint-disable-next-line no-jquery/no-each-util
		$.each( data.query.pages, function ( ids, page ) {
			// pages keyed by a negative ID are skipped, as are pages that
			// came back without any revision content
			if ( ids < 0 || !page.revisions || page.revisions.length === 0 ) {
				return;
			}

			var content = page.revisions[ 0 ][ '*' ],
				temp_parsed = parse_template( content, 'Book' );

			if ( temp_parsed === null ) {
				console.error( 'Failed to parse Book template' );
				// Stop here: the code below reads from the parse result, and
				// leaving the flag unset lets the caller try another source
				return;
			}

			var title = temp_parsed[ extract_dict.Title ];

			if ( title ) {
				if ( title.indexOf( ':' ) > -1 ) {
					// a colon: assume this is a title: subtitle
					var parts = title.split( ':' ).map( function ( part ) {
						return part.trim();
					} );
					title = parts[ 0 ];

					set_field( 'Title', '[[' + title + ']]' );
					set_field( 'Subtitle', parts.slice( 1 ).join( ': ' ) );
				} else {
					set_field( 'Title', '[[' + title + ']]' );
				}
			}

			for ( var idx in extract_dict ) {

				var template_content = '';

				if ( typeof extract_dict[ idx ] === 'string' ) {
					template_content = temp_parsed[ extract_dict[ idx ] ];
				} else {
					// find the first matching parameter
					for ( var i = 0; i < extract_dict[ idx ].length; i++ ) {

						template_content = temp_parsed[ extract_dict[ idx ][ i ] ];

						if ( template_content !== undefined && template_content.length > 0 ) {
							break;
						}
					}
				}

				if ( template_content === undefined || template_content.length === 0 ) {
					continue;
				}

				switch ( idx ) {
					case 'Editor':
					case 'Author':
					case 'Translator':
					case 'Illustrator':
						set_field( idx, process_authors( template_content ) );
						break;
					case 'Publisher':
						// it is very common for the Commons publisher field
						// to contain the location
						var pub_city = split_city_publisher( template_content );

						set_field( 'Publisher', pub_city[ 0 ] );
						if ( pub_city[ 1 ].length > 0 ) {
							set_field( 'City', pub_city[ 1 ] );
						}
						break;
					case 'Volume':
						var v = processVolume( template_content );

						var v_field;
						if ( title ) {
							// link the volume as a subpage of the work
							v_field = '[[' + title +
								'/' + v[ 0 ] + '|' + v[ 0 ] + ']]';
						} else {
							// fallback
							v_field = v[ 0 ];
						}

						if ( v[ 1 ] ) {
							v_field += ' (' + v[ 1 ] + ')';
						}
						set_field( idx, v_field );
						break;
					case 'Title':
						// already handled above
						break;
					case 'City':
						set_field( idx, processCity( template_content ) );
						break;
					case 'Source':
						set_field( idx, processSource( template_content ) );
						break;
					default:
						set_field( idx, template_content );
				}
			}

			// set the sort key: move a leading article behind the title
			var skTitle = temp_parsed[ extract_dict.Title ];
			if ( skTitle !== undefined ) {
				var titlewords = skTitle.split( ' ' );
				// this word list is also translatable
				if ( [ 'De', 'La', 'El', 'Las', 'Los', 'Un', 'Una', 'Unas', 'Unos' ].indexOf( titlewords[ 0 ] ) >= 0 ) {
					skTitle = titlewords.slice( 1 ).join( ' ' ) + ', ' + titlewords[ 0 ];
					skTitle = skTitle[ 0 ].toUpperCase() + skTitle.slice( 1 );
					set_field( 'Key', skTitle );
				}
			}

			importationDone = true;
		} );

		return importationDone;
	}

	/**
	 * Handle the Commons response: import from it, and when nothing could be
	 * imported request the same File: page from this wiki's API and import
	 * from that instead.
	 *
	 * @param {Object} data API response passed on to extract_content()
	 */
	function common_content( data ) {
		if ( extract_content( data ) ) {
			return;
		}

		var local_api = mw.util.wikiScript( 'api' );
		var local_query = {
			format: 'json',
			action: 'query',
			prop: 'revisions',
			rvprop: 'content',
			titles: 'File:' + mw.config.get( 'wgTitle' )
		};

		$.ajax( { url: local_api, data: local_query } ).done( extract_content );
	}

	/**
	 * Request the wikitext of the File: page named like the current page from
	 * the Commons API (JSONP) and hand the response to common_content().
	 */
	function do_extraction() {
		var commons_query = {
			format: 'json',
			action: 'query',
			prop: 'revisions',
			rvprop: 'content',
			titles: 'File:' + mw.config.get( 'wgTitle' )
		};
		var request = $.ajax( {
			url: '//commons.wikimedia.org/w/api.php',
			data: commons_query,
			dataType: 'jsonp'
		} );

		request.done( common_content );
	}

	/**
	 * Start the gadget on an Index edit form.
	 *
	 * When one of the "new article" notices is on the page the form is filled
	 * right away; otherwise a toolbox link is added that fills it on demand.
	 */
	function setup() {
		setup_extract_dict();

		// The second selector is a variant of the notice, presumably the one
		// shown to anonymous (IP) editors - TODO confirm
		// eslint-disable-next-line no-jquery/no-global-selector
		var is_new_page = $( '.mw-newarticletext' ).length !== 0 ||
			$( '.mw-newarticletextanon' ).length !== 0;

		if ( is_new_page ) {
			do_extraction();
			return;
		}

		// Portlet link to re-extract
		var refill_link = mw.util.addPortletLink(
			'p-tb',
			'#',
			'Re-fill index',
			't-refill-index',
			'Re-import this index page\'s data from the Commons file'
		);

		$( refill_link ).on( 'click', function ( event ) {
			event.preventDefault();
			do_extraction();
		} );
	}

	/* Localisation section: define self.fill_index_data, or any of its
	 * properties, before this script loads to override the defaults below.
	 */
	var fill_data = self.fill_index_data;

	if ( !fill_data ) {
		fill_data = {};
		self.fill_index_data = fill_data;
	}

	// Name of the author namespace used when linking authors
	if ( !fill_data.ns_author_name ) {
		fill_data.ns_author_name = 'Autor';
	}

	// Commons Book template field names, keyed by field index.
	// Should not need to be internationalised. An array lists alternative
	// parameters; the first non-empty one is used.
	if ( !fill_data.extract_dict ) {
		fill_data.extract_dict = {
			Editor: 'Editor',
			Publisher: 'Publisher',
			Author: 'Author',
			Translator: 'Translator',
			Volume: 'Volume',
			Illustrator: 'Illustrator',
			'Image page': 'Image page',
			Title: 'Title',
			Date: [ 'Publication date', 'Date' ],
			City: 'City',
			Source: 'Source',
			LCCN: 'LCCN',
			OCLC: 'OCLC',
			Subtitle: 'Subtitle',
			Series: 'Series title',
			Printer: 'Printer',
			Wikidata: 'Wikidata'
		};
	}

	// Proofread page field names, keyed by the same field index
	if ( !fill_data.field_names ) {
		fill_data.field_names = {
			Editor: 'Editor',
			Publisher: 'Editorial',
			Author: 'Autor',
			Translator: 'Traductor',
			Volume: 'Volumen',
			Illustrator: 'Ilustrador',
			'Image page': 'Imagen',
			Title: 'Titulo',
			Date: 'Ano',
			City: 'Lugar',
			Source: 'Fuente',
			Subtitle: 'Subtitulo',
			Series: 'Serie',
			Printer: 'Imprenta',
			Wikidata: 'Wikidata'
		};
	}
	/* end of localisation section */

	// Once the document is ready, start the gadget - but only on the edit
	// form of a page in the Index namespace
	$( function () {
		if ( mw.config.get( 'wgCanonicalNamespace' ) !== 'Index' ) {
			return;
		}
		if ( mw.config.get( 'wgAction' ) !== 'edit' ) {
			return;
		}

		setup();
	} );
// eslint-disable-next-line no-undef
}( mediaWiki, jQuery, Promise ) );