git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pavel Volek <Pavel.Volek@ensimag.imag.fr>
To: git@vger.kernel.org
Cc: Volek Pavel <me@pavelvolek.cz>,
	Pavel Volek <Pavel.Volek@ensimag.imag.fr>,
	NGUYEN Kim Thuat <Kim-Thuat.Nguyen@ensimag.imag.fr>,
	ROUCHER IGLESIAS Javier <roucherj@ensimag.imag.fr>,
	Matthieu Moy <Matthieu.Moy@imag.fr>
Subject: [PATCHv2 2/2] git-remote-mediawiki: refactoring get_mw_pages function
Date: Mon, 11 Jun 2012 21:29:05 +0200	[thread overview]
Message-ID: <1339442945-8561-2-git-send-email-Pavel.Volek@ensimag.imag.fr> (raw)
In-Reply-To: <1339442945-8561-1-git-send-email-Pavel.Volek@ensimag.imag.fr>

From: Volek Pavel <me@pavelvolek.cz>

Split the code of the get_mw_pages function into three separate functions:
one that gets the list of all pages and all file attachments, one that gets
the pages in the categories specified in the configuration file along with
the files related to those pages, and one that gets from MediaWiki the list
of specified pages with their related file attachments.

Signed-off-by: Pavel Volek <Pavel.Volek@ensimag.imag.fr>
Signed-off-by: NGUYEN Kim Thuat <Kim-Thuat.Nguyen@ensimag.imag.fr>
Signed-off-by: ROUCHER IGLESIAS Javier <roucherj@ensimag.imag.fr>
Signed-off-by: Matthieu Moy <Matthieu.Moy@imag.fr>
---
 contrib/mw-to-git/git-remote-mediawiki | 144 ++++++++++++++++++---------------
 1 file changed, 79 insertions(+), 65 deletions(-)

diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki
index 14008ad..c0c0df7 100755
--- a/contrib/mw-to-git/git-remote-mediawiki
+++ b/contrib/mw-to-git/git-remote-mediawiki
@@ -212,91 +212,105 @@ sub get_mw_pages {
 	my $user_defined;
 	if (@tracked_pages) {
 		$user_defined = 1;
-		# The user provided a list of pages titles, but we
-		# still need to query the API to get the page IDs.
-
-		my @some_pages = @tracked_pages;
-		while (@some_pages) {
-			my $last = 50;
-			if ($#some_pages < $last) {
-				$last = $#some_pages;
-			}
-			my @slice = @some_pages[0..$last];
-			get_mw_first_pages(\@slice, \%pages);
-			@some_pages = @some_pages[51..$#some_pages];
-		}
-
-		# Get pages of related media files.
-		if ($import_media) {
-			get_mw_pages_for_linked_mediafiles(\@tracked_pages, \%pages);
-		}
+		get_mw_tracked_pages(\%pages);
 	}
 	if (@tracked_categories) {
 		$user_defined = 1;
-		foreach my $category (@tracked_categories) {
-			if (index($category, ':') < 0) {
-				# Mediawiki requires the Category
-				# prefix, but let's not force the user
-				# to specify it.
-				$category = "Category:" . $category;
-			}
-			my $mw_pages = $mediawiki->list( {
-				action => 'query',
-				list => 'categorymembers',
-				cmtitle => $category,
-				cmlimit => 'max' } )
-			    || die $mediawiki->{error}->{code} . ': ' . $mediawiki->{error}->{details};
-			foreach my $page (@{$mw_pages}) {
-				$pages{$page->{title}} = $page;
-			}
-
-			# Get pages of related media files.
-			if ($import_media) {
-				my @titles = map $_->{title}, @{$mw_pages};
-				get_mw_pages_for_linked_mediafiles(\@titles, \%pages);
-			}
-		}
+		get_mw_tracked_categories(\%pages);
 	}
 	if (!$user_defined) {
-		# No user-provided list, get the list of pages from
-		# the API.
+		get_mw_all_pages(\%pages);
+	}
+	return values(%pages);
+}
+
+sub get_mw_all_pages {
+	my $pages = shift;
+	# No user-provided list, get the list of pages from the API.
+	my $mw_pages = $mediawiki->list({
+		action => 'query',
+		list => 'allpages',
+		aplimit => 500
+	});
+	if (!defined($mw_pages)) {
+		print STDERR "fatal: could not get the list of wiki pages.\n";
+		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
+		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
+		exit 1;
+	}
+	foreach my $page (@{$mw_pages}) {
+		$pages->{$page->{title}} = $page;
+	}
+
+	if ($import_media) {
+		# Attach list of all pages for media files from the API;
+		# they are in a different namespace, and only one namespace
+		# can be queried at a time.
 		my $mw_pages = $mediawiki->list({
 			action => 'query',
 			list => 'allpages',
-			aplimit => 500,
+			apnamespace => get_mw_namespace_id("File"),
+			aplimit => 500
 		});
 		if (!defined($mw_pages)) {
-			print STDERR "fatal: could not get the list of wiki pages.\n";
+			print STDERR "fatal: could not get the list of pages for media files.\n";
 			print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
 			print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
 			exit 1;
 		}
 		foreach my $page (@{$mw_pages}) {
-			$pages{$page->{title}} = $page;
+			$pages->{$page->{title}} = $page;
+		}
+	}
+}
+
+sub get_mw_tracked_pages {
+	my $pages = shift;
+	# The user provided a list of page titles, but we
+	# still need to query the API to get the page IDs.
+	my @some_pages = @tracked_pages;
+	while (@some_pages) {
+		my $last = 50;
+		if ($#some_pages < $last) {
+			$last = $#some_pages;
 		}
+		my @slice = @some_pages[0..$last];
+		get_mw_first_pages(\@slice, \%{$pages});
+		@some_pages = @some_pages[51..$#some_pages];
+	}
+
+	# Get pages of related media files.
+	if ($import_media) {
+		get_mw_pages_for_linked_mediafiles(\@tracked_pages, \%{$pages});
+	}
+}
 
+sub get_mw_tracked_categories {
+	my $pages = shift;
+	foreach my $category (@tracked_categories) {
+		if (index($category, ':') < 0) {
+			# Mediawiki requires the Category
+			# prefix, but let's not force the user
+			# to specify it.
+			$category = "Category:" . $category;
+		}
+		my $mw_pages = $mediawiki->list( {
+			action => 'query',
+			list => 'categorymembers',
+			cmtitle => $category,
+			cmlimit => 'max' } )
+			|| die $mediawiki->{error}->{code} . ': '
+				. $mediawiki->{error}->{details};
+		foreach my $page (@{$mw_pages}) {
+			$pages->{$page->{title}} = $page;
+		}
+
+		# Get pages of related media files.
 		if ($import_media) {
-			# Attach list of all pages for meadia files from the API,
-			# they are in a different namespace, only one namespace
-			# can be queried at the same moment
-			my $mw_pages = $mediawiki->list({
-				action => 'query',
-				list => 'allpages',
-				apnamespace => get_mw_namespace_id("File"),
-				aplimit => 500
-			});
-			if (!defined($mw_pages)) {
-				print STDERR "fatal: could not get the list of pages for media files.\n";
-				print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
-				print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
-				exit 1;
-			}
-			foreach my $page (@{$mw_pages}) {
-				$pages{$page->{title}} = $page;
-			}
+			my @titles = map $_->{title}, @{$mw_pages};
+			get_mw_pages_for_linked_mediafiles(\@titles, \%{$pages});
 		}
 	}
-	return values(%pages);
 }
 
 sub get_mw_pages_for_linked_mediafiles {
-- 
1.7.10.2.552.gaa3bb87

  reply	other threads:[~2012-06-11 19:30 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-11 19:29 [PATCHv2 1/2] git-remote-mediawiki: import "File:" attachments Pavel Volek
2012-06-11 19:29 ` Pavel Volek [this message]
2012-06-12  9:06   ` [PATCHv2 2/2] git-remote-mediawiki: refactoring get_mw_pages function Simon Perrat
2012-06-12  9:24     ` konglu
2012-06-12  9:25     ` Matthieu Moy
2012-06-12  9:56       ` Simon Perrat
2012-06-11 20:38 ` [PATCHv2 1/2] git-remote-mediawiki: import "File:" attachments Matthieu Moy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1339442945-8561-2-git-send-email-Pavel.Volek@ensimag.imag.fr \
    --to=pavel.volek@ensimag.imag.fr \
    --cc=Kim-Thuat.Nguyen@ensimag.imag.fr \
    --cc=Matthieu.Moy@imag.fr \
    --cc=git@vger.kernel.org \
    --cc=me@pavelvolek.cz \
    --cc=roucherj@ensimag.imag.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).