Perlでツタヤ(TSUTAYA)の店舗名を取得する
#!/usr/bin/perl
package Tshop;

use strict;
use warnings;

# Tshop - download result pages and pull shop names out of their HTML.
#
# Typical use (see the methods below for details):
#
#   my $t    = Tshop->new();
#   my $html = $t->get_content($base_url, $page_number);
#   $t->filew($t->shopname($html));
#   ... repeat while $t->next($html) returns something true ...
#
# LWP::UserAgent is loaded on demand inside get_content(), so the pure
# parsing helpers (shopname, next) work without the HTTP stack.

# new()
#
# Constructor. Takes no arguments and returns an empty blessed hash.
sub new {
    my $class = shift;
    return bless {}, $class;
}

# shopname($cont)
#
# Extracts the link text of every `<h2><a ...>TEXT</a></h2>` element found
# in $cont.
#
# Returns one string with each name followed by "\n"; returns '' when
# nothing matches or when $cont is undefined (e.g. a failed download).
sub shopname {
    my ($self, $cont) = @_;
    return '' unless defined $cont;

    # [^>]* for the attributes and a non-greedy (.*?) for the text keep
    # several headings that share one line apart; a greedy ".*" would
    # swallow everything from the first heading to the last one.
    my @names = $cont =~ m{<h2><a\s[^>]*>(.*?)</a></h2>}g;

    my $data = '';
    for my $name (@names) {
        $name =~ s/<.*?>//g;    # drop any markup nested inside the link
        $data .= $name . "\n";
    }
    return $data;
}

# next($cont)
#
# Looks for an `<li class="tolLRWBliEnd">...</li>` element in $cont.
# NOTE(review): presumably this is the pager's "next page" entry - confirm
# against the site's markup.
#
# Returns the matched element (up to its first closing </li>) when present.
# Otherwise returns nothing: undef in scalar context, an empty list in list
# context, so the result is false either way.
sub next {
    my ($self, $cont) = @_;
    return unless defined $cont;

    if ($cont =~ m{(<li class="tolLRWBliEnd">.*?</li>)}) {
        return $1;
    }
    return;
}

# filew($data [, $outfile])
#
# Appends $data to $outfile. $outfile is optional and defaults to
# 'shop.txt' in the current directory. An undefined $data is treated as an
# empty string (the file is still created).
#
# Dies if the file cannot be opened, written or closed.
# Returns 1 on success.
sub filew {
    my ($self, $data, $outfile) = @_;
    $outfile = 'shop.txt' unless defined $outfile;
    $data    = ''         unless defined $data;

    # Lexical handle instead of the global bareword OUT; every step is
    # checked because buffered write errors may only surface at close.
    open(my $out, '>>', $outfile)
        or die("can not open! $outfile: $!");
    print {$out} $data
        or die("can not write! $outfile: $!");
    close($out)
        or die("can not close! $outfile: $!");

    return 1;
}

# get_content($url, $num)
#
# Performs an HTTP GET on $url with $num appended (callers pass a URL that
# ends in the page-number parameter). An undefined $num appends nothing.
#
# Returns the raw response body on success. On an undefined $url or an
# unsuccessful response it returns undef; for the latter a warning with the
# HTTP status line is emitted so failures are not completely silent.
sub get_content {
    my ($self, $url, $num) = @_;

    my $cont;
    return $cont unless defined $url;
    $url .= $num if defined $num;

    # Loaded here rather than at compile time; see the package header.
    require LWP::UserAgent;

    my $ua  = LWP::UserAgent->new(agent => "GoodleBot/0.1");
    my $res = $ua->get($url);

    if ($res->is_success) {
        $cont = $res->content;
    }
    else {
        warn "Tshop::get_content: GET $url failed: " . $res->status_line . "\n";
    }
    return $cont;
}

1;
↓がテスト(結果ページを順に取得し、店舗名をshop.txtに追記する)。
#!/usr/bin/perl
# Driver/test script for Tshop.
#
# Checks that the module loads, then fetches result pages starting at page 1
# and appends the shop names found on each page via Tshop::filew(). It keeps
# going for as long as Tshop::next() returns a true value for the page that
# was just fetched.
use strict;
use warnings;
use Test::More "no_plan";

use_ok('Tshop');
use Tshop;

# Result-page URL without the page number; get_content() appends the number.
my $base_url = 'http://store.tsutaya.co.jp/storelocator/result.html?u=0_26|1_0|2_0|3_0|4_0|5_0|6_0|7_0|8_0|9_0|10_0|11_0|12_0|14_0|15_0&k=&p=';

my $shop     = Tshop->new();
my $page     = 0;
my $has_more;

# The first page is always processed; later pages only while the previous
# one reported that more follow.
do {
    $page++;
    my $html = $shop->get_content($base_url, $page);
    $has_more = $shop->next($html);
    my $names = $shop->shopname($html);
    $shop->filew($names);
} while ($has_more);