问题
I am sort of stuck. My goal is to extract data from a website that has several hundred pages. Its a sports website and i have to extract the team names and other relevant data. So far i have been successful doing it. I ran the loop for 6-7 pages and its works perfectly well. but when i change the loop to about month(25), it retrieves incomplete data.
For instance if the destination date is 25 October, it may stop randomly at 10-12 October.
I am using phpQuery
and my internet connection is 1MB.
Kindly guide me in this matter, Thank You.
include 'phpQueryOneFile.php';
$fileName = "test.txt";
$fileHandle = fopen($fileName, 'w') or die("can't open file");
$team2Arr[] = "" ; // Array that will store all th team2 names, in complete season
$finalCount2 = 0 ; // counter that is maintained for the $team2Arr, including 'FFF', means this
// will hold the atual size of array including 'FFF'
$totalNumberOfTeam2 = 0; // counter that will contain only the number of teams in total,
// excluding the "FFF", hence the aise of team2Arr - 'FFF's = # of teams
$team1Arr[] = "" ; // Array that will store all th team1 names, in complete season
$finalCount1 = 0; // counter that is maintained for the $team1Arr, including 'FFF', means this
// will hold the atual size of array including 'FFF'
$totalNumberOfTeam1 = 0; // counter that will contain only the number of teams in total,
// excluding the "FFF", hence the aise of team1Arr - 'FFF's = # of teams
$year = '2012' ;
$month = '10' ;
$day = '6';
while($day < 32)
{
if($day < 10)
{
$day = '0'.$day ;
}
//$doc[] = phpQuery::newDocumentFileHTML('http://www.nba.com/gameline/'.$date.'/');
phpQuery::newDocumentFileHTML('http://www.nba.com/gameline/'.$year.$month.$day.'/');
$time1Element[$i] = pq('.nbaFnlStatTxSm');
$time1[$i] = $time1Element[$i]->html();
//echo '<h2>This is The Time</h2>';
//echo '<pre>' . htmlentities($time1[$i]) . '</pre>';
fwrite($fileHandle, "This is The Time"."\n");
fwrite($fileHandle, $time1[$i]."\n"."\n");
$time2Element[$i] = pq('.nbaPreMnStatus .nbaPreStatTx');
$time2[$i] = $time2Element[$i]->html();
//echo '<h2>This is The Second Time that is not visible</h2>';
//echo '<pre>' . htmlentities($time2[$i]) . '</pre>';
fwrite($fileHandle, "This is Only Time"."\n");
fwrite($fileHandle, $time2[$i]."\n"."\n");
$dateElement[$i] = pq('#nbaCalSelectedDate');
$date[$i] = $dateElement[$i]->html();
//echo '<pre>' . htmlentities($dateElement[$i]) . '</pre>';
//echo '<h2>The Date of the match is</h2>';
//echo '<pre>'.$dateElement[$i].'</pre>';
fwrite($fileHandle, "The Date of the match is (Proccessed)"."\n");
fwrite($fileHandle, $date[$i]."\n"."\n");
fwrite($fileHandle, "The Date of the match is (Not Proccessed)"."\n");
fwrite($fileHandle, $dateElement[$i]."\n"."\n");
$team2Element[$i] = pq('.nbaPreMnStatusTeamHm .nbaModTopTeamName');
$team2[$i] = $team2Element[$i]->html();
//echo '<h2>The Home Team is </h2>';
//echo '<pre>' . htmlentities($team2[$i]). '</pre>';
fwrite($fileHandle, "The Home Team is "."\n");
fwrite($fileHandle, $team2[$i]."\n"."\n");
//here we split the data
//get the lenght of the string
echo ("The lenght of arrays is = ".strlen($team2[$i]));
//loop till 3
$out2 = ""; // String of lenght 3 that will pick and store 3 char and put in team2Arr
$threeCount = 0; // counter that is represents the second dimenssion of array,
// i.e character in string and number of strings are stored in array
$teamNumber = 1; // counter that is represents that team number of particular day
$numberOfTeams = strlen($team2[$i]) / 3; // this calutate the numberOfTeams on one day
echo ("<br / >"."team number = ".$numberOfTeams."<br />");
for($j=0 ; $j<$numberOfTeams ; $j++)
{
echo '<br />'."Team".$teamNumber." is : ";
//here we extract pair of 3 character
for($k=0 ; $k<3 ; $k++)
{
$out2 .= $team2[$i][$threeCount];
$threeCount++;
}
echo $out2."\n";
$team2Arr[$finalCount2] .= $out2;
fwrite($fileHandle, $out2."\n");
$teamNumber++;
$finalCount2++;
$totalNumberOfTeam2++;
$out2 = "";
}
$team2Arr[$finalCount2] = "FFF";
$finalCount2++;
$team1Element[$i] = pq('.nbaPreMnStatusTeamAw .nbaModTopTeamName');
$team1[$i] = $team1Element[$i]->html();
//echo '<h2>The Away Team is </h2>';
//echo '<pre>' . htmlentities($team1[$i]). '</pre>';
fwrite($fileHandle, "The Away Team is "."\n");
fwrite($fileHandle, $team1[$i]."\n"."\n");
echo ("\n"."The lenght of arrays is = ".strlen($team1[$i]));
//here we split the data
//get the lenght of the string
//loop till 3
$out1 = ""; // String of lenght 3 that will pick and store 3 char and put in team1Arr
$threeCount = 0; // REINITIALIZED counter that is represents the second dimenssion of array,
// i.e character in string and number of strings are stored in array
$teamNumber = 1; // REINITIALIZED counter that is represents that team number of particular day
$numberOfTeams = strlen($team1[$i]) / 3;
echo "<br />";
echo ("team number = ".$numberOfTeams."<br />");
for($j=0 ; $j<$numberOfTeams ; $j++)
{
echo "<br />"."Team".$teamNumber." is : ";
//here we extract pair of 3 character
for($k=0 ; $k<3 ; $k++)
{
$out1 .= $team1[$i][$threeCount];
$threeCount++;
}
echo $out1;
$team1Arr[$finalCount1] .= $out1;
fwrite($fileHandle, $out1."\n");
$teamNumber++;
$finalCount1++;
$totalNumberOfTeam1++;
$out1 = "";
}
$team1Arr[$finalCount1] = "FFF";
$finalCount1++;
$day = $day + 1 ;
$i++;
}
fwrite($fileHandle, $team2Arr);
fwrite($fileHandle, $team1Arr);
fclose($fileHandle);
echo "<br />"."number is iteratin : ".$i;
来源:https://stackoverflow.com/questions/12754520/having-issue-in-extracting-data-from-100-web-pages-in-a-single-loop