The task is to parse the category from one site.
- We go to the site, select all the links to the brand of car (site for the sale of auto parts).
- Save all links to the file /index.php
- We pass on each link, copy the path, create a folder from this path
- Copy all links to auto models in the file path / index.php
- Go to each model, then everything is similar to p.4
Repeat until the end of the directory and eventually save the last page in index.php. There is this:
<?php ini_set("max_execution_time", 3600); $custom_url = 'http://exist.ru/cat/TO'; include_once('simple_html_dom.php'); //Main Function function categories_parse($custom_url) { $header_m = '<? require($_SERVER["DOCUMENT_ROOT"]."/bitrix/header.php"); $APPLICATION->SetPageProperty("title", "Демонстрационная версия продукта «1С-Битрикс: Управление сайтом»"); $APPLICATION->SetPageProperty("NOT_SHOW_NAV_CHAIN", "Y"); $APPLICATION->SetTitle("Главная страница"); ?> <?$APPLICATION->IncludeComponent("bitrix:main.include", ".default", array( "AREA_FILE_SHOW" => "file", "PATH" => "/bitrix/templates/4/include_areas/show.php", "EDIT_TEMPLATE" => "" ),false); session_start(); process_form(); ?>'; $footer_m = '<?require($_SERVER["DOCUMENT_ROOT"]."/bitrix/footer.php");?>'; $a = 0; $i = 0; $parse_url = file_get_html($custom_url); $parse = $parse_url->find('div.catalog-column a'); $catalog_col = $parse_url->find('div.catalog-column'); foreach ($catalog_col as $col) { $catalog_col_m[$a] = $col->innertext; $file_main = fopen('./index.php', 'w'); fwrite($file_main, $header_m); fwrite($file_main, $catalog_col_m[$a]); fwrite($file_main, $footer_m); $a++; } foreach ($parse as $p) { $category_href_original[$i] = $p->href; $categoty_path[$i] = "." . $category_href_original[$i]; mkdir($categoty_path[$i], 0777, true); $category_href[$i] = 'http://exist.ru' . $p->href; model_parse($category_href[$i]); $i++; } } function model_parse($href) { $model_url = file_get_html($href); $i = 0; $model_link = $model_url->find('div.cell a'); $model_c = $model_url->find('div.cell'); $z = 0; foreach ($model_c as $cell) { $file_c_p = $href . 'index.php'; $file_c = fopen($file_c_p, 'w'); $model_col_m[$z] = $cell->innertext; fwrite($file_c, $header_c); fwrite($file_c, $model_col_m[$z]); fwrite($file_c, $footer_c); $z++; } foreach ($model_link as $lnk) { $model_href_original[$i] = $lnk->href; $model_path[$i] = "." . $model_href_original[$i]; $model_href[$i] = 'http://exist.ru' . $lnk->href; mkdir($model_path[$i], 0777, true); price_cat_parse($model_href[$i]); $i++; } } function price_cat_parse($price_cat_href) { $header_parse = '<? require($_SERVER["DOCUMENT_ROOT"]."/bitrix/header.php"); $APPLICATION->SetPageProperty("title", "Демонстрационная версия продукта «1С-Битрикс: Управление сайтом»"); $APPLICATION->SetPageProperty("NOT_SHOW_NAV_CHAIN", "Y"); $APPLICATION->SetTitle("Главная страница"); ?> <?$APPLICATION->IncludeComponent("bitrix:main.include", ".default", array( "AREA_FILE_SHOW" => "file", "PATH" => "/bitrix/templates/4/include_areas/show.php", "EDIT_TEMPLATE" => "" ),false); session_start(); process_form(); ?>'; $footer_parse = '<?require($_SERVER["DOCUMENT_ROOT"]."/bitrix/footer.php");?>'; $price_cat_href_n = $price_cat_href . 'index.php'; $price_cat_url = file_get_html($price_cat_href); $price_cat_lnk = $price_cat_url->find('.content-ul a'); $price_cat_content = $price_cat_url->find('tr', 0); $i = 0; $h = 0; $file_p_c = fopen($price_cat_href_n, 'w'); foreach ($price_cat_content as $cat_cont) { $price_cont[$h] = $cat_cont->innertext; fwrite($file_c, $header_parse); fwrite($file_c, $price_cont[$h]); fwrite($file_c, $footer_parse); $h++; } foreach ($price_cat_lnk as $lnk) { $price_cat_href_original[$i] = $lnk->href; $price_cat_path[$i] = "." . $price_cat_href_original[$i]; $price_cat_href[$i] = 'http://exist.ru' . "$price_cat_href_original[$i]"; // echo "$price_cat_href_n[$i] \n"; mkdir($price_cat_path[$i], 0777, true); price_parse($price_cat_href[$i], $price_cat_path[$i]); $i++; } } function price_parse($price_href, $folder_file) { $header = '<? require($_SERVER["DOCUMENT_ROOT"]."/bitrix/header.php"); $APPLICATION->SetPageProperty("title", "Демонстрационная версия продукта «1С-Битрикс: Управление сайтом»"); $APPLICATION->SetPageProperty("NOT_SHOW_NAV_CHAIN", "Y"); $APPLICATION->SetTitle("Главная страница"); ?> <?$APPLICATION->IncludeComponent("bitrix:main.include", ".default", array( "AREA_FILE_SHOW" => "file", "PATH" => "/bitrix/templates/4/include_areas/show.php", "EDIT_TEMPLATE" => "" ),false); session_start(); process_form(); ?>'; $footer = '<?require($_SERVER["DOCUMENT_ROOT"]."/bitrix/footer.php");?>'; $price_new_url = file_get_html($price_href); $price_cont = $price_new_url->find('table.tbl'); $i = 0; foreach ($price_cont as $cont) { $index = '/index.php'; $path_to_file = $folder_file . $index; $price_contnent_original[$i] = fopen($path_to_file, 'w+'); fwrite($price_contnent_original[$i], $header); fwrite($price_contnent_original[$i], $cont); fwrite($price_contnent_original[$i], $footer); echo "$path_to_file \n"; echo "file wrote \n"; $i++; } } categories_parse($custom_url); ?>
Help who can ....