<?php
/**
 * Shopee Product Detail Extractor — Metode Alternatif (IDs via Canonical + API spoof headers)
 * -------------------------------------------------------------------------------------------
 * Input : Beberapa URL produk Shopee (satu per baris)
 * Output: Judul, Deskripsi, Harga, Kategori, LINK GAMBAR 1..3 (tanpa menyimpan file)
 * Metode:
 *    A) Dari URL/HTML → ambil shopid & itemid (via pola URL, <link rel="canonical">, og:url, atau JSON inline)
 *    B) Panggil API item/get dengan header yang lebih "mirip browser" (x-api-source, x-shopee-language, if-none-match-)
 *    C) Jika API gagal / diblokir → fallback HTML (OG/Twitter/JSON‑LD + pola CDN /file/)
 * Catatan: WAF Shopee bisa blokir IP tertentu. Jika hasil kosong, coba IP/hosting lain.
 */

declare(strict_types=1);
mb_internal_encoding('UTF-8');

/* ===================== HTTP ===================== */
/**
 * Perform a browser-like HTTP GET with cURL and report the outcome.
 *
 * Caller-supplied headers replace built-in defaults that share the same
 * (case-insensitive) header name, so a custom "referer" is sent instead of,
 * not in addition to, the default Referer.
 *
 * @param string   $url          Absolute URL to fetch.
 * @param string[] $extraHeaders Extra "Name: value" header lines.
 * @param int      $timeout      Total transfer timeout in seconds.
 *
 * @return array{ok: bool, code: int, body: string, err: string}
 *         "ok" is true when the transfer succeeded and the final status is 200-399;
 *         "body" is '' when the transfer failed; "err" is the cURL error text.
 */
function http_get(string $url, array $extraHeaders = [], int $timeout = 25): array
{
    $baseHeaders = [
        'Accept: application/json,text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding: gzip, deflate', // avoid "br" in case brotli is not supported
        'Accept-Language: id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control: no-cache',
        'Pragma: no-cache',
        'Referer: https://shopee.co.id/',
    ];

    // Key every header line by its lower-cased name so a later line with the
    // same name overrides the earlier one (cURL would otherwise send both).
    $headers = [];
    foreach (array_merge($baseHeaders, $extraHeaders) as $line) {
        $line = (string) $line;
        $colon = strpos($line, ':');
        $name = strtolower(trim($colon === false ? $line : substr($line, 0, $colon)));
        $headers[$name] = $line;
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        CURLOPT_CONNECTTIMEOUT => 12,
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_HTTPHEADER => array_values($headers),
        CURLOPT_ENCODING => '',
        CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        // The URL may come from user input: never let the request or a
        // redirect leave HTTP(S). Kept last so the options above are applied
        // even if the local libcurl rejects these two.
        CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    ]);

    $body = curl_exec($ch);
    $err  = curl_error($ch);
    $code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // is_string() rather than truthiness: a literal "0" body is still a body.
    $gotBody = is_string($body);

    return [
        'ok'   => $gotBody && $code >= 200 && $code < 400,
        'code' => $code,
        'body' => $gotBody ? $body : '',
        'err'  => $err,
    ];
}
/**
 * Escape text for safe inclusion in HTML (element content or quoted attributes).
 * Both quote styles are encoded and invalid UTF-8 sequences are substituted.
 */
function h(string $s): string
{
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    return htmlspecialchars($s, $flags, 'UTF-8');
}
/**
 * Quote one value as a CSV field.
 * Each CR and each LF becomes a single space, embedded double quotes are
 * doubled, and the result is wrapped in double quotes.
 */
function csvq(string $s): string
{
    $singleLine = strtr($s, ["\r" => ' ', "\n" => ' ']);
    $escaped = str_replace('"', '""', $singleLine);

    return '"' . $escaped . '"';
}

/* ===================== URL / IDs ===================== */
/**
 * Trim a user-entered URL and make sure it carries an http(s) scheme.
 *
 * @return string '' for blank input; otherwise the trimmed URL, prefixed with
 *                "https://" when it does not already start with http:// or https://.
 */
function normalize_url(string $url): string
{
    $candidate = trim($url);
    if ($candidate === '') {
        return '';
    }

    $hasScheme = preg_match('~^https?://~i', $candidate) === 1;

    return $hasScheme ? $candidate : 'https://' . $candidate;
}
/**
 * Find the Shopee shop id and item id for a product.
 *
 * Lookup order:
 *   1. the URL itself ("/product/<shop>/<item>" or "i.<shop>.<item>");
 *   2. <link rel="canonical" href="..."> in the HTML;
 *   3. <meta property="og:url" content="..."> in the HTML;
 *   4. the first "shopid" and "itemid" numbers found anywhere in the HTML.
 *
 * Steps 2 and 3 accept the attributes in any order and with single or double
 * quotes; every matching tag is tried until one yields ids.
 *
 * @param string $url  Product URL.
 * @param string $html Page HTML; may be '' when the page could not be fetched.
 *
 * @return array{0: ?int, 1: ?int} [shopid, itemid], or [null, null] when nothing matched.
 */
function parse_ids_from_any(string $url, string $html = ''): array
{
    $slugPattern = '~i\.(\d+)\.(\d+)~';
    $pathPattern = '~/product/(\d+)/(\d+)~';

    // Returns [shopid, itemid] from the first pattern that matches, else null.
    $firstIds = static function (string $text, array $patterns): ?array {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $text, $m)) {
                return [(int) $m[1], (int) $m[2]];
            }
        }
        return null;
    };

    // 1) From the URL.
    $ids = $firstIds($url, [$pathPattern, $slugPattern]);
    if ($ids !== null) {
        return $ids;
    }
    if ($html === '') {
        return [null, null];
    }

    // 2) + 3) From canonical / og:url, which often carry i.<shop>.<item>.
    $lookups = [
        ['link', 'rel', 'canonical', 'href'],
        ['meta', 'property', 'og:url', 'content'],
    ];
    foreach ($lookups as [$tag, $keyAttr, $keyValue, $valueAttr]) {
        if (!preg_match_all('~<' . $tag . '\b[^>]*>~i', $html, $tags)) {
            continue;
        }
        // The look-behind keeps e.g. data-href from being mistaken for href.
        $keyRe = '~(?<![\w-])' . $keyAttr . '\s*=\s*(["\'])' . preg_quote($keyValue, '~') . '\1~i';
        $valueRe = '~(?<![\w-])' . $valueAttr . '\s*=\s*(["\'])(.*?)\1~is';
        foreach ($tags[0] as $tagHtml) {
            if (!preg_match($keyRe, $tagHtml) || !preg_match($valueRe, $tagHtml, $m)) {
                continue;
            }
            $target = html_entity_decode($m[2], ENT_QUOTES | ENT_HTML5, 'UTF-8');
            $ids = $firstIds($target, [$slugPattern, $pathPattern]);
            if ($ids !== null) {
                return $ids;
            }
        }
    }

    // 4) From inline JSON ("shopid":123, "itemid":456). Deliberately loose:
    //    it takes the first digits that follow each key anywhere in the page.
    if (preg_match('~\bshopid\b[^\d]*(\d+)~i', $html, $ms) && preg_match('~\bitemid\b[^\d]*(\d+)~i', $html, $mi)) {
        return [(int) $ms[1], (int) $mi[1]];
    }

    return [null, null];
}

/* ===================== API Shopee ===================== */
/**
 * Fetch one product from the shopee.co.id "item/get" endpoint.
 *
 * The request carries the extra headers listed below plus a freshly generated
 * value for the "if-none-match-" header.
 *
 * @param int    $shopid Shop id.
 * @param int    $itemid Item id.
 * @param string $refUrl Product page URL, sent as the referer.
 *
 * @return array|null The decoded "data" object, or null when the request
 *                    failed, the body is not JSON, or "data" is missing or
 *                    not an object/array.
 */
function api_get_item(int $shopid, int $itemid, string $refUrl): ?array
{
    $etag = '55b03-' . substr(md5(microtime(true) . mt_rand()), 0, 8);

    // $refUrl originates from user input; remove control characters so it
    // cannot break out of its header line.
    $referer = (string) preg_replace('~[\x00-\x1F\x7F]~', '', $refUrl);

    $headers = [
        'x-api-source: pc',
        'x-shopee-language: id',
        'x-requested-with: XMLHttpRequest',
        'if-none-match-: ' . $etag,
        'referer: ' . $referer,
    ];
    $url = 'https://shopee.co.id/api/v4/item/get?itemid=' . $itemid . '&shopid=' . $shopid;

    $r = http_get($url, $headers, 25);
    if (!$r['ok']) {
        return null;
    }

    $decoded = json_decode($r['body'], true);
    if (!is_array($decoded)) {
        return null;
    }

    // Returning a scalar here would violate the ?array return type and abort
    // the whole page with a TypeError, so anything but an array becomes null.
    $data = $decoded['data'] ?? null;

    return is_array($data) ? $data : null;
}
/**
 * Format a price for display.
 *
 * Non-numeric input is returned unchanged (cast to string). Numeric input
 * greater than 10,000,000 is divided by 100,000 first — presumably to undo the
 * API's scaled integer prices (TODO confirm) — and the result is printed with
 * no decimals and "." as the thousands separator.
 *
 * @param mixed $v Raw price value.
 */
function map_price($v): string
{
    if (!is_numeric($v)) {
        return (string) $v;
    }

    $amount = (float) $v;
    if ($amount > 10000000) {
        $amount = $amount / 100000;
    }

    return number_format($amount, 0, ',', '.');
}
/**
 * Build the image URL on the down-id.img.susercontent.com host for an image
 * hash; the hash is URL-encoded.
 */
function cdn_url(string $hash): string
{
    return sprintf('https://down-id.img.susercontent.com/file/%s', rawurlencode($hash));
}

/* ===================== HTML Extractors ===================== */
/**
 * Pull title, description, price and category out of a product page's HTML.
 *
 * Title and description come from the first non-empty meta tag in a fixed
 * preference list, with HTML entities decoded so callers get plain text.
 * Price and category come from the first `"price": ...` and
 * `"category": "..."` / `"category_path": "..."` pairs found in the page.
 *
 * @return array{0: string, 1: string, 2: string, 3: string}
 *         [title, description, price, category]; '' for anything not found.
 */
function extract_meta_from_html(string $html): array
{
    // First non-empty capture among the patterns, entity-decoded and trimmed.
    $metaText = static function (array $patterns) use ($html): string {
        foreach ($patterns as $pattern) {
            if (!preg_match($pattern, $html, $m)) {
                continue;
            }
            $text = trim(html_entity_decode($m[1], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
            if ($text !== '') {
                return $text;
            }
        }
        return '';
    };

    $title = $metaText([
        '~<meta[^>]+property="og:title"[^>]+content="([^"]*)"~i',
        '~<meta[^>]+name="twitter:title"[^>]+content="([^"]*)"~i',
        '~<meta[^>]+itemprop="name"[^>]+content="([^"]*)"~i',
    ]);

    $desc = $metaText([
        '~<meta[^>]+property="og:description"[^>]+content="([^"]*)"~i',
        '~<meta[^>]+name="description"[^>]+content="([^"]*)"~i',
        '~<meta[^>]+name="twitter:description"[^>]+content="([^"]*)"~i',
    ]);

    // No \b in front of the opening quote: a word boundary there would require
    // a word character before the quote, which never occurs in normal JSON
    // ({"price", ,"price", or whitespace before it).
    $price = '';
    if (preg_match('~"price"\s*:\s*"?([0-9.,]+)"?~i', $html, $m)) {
        // For unquoted numbers the character class also swallows the JSON
        // separator that follows ("price":12000,"stock"...), so drop it.
        $price = rtrim($m[1], '.,');
    }

    $cat = '';
    if (preg_match('~"category(?:_path)?"\s*:\s*"([^"]+)"~i', $html, $m)) {
        $cat = $m[1];
    }

    return [$title, $desc, $price, $cat];
}
/**
 * Collect candidate product-image URLs from a page's HTML.
 *
 * Sources, in order: og:image / twitter:image meta tags, absolute URLs on a
 * Shopee or susercontent host that contain "/file/<name>", and <img>
 * src / data-src / srcset attributes pointing at those hosts. For srcset only
 * the first candidate URL is used, without its width/density descriptor.
 *
 * @return string[] Up to 10 unique URLs, in discovery order.
 */
function extract_images_from_html(string $html): array
{
    $decode = static function (string $value): string {
        return trim(html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    };
    // cdn_url() builds links on susercontent.com, so that host is recognised
    // alongside the shopee-named ones.
    $hostHint = '~shopee|susercontent~i';

    $found = [];

    foreach ([
        '~<meta[^>]+property="og:image"[^>]+content="([^"]+)"~i',
        '~<meta[^>]+name="twitter:image"[^>]+content="([^"]+)"~i',
    ] as $pattern) {
        if (preg_match($pattern, $html, $m)) {
            $found[] = $decode($m[1]);
        }
    }

    // Whitespace and angle brackets end a URL, so a match cannot run across
    // unrelated text or markup.
    $cdnPattern = '~https?://[^\s"\'<>]*(?:shopee|susercontent)[^\s"\'<>]*/file/[a-zA-Z0-9._-]+~i';
    if (preg_match_all($cdnPattern, $html, $m)) {
        foreach ($m[0] as $cdnUrl) {
            $found[] = $cdnUrl;
        }
    }

    if (preg_match_all('~<img[^>]+(src|data-src|srcset)="([^"]+)"~i', $html, $imgs, PREG_SET_ORDER)) {
        foreach ($imgs as [, $attr, $value]) {
            $value = $decode($value);
            if (strcasecmp($attr, 'srcset') === 0) {
                // "url1 1x, url2 2x" -> "url1"
                $parts = preg_split('~\s+~', $value, 2);
                $value = rtrim((string) ($parts[0] ?? ''), ',');
            }
            if ($value === '' || stripos($value, 'data:') === 0) {
                continue;
            }
            if (strpos($value, '//') === 0) {
                $value = 'https:' . $value; // protocol-relative URL
            }
            if (preg_match($hostHint, $value)) {
                $found[] = $value;
            }
        }
    }

    return array_slice(array_values(array_unique($found)), 0, 10);
}

/* ===================== Form ===================== */
// Request handling: read the submitted URLs, fetch each product and collect
// one result row per accepted URL. Problems are reported through $errors as
// plain text without user-supplied content.
$errors = [];
$rows = [];
$preview = !empty($_POST['preview']);

// A non-string field (e.g. product_urls[]=x) is treated as empty input
// instead of being cast to the literal text "Array".
$postedUrls = $_POST['product_urls'] ?? '';
$urls_raw = is_string($postedUrls) ? $postedUrls : '';

// The server fetches whatever URL is submitted, so only known Shopee hosts
// (and their subdomains) are accepted; anything else could be used to probe
// internal services. NOTE(review): extend the list if more Shopee regions or
// short-link domains must be supported.
$shopeeDomains = [
    'shopee.co.id', 'shopee.com.my', 'shopee.sg', 'shopee.ph', 'shopee.vn',
    'shopee.co.th', 'shopee.tw', 'shopee.com.br', 'shopee.com.mx',
    'shopee.com.co', 'shopee.cl', 'shp.ee', 'shope.ee',
];
$isShopeeUrl = static function (string $url) use ($shopeeDomains): bool {
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        return false;
    }
    $host = strtolower(rtrim($host, '.'));
    foreach ($shopeeDomains as $domain) {
        $suffix = '.' . $domain;
        if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
            return true;
        }
    }
    return false;
};

// Accepted URLs keyed by their 1-based line number in the textarea.
$inputs = [];
if ($urls_raw !== '') {
    // Split on CRLF, LF and lone CR so no line break survives inside a URL.
    $lines = preg_split('~\r\n|\r|\n~', $urls_raw) ?: [];
    foreach ($lines as $lineIdx => $line) {
        $u = normalize_url($line);
        if ($u === '') {
            continue;
        }
        if (!$isShopeeUrl($u)) {
            $errors[] = 'Baris ' . ($lineIdx + 1) . ' dilewati: bukan URL Shopee yang valid.';
            continue;
        }
        $inputs[$lineIdx + 1] = $u;
    }
}

foreach ($inputs as $lineNo => $u) {
    $htmlBody = '';
    $title = $desc = $price = $cat = '';
    $img1 = $img2 = $img3 = '';

    // 1) Fetch the page so canonical/og:url can supply the ids when the URL has none.
    $rh = http_get($u);
    if ($rh['ok'] && $rh['body'] !== '') {
        $htmlBody = $rh['body'];
    }

    // 2) Resolve shopid & itemid (URL first, then HTML).
    [$shopid, $itemid] = parse_ids_from_any($u, $htmlBody);

    // 3) Try the item/get API with the spoofed headers.
    if ($shopid && $itemid) {
        $item = api_get_item($shopid, $itemid, $u);
        if ($item) {
            $title = (string) ($item['name'] ?? '');
            $desc  = (string) ($item['description'] ?? '');
            $price = map_price($item['price'] ?? ($item['price_min'] ?? ''));

            $cats = [];
            if (!empty($item['categories'])) {
                foreach ($item['categories'] as $c) {
                    if (!empty($c['display_name'])) {
                        $cats[] = $c['display_name'];
                    }
                }
            }
            $cat = implode(' > ', $cats);

            $imgs = [];
            if (!empty($item['images'])) {
                foreach ($item['images'] as $hs) {
                    $imgs[] = cdn_url((string) $hs);
                    if (count($imgs) >= 3) {
                        break;
                    }
                }
            }
            $img1 = $imgs[0] ?? '';
            $img2 = $imgs[1] ?? '';
            $img3 = $imgs[2] ?? '';
        }
    }

    // 4) HTML fallback: fill in whatever is still empty.
    if ($htmlBody !== '') {
        if ($title === '' || $desc === '' || $price === '' || $cat === '') {
            [$t, $d, $p, $c] = extract_meta_from_html($htmlBody);
            if ($title === '') { $title = $t; }
            if ($desc === '')  { $desc = $d; }
            if ($price === '') { $price = $p; }
            if ($cat === '')   { $cat = $c; }
        }
        if ($img1 === '' && $img2 === '' && $img3 === '') {
            $imgs = extract_images_from_html($htmlBody);
            $img1 = $imgs[0] ?? '';
            $img2 = $imgs[1] ?? '';
            $img3 = $imgs[2] ?? '';
        }
    }

    // Tell the user when a URL produced nothing at all (typically a block by
    // the WAF); the empty row is still listed so the output lines up with the input.
    if (trim($title) === '' && trim($desc) === '' && $img1 === '') {
        $errors[] = 'Baris ' . $lineNo . ': data produk tidak ditemukan (HTTP ' . (int) $rh['code'] . '). Coba IP/hosting lain.';
    }

    $rows[] = [
        'url'      => $u,
        'title'    => trim($title),
        'desc'     => trim($desc),
        'price'    => trim($price),
        'category' => trim($cat),
        'img1'     => $img1,
        'img2'     => $img2,
        'img3'     => $img3,
    ];
}

/* ===================== CSV ===================== */
// Build the CSV export: one header line followed by one line per result row.
// Every field goes through csvq() and every line ends with "\n".
$csv = '';
if ($rows) {
    $csvLines = [
        ['URL','Judul','Deskripsi','Harga','Kategori','LINK GAMBAR 1','LINK GAMBAR 2','LINK GAMBAR 3'],
    ];
    foreach ($rows as $r) {
        $csvLines[] = [$r['url'], $r['title'], $r['desc'], $r['price'], $r['category'], $r['img1'], $r['img2'], $r['img3']];
    }
    foreach ($csvLines as $cells) {
        $csv .= implode(',', array_map('csvq', $cells)) . "\n";
    }
}
?>
<!doctype html>
<html lang="id">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Shopee Product Extractor — Alternatif</title>
  <style>
    :root { --bg:#0b1220; --card:#101828; --muted:#98a2b3; --text:#e5e7eb; --accent:#7c3aed; --err:#ef4444; }
    html,body{margin:0;padding:0;background:var(--bg);color:var(--text);font-family:system-ui,-apple-system,Segoe UI,Roboto,Inter,Ubuntu,Helvetica,Arial,sans-serif}
    a{color:#93c5fd;text-decoration:none}
    .wrap{max-width:1000px;margin:24px auto;padding:0 16px}
    .card{background:var(--card);border:1px solid #1f2937;border-radius:16px;padding:16px;margin-bottom:16px;box-shadow:0 10px 30px rgba(0,0,0,.2)}
    h1{font-size:clamp(20px,3vw,28px);margin:0 0 8px 0}
    p.lead{color:var(--muted);margin-top:4px}
    textarea{width:100%;min-height:120px;background:#0f172a;color:var(--text);border:1px solid #29354a;border-radius:12px;padding:10px}
    .row{display:flex;gap:12px;flex-wrap:wrap;align-items:center}
    .btn{cursor:pointer;border:none;border-radius:12px;padding:10px 14px;background:var(--accent);color:#fff;font-weight:600}
    .btn.ghost{background:transparent;border:1px solid #334155}
    .note{font-size:12px;color:var(--muted)}
    table{width:100%;border-collapse:collapse;margin-top:12px}
    th,td{border:1px solid #243042;padding:8px;vertical-align:top}
    th{background:#0f172a}
    .imgprev{max-height:70px;border-radius:8px}
    .topbar{display:flex;justify-content:space-between;gap:10px;align-items:center;margin-top:8px}
  </style>
</head>
<body>
<div class="wrap">
  <div class="card">
    <h1>🧩 Shopee Product Extractor — Metode Alternatif</h1>
    <p class="lead">Tempel URL produk • Sistem akan mencari <em>shopid</em>/<em>itemid</em> lewat canonical/og:url lalu mencoba API + fallback HTML.</p>
    <form method="post">
      <label class="note" for="urls">URL produk (satu per baris)</label>
      <?php
        // h() only accepts strings; under strict_types an array-valued field
        // (product_urls[]=x) would abort the page with a TypeError.
        $urlsField = $_POST['product_urls'] ?? '';
        $urlsField = is_string($urlsField) ? $urlsField : '';
      ?>
      <textarea id="urls" name="product_urls" placeholder="https://shopee.co.id/product/123456/9876543210
https://shopee.co.id/nama-produk-keren-i.123456.9876543210"><?= h($urlsField) ?></textarea>
      <div class="row" style="margin-top:10px">
        <button class="btn" type="submit">Ekstrak</button>
        <button type="button" class="btn ghost" onclick="demo()">Contoh</button>
      </div>
    </form>
  </div>

  <?php if (!empty($errors)): ?>
    <div class="card">
      <strong style="color:var(--err)">Kendala:</strong>
      <ul class="note">
        <?php /* Messages are escaped here so no entry can inject markup. */ ?>
        <?php foreach ($errors as $e): ?><li><?= h((string) $e) ?></li><?php endforeach; ?>
      </ul>
    </div>
  <?php endif; ?>

  <?php if (!empty($rows)): ?>
    <div class="card">
      <div class="topbar">
        <div><strong>Hasil</strong> — <?= count($rows) ?> produk</div>
        <div class="row">
          <button class="btn ghost" type="button" onclick="copyCSV()">Copy CSV</button>
          <button class="btn ghost" type="button" onclick="downloadCSV()">Unduh CSV</button>
          <button class="btn ghost" type="button" onclick="downloadJSON()">Unduh JSON</button>
        </div>
      </div>
      <div style="overflow:auto">
        <table>
          <thead>
            <tr>
              <th>#</th>
              <th>URL</th>
              <th>Judul</th>
              <th>Deskripsi</th>
              <th>Harga</th>
              <th>Kategori</th>
              <th>LINK GAMBAR 1</th>
              <th>LINK GAMBAR 2</th>
              <th>LINK GAMBAR 3</th>
            </tr>
          </thead>
          <tbody>
            <?php foreach ($rows as $i => $r): ?>
              <tr>
                <td><?= $i+1 ?></td>
                <td><a href="<?= h($r['url']) ?>" target="_blank" rel="noopener">Link</a></td>
                <td><?= h($r['title']) ?></td>
                <td><?= h($r['desc']) ?></td>
                <td><?= h($r['price']) ?></td>
                <td><?= h($r['category']) ?></td>
                <?php /* Image values come from scraped pages: only http(s) URLs become links, anything else is shown as text. */ ?>
                <?php foreach (['img1', 'img2', 'img3'] as $imgKey): $imgUrl = (string) $r[$imgKey]; ?>
                <td><?php if (preg_match('~^https?://~i', $imgUrl)): ?><a href="<?= h($imgUrl) ?>" target="_blank" rel="noopener"><?= h($imgUrl) ?></a><?php else: ?><?= h($imgUrl) ?><?php endif; ?></td>
                <?php endforeach; ?>
              </tr>
            <?php endforeach; ?>
          </tbody>
        </table>
      </div>

      <?php $csv = $csv ?? ''; ?>
      <textarea id="csvdata" style="width:100%;min-height:0;height:1px;opacity:0"><?= h($csv) ?></textarea>
    </div>

    <script>
      function copyCSV(){ const ta=document.getElementById('csvdata'); ta.select(); document.execCommand('copy'); alert('CSV disalin'); }
      function downloadCSV(){ const csv=document.getElementById('csvdata').value; const b=new Blob([csv],{type:'text/csv;charset=utf-8;'}); const u=URL.createObjectURL(b); const a=document.createElement('a'); a.href=u;a.download='shopee-products.csv'; a.click(); URL.revokeObjectURL(u); }
      function downloadJSON(){ const rows=[]; const tb=document.querySelector('table tbody'); for(const tr of tb.querySelectorAll('tr')){ const t=tr.querySelectorAll('td'); if(!t.length) continue; rows.push({ url: t[1].querySelector('a')?.href || '', title: t[2].innerText.trim(), description: t[3].innerText.trim(), price: t[4].innerText.trim(), category: t[5].innerText.trim(), image1: t[6].innerText.trim(), image2: t[7].innerText.trim(), image3: t[8].innerText.trim(), }); } const b=new Blob([JSON.stringify(rows, null, 2)], {type:'application/json'}); const u=URL.createObjectURL(b); const a=document.createElement('a'); a.href=u; a.download='shopee-products.json'; a.click(); URL.revokeObjectURL(u); }
    </script>
  <?php endif; ?>

  <script>
    // Defined outside the results block: the "Contoh" button is rendered on
    // every page load, including the first one where there are no results yet.
    function demo(){ document.getElementById('urls').value='https://shopee.co.id/product/123456/9876543210\nhttps://shopee.co.id/nama-produk-i.123456.9876543210'; }
  </script>
</div>
</body>
</html>
