php/awk 处理csv 使用 SplFileObject 操作文件
取第5列,去掉开头结尾的引号,匹配以http://, https://, ftp://开头的行
* awk
awk -F',' '
    # Strip the quotes wrapping column 5, then print it only when it
    # starts with http://, https:// or ftp://.
    # Fields are split on every comma, so a quoted field that itself
    # contains a comma will shift the column numbering.
    {
        gsub(/^"+|"+$/, "", $5)
        if ($5 ~ /^(https?|ftp):\/\//) print $5
    }
' ../data/t_video.csv > ../data/urls.csv
* php
<?php
// PHP port of the awk one-liner
//   awk -F"," '{ gsub(/^"+|"+$/, "", $5); if ($5 ~ /^(https?|ftp):\/\//) print $5 }'
// Reads ../data/t_video.csv line by line, takes the 5th comma-separated
// column, strips the double quotes wrapping it, and writes every value that
// starts with http://, https:// or ftp:// to ../data/urls.csv, one per line.

// SplFileObject's constructor throws RuntimeException when the file cannot be
// opened, so no separate "did it open" check is needed.
$in = new SplFileObject('../data/t_video.csv', 'rb');
$out = new SplFileObject('../data/urls.csv', 'w');

foreach ($in as $line) {
    // Iteration yields each line with its terminator still attached; drop it
    // so a closing quote can be trimmed even when column 5 is the last field.
    $line = rtrim($line, "\r\n");

    // Plain comma split, same as `awk -F","`: a quoted field that itself
    // contains a comma will shift the column numbering.
    $columns = explode(',', $line);
    if (!isset($columns[4])) {
        // Blank or short line: there is no 5th column.
        continue;
    }

    $videoURL = trim($columns[4], '"');

    // Anchored alternation of the three accepted schemes. (A bracketed list
    // here would be a character class matching a single character.)
    if (preg_match('/^(?:https?|ftp):\/\//', $videoURL) === 1) {
        $out->fwrite($videoURL . PHP_EOL);
    }
}
input:
../data/t_video.csv
"9","其他","ULTIMATE PHOTO GUIDE",NULL,"http://118.190.209.209/media/mp4/1.mp4","http://118.190.209.209/media/png/1.png","ULTIMATE PHOTO GUIDE" "10","其他","THE VERGE",NULL,"http://118.190.209.209/media/mp4/2.mp4","http://118.190.209.209/media/png/2.the_verge.png","THE VERGE" "11","其他","Microsoft Power BI",NULL,"http://118.190.209.209/media/mp4/3.mp4","http://118.190.209.209/media/png/3.png","Microsoft Power BI"
output:
../data/urls.csv
http://118.190.209.209/media/mp4/1.mp4
http://118.190.209.209/media/mp4/2.mp4
http://118.190.209.209/media/mp4/3.mp4
http://php.net/manual/en/class.splfileobject.php