array|false fgetcsv ( resource $handle [, int $length = 0 [, string $separator = "," [, string $enclosure = '"' [, string $escape = "\\" ]]]] )
| 参数 | 描述 | 默认值 | 必需 |
|---|---|---|---|
| handle | 由 fopen() 打开的文件指针 | - | 是 |
| length | 必须大于 CSV 文件中最长的行(以字符为单位) 设置为 0 表示无限制(PHP 5.1.0+) |
0 | 否 |
| separator | 字段分隔符(只允许单字符) 常用值:','(逗号)、';'(分号)、'\\t'(制表符) |
, | 否 |
| enclosure | 字段包裹字符(只允许单字符) 用于包含包含分隔符或换行符的字段 |
" | 否 |
| escape | 转义字符(只允许单字符) PHP 7.4.0+ 默认为反斜线,之前版本为 '\\' |
\\ | 否 |
FALSEFALSE示例 CSV 行: John,Doe,"123 Main St, Apt 4",New York
<?php
// 创建测试 CSV 文件
$csv_content = "Name,Age,City,Email
John Doe,30,New York,john@example.com
Jane Smith,25,Los Angeles,jane@example.com
Bob Johnson,35,Chicago,bob@example.com";
$filename = "users.csv";
file_put_contents($filename, $csv_content);
// 打开 CSV 文件
$handle = fopen($filename, "r");
if ($handle === false) {
die("无法打开文件");
}
echo "读取 CSV 文件内容:\n";
echo "=========================\n";
$row_number = 0;
while (($row = fgetcsv($handle)) !== false) {
$row_number++;
if ($row_number === 1) {
echo "表头:\n";
} else {
echo "第 " . ($row_number - 1) . " 行数据:\n";
}
// 显示每个字段
foreach ($row as $index => $field) {
echo " 字段 $index: " . htmlspecialchars($field) . "\n";
}
echo "\n";
}
// 关闭文件
fclose($handle);
// 清理测试文件
unlink($filename);
?>
<?php
/**
* 处理包含特殊字符的 CSV 文件
*/
function parse_complex_csv($filename) {
$handle = fopen($filename, "r");
if (!$handle) {
return false;
}
$data = [];
$header = null;
// 读取表头
$header = fgetcsv($handle);
if ($header === false) {
fclose($handle);
return false;
}
// 读取数据行
while (($row = fgetcsv($handle)) !== false) {
// 确保每行的字段数与表头相同
if (count($row) !== count($header)) {
echo "警告:行字段数与表头不一致,跳过此行\n";
continue;
}
// 将行数据与表头关联
$data[] = array_combine($header, $row);
}
fclose($handle);
return $data;
}
// 创建包含特殊字符的测试 CSV
$complex_csv = "Name,Description,Address,Salary
John Doe,\"Software Engineer, specializes in PHP\",\"123 Main St, Apt 4\",75000
Jane Smith,\"Data Analyst with \"\"expert\"\" skills\",\"456 Oak Ave\",82000
Bob Johnson,\"Manager\nHandles multiple teams\",\"789 Pine Rd\",95000";
$complex_file = "complex_data.csv";
file_put_contents($complex_file, $complex_csv);
echo "处理复杂 CSV 文件:\n";
echo "===================\n\n";
$parsed_data = parse_complex_csv($complex_file);
if ($parsed_data) {
echo "成功解析 " . count($parsed_data) . " 行数据:\n\n";
foreach ($parsed_data as $index => $row) {
echo "记录 #" . ($index + 1) . ":\n";
foreach ($row as $key => $value) {
echo " $key: " . htmlspecialchars($value) . "\n";
}
echo "\n";
}
} else {
echo "解析失败\n";
}
// 清理测试文件
unlink($complex_file);
?>
<?php
/**
* 读取使用不同分隔符的 CSV 文件
*/
function parse_custom_csv($filename, $delimiter = ',', $enclosure = '"', $escape = '\\') {
$handle = fopen($filename, "r");
if (!$handle) {
return false;
}
$data = [];
while (($row = fgetcsv($handle, 0, $delimiter, $enclosure, $escape)) !== false) {
$data[] = $row;
}
fclose($handle);
return $data;
}
// 测试不同格式的 CSV 文件
$test_files = [
'comma.csv' => [
'content' => "Name,Age,City\nJohn Doe,30,New York",
'delimiter' => ',',
'enclosure' => '"'
],
'semicolon.csv' => [
'content' => "Name;Age;City\nJohn Doe;30;New York",
'delimiter' => ';',
'enclosure' => '"'
],
'pipe.csv' => [
'content' => "Name|Age|City\nJohn Doe|30|New York",
'delimiter' => '|',
'enclosure' => '"'
],
'tab.csv' => [
'content' => "Name\tAge\tCity\nJohn Doe\t30\tNew York",
'delimiter' => "\t",
'enclosure' => '"'
]
];
echo "测试不同分隔符的 CSV 文件:\n";
echo "============================\n\n";
foreach ($test_files as $filename => $config) {
file_put_contents($filename, $config['content']);
echo "文件: $filename\n";
echo "分隔符: '" . addslashes($config['delimiter']) . "'\n";
$data = parse_custom_csv($filename, $config['delimiter'], $config['enclosure']);
if ($data) {
echo "解析结果:\n";
foreach ($data as $row) {
echo " " . implode(' | ', array_map('htmlspecialchars', $row)) . "\n";
}
} else {
echo "解析失败\n";
}
echo "\n";
// 清理测试文件
unlink($filename);
}
?>
<?php
/**
* 高效处理大 CSV 文件的类
* 使用生成器避免内存溢出
*/
class LargeCsvProcessor {
private $filename;
private $delimiter;
private $enclosure;
private $escape;
public function __construct($filename, $delimiter = ',', $enclosure = '"', $escape = '\\') {
$this->filename = $filename;
$this->delimiter = $delimiter;
$this->enclosure = $enclosure;
$this->escape = $escape;
}
/**
* 逐行读取 CSV 文件(生成器)
*/
public function readRows() {
$handle = fopen($this->filename, "r");
if (!$handle) {
throw new Exception("无法打开文件: " . $this->filename);
}
try {
while (($row = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escape)) !== false) {
yield $row;
}
} finally {
fclose($handle);
}
}
/**
* 带表头关联的逐行读取
*/
public function readAssocRows() {
$handle = fopen($this->filename, "r");
if (!$handle) {
throw new Exception("无法打开文件: " . $this->filename);
}
// 读取表头
$header = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escape);
if ($header === false) {
fclose($handle);
return;
}
try {
while (($row = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escape)) !== false) {
// 确保行字段数与表头相同
if (count($row) === count($header)) {
yield array_combine($header, $row);
} else {
// 处理字段数不匹配的情况
yield array_combine($header, array_pad($row, count($header), ''));
}
}
} finally {
fclose($handle);
}
}
/**
* 统计 CSV 文件信息
*/
public function getFileStats() {
$handle = fopen($this->filename, "r");
if (!$handle) {
return false;
}
$stats = [
'total_rows' => 0,
'total_columns' => 0,
'max_line_length' => 0,
'has_header' => true // 假设第一行是表头
];
// 读取第一行(表头)
$header = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escape);
if ($header === false) {
fclose($handle);
return $stats;
}
$stats['total_columns'] = count($header);
$stats['total_rows'] = 1; // 表头行
// 统计数据行
while (($row = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escape)) !== false) {
$stats['total_rows']++;
$line_length = strlen(implode($this->delimiter, $row));
$stats['max_line_length'] = max($stats['max_line_length'], $line_length);
}
fclose($handle);
// 实际数据行数(不包括表头)
$stats['data_rows'] = $stats['total_rows'] - 1;
return $stats;
}
/**
* 导出处理过的数据到新 CSV 文件
*/
public function exportProcessed($output_filename, callable $processor) {
$input_handle = fopen($this->filename, "r");
$output_handle = fopen($output_filename, "w");
if (!$input_handle || !$output_handle) {
return false;
}
// 读取并写入表头
$header = fgetcsv($input_handle, 0, $this->delimiter, $this->enclosure, $this->escape);
if ($header !== false) {
fputcsv($output_handle, $header, $this->delimiter, $this->enclosure, $this->escape);
}
// 处理并写入数据行
while (($row = fgetcsv($input_handle, 0, $this->delimiter, $this->enclosure, $this->escape)) !== false) {
$processed_row = $processor($row);
if ($processed_row !== null) {
fputcsv($output_handle, $processed_row, $this->delimiter, $this->enclosure, $this->escape);
}
}
fclose($input_handle);
fclose($output_handle);
return true;
}
}
// 使用示例
echo "大 CSV 文件处理演示:\n";
echo "===================\n";
// 创建一个大 CSV 文件用于测试
$large_csv = "ID,Name,Email,Score,Timestamp\n";
for ($i = 1; $i <= 1000; $i++) {
$name = "User_" . $i;
$email = "user_" . $i . "@example.com";
$score = rand(50, 100);
$timestamp = date('Y-m-d H:i:s', time() - rand(0, 86400 * 365));
$large_csv .= "$i,$name,$email,$score,$timestamp\n";
}
$large_file = "large_data.csv";
file_put_contents($large_file, $large_csv);
try {
$processor = new LargeCsvProcessor($large_file);
// 获取文件统计信息
echo "1. 文件统计信息:\n";
$stats = $processor->getFileStats();
if ($stats) {
echo " 总列数: " . $stats['total_columns'] . "\n";
echo " 总行数: " . $stats['total_rows'] . "\n";
echo " 数据行数: " . $stats['data_rows'] . "\n";
echo " 最大行长度: " . $stats['max_line_length'] . " 字符\n";
}
// 逐行处理数据(使用生成器,内存高效)
echo "\n2. 逐行处理数据(前5行):\n";
$row_count = 0;
foreach ($processor->readAssocRows() as $row) {
$row_count++;
echo " 行 {$row_count}: ID={$row['ID']}, Name={$row['Name']}, Score={$row['Score']}\n";
if ($row_count >= 5) {
echo " ... 只显示前5行\n";
break;
}
}
// 导出处理过的数据
echo "\n3. 导出处理过的数据:\n";
$output_file = "processed_data.csv";
// 处理器函数:过滤分数大于90的行
$filter_high_scores = function($row) {
if (isset($row['Score']) && $row['Score'] > 90) {
return $row;
}
return null; // 不导出此行
};
if ($processor->exportProcessed($output_file, $filter_high_scores)) {
echo " ✅ 数据导出成功\n";
// 统计导出文件的行数
$exported_rows = count(file($output_file)) - 1; // 减去表头行
echo " 导出了 {$exported_rows} 行数据(分数>90)\n";
} else {
echo " ❌ 数据导出失败\n";
}
// 清理导出文件
if (file_exists($output_file)) {
unlink($output_file);
}
} catch (Exception $e) {
echo "错误: " . $e->getMessage() . "\n";
}
// 清理测试文件
unlink($large_file);
?>
<?php
/**
* CSV 数据验证和清洗类
*/
class CsvValidator {
private $rules = [];
private $errors = [];
/**
* 添加验证规则
*/
public function addRule($column, $rule, $message = null) {
if (!isset($this->rules[$column])) {
$this->rules[$column] = [];
}
$this->rules[$column][] = [
'rule' => $rule,
'message' => $message ?: "列 '{$column}' 验证失败"
];
}
/**
* 验证单行数据
*/
public function validateRow($header, $row) {
$errors = [];
$row_data = array_combine($header, $row);
foreach ($this->rules as $column => $column_rules) {
if (!isset($row_data[$column])) {
$errors[] = "列 '{$column}' 不存在";
continue;
}
$value = $row_data[$column];
foreach ($column_rules as $rule_info) {
$rule = $rule_info['rule'];
$message = $rule_info['message'];
if (!$this->checkRule($value, $rule)) {
$errors[] = "{$message} (值: '{$value}')";
}
}
}
return $errors;
}
/**
* 检查单个规则
*/
private function checkRule($value, $rule) {
if (is_callable($rule)) {
return $rule($value);
}
switch ($rule) {
case 'required':
return trim($value) !== '';
case 'email':
return filter_var($value, FILTER_VALIDATE_EMAIL) !== false;
case 'numeric':
return is_numeric($value);
case 'int':
return filter_var($value, FILTER_VALIDATE_INT) !== false;
case 'float':
return filter_var($value, FILTER_VALIDATE_FLOAT) !== false;
case 'date':
return strtotime($value) !== false;
default:
return true;
}
}
/**
* 清洗数据
*/
public function sanitizeRow($row) {
return array_map(function($value) {
// 去除多余空格
$value = trim($value);
// 处理常见的空值表示
if ($value === '' || strtolower($value) === 'null' || strtolower($value) === 'n/a') {
return '';
}
// 移除不可见字符
$value = preg_replace('/[\x00-\x1F\x7F]/', '', $value);
return $value;
}, $row);
}
}
// 使用示例
echo "CSV 数据验证和清洗演示:\n";
echo "========================\n";
// 创建包含问题的测试 CSV
$dirty_csv = "ID,Name,Email,Age,Salary,JoinDate
1,John Doe,john@example.com,30,75000,2020-01-15
2,Jane Smith,invalid-email,25,invalid-salary,2021-03-20
3,Bob Johnson,bob@example.com,,82000,invalid-date
4,,alice@example.com,28,91000,2022-05-10
5,Charlie Brown,charlie@example.com,35,105000,2023-07-25";
$dirty_file = "dirty_data.csv";
file_put_contents($dirty_file, $dirty_csv);
// 创建验证器
$validator = new CsvValidator();
// 添加验证规则
$validator->addRule('ID', 'required', 'ID不能为空');
$validator->addRule('Name', 'required', '姓名不能为空');
$validator->addRule('Email', 'email', '邮箱格式无效');
$validator->addRule('Age', function($value) {
if ($value === '') return true; // 允许为空
return filter_var($value, FILTER_VALIDATE_INT) !== false && $value >= 18 && $value <= 100;
}, '年龄必须是18-100之间的整数');
$validator->addRule('Salary', function($value) {
return is_numeric($value) && $value > 0;
}, '薪资必须是正数');
$validator->addRule('JoinDate', function($value) {
return strtotime($value) !== false;
}, '入职日期格式无效');
// 处理 CSV 文件
$handle = fopen($dirty_file, "r");
if (!$handle) {
die("无法打开文件");
}
echo "原始数据验证结果:\n";
echo "=================\n";
$header = fgetcsv($handle);
$valid_rows = [];
$invalid_rows = [];
$row_number = 1;
while (($row = fgetcsv($handle)) !== false) {
$row_number++;
// 清洗数据
$cleaned_row = $validator->sanitizeRow($row);
// 验证数据
$errors = $validator->validateRow($header, $cleaned_row);
if (empty($errors)) {
$valid_rows[] = $cleaned_row;
echo "✅ 行 {$row_number}: 验证通过\n";
} else {
$invalid_rows[] = [
'row_number' => $row_number,
'row' => $cleaned_row,
'errors' => $errors
];
echo "❌ 行 {$row_number}: " . implode('; ', $errors) . "\n";
}
}
fclose($handle);
// 显示统计信息
echo "\n验证统计:\n";
echo "有效行数: " . count($valid_rows) . "\n";
echo "无效行数: " . count($invalid_rows) . "\n";
echo "总行数: " . ($row_number - 1) . "\n";
// 清理测试文件
unlink($dirty_file);
?>
<?php
/**
* CSV 到数据库导入类
*/
class CsvToDatabase {
private $db;
private $table;
private $mapping = [];
public function __construct($db_connection, $table_name) {
$this->db = $db_connection;
$this->table = $table_name;
}
/**
* 设置 CSV 列到数据库字段的映射
*/
public function setColumnMapping($mapping) {
$this->mapping = $mapping;
}
/**
* 导入 CSV 文件到数据库
*/
public function import($filename, $has_header = true) {
$handle = fopen($filename, "r");
if (!$handle) {
throw new Exception("无法打开文件: " . $filename);
}
// 读取表头
if ($has_header) {
$header = fgetcsv($handle);
if ($header === false) {
fclose($handle);
return ['success' => false, 'message' => '文件为空或格式错误'];
}
} else {
// 如果没有表头,使用映射中的 CSV 列名
$header = array_keys($this->mapping);
// 重置文件指针到开始位置
rewind($handle);
}
// 验证映射
$db_fields = [];
foreach ($header as $csv_column) {
if (isset($this->mapping[$csv_column])) {
$db_fields[$csv_column] = $this->mapping[$csv_column];
} else {
// 如果没有映射,使用 CSV 列名作为数据库字段名
$db_fields[$csv_column] = $csv_column;
}
}
$total_rows = 0;
$success_rows = 0;
$error_rows = 0;
$errors = [];
// 准备插入语句
$db_field_names = array_values($db_fields);
$placeholders = implode(', ', array_fill(0, count($db_field_names), '?'));
$sql = "INSERT INTO " . $this->table . " (" . implode(', ', $db_field_names) . ") VALUES ($placeholders)";
$stmt = $this->db->prepare($sql);
if (!$stmt) {
fclose($handle);
throw new Exception("准备 SQL 语句失败: " . $this->db->error);
}
// 开始事务
$this->db->begin_transaction();
try {
while (($row = fgetcsv($handle)) !== false) {
$total_rows++;
// 确保行字段数与表头相同
if (count($row) !== count($header)) {
$error_rows++;
$errors[] = "行 {$total_rows}: 字段数不匹配";
continue;
}
// 准备绑定参数
$params = [];
foreach ($header as $index => $csv_column) {
$value = $row[$index];
// 数据清洗和转换
$value = $this->sanitizeValue($value, $db_fields[$csv_column]);
$params[] = $value;
}
// 绑定参数并执行
$types = str_repeat('s', count($params)); // 假设所有参数都是字符串
$stmt->bind_param($types, ...$params);
if ($stmt->execute()) {
$success_rows++;
} else {
$error_rows++;
$errors[] = "行 {$total_rows}: 插入失败 - " . $stmt->error;
}
}
// 提交事务
$this->db->commit();
} catch (Exception $e) {
// 回滚事务
$this->db->rollback();
fclose($handle);
throw $e;
}
fclose($handle);
$stmt->close();
return [
'success' => true,
'total_rows' => $total_rows,
'success_rows' => $success_rows,
'error_rows' => $error_rows,
'errors' => $errors
];
}
/**
* 数据清洗和转换
*/
private function sanitizeValue($value, $field_name) {
// 去除空白字符
$value = trim($value);
// 处理空值
if ($value === '' || strtolower($value) === 'null') {
return null;
}
// 根据字段名进行特定处理
if (preg_match('/date|time|_at$/i', $field_name)) {
// 尝试转换为标准日期格式
$timestamp = strtotime($value);
if ($timestamp !== false) {
return date('Y-m-d H:i:s', $timestamp);
}
}
// 处理数值字段
if (preg_match('/price|cost|amount|salary|total|sum/i', $field_name)) {
// 移除货币符号和千位分隔符
$value = preg_replace('/[^\d.-]/', '', $value);
if (is_numeric($value)) {
return (float) $value;
}
}
return $value;
}
}
// 使用示例(模拟数据库连接)
echo "CSV 数据导入数据库演示:\n";
echo "========================\n";
// 创建测试 CSV 文件
$import_csv = "ID,FullName,EmailAddress,SalaryAmount,JoinDate,Status
1,John Doe,john@example.com,\$75,000.00,2020-01-15,Active
2,Jane Smith,jane@example.com,\$82,500.00,2021-03-20,Active
3,Bob Johnson,bob@example.com,\$95,000.00,2022-05-10,Inactive
4,Alice Brown,alice@example.com,\$91,250.50,2023-07-25,Active
5,Charlie Wilson,charlie@example.com,\$105,000.00,2024-09-30,Active";
$import_file = "import_data.csv";
file_put_contents($import_file, $import_csv);
// 模拟数据库连接(实际使用时需要真实数据库连接)
class MockDatabase {
public $error = '';
public function prepare($sql) { return new MockStatement(); }
public function begin_transaction() {}
public function commit() {}
public function rollback() {}
}
class MockStatement {
public $error = '';
public function bind_param($types, ...$params) {}
public function execute() { return true; }
public function close() {}
}
$mock_db = new MockDatabase();
try {
// 创建导入器
$importer = new CsvToDatabase($mock_db, 'employees');
// 设置列映射(CSV列名 => 数据库字段名)
$mapping = [
'ID' => 'id',
'FullName' => 'full_name',
'EmailAddress' => 'email',
'SalaryAmount' => 'salary',
'JoinDate' => 'join_date',
'Status' => 'status'
];
$importer->setColumnMapping($mapping);
// 执行导入
echo "正在导入数据...\n";
$result = $importer->import($import_file);
if ($result['success']) {
echo "✅ 导入完成!\n";
echo "总行数: " . $result['total_rows'] . "\n";
echo "成功行数: " . $result['success_rows'] . "\n";
echo "失败行数: " . $result['error_rows'] . "\n";
if (!empty($result['errors'])) {
echo "\n错误详情:\n";
foreach ($result['errors'] as $error) {
echo "- " . $error . "\n";
}
}
} else {
echo "❌ 导入失败: " . $result['message'] . "\n";
}
} catch (Exception $e) {
echo "错误: " . $e->getMessage() . "\n";
}
// 清理测试文件
unlink($import_file);
?>
| 问题 | 症状 | 解决方法 |
|---|---|---|
| BOM(字节顺序标记) | 第一行出现乱码字符 | 使用 fseek($handle, 3) 跳过 BOM 或使用 UTF-8 BOM 识别 |
| 换行符不一致 | 行解析错误或字段包含换行符 | 使用正确的换行符处理或标准化为 \n |
| 字段数不一致 | 解析时数组长度变化 | 检查 CSV 文件或使用 array_pad() 补全字段 |
| 编码问题 | 中文字符乱码 | 使用 mb_convert_encoding() 转换编码为 UTF-8 |
| 引号处理 | 字段内引号导致解析错误 | 使用正确的转义字符(通常是双引号或反斜杠) |
| 大文件处理 | 内存不足或性能问题 | 使用生成器或分块读取,避免一次性加载整个文件 |
$handle = fopen($filename, "r");
while (($row = fgetcsv($handle)) !== false) {
// 处理每一行数据
}
fclose($handle);
$handle = fopen($filename, "r");
$header = fgetcsv($handle);
while (($row = fgetcsv($handle)) !== false) {
$data = array_combine($header, $row);
// 使用关联数组处理数据
}
fclose($handle);
function processLargeCsv($filename, $chunkSize = 1000) {
$handle = fopen($filename, "r");
$chunk = [];
$counter = 0;
while (($row = fgetcsv($handle)) !== false) {
$chunk[] = $row;
$counter++;
if ($counter % $chunkSize == 0) {
processChunk($chunk);
$chunk = [];
}
}
// 处理剩余数据
if (!empty($chunk)) {
processChunk($chunk);
}
fclose($handle);
}