Spaces:

kin525
/

Embedding

Running

App Files Files Community

kin525 committed on Nov 6

Commit

6171ba1

verified ·

1 Parent(s): 01edb13

Update core/file_processor.py

Browse files

Files changed (1) hide show

core/file_processor.py +179 -219

core/file_processor.py CHANGED Viewed

@@ -1,208 +1,249 @@
 # core/file_processor.py
 import os
 import tempfile
-from typing import Dict, Any
 import pandas as pd
 from io import BytesIO
 import base64
 import logging
 try:
     import fitz  # PyMuPDF
     PDF_SUPPORT = True
-except ImportError:
     PDF_SUPPORT = False
-    print("警告: PyMuPDF 不可用，PDF 處理將被停用")
 try:
     from docx import Document
     DOCX_SUPPORT = True
-except ImportError:
     DOCX_SUPPORT = False
-    print("警告: python-docx 不可用，Word 處理將被停用")
 try:
     import pytesseract
     from PIL import Image
     OCR_SUPPORT = True
-except ImportError:
     OCR_SUPPORT = False
-    print("警告: OCR 功能不可用")
 class FileProcessor:
     def __init__(self):
         self.logger = logging.getLogger(__name__)
-        self.supported_types = []
-        # 根據可用的庫動態支持文件類型
         if PDF_SUPPORT:
             self.supported_types.extend(['pdf'])
-            self.logger.info("PDF 處理支持已啟用")
         if DOCX_SUPPORT:
             self.supported_types.extend(['docx', 'doc'])
             self.logger.info("Word 處理支持已啟用")
         if OCR_SUPPORT:
             self.supported_types.extend(['jpg', 'jpeg', 'png'])
             self.logger.info("OCR 處理支持已啟用")
-        # 總是支持的類型
         self.supported_types.extend(['txt', 'md', 'xlsx', 'xls'])
         self.logger.info(f"文件處理器初始化完成，支持的類型: {self.supported_types}")
     def get_supported_types(self) -> list:
-        """獲取支持的文件類型列表"""
         return self.supported_types
-    def process_file(self, file_data: bytes, filename: str, file_type: str) -> Dict[str, Any]:
-        """統一文件處理入口"""
-        if file_type not in self.supported_types:
-            error_msg = f"不支持的文件類型: {file_type}"
-            self.logger.warning(error_msg)
-            return {
-                "text_content": error_msg,
-                "file_type": file_type,
-                "processed": False,
-                "error": error_msg
             }
-        processors = {
-            'pdf': self._process_pdf,
-            'xlsx': self._process_excel,
-            'xls': self._process_excel,
-            'docx': self._process_word,
-            'doc': self._process_word,
-            'jpg': self._process_image,
-            'jpeg': self._process_image,
-            'png': self._process_image,
-            'txt': self._process_text,
-            'md': self._process_text
-        }
-        try:
-            self.logger.info(f"開始處理文件: {filename}, 類型: {file_type}")
-            result = processors[file_type](file_data, filename)
-            self.logger.info(f"文件處理完成: {filename}, 狀態: {result.get('processed', False)}")
-            return result
         except Exception as e:
-            error_msg = f"處理文件時出錯: {str(e)}"
-            self.logger.error(f"處理文件失敗 {filename}: {error_msg}")
-            return {
-                "text_content": error_msg,
-                "file_type": file_type,
-                "processed": False,
-                "error": str(e)
-            }
     def _process_pdf(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """處理 PDF 文件"""
         if not PDF_SUPPORT:
-            return {
-                "text_content": "PDF 處理不可用，請安裝 PyMuPDF",
-                "file_type": "pdf",
-                "processed": False
-            }
-        try:
-            # 使用內存流處理 PDF，避免臨時文件
-            doc = fitz.open(stream=file_data, filetype="pdf")
-            text_content = ""
-            page_count = len(doc)
-            for page_num in range(page_count):
-                page = doc[page_num]
-                text_content += page.get_text()
-            doc.close()
-            result = {
-                "text_content": text_content,
-                "page_count": page_count,
-                "file_type": "pdf",
-                "processed": True,
-                "content_length": len(text_content)
-            }
-            self.logger.info(f"PDF 處理成功: {filename}, 頁數: {page_count}, 內容長度: {len(text_content)}")
-            return result
-        except Exception as e:
-            self.logger.error(f"PDF 處理失敗 {filename}: {str(e)}")
-            return {
-                "text_content": f"PDF 處理失敗: {str(e)}",
-                "file_type": "pdf",
-                "processed": False,
-                "error": str(e)
-            }
     def _process_excel(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """處理 Excel 文件"""
         try:
-            # 使用 BytesIO 避免臨時文件
             excel_file = BytesIO(file_data)
-            # 讀取 Excel 文件
             if filename.endswith('xlsx'):
                 df = pd.read_excel(excel_file, engine='openpyxl')
             else:
-                df = pd.read_excel(excel_file)  # .xls
-            text_content = f"Excel 文件: {filename}\n"
-            text_content += f"行數: {len(df)}, 列數: {len(df.columns)}\n\n"
             text_content += "列名: " + ", ".join(df.columns.astype(str)) + "\n\n"
-            # 添加數據預覽（前5行）
-            text_content += "數據預覽(前5行):\n"
-            text_content += df.head().to_string()
-            # 添加基本統計信息
             numeric_columns = df.select_dtypes(include=['number']).columns
             if not numeric_columns.empty:
                 text_content += f"\n\n數值列統計:\n{df[numeric_columns].describe().to_string()}"
-            result = {
-                "text_content": text_content,
-                "sheet_count": 1,  # 簡化處理，只讀第一個 sheet
-                "row_count": len(df),
-                "column_count": len(df.columns),
-                "file_type": "excel",
-                "processed": True,
-                "content_length": len(text_content)
-            }
             self.logger.info(f"Excel 處理成功: {filename}, 行: {len(df)}, 列: {len(df.columns)}")
             return result
         except Exception as e:
             self.logger.error(f"Excel 處理失敗 {filename}: {str(e)}")
-            return {
-                "text_content": f"Excel 處理失敗: {str(e)}",
-                "file_type": "excel",
-                "processed": False,
-                "error": str(e)
-            }
     def _process_word(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """處理 Word 文件"""
-        if not DOCX_SUPPORT:
-            return {
-                "text_content": "Word 處理不可用，請安裝 python-docx",
-                "file_type": "word",
-                "processed": False
-            }
         try:
             doc = Document(BytesIO(file_data))
             text_content = ""
             paragraphs = []
             for paragraph in doc.paragraphs:
                 if paragraph.text.strip():
                     text_content += paragraph.text + "\n"
                     paragraphs.append(paragraph.text)
-            # 處理表格
             tables_text = ""
             for table in doc.tables:
                 for row in table.rows:
@@ -211,87 +252,40 @@ class FileProcessor:
                         row_text.append(cell.text.strip())
                     tables_text += " | ".join(row_text) + "\n"
                 tables_text += "\n"
             if tables_text:
                 text_content += "\n表格內容:\n" + tables_text
-            result = {
-                "text_content": text_content,
-                "paragraph_count": len(paragraphs),
-                "table_count": len(doc.tables),
-                "file_type": "word",
-                "processed": True,
-                "content_length": len(text_content)
-            }
             self.logger.info(f"Word 處理成功: {filename}, 段落數: {len(paragraphs)}, 表格數: {len(doc.tables)}")
             return result
         except Exception as e:
             self.logger.error(f"Word 處理失敗 {filename}: {str(e)}")
-            return {
-                "text_content": f"Word 處理失敗: {str(e)}",
-                "file_type": "word",
-                "processed": False,
-                "error": str(e)
-            }
     def _process_image(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """處理圖片文件"""
         if not OCR_SUPPORT:
-            return {
-                "text_content": "OCR 處理不可用",
-                "file_type": "image",
-                "processed": False
-            }
         try:
             image = Image.open(BytesIO(file_data))
-            # 優化圖片以提高 OCR 準確率
             if image.mode != 'RGB':
                 image = image.convert('RGB')
-            # 調整圖片大小（如果過大）
             max_size = (2000, 2000)
             image.thumbnail(max_size, Image.Resampling.LANCZOS)
-            # OCR 識別
             text_content = pytesseract.image_to_string(image, lang='chi_sim+eng')
-            result = {
-                "text_content": text_content or "未識別到文字",
-                "image_size": image.size,
-                "file_type": "image",
-                "processed": bool(text_content.strip()),
-                "ocr_used": True,
-                "content_length": len(text_content)
-            }
             if text_content.strip():
                 self.logger.info(f"圖片 OCR 成功: {filename}, 識別文字長度: {len(text_content)}")
             else:
                 self.logger.warning(f"圖片 OCR 未識別到文字: {filename}")
             return result
         except Exception as e:
             self.logger.error(f"圖片處理失敗 {filename}: {str(e)}")
-            return {
-                "text_content": f"圖片處理失敗: {str(e)}",
-                "file_type": "image",
-                "processed": False,
-                "error": str(e)
-            }
     def _process_text(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """處理文本文件"""
         try:
-            # 嘗試多種編碼
             encodings = ['utf-8', 'gbk', 'gb2312', 'latin-1', 'cp1252']
             text_content = None
             used_encoding = None
             for encoding in encodings:
                 try:
                     text_content = file_data.decode(encoding)
@@ -299,67 +293,33 @@ class FileProcessor:
                     break
                 except UnicodeDecodeError:
                     continue
             if text_content is None:
-                # 若所有編碼都失敗，使用忽略錯誤的方式
                 text_content = file_data.decode('utf-8', errors='ignore')
                 used_encoding = 'utf-8 (with errors ignored)'
-            result = {
-                "text_content": text_content,
-                "file_type": "text",
-                "processed": True,
-                "encoding": used_encoding,
-                "content_length": len(text_content)
-            }
             self.logger.info(f"文本處理成功: {filename}, 編碼: {used_encoding}, 長度: {len(text_content)}")
             return result
         except Exception as e:
             self.logger.error(f"文本處理失敗 {filename}: {str(e)}")
-            return {
-                "text_content": f"文本處理失敗: {str(e)}",
-                "file_type": "text",
-                "processed": False,
-                "error": str(e)
-            }
     def get_file_info(self, file_data: bytes, filename: str) -> Dict[str, Any]:
-        """獲取文件基本信息（不進行完整處理）"""
         file_type = filename.split('.')[-1].lower() if '.' in filename else 'unknown'
         file_size = len(file_data)
-        info = {
-            "filename": filename,
-            "file_type": file_type,
-            "file_size": file_size,
-            "file_size_human": self._format_file_size(file_size),
-            "supported": file_type in self.supported_types
-        }
         return info
     def _format_file_size(self, size_bytes: int) -> str:
-        """格式化文件大小"""
         if size_bytes == 0:
             return "0 B"
         size_names = ["B", "KB", "MB", "GB"]
         i = 0
         while size_bytes >= 1024 and i < len(size_names) - 1:
             size_bytes /= 1024.0
             i += 1
         return f"{size_bytes:.2f} {size_names[i]}"
-# 設置日誌
 def setup_logging():
-    """設置日誌配置"""
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    )
 setup_logging()

 # core/file_processor.py
 import os
 import tempfile
+import subprocess
+from typing import Dict, Any, List
 import pandas as pd
 from io import BytesIO
 import base64
 import logging
+# Optional libs: prefer pymupdf (fitz), fallback to pypdf or pdftotext CLI
 try:
     import fitz  # PyMuPDF
     PDF_SUPPORT = True
+    PDF_BACKEND = "pymupdf"
+except Exception:
+    fitz = None
     PDF_SUPPORT = False
+    PDF_BACKEND = None
+try:
+    import pypdf
+    if not PDF_SUPPORT:
+        PDF_SUPPORT = True
+        PDF_BACKEND = PDF_BACKEND or "pypdf"
+except Exception:
+    pypdf = None
+def _pdftotext_available() -> bool:
+    try:
+        subprocess.run(["pdftotext", "-v"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=3)
+        return True
+    except Exception:
+        return False
+PDFTOTEXT_CLI = _pdftotext_available()
+if PDFTOTEXT_CLI and not PDF_SUPPORT:
+    PDF_SUPPORT = True
+    PDF_BACKEND = PDF_BACKEND or "pdftotext_cli"
 try:
     from docx import Document
     DOCX_SUPPORT = True
+except Exception:
     DOCX_SUPPORT = False
 try:
     import pytesseract
     from PIL import Image
     OCR_SUPPORT = True
+except Exception:
     OCR_SUPPORT = False
 class FileProcessor:
     def __init__(self):
         self.logger = logging.getLogger(__name__)
+        self.supported_types: List[str] = []
         if PDF_SUPPORT:
             self.supported_types.extend(['pdf'])
+            self.logger.info(f"PDF 處理支持已啟用 (backend={PDF_BACKEND})")
         if DOCX_SUPPORT:
             self.supported_types.extend(['docx', 'doc'])
             self.logger.info("Word 處理支持已啟用")
         if OCR_SUPPORT:
             self.supported_types.extend(['jpg', 'jpeg', 'png'])
             self.logger.info("OCR 處理支持已啟用")
         self.supported_types.extend(['txt', 'md', 'xlsx', 'xls'])
         self.logger.info(f"文件處理器初始化完成，支持的類型: {self.supported_types}")
     def get_supported_types(self) -> list:
         return self.supported_types
+    def process_file(self, file_input, filename: str = None, file_type: str = None) -> Dict[str, Any]:
+        """
+        Flexible process_file:
+          - Accepts bytes, path (str), file-like (has read), or dict with 'name'/'data'
+          - Infers filename and file_type if not provided
+          - Delegates to typed processors
+        """
+        try:
+            file_bytes = None
+            if isinstance(file_input, (bytes, bytearray)):
+                file_bytes = bytes(file_input)
+            elif isinstance(file_input, str) and os.path.exists(file_input):
+                with open(file_input, "rb") as f:
+                    file_bytes = f.read()
+                if not filename:
+                    filename = os.path.basename(file_input)
+            elif hasattr(file_input, "read"):
+                try:
+                    file_input.seek(0)
+                except Exception:
+                    pass
+                file_bytes = file_input.read()
+                if not filename:
+                    filename = getattr(file_input, "name", None) or getattr(file_input, "filename", None)
+            elif isinstance(file_input, dict):
+                filename = filename or file_input.get("name") or file_input.get("filename")
+                data = file_input.get("data") or file_input.get("content") or file_input.get("bytes")
+                if isinstance(data, str):
+                    try:
+                        file_bytes = base64.b64decode(data)
+                    except Exception:
+                        file_bytes = data.encode()
+                elif isinstance(data, (bytes, bytearray)):
+                    file_bytes = bytes(data)
+                else:
+                    fobj = file_input.get("file")
+                    if fobj and hasattr(fobj, "read"):
+                        try:
+                            fobj.seek(0)
+                        except Exception:
+                            pass
+                        file_bytes = fobj.read()
+            else:
+                return {"text_content": "無法識別的 file_input 類型", "file_type": file_type or "unknown", "processed": False, "error": "unsupported_input"}
+            if not filename:
+                filename = "uploaded_file"
+            if not file_type:
+                file_type = os.path.splitext(filename)[1].lower().lstrip(".") or "unknown"
+            file_type = file_type.lower()
+            if file_type not in self.supported_types:
+                error_msg = f"不支持的文件類型: {file_type}"
+                self.logger.warning(error_msg)
+                return {"text_content": error_msg, "file_type": file_type, "processed": False, "error": error_msg}
+            processors = {
+                'pdf': self._process_pdf,
+                'xlsx': self._process_excel,
+                'xls': self._process_excel,
+                'docx': self._process_word,
+                'doc': self._process_word,
+                'jpg': self._process_image,
+                'jpeg': self._process_image,
+                'png': self._process_image,
+                'txt': self._process_text,
+                'md': self._process_text
             }
+            try:
+                self.logger.info(f"開始處理文件: {filename}, 類型: {file_type}")
+                result = processors[file_type](file_bytes, filename)
+                self.logger.info(f"文件處理完成: {filename}, 狀態: {result.get('processed', False)}")
+                return result
+            except Exception as e:
+                error_msg = f"處理文件時出錯: {str(e)}"
+                self.logger.error(f"處理文件失敗 {filename}: {error_msg}")
+                return {"text_content": error_msg, "file_type": file_type, "processed": False, "error": str(e)}
         except Exception as e:
+            self.logger.exception("process_file top-level exception")
+            return {"text_content": str(e), "file_type": file_type or "unknown", "processed": False, "error": str(e)}
     def _process_pdf(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         if not PDF_SUPPORT:
+            return {"text_content": "PDF 處理不可用，請安裝 PyMuPDF 或 pypdf 或 pdftotext", "file_type": "pdf", "processed": False}
+        if PDF_BACKEND == "pymupdf" and fitz is not None:
+            try:
+                doc = fitz.open(stream=file_data, filetype="pdf")
+                text_content = ""
+                page_count = len(doc)
+                for page_num in range(page_count):
+                    page = doc[page_num]
+                    text_content += page.get_text()
+                doc.close()
+                return {"text_content": text_content, "page_count": page_count, "file_type": "pdf", "processed": True, "content_length": len(text_content)}
+            except Exception as e:
+                self.logger.warning(f"PyMuPDF extraction failed: {e}")
+        if pypdf is not None:
+            try:
+                reader = pypdf.PdfReader(BytesIO(file_data))
+                text_content = []
+                for p in reader.pages:
+                    try:
+                        text_content.append(p.extract_text() or "")
+                    except Exception:
+                        text_content.append("")
+                full = "\n".join(text_content)
+                return {"text_content": full, "page_count": len(reader.pages), "file_type": "pdf", "processed": True, "content_length": len(full)}
+            except Exception as e:
+                self.logger.warning(f"pypdf extraction failed: {e}")
+        if PDFTOTEXT_CLI:
+            tmp_path = None
+            try:
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+                    tmp.write(file_data)
+                    tmp.flush()
+                    tmp_path = tmp.name
+                out_txt = tmp_path + ".txt"
+                subprocess.run(["pdftotext", "-layout", tmp_path, out_txt], check=True, timeout=30)
+                text_content = ""
+                if os.path.exists(out_txt):
+                    with open(out_txt, "r", encoding="utf-8", errors="ignore") as f:
+                        text_content = f.read()
+                    os.remove(out_txt)
+                os.remove(tmp_path)
+                return {"text_content": text_content, "page_count": None, "file_type": "pdf", "processed": True, "content_length": len(text_content)}
+            except Exception as e:
+                self.logger.warning(f"pdftotext CLI extraction failed: {e}")
+                try:
+                    if tmp_path and os.path.exists(tmp_path):
+                        os.remove(tmp_path)
+                except Exception:
+                    pass
+        return {"text_content": "PDF 處理失敗: 無可用的解析後備方法", "file_type": "pdf", "processed": False, "error": "no_pdf_backend"}
     def _process_excel(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         try:
             excel_file = BytesIO(file_data)
             if filename.endswith('xlsx'):
                 df = pd.read_excel(excel_file, engine='openpyxl')
             else:
+                df = pd.read_excel(excel_file)
+            text_content = f"Excel 文件: {filename}\n行數: {len(df)}, 列數: {len(df.columns)}\n\n"
             text_content += "列名: " + ", ".join(df.columns.astype(str)) + "\n\n"
+            text_content += "數據預覽(前5行):\n" + df.head().to_string()
             numeric_columns = df.select_dtypes(include=['number']).columns
             if not numeric_columns.empty:
                 text_content += f"\n\n數值列統計:\n{df[numeric_columns].describe().to_string()}"
+            result = {"text_content": text_content, "sheet_count": 1, "row_count": len(df), "column_count": len(df.columns), "file_type": "excel", "processed": True, "content_length": len(text_content)}
             self.logger.info(f"Excel 處理成功: {filename}, 行: {len(df)}, 列: {len(df.columns)}")
             return result
         except Exception as e:
             self.logger.error(f"Excel 處理失敗 {filename}: {str(e)}")
+            return {"text_content": f"Excel 處理失敗: {str(e)}", "file_type": "excel", "processed": False, "error": str(e)}
     def _process_word(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         try:
+            if not DOCX_SUPPORT:
+                return {"text_content": "Word 處理不可用，請安裝 python-docx", "file_type": "word", "processed": False}
             doc = Document(BytesIO(file_data))
             text_content = ""
             paragraphs = []
             for paragraph in doc.paragraphs:
                 if paragraph.text.strip():
                     text_content += paragraph.text + "\n"
                     paragraphs.append(paragraph.text)
             tables_text = ""
             for table in doc.tables:
                 for row in table.rows:
                         row_text.append(cell.text.strip())
                     tables_text += " | ".join(row_text) + "\n"
                 tables_text += "\n"
             if tables_text:
                 text_content += "\n表格內容:\n" + tables_text
+            result = {"text_content": text_content, "paragraph_count": len(paragraphs), "table_count": len(doc.tables), "file_type": "word", "processed": True, "content_length": len(text_content)}
             self.logger.info(f"Word 處理成功: {filename}, 段落數: {len(paragraphs)}, 表格數: {len(doc.tables)}")
             return result
         except Exception as e:
             self.logger.error(f"Word 處理失敗 {filename}: {str(e)}")
+            return {"text_content": f"Word 處理失敗: {str(e)}", "file_type": "word", "processed": False, "error": str(e)}
     def _process_image(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         if not OCR_SUPPORT:
+            return {"text_content": "OCR 處理不可用", "file_type": "image", "processed": False}
         try:
             image = Image.open(BytesIO(file_data))
             if image.mode != 'RGB':
                 image = image.convert('RGB')
             max_size = (2000, 2000)
             image.thumbnail(max_size, Image.Resampling.LANCZOS)
             text_content = pytesseract.image_to_string(image, lang='chi_sim+eng')
+            result = {"text_content": text_content or "未識別到文字", "image_size": image.size, "file_type": "image", "processed": bool(text_content.strip()), "ocr_used": True, "content_length": len(text_content)}
             if text_content.strip():
                 self.logger.info(f"圖片 OCR 成功: {filename}, 識別文字長度: {len(text_content)}")
             else:
                 self.logger.warning(f"圖片 OCR 未識別到文字: {filename}")
             return result
         except Exception as e:
             self.logger.error(f"圖片處理失敗 {filename}: {str(e)}")
+            return {"text_content": f"圖片處理失敗: {str(e)}", "file_type": "image", "processed": False, "error": str(e)}
     def _process_text(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         try:
             encodings = ['utf-8', 'gbk', 'gb2312', 'latin-1', 'cp1252']
             text_content = None
             used_encoding = None
             for encoding in encodings:
                 try:
                     text_content = file_data.decode(encoding)
                     break
                 except UnicodeDecodeError:
                     continue
             if text_content is None:
                 text_content = file_data.decode('utf-8', errors='ignore')
                 used_encoding = 'utf-8 (with errors ignored)'
+            result = {"text_content": text_content, "file_type": "text", "processed": True, "encoding": used_encoding, "content_length": len(text_content)}
             self.logger.info(f"文本處理成功: {filename}, 編碼: {used_encoding}, 長度: {len(text_content)}")
             return result
         except Exception as e:
             self.logger.error(f"文本處理失敗 {filename}: {str(e)}")
+            return {"text_content": f"文本處理失敗: {str(e)}", "file_type": "text", "processed": False, "error": str(e)}
     def get_file_info(self, file_data: bytes, filename: str) -> Dict[str, Any]:
         file_type = filename.split('.')[-1].lower() if '.' in filename else 'unknown'
         file_size = len(file_data)
+        info = {"filename": filename, "file_type": file_type, "file_size": file_size, "file_size_human": self._format_file_size(file_size), "supported": file_type in self.supported_types}
         return info
     def _format_file_size(self, size_bytes: int) -> str:
         if size_bytes == 0:
             return "0 B"
         size_names = ["B", "KB", "MB", "GB"]
         i = 0
         while size_bytes >= 1024 and i < len(size_names) - 1:
             size_bytes /= 1024.0
             i += 1
         return f"{size_bytes:.2f} {size_names[i]}"
 def setup_logging():
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 setup_logging()